3)最重要的是,很多异常处理还没有做;时间比较紧,很多调试用的输出也没有关闭。大家想交流的话,可以 email 我:[thomasliu83 AT gmail.com]
下面先贴进制之间相互转换的代码:
# -*- coding: utf-8 -*-
# file: baseconvert.py
"""Convert numbers between positional bases that use arbitrary digit alphabets."""

# Digit alphabets, ordered from the smallest digit to the largest.
BASE2 = "01"
BASE10 = "0123456789"
BASE16 = "0123456789ABCDEF"
BASE62 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz"
BASE36 = "0123456789abcdefghijklmnopqrstuvwxyz"


def baseconvert(number, fromdigits, todigits):
    """Convert ``number`` from the ``fromdigits`` alphabet to ``todigits``.

    ``number`` is converted with ``str()`` and read one character at a time,
    so it may be an int or a string.  Each alphabet lists its digits from
    smallest to largest; the base is the length of the alphabet.  A leading
    ``-`` is passed through to the result.  Zero is returned as the first
    element of ``todigits`` and never carries a sign.

    Raises ``ValueError`` when ``number`` has no digits, when it contains a
    character that is not in ``fromdigits``, or when ``todigits`` has fewer
    than two symbols.

    decimal to binary
    >>> baseconvert(555, BASE10, BASE2)
    '1000101011'

    binary to decimal
    >>> baseconvert('1000101011', BASE2, BASE10)
    '555'

    integer interpreted as binary and converted to decimal (!)
    >>> baseconvert(1000101011, BASE2, BASE10)
    '555'

    base10 to base4
    >>> baseconvert(99, BASE10, "0123")
    '1203'

    base4 to base5 (with alphabetic digits)
    >>> baseconvert(1203, "0123", "abcde")
    'dee'

    base5, alpha digits back to base 10
    >>> baseconvert('dee', "abcde", BASE10)
    '99'

    decimal to a base that uses A-Z0-9a-z for its digits
    >>> baseconvert(257938572394, BASE10, BASE62)
    'E78Lxik'

    ..convert back
    >>> baseconvert('E78Lxik', BASE62, BASE10)
    '257938572394'

    binary to a base with words for digits
    (the function cannot convert this back)
    >>> baseconvert('1101', BASE2, ('Zero', 'One'))
    'OneOneZeroOne'

    zero maps to the first output digit
    >>> baseconvert(0, BASE10, BASE2)
    '0'
    """
    text = str(number)
    negative = text.startswith('-')
    if negative:
        text = text[1:]
    if not text:
        raise ValueError("baseconvert() needs at least one digit")

    from_base = len(fromdigits)
    to_base = len(todigits)
    if to_base < 2:
        # A one-symbol alphabet would never shrink the value below.
        raise ValueError("todigits must contain at least two symbols")

    # Build the integer value; index() raises ValueError on unknown digits.
    value = 0
    for digit in text:
        value = value * from_base + fromdigits.index(digit)

    if value == 0:
        # The loop below emits nothing for zero, so answer explicitly.
        return todigits[0]

    # Peel off the least significant digit first, then reverse.
    pieces = []
    while value > 0:
        value, remainder = divmod(value, to_base)
        pieces.append(todigits[remainder])
    pieces.reverse()

    result = "".join(pieces)
    if negative:
        result = "-" + result
    return result
下面是主文件,mycookie.py[名字取的不好]
#--coding:utf8 -- #file: mycookie.py import httplib import urllib2,cookielib import urllib,types import BeautifulSoup def GetContent(source,id,name): """从html的source源代码中找出name对应的值""" #利用BeautifaulSoup解决问题 s = BeautifulSoup.BeautifulSoup(source) result = s.find('input',attrs={id:name}) if result: return result['value'] else: return'' def Process(opener,strlink,cookiejar,data=None,storefile=None): """根据strlink请求,结合cookiejar和要发送的data构造http数据包""" if opener==None and cookiejar==None and strlink==None: print'opener,strlink and cookiejar must be not null' return None #构造请求 req = urllib2.Request(strlink) #初步url cookiejar.add_cookie_header(req) #将此时cookie加入到请求头 req.add_header('User-Agent','Not IE 1.0 + Say Hello') #添加数据 if data and type(data)==types.DictType: p_data=urllib.urlencode(data) req.add_data(p_data) #添加数据到http包中 else: print'Notice: No data additional' #执行链接 link = opener.open(req) if storefile: #如果需要存储网页 try: file=open(storefile,'wb') file.write(link.read()) file.close() except Exception,x: print'error: store file error,',str(x),'' return None else: print'Notice: No file stored' return link def GetPass(): """初始获得登录csdn的passport""" """ req=urllib2.Request('http://passport.csdn.net/UserLogin.aspx?from=Passport.aspx') cook.add_cookie_header(req) #向req请求中添加cookie req.add_header('User-Agent','IE 7.9 + test') #req.add_header('Connection','Keep-Alive') link = opener.open(req) content= link.read() """ link = Process(opener,'http://passport.csdn.net/UserLogin.aspx?from=Passport.aspx',cook) ifnot link: print'error: link is None' exit() content = link.read() #解析__VIEWSTATE字符 strvalue=GetContent(content,'name','__VIEWSTATE') print strvalue #生成验证码字符串 import time a=time.time() astr = int(a*1000).__str__() print type(astr) BASE36 ="0123456789abcdefghijklmnopqrstuvwxyz" BASE10 ="0123456789" import baseconvert s = baseconvert.baseconvert(astr,BASE10,BASE36) print s #获取验证码的图片 """ req=urllib2.Request() #获取请求 cook.add_cookie_header(req) link=opener.open(req) f=open('a.jpeg','wb') 
f.write(link.read()) f.close() """ link=Process(opener, 'http://passport.csdn.net/ShowExPwd.aspx?temp='+s, cook, None, 'a.jpeg') ifnot link: print'error: link is None' exit() #输入验证码和帐号 user=raw_input('username:') passwd=raw_input('password:') code=raw_input('verifycode:') private=[] #此处为了应对pydev的控制台输入带字符'/r' for s in [user,passwd,code]: l=len(s) if s[l-1]=='': s=s[:l-1] private.append(s) if len(private) !=3: print'error: need 3 private data' exit() #初始化发送的数据 postdata={'__EVENTTARGET':'', '__EVENTARGUMENT':'', '__VIEWSTATE':strvalue, 'tb_LoginNameOrLoginEmail':private[0], 'tb_Password':private[1], 'ClientKey':'', 'tb_ExPwd':private[2], 'from':'.', 'MailParameters':'.', 'Image_Login.x':'0', 'Image_Login.y':'0'} #发送域 postdata['from']='http://passport.csdn.net/UserLogin.aspx?from=/Passport.aspx' #特殊情况,特殊处理[获取passport] req=urllib2.Request('http://passport.csdn.net/UserLogin.aspx?from=/Passport.aspx') cook.add_cookie_header(req) #填充头部的cookie cookies=cook._cookies_for_request(req) #从请求中获取cookies,[我用的这种方法,肯定还有其他方法] #从解析出的cookie中获取ClientKey clientkey='' for cookie in cookies: if cookie.name=='ClientKey': clientkey = cookie.value break #填充域 postdata['MailParameters']='from='+postdata['from'] req.add_header('User-Agent','Not IE 1.0 + Say Hello') #数据转换 data=urllib.urlencode(postdata) req.add_data(data) #添加data #print data #打开链接,即Post数据 link = opener.open(req) #如果此时链接打开完整,则可在cookie保存访问的帐号 print cook def PostBlog(title,content,tags): strlink='http://writeblog.csdn.net/PostEdit.aspx' link = Process(opener, strlink, cook, None, None) ifnot link: return html=link.read() view_state=GetContent(html,'name','__VIEWSTATE') event_validate=GetContent(html,'name','__EVENTVALIDATION') #构造即将发送的数据 data={'__LASTFOCUS':'', '__EVENTTARGET':'', '__EVENTARGUMENT':'', '__VIEWSTATE':view_state, 'ctl00$ContentPlaceHolder1$EntryEditor1$txbTitle':title+'[z]', 'ctl00$ContentPlaceHolder1$EntryEditor1$richTextEditor$richTextEditor':content, 'ctl00$ContentPlaceHolder1$EntryEditor1$txbTags':tags, 
'ctl00$ContentPlaceHolder1$EntryEditor1$txbExcerpt':content[:500]+'...', 'ctl00$ContentPlaceHolder1$EntryEditor1$SaveButton':'发表文章', 'ctl00$ContentPlaceHolder1$EntryEditor1$rblOri':'copy', 'ctl00$ContentPlaceHolder1$EntryEditor1$GlobalCategoryList':'', 'ctl00$ContentPlaceHolder1$EntryEditor1$ckbPublished':'on', 'ctl00$ContentPlaceHolder1$EntryEditor1$chkDisplayHomePage':'on', 'ctl00$ContentPlaceHolder1$EntryEditor1$chkIsAggregated':'on', 'ctl00$ContentPlaceHolder1$EntryEditor1$chkCopytoClipboard':'on', '__EVENTVALIDATION':event_validate} link = Process(opener,'http://writeblog.csdn.net/PostEdit.aspx', cook, data,'last.html') if link: print'send ok' else: print'send wrong' def GetAndPostArticle(links,type=2): """从links中读取出相应的内容,转存到发表""" ifnot links: print'No url link in links parameter' return __div_A='<div class="postText">' __div_B='</div>' for link in links: #读取相应的内容 content='' title='' try: htm=urllib.urlopen(link).read() s=BeautifulSoup.BeautifulSoup(htm) tmp=[] title=s.find('div',attrs={'class':'postTitle'}) title = str(title.a.contents[1])#获取文字信息 title= title.replace(' ', '') content = s.find('div',attrs={'class':'postText'}) print'----' """ for c in content.contents: try: print type(c),'-',c,'-' if type(c)!= types.StringType: c = str(c) tmp.append(c) except Exception,x: print 'error:',str(x),' ' pass content=''.join(tmp) """ content=content.__str__() con_len=len(content) content=content[len(__div_A):con_len-len(__div_B)] #print content[:200] tags='Linux,技术,IT,反汇编,调试,内存泄露,Win32,项目' PostBlog(title, content, tags) print'Article ',link,'Success! ^_^' except Exception,x: print'Article ',link,'Failed!!!!!!',str(x),'.....' 
pass pass ####### main entry ########## import sys reload(sys) sys.setdefaultencoding('gb2312') #global data #创建一个urllib的httphandle来设置调试状态,为了便于调试,可以设置为1 h=urllib2.HTTPHandler(debuglevel=0) #创建cookie保存jar cook=cookielib.CookieJar() cookie=urllib2.HTTPCookieProcessor(cook) #创建http开启 opener=urllib2.build_opener(cookie,h) GetPass() #这些links只是做一个示例 links=['http://blog.csdn.net/thomasliu83/articles/760339.aspx', 'http://blog.csdn.net/thomasliu83/articles/740153.aspx', 'http://blog.csdn.net/thomasliu83/articles/222986.aspx', 'http://blog.csdn.net/thomasliu83/articles/222975.aspx'] GetAndPostArticle(links,1) """#for test title='my tesdfgsdfgsfdgdsfgt 1' content='content slajf;ldsakjflkasdjfl;asjflka;sdjfl1' tags='tags 1' PostBlog(title,content,tags) PostBlog(title,content,tags) PostBlog(title,content,tags) """