# coding: gb2312
import urllib
import re
import urllib2
import gzip
from xmlrpclib import gzip_decode
import sys
import time
def getHtml(url):
    """Fetch *url* with browser-like request headers and return the body.

    The request advertises ``gzip, deflate`` support, so the body is
    decompressed according to the ``Content-Encoding`` response header.
    A response without a recognised content encoding is returned as
    received.

    Args:
        url: Address of the page to download.

    Returns:
        The (decompressed) response body as a byte string.

    Raises:
        urllib2.URLError: If the request fails or times out.
    """
    import zlib  # local import: only needed for deflate-encoded responses

    # NOTE(review): the Cookie below is a hard-coded browser session and is
    # presumably stale by now -- confirm whether the site still needs it.
    req_header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'http://cl.wecl.biz/thread0806.php?fid=7',
        'Cookie': '__cfduid=d4b20a829ffba3d7e390aa12bb7a02bc11451978228; CNZZDATA950900=cnzz_eid%3D947358398-1451974316-%26ntime%3D1451979935; __utma=29374829.582752694.1451978230.1451978230.1451983608.2; __utmc=29374829; __utmz=29374829.1451978230.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=29374829.3.10.1451983608; 227c9_lastfid=7; 227c9_lastvisit=0%091451984992%09%2Fthread0806.php%3Ffid%3D7%26search%3D%26page%3D5; __utmt=1',
    }
    req_timeout = 10  # seconds

    req = urllib2.Request(url, None, req_header)
    resp = urllib2.urlopen(req, None, req_timeout)
    try:
        html = resp.read()
        encoding = (resp.info().get('Content-Encoding') or '').strip().lower()
    finally:
        # Always release the connection, even if reading fails.
        resp.close()

    if encoding in ('gzip', 'x-gzip'):
        html = gzip_decode(html)
    elif encoding == 'deflate':
        try:
            html = zlib.decompress(html)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib header.
            html = zlib.decompress(html, -zlib.MAX_WBITS)
    return html
def getNickname(html, path='c:/cao.md'):
    """Extract every ``by: <text>`` capture from *html* and append it to a file.

    Each captured value longer than one character is printed to stdout and
    appended to *path*, followed by a CR LF pair.  The file is opened in
    append mode (and therefore created) even when nothing matches.

    Args:
        html: Page text to scan.
        path: Output file to append to; defaults to the original
            hard-coded location.

    Returns:
        None.
    """
    name_pattern = re.compile(r'by: (.+)')
    names = name_pattern.findall(html)

    # The context manager closes the file even if a write fails.
    with open(path, 'a+') as out:
        for name in names:
            if len(name) > 1:
                print(name)
                out.write(name)
                # NOTE(review): in text mode on Windows this is presumably
                # written as "\r\r\n" -- confirm whether "\n" was intended.
                out.write("\r\n")
# Crawl listing pages 1..89 in order: download each page, record the names
# found on it, pause ten seconds, then report the page number just finished.
pagecount = 1
while pagecount < 90:
    _page_url = "http://cl.wecl.biz/thread0806.php?fid=7&search=&page=" + str(pagecount)
    getNickname(getHtml(_page_url))
    time.sleep(10)  # wait between consecutive requests
    print(pagecount)
    pagecount = pagecount + 1