classPaChong(object):def__init__(self):self.tiebaName="python"self.page=1self.end=3self.url="https://tieba.baidu.com/f?"self.header={"User-Agent":"Mozilla/5.0(WindowsNT6....
class PaChong(object):
    """Crawl thread links from the listing pages of a Baidu Tieba forum.

    ``tiebaUrl`` builds one listing URL per page in ``[page, end]`` and
    passes each to ``biaoTi``, which downloads the page, extracts the thread
    hrefs and forwards every absolute thread URL to ``self.lianJie``.

    NOTE(review): ``lianJie`` is not defined in the visible part of this
    class; it must be provided elsewhere for ``biaoTi`` to work.
    """

    def __init__(self):
        # Forum name, sent as the ``kw`` query parameter.
        self.tiebaName = "python"
        # First and last listing page to crawl (both inclusive).
        self.page = 1
        self.end = 3
        # Base listing URL; the encoded query string is appended to it.
        self.url = "https://tieba.baidu.com/f?"
        # Request headers sent with every listing-page request.
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
        }
        # Not read by any method shown here; presumably a counter used when
        # naming output files -- TODO confirm against the rest of the class.
        self.fileName = 1

    def tiebaUrl(self):
        """Build the listing URL for each page and hand it to ``biaoTi``."""
        for i in range(self.page, self.end + 1):
            # The ``pn`` parameter advances by 50 per page.
            pn = (i - 1) * 50
            query = urllib.parse.urlencode({'pn': pn, 'kw': self.tiebaName})
            self.biaoTi(self.url + query)

    def biaoTi(self, url):
        """Fetch one listing page and follow every thread link found on it.

        Args:
            url: Listing-page URL to download.
        """
        req = request.Request(url, headers=self.header)
        # Close the HTTP response deterministically once the body is read.
        with request.urlopen(req) as response:
            data = response.read()

        html = etree.HTML(data)
        hrefs = html.xpath('//div[@class="threadlist_lz clearfix"]/div/a/@href')

        # The extracted hrefs are prefixed with the site root to form the
        # absolute thread URL.
        for href in hrefs:
            self.lianJie("https://tieba.baidu.com" + href)
展开