from openpyxl import Workbook
class TiebaSpider(object):
    """Build Baidu Tieba listing-page URLs for one forum and export them to Excel.

    Despite the "spider" name, no method in this class performs an HTTP
    request: it only generates paginated URLs and writes them, together
    with a per-page label, to the workbook ``百度贴吧.xlsx``.
    """

    # Amount by which the ``pn`` query parameter grows from one page to the next.
    PAGE_STEP = 50
    # Number of pages generated when the caller does not ask for a specific count.
    DEFAULT_PAGE_COUNT = 10

    def __init__(self, tieba_name: str) -> None:
        """Store the forum name and prepare the URL template.

        Args:
            tieba_name: Forum keyword placed in the ``kw`` query parameter.
                It is inserted verbatim (no URL-encoding is applied here).
        """
        self.tieba_name = tieba_name
        # The trailing ``{}`` is a placeholder for the ``pn`` offset and is
        # filled in by get_url_list().
        self.tieba_url = 'https://tieba.baidu.com/f?kw=' + tieba_name + '&ie=utf-8&pn={}'
        # Request headers. No method of this class reads them; the
        # User-Agent value is a placeholder meaning "fill in your own".
        self.headers = {
            'User-Agent': '填自己的',
            'Host': 'tieba.baidu.com'
        }

    def get_url_list(self, page_count: int = DEFAULT_PAGE_COUNT) -> list:
        """Return the listing-page URLs for the first ``page_count`` pages.

        Args:
            page_count: How many consecutive pages to generate, starting at
                offset 0. Defaults to 10.

        Returns:
            A list of URL strings whose ``pn`` values are 0, 50, 100, ...
        """
        return [
            self.tieba_url.format(page_index * self.PAGE_STEP)
            for page_index in range(page_count)
        ]

    def save_html(self, files_path, tieba_url_list) -> None:
        """Write the page labels and URLs to ``百度贴吧.xlsx``.

        Note that, despite its name, this method saves an Excel workbook and
        not HTML. Row 1 holds the header; every following row holds one
        label (column 1) and its URL (column 2).

        Args:
            files_path: Page labels, e.g. "python第1页" ("python, page 1").
            tieba_url_list: URLs, position-aligned with ``files_path``.
                If the two sequences differ in length, the surplus entries
                of the longer one are ignored.
        """
        # Create a new workbook and take its default worksheet.
        wb = Workbook()
        sheet = wb.active
        # Rename the worksheet.
        sheet.title = '百度贴吧'
        sheet.append(['页码', '链接'])
        # One row per page; append() continues on the row right after the
        # header, so the rows land in 2, 3, ... like explicit cell writes.
        for page_label, page_url in zip(files_path, tieba_url_list):
            sheet.append([page_label, page_url])
        wb.save('百度贴吧.xlsx')

    def run(self) -> None:
        """Generate the URLs, derive one label per page, and export both."""
        # 1. Build the URL list.
        tieba_url_list = self.get_url_list()
        print(tieba_url_list)
        # 2. Build a 1-based label for each page. Counting positions directly
        #    avoids a list.index() lookup per URL, which is quadratic and
        #    would return the first match if two URLs were ever equal.
        files_path = [
            '{}第{}页'.format(self.tieba_name, page_num)
            for page_num in range(1, len(tieba_url_list) + 1)
        ]
        print(files_path)
        # 3. Save the labels and URLs.
        self.save_html(files_path, tieba_url_list)
if __name__ == '__main__':
    # Script entry point; pass any other forum name you want to look at.
    tb_spider = TiebaSpider('lol')
    tb_spider.run()