# Example: crawling Baidu Tieba (the "lol" forum)
import requests
class TiebaSpider:
def __init__(self,tieba_name_crawl):
"""
初始化必要参数,完成基础设置
"""
self.tieba_name = tieba_name_crawl
self.url_base = "https://tieba.baidu.com/f?kw=" + tieba_name_crawl + "&ie=utf-8&pn={}"
self.headers = {"User-Agent":"ABC"}
def make_url_lists(self):
"""
生成下载列表
:return: 下载列表
"""
return [self.url_base.format(i * 50) for i in range(4)]
def download_url(self, url_str, timeout=10):
    """
    Fetch a page with ``requests.get`` and return the raw response body.

    :param url_str: URL to download
    :param timeout: seconds to wait for the server before giving up.
        Pass ``None`` to wait without limit (the behaviour before this
        parameter existed).
    :return: the response body as bytes (``Response.content``)
    :raises requests.exceptions.Timeout: if the server does not answer
        within ``timeout`` seconds
    """
    # Without an explicit timeout requests can block forever on a stalled
    # connection, which would hang the whole crawl.
    response = requests.get(url_str, headers=self.headers, timeout=timeout)
    return response.content
def save_result(self,result,page_num):
""