import requests
import sys
class Tieba(object):
    """Fetch the first pages of a Baidu Tieba forum and save each as an HTML file.

    The list of page URLs is built once in ``__init__``; ``run`` downloads
    every URL in order and writes the raw response bodies to
    ``<tieba_name>_<index>.html`` in the current working directory.
    """

    def __init__(self, tieba_name, pn):
        """Prepare the page URLs and the request headers.

        Args:
            tieba_name: Forum name, used as the ``kw`` query parameter and
                as the prefix of the saved file names.
            pn: Number of pages to fetch (an int); zero yields no URLs.
        """
        self.tieba_name = tieba_name
        # The '&' separates the ``kw`` and ``pn`` query parameters; without it
        # the page offset would be swallowed into the forum name.
        self.base_url = 'https://tieba.baidu.com/f?kw=%s&pn=' % (tieba_name)
        # The ``pn`` offset advances in steps of 50 per page
        # (presumably 50 threads per listing page -- confirm against the site).
        self.url_list = [self.base_url + str(i * 50) for i in range(pn)]
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36'
        }

    def get_page(self, url, timeout=10):
        """Return the raw response body (bytes) for ``url``.

        Args:
            url: Address to request with the instance's headers.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the whole run.
        """
        response = requests.get(url, headers=self.headers, timeout=timeout)
        return response.content

    def save_content(self, content, index):
        """Write ``content`` (bytes) to ``<tieba_name>_<index>.html``."""
        filename = self.tieba_name + '_' + str(index) + '.html'
        with open(filename, 'wb') as f:
            f.write(content)

    def run(self):
        """Download every prepared URL and save each response to its own file."""
        # enumerate supplies the page index directly instead of searching the
        # list for each URL.
        for index, url in enumerate(self.url_list):
            content = self.get_page(url)
            self.save_content(content, index)
if __name__ == '__main__':
    # Command line: <tieba_name> <page_count>. Extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit('usage: python %s <tieba_name> <page_count>' % sys.argv[0])
    name = sys.argv[1]
    try:
        pn = int(sys.argv[2])
    except ValueError:
        # Report a bad page count as a readable message, not a traceback.
        sys.exit('page_count must be an integer, got %r' % sys.argv[2])
    tieba = Tieba(name, pn)
    tieba.run()
# Usage (arguments: forum name, number of pages to download):
#     python <name_of_this_file> 美女 3