# 说明:无意滋生事端,仅学习分享,如有侵权,立即删除
import requests
import json
class QiushibaikeSpider():
    """Download jokes page by page and append them to a local text file.

    The spider builds a list of page URLs from a template, fetches each
    page, parses the body as JSON, pulls the joke text out of every item
    and finally writes all jokes to a UTF-8 text file in the current
    working directory.

    NOTE(review): the URL template points at the site's ``/text/`` listing
    while the response is parsed with ``json.loads`` -- confirm that the
    endpoint really answers with JSON.
    """

    # Upper bound on the number of items read from a single page response.
    ITEMS_PER_PAGE = 25

    def __init__(self, page_count=12, timeout=10):
        """Configure the URL template, request headers and crawl limits.

        Args:
            page_count: Number of pages to crawl, starting at page 1.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.url = 'https://www.qiushibaike.com/text/?page={}'
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Mobile Safari/537.36',
        }
        self.page_count = page_count
        self.timeout = timeout

    def get_url_list(self):
        """Return the page URLs for pages ``1..page_count`` (inclusive)."""
        return [self.url.format(page) for page in range(1, self.page_count + 1)]

    def _extract_contents(self, payload):
        """Return the joke text of the first ``ITEMS_PER_PAGE`` items.

        ``payload`` is the decoded JSON body: a sequence of objects that
        carry the text under ``["data"]["content"]``. Slicing (rather than
        indexing a fixed range) keeps short pages from raising IndexError.

        Raises:
            KeyError: If an item lacks the ``data``/``content`` keys.
        """
        return [item["data"]["content"] for item in payload[:self.ITEMS_PER_PAGE]]

    def get_response_content_lists(self, url_lists):
        """Fetch every URL and return one list of joke strings per page.

        Args:
            url_lists: Iterable of page URLs to request.

        Returns:
            A list with one entry per URL; each entry is the list of joke
            texts extracted from that page.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            ValueError: If a response body is not valid JSON.
        """
        response_content_lists = []
        for url in url_lists:
            # A timeout keeps a stalled server from hanging the whole crawl.
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            # Fail loudly on HTTP errors instead of feeding an error page to
            # the JSON parser.
            response.raise_for_status()
            payload = json.loads(response.content.decode())
            response_content_lists.append(self._extract_contents(payload))
        return response_content_lists

    def save_file(self, ret):
        """Append every joke in ``ret`` to a text file in the working directory.

        The file name embeds the actual number of jokes being written. Each
        joke is preceded by a line of 20 asterisks and followed by five
        newlines.

        Args:
            ret: List of pages, each page being a list of joke strings (the
                structure returned by ``get_response_content_lists``).
        """
        jokes = [joke for page in ret for joke in page]
        if not jokes:
            # Nothing was scraped: do not create an empty, misleadingly
            # named file.
            print("没有可保存的段子")
            return
        file_name = '糗事百科的{}个段子.txt'.format(len(jokes))
        # Open once in append mode instead of re-opening the file per joke.
        with open(file_name, 'a', encoding="utf8") as f:
            f.writelines("*" * 20 + "\n" + joke + '\n' * 5 for joke in jokes)
        print("保存成功,请查看")

    def run(self):
        """Run the whole pipeline: build URLs, fetch the jokes, save them."""
        # Build the list of page URLs.
        url_lists = self.get_url_list()
        print(len(url_lists))
        # Download and extract the joke texts.
        response_content_lists = self.get_response_content_lists(url_lists)
        # Persist them to disk.
        self.save_file(response_content_lists)
if __name__ == '__main__':
    # Script entry point: create the spider and run the full
    # fetch-and-save pipeline.
    spider = QiushibaikeSpider()
    spider.run()