简单爬一下糗事百科
整合代码如下
import requests
class QiuShiBaiKe():
    """Download listing pages from qiushibaike.com and save each as an HTML file.

    Pages are numbered from 1. Only the pages *before* the page number given
    to the constructor are fetched (the bound itself is excluded).
    """

    def __init__(self, pages, timeout=10):
        """Store the download settings.

        :param pages: exclusive upper bound on the page numbers to download;
            pages ``1 .. pages - 1`` are fetched.
        :param timeout: seconds to wait on each HTTP request before
            ``requests`` raises a timeout error.
        """
        self.pages_download = pages
        self.timeout = timeout
        self.base_url = "https://www.qiushibaike.com/8hr/page/{}/"

    def make_url_list(self):
        """Build the list of URLs to download.

        :return: page URLs for page numbers 1 up to, but not including,
            ``self.pages_download``; empty when that bound is 1 or less.
        """
        return [self.base_url.format(i) for i in range(1, self.pages_download)]

    def save_url(self, content, file_name):
        """Write downloaded content to a file, replacing any existing file.

        :param content: raw bytes to store (the file is opened in binary mode).
        :param file_name: path of the file to write.
        :return: None
        """
        with open(file_name, 'wb') as f:
            f.write(content)

    def download_url(self, url_strr):
        """Fetch the response body at the given URL.

        :param url_strr: URL to request.
        :return: the response body as bytes.
        """
        # requests applies no timeout by default, so an unresponsive server
        # would otherwise block the whole crawl indefinitely.
        response = requests.get(url_strr, timeout=self.timeout)
        return response.content

    def run(self):
        """Download every generated URL and save each page to its own file.

        Files are written to the current working directory and numbered
        from 1 in download order.
        """
        # enumerate yields the page number directly, avoiding a linear
        # list.index() search on every iteration.
        for page_number, url_str in enumerate(self.make_url_list(), start=1):
            result_content = self.download_url(url_str)
            file_name = "糗事百科第{}页.html".format(page_number)
            self.save_url(result_content, file_name)
# Build a downloader bounded at page 5 and start the crawl right away;
# this runs at module level, so it executes whenever the file is run or imported.
QS = QiuShiBaiKe(pages=5)
QS.run()