继上次用类的方式爬取单页单章小说之后,这次准备爬取整部小说时遇到了一点困难,于是先改用函数(面向过程)的写法试试效果。
代码如下:
'''
Function-based (procedural) version.
Scrape the novel "龙井迷案" from the 17K novel site (17k.com).
'''
# Imports (requests, lxml and fake_useragent are third-party; time is standard library)
import requests
from lxml import etree
import time
from fake_useragent import UserAgent
# Build the request headers shared by every HTTP call in this script.
# NOTE(review): ua.random is read once here, so all requests in a single run
# send the same User-Agent; presumably a fresh value per request was the
# intent of "random" — confirm.
ua = UserAgent()
headers = {'User-Agent':ua.random}
# Fetch a page and return its HTML text
def get_html(url, timeout=10):
    """Download ``url`` and return the response body decoded as text.

    Sleeps one second before each request to throttle the crawl, and sends
    the module-level ``headers``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        The response body decoded with ``bytes.decode()`` (UTF-8).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    time.sleep(1)  # be polite: at most one request per second
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly rather than parse an error page as if it were content.
    response.raise_for_status()
    return response.content.decode()
# Parse the catalogue page into a dict of chapter links
def paser_html(html):
    """Extract the chapter links from the catalogue page markup.

    Collects the ``href`` of every ``<a>`` found under
    ``<dl class="Volume">/<dd>`` and prefixes each with the site root.

    Args:
        html: Markup of the catalogue page as a string.

    Returns:
        A dict with a single key ``"href"`` holding the list of absolute
        chapter URLs, in document order.
    """
    tree = etree.HTML(html)
    chapter_links = []
    for relative_link in tree.xpath('//dl[@class="Volume"]/dd/a/@href'):
        chapter_links.append("https://www.17k.com" + relative_link)
    return {"href": chapter_links}
# Download every detail page and collect its paragraph text
def paser_detail(novel):
    """Fetch each linked detail page and gather its paragraph text.

    Pages are downloaded through ``get_html`` so the throttling and request
    settings live in one place instead of being duplicated here.

    Args:
        novel: Dict whose ``"href"`` entry is an iterable of page URLs, as
            produced by ``paser_html``.

    Returns:
        A flat list of the text nodes of every ``<p>`` directly inside a
        ``<div class="p">``, across all pages, in visiting order.
    """
    paragraphs = []
    for url in novel["href"]:
        page = etree.HTML(get_html(url))
        # extend() replaces the inner loop whose variable shadowed the list
        # it was iterating over.
        paragraphs.extend(page.xpath('//div[@class="p"]/p/text()'))
    return paragraphs
# Append the collected text to the output file
def save_page(text):
    """Append every string in ``text`` to the output file.

    The file is opened once for the whole batch rather than once per
    fragment, and always as UTF-8 so that Chinese text is written correctly
    regardless of the platform's default encoding.

    Args:
        text: Iterable of strings. They are written back to back, with no
            separator added between them.
    """
    with open('不愿负你,孤独一生.txt', 'a', encoding='utf-8') as f:
        f.writelines(text)
# Entry point: catalogue page -> chapter links -> chapter text -> file
def main():
    """Run the whole scrape for the hard-coded catalogue page.

    Downloads the catalogue, extracts the chapter links, downloads every
    chapter and appends the collected text to the output file.
    """
    catalog_url = "https://www.17k.com/list/3080392.html"
    chapter_index = paser_html(get_html(catalog_url))
    save_page(paser_detail(chapter_index))
# Run the scraper only when this file is executed as a script, not on import
if __name__ == '__main__':
    main()
爬取结果: