# 此文章仅供学习参考
# http://book.zongheng.com/
# https://www.zongheng.com/books?worksTypes=6104 大类链接
# https://read.zongheng.com/chapter/1215587/68240827.html 第一章链接
# 章节列表项示例:
# <li class="vip col-4"><a href="https://book.zongheng.com/chapter/1284449/76763917.html" target="_blank" title="第225章、就在今夜! 字数:2034 更新时间:2024-03-11 22:40 ">第225章、就在今夜!</a></li>
# 加载工具模块
import os

import requests
from lxml import etree
# Request settings: send browser-like headers with every request.
# NOTE(review): the session cookie below is hard-coded; it presumably expires
# and should be refreshed (or moved out of the source) before real use.
url = 'https://book.zongheng.com/showchapter/1284449.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36',
    'Cookie': 'ZHID=94DB8B154A85AE873EE5532658AF6563; ver=2018; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221906991079d1e4f-06888a4be6cdcd-3f675015-1327104-1906991079e1793%22%2C%22%24device_id%22%3A%221906991079d1e4f-06888a4be6cdcd-3f675015-1327104-1906991079e1793%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; zh_visitTime=1719758030760; PassportCaptchaId=167327a4e811e11b8f784866c5da3d67; zhffr=0; Hm_lvt_c202865d524849216eea846069349eb9=1719758031,1719780859; Hm_lpvt_c202865d524849216eea846069349eb9=1719785764',
}

# Directory that receives one .txt file per chapter.
OUTPUT_DIR = './text'

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# Chapter links in the index page sit under <li> elements with one of two
# class values: the first selector was used for the first 20 chapters in the
# original script, the second for the "vip" ones that follow.
CHAPTER_XPATHS = (
    '//li[@class=" col-4"]/a',
    '//li[@class="vip col-4"]/a',
)

# Characters that are path separators, reserved on Windows, or line breaks;
# each is replaced by "_" when a chapter title becomes a file name.
_FILENAME_TABLE = str.maketrans(dict.fromkeys('\\/:*?"<>|\r\n\t', '_'))


def safe_filename(name):
    """Return *name* stripped and with file-system-hostile characters replaced.

    Surrounding whitespace is removed first, then every path separator,
    Windows-reserved character, or line break is replaced by ``_``.
    Falls back to ``'untitled'`` when nothing is left.
    """
    cleaned = name.strip().translate(_FILENAME_TABLE)
    return cleaned or 'untitled'


def fetch_html(page_url):
    """Download *page_url* with the module headers and return the parsed tree.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status.
    """
    res = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    return etree.HTML(res.text)


def save_chapter(name, chapter_url):
    """Download one chapter and write its paragraphs to a text file.

    The file is ``OUTPUT_DIR/<sanitized name>.txt``; each ``<p>`` under
    ``div.content`` contributes its first direct text node as one line.
    Paragraphs without a direct text node are skipped instead of raising
    ``IndexError``.

    Returns:
        The path of the file that was written.
    """
    page = fetch_html(chapter_url)
    lines = []
    for paragraph in page.xpath('//div[@class="content"]/p'):
        texts = paragraph.xpath('./text()')
        if texts:
            lines.append(texts[0] + '\n')
    path = os.path.join(OUTPUT_DIR, f'{safe_filename(name)}.txt')
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(lines)
    return path


def main():
    """Download every chapter listed on the index page at ``url``.

    Makes a single pass over the chapter selectors (the original looped
    forever), skipping links that lack an href or a title and chapters whose
    download fails.

    Returns:
        The number of chapter files written.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    index = fetch_html(url)
    saved = 0
    for chapter_xpath in CHAPTER_XPATHS:
        for link in index.xpath(chapter_xpath):
            hrefs = link.xpath('@href')
            titles = link.xpath('./text()')
            if not hrefs or not titles:
                continue
            try:
                save_chapter(titles[0], hrefs[0])
            except requests.RequestException as exc:
                # One unreachable chapter should not abort the whole run.
                print(f'skipped {titles[0]!r}: {exc}')
                continue
            saved += 1
    return saved


if __name__ == '__main__':
    main()
# 利用爬虫批量下载小说内容
# 最新推荐文章于 2024-07-19 15:44:38 发布