利用爬虫批量下载小说内容

# 此文章仅供学习参考
#   http://book.zongheng.com/
#   https://www.zongheng.com/books?worksTypes=6104  大类链接

#   https://read.zongheng.com/chapter/1215587/68240827.html   第一章链接
#   章节列表中的条目示例:
#   <li class="vip col-4"><a href="https://book.zongheng.com/chapter/1284449/76763917.html" target="_blank"
#   title="第225章、就在今夜! 字数:2034 更新时间:2024-03-11 22:40 ">第225章、就在今夜!</a>
#   </li>

#   加载工具模块
import os
import re

import requests
from lxml import etree
# Chapter index page of the book to download.
url = 'https://book.zongheng.com/showchapter/1284449.html'
# Request headers that make the crawler look like a regular browser session.
# NOTE(review): the Cookie below is a hard-coded session value copied from a
# browser; it will expire and should not be committed to shared code.
headers= {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36',
    'Cookie':'ZHID=94DB8B154A85AE873EE5532658AF6563; ver=2018; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221906991079d1e4f-06888a4be6cdcd-3f675015-1327104-1906991079e1793%22%2C%22%24device_id%22%3A%221906991079d1e4f-06888a4be6cdcd-3f675015-1327104-1906991079e1793%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; zh_visitTime=1719758030760; PassportCaptchaId=167327a4e811e11b8f784866c5da3d67; zhffr=0; Hm_lvt_c202865d524849216eea846069349eb9=1719758031,1719780859; Hm_lpvt_c202865d524849216eea846069349eb9=1719785764'
}

# Characters that are not allowed in file names on common file systems
# (Windows reserved characters plus ASCII control characters).
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# Both <li> variants found on the chapter index page. The original script
# notes that the class changes after the first 20 chapters; the XPath union
# returns the links of both kinds in a single pass, in document order.
_CHAPTER_LINK_XPATH = '//li[@class=" col-4"]/a | //li[@class="vip col-4"]/a'


def safe_filename(name):
    """Return *name* turned into a string that is safe to use as a file name.

    Surrounding whitespace is removed, reserved and control characters are
    replaced with ``_``, and trailing dots/spaces are dropped. An empty
    result falls back to ``'untitled'``.
    """
    cleaned = _INVALID_FILENAME_CHARS.sub('_', name.strip()).rstrip('. ')
    return cleaned or 'untitled'


def fetch_html(page_url):
    """Download *page_url* and return it as a parsed lxml tree.

    Raises ``requests.RequestException`` on network errors, timeouts and
    non-2xx HTTP status codes. May return ``None`` when the response body is
    empty and lxml cannot build a document from it.
    """
    res = requests.get(page_url, headers=headers, timeout=10)
    res.raise_for_status()
    return etree.HTML(res.text)


def download_chapter(chapter_url, chapter_name, out_dir='./text'):
    """Save the text of one chapter to ``<out_dir>/<chapter_name>.txt``.

    Returns ``True`` when a file was written and ``False`` when the chapter
    page contained no paragraph text, in which case no file is created.
    """
    page = fetch_html(chapter_url)
    if page is None:
        return False

    # string(.) collects all text below the <p>, so a paragraph without a
    # direct text node yields '' instead of raising IndexError.
    paragraphs = [p.xpath('string(.)') for p in page.xpath('//div[@class="content"]/p')]
    paragraphs = [text for text in paragraphs if text]
    if not paragraphs:
        return False

    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, f'{safe_filename(chapter_name)}.txt')
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(text + '\n' for text in paragraphs)
    return True


def main():
    """Download every chapter linked from the index page at ``url``."""
    index = fetch_html(url)
    if index is None:
        print(f'No parsable content at {url}')
        return

    # Each chapter is visited exactly once; the original `while True` loop
    # never terminated and re-downloaded the same list on every pass.
    for link in index.xpath(_CHAPTER_LINK_XPATH):
        hrefs = link.xpath('@href')
        names = link.xpath('./text()')
        if not hrefs or not names:
            # Skip malformed entries instead of crashing on [0].
            continue
        chapter_url, chapter_name = hrefs[0], names[0]
        try:
            saved = download_chapter(chapter_url, chapter_name)
        except requests.RequestException as exc:
            # One failing chapter should not abort the whole run.
            print(f'Failed to download {chapter_name!r}: {exc}')
            continue
        if not saved:
            print(f'No content found for {chapter_name!r} ({chapter_url})')


if __name__ == '__main__':
    main()


  • 16
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值