爬取纵横中文网小说章节,不包括VIP数据

import os.path

import requests
import time
import execjs
import pprint
import parsel
from tqdm import tqdm
import csv
# Output directory for the downloaded chapters. Despite the name, this is a
# directory prefix, not a file: it always ends with the platform's path
# separator so that later code can append a chapter file name directly.
# os.path.join(..., '') yields '书\\' on Windows (the original value) and
# '书/' elsewhere, instead of hard-coding the Windows separator.
filename = os.path.join('书', '')
# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(filename, exist_ok=True)

# Cookies sent with the chapter-list API request.
# NOTE(review): these look like values captured from one browser session
# (visit timestamps, analytics ids) and are probably stale -- confirm whether
# the API actually requires any of them.
cookies = dict(
    zhffr='cn.bing.com',
    ZHID='B7A06327B5D47F3A3EAFD4D226553291',
    sajssdk_2015_cross_new_user='1',
    zh_visitTime='1709812364050',
    Hm_lvt_c202865d524849216eea846069349eb9='1709812364',
    PassportCaptchaId='e376ededeee01bb4fcbada57c5fd694d',
    # URL-encoded JSON blob; split into adjacent literals purely for
    # readability (Python joins them into one string at compile time).
    sensorsdata2015jssdkcross=(
        '%7B%22distinct_id%22%3A%2218e18c1fef7581-08c49188d99a4-26001951-1327104-18e18c1fef886e%22'
        '%2C%22%24device_id%22%3A%2218e18c1fef7581-08c49188d99a4-26001951-1327104-18e18c1fef886e%22'
        '%2C%22props%22%3A%7B'
        '%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22'
        '%2C%22%24latest_referrer%22%3A%22%22'
        '%2C%22%24latest_referrer_host%22%3A%22%22'
        '%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22'
        '%7D%7D'
    ),
    Hm_lpvt_c202865d524849216eea846069349eb9='1709813125',
)

# HTTP headers for the chapter-list API request, grouped by purpose and then
# merged in the same key order as before. No 'Cookie' header is set here;
# cookie values live in the separate `cookies` mapping.
_content_headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
}
_origin_headers = {
    'Origin': 'https://read.zongheng.com',
    'Referer': 'https://read.zongheng.com/',
}
_fetch_metadata_headers = {
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
}
_browser_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
headers = {
    **_content_headers,
    **_origin_headers,
    **_fetch_metadata_headers,
    **_browser_headers,
}

# Form payload for the chapter-list request; 'bookId' is the only field sent.
data = dict(bookId='1250673')

# Seconds to wait for a server response; without an explicit timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

CHAPTER_LIST_URL = 'https://bookapi.zongheng.com/api/chapter/getChapterList'

# Minimal headers for fetching the public chapter pages (built once instead
# of on every loop iteration).
PAGE_HEADERS = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Chapter titles come from the remote API and end up in a file path, so every
# character that Windows forbids in file names (which includes both path
# separators) and every ASCII control character is mapped to '_'.
_UNSAFE_FILENAME_CHARS = str.maketrans({
    **{ch: '_' for ch in '\\/:*?"<>|'},
    **{chr(code): '_' for code in range(32)},
})


def sanitize_filename(name):
    """Return *name* made safe to use as a single file-name component.

    Forbidden and control characters are replaced with ``'_'`` and
    surrounding whitespace is stripped. If nothing is left, ``'_'`` is
    returned so the result is never empty.
    """
    cleaned = str(name).translate(_UNSAFE_FILENAME_CHARS).strip()
    return cleaned or '_'


def fetch_chapter_list():
    """Return the chapter entries of the first ``chapterList`` item.

    Posts the module-level ``data`` payload (with ``cookies`` and ``headers``)
    to the chapter-list API. Returns an empty list when the API reports no
    ``chapterList`` entries.

    Raises:
        requests.RequestException: on connection problems, timeouts or an
            HTTP error status.
    """
    response = requests.post(
        CHAPTER_LIST_URL,
        cookies=cookies,
        headers=headers,
        data=data,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    volumes = response.json()['result']['chapterList']
    if not volumes:
        return []
    # Only the first entry is read, as before.
    # NOTE(review): presumably later entries hold the VIP volumes -- confirm.
    return volumes[0]['chapterViewList']


def download_chapter(chapter):
    """Fetch one chapter page and save its paragraph text under ``filename``.

    *chapter* is one entry of ``chapterViewList``; its ``chapterId``,
    ``chapterName``, ``createTime`` and ``bookId`` keys are read.

    Raises:
        requests.RequestException: on connection problems, timeouts or an
            HTTP error status (so an error page is never saved as a chapter).
    """
    chapter_id = chapter['chapterId']
    chapter_name = chapter['chapterName']
    print(chapter_id, chapter_name, chapter['createTime'])

    url = f"https://read.zongheng.com/chapter/{chapter['bookId']}/{chapter_id}.html"
    page = requests.get(url=url, headers=PAGE_HEADERS, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()

    paragraphs = parsel.Selector(page.text).css('div.content p::text').getall()
    target = os.path.join(filename, sanitize_filename(chapter_name) + '.text')
    with open(target, mode='w', encoding='utf-8') as f:
        f.write('\n'.join(paragraphs))


def main():
    """Download every listed chapter, skipping chapters whose request fails."""
    for chapter in fetch_chapter_list():
        try:
            download_chapter(chapter)
        except requests.RequestException as exc:
            # One unreachable chapter should not abort the whole run.
            print(f"skipped chapter {chapter.get('chapterId')}: {exc}")


if __name__ == '__main__':
    main()

结果展现:

 

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

努力学习各种软件

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值