# Qidian (起点) free-novel scraper
import requests
from lxml import etree
# Fetch the book's chapter catalog from the Qidian AJAX endpoint and decode it
# as JSON into `page_text`.
# NOTE(review): the _csrfToken and bookId are hard-coded in the URL; the token
# is presumably tied to a browser session and may stop working -- confirm.
url = "https://book.qidian.com/ajax/book/category?_csrfToken=NQQ4Nj6JAE1plYVqeNqlZrucSvDAs1UBmByiWZfb&bookId=1887208"
headers = {
    # Browser-like User-Agent; reused by the chapter requests later on.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0',
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
response.raise_for_status()
response.encoding = 'utf-8'
page_text = response.json()
# Split the catalog into per-volume chapter lists.
#
# `list_detals[0]`, `[1]` and `[2]` receive the chapters of the volumes found
# at indices 1, 2 and 3 of data.vs; index 0 (and anything past 3) is ignored.
# Only the first collected volume keeps the chapter URL, which is what the
# download loop needs.
# NOTE(review): 'vN' / 'cs' / 'cN' / 'cU' look like volume name / chapters /
# chapter name / chapter URL -- confirm against the API response.
volumes = page_text['data']['vs']  # named `volumes` to avoid shadowing builtin `list`
list_detals = [[], [], []]
for index, volume in enumerate(volumes):
    if index == 1:
        nameone = volume['vN']
        list_detals[0].extend(
            {
                'chapter': entry['cN'],
                'book_url': entry['cU'],
                'id': entry['id'],
            }
            for entry in volume['cs']
        )
    elif index == 2:
        nametwo = volume['vN']
        list_detals[1].extend(
            {
                'chapter_1': entry['cN'],
                'id1': entry['id'],
            }
            for entry in volume['cs']
        )
    elif index == 3:
        nametreen = volume['vN']
        list_detals[2].extend(
            {
                'chapter_2': entry['cN'],
                'id2': entry['id'],
            }
            for entry in volume['cs']
        )
# Download every chapter collected in list_detals[0] and print the first text
# node of each paragraph in the reading area, followed by a progress line.
count = 0
for i in list_detals[0]:
    books_url = "https://read.qidian.com/chapter/" + i['book_url']
    # A timeout keeps one stalled chapter request from hanging the whole crawl.
    page_text = requests.get(url=books_url, headers=headers, timeout=10).text
    tree = etree.HTML(page_text)
    line = tree.xpath("//div[@class='read-content j_readContent ']/p")
    for li in line:
        texts = li.xpath("./text()")
        # A <p> with no direct text node yields an empty list; skip it rather
        # than crash with IndexError in the middle of the crawl.
        if not texts:
            continue
        print(texts[0])
    count += 1
    print("正在爬取第{}页".format(count))