本宅女,小说是本命,哈哈哈哈哈。
废话少说,上代码。
import requests
from lxml import etree
# Download one chapter page from book.zongheng.com and save the novel name,
# author, chapter title and chapter text to "<novel name>.txt".
url = "http://book.zongheng.com/chapter/914739/59152253.html"
# Send a desktop-browser User-Agent header with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
}
# A timeout keeps the script from blocking forever if the server never answers.
response = requests.get(url, headers=headers, timeout=10)
# Stop on HTTP errors (4xx/5xx) instead of trying to parse an error page.
response.raise_for_status()
response.encoding = 'utf-8'  # decode the body as UTF-8 to avoid garbled text
res = response.text
page = etree.HTML(res)
# The XPath expressions below were taken from the page's HTML source.
# Each [0] raises IndexError if the page layout no longer matches.
name = page.xpath('//div[@class="reader_crumb"]/a[3]//text()')[0]  # novel name
auth = page.xpath('//div[@class="bookinfo"]/a[1]//text()')[0]  # author name
title = page.xpath('//div[@class="title_txtbox"]//text()')[0]  # chapter title
content = page.xpath('//div[@class="content"]//p//text()')  # chapter text nodes
text = '\n'.join(content)  # one text node per line
with open('%s.txt' % name, 'w', encoding='utf-8') as f:
    f.write(name + '\r\n' + auth + '\r\n' + title + '\r\n' + text)
效果图如下: