import requests,re,os
from lxml import etree
# Download novels as .txt files into a "读书" folder on the Desktop.
# Flow: search the site by title -> for each search result, fetch its
# chapter catalog -> fetch every chapter page and extract the <p> paragraphs.

# Build the output directory portably: the original embedded a Windows
# backslash ("Desktop\读书") inside one string, which defeats os.path.join
# on non-Windows systems.
save_dir = os.path.join(os.path.expanduser("~"), "Desktop", "读书")
# makedirs with exist_ok avoids the mkdir race / FileExistsError and also
# creates "Desktop" itself if it is missing.
os.makedirs(save_dir, exist_ok=True)

keyword = input('输入要下载的书:')
search_url = 'https://wxzpyd.com/search?name=' + keyword + '&type=3'
# timeout so a hung server cannot block the script forever
search_resp = requests.get(search_url, timeout=30)
search_tree = etree.HTML(search_resp.text)
# Renamed from `list` — do not shadow the builtin.
results = search_tree.xpath('//*[@id="js_page_scroll"]/ul/li')

# Compile once, outside the chapter loop (original recompiled per chapter).
# Matches the text content inside <p></p> tags.
paragraph_re = re.compile(r'<p>(.*?)</p>')

for item in results:
    href = item.xpath('a/@href')[0]             # link of this book
    book_id = href.split('?')[0].split('/')[2]  # the book-id segment of the URL
    title = item.xpath('em/@data-name')[0]      # book title

    # Chapter catalog page. NOTE(review): the `source` query parameter is a
    # hard-coded id from one sample book; the site appears to ignore it for
    # the catalog lookup — kept as-is from the original.
    catalog_url = ('https://wxzpyd.com/novel/catalog/' + book_id +
                   '?source=%2Fnovel%2F60a0e4081da8d12e94d6c4c7%3Ffrom%3Dsearch')
    catalog_resp = requests.get(catalog_url, timeout=30)
    catalog_tree = etree.HTML(catalog_resp.text)
    chapters = catalog_tree.xpath('//*[@id="app"]/div[2]/main/ul/li')
    print(title)

    with open(os.path.join(save_dir, title + '.txt'), 'w', encoding='utf-8') as file:
        for chapter in chapters:
            chapter_href = chapter.xpath('a/@href')[0]    # link of this chapter
            chapter_url = 'https://www.wxzpyd.com/' + chapter_href
            chapter_title = chapter.xpath('a/text()')[0]  # chapter name
            file.write(chapter_title + '\n')
            print(chapter_title)
            chapter_resp = requests.get(chapter_url, timeout=30)
            # Write each matched paragraph on its own line.
            for paragraph in paragraph_re.findall(chapter_resp.text):
                file.write(paragraph + '\n')
    print(title + '-下载完毕')
# https://wxzpyd.com/search?name=被讨厌的勇气&type=3 #搜索某书
# https://wxzpyd.com/novel/60a0e4081da8d12e94d6c4c7 ?from=search #书的详情页
# https://wxzpyd.com/novel/catalog/60a0e4081da8d12e94d6c4c7? source=%2Fnovel%2F60a0e4081da8d12e94d6c4c7%3Ffrom%3Dsearch #目录页
# https://wxzpyd.com/novel/catalog/6138428bac7aff6458363c15? source=%2Fnovel%2F6138428bac7aff6458363c15%3Ffrom%3Dsearch
# https://wxzpyd.com/novel/catalog/610fadf97dbb3f28a8755413? source=%2Fnovel%2F610fadf97dbb3f28a8755413%3Ffrom%3Dsearch
# 输入要下载的书名，下载保存到桌面一个"读书"的文件夹(.txt)中