小说网站链接:https://www.heenee.com/
整本小说下载代码:
import requests
import parsel
from prettytable import PrettyTable
from tqdm import tqdm
import os
import time
# 小说网站链接:https://www.heenee.com/
# 采集单章小说 采集整本小说 实现搜索下载
# 采集单章小说
def GetResponse(url, timeout=10):
    """Send a GET request with a browser User-Agent and return the response.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait for the server (default 10). Without a
            timeout, ``requests.get`` can block forever on a dead connection.

    Returns:
        The ``requests.Response`` object (caller reads ``.text`` / ``.json()``).
    """
    headers = {
        # Pretend to be desktop Chrome so the site does not reject the scraper.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response
def GetContent(href):
    """Download one chapter page and return its text, paragraphs joined by blank lines.

    Args:
        href: Site-relative chapter path (e.g. ``/quanben/.../1.html``).

    Returns:
        The chapter body as a single string.
    """
    page_url = 'https://www.heenee.com' + href
    page_html = GetResponse(page_url).text
    sel = parsel.Selector(page_html)
    paragraphs = sel.css('div #htmlContent p::text').getall()
    return '\n\n'.join(paragraphs)
def SaveContent(filename, title, content):
    """Append chapter text to ``<filename><title>.txt``.

    Args:
        filename: Directory prefix that already ends with a path separator.
        title: Chapter title used as the file name. Characters that are
            invalid in Windows file names are stripped, because titles
            scraped from the site may contain ``?``, ``"``, ``|`` etc.,
            which would make ``open()`` fail.
        content: Chapter text to write (UTF-8, append mode).
    """
    safe_title = ''.join(c for c in title if c not in '\\/:*?"<>|')
    with open(filename + safe_title + '.txt', mode='a', encoding='utf-8') as f:
        f.write(content)
def GetInfo(url='https://www.heenee.com/quanben/27/27001/'):
    """Fetch a novel's table-of-contents page and parse it.

    Args:
        url: TOC page URL. Defaults to the book this script originally
            hard-coded, so existing ``GetInfo()`` callers keep working,
            but any book's TOC can now be passed in.

    Returns:
        (name, title_list, href_list): the novel's name, the chapter
        titles, and the matching site-relative chapter hrefs.
    """
    html = GetResponse(url).text
    selector = parsel.Selector(html)
    # The <dd><h1> text is "book-name author ..."; keep only the book name.
    name = selector.css('dd h1::text').get().split(' ')[0]
    title_list = selector.css('.L a::text').getall()
    href_list = selector.css('.L a::attr(href)').getall()
    return name, title_list, href_list
if __name__ == '__main__':
    name, title_list, href_list = GetInfo()
    # Use os.sep instead of a hard-coded Windows '\\' so the script also
    # runs on Linux/macOS; downstream code only needs the trailing separator.
    filename = name + os.sep
    # exist_ok avoids the check-then-create race of exists()+mkdir().
    os.makedirs(filename, exist_ok=True)
    # zip() has no length, so pass total= for tqdm to show a real percentage.
    for title, href in tqdm(zip(title_list, href_list), total=len(title_list)):
        time.sleep(1)  # throttle requests to be polite to the server
        content = GetContent(href)
        SaveContent(filename, title, content)
        print(f'正在下载:{title}')
实现搜索整本小说下载:
import requests
import parsel
from prettytable import PrettyTable
from tqdm import tqdm
import os
import time
# 小说网站链接:https://www.heenee.com/
# 采集单章小说 采集整本小说 实现搜索下载
# 采集单章小说
def GetResponse(url, timeout=10):
    """Send a GET request with a browser User-Agent and return the response.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait for the server (default 10). Without a
            timeout, ``requests.get`` can block forever on a dead connection.

    Returns:
        The ``requests.Response`` object (caller reads ``.text`` / ``.json()``).
    """
    headers = {
        # Pretend to be desktop Chrome so the site does not reject the scraper.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response
def GetContent(href):
    """Download one chapter page and return its text, paragraphs joined by blank lines.

    Args:
        href: Site-relative chapter path (e.g. ``/quanben/.../1.html``).

    Returns:
        The chapter body as a single string.
    """
    chapter_url = 'https://www.heenee.com' + href
    sel = parsel.Selector(GetResponse(chapter_url).text)
    parts = sel.css('div #htmlContent p::text').getall()
    return '\n\n'.join(parts)
def SaveContent(filename, title, content):
    """Append chapter text to ``<filename><title>.txt``.

    Args:
        filename: Directory prefix that already ends with a path separator.
        title: Chapter title used as the file name. Characters that are
            invalid in Windows file names are stripped, because titles
            scraped from the site may contain ``?``, ``"``, ``|`` etc.,
            which would make ``open()`` fail.
        content: Chapter text to write (UTF-8, append mode).
    """
    safe_title = ''.join(c for c in title if c not in '\\/:*?"<>|')
    with open(filename + safe_title + '.txt', mode='a', encoding='utf-8') as f:
        f.write(content)
def GetInfo(link):
    """Parse a novel's table-of-contents page.

    Args:
        link: Absolute URL of the book's chapter-list page.

    Returns:
        (name, title_list, href_list): the novel's name, the chapter
        titles, and the matching site-relative chapter hrefs.
    """
    selector = parsel.Selector(GetResponse(link).text)
    # The <dd><h1> text is "book-name author ..."; keep only the book name.
    heading = selector.css('dd h1::text').get()
    name = heading.split(' ')[0]
    titles = selector.css('.L a::text').getall()
    hrefs = selector.css('.L a::attr(href)').getall()
    return name, titles, hrefs
def GetSearch():
    """Interactively search the site and return the chosen book's TOC URL.

    Prompts the user for a keyword, queries the site's search API, prints
    all matches in a numbered table, then asks which row to download.

    Returns:
        The absolute URL of the selected book's chapter-list page.

    Raises:
        SystemExit: if the search produced no results (the original code
            crashed with an opaque IndexError in this case).
    """
    info = []
    input_content = input('可输入作者和书名,请您别少字也别错别字:')
    try:
        search_url = f'https://www.heenee.com/api/search?q={input_content}'
        json_data = GetResponse(search_url).json()
        tb = PrettyTable()
        tb.field_names = ['序号', '分类', '书名', '作者', '更新时间', '状态', '更新章节', '书籍详情链接']
        # enumerate replaces the manual num counter.
        for num, index in enumerate(json_data['data']['search']):
            dit = {
                '分类': index['cate_name'],
                '书名': index['book_name'],
                '作者': index['author'],
                '更新时间': index['uptime'],
                '状态': index['status_str'],
                '最新章节': index['latest_chapter_name'],
                '书籍详情链接': index['book_list_url'],
            }
            info.append(dit)
            tb.add_row([num, index['cate_name'], index['book_name'], index['author'], index['uptime'], index['status_str'], index['latest_chapter_name'], index['book_list_url']])
        print(tb)
    # Narrow the original bare ``except:`` — catch only network failures,
    # non-JSON responses, and unexpected response schemas, so genuine bugs
    # (typos, NameError, KeyboardInterrupt) are not silently swallowed.
    except (requests.RequestException, ValueError, KeyError):
        print('请再次详细一点输入:')
    if not info:
        # Fail with a clear message instead of IndexError on info[key_num].
        raise SystemExit('没有搜索结果,程序退出')
    # Original prompt said 歌曲 (song) — this downloads books, not songs.
    key_num = int(input('请输入你要下载的书籍序号: '))
    # The inner quotes must differ from the f-string's outer quotes.
    link = f'https://www.heenee.com{info[key_num]["书籍详情链接"]}'
    return link
if __name__ == '__main__':
    link = GetSearch()
    print(link)
    name, title_list, href_list = GetInfo(link)
    # Use os.sep instead of a hard-coded Windows '\\' so the script also
    # runs on Linux/macOS; downstream code only needs the trailing separator.
    filename = name + os.sep
    # exist_ok avoids the check-then-create race of exists()+mkdir().
    os.makedirs(filename, exist_ok=True)
    # zip() has no length, so pass total= for tqdm to show a real percentage.
    for title, href in tqdm(zip(title_list, href_list), total=len(title_list)):
        time.sleep(1)  # throttle requests to be polite to the server
        content = GetContent(href)
        SaveContent(filename, title, content)
        print(f'正在下载:{title}')
结果展现:
分析,首先分析单章小说怎么下载,然后在目录页,找到整本小说所有章节的链接,然后在搜索页面,找到如何输入一个关键词,就能出所有结果的接口。打开开发者工具,在搜索框输入遮天。
所有结果在该数据包
详细分析过程见下面的链接:
【Python采集小说网站-制作小说下载器-哔哩哔哩】 https://b23.tv/6kbS4jU