下载全网小说

最新推荐文章于 2024-08-12 16:55:06 发布

洪荒宇宙py

最新推荐文章于 2024-08-12 16:55:06 发布

阅读量256

点赞数 10

分类专栏：应用实例　文章标签：python

本文链接：https://blog.csdn.net/m0_56852291/article/details/137441250

版权

应用实例专栏收录该内容

9 篇文章 0 订阅

订阅专栏

该代码提供了一个免费小说阅读网站的爬虫，用于抓取并下载全网小说，包括书名、章节URL和内容，可用于学习网络爬虫技术。

摘要由CSDN通过智能技术生成

这是免费小说阅读网下载全网小说的代码，仅供学习使用。

#######
好的
import requests
from lxml import etree
from bs4 import BeautifulSoup
from pprint import pprint
from urllib import parse
from tqdm import tqdm

class FreeFiction:
    """Interactive scraper for novels listed on mianfeixiaoshuoyueduwang.com.

    The workflow is driven by console prompts: pick a category, pick a
    listing page, pick a book title, then every chapter of that book is
    fetched and appended to ``<book title>.txt`` in the working directory.
    """

    # Seconds to wait on a request before giving up, so that a stalled
    # connection cannot hang the whole session forever.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Set up the request headers and the site URLs used by the scraper."""
        self.headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/96.0.4664.45 Safari/537.36'
            ),
        }
        # Prefix used to build the URLs of category pages 2..N.
        self.u = "http://www.mianfeixiaoshuoyueduwang.com/category/"
        # Site root; relative hrefs scraped from pages are appended to it.
        self.url = "http://www.mianfeixiaoshuoyueduwang.com/"
        self.book_url = "http://www.mianfeixiaoshuoyueduwang.com/book/8/"

    def get_lnovel(self, url):
        """Fetch ``url`` with the configured headers and return the body text.

        Raises whatever ``requests.get`` raises (including a timeout after
        ``REQUEST_TIMEOUT`` seconds).
        """
        response = requests.get(
            url=url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        return response.text

    def etree_xpath(self, html, xpath):
        """Parse ``html`` with lxml and return the result of ``xpath`` on it."""
        return etree.HTML(html).xpath(xpath)

    def parse_url(self, url_li):
        """Turn site-relative hrefs into absolute URLs.

        Each item of ``url_li`` is appended to ``self.url`` with its
        surrounding slashes stripped, so the hrefs are expected to begin
        with ``/``. Returns a new list in the same order.
        """
        base = self.url.strip('/')
        return [base + href for href in url_li]

    def dict_cent_category(self, list_text, list_url):
        """Pair names with URLs as a ``{name: url}`` dict.

        When the two lists differ in length no dict is built and an error
        *string* is returned instead.
        NOTE(review): callers in this class use the result as a dict without
        checking for that string; the return contract is kept unchanged here.
        """
        if len(list_text) != len(list_url):
            return "两个列表不相等,请重新处理"
        return dict(zip(list_text, list_url))

    def file_txt(self, novel_name, chapter_name, chapter_content):
        """Append one chapter (title line, then content) to ``<novel_name>.txt``.

        The file is opened in append mode with UTF-8 encoding, so repeated
        runs for the same book keep adding to the same file.
        """
        with open(f'{novel_name}.txt', 'a', encoding='utf-8') as f:
            f.write(f'{chapter_name}\n{chapter_content}\n')

    def base_dict_one(self):
        """Scrape the home page navigation and return ``{category: url}``.

        The mapping is also pretty-printed so the user can see the valid
        category names.
        """
        home_html = self.get_lnovel(url=self.url)
        hrefs = self.etree_xpath(home_html, '//div[@class="nav"]/ul/li/a/@href')
        titles = self.etree_xpath(
            home_html, '//div[@class="nav"]/ul/li/a/span/text()'
        )
        base_dict = self.dict_cent_category(titles, self.parse_url(hrefs))
        pprint(base_dict)
        return base_dict

    def input_key(self, dict_):
        """Prompt for a category and return ``{page number: page url}`` for it.

        ``dict_`` is the category mapping from ``base_dict_one``. Page numbers
        are string keys starting at ``"1"``. Returns ``None`` when the entered
        category is not a key of ``dict_`` so the caller can ask again.
        """
        dict_key = input("请输入小说分类:").strip()
        if dict_key not in dict_:
            return None

        first_page_url = dict_[dict_key]
        req = self.get_lnovel(first_page_url)
        pager_text = self.etree_xpath(req, '//p/span[@id="cur_page"]/text()')[0]
        # The slice keeps the two characters that precede the last two of the
        # pager text — presumably the total page count; verify against the
        # site's current markup.
        page = pager_text[-4:-2].strip()

        dict_page = {"1": first_page_url}
        # File name of the first page without its extension; later pages are
        # addressed as "<stem>_<n>.html" under the category prefix.
        stem = first_page_url.split('/')[-1].split('.')[0]
        for item in range(2, int(page) + 1):
            dict_page[str(item)] = f"{self.u}{stem}_{item}.html"
        pprint(dict_page)
        return dict_page

    def page_number(self):
        """Prompt for category and page, then return ``{book title: book url}``.

        Both prompts repeat until a listed value is entered; previously an
        unlisted page number left the result unassigned and crashed.
        """
        base_dict = self.base_dict_one()
        while True:
            dict_page = self.input_key(base_dict)
            if dict_page is not None:
                break

        while True:
            page_nu = input("请输入第几页:").strip()
            if page_nu in dict_page:
                break

        req = self.get_lnovel(dict_page[page_nu])
        book_name = self.etree_xpath(req, '//h3/a/span/text()')
        book_href = self.etree_xpath(req, '//h3/a/@href')
        dict_book = self.dict_cent_category(book_name, self.parse_url(book_href))
        pprint(dict_book)
        return dict_book

    def lnovel_chapter(self):
        """Prompt for a book title and download all its chapters to a text file.

        The title prompt repeats until a title from the chosen listing page
        is entered. Chapters that fail to download, parse or save are
        reported and skipped so one bad chapter does not abort the book.
        """
        dict_book = self.page_number()
        if not dict_book:
            # Nothing to choose from on this page; avoid prompting forever.
            return

        while True:
            lnoverl_name = input("请输入小说书名:").strip()
            if lnoverl_name in dict_book:
                break

        req = self.get_lnovel(dict_book[lnoverl_name])
        chapter_hrefs = self.etree_xpath(req, '//li[@class="w33"]/a/@href')
        for url in tqdm(self.parse_url(chapter_hrefs)):
            try:
                chapter_html = self.get_lnovel(url)
                te_chapter = ''.join(
                    self.etree_xpath(
                        chapter_html, '//div/h1[@class="title1"]/text()'
                    )
                )
                text_ = ''.join(
                    self.etree_xpath(chapter_html, '//div[@id="content"]/p/text()')
                )
                self.file_txt(lnoverl_name, te_chapter, text_)
            except Exception as exc:
                # Best-effort download: report and continue. Catching
                # Exception (not a bare except) keeps Ctrl-C working.
                tqdm.write(f"跳过章节 {url}: {exc}")

free = FreeFiction()

if __name__ == "__main__":
    # Start a new download session on every prompt until the user types
    # "结束"; any other input (including an empty line) runs one session.
    while True:
        run = input("请输入运行:")
        if run == "结束":
            break
        free.lnovel_chapter()

喜欢看小说的可以试着下载。附美女图一张：（在这里插入图片描述）