# Downloader for the "free fiction reading" site (mianfeixiaoshuoyueduwang.com).
# For learning purposes only.
#######
import requests
from lxml import etree
from bs4 import BeautifulSoup
from pprint import pprint
from urllib import parse
from tqdm import tqdm
class FreeFiction:
    """Interactive scraper for mianfeixiaoshuoyueduwang.com.

    Flow: list categories -> pick a category and page -> pick a book ->
    download every chapter and append it to ``<book name>.txt``.
    """

    def __init__(self):
        # Desktop-browser User-Agent so the site serves normal HTML pages.
        self.headers = {
            'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                           'AppleWebKit/537.36 (KHTML, like Gecko) '
                           'Chrome/96.0.4664.45 Safari/537.36'),
        }
        # Category-page URL prefix (pagination links are built from this).
        self.u = "http://www.mianfeixiaoshuoyueduwang.com/category/"
        # Site root; relative hrefs are joined onto it.
        self.url = "http://www.mianfeixiaoshuoyueduwang.com/"
        # Sample book URL (kept from the original; not used by the flow).
        self.book_url = "http://www.mianfeixiaoshuoyueduwang.com/book/8/"

    def get_lnovel(self, url):
        """GET *url* with the browser headers and return the body as text."""
        resp = requests.get(url=url, headers=self.headers)
        return resp.text

    def etree_xpath(self, html, xpath):
        """Parse *html* and return the list of nodes/strings matching *xpath*."""
        return etree.HTML(html).xpath(xpath)

    def parse_url(self, url_li):
        """Join site-relative hrefs (each starting with '/') onto the site root.

        Returns a list of absolute URLs, in input order.
        """
        # strip('/') drops only the trailing slash here ('h' guards the front),
        # so root + '/path.html' concatenates without a double slash.
        root = self.url.strip('/')
        return [root + href for href in url_li]

    def dict_cent_category(self, list_text, list_url):
        """Zip display names with URLs into a dict.

        Returns the dict when lengths match; otherwise returns an error
        string (original behavior — callers treat any dict as success).
        """
        if len(list_text) == len(list_url):
            return dict(zip(list_text, list_url))
        return "两个列表不相等,请重新处理"

    def file_txt(self, novel_name, chapter_name, chapter_content):
        """Append one chapter (title line + body) to ``<novel_name>.txt``."""
        with open(f'{novel_name}.txt', 'a', encoding='utf-8') as f:
            f.write(chapter_name)
            f.write('\n')
            f.write(chapter_content)
            f.write('\n')

    def base_dict_one(self):
        """Fetch the home page and return {category name: category URL}."""
        html = self.get_lnovel(url=self.url)
        hrefs = self.etree_xpath(html, '//div[@class="nav"]/ul/li/a/@href')
        titles = self.etree_xpath(html, '//div[@class="nav"]/ul/li/a/span/text()')
        base_dict = self.dict_cent_category(titles, self.parse_url(hrefs))
        pprint(base_dict)
        return base_dict

    def input_key(self, dict_):
        """Prompt for a category name; return {page number (str): page URL}.

        Returns None when the entered category is not in *dict_* (the caller
        loops until a valid category is supplied).
        """
        dict_key = input("请输入小说分类:").strip()
        if dict_key not in dict_:
            return None
        html = self.get_lnovel(dict_[dict_key])
        # The pager text embeds the total page count; [-4:-2] slices it out
        # of strings like "...共12页" — assumes at most a 2-digit count.
        # TODO(review): confirm against the live pager markup.
        page = self.etree_xpath(
            html, '//p/span[@id="cur_page"]/text()')[0][-4:-2].strip()
        dict_page = {"1": dict_[dict_key]}
        # Page N lives at category/<stem>_N.html, e.g. "xuanhuan_2.html".
        stem = dict_[dict_key].split('/')[-1].split('.')[0]
        for item in range(2, int(page) + 1):
            dict_page[str(item)] = f"{self.u}{stem}_{item}.html"
        pprint(dict_page)
        return dict_page

    def page_number(self):
        """Ask for category and page number; return {book title: book URL}.

        Re-prompts until both the category and the page number are valid
        (the original fell through and returned None on a bad page number,
        which crashed the caller with a TypeError).
        """
        base_dict = self.base_dict_one()
        while True:
            dict_page = self.input_key(base_dict)
            if dict_page is not None:
                break
        while True:
            page_nu = input("请输入第几页:").strip()
            if page_nu in dict_page:
                break
        html = self.get_lnovel(dict_page[page_nu])
        names = self.etree_xpath(html, '//h3/a/span/text()')
        hrefs = self.etree_xpath(html, '//h3/a/@href')
        dict_book = self.dict_cent_category(names, self.parse_url(hrefs))
        pprint(dict_book)
        return dict_book

    def lnovel_chapter(self):
        """Ask for a book title, download all its chapters into a text file."""
        dict_book = self.page_number()
        while True:
            lnoverl_name = input("请输入小说书名:").strip()
            if lnoverl_name not in dict_book:
                continue  # unknown title: ask again
            html = self.get_lnovel(dict_book[lnoverl_name])
            chapter_urls = self.parse_url(
                self.etree_xpath(html, '//li[@class="w33"]/a/@href'))
            for url in tqdm(chapter_urls):
                try:
                    page = self.get_lnovel(url)
                    title = ''.join(self.etree_xpath(
                        page, '//div/h1[@class="title1"]/text()'))
                    body = ''.join(self.etree_xpath(
                        page, '//div[@id="content"]/p/text()'))
                    self.file_txt(lnoverl_name, title, body)
                except Exception:
                    # Best-effort: skip chapters that fail to fetch or parse.
                    continue
            break
if __name__ == "__main__":
    # Interactive driver: each pass downloads one book end-to-end;
    # type "结束" at the prompt to quit.
    free = FreeFiction()
    while True:
        run = input("请输入运行:")
        if run == "结束":
            break
        free.lnovel_chapter()
# If you enjoy web novels, feel free to try running this downloader.