## 代码思路 (approach)
# 1. 获取小说目录页源码
# 2. 获取所有章节链接到集合
# 3. 传入章节的URL, 下载章节内容
# 4. 打开每个章节链接并逐一下载并保存内容
import requests
from bs4 import BeautifulSoup
def open_url(url):
    """Fetch *url* and return the page body decoded as UTF-8 text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str`` (UTF-8 decoded).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    header = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)'}
    # BUG FIX: the original called requests.get(url, header), which binds the
    # dict to the second positional parameter ``params`` (query string), so
    # the User-Agent header was never actually sent. It must be keyword-only.
    # A timeout is added so a stalled server cannot hang the download forever.
    response = requests.get(url, headers=header, timeout=30)
    response.raise_for_status()  # fail loudly on 4xx/5xx instead of decoding an error page
    return response.content.decode('utf-8')
def chapter_url(url):
    """Collect the absolute URL of every chapter listed on the index page.

    Args:
        url: URL of the novel's table-of-contents page.

    Returns:
        A list of absolute chapter URLs, in page order.
    """
    soup = BeautifulSoup(open_url(url), 'html.parser')
    # Every chapter is an <a> inside the <div id="list"> block; hrefs are
    # site-relative, so prepend the site root to make them absolute.
    anchors = soup.find('div', id="list").find_all('a')
    links = ['https://www.xsbiquge.com/' + a['href'] for a in anchors]
    print(links)
    print(len(links))
    return links
def get_content(url):
    """Download one chapter page and return its title plus cleaned body text.

    Args:
        url: URL of a single chapter page.

    Returns:
        The chapter title followed by the chapter body, with whitespace
        runs converted to paragraph breaks.
    """
    soup = BeautifulSoup(open_url(url), 'html.parser')
    title = soup.h1.string
    body = soup.find_all('div', id="content")[0].text
    # Collapse all whitespace runs to single spaces, then turn each space
    # into a blank-line paragraph separator.
    body = ' '.join(body.split()).replace(' ', '\r\n\n')
    return title + '\r\n\n\n' + body
def downloadnovel(url, filename='斗罗大陆之龙王传说.txt'):
    """Download every chapter of the novel at *url* into a text file.

    Prints the novel's metadata (name, author, status, latest chapter,
    last update) and a per-chapter progress line while downloading.

    Args:
        url: URL of the novel's index (table-of-contents) page.
        filename: Output file path; appended to if it already exists.
            Defaults to the original hard-coded name for backward
            compatibility.
    """
    pagehtml = open_url(url)
    soup = BeautifulSoup(pagehtml, 'html.parser')
    novelname = soup.h1.string
    auther = soup.p.string
    other = soup.find('div', id="info").find_all('p')
    print(novelname)        # 名称
    print(auther)           # 作者
    print(other[1].text)    # 状态
    print(other[-1].text)   # 最新章节
    print(other[-2].text)   # 最后更新
    print('开始下载小说')
    chapterlist = chapter_url(url)  # 传入小说首页, 获取所有章节的链接
    lenchapter = len(chapterlist)
    print('这部小说一共有%d 章' % lenchapter)
    # Open the output file once instead of re-opening it for every chapter
    # (the original opened it inside the loop, once per chapter).
    with open(filename, 'a+', encoding='utf-8') as f:
        # enumerate replaces the manual counter; the loop variable is renamed
        # so it no longer shadows the ``url`` parameter.
        for count, chapter in enumerate(chapterlist, start=1):
            f.write(get_content(chapter) + '\r\n\n\n\n')
            progress = (count / lenchapter) * 100
            print('正在下载第%d章,进度%.2f%%' % (count, progress))
    print('下载完成!')
if __name__ == '__main__':
    # Entry point: scrape the novel whose index page lives at this URL.
    downloadnovel('https://www.xsbiquge.com/66_66414//')
# 原创: https://zhuanlan.zhihu.com/p/80975802