import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Scrape every chapter of "Romance of the Three Kingdoms" (三国演义) from
# xingyueboke.com and save each chapter as a UTF-8 text file under ./三国/.
if __name__ == '__main__':
    # Create the output directory automatically if it does not exist yet.
    if not os.path.exists('./三国'):
        os.mkdir('./三国')

    url = 'https://www.xingyueboke.com/sanguoyanyi/'
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}

    # Fetch the table-of-contents page and parse the chapter list out of it.
    resp = requests.get(url=url, headers=headers, timeout=30)  # timeout so the script can never hang forever
    resp.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    resp.encoding = resp.apparent_encoding  # server may omit charset; avoid mojibake on Chinese text
    soup = BeautifulSoup(resp.text, 'lxml')
    li_list = soup.select('#content-list > div.book-list.clearfix > ul > li ')
    # print(li_list)

    # Each <li><a> in the list holds a chapter title and the chapter-page URL.
    for li in li_list:
        title = li.a.string
        # hrefs may be relative; resolve against the index URL (no-op if already absolute)
        de_url = urljoin(url, li.a['href'])
        # Fetch and parse this chapter's page.
        de_resp = requests.get(url=de_url, headers=headers, timeout=30)
        de_resp.raise_for_status()
        de_resp.encoding = de_resp.apparent_encoding
        de_soup = BeautifulSoup(de_resp.text, 'lxml')
        content_div = de_soup.find('div', id='nr1')  # <div id="nr1"> holds the chapter body text
        # print(content_div)
        content = content_div.text
        # Persist: one .txt file per chapter, named after the chapter title.
        with open('./三国/' + title + '.txt', 'w', encoding='utf_8') as fp:
            fp.write(title + '\n' + content + '\n')
        print(title)  # progress indicator
# The output of a sample run is shown below.