from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Index page that lists every chapter of the book.
INDEX_URL = "https://www.shicimingju.com/book/sanguoyanyi.html"
# Text file that receives "<title>:<content>" for each chapter.
OUTPUT_PATH = "./三国演义.txt"
# Seconds to wait on a request; without a timeout requests may block forever.
REQUEST_TIMEOUT = 10
# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/"
                  "85.0.4183.83 Safari/537.36"
}


def _fetch_soup(session, page_url):
    """Download *page_url* and return it parsed as a BeautifulSoup tree.

    Args:
        session: A ``requests.Session`` used to issue the GET request.
        page_url: Absolute URL of the page to download.

    Returns:
        The page parsed with the ``lxml`` parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = session.get(page_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page as if it were a chapter.
    response.raise_for_status()
    # Hand BeautifulSoup the raw bytes so it detects the encoding from the
    # document itself; ``response.text`` depends on the header-declared
    # charset, which can garble Chinese text when the header omits it.
    return BeautifulSoup(response.content, "lxml")


def main():
    """Scrape every chapter linked from the index page into OUTPUT_PATH.

    For each ``<li>`` under ``.book-mulu > ul`` the linked detail page is
    fetched, the text of its ``div.chapter_content`` is extracted, and a
    line of the form ``<title>:<content>`` is appended to the output file.
    Entries without a link, and detail pages without a content div, are
    skipped rather than aborting the whole run.
    """
    with requests.Session() as session:
        session.headers.update(HEADERS)
        # Fetch the index before opening the file so a failed request does
        # not leave behind a truncated, empty output file.
        index_soup = _fetch_soup(session, INDEX_URL)
        chapter_items = index_soup.select(".book-mulu > ul > li")

        # The context manager guarantees the file is flushed and closed even
        # if a later request raises.
        with open(OUTPUT_PATH, "w", encoding="utf-8") as fp:
            for li in chapter_items:
                anchor = li.a
                if anchor is None or not anchor.get("href"):
                    continue
                # get_text() always returns a str, whereas .string is None
                # when the anchor wraps nested tags.
                title = anchor.get_text()
                # urljoin resolves root-relative, relative and absolute hrefs.
                detail_url = urljoin(INDEX_URL, anchor["href"])

                # Request the detail page and locate the chapter body.
                detail_soup = _fetch_soup(session, detail_url)
                div_tag = detail_soup.find("div", class_="chapter_content")
                if div_tag is None:
                    print(title, "skipped: chapter content not found")
                    continue

                fp.write(title + ":" + div_tag.text + "\n")
                print(title, "Over")


if __name__ == "__main__":
    main()
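# Using bs4 to scrape the chapter titles and contents of "Romance of the Three Kingdoms" from a website.
# (Source page note: latest recommended article published on 2022-11-26 23:41:14.)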