from bs4 import BeautifulSoup
import requests
if __name__ == '__main__':
    # Download the chapter index at `url`, follow every chapter link found in
    # it, and append "<title>:<chapter text>" for each chapter to sanguo.txt.

    # Browser-like User-Agent sent with every request.
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
    }
    # Seconds to wait for the server; without a timeout a stalled connection
    # would block the script indefinitely.
    timeout = 10

    url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    # Force UTF-8 decoding of the body instead of relying on the encoding
    # that requests would pick by itself.
    resp.encoding = 'utf8'
    soup = BeautifulSoup(resp.text, 'lxml')
    # One <a> per chapter inside the table-of-contents list.
    a_list = soup.select('#main_left > div > div.book-mulu > ul > li > a')

    # The context manager guarantees the file is flushed and closed even if a
    # request or a parse step raises part-way through the loop.
    with open('sanguo.txt', 'w', encoding='utf-8') as fp:
        for a in a_list:
            # get_text() always returns a str; `.string` is None when the
            # anchor contains nested tags, which would break the write below.
            title = a.get_text()
            link = a['href']
            detail_url = 'https://www.shicimingju.com' + link

            # Reuse the headers dict defined above (the original referenced
            # an undefined name `hd` here, raising NameError).
            resp = requests.get(url=detail_url, headers=headers, timeout=timeout)
            resp.encoding = 'utf8'
            detail_soup = BeautifulSoup(resp.text, 'lxml')
            chapter_content = detail_soup.find('div', class_='chapter_content')
            if chapter_content is None:
                # The page has no chapter body (layout change, error page,
                # ...): report it and keep going with the remaining chapters.
                print(title, '爬取失败: 未找到章节内容')
                continue

            content = chapter_content.text
            fp.write(title + ':' + content + '\r\n')
            print(title, '爬取成功')