import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Index page of the novel to scrape.
root_url = 'http://www.89wx.cc/17/17277/'

# Fetch the table of contents; the site serves GBK-encoded pages,
# so force the encoding before BeautifulSoup sees the text.
r1 = requests.get(root_url)
r1.encoding = 'gbk'
soup1 = BeautifulSoup(r1.text, "html.parser")

# Each <dd> holds one chapter link; collect (absolute_url, title) pairs.
datas = []
for data in soup1.find_all('dd'):
    anchor = data.find('a')
    if anchor is None or not anchor.get('href'):
        # Some <dd> entries are separators/placeholders with no link -- skip.
        continue
    # urljoin resolves relative hrefs correctly; the original
    # 'http://www.89wx.cc/%s' % link concatenation produced a double slash
    # (or a wrong path) for hrefs that already start with '/'.
    chapter_url = urljoin(root_url, anchor['href'])
    # Strip surrounding whitespace so the title is safe to reuse as a filename.
    datas.append((chapter_url, data.get_text().strip()))
# Download every chapter and write it to its own .txt file.
for chapter_url, title in datas:
    r2 = requests.get(chapter_url)
    r2.encoding = 'gbk'  # same GBK encoding as the index page
    soup2 = BeautifulSoup(r2.text, "html.parser")

    # The chapter body lives in <div id="content">; guard against a missing
    # div (layout change / error page) instead of crashing mid-run.
    content_div = soup2.find('div', id='content')
    if content_div is None:
        continue

    # Replace characters that are illegal in filenames so open() cannot
    # fail on titles containing '/', ':', '?', etc.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title).strip() or 'untitled'
    with open("%s.txt" % safe_title, "w", encoding='utf-8') as fout:
        fout.write(content_div.get_text())
# Source note: adapted from a blog post "Python crawler project: scraping a
# novel" (latest recommended revision published 2024-04-25 19:15:00).