import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Index page of the novel to scrape.
root_url = 'http://www.89wx.cc/17/17277/'

# Fetch the table of contents; the site serves GBK-encoded pages,
# so force the encoding before BeautifulSoup sees the text.
r1 = requests.get(root_url)
r1.encoding = 'gbk'
soup1 = BeautifulSoup(r1.text, "html.parser")

# Each <dd> holds one chapter link; collect (absolute_url, title) pairs.
datas = []
for data in soup1.find_all('dd'):
    anchor = data.find('a')
    if anchor is None or not anchor.get('href'):
        # Some <dd> entries are separators/placeholders with no link -- skip.
        continue
    # urljoin resolves relative hrefs correctly; the original
    # 'http://www.89wx.cc/%s' % link concatenation produced a double slash
    # (or a wrong path) for hrefs that already start with '/'.
    chapter_url = urljoin(root_url, anchor['href'])
    # Strip surrounding whitespace so the title is safe to reuse as a filename.
    datas.append((chapter_url, data.get_text().strip()))
# Download every chapter and write it to its own .txt file.
for chapter_url, title in datas:
    r2 = requests.get(chapter_url)
    r2.encoding = 'gbk'  # same GBK encoding as the index page
    soup2 = BeautifulSoup(r2.text, "html.parser")

    # The chapter body lives in <div id="content">; guard against a missing
    # div (layout change / error page) instead of crashing mid-run.
    content_div = soup2.find('div', id='content')
    if content_div is None:
        continue

    # Replace characters that are illegal in filenames so open() cannot
    # fail on titles containing '/', ':', '?', etc.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title).strip() or 'untitled'
    with open("%s.txt" % safe_title, "w", encoding='utf-8') as fout:
        fout.write(content_div.get_text())
# Source note: adapted from a blog post "Python crawler project: scraping a
# novel" (latest recommended revision published 2024-04-25 19:15:00).