python爬取小说,接上文。

import requests
from bs4 import BeautifulSoup

def content(url):
    """Download one chapter page and return the text of its content block.

    Args:
        url: Absolute URL of the chapter page to fetch.

    Returns:
        The text of the page's ``<div id="content">`` element.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``<div id="content">`` element.
    """
    # Send a browser-like User-Agent with the request.
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 Edg/103.0.1264.77'
    }
    # Without a timeout a single stalled connection would hang the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    response.raise_for_status()
    # Force UTF-8 decoding (assumes the site serves UTF-8 -- TODO confirm).
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, "lxml")
    node = soup.find("div", id="content")
    if node is None:
        # find() returns None when nothing matches; report which page is bad.
        raise ValueError('no <div id="content"> found at ' + str(url))
    return node.text


if __name__ == '__main__':
    # Script entry point: read the chapter index page, then append the text of
    # every linked chapter to a single output file.

    # Placeholder: URL of the novel's chapter index page (find it yourself;
    # it cannot be published here).
    target = 'xxxxxxxxxxxx'
    # Same browser-like User-Agent that content() sends for chapter pages.
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 Edg/103.0.1264.77'
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(target, headers=headers, timeout=10)
    # Stop on 4xx/5xx rather than parsing an error page as the index.
    response.raise_for_status()
    response.encoding = 'utf-8'
    bf = BeautifulSoup(response.text, "lxml")
    chapter_list = bf.find("div", id="list")
    if chapter_list is None:
        # find() returns None when nothing matches.
        raise SystemExit('no <div id="list"> found on the index page')
    # Keep only anchors that actually carry an href; a missing href would
    # otherwise break the URL concatenation below.
    # NOTE: chapter titles are not used. If they are ever used as file names,
    # the original author notes that on Windows the characters
    # ? * : . < > \ / | would have to be replaced first.
    urls = [a.get("href") for a in chapter_list.find_all("a") if a.get("href")]
    # Open the output file once instead of reopening it for every chapter.
    with open('xxxxxx.txt', "a", encoding='utf-8') as f:
        for href in urls:
            # Placeholder: site base URL prepended to each chapter link
            # (likewise cannot be published here).
            url = 'xxxxxxxxxxx' + href
            f.write(content(url))
    print("下载成功")
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值