Python 利用线程池快速下载某趣阁小说

1.首先我们先导入我们需要的包

import requests
from lxml import etree
import os
from concurrent.futures import ThreadPoolExecutor #线程池

2.然后从一本书中获取各个章节的链接

# Fetch the book's index page and assemble the absolute chapter URLs.
url = 'https://www.biquge9.com/book/3808/'
page = etree.HTML(requests.get(url=url).text)
direct_links = page.xpath('//div[@class="listmain"]/dl/dd/a/@href')
nested_links = page.xpath('//div[@class="listmain"]/dl/span/dd/a/@href')
# Direct entry 10 is dropped and the <span>-nested links take its place.
# NOTE(review): presumably entry 10 is a non-chapter link (e.g. an
# "expand all chapters" toggle) -- confirm against the page markup.
ur = ['https://www.biquge9.com' + href
      for href in direct_links[:10] + nested_links + direct_links[11:]]

3.定义一个函数从章节链接中获取内容

def download(url,file):
    """Download one chapter page and save its text as ``<file>/<title>.txt``.

    Args:
        url: Absolute URL of the chapter page.
        file: Directory the chapter file is written into; created if missing.

    Raises:
        IndexError: If no chapter title is found at the expected XPath.
    """
    resp = requests.get(url=url)
    resp.encoding = 'utf-8'
    # exist_ok avoids the check-then-create race when several pool threads
    # find the directory missing at the same moment.
    os.makedirs(file, exist_ok=True)
    tr = etree.HTML(resp.text)
    name = tr.xpath('//div[@class="book reader"]//div[@class="content"]/h1/text()')[0]

    # A path separator inside the scraped title would point the write at a
    # different directory, so neutralise it for the file name only.
    safe_name = name.replace('/', '_').replace(os.sep, '_')
    path = file + '/' + safe_name + '.txt'
    q = tr.xpath('//*[@id="chaptercontent"]/text()')
    # The context manager closes the file even if a write fails.
    with open(path, 'w', encoding='utf-8') as fp:
        fp.writelines(nei + '\n' for nei in q)
    # Report success once per chapter rather than once per text line.
    print(name + '爬取成功')

总体思路就这样,然后创建线程池快速爬取小说

下面这是源码

import requests
from lxml import etree
import time
import os
from concurrent.futures import ThreadPoolExecutor
def download(url,file):
    """Download one chapter page and save its text as ``<file>/<title>.txt``.

    Args:
        url: Absolute URL of the chapter page.
        file: Directory the chapter file is written into; created if missing.

    Raises:
        IndexError: If no chapter title is found at the expected XPath.
    """
    resp = requests.get(url=url)
    resp.encoding = 'utf-8'
    # exist_ok avoids the check-then-create race when several pool threads
    # find the directory missing at the same moment.
    os.makedirs(file, exist_ok=True)
    tr = etree.HTML(resp.text)
    name = tr.xpath('//div[@class="book reader"]//div[@class="content"]/h1/text()')[0]

    # A path separator inside the scraped title would point the write at a
    # different directory, so neutralise it for the file name only.
    safe_name = name.replace('/', '_').replace(os.sep, '_')
    path = file + '/' + safe_name + '.txt'
    q = tr.xpath('//*[@id="chaptercontent"]/text()')
    # The context manager closes the file even if a write fails.
    with open(path, 'w', encoding='utf-8') as fp:
        fp.writelines(nei + '\n' for nei in q)
    # Report success once per chapter rather than once per text line.
    print(name + '爬取成功')
if __name__ == '__main__':
    # Entry point: read a book index URL and an output folder from stdin,
    # collect the chapter links, then download the chapters on a thread pool.
    print('新笔趣阁的网址为:https://www.biquge9.com')
    url = input('请输入新笔趣阁中书的网址链接:')
    file = input('请输入文件夹的名字:')
    # Example book URL: https://www.biquge9.com/book/3808/
    res = requests.get(url=url)
    t = etree.HTML(res.text)
    direct_links = t.xpath('//div[@class="listmain"]/dl/dd/a/@href')
    nested_links = t.xpath('//div[@class="listmain"]/dl/span/dd/a/@href')
    # Direct entry 10 is dropped and the <span>-nested links take its place.
    # NOTE(review): presumably entry 10 is a non-chapter link (e.g. an
    # "expand all chapters" toggle) -- confirm against the page markup.
    hrefs = direct_links[:10] + nested_links + direct_links[11:]
    ur = ['https://www.biquge9.com' + href for href in hrefs]
    with ThreadPoolExecutor(50) as f:
        # Keep every future so failures can be reported afterwards; the
        # separate loop name avoids rebinding the book URL held in `url`.
        jobs = [(chapter_url, f.submit(download, url=chapter_url, file=file))
                for chapter_url in ur]
    # Leaving the with-block waits for all submitted downloads to finish.
    for chapter_url, job in jobs:
        err = job.exception()
        if err is not None:
            # Without this check a failed chapter would vanish silently.
            print(chapter_url + '爬取失败: ' + repr(err))

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值