python 线程池爬虫案例,爬取美女图片

废话不多说,直接上代码 

import os
import time
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup

def WebPageRequests(url):
    """Download every image found in the article page at *url*.

    Reads the module-level globals ``header`` (request headers) and
    ``path`` (output directory); each image is written to
    ``{path}/{title}{name}``. Returns None; failures are skipped.
    """
    # timeout so a stalled server cannot hang a pool worker forever
    res = requests.get(url, headers=header, timeout=10)
    if res.status_code != 200:
        return
    soup = BeautifulSoup(res.text, features='lxml')
    img_tags = soup.select('.entry-content img')
    h1 = soup.select('.entry-header h1')
    if not h1:  # layout changed / no title -> nothing sensible to name files with
        return
    title = h1[0].get_text().strip()
    # Replace characters that are illegal in file names (Windows: \/:*?"<>|)
    # so open() below cannot fail on a decorative title.
    safe_title = ''.join('_' if c in '\\/:*?"<>|' else c for c in title)
    for tag in img_tags:
        src = tag.attrs.get('src')
        if not src:
            continue
        im = requests.get(src, headers=header, timeout=10)
        if im.status_code != 200:  # don't write an error page as an image
            continue
        # basename, not src[-20:]: a raw 20-char slice of the URL can still
        # contain '/' and break open(); keep the 20-char cap on the basename.
        name = os.path.basename(src)[-20:]
        print(name, src)
        with open(f'{path}/{safe_title}{name}', 'wb') as f:
            f.write(im.content)


def WebPage(url):
    """Return the list of article URLs linked from the index page at *url*.

    Reads the module-level global ``header``. Always returns a list:
    an HTTP failure yields ``[]`` so callers can iterate the result
    unconditionally (previously a non-200 response returned None,
    which crashed the submitting loop in ``__main__``).
    """
    # timeout so a stalled server cannot hang a pool worker forever
    res = requests.get(url, headers=header, timeout=10)
    if res.status_code != 200:
        return []
    soup = BeautifulSoup(res.text, features='lxml')
    return [a.attrs['href'] for a in soup.select('.post-info h2 a')]

if __name__ == '__main__':
    start = time.time()
    path = './小姐姐'
    # makedirs(exist_ok=True) avoids the exists()+mkdir race and also
    # creates missing parent directories.
    os.makedirs(path, exist_ok=True)
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
    }
    # 23 paginated tag-index pages; each lists the article pages to scrape.
    urls = [f'https://www.jdlingyu.com/tag/%e6%b8%85%e6%96%b0%e7%be%8e%e5%a5%b3/page/{url}' for url in range(1, 24)]
    with ThreadPoolExecutor(max_workers=50) as pool:
        page_futures = [pool.submit(WebPage, url) for url in urls]
        download_futures = []
        for fut in as_completed(page_futures):
            # WebPage may return None on an HTTP error; guard so the
            # submit loop below never iterates None.
            urls_page = fut.result() or []
            download_futures.extend(pool.submit(WebPageRequests, u) for u in urls_page)
        # Await the download futures: previously they were fire-and-forget,
        # so any exception inside WebPageRequests was silently swallowed.
        for fut in as_completed(download_futures):
            try:
                fut.result()
            except Exception as exc:
                print('download failed:', exc)
    end = time.time()
    print('耗时:%.2f' % (end - start))

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值