学习:一个多线程爬虫下载图片

import requests
import re
from bs4 import BeautifulSoup
import os
import urllib
import threading    #导入多线程库


def getTEXT(url, timeout=10):
    """Fetch ``url`` and return the decoded response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the calling thread
            indefinitely.

    Returns:
        The response text, decoded with the encoding detected from the
        content, or the sentinel string '错误' when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        kv = {"user-agent": "mozilla/5.0"}
        r = requests.get(url, headers=kv, timeout=timeout)
        r.raise_for_status()
        # Trust the encoding guessed from the body over the HTTP header.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are mapped to the sentinel; programming
        # errors and KeyboardInterrupt are no longer swallowed.
        return '错误'

def get_erery_site_url(text):
    """Extract the link target and link contents of every ``<h2>`` heading.

    Args:
        text: HTML source of a page.

    Returns:
        A tuple ``(every_urls, every_names)`` of two equally long lists:
        the ``href`` of the first ``<a>`` inside each ``<h2>``, and that
        anchor's ``contents`` list. Headings without an anchor, or whose
        anchor has no ``href``, are skipped.
    """
    every_urls = []
    every_names = []
    b = BeautifulSoup(text, "html.parser")
    for tb in b.find_all('h2'):
        th = tb.find('a')
        # A heading that is not a link carries nothing to crawl; skipping it
        # avoids an AttributeError/KeyError and keeps both lists aligned.
        if th is None or 'href' not in th.attrs:
            continue
        every_urls.append(th.attrs['href'])
        every_names.append(th.contents)
    return every_urls, every_names

def congwangzhi_natudewangzhi(wangzhi):
    """Collect image URLs from the page at ``wangzhi``.

    The page is downloaded with ``getTEXT`` and the element with
    ``id="post_content"`` is scanned paragraph by paragraph. For each
    ``<p>``, the ``href`` of its first ``<a>`` is taken when present;
    otherwise the ``src`` of every ``<img>`` inside the paragraph is taken.

    Args:
        wangzhi: URL of the page to scan.

    Returns:
        A list of the collected URLs, in document order. The list is empty
        when the page has no ``post_content`` element.
    """
    shuju = getTEXT(wangzhi)
    chuli = BeautifulSoup(shuju, "html.parser")
    tb = chuli.find(id="post_content")
    if tb is None:
        # No content container was found, so there is nothing to collect.
        return []
    tupianwangzhis = []
    for tc in tb.find_all('p'):
        lianjie = tc.a
        if lianjie is not None and 'href' in lianjie.attrs:
            tupianwangzhis.append(lianjie.attrs['href'])
            continue
        # No usable link in this paragraph: fall back to its inline images.
        for td in tc.find_all('img'):
            if 'src' in td.attrs:
                tupianwangzhis.append(td.attrs['src'])
    return tupianwangzhis

def xiazaitupian(tupianwangzhis, path, biaoti):
    """Download every URL in ``tupianwangzhis`` into the directory ``path``.

    Each file is named from the first element of ``biaoti`` followed by the
    last six characters of its URL.

    Args:
        tupianwangzhis: Iterable of image URLs.
        path: Target directory; created (with parents) when missing.
        biaoti: Sequence whose first element is used as the file-name
            prefix. An empty sequence yields no prefix.

    A URL that cannot be downloaded is reported and skipped so that one bad
    image does not abort the remaining downloads.
    """
    print(path+'图片开始下载,注意查看文件夹')
    # exist_ok avoids the check-then-create race when several threads
    # target the same directory at once.
    os.makedirs(path, exist_ok=True)
    qianzhui = str(biaoti[0]) if biaoti else ''
    for tupianwangzhi in tupianwangzhis:
        # Titles and URL tails may contain path separators or characters
        # that are illegal in file names; replace them instead of failing.
        wenjianming = re.sub(r'[\\/:*?"<>|]', '_',
                             qianzhui + tupianwangzhi[-6:])
        try:
            tu = requests.get(tupianwangzhi, timeout=30)
            # Do not save an HTTP error page as if it were an image.
            tu.raise_for_status()
        except requests.RequestException:
            print(tupianwangzhi + '下载失败,已跳过')
            continue
        with open(os.path.join(path, wenjianming), mode='wb') as obj:
            obj.write(tu.content)
    
def pachong(i, path='E:\\tupian'):
    """Crawl listing page number ``i`` and download the images of its posts.

    The listing page is fetched, the link of every ``<h2>`` heading is
    followed, and the image URLs found on each linked page are downloaded.

    Args:
        i: Page number appended to the site's ``/page/`` URL.
        path: Directory the images are saved into. Defaults to the
            previously hard-coded location.
    """
    url = 'https://www.vooc.net/page/' + str(i)

    a = getTEXT(url)
    wangzhis, biaotis = get_erery_site_url(a)
    for biaoti, wangzhi in zip(biaotis, wangzhis):
        tupianwangzhis = congwangzhi_natudewangzhi(wangzhi)
        xiazaitupian(tupianwangzhis, path, biaoti)

# Ask how many listing pages to crawl, then start one worker thread per page.
page_count = int(input('你想爬取的页数:'))
for page_no in range(1, page_count + 1):
    worker = threading.Thread(target=pachong, args=(page_no,))
    worker.start()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值