多进程下载美图图片

from multiprocessing import Pool
import os
import urllib
from lxml import etree
import requests
import re
def get_list(url, timeout=10):
    """Return the detail-page links found on a channel listing page.

    The listing page is fetched once and every ``<li>`` under
    ``div.box_left3 > div.channel_picbox > div.channel_list > ul`` is
    inspected; the ``href`` of its first direct ``<a>`` child is collected.

    Args:
        url: Address of the listing page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of link strings in document order. Items without a link are
        skipped, so the list may be empty.

    Raises:
        requests.RequestException: If the listing page cannot be fetched.
    """
    item_xpath = (
        '//div[@class="box_left3"]/div[@class="channel_picbox"]'
        '/div[@class="channel_list"]/ul/li'
    )
    html = etree.HTML(requests.get(url, timeout=timeout).text)

    links = []
    # Guard against a parser result of None (e.g. an empty response body).
    items = html.xpath(item_xpath) if html is not None else []
    for li in items:
        hrefs = li.xpath('./a/@href')
        # The linked pages are deliberately NOT downloaded here: only the
        # link itself is returned, so fetching them would be wasted traffic.
        if hrefs:
            links.append(hrefs[0])
    print(links)
    return links
def get_page(url, timeout=10):
    """Collect the image address shown on every page of one gallery.

    The first page is ``url`` itself. The page count is the first number
    in the text of the first pager entry (``div.pages > ul > li[1] > a``).
    Follow-up pages are addressed by inserting ``_<n>`` before the file
    extension, e.g. ``.../93486.html`` -> ``.../93486_2.html``.

    Args:
        url: Address of the gallery's first page.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A list with the ``src`` of the first ``div.photo > a > img`` of each
        page that has one, in page order. Pages without such an image, and
        follow-up pages that fail to download, are skipped.

    Raises:
        requests.RequestException: If the first page cannot be fetched.
    """
    photo_xpath = '//div[@class="photo"]/a/img/@src'
    pager_xpath = '//div[@class="pages"]/ul/li[1]/a/text()'

    def first_match(tree, xpath):
        # Return the first XPath hit, or None when the tree/hit is missing.
        hits = tree.xpath(xpath) if tree is not None else []
        return hits[0] if hits else None

    html = etree.HTML(requests.get(url, timeout=timeout).text)

    images = []
    src = first_match(html, photo_xpath)
    if src is not None:
        images.append(src)

    # Without a readable pager the gallery is treated as single-page.
    pager_text = first_match(html, pager_xpath)
    digits = re.search(r"\d+", pager_text) if pager_text else None
    total_pages = int(digits.group()) if digits else 1

    stem, extension = os.path.splitext(url)
    for number in range(2, total_pages + 1):
        page_url = "{}_{}{}".format(stem, number, extension)
        print(page_url)
        try:
            response = requests.get(page_url, timeout=timeout).text
        except requests.RequestException as exc:
            # One unreachable page should not discard the rest of the gallery.
            print("Skipping {}: {}".format(page_url, exc))
            continue
        src = first_match(etree.HTML(response), photo_xpath)
        if src is not None:
            images.append(src)
    return images
def download_image(url, file_path='D:/book/imge'):
    """Download one image into ``file_path``, keeping its original file name.

    The file name is the last path segment of ``url`` (query string and
    fragment excluded). The target directory is created when missing.
    Failures are reported on stdout and never raised, so one bad image
    does not abort a whole batch run through ``Pool.map``.

    Args:
        url: Address of the image to download.
        file_path: Directory the image is stored in.

    Returns:
        None.
    """
    # Imported locally: a bare ``import urllib`` does not guarantee that the
    # ``urllib.request`` submodule is loaded.
    from urllib.parse import urlsplit
    from urllib.request import urlretrieve

    print(url)
    file_name = os.path.basename(urlsplit(url).path)
    print(file_name)
    if not file_name:
        print("Exception: no file name in {}".format(url))
        return
    try:
        # exist_ok avoids a race when several workers create it at once.
        os.makedirs(file_path, exist_ok=True)
        # file_name already carries the extension; do not append it again.
        filename = os.path.join(file_path, file_name)
        print(filename)
        urlretrieve(url, filename=filename)
    except IOError as e:
        print("IOError: {}: {}".format(url, e))
    except Exception as e:
        # Best-effort download: report anything unexpected and move on.
        print("Exception: {}: {}".format(url, e))
    
if __name__ == "__main__":
    # Gallery start pages to crawl.
    urllist = [
        'http://www.meituba.com/xinggan/93486.html',
        'http://www.meituba.com/xinggan/93494.html',
        'http://www.meituba.com/xinggan/93479.html',
        'http://www.meituba.com/xinggan/93497.html',
        'http://www.meituba.com/xinggan/93503.html',
        'http://www.meituba.com/xinggan/93510.html',
        'http://www.meituba.com/xinggan/93512.html',
        'http://www.meituba.com/xinggan/93504.html',
        'http://www.meituba.com/xinggan/93506.html',
        'http://www.meituba.com/xinggan/93505.html',
        'http://www.meituba.com/xinggan/106333.html',
        'http://www.meituba.com/xinggan/105890.html',
        'http://www.meituba.com/xinggan/105886.html',
        'http://www.meituba.com/xinggan/105882.html',
        'http://www.meituba.com/xinggan/106381.html',
        'http://www.meituba.com/xinggan/105885.html',
    ]

    # A single pool serves both phases; the context manager releases the
    # worker processes even when something below raises.
    with Pool() as pool:
        # Phase 1: crawl every gallery in parallel.
        pending = [
            pool.apply_async(get_page, (gallery_url,))
            for gallery_url in urllist
        ]
        # Phase 2: download each gallery's images as soon as it is crawled.
        for gallery_url, result in zip(urllist, pending):
            try:
                image_urls = result.get()
            except Exception as exc:
                # One broken gallery must not stop the remaining ones.
                print("Skipping {}: {}".format(gallery_url, exc))
                continue
            print(image_urls)
            # map() blocks until this gallery is fully downloaded, so no
            # work is outstanding when the pool is torn down.
            pool.map(download_image, image_urls)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值