python3 下载远程图片

最新推荐文章于 2024-05-08 01:45:28 发布

能力不足

最新推荐文章于 2024-05-08 01:45:28 发布

阅读量928

点赞数

文章标签： python

本文链接：https://blog.csdn.net/weixin_42417351/article/details/123918374

版权

# coding:utf-8
from __future__ import print_function
from concurrent.futures import ThreadPoolExecutor, wait
import json
from math import fabs
import shutil
from wsgiref import headers
from bs4 import BeautifulSoup
import re
import os
import time
import requests
import urllib3
import pymysql
from urllib import request
import ssl
import gzip
import io
from PIL import Image


def get_pages(url, timeout=(5, 20)):
    """Fetch *url* and parse the response body as HTML.

    Args:
        url: Address of the page to download.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.get`` so a stalled server cannot block the caller
            (or a worker thread) forever.

    Returns:
        A ``BeautifulSoup`` tree built with ``html.parser`` from the body
        decoded as UTF-8, or the empty string ``""`` when the request or
        the decoding fails. Errors are printed, never raised.
    """
    soup = ""
    try:
        response = requests.get(url, timeout=timeout)
        data = response.content.decode('utf-8')
        soup = BeautifulSoup(data, "html.parser")
    except Exception as e:
        # Best-effort fetch: report the problem and hand "" back to the caller.
        print(url+" 请求错误\n")
        print(e)
    return soup


def download_task(info_ary, max_workers=20):
    """Download every entry of *info_ary* concurrently and wait for all of them.

    One ``get_info`` call is scheduled per index of *info_ary*; the function
    returns only after every scheduled call has finished.

    Args:
        info_ary: Sequence of entries understood by ``get_info``.
        max_workers: Size of the thread pool used for the downloads.
    """
    len_list = len(info_ary)  # total number of links
    # The ``with`` block shuts the pool down on exit, so worker threads are
    # released instead of accumulating across repeated calls.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        all_tasks = [
            executor.submit(get_info, url_id, len_list, info_ary)
            for url_id in range(len_list)
        ]
        wait(all_tasks)  # block until every task has completed


def get_info(url_id, len_list, info_ary):
    """Download the image behind ``info_ary[url_id]`` and store it as a PNG.

    The entry's ``'src'`` page is fetched with ``get_pages``; the first
    ``<img>`` inside the first ``.single-wallpaper`` element supplies the
    image URL. The image is re-encoded as PNG and written to
    ``wallpaper/<label>.png``, where ``<label>`` is the part of the image URL
    between ``uploads/`` and ``.jpg`` with ``/`` replaced by ``-``.

    Args:
        url_id: Index into *info_ary* of the entry to process.
        len_list: Total number of entries; used only in progress messages.
        info_ary: Sequence of dicts that each provide a ``'src'`` page URL.

    Returns:
        None. Success or failure is reported on stdout; failures are never
        raised, so one bad entry cannot abort the rest of a batch.
    """
    try:
        v = info_ary[url_id]

        soup = get_pages(v['src'])
        img_src = soup.select(".single-wallpaper")[0].find_all('img')[0]
        src = img_src.attrs['src']
        # Raw string keeps ``\.`` a regex escape instead of an invalid
        # string escape sequence.
        label_s = re.findall(r'uploads/(.+?)\.jpg', src)[0]
        label_s = label_s.replace('/', '-')

        response = requests.get(url=src, timeout=(5, 20))  # fetch the image bytes
        response.raise_for_status()  # report HTTP errors as such, not as decode failures

        # Encode fully in memory first so a failed conversion never leaves a
        # truncated file on disk.
        roi_img = Image.open(io.BytesIO(response.content))
        png_buffer = io.BytesIO()
        roi_img.save(png_buffer, format='PNG')

        # The output directory may not exist on a first run.
        os.makedirs('wallpaper', exist_ok=True)
        with open(r'wallpaper/' + label_s + '.png', "wb") as f:
            f.write(png_buffer.getvalue())

        print("下载成功  {}/{}".format(url_id+1, len_list ))
    except Exception as e:
        # Best-effort download: report the failure together with its cause.
        print("下载失败  {}/{}".format(url_id+1, len_list ))
        print(e)


if __name__ == "__main__":
    # NOTE(review): the listing URL was stripped from the published source
    # (the original line read `url = "")`, which is a syntax error). Fill in
    # the real listing URL here; presumably it contains a `{}` placeholder
    # for the page number -- confirm against the target site.
    page_url_template = ""

    for j in range(30, 40):
        print('页码{}'.format(j))
        print('')
        url = page_url_template.format(j)
        soup = get_pages(url)
        if not soup:
            # get_pages returns "" (and has already printed the error) when
            # the page could not be fetched; move on to the next page.
            continue

        containers = soup.select(".wallpaper")
        if not containers:
            print('页码{} 未找到 .wallpaper'.format(j))
            continue

        # One entry per link inside the first .wallpaper container.
        info_ary = [
            {
                'name': v.attrs['alt'],
                'src': v.attrs['href'],
            }
            for v in containers[0].find_all('a')
        ]
        download_task(info_ary)

能力不足

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python3 下载远程图片

# coding:utf-8
from __future__ import print_function
from concurrent.futures import ThreadPoolExecutor, wait
import json
from math import fabs
import shutil
from wsgiref import headers
from bs4 import BeautifulSoup
import re
import os
import time
import re
复制链接

扫一扫