# coding:utf-8
from __future__ import print_function
from concurrent.futures import ThreadPoolExecutor, wait
import json
from math import fabs
import shutil
from wsgiref import headers
from bs4 import BeautifulSoup
import re
import os
import time
import requests
import urllib3
import pymysql
from urllib import request
import ssl
import gzip
import io
from PIL import Image
def get_pages(url, timeout=(5, 20)):
    """Fetch ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to download.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.get``. Without a timeout a stalled server would
            block the calling thread indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built with ``html.parser`` on success, or
        the empty string ``""`` when the request, the UTF-8 decoding or the
        parsing failed. Callers should check the result for truthiness
        before calling parser methods on it.
    """
    soup = ""
    try:
        # No custom headers are sent yet; add entries here (for example a
        # User-Agent) if they become necessary. The local name avoids
        # shadowing the module-level ``headers`` import.
        request_headers = {}
        response = requests.get(url, headers=request_headers, timeout=timeout)
        data = response.content.decode('utf-8')
        soup = BeautifulSoup(data, "html.parser")
    except Exception as e:
        # Best effort: report the problem and fall through to return "".
        print(url+" 请求错误\n")
        print(e)
    return soup
def download_task(info_ary):
    """Download every entry of ``info_ary`` concurrently.

    One ``get_info`` task is submitted per entry to a pool of at most 20
    worker threads, and the call blocks until all of them have finished.

    Args:
        info_ary: List of entries to download; each task receives its index,
            the total count and the list itself.

    Returns:
        None.
    """
    len_list = len(info_ary)  # total number of links, used for progress output
    # The context manager shuts the pool down on exit, so worker threads do
    # not pile up when this function is called once per listing page.
    with ThreadPoolExecutor(max_workers=20) as executor:
        all_tasks = [executor.submit(get_info, url_id, len_list, info_ary)
                     for url_id in range(len_list)]
        wait(all_tasks)  # block until every download task has completed
def get_info(url_id, len_list, info_ary):
    """Download one wallpaper and store it as a PNG under ``wallpaper/``.

    The detail page referenced by the selected entry is fetched, the first
    ``<img>`` inside the first ``.single-wallpaper`` element is located, and
    the image it points to is downloaded, re-encoded as PNG and written to
    ``wallpaper/<label>.png``. ``<label>`` is the part of the image URL
    between ``uploads/`` and ``.jpg`` with ``/`` replaced by ``-``.

    Args:
        url_id: Index into ``info_ary`` of the entry to process.
        len_list: Total number of entries; only used in progress output.
        info_ary: List of dicts; the ``'src'`` value of the selected entry
            is the URL of the detail page.

    Returns:
        None. Failures are reported on stdout and never raised, so one bad
        entry does not abort the rest of the batch.
    """
    try:
        entry = info_ary[url_id]
        soup = get_pages(entry['src'])
        if not soup:
            # get_pages returns "" on failure; stop with a clear message.
            raise RuntimeError("detail page could not be loaded")
        img_tag = soup.select(".single-wallpaper")[0].find_all('img')[0]
        src = img_tag.attrs['src']
        match = re.search(r'uploads/(.+?)\.jpg', src)
        if match is None:
            raise ValueError("unexpected image URL: {}".format(src))
        label_s = match.group(1).replace('/', '-')
        response = requests.get(url=src, timeout=(5, 20))  # fetch the image bytes
        response.raise_for_status()
        # Re-encode in memory first so a corrupt download never leaves a
        # truncated file on disk.
        png_buffer = io.BytesIO()
        Image.open(io.BytesIO(response.content)).save(png_buffer, format='PNG')
        # The output directory may not exist yet on a fresh checkout.
        os.makedirs('wallpaper', exist_ok=True)
        with open(os.path.join('wallpaper', label_s + '.png'), "wb") as f:
            f.write(png_buffer.getvalue())
        print("下载成功 {}/{}".format(url_id+1, len_list ))
    except Exception as e:
        # Catch Exception rather than everything so Ctrl-C still works, and
        # print the cause instead of discarding it.
        print("下载失败 {}/{}".format(url_id+1, len_list ))
        print(e)
if __name__ == "__main__":
    # Walk listing pages 30..39, collect the detail-page links found on each
    # one and download the wallpapers they point to.
    for j in range(30, 40):
        print('页码{}'.format(j))
        print('')
        # NOTE(review): the listing URL is missing from this file; the line
        # originally read `url = "")`, which is a syntax error. The stray
        # parenthesis suggests a `.format(j)` call on a URL template with a
        # page-number placeholder -- fill in the real template and confirm.
        url = "".format(j)
        soup = get_pages(url)
        if not soup:
            # get_pages already printed the error and returned "".
            continue
        containers = soup.select(".wallpaper")
        if not containers:
            print('页码{} 未找到壁纸列表'.format(j))
            continue
        # Anchors without an href cannot be followed, so they are skipped; a
        # missing alt text falls back to an empty name.
        info_ary = [
            {
                'name': anchor.attrs.get('alt', ''),
                'src': anchor.attrs['href'],
            }
            for anchor in containers[0].find_all('a')
            if 'href' in anchor.attrs
        ]
        download_task(info_ary)
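# python3 下载远程图片 (Python 3: downloading remote images)
# 最新推荐文章于 2024-05-08 01:45:28 发布 (latest recommended article published 2024-05-08 01:45:28)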