Excel文件包含的信息:该Excel文件中包含每个项目对应的文件夹名称、每张照片的URL接口地址,以及用作照片文件名的id。
开发需求:需要按照Excel中给出的名称创建对应的文件夹,并将每张照片下载到其对应的文件夹中。
实现过程:此处提供了两种方法:一种是一张一张地下载,速度比较慢;另一种是运用Python协程下载(协程方法还没有完全弄清楚,之后再给自己单独写一篇文章吧)。
代码
#url批量下载照片(一张一张的下载 下载时长较长)
import requests, logging, time, os
from urllib.parse import *
from fake_useragent import UserAgent
import xlrd
import os
# Open the workbook that lists the image URLs and keep its first worksheet.
book = xlrd.open_workbook('D:\\download.xls')  # path of the input workbook
sheet1 = book.sheet_by_index(0)  # first worksheet of the workbook
# Function that implements the batch download.
def Download_Image(
    downloadUrl: "str | list",
    saveImagePath: str,
    headers: "dict | None" = None,
    proxies: "dict | None" = None,
) -> "str | None":
    """Download the images listed in the module-level ``sheet1`` worksheet.

    Every worksheet row supplies the image id (column 0, used as the file
    name), the target sub-folder (column 1) and the image URL (column 3).
    Each image is stored as ``<saveImagePath>/<folder>/<id>.jpg``. Rows whose
    image file already exists are skipped, so an interrupted run can simply
    be started again to fetch only what is still missing.

    Args:
        downloadUrl: Must be a list to start the batch download. The rows
            themselves are read from ``sheet1``, not from this list; that
            behaviour is kept for backward compatibility.
        saveImagePath: Root directory under which the per-row folders are
            created.
        headers: Request headers sent with every download. When ``None`` a
            default set is derived from each URL.
        proxies: Optional proxy mapping passed through to ``requests``.

    Returns:
        ``'无法下载'`` when ``downloadUrl`` is not a list, otherwise ``None``.
    """
    if not isinstance(downloadUrl, list):
        return '无法下载'

    # The random User-Agent source is only needed for the default headers.
    agent = UserAgent() if headers is None else None
    timed_out = []  # URLs that timed out, reported once at the end

    for row in range(sheet1.nrows):
        image_id = sheet1.cell(row, 0).value   # first column: file name
        folder = sheet1.cell(row, 1).value     # second column: sub-folder
        url = sheet1.cell(row, 3).value        # fourth column: image URL

        # Empty or non-text cells carry no URL to download.
        if not isinstance(url, str) or not url.strip():
            continue

        folder_path = os.path.join(saveImagePath, str(folder))
        image_path = os.path.join(folder_path, '{}.jpg'.format(image_id))

        # Skip images that are already on disk. This must be a *file* check
        # and must not abort the remaining rows.
        if os.path.isfile(image_path):
            continue

        if headers is None:
            # NOTE(review): the header values were blank in the original
            # source; these defaults are derived from the URL and a random
            # User-Agent. Confirm them against what the image server expects.
            parsed = urlparse(url)
            request_headers = {
                'User-Agent': agent.random,
                'Referer': '{}://{}/'.format(parsed.scheme, parsed.netloc),
                'Host': parsed.netloc,
            }
        else:
            request_headers = headers

        try:
            # The endpoint is queried with POST, as in the original script.
            response = requests.post(
                url, headers=request_headers, timeout=20, proxies=proxies
            )
            # Do not store an HTTP error page under a .jpg name.
            response.raise_for_status()
        except requests.exceptions.Timeout:
            timed_out.append(url)
            logging.warning('Timed out while downloading %s', url)
            continue
        except Exception as exc:
            # Best effort per row: one bad URL must not stop the batch.
            logging.warning('Skipping %s: %s', url, exc)
            continue

        # Create the folder on demand and always write the image afterwards
        # (previously the first image of each new folder was dropped).
        os.makedirs(folder_path, exist_ok=True)
        with open(image_path, 'wb') as image_file:
            image_file.write(response.content)

    if timed_out:
        logging.warning('%d download(s) timed out: %s', len(timed_out), timed_out)
    return None
# URLs read from the fourth worksheet column; filled when run as a script.
downloadlist = []

if __name__ == '__main__':
    # Column index 3 of every row holds the image URL.
    downloadlist.extend(
        sheet1.cell(row, 3).value for row in range(sheet1.nrows)
    )
    downloadUrl = downloadlist
    Download_Image(
        downloadUrl=downloadUrl,
        # Plain raw string: the original doubled the backslashes inside an
        # r'' literal, which put literal "\\" pairs into the path.
        saveImagePath=r'D:\2023\20230111chengguan',
    )