import os
import requests
from openpyxl import Workbook
from openpyxl.drawing.image import Image
from concurrent.futures import ThreadPoolExecutor
# 图片链接列表
image_urls = [
"https://uploads/file/20230205/f85Lpcv8PXrLAdmNUDE1Hh6xqkp0NHi2gSXeqyOb.png",
"https://uploads/file/20230205/geG4FOpthrsUX0LkmWvDH2veFtw6yj8JLDMYBaQ1.png",
"https://uploads/file/20230205/mjVAx4jsbke6uj0e2Qz66f8KDceL1P5tanKQkNoy.png"
]
output_dir = "C:/Users/win-10/Desktop/发票图片/" # 指定Excel文件的输出目录
# 保存图片的本地目录
save_folder = "C:/Users/win-10/Desktop/发票图片/downloaded_images/"
# Excel文件名
excel_filename = "images_with_links.xlsx"
# 最大下载尝试次数
max_download_attempts = 3
def download_image(url, filename, attempts=0):
"""
下载图片到指定文件名
:param url: 图片的URL链接
:param filename: 保存图片的本地文件名
:param attempts: 当前下载尝试次数,默认为0
:return: 成功保存的文件名,下载失败返回None
"""
try:
response = requests.get(url, stream=True)
if response.status_code == 200:
with open(filename, 'wb') as f:
for chunk in response.iter_content(1024):
f.write(chunk)
return url, filename # 返回包含URL和文件名的元组
else:
raise Exception(f"HTTP错误码:{response.status_code}")
except Exception as e:
if attempts < max_download_attempts - 1:
print(f"下载尝试失败:{e},重试...")
return download_image(url, filename, attempts + 1)
else:
print(f"下载失败:{url},{e}")
return url, None # 返回包含URL和None(表示下载失败)的元组
def create_excel_file(image_data, output_dir, excel_filename):
"""
创建Excel文件并添加图片信息
:param image_data: 包含图片URL和本地路径的元组列表
:param output_dir: 目标Excel文件的输出目录
:param excel_filename: Excel文件名(不含目录路径)
"""
global cm_to_px_ratio
workbook = Workbook()
sheet = workbook.active
for idx, (img_url, img_path) in enumerate(image_data, start=1):
sheet[f"A{idx}"] = img_url
img = Image(img_path[1]) # 使用元组的第二个元素(文件名)
# 设置图片大小为6厘米 × 6厘米
cm_to_px_ratio = 20 # 假设1厘米等于96像素
img.width = 6 * cm_to_px_ratio
img.height = 6 * cm_to_px_ratio
# 将图片放置在与链接同一行的第二列(B列)
img.anchor = f"B{idx}"
img.left = idx # 或者 img.left = idx * 250 如果需要图片间有一定的间距
img.top = idx
sheet.add_image(img)
# 调整列宽以适应内容
sheet.column_dimensions['A'].width = 6 * cm_to_px_ratio
sheet.column_dimensions['B'].width = 6 * cm_to_px_ratio
sheet.row_dimensions[idx].height = 6 * cm_to_px_ratio
excel_full_path = os.path.join(output_dir, excel_filename)
workbook.save(excel_full_path)
print(f"图片及其链接已保存至Excel文件:{excel_full_path}")
if __name__ == "__main__":
if not image_urls:
print("图片链接列表为空,程序退出。")
exit(1)
# 创建保存目录(如果不存在)
if not os.path.exists(save_folder):
os.makedirs(save_folder)
with ThreadPoolExecutor(max_workers=5) as executor:
# 使用线程池并发下载图片
image_futures = [
executor.submit(download_image, url, os.path.join(save_folder, f"image{idx}.{url.split('.')[-1]}")) for
idx, url in enumerate(image_urls, start=1)]
# 收集下载结果
image_data = [(url, future.result()) for idx, (url, future) in
enumerate(zip(image_urls, image_futures), start=1)]
# 使用下载的图片信息创建Excel文件
create_excel_file(image_data, output_dir, excel_filename)
Python小功能实现(链接下载图品并存储到EXCEL中)
于 2024-04-24 10:57:22 首次发布
本文介绍了如何使用Python中的requests库下载图片,OpenPyXL处理Excel,以及ThreadPoolExecutor实现并发下载。函数`download_image`负责下载图片,`create_excel_file`则将图片信息整合到Excel中,生成包含图片链接的工作簿。
摘要由CSDN通过智能技术生成