需求是生成带图片的excel, 由于前端是用源生js写的,无法写多个sheetname,参考了
import base64
import aiohttp
import asyncio
import os
import shutil
import time
from io import BytesIO
from PIL import Image
import xlsxwriter
def do_work_one(data, worksheet, common, top, img_index, sheet_name, column_dict, img_dict, img_fmt,
img_type: int = 1, start_y = 0) -> None:
for y, line in enumerate(data):
y += start_y
for x, cell in enumerate(line):
# 去除空格
cell = cell.strip() if isinstance(cell, str) else cell
if y == 0:
worksheet.write_string(y, x, str(cell), top)
else:
if img_fmt and x in img_index:
worksheet.write_string(y, x, '', common)
try:
if img_type == 1:
image_data = BytesIO(base64.b64decode(cell))
else:
image_data = BytesIO(img_dict.get(str(cell)))
original_width, original_height = Image.open(image_data).size
x_scale = img_fmt['x_scale'] / original_width
y_scale = img_fmt['y_scale'] / original_height
worksheet.insert_image(y, x, cell,
{'image_data': image_data,
'x_offset': img_fmt['x_offset'],
'y_offset': img_fmt['y_offset'],
'x_scale': x_scale,
'y_scale': y_scale})
# 图片就不统计长度
continue
except Exception as e:
print('io--------', e)
worksheet.write(y, x, '无', common)
else:
worksheet.write(y, x, cell, common)
length = len(str(cell))
utf8_length = len(str(cell).encode('utf-8'))
column_max = (utf8_length - length) / 2 + length
if column_dict[sheet_name].get(x):
if column_max > column_dict[sheet_name].get(x):
column_dict[sheet_name][x] = column_max
else:
column_dict[sheet_name][x] = column_max
def async_excel(res_dict, file_path, file_name, img_index, time_str, excel_type, top_sytle, common_style,
img_fmt,
img_type: int = 1):
"""
协程生成excel
:param res_dict: 结果{sheetname1:[],sheetname1:[]}
:param file_path: 文件dir
:param file_name: 文件名
:param img_index: 图片index[]
:param time_str: 时间戳字符串
:param excel_type: 1 有图片类型 2 图片类型
:param top_sytle: 标题头样式,即第一行的样式,根据需求是否需要表头加粗之类的样式
{'align': 'center', 'valign': 'vcenter', 'font_size': 13, 'bold': True}
:param common_style: 内容样式
{'align': 'center', 'valign': 'vcenter', 'font_size': 13}
:param img_type: 1是base64类型的图片 2:是request的图片
:return: None
"""
# 先删除文件夹下的所有文件
file_dir = os.path.join(file_path, time_str)
if os.path.exists(file_path):
for i in os.listdir(file_path):
shutil.rmtree(f"{file_path}/{i}")
os.makedirs(file_dir)
# 判断目录有没有没有生成
start_time = time.time()
res_path = os.path.join(file_dir, file_name)
tag_path = os.path.join(file_dir, 'test.xlsx')
column_dict = {} # 清空全局变量
workbook = xlsxwriter.Workbook(tag_path)
# 在文件中创建工作簿, 参数为i
for sheet_name in res_dict:
# 初始化sheetname的自适应长度
column_dict[sheet_name] = {}
worksheet = workbook.add_worksheet(sheet_name)
data = res_dict.get(sheet_name)
top = workbook.add_format(top_sytle)
common = workbook.add_format(common_style)
common.set_text_wrap()
top.set_text_wrap()
# 将多有的图片下载下来
if img_type == 2:
async_img(data, worksheet, common, top, img_index, sheet_name, column_dict, img_fmt)
else:
img_dict = {}
do_work_one(data, worksheet, common, top, img_index, sheet_name, column_dict, img_dict, img_fmt, img_type=img_type)
# 设置图片中的列中的宽度
for i in img_index:
column_dict[sheet_name][i] = 20
for col in column_dict[sheet_name]:
worksheet.set_column(col, col, column_dict[sheet_name][col])
worksheet.set_default_row(20) if excel_type == 2 else worksheet.set_default_row(img_fmt.get("y_scale") if img_fmt else 60)
workbook.close()
os.renames(tag_path, res_path)
print(time.time() - start_time)
async def spider_img(img_dict, url, semaphore):
async with semaphore:
async with aiohttp.ClientSession() as session:
response=await session.get(url=url)
content_img = await response.read()
img_dict[str(url)] = content_img
return
def async_img(data, worksheet, common, top, img_index, sheet_name, column_dict, img_fmt):
num = 500
semaphore = asyncio.Semaphore(num)
img_dict = {}
range_num = len(data) // num if len(data) % num == 0 else len(data) // num + 1
start_index = 0
end_index = num
for index,i in enumerate(range(range_num)):
if index == range_num:
one_list = data[start_index: -1]
else:
one_list = data[start_index: end_index]
loop = asyncio.get_event_loop()
tasks = [spider_img(img_dict, i[img_index[0]], semaphore) for i in one_list]
loop.run_until_complete(asyncio.wait(tasks))
# 先将这五百个写入
do_work_one(one_list, worksheet, common, top, img_index, sheet_name, column_dict, img_dict, img_fmt, img_type=2, start_y=start_index)
img_dict = {}
start_index = end_index
end_index += num
if __name__ == '__main__':
res_dict = {
"test":
[
] + [[1, '测试', '431006000694', 2.0, 6.0, 30, '2022/01/11 10:40:53',
'https:///70759728a5e14c6a923e48c6b09ff921/sign/20220111/144906/9aff34923b8c4dfe9a58a0018d3155f9/icon?e=1641888043&sign=0Np1-qDPdxyARaR56Rsi3BSxlxM=',
'15178614321'] for i in range(1000)],
}
file_path = './files'
file_name = "1.xlsx"
img_index = [7]
time_str = str(int(time.time()))
excel_type = 1
async_excel(res_dict, file_path, file_name, img_index, time_str, excel_type, {'border': 1},{'border': 1}, {'x_scale': 40, 'y_scale': 20, 'x_offset': 2, 'y_offset': 3},2)