协程写入excel

最新推荐文章于 2024-08-27 16:38:10 发布

greens_bird_jj

最新推荐文章于 2024-08-27 16:38:10 发布

阅读量283

点赞数

分类专栏：协程高性能的python 文章标签： python excel

本文链接：https://blog.csdn.net/qq_41929657/article/details/120471756

版权

高性能的python 同时被 2 个专栏收录

5 篇文章 0 订阅

订阅专栏

协程

2 篇文章 0 订阅

订阅专栏

需求是生成带图片的excel，由于前端是用源生js写的，无法写多个sheetname，参考了

import base64
import aiohttp
import asyncio
import os
import shutil
import time

from io import BytesIO
from PIL import Image
import xlsxwriter


def do_work_one(data, worksheet, common, top, img_index, sheet_name, column_dict, img_dict, img_fmt,
                img_type: int = 1, start_y = 0) -> None:
    for y, line in enumerate(data):
        y += start_y
        for x, cell in enumerate(line):
            # 去除空格
            cell = cell.strip() if isinstance(cell, str) else cell
            if y == 0:
                worksheet.write_string(y, x, str(cell), top)
            else:
                if img_fmt and x in img_index:
                    worksheet.write_string(y, x, '', common)
                    try:
                        if img_type == 1:
                            image_data = BytesIO(base64.b64decode(cell))
                        else:
                            image_data = BytesIO(img_dict.get(str(cell)))
                        original_width, original_height = Image.open(image_data).size
                        x_scale = img_fmt['x_scale'] / original_width
                        y_scale = img_fmt['y_scale'] / original_height
                        worksheet.insert_image(y, x, cell,
                                               {'image_data': image_data,
                                                'x_offset': img_fmt['x_offset'],
                                                'y_offset': img_fmt['y_offset'],
                                                'x_scale': x_scale,
                                                'y_scale': y_scale})
                        # 图片就不统计长度
                        continue
                    except Exception as e:
                        print('io--------', e)
                        worksheet.write(y, x, '无', common)
                else:
                    worksheet.write(y, x, cell, common)
            length = len(str(cell))
            utf8_length = len(str(cell).encode('utf-8'))
            column_max = (utf8_length - length) / 2 + length
            if column_dict[sheet_name].get(x):
                if column_max > column_dict[sheet_name].get(x):
                    column_dict[sheet_name][x] = column_max
            else:
                column_dict[sheet_name][x] = column_max



def async_excel(res_dict, file_path, file_name, img_index, time_str, excel_type, top_sytle, common_style,
                img_fmt,
                img_type: int = 1):
    """
    协程生成excel
    :param res_dict: 结果{sheetname1:[],sheetname1:[]}
    :param file_path: 文件dir
    :param file_name: 文件名
    :param img_index: 图片index[]
    :param time_str: 时间戳字符串
    :param excel_type: 1 有图片类型 2 图片类型
    :param top_sytle: 标题头样式，即第一行的样式，根据需求是否需要表头加粗之类的样式
    {'align': 'center', 'valign': 'vcenter', 'font_size': 13, 'bold': True}
    :param common_style: 内容样式
    {'align': 'center', 'valign': 'vcenter', 'font_size': 13}
    :param img_type: 1是base64类型的图片 2:是request的图片
    :return: None
    """
    # 先删除文件夹下的所有文件
    file_dir = os.path.join(file_path, time_str)
    if os.path.exists(file_path):
        for i in os.listdir(file_path):
            shutil.rmtree(f"{file_path}/{i}")
    os.makedirs(file_dir)
    # 判断目录有没有没有生成
    start_time = time.time()
    res_path = os.path.join(file_dir, file_name)
    tag_path = os.path.join(file_dir, 'test.xlsx')
    column_dict = {}  # 清空全局变量
    workbook = xlsxwriter.Workbook(tag_path)
    # 在文件中创建工作簿, 参数为i
    for sheet_name in res_dict:
        # 初始化sheetname的自适应长度
        column_dict[sheet_name] = {}
        worksheet = workbook.add_worksheet(sheet_name)
        data = res_dict.get(sheet_name)
        top = workbook.add_format(top_sytle)
        common = workbook.add_format(common_style)
        common.set_text_wrap()
        top.set_text_wrap()
        # 将多有的图片下载下来
        if img_type == 2:
            async_img(data, worksheet, common, top, img_index, sheet_name, column_dict, img_fmt)
        else:
            img_dict = {}
            do_work_one(data, worksheet, common, top, img_index, sheet_name, column_dict, img_dict, img_fmt, img_type=img_type)
        # 设置图片中的列中的宽度
        for i in img_index:
            column_dict[sheet_name][i] = 20
        for col in column_dict[sheet_name]:
            worksheet.set_column(col, col, column_dict[sheet_name][col])
        worksheet.set_default_row(20) if excel_type == 2 else worksheet.set_default_row(img_fmt.get("y_scale") if img_fmt else 60)

    workbook.close()
    os.renames(tag_path, res_path)
    print(time.time() - start_time)


async def spider_img(img_dict, url, semaphore):

    async with semaphore:
        async with aiohttp.ClientSession() as session:
            response=await session.get(url=url)
            content_img = await response.read()
            img_dict[str(url)] = content_img
            return

def async_img(data, worksheet, common, top, img_index, sheet_name, column_dict, img_fmt):
    num = 500
    semaphore = asyncio.Semaphore(num)
    img_dict = {}
    range_num = len(data) // num if len(data) % num == 0 else len(data) // num + 1
    start_index = 0
    end_index = num
    for index,i in enumerate(range(range_num)):
        if index == range_num:
            one_list = data[start_index: -1]
        else:
            one_list = data[start_index: end_index]
        loop = asyncio.get_event_loop()
        tasks = [spider_img(img_dict, i[img_index[0]], semaphore) for i in one_list]
        loop.run_until_complete(asyncio.wait(tasks))
        # 先将这五百个写入
        do_work_one(one_list, worksheet, common, top, img_index, sheet_name, column_dict, img_dict, img_fmt, img_type=2, start_y=start_index)
        img_dict = {}
        start_index = end_index
        end_index += num


if __name__ == '__main__':
    res_dict = {

        "test":
            [

             ] + [[1, '测试', '431006000694', 2.0, 6.0, 30, '2022/01/11 10:40:53',
              'https:///70759728a5e14c6a923e48c6b09ff921/sign/20220111/144906/9aff34923b8c4dfe9a58a0018d3155f9/icon?e=1641888043&sign=0Np1-qDPdxyARaR56Rsi3BSxlxM=',
              '15178614321'] for i in range(1000)],


    }
    file_path = './files'
    file_name = "1.xlsx"
    img_index = [7]
    time_str = str(int(time.time()))
    excel_type = 1
    async_excel(res_dict, file_path, file_name, img_index, time_str, excel_type, {'border': 1},{'border': 1}, {'x_scale': 40, 'y_scale': 20, 'x_offset': 2, 'y_offset': 3},2)

greens_bird_jj

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
协程写入excel

需求是生成带图片的excel，由于前端是用源生js写的，无法写多个sheetname，参考了import base64import mathimport osimport shutilimport threadpoolimport asynciofrom io import BytesIOfrom PIL import Imageimport xlsxwriter# 存一列中的最大值,最后刷一下COLUMN_DICT = {}async def do_work_one(st
复制链接

扫一扫

专栏目录