一键拆分表格 - pandas实现表格拆分

松鼠111

已于 2024-01-02 10:50:38 修改

阅读量630

点赞数 8

分类专栏： python 文章标签： pandas python 分类

于 2023-12-31 16:42:42 首次发布

本文链接：https://blog.csdn.net/zkl718/article/details/135317173

版权

python 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

一、背景

工作中有同事需要将一个表格进行拆分，拆分的依据就是将属于同一机构或者属于同一类别的excel表格进行拆分。即一个工作簿拆成多个工作表，然后每个工作表生成单独的工作簿。

类似的问题还有另一种解决办法，详见：Excel一个拆分成多个工作簿实例经验 - 知乎

二、实现

python3.8 + pandas 读取excel表格，根据某个表头进行筛选，将结果循环筛选到结果excel中。

后打包为可执行程序（exe），下载本文章资源后开箱即用。

第一步-打开“一键拆分.exe”

一键拆分程序首页

第二步-选择要拆分的excel

第三步-输入基准单元格名称

三、源码

import os, pathlib, datetime, sys, time, shutil
import traceback
import pandas as pd
import openpyxl
from openpyxl.cell.read_only import EmptyCell
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Font, Alignment
import tkinter as tk
from tkinter import filedialog, messagebox, font, ttk, simpledialog
from pathlib import Path

import myLogger

logger = myLogger.get_logger()



class PrepareFileClass(object):
    """文件准备类"""
    def __init__(self, root):
        self.__is_zonghang = False
        self.__root = root
        self.__root.title("表格拆分程序")
        self.__root.geometry("300x350")
        self.__root.resizable(width=False, height=False)

        self.labelFrame = tk.LabelFrame(self.__root, width=300)
        self.labelFrame.pack()
        self.labelFrame1 = tk.LabelFrame(self.__root, width=300)
        self.labelFrame1.pack()
        self.frame1 = tk.Listbox(self.__root, width=300)
        self.frame1.pack()

        self.label = tk.Label(self.labelFrame, text="欢迎使用表格拆分程序", pady=10, font=font.Font(size=12, weight="bold"))
        self.label.pack()
        self.label1 = tk.Label(self.labelFrame, text="表格拆分程序开始运行，步骤如下：")
        self.label1.pack(anchor=tk.W)
        self.label3 = tk.Label(self.labelFrame, text="1、请选择要拆封的Excel文件；")
        self.label3.pack(anchor=tk.W)

        self.label4 = tk.Label(self.labelFrame, text="2、请输入基准单元格名称")
        self.label4.pack(anchor=tk.W)
        self.label5 = tk.Label(self.labelFrame, justify=tk.LEFT, wraplength=290,
                               text="如'B3'：表示将表格第三行作为表头对B列不同的值进行筛选拆分。")
        self.label5.pack(anchor=tk.W)

        self.label6 = ttk.Label(self.labelFrame1, justify=tk.LEFT, wraplength=290,
                               text="开始拆分，先对选择的文件进行预校验，预校验不通过会有相应的提示；校验通过无提示，等待执行结果即可。")
        self.label6.pack(anchor=tk.W)

        self.button1 = tk.Button(self.frame1, text="开始拆分", font=font.Font(size=11, weight="bold"),
                                 highlightcolor="red", takefocus=True, relief=tk.RAISED, command=lambda:self.__command("zonghang"))
        self.button1.pack(side=tk.LEFT, padx=10, pady=10)

        self.button3 = tk.Button(self.frame1, text=" 取 消 ", fg='white', bg='#35a4cc', font=font.Font(size=11, weight="bold"), command=lambda:self.__exit())
        self.button3.pack(side=tk.LEFT, padx=10, pady=10)

    def __exit(self):
        self.__root.destroy()
        sys.exit(100)

    def __command(self, s):
        if s == "zonghang":
            self.__is_zonghang = True
            # self.__root.destroy()
        else:
            self.__is_zonghang = False
        self.__root.destroy()

    def getFilePath(self):
        """
        取需要的所有文件路径 \n
        :return: 绝对路径元组 ()
        """
        # 创建一个Tkinter窗口对象
        root = tk.Tk()
        root.withdraw() # 隐藏主窗口
        # 打开文件选择对话框，并获取选择的文件路径
        __mingdan_file_path = filedialog.askopenfilename(title="请选择要拆封的文件", filetypes=[("Excel File", "*.xlsx"), ("Excel File", "*.xls")])
        self.__check_path(__mingdan_file_path)

        cell = simpledialog.askstring(title="请输入基准单元格名称", prompt="如'B3'：表示将表格第三行作为表头对B列不同的值进行筛选拆分。")
        if cell is None or len(cell) < 2:
            messagebox.showwarning(title="警告", message="请输入正确的基准单元格名称")
            sys.exit(100)

        return (__mingdan_file_path, cell)

    def __check_path(self, path):
        if path is None or path == "":
            messagebox.showwarning(title="警告", message="请选择正确的文件")
            sys.exit(100)
        if os.path.getsize(path) > 5*1024*1024:
            messagebox.showwarning(title="警告", message="您选择的文件大小大于5MB了，请检查确认并调整。")
            sys.exit(100)
        else:
            logger.debug(f"文件{path}的大小为 {os.path.getsize(path)/1024} KB")


class SpilitProcessClass(object):
    """ 一键拆分表格实现类。   """
    df = pd.DataFrame()
    column_name = ""
    original_header = []

    def __init__(self, file_path, split_cell):
        self.__file_path = file_path
        self.__split_cell = split_cell

    def preDataCheck(self):
        """ 数据预校验 """
        path = Path(self.__file_path)
        logger.debug(path.name)
        if path.name.endswith(".xls"):
            tmpdf = pd.read_excel(self.__file_path)
            xlsx_path = path.parent.joinpath(path.name + ".xlsx")
            logger.debug(f"选择的Excel为xls，转化为xlsx，{xlsx_path}")
            self.__file_path = xlsx_path
            try:
                tmpdf.to_excel(xlsx_path, index=None)
            except PermissionError as e:
                logger.error(e, e)
                messagebox.showwarning(title="警告", message=f"请关闭打开的文件[{xlsx_path}]")
                sys.exit(100)
        wb = openpyxl.load_workbook(self.__file_path, read_only=True)
        ws = wb.active
        cell = ws[self.__split_cell]
        if isinstance(cell, EmptyCell):
            return False, f"输入的基准单元格[{self.__split_cell}]无数值，请确认."

        logger.debug(f"cell的类型{type(cell)}, {cell}")
        logger.debug(f"row:{cell.row}, column:{cell.column}, coordinate: {cell.coordinate}")
        skiprows = []
        if cell.row > 1 :
            for i in range(cell.row-1):
                skiprows.append(i)
        logger.debug(f"skiprows为：{skiprows}")
        tmpdf2 = pd.read_excel(self.__file_path, skiprows=skiprows)
        logger.debug(f"df的columns为：{tmpdf2.columns.values}")
        self.column_name = tmpdf2.columns.values[cell.column - 1]
        self.original_header = tmpdf2.columns.tolist()
        logger.debug(f"基准单元格[{self.__split_cell}]的数值为: {self.column_name}")
        if "Unnamed" in self.column_name:
            return False, f"输入的基准单元格[{self.__split_cell}]无数值(Unnamed)，请确认."

        self.df = tmpdf2

        unique_values = self.df[self.column_name].dropna().unique()
        logger.info(f"筛选后的结果：{unique_values}")
        if len(unique_values) < 2:
            logger.info('')
            messagebox.showwarning(title="警告", message=f"要拆封的单元格经筛选只有一个筛选结果[{unique_values}]")
            sys.exit(100)

        return True, "数据预校验通过"

    def dataProcess(self):
        """ 拆分表格功能实现 """
        path = Path(self.__file_path)
        res_path = path.parent.joinpath(path.stem)
        res_path = path.parent.joinpath("拆分结果-" + str(datetime.datetime.now()).replace(" ", "-").replace(":", "-"))
        logger.debug(f"res_path:{res_path}")
        if os.path.exists(res_path):
            shutil.rmtree(res_path)
        os.mkdir(res_path)

        unique_values = self.df[self.column_name].dropna().unique()
        unique_values_list = list(unique_values)
        logger.info(f"type:{type(unique_values_list)}, unique_values_list:{unique_values_list}")
        wb_data_dict = dict()
        wb_data_dict["原表格所有数据"] = self.df
        for n in unique_values_list:
            tmp_df = self.df[self.df[self.column_name] == n]
            wb_data_dict[n] = tmp_df
            file_name = path.stem.replace(".xls", "") + "-" + str(n) + ".xlsx"
            resfile_path = res_path.joinpath(file_name)
            logger.info(f"开始拆分[{file_name}]")
            tmp_df.to_excel(resfile_path, index=False, columns=self.original_header)

        wb = openpyxl.Workbook()
        for sh in wb_data_dict.keys():
            tm_df = wb_data_dict[sh]
            ws = wb.create_sheet(title=sh)
            op_rows = dataframe_to_rows(tm_df, index=False, header=False)
            ws.append(self.original_header)
            for r in op_rows:
                ws.append(r)
        wb.remove(wb["Sheet"])
        wb.active = wb["原表格所有数据"]  # 设置sheet页激活状态
        __res_path = res_path.joinpath("原表格所有数据-拆分.xlsx")
        wb.save(__res_path)

        logger.info("拆分完成")
        messagebox.showinfo(title="提示", message=f"拆分完成，结果文件路径为[{res_path}]")



if __name__ == '__main__':
    logger.info('开始执行一键拆分')
    root = tk.Tk()
    c = PrepareFileClass(root)
    root.mainloop()
    file_path, split_cell = c.getFilePath()
    logger.info(f"excel文件file_path：{file_path}")
    logger.info(f"按照split_cell：{split_cell} 进行拆分")

    cls = SpilitProcessClass(file_path=file_path, split_cell=split_cell)
    precheckResult, msg = cls.preDataCheck()
    logger.info("文件数据校验结果：" + str(precheckResult) + ", " + msg)

    if precheckResult:
        try:
            cls.dataProcess()
        except BaseException as e:
            logger.error("发生错误：%s", str(e))
            # 记录异常的堆栈跟踪信息到日志中
            logger.error('异常堆栈跟踪：%s', traceback.format_exc())
            messagebox.showerror(title="错误", message=f"运行异常，联系开发: {str(e)}")
    else:
        logger.warning(f"数据校验不通过，提示信息：{msg}")
        messagebox.showwarning(title="警告", message=f"数据校验不通过，提示信息：{msg}")
        sys.exit(100)

四、源码打包

# -*- mode: python ; coding: utf-8 -*-


block_cipher = None


a = Analysis(
    ['一键拆表.py'],
	pathex=[r'E:\LZKF\workspace_python3\python_RPA'],
    binaries=[],
    datas=[],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.zipfiles,
    a.datas,
    [],
    name='一键拆表',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)