一、背景
工作中有同事需要将一个表格进行拆分,拆分的依据就是将属于同一机构或者属于同一类别的excel表格进行拆分。即一个工作簿拆成多个工作表,然后每个工作表生成单独的工作簿。
类似的问题还有另一种解决办法,详见:Excel一个拆分成多个工作簿实例经验 - 知乎
二、实现
python3.8 + pandas 读取excel表格,根据某个表头进行筛选,将结果循环筛选到结果excel中。
后打包为 可执行程序(exe),下载本文章资源后开箱即用。
第一步-打开“一键拆分.exe”
一键拆分程序首页
第二步-选择要拆分的excel
第三步-输入基准单元格名称
三、源码
import os, pathlib, datetime, sys, time, shutil
import traceback
import pandas as pd
import openpyxl
from openpyxl.cell.read_only import EmptyCell
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Font, Alignment
import tkinter as tk
from tkinter import filedialog, messagebox, font, ttk, simpledialog
from pathlib import Path
import myLogger
logger = myLogger.get_logger()
class PrepareFileClass(object):
"""文件准备类"""
def __init__(self, root):
self.__is_zonghang = False
self.__root = root
self.__root.title("表格拆分程序")
self.__root.geometry("300x350")
self.__root.resizable(width=False, height=False)
self.labelFrame = tk.LabelFrame(self.__root, width=300)
self.labelFrame.pack()
self.labelFrame1 = tk.LabelFrame(self.__root, width=300)
self.labelFrame1.pack()
self.frame1 = tk.Listbox(self.__root, width=300)
self.frame1.pack()
self.label = tk.Label(self.labelFrame, text="欢迎使用表格拆分程序", pady=10, font=font.Font(size=12, weight="bold"))
self.label.pack()
self.label1 = tk.Label(self.labelFrame, text="表格拆分程序开始运行,步骤如下:")
self.label1.pack(anchor=tk.W)
self.label3 = tk.Label(self.labelFrame, text="1、请选择要拆封的Excel文件;")
self.label3.pack(anchor=tk.W)
self.label4 = tk.Label(self.labelFrame, text="2、请输入基准单元格名称")
self.label4.pack(anchor=tk.W)
self.label5 = tk.Label(self.labelFrame, justify=tk.LEFT, wraplength=290,
text="如'B3':表示将表格第三行作为表头对B列不同的值进行筛选拆分。")
self.label5.pack(anchor=tk.W)
self.label6 = ttk.Label(self.labelFrame1, justify=tk.LEFT, wraplength=290,
text="开始拆分,先对选择的文件进行预校验,预校验不通过会有相应的提示;校验通过无提示,等待执行结果即可。")
self.label6.pack(anchor=tk.W)
self.button1 = tk.Button(self.frame1, text="开始拆分", font=font.Font(size=11, weight="bold"),
highlightcolor="red", takefocus=True, relief=tk.RAISED, command=lambda:self.__command("zonghang"))
self.button1.pack(side=tk.LEFT, padx=10, pady=10)
self.button3 = tk.Button(self.frame1, text=" 取 消 ", fg='white', bg='#35a4cc', font=font.Font(size=11, weight="bold"), command=lambda:self.__exit())
self.button3.pack(side=tk.LEFT, padx=10, pady=10)
def __exit(self):
self.__root.destroy()
sys.exit(100)
def __command(self, s):
if s == "zonghang":
self.__is_zonghang = True
# self.__root.destroy()
else:
self.__is_zonghang = False
self.__root.destroy()
def getFilePath(self):
"""
取需要的所有文件路径 \n
:return: 绝对路径元组 ()
"""
# 创建一个Tkinter窗口对象
root = tk.Tk()
root.withdraw() # 隐藏主窗口
# 打开文件选择对话框,并获取选择的文件路径
__mingdan_file_path = filedialog.askopenfilename(title="请选择要拆封的文件", filetypes=[("Excel File", "*.xlsx"), ("Excel File", "*.xls")])
self.__check_path(__mingdan_file_path)
cell = simpledialog.askstring(title="请输入基准单元格名称", prompt="如'B3':表示将表格第三行作为表头对B列不同的值进行筛选拆分。")
if cell is None or len(cell) < 2:
messagebox.showwarning(title="警告", message="请输入正确的基准单元格名称")
sys.exit(100)
return (__mingdan_file_path, cell)
def __check_path(self, path):
if path is None or path == "":
messagebox.showwarning(title="警告", message="请选择正确的文件")
sys.exit(100)
if os.path.getsize(path) > 5*1024*1024:
messagebox.showwarning(title="警告", message="您选择的文件大小大于5MB了,请检查确认并调整。")
sys.exit(100)
else:
logger.debug(f"文件{path}的大小为 {os.path.getsize(path)/1024} KB")
class SpilitProcessClass(object):
""" 一键拆分表格实现类。 """
df = pd.DataFrame()
column_name = ""
original_header = []
def __init__(self, file_path, split_cell):
self.__file_path = file_path
self.__split_cell = split_cell
def preDataCheck(self):
""" 数据预校验 """
path = Path(self.__file_path)
logger.debug(path.name)
if path.name.endswith(".xls"):
tmpdf = pd.read_excel(self.__file_path)
xlsx_path = path.parent.joinpath(path.name + ".xlsx")
logger.debug(f"选择的Excel为xls,转化为xlsx,{xlsx_path}")
self.__file_path = xlsx_path
try:
tmpdf.to_excel(xlsx_path, index=None)
except PermissionError as e:
logger.error(e, e)
messagebox.showwarning(title="警告", message=f"请关闭打开的文件[{xlsx_path}]")
sys.exit(100)
wb = openpyxl.load_workbook(self.__file_path, read_only=True)
ws = wb.active
cell = ws[self.__split_cell]
if isinstance(cell, EmptyCell):
return False, f"输入的基准单元格[{self.__split_cell}]无数值,请确认."
logger.debug(f"cell的类型{type(cell)}, {cell}")
logger.debug(f"row:{cell.row}, column:{cell.column}, coordinate: {cell.coordinate}")
skiprows = []
if cell.row > 1 :
for i in range(cell.row-1):
skiprows.append(i)
logger.debug(f"skiprows为:{skiprows}")
tmpdf2 = pd.read_excel(self.__file_path, skiprows=skiprows)
logger.debug(f"df的columns为:{tmpdf2.columns.values}")
self.column_name = tmpdf2.columns.values[cell.column - 1]
self.original_header = tmpdf2.columns.tolist()
logger.debug(f"基准单元格[{self.__split_cell}]的数值为: {self.column_name}")
if "Unnamed" in self.column_name:
return False, f"输入的基准单元格[{self.__split_cell}]无数值(Unnamed),请确认."
self.df = tmpdf2
unique_values = self.df[self.column_name].dropna().unique()
logger.info(f"筛选后的结果:{unique_values}")
if len(unique_values) < 2:
logger.info('')
messagebox.showwarning(title="警告", message=f"要拆封的单元格经筛选只有一个筛选结果[{unique_values}]")
sys.exit(100)
return True, "数据预校验通过"
def dataProcess(self):
""" 拆分表格功能实现 """
path = Path(self.__file_path)
res_path = path.parent.joinpath(path.stem)
res_path = path.parent.joinpath("拆分结果-" + str(datetime.datetime.now()).replace(" ", "-").replace(":", "-"))
logger.debug(f"res_path:{res_path}")
if os.path.exists(res_path):
shutil.rmtree(res_path)
os.mkdir(res_path)
unique_values = self.df[self.column_name].dropna().unique()
unique_values_list = list(unique_values)
logger.info(f"type:{type(unique_values_list)}, unique_values_list:{unique_values_list}")
wb_data_dict = dict()
wb_data_dict["原表格所有数据"] = self.df
for n in unique_values_list:
tmp_df = self.df[self.df[self.column_name] == n]
wb_data_dict[n] = tmp_df
file_name = path.stem.replace(".xls", "") + "-" + str(n) + ".xlsx"
resfile_path = res_path.joinpath(file_name)
logger.info(f"开始拆分[{file_name}]")
tmp_df.to_excel(resfile_path, index=False, columns=self.original_header)
wb = openpyxl.Workbook()
for sh in wb_data_dict.keys():
tm_df = wb_data_dict[sh]
ws = wb.create_sheet(title=sh)
op_rows = dataframe_to_rows(tm_df, index=False, header=False)
ws.append(self.original_header)
for r in op_rows:
ws.append(r)
wb.remove(wb["Sheet"])
wb.active = wb["原表格所有数据"] # 设置sheet页激活状态
__res_path = res_path.joinpath("原表格所有数据-拆分.xlsx")
wb.save(__res_path)
logger.info("拆分完成")
messagebox.showinfo(title="提示", message=f"拆分完成,结果文件路径为[{res_path}]")
if __name__ == '__main__':
logger.info('开始执行一键拆分')
root = tk.Tk()
c = PrepareFileClass(root)
root.mainloop()
file_path, split_cell = c.getFilePath()
logger.info(f"excel文件file_path:{file_path}")
logger.info(f"按照split_cell:{split_cell} 进行拆分")
cls = SpilitProcessClass(file_path=file_path, split_cell=split_cell)
precheckResult, msg = cls.preDataCheck()
logger.info("文件数据校验结果:" + str(precheckResult) + ", " + msg)
if precheckResult:
try:
cls.dataProcess()
except BaseException as e:
logger.error("发生错误:%s", str(e))
# 记录异常的堆栈跟踪信息到日志中
logger.error('异常堆栈跟踪:%s', traceback.format_exc())
messagebox.showerror(title="错误", message=f"运行异常,联系开发: {str(e)}")
else:
logger.warning(f"数据校验不通过,提示信息:{msg}")
messagebox.showwarning(title="警告", message=f"数据校验不通过,提示信息:{msg}")
sys.exit(100)
四、源码打包
# -*- mode: python ; coding: utf-8 -*-
block_cipher = None
a = Analysis(
['一键拆表.py'],
pathex=[r'E:\LZKF\workspace_python3\python_RPA'],
binaries=[],
datas=[],
hiddenimports=[],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.zipfiles,
a.datas,
[],
name='一键拆表',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
console=True,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
)