import os
import pandas as pd
import concurrent.futures
from tqdm import tqdm
from Tools.clothes import countTime
@countTime
def split_xlsx(input_file_path, sheet_n, unique_n, output_folder='./拆分名单'):
"""
@param input_file_path: 需要拆分的表格路径(./xlsx)
@param sheet_n: 需要拆分的sheet名
@param unique_n: 根据哪列拆分,该列的标题
@param output_folder: 输出路径
@return:
"""
# 检查输入路径是否存在
if not os.path.isfile(input_file_path):
raise FileNotFoundError(f"No such file: '{input_file_path}'")
# 移除文件扩展名以获得输出路径
output_file_name, _ = os.path.splitext(os.path.basename(input_file_path))
# 创建完整的输出路径
output_file_path = os.path.join(output_folder, output_file_name)
# 创建输出文件夹(如果不存在)
os.makedirs(output_file_path, exist_ok=True)
# 读取原始Excel文件
df = pd.read_excel(input_file_path, sheet_name=sheet_n)
if unique_n in list(df.columns):
# 获取指定列的所有唯一值
unique_values = df[unique_n].unique()
def split_and_save(value):
split_df = df[df[unique_n] == value]
with pd.ExcelWriter(f'{output_file_path}/{value}.xlsx') as writer:
split_df.to_excel(writer, index=False, sheet_name='Sheet1')
with concurrent.futures.ThreadPoolExecutor() as executor:
# 提交所有任务
futures = {executor.submit(split_and_save, value) for value in unique_values}
# 处理任务
for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
pass
if __name__ == '__main__':
input_path = 'XXXXX.xlsx'
name = "XXXXX"
sheet_name = 'XXX'
split_xlsx(input_file_path=input_path, sheet_n=sheet_name, unique_n=name)
指定某一列拆分表格
于 2023-08-07 17:29:10 首次发布