需求概述
1、读取某文件夹中的N张报表(格式一致),将其合并为一张汇总表。源表格样式如下图:
2、将合并后的表按组合名称去重,并将同一组合名称所对应的余额相加;然后将余额列的单元格格式设置为"会计专用"(余额保留小数点后两位,且余额为0时显示为"-"),并用红色背景高亮显示余额小于0的行。如下图:
需求实现
import glob
import os
import pandas as pd
def merge_form(base_dir: str, write_file: str) -> None:
    """
    Merge every report in a directory into one de-duplicated summary workbook.

    Every regular file directly under ``base_dir`` whose file name does not
    contain '汇总表' is read with ``pd.read_excel`` (header on the sixth row,
    first five columns). Rows without a '托管户账号' value are dropped, rows
    sharing the same '组合名称' are collapsed into their first occurrence with
    the '余额' values summed, and the result is written to the '汇总表' sheet of
    ``write_file``. Rows whose summed balance is negative get a red
    background, and the balance column uses an accounting number format.

    :param base_dir: directory holding the source reports
    :param write_file: path of the Excel workbook to create
    :return: None
    :raises ValueError: if ``base_dir`` contains no source report to merge
    """
    # Collect the source reports. Sorting makes "keep the first duplicate"
    # deterministic instead of depending on directory listing order.
    frames = []
    for file in sorted(glob.glob(os.path.join(base_dir, '*'))):
        file_name = os.path.basename(file)
        # Test the file name only: a parent directory whose name contains
        # '汇总表' must not cause every report to be skipped.
        if '汇总表' in file_name:
            continue
        # Skip sub-directories and the '~$' lock files Excel creates while a
        # workbook is open; neither can be parsed as a report.
        if file_name.startswith('~$') or not os.path.isfile(file):
            continue
        frames.append(
            pd.read_excel(file, header=5, usecols=[0, 1, 2, 3, 4], dtype={'托管户账号': str})
        )

    if not frames:
        raise ValueError(f"no source reports found in {base_dir!r}")

    # Concatenate once; growing a DataFrame inside the loop copies it each time.
    combined = pd.concat(frames)
    # Drop filler rows that carry no account number.
    combined = combined[combined['托管户账号'].notna().values]

    # Sum the balance per portfolio name, keep the first row of each portfolio
    # and overwrite its balance with the group total.
    totals = combined.groupby(by='组合名称')['余额'].sum()
    summary = combined.drop_duplicates(subset=['组合名称'], keep='first').reset_index(drop=True)
    summary['余额'] = summary['组合名称'].map(totals)

    # The index was reset above, so these labels are 0-based row positions.
    negative_rows = summary.index[summary['余额'] < 0.0]
    last_col = len(summary.columns) - 1
    balance_col = summary.columns.get_loc('余额')

    # The context manager saves and closes the workbook even if formatting
    # fails; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(write_file, engine='xlsxwriter') as writer:
        summary.to_excel(writer, index=False, sheet_name='汇总表')
        workbook = writer.book
        worksheet = writer.sheets['汇总表']

        # Red background for rows with a negative balance. A conditional
        # format that matches the empty string is used so the fill is layered
        # on top of the column number format rather than replacing it.
        bg_color_format = workbook.add_format({'bg_color': 'red'})
        for row in negative_rows:
            sheet_row = int(row) + 1  # +1 skips the header row
            worksheet.conditional_format(sheet_row, 0, sheet_row, last_col,
                                         {'type': 'text', 'criteria': 'containing', 'value': '',
                                          'format': bg_color_format})

        # Accounting format: two decimals, zero rendered as "-".
        balance_format = workbook.add_format(
            {'num_format': '_ * #,##0.00_ ;_ * -#,##0.00_ ;_ * "-"??_ ;_ @_ '}
        )
        worksheet.set_column(0, last_col, 20)
        worksheet.set_column(balance_col, balance_col, 20, balance_format)