此代码功能逻辑:读取当前文件夹下所有excel文件,将A列数值相同的归集到一个新的excel,excel文件名以A列的数值进行命名,文件名有特殊字符自动处理。可以拿以下代码直接测试。
import os
import pandas as pd
from openpyxl import Workbook
current_dir = os.getcwd()
output_dir = os.path.join(current_dir, 'output')
if not os.path.exists(output_dir):
os.makedirs(output_dir)
all_data = pd.DataFrame()
for filename in os.listdir(curent_dir):
if filename.endswith('.xlsx') or filename.endswith('.xls'):
try:
df = pd.read_excel(filename)
if not df.empty and df.shape[0] > 0:
all_data = pd.concat([all_data, df])
else:
print(f"Warning: {filename} is empty or does not contain columns for grouping.")
except Exception as e:
print(f"Error reading {filename}: {e}")
continue
if all_data.empty:
print("No data to process. Exiting.")
else:
try:
grouped = all_data.groupby(all_data.iloc[:, 0])
for name, group in grouped:
safe_name = ''.join(c for c in str(name) if c not in r'\/:*?"<>|').strip()
output_file = os.path.join(output_dir, f'{safe_name}.xlsx')
wb = Workbook()
ws = wb.active
try:
for r in group.itertuples(index=False):
ws.append(r)
wb.save(output_file)
print(f"Saved {safe_name}.xlsx to output directory.")
except Exception as e:
print(f"Error saving {safe_name}.xlsx: {e}")
except Exception as e:
print(f"Error during grouping or iterating: {e}")