Python 合并多个文件夹下名称一致的数据:
方法1:遍历根目录下的各个子文件夹,收集所有不重复的 CSV 文件名;对每个文件名读取第一个找到的同名文件,并把它们合并成一个 DataFrame。代码:
import os
import pandas as pd
import glob
# Method 1: build one DataFrame (`merged_data`) from the CSV files found in
# the sub-folders of `root_folder`, taking each distinct file name once.
root_folder = './in-situ WL/'

# Keep only real sub-directories: a plain file sitting directly in the root
# would make the os.listdir() call below raise NotADirectoryError.
# Sorted so that "first folder containing the file" is reproducible
# (os.listdir returns entries in arbitrary order).
folder_names = sorted(
    entry for entry in os.listdir(root_folder)
    if os.path.isdir(os.path.join(root_folder, entry))
)
# print(folder_names)

# Map every distinct CSV file name to the first path it is found at.
# A dict keeps insertion order and gives O(1) "already seen" checks.
first_paths = {}
for folder in folder_names:
    folder_path = os.path.join(root_folder, folder)
    for entry in sorted(os.listdir(folder_path)):
        if entry.endswith('.csv'):
            first_paths.setdefault(entry, os.path.join(folder_path, entry))

# Distinct CSV file names, in discovery order.
file_names = list(first_paths)

# NOTE(review): only the FIRST file found for each name is read; same-named
# files in later folders are ignored. If the goal is to combine all
# same-named files across folders, every matching path must be read instead
# - confirm the intended behaviour.
frames = [pd.read_csv(path) for path in first_paths.values()]

# DataFrame.append was removed in pandas 2.0; a single concat replaces it and
# avoids re-copying the accumulated frame on every iteration.
# pd.concat raises on an empty list, so fall back to an empty DataFrame.
merged_data = pd.concat(frames) if frames else pd.DataFrame()
方法2:先保证每个文件的数据中都有名称列 `name`(或其他用作分组依据的列),把所有文件合并成一个总表,然后再按该列分组拆分。
第一步:合并(把 ./merge/ 目录下的所有 CSV 合并为 All_merge.csv):
# csv_path = './merge/'
# df_list = []
# for f in os.listdir(csv_path):
#     df = pd.read_csv(os.path.join(csv_path, f))
#     df_list.append(df)
# result = pd.concat(df_list, axis=0)
# result.to_csv('./All_merge.csv', index=False)
# print('done!')
第二步:拆分(按 `name` 列分组,每组单独保存为一个 CSV 文件):
# Method 2, step 2: split the merged table into one CSV per distinct value
# of the `name` column, writing the files into `save_path`.
df = pd.read_csv('.//All_merge.csv')
save_path = './split_name/'

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(save_path, exist_ok=True)

# Group by the bare column label. Grouping by the one-element list ['name']
# makes pandas >= 2.0 yield 1-tuple keys, which would produce file names
# like "('x',).csv".
grouped = df.groupby('name')
for fid, group in grouped:
    filename = f'{fid}.csv'
    print(filename)
    # Use a non-mutating drop: modifying a group slice in place triggers
    # SettingWithCopyWarning and is not guaranteed to behave consistently.
    group = group.drop('name', axis=1)
    group.to_csv(os.path.join(save_path, filename), index=False, encoding='utf-8-sig')
print('done!')