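# Requirement: merge a series of CSV files into one.
# Approach: put all the files in a single folder, then read every file in that folder and process them as a batch.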
import os
import pandas as pd
def list_csv_files(directory):
    """Return the sorted names of the CSV files located directly in *directory*.

    Only regular files whose name ends with ``.csv`` (case-insensitive) are
    returned, so sub-directories and unrelated files are never handed to
    ``pd.read_csv``.  Sorting makes the merge order reproducible, because
    ``os.listdir`` returns entries in arbitrary order.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith('.csv')
        and os.path.isfile(os.path.join(directory, name))
    )


def merge_csv_files(directory, keep=False):
    """Read every CSV file in *directory* and combine them into one DataFrame.

    Args:
        directory: Folder that contains the CSV files to merge.
        keep: Passed to ``DataFrame.drop_duplicates``.  The default ``False``
            removes *every* copy of a row that occurs more than once; pass
            ``'first'`` or ``'last'`` to keep one copy instead.

    Returns:
        The concatenated rows of all files, de-duplicated, with a fresh
        0..n-1 index.

    Raises:
        ValueError: If *directory* contains no CSV files.
    """
    filelist = list_csv_files(directory)
    if not filelist:
        # pd.concat([]) would also raise ValueError, but with a message that
        # does not say which folder was empty.
        raise ValueError(f"no CSV files found in {directory!r}")

    frames = []
    for filename in filelist:
        print(filename)
        frames.append(pd.read_csv(os.path.join(directory, filename)))

    df_all = pd.concat(frames, ignore_index=True)
    # Drop duplicates first and reset the index afterwards, so the result
    # has no gaps left behind by the removed rows.
    df_all = df_all.drop_duplicates(keep=keep)
    return df_all.reset_index(drop=True)


if __name__ == "__main__":
    # Directory that holds the CSV files to merge.
    path = 'c:\\Users\\username\\filepath'
    # The output path below is relative, so work from the input directory.
    os.chdir(path)

    # Collect and combine all the CSV files under the current directory.
    df_all = merge_csv_files(os.getcwd())
    df_all.info()

    # Save the DataFrame to a CSV file ("utf-8-sig" writes a BOM at the start).
    df_all.to_csv('../data/xxx.csv', encoding="utf-8-sig", index=False)