# Case 1: merge CSV files whose column names are identical (第一种情况:合并列名一致的csv文件)
import pandas as pd
import numpy as np
import glob
import csv
import os
def my_HeBingCSV1(path, SaveCsvName):
    """Merge all CSV files found under *path* into one CSV file.

    The inputs are expected to share the same columns in the same order:
    the header row is taken from the first file only, and the rows of every
    other file are appended below it unchanged. Files are processed one at a
    time, in sorted name order, so only one input is held in memory.

    Args:
        path: Directory to scan for ``*.csv`` files; a trailing separator is
            optional. A value that is not an existing directory is treated
            as a filename prefix and ``'*.csv'`` is appended to it directly.
        SaveCsvName: Path of the merged output file. It is overwritten if it
            already exists and is never read as an input, even when it is
            located inside the scanned directory.

    Returns:
        None. When no input file is found, nothing is written.

    Raises:
        UnicodeDecodeError: If an input file is not UTF-8 encoded.
    """
    if os.path.isdir(path):
        pattern = os.path.join(path, '*.csv')
    else:
        pattern = path + '*.csv'

    # The output usually lives in the scanned folder; leaving it in the input
    # list would merge a previous result into itself on a re-run.
    target = os.path.normcase(os.path.abspath(SaveCsvName))
    file_list = sorted(
        name for name in glob.glob(pattern)
        if os.path.normcase(os.path.abspath(name)) != target
    )
    print(u'totally %s CSV files found' % len(file_list))
    if not file_list:
        return

    for index, name in enumerate(file_list):
        is_first = index == 0
        frame = pd.read_csv(name, encoding='utf-8-sig')
        # The first file creates/truncates the output and supplies the
        # header; the remaining files are appended as data rows only.
        frame.to_csv(
            SaveCsvName,
            encoding="utf-8-sig",
            index=False,
            header=is_first,
            mode='w' if is_first else 'a',
        )
    print(u'Done!')
"""
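Note: if the merge fails with
UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 383-384: invalid continuation byte
then at least one of the CSV files is not UTF-8 encoded; re-save that file as UTF-8 before merging.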
"""
if __name__ == '__main__':
    # Example run: merge the CSV files on the desktop into all.csv.
    # The output path is built by appending the file name directly, so the
    # folder path must end with a separator.
    path = r'C:\Users\Emma\Desktop/'
    SaveCsvName = path + 'all.csv'
    my_HeBingCSV1(path=path, SaveCsvName=SaveCsvName)
# Case 2: merge CSV files whose column names differ (第二种情况:合并列名不同的csv文件)
import pandas as pd
import numpy as np
import glob
import csv
import os
"""
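Purpose of my_HeBingCSV2: merge CSV files whose columns are not identical.
1. Read each CSV file.
2. Load its contents as a DataFrame.
3. Combine the DataFrames with the pandas.concat function.
   concat is used because it copes with inputs that have different columns:
   the result contains the union of all columns, and cells for columns a file does not have are filled with NaN.
4. Save the combined DataFrame as a CSV file.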
"""
def my_HeBingCSV2(path, SaveCsvName):
    """Merge CSV files with differing columns into one CSV file.

    Every input is loaded as a DataFrame and all of them are combined with a
    single ``pd.concat`` call. The result holds the union of the columns, in
    order of first appearance; cells for columns a file does not have are
    left empty (NaN). Inputs are processed in sorted name order.

    Args:
        path: Directory to scan for ``*.csv`` files; a trailing separator is
            optional. A value that is not an existing directory is treated
            as a filename prefix and ``'*.csv'`` is appended to it directly.
        SaveCsvName: Path of the merged output file. It is overwritten if it
            already exists and is never read as an input, even when it is
            located inside the scanned directory.

    Returns:
        None. When no input file is found, nothing is written.

    Raises:
        UnicodeDecodeError: If an input file is not UTF-8 encoded.
    """
    if os.path.isdir(path):
        pattern = os.path.join(path, '*.csv')
    else:
        pattern = path + '*.csv'

    # The output usually lives in the scanned folder; leaving it in the input
    # list would merge a previous result into itself on a re-run.
    target = os.path.normcase(os.path.abspath(SaveCsvName))
    file_list = sorted(
        name for name in glob.glob(pattern)
        if os.path.normcase(os.path.abspath(name)) != target
    )
    print(u'totally %s CSV files found' % len(file_list))
    if not file_list:
        return

    # 'utf-8-sig' reads plain UTF-8 as well and explicitly tolerates a
    # leading byte-order mark, matching how my_HeBingCSV1 reads its inputs.
    frames = [pd.read_csv(name, encoding='utf-8-sig') for name in file_list]

    # One concat over the whole list avoids re-copying the accumulated frame
    # on every iteration.
    merged = pd.concat(frames, axis=0, ignore_index=True, sort=False)

    # Write mode (not append): a re-run must replace the previous result
    # instead of stacking a second header and data copy below it.
    merged.to_csv(SaveCsvName, encoding="utf_8", index=False, mode='w')
    print(u'Done!')
if __name__ == '__main__':
    # Example run: merge the CSV files on the desktop into all.csv.
    # The output path is built by appending the file name directly, so the
    # folder path must end with a separator.
    path = r'C:\Users\Emma\Desktop/'
    SaveCsvName = path + 'all.csv'
    my_HeBingCSV2(path=path, SaveCsvName=SaveCsvName)