# 功能:抓取指定文件夹下所有以‘.csv’结尾的文件,并根据指定列的值进行分组,拆分成多个csv文件。
import os
import pandas as pd
t_addr = r'C:\Users\user\Desktop\test'  # folder that holds the source CSV files
s_addr = r'C:\Users\user\Desktop\test\1'  # folder that receives the split CSV files


def find_csv_files(root, exclude_dir=None):
    """Return the paths of all files ending in '.csv' found under *root*.

    Args:
        root: Directory that is walked recursively.
        exclude_dir: Optional directory that is not descended into. Used to
            keep a previous run's output from being read back as input when
            the output folder lives inside *root*.

    Returns:
        A list of file paths, in the order ``os.walk`` visits them.
    """
    excluded = None
    if exclude_dir is not None:
        excluded = os.path.normcase(os.path.abspath(exclude_dir))

    found = []
    for dirpath, dirnames, filenames in os.walk(root):
        print(dirpath)
        if excluded is not None:
            # Pruning ``dirnames`` in place stops os.walk from descending
            # into the excluded directory.
            dirnames[:] = [
                name for name in dirnames
                if os.path.normcase(
                    os.path.abspath(os.path.join(dirpath, name))
                ) != excluded
            ]
        found.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith('.csv')
        )
    return found


def split_csv_by_column(path, out_dir, column='PRODUCT', sep=';'):
    """Split one CSV file into one output file per distinct *column* value.

    Each group is written to ``<out_dir>/<value>.csv`` without the index,
    using ``to_csv``'s default separator.

    Args:
        path: CSV file to read.
        out_dir: Directory for the output files; created if missing.
        column: Name of the column whose values define the groups.
        sep: Field separator of the input file.

    Returns:
        The DataFrame that was read from *path*.

    Raises:
        KeyError: If *column* is not present in the file.
    """
    data = pd.read_csv(path, sep=sep)
    grouped = data.groupby(column)  # raises KeyError before anything is written
    os.makedirs(out_dir, exist_ok=True)
    for value, rows in grouped:
        # NOTE: the file name depends only on the group value, so a later
        # input file containing the same value overwrites this output.
        rows.to_csv(os.path.join(out_dir, '{}.csv'.format(value)), index=False)
    return data


# The output folder is nested inside the input folder, so it is excluded from
# the scan; otherwise a second run would try to split its own output.
files = find_csv_files(t_addr, exclude_dir=s_addr)
for file in files:
    data = split_csv_by_column(file, s_addr)
    print(data)