excel文档拆分与合并
import pandas as pd
df = pd.read_excel("文件路径/文件名")
userNames = ["小帅", "小王", "小明", "小雷", "小博", "小红","小韩","小艺"]
rows = df.shape[0]
count = rows//len(userName)
if row%len(userName)!=0:
count += 1
df_subs = []
for index,userName in enumerate(userNames):
begin = index * count
end = begin + count
df_sub = df[begin:end]
df_subs.append(df_sub)
for index,userName,df_sub in df_subs:
fileName = f"文件路径/No_{index}_{userName}.xlsx"
df_sub,to_excel(fileName,index=False)
import os
import pandas as pd
excel_names = []
for excel_name in os.listdir("拆分后文件位置"):
excel_names.append(excel_name)
df_list = []
for excel_name in excel_names:
excel_path = f"文件位置/{excel_name}"
df_split = pd.read_excel(excel_path)
userName = excel_name.replace("No_","").replace(".xlsx","")[2:]
df_split["userName"] = userName
df_list.append(df_split)
df_merge = pd.concat(df_list, axis=0, ignore_index=True)
df_merge.to_excel("文件地址/新文件名", index=False)
分组统计
import pandas as pd
import numpy as np
%matplotlib inline
df = pd.DataFrame({"A":["foo","bar","foo","bar","foo","bar","foo","foo"],
"B":["one","one","two","three","two","two","one","tree"],
"C":np.random.randn(8),
"D":np.random.randn(8)
})
df.groupby("A").sum()
df.groupby(["A","B"]).mean()
df.groupby(["A","B"]).agg([np.sum,np.mean,np.std])
result = df.groupby("A").agg({'C': ['sum', 'mean', 'std'], 'D': ['sum', 'mean', 'std']})
df.groupby("A")["C"].agg([np.sum,np,mean,np.std])