import pandas as pd
import numpy as np
detail =pd.read_excel(r’文件名.xlsx’)
############# 分组聚合内计算 ##############
### 第一种聚合:通过简单的统计函数
detaigeoup =detail[['列索引1','列索引2','列索引3']].groupby(by='索引1') # by:以谁为聚合
### 第二种聚合:
# agg:不同字段要求相同的统计函数
print(detail[['列索引1','列索引2']].agg([np.mean,np,sum]))
# agg:对不同字段要求不同的统计函数
print(detail.agg({'列索引1':np.sum,'列索引2':np.mean}))
print(detai.agg({'counts':np.sum,'amounts':[np.mean,np.sum,np.median]}))
### transform 方法聚合:func
print(detai[['列索引1','列索引2']].transform(lambda x:x*2).head())
################# 透视表与交叉表 ################
### 1.透视表
## index:行分组键
detailpivot =pd.pivot_table(detai[['counts','order_id','amounts','dishes_name']],index=['order_id','dishes_name'],aggfunc=np.sum)
## columns: 列分组键
detailpivot = pd.pivot_table(detai[['counts', 'order_id', 'amounts', 'dishes_name']], values='counts', columns='dishes_name',aggfunc=np.sum)
## 行列都用
detailpivot = pd.pivot_table(detai[['counts', 'order_id', 'amounts', 'dishes_name']],values='counts', index='order_id', columns='dishes_name',aggfunc=np.sum, fill_value=0, margins=True) # margins :是否求和
### 2.交叉表
print(pd.crosstab(index=detai['列索引1'],columns =detai[列索引2],values=detai['列索引3],aggfunc=np.sum))