1.代码
import pandas as pd;
import numpy as np;
# Two independent columns, each holding 1000 draws from np.random.randn.
data = {
    "data1": np.random.randn(1000),
    "data2": np.random.randn(1000),
}
df1 = pd.DataFrame(data)

# pd.cut: slice the value range of data1 into 4 equal-width intervals.
factor = pd.cut(df1["data1"], bins=4)

# pd.qcut: slice data1 into 5 quantile-based buckets (equal sample counts);
# labels=False returns the integer bucket number instead of interval labels.
factor1 = pd.qcut(df1["data1"], q=5, labels=False)
def get_stats(group):
    """Return summary statistics for one group of values.

    Args:
        group: An object exposing ``min()``, ``max()`` and ``count()``
            (in this script, the pandas Series for a single group).

    Returns:
        A dict with the keys ``"min"``, ``"max"`` and ``"count"`` holding
        the results of the corresponding method calls on ``group``.
    """
    return {"min": group.min(), "max": group.max(), "count": group.count()}
# Group the data2 values by the quantile bucket (factor1) of the data1 value
# in the same row.
grouped = df1.data2.groupby(factor1)

# get_stats returns one dict per bucket; unstack() spreads the stat names
# into columns so each bucket becomes a single row, and dropna() discards
# rows containing missing values.
# print(...) with a single argument behaves the same on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print(grouped.apply(get_stats).unstack().dropna())
2.结果
       count       max       min
data1
0      200.0  3.111853 -3.556149
1      200.0  2.726763 -3.015856
2      200.0  2.265047 -3.439436
3      200.0  2.709829 -4.088341
4      200.0  2.978180 -2.819611