import numpy as np
import pandas as pd
# GroupBy walkthrough on a small sample frame.
#
# Each "In[n]" marker below is the expression that was evaluated in the
# original interactive session; it is now wrapped in print() so this file is
# a valid, runnable script.  The "Out[n]" comment blocks are the outputs
# recorded in that session.
# NOTE(review): the recorded outputs look like they come from an older pandas
# release (columns shown in alphabetical order, whole-number means reported as
# int64).  Newer releases may keep the dict insertion order for columns and
# report means as float64 -- confirm against the installed version.

# Sample data: two categorical key columns and two integer data columns.
df = pd.DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': [1, 4, 7, 3, 9],
    'data2': [3, 8, 5, 1, 6],
})

# In[4]: df
print(df)
# Out[4]:
#    data1  data2 key1 key2
# 0      1      3    a  one
# 1      4      8    a  two
# 2      7      5    b  one
# 3      3      1    b  two
# 4      9      6    a  one

# Group a single column (a Series) by the values of another column.
grouped = df['data1'].groupby(df['key1'])

# In[5]: grouped.mean()
print(grouped.mean())
# Out[5]:
# key1
# a    4.666667
# b    5.000000
# Name: data1, dtype: float64

# Group several columns at once by the same key.
grou = df[['data1', 'data2']].groupby(df['key1'])

# In[6]: grou.mean()
print(grou.mean())
# Out[6]:
#          data1     data2
# key1
# a     4.666667  5.666667
# b     5.000000  3.000000

# Group by two keys; the result is indexed by a (key1, key2) MultiIndex.
gr = df['data1'].groupby([df['key1'], df['key2']])

# In[7]: gr.mean()
print(gr.mean())
# Out[7]:
# key1  key2
# a     one     5
#       two     4
# b     one     7
#       two     3
# Name: data1, dtype: int64

# Group keys do not have to be columns of the frame: any arrays with one
# entry per row work.  The resulting index levels are unnamed.
states = np.array(['pp', 'cc', 'cc', 'pp', 'pp'])
years = np.array([2005, 2005, 2006, 2005, 2006])

# In[8]: df['data1'].groupby([states, years]).mean()
print(df['data1'].groupby([states, years]).mean())
# Out[8]:
# cc  2005    4
#     2006    7
# pp  2005    2
#     2006    9
# Name: data1, dtype: int64

# Column names can be passed directly as keys; size() counts rows per group.
# In[9]: df.groupby(['key1', 'key2']).size()
print(df.groupby(['key1', 'key2']).size())
# Out[9]:
# key1  key2
# a     one     2
#       two     1
# b     one     1
#       two     1
# dtype: int64