In [1]:
import pandas as pd
import numpy as np
from pandas import *
# Seed NumPy's global RNG so the random columns below are identical on every
# Restart & Run All; without a seed each run displays different numbers.
np.random.seed(0)

# Small example frame used throughout the notebook: two label columns
# (key1, key2) to group on and two columns of random normal values.
# `pd.DataFrame` is used explicitly instead of relying on the star import.
df = pd.DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': np.random.randn(5),
    'data2': np.random.randn(5),
})
df
Out[1]:
In [4]:
# Group the values of data1 by the labels found in key1.
grouped = df['data1'].groupby(by=df['key1'])
print(grouped)  # the groupby object itself

# Mean of data1 within each key1 group.
grouped.mean()
Out[4]:
In [6]:
# Mean of data1 for every (key1, key2) combination; passing two key Series
# produces a result indexed by both keys.
means = (
    df['data1']
    .groupby(by=[df['key1'], df['key2']])
    .mean()
)
means
Out[6]:
In [7]:
# Hierarchical index! Pivot the innermost index level into columns
# (level=-1 is the default, spelled out here for clarity).
means.unstack(level=-1)
Out[7]:
In [8]:
# Group keys do not have to come from the frame: any arrays with one entry
# per row of df can be used. Here each row is labelled with a state and a year.
states = np.array(['Ohio', 'California', 'California', 'Ohio', 'Ohio'])
years = np.array([2005, 2005, 2006, 2005, 2006])

# Mean of data1 per (state, year) pair.
df['data1'].groupby(by=[states, years]).mean()
Out[8]:
In [9]:
# Display the example DataFrame again for reference.
df
Out[9]:
In [14]:
# Group by column name(s) instead of passing the key Series explicitly.
# When grouping by key1 alone, the string column key2 is still part of each
# group; numeric_only=True restricts the mean to the numeric columns, which
# newer pandas versions no longer do implicitly (they raise TypeError instead).
print(df.groupby('key1').mean(numeric_only=True))

# With both key columns used for grouping, only data1/data2 remain to average.
print(df.groupby(['key1', 'key2']).mean())

# size() usage: number of rows in each (key1, key2) group.
df.groupby(['key1', 'key2']).size()
Out[14]:
In [16]:
# Iterating over a groupby yields (group key, sub-frame) pairs.
# The loop body must be indented; the flat form is an IndentationError.
for name, group in df.groupby('key1'):
    print(name)
    print(group)  # inspect the rows that belong to this group
In [18]:
# Group by two keys: each group key is a (key1, key2) tuple, unpacked here.
# The loop body must be indented; the flat form is an IndentationError.
for (k1, k2), group in df.groupby(['key1', 'key2']):
    print(k1, k2)
    print(group)
In [20]:
# The grouped result can be stored as a dict: group key -> sub-frame.
pieces = {key: frame for key, frame in df.groupby('key1')}
print(pieces['a'])
print(pieces['b'])
In [21]:
# Only look at data2: selecting with a list ([['data2']]) keeps the result a
# DataFrame rather than a Series.
(
    df
    .groupby(['key1', 'key2'])[['data2']]
    .mean()
)
Out[21]:
In [22]:
# 5x5 frame of random normal values: one row per person, columns a-e.
people = pd.DataFrame(
    np.random.randn(5, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
    index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'],
)

# Insert a few missing values: columns b and c of the row at position 2.
# `.ix` was removed from pandas; because the index holds strings, the old
# `.ix[2:3, ...]` slice was positional, so the equivalent is a positional
# slice of the index combined with label-based column selection via `.loc`.
people.loc[people.index[2:3], ['b', 'c']] = np.nan
people
Out[22]:
In [24]:
# Group the columns of `people` by colour using a dict mapping.
# Keys that do not match any column ('f' here) are simply unused.
mapping = {
    'a': 'red',
    'b': 'red',
    'c': 'blue',
    'd': 'blue',
    'e': 'red',
    'f': 'orange',
}

# groupby(..., axis=1) is deprecated in recent pandas. The forward-compatible
# way to group columns is to transpose, group the (former) column labels, and
# transpose the aggregated result back.
# NOTE(review): by_column is now a groupby over the transposed frame; confirm
# that no later cell relies on it being a column-wise groupby of `people`.
by_column = people.T.groupby(mapping)

# Per-person sum of the columns belonging to each colour.
by_column.sum().T
Out[24]:
In [25]:
# Grouping with functions
people