# 数据聚合与分组运算 GroupBy技术
import numpy as np
from pandas import DataFrame
# Demo frame for the group-by examples: two label columns (key1, key2)
# and two numeric columns, each filled with five np.random.randn draws.
df = DataFrame(
    dict(
        key1=list('aabba'),
        key2=['one', 'two'] * 2 + ['one'],
        data1=np.random.randn(5),
        data2=np.random.randn(5),
    )
)
# Bare expression: displays the frame when run interactively.
df
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
| | data1 | data2 | key1 | key2 |
---|---|---|---|---|
0 | -0.361364 | -0.417369 | a | one |
1 | 0.831824 | 0.841717 | a | two |
2 | -0.569764 | 0.446787 | b | one |
3 | 0.723294 | -1.248796 | b | two |
4 | -0.813407 | -0.384997 | a | one |
# Split the data1 column into groups keyed by the matching key1 labels;
# the result is a GroupBy object, nothing is computed until an aggregation.
grouped = df.data1.groupby(by=df.key1)
# Per-group mean of data1 (one value for each distinct key1 label).
grouped.mean()
key1
a -0.244128
b 0.934956
Name: data1, dtype: float64