import pandas as pd
from numpy.random import seed
from numpy.random import rand
from numpy.random import randint
import numpy as np
# randint(low, high, size): `size` is the number of random values to draw;
# the generated values are integers.
# Seed the generator so the random Price/Number columns are reproducible.
seed(42)
df = pd.DataFrame({
    'Weather': ['cold', 'hot', 'cold', 'hot', 'cold', 'hot', 'cold'],
    'Food': ['soup', 'soup', 'icecream', 'chocolate', 'icecream', 'icecream', 'soup'],
    'Price': 10 * rand(7),
    'Number': randint(1, 9, 7),
})

# Group the rows by weather and iterate over the resulting groups.
weather_group = df.groupby('Weather')
# `name` is the group key (the Weather value) and `group` is the DataFrame
# holding that group's rows; enumerate() numbers the groups starting at 1.
for i, (name, group) in enumerate(weather_group, start=1):
    print('Group', i, name)
    print(group)

# Print the first row and the last row of every group, then the group means.
print(weather_group.first())
print(weather_group.last())
# 'Food' holds strings and cannot be averaged, so restrict the mean to the
# numeric columns explicitly instead of relying on pandas to drop it.
print(weather_group.mean(numeric_only=True))

# Group by several columns at once.
wf_group = df.groupby(['Weather', 'Food'])
print('WF Groups', wf_group.groups)
# agg() applies each function in the list to every non-key column.
# NOTE: the column label produced for np.min ('amin' or 'min') depends on
# the installed NumPy/pandas versions.
print('WF Aggregated\n', wf_group.agg([np.min, np.median]))
# Recorded output of the script above:
#
# Group 1 cold
# Weather Food Price Number
# 0 cold soup 3.745401 8
# 2 cold icecream 7.319939 4
# 4 cold icecream 1.560186 8
# 6 cold soup 0.580836 6
# Group 2 hot
# Weather Food Price Number
# 1 hot soup 9.507143 5
# 3 hot chocolate 5.986585 8
# 5 hot icecream 1.559945 3
# Food Price Number
# Weather
# cold soup 3.745401 8
# hot soup 9.507143 5
# Food Price Number
# Weather
# cold soup 0.580836 6
# hot icecream 1.559945 3
# Price Number
# Weather
# cold 3.301591 6.500000
# hot 5.684558 5.333333
# WF Groups {('cold', 'icecream'): Int64Index([2, 4], dtype='int64'), ('cold', 'soup'): Int64Index([0, 6], dtype='int64'), ('hot', 'chocolate'): Int64Index([3], dtype='int64'), ('hot', 'icecream'): Int64Index([5], dtype='int64'), ('hot', 'soup'): Int64Index([1], dtype='int64')}
# WF Aggregated
# Price Number
# amin median amin median
# Weather Food
# cold icecream 1.560186 4.440063 4 6
# soup 0.580836 2.163119 6 7
# hot chocolate 5.986585 5.986585 8 8
# icecream 1.559945 1.559945 3 3
# soup 9.507143 9.507143 5 5