数据准备
参考:https://gist.github.com/aegorenkov/f7f0f8d6e9cc17ca9774572e224e2ad1
import pandas as pd
# Load the drinks CSV into a DataFrame named `drinks`.
# NOTE(review): read_csv parses the literal text "NA" as a missing value by
# default; if any column uses "NA" as a real code (e.g. a continent
# abbreviation), those cells become NaN -- confirm against the data file.
drinks = pd.read_csv(filepath_or_buffer='data_analysis/drinks.csv')
数据基本信息
# Group the rows by continent; every statistic below reuses this object.
continent_group_by = drinks.groupby('continent')

# Which continent consumes more beer on average?
beer_df = continent_group_by.agg({'beer_servings': 'mean'})
# idxmax() on the single mean column returns the continent label directly.
beer_top_continent_str = beer_df['beer_servings'].idxmax()

# Descriptive statistics of wine consumption (wine_servings) per continent,
# requested once through agg() and once through describe() on the column.
wine_agg_describe = continent_group_by.agg({'wine_servings': 'describe'})
wine_describe = continent_group_by['wine_servings'].describe()

# Since pandas 2.0 the groupby reductions default to numeric_only=False and
# raise TypeError on text columns instead of silently dropping them, so the
# aggregations below are restricted to numeric columns explicitly.
# (Presumably `drinks` carries at least one text column, e.g. a country
# name -- the restriction is a no-op if it does not.)
_numeric_columns = [
    column
    for column in drinks.select_dtypes(include='number').columns
    if column != 'continent'  # the grouping key is never aggregated
]

# Mean consumption of every numeric column per continent.
continent_mean = continent_group_by.mean(numeric_only=True)
# agg() with a list of functions has no numeric_only switch, so select the
# numeric columns first.
continent_agg = continent_group_by[_numeric_columns].agg(['mean', 'max', 'std'])

# Median consumption of every numeric column per continent.
continent_median = continent_group_by.median(numeric_only=True)

# Mean, maximum and minimum of spirit_servings per continent.
spirit_servings = continent_group_by.agg({'spirit_servings': ['mean', 'max', 'min']})