# 第1关 Pandas分组聚合 (Level 1: grouping and aggregation with Pandas)
import pandas as pd
import numpy as np
def sub(df):
    """Return the difference between the maximum and minimum of *df*.

    Args:
        df: Any object exposing ``max()`` and ``min()`` whose results can
            be subtracted, e.g. a pandas Series.

    Returns:
        ``df.max() - df.min()``.
    """
    ######## Begin #######
    return df.max() - df.min()
    ######## End #######
def main():
    """Print per-continent drink statistics read from ``step1/drinks.csv``.

    Rows are grouped by the ``continent`` column. For every group the
    printed table holds the max-min spread of ``wine_servings`` (computed
    by ``sub``) and the total of ``beer_servings``.
    """
    ######## Begin #######
    # read_csv already returns a DataFrame, so no extra wrapping is needed.
    df = pd.read_csv("step1/drinks.csv")
    # Use the string alias "sum" rather than np.sum: pandas resolves both to
    # its own grouped sum, but passing the callable is deprecated.
    mapping = {"wine_servings": sub, "beer_servings": "sum"}
    print(df.groupby("continent").agg(mapping))
    ######## End #######
# Run the exercise only when this file is executed as a script.
if __name__ == '__main__':
    main()
# 第2关 Pandas创建透视表和交叉表 (Level 2: creating pivot tables and cross tabulations with Pandas)
#-*- coding: utf-8 -*-
import pandas as pd
def create_pivottalbe(data):
    """Create a pivot table of summed tips.

    Args:
        data: DataFrame containing the columns ``day``, ``time`` and ``tip``.

    Returns:
        DataFrame indexed by ``day`` with one ``tip`` column per ``time``
        value, holding the sum of ``tip`` for each combination, plus the
        margin (grand total) row and column requested by ``margins=True``.
    """
    ###### Begin ######
    # The string alias "sum" selects pandas' own grouped sum; passing the
    # builtin ``sum`` callable gives the same result but is deprecated.
    return data.pivot_table(
        index=["day"],
        values=["tip"],
        columns=["time"],
        margins=True,
        aggfunc="sum",
    )
    ###### End ######
def create_crosstab(data):
    """Create a cross tabulation of summed tips.

    Args:
        data: DataFrame containing the columns ``day``, ``time`` and ``tip``.

    Returns:
        DataFrame with ``day`` values as rows and ``time`` values as
        columns, each cell holding the sum of ``tip`` for that pair, plus
        the margin (grand total) row and column requested by
        ``margins=True``.
    """
    ###### Begin ######
    # The string alias "sum" selects pandas' own grouped sum; passing the
    # builtin ``sum`` callable gives the same result but is deprecated.
    return pd.crosstab(
        index=[data.day],
        columns=[data.time],
        values=data.tip,
        aggfunc="sum",
        margins=True,
    )
    ###### End ######
def main():
    """Load ``step2/tip.csv`` and print its pivot table and cross tabulation.

    The data is passed to ``create_pivottalbe`` and ``create_crosstab``;
    each result is printed under its own heading.
    """
    # Read the CSV file and bind its contents to data.
    ###### Begin ######
    data = pd.read_csv("step2/tip.csv")
    ###### End ######
    piv_result = create_pivottalbe(data)
    cro_result = create_crosstab(data)
    print("透视表:\n{}".format(piv_result))
    print("交叉表:\n{}".format(cro_result))