# Data source used in this walkthrough: https://github.com/wesm/pydata-book/tree/2nd-edition/examples/tips.csv
import pandas as pd
import numpy as np
# Print every column of wide frames instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# Load the tipping data and add the tip expressed as a fraction of the bill.
content = pd.read_csv('data/tips.csv')
content['tip_pct'] = content['tip'].div(content['total_bill'])
# Per-year correlation of each column's daily returns with the S&P 500
# index column ('SPX').
close_px = pd.read_csv('data/stock_px_2.csv', parse_dates=True, index_col=0)


def spx_corr(c):
    """Return the correlation of every column of ``c`` with its 'SPX' column."""
    return c.corrwith(c['SPX'])


def get_year(x):
    """Return the ``year`` attribute of ``x``; used as the group key."""
    return x.year


# Percentage change between consecutive rows; rows containing NaN are
# dropped (pct_change leaves the first row empty).
rets = close_px.pct_change().dropna()

# groupby() also accepts a function: it is called on each index label and
# its return value becomes the group key.
by_year = rets.groupby(get_year)

# Keep the result in a variable so it can be inspected or printed later,
# instead of computing it and throwing it away.
corr_by_year = by_year.apply(spx_corr)

# Sample output:
#
# AAPL MSFT XOM SPX
# 2003 0.541124 0.745174 0.661265 1.0
# 2004 0.374283 0.588531 0.557742 1.0
# 2005 0.467540 0.562374 0.631010 1.0
# 2006 0.428267 0.406126 0.518514 1.0
# 2007 0.508118 0.658770 0.786264 1.0
# 2008 0.681434 0.804626 0.828303 1.0
# 2009 0.707103 0.654902 0.797921 1.0
# 2010 0.710105 0.730118 0.839057 1.0
# 2011 0.691931 0.800996 0.859975 1.0
# Mean of each numeric column for every (day, smoker) combination.
# With the default aggfunc (mean) this matches grouping by the same keys
# and aggregating with the mean.
#
# The value columns are listed explicitly: the frame also holds the string
# column 'time', which cannot be averaged. Older pandas dropped such columns
# silently; pandas 2.x raises a TypeError instead.
table = content.pivot_table(
    values=['size', 'tip', 'tip_pct', 'total_bill'],
    index=['day', 'smoker'],
)

# Sample output:
#
# size tip tip_pct total_bill
# day smoker
# Fri No 2.250000 2.812500 0.151650 18.420000
# Yes 2.066667 2.714000 0.174783 16.813333
# Sat No 2.555556 3.102889 0.158048 19.661778
# Yes 2.476190 2.875476 0.147906 21.276667
# Sun No 2.929825 3.167895 0.160113 20.506667
# Yes 2.578947 3.516842 0.187250 24.120000
# Thur No 2.488889 2.673778 0.160298 17.113111
# Yes 2.352941 3.030000 0.163863 19.190588
# Group sizes of 'tip_pct' by (time, smoker) rows and day columns.
#   aggfunc=len    -> count the entries in each group (the default is mean)
#   margins=True   -> append an 'All' row and column with the aggregate
#   fill_value=0.0 -> replace empty combinations with 0.0
table1 = content.pivot_table(
    'tip_pct',
    index=['time', 'smoker'],
    columns='day',
    aggfunc=len,
    margins=True,
    fill_value=0.0,
)

# Sample output:
#
# day Fri Sat Sun Thur All
# time smoker
# Dinner No 3 45 57 1 106.0
# Yes 9 42 19 0 70.0
# Lunch No 1 0 0 44 45.0
# Yes 6 0 0 17 23.0
# All 19 87 76 62 244.0
# Two ways to build the same time-by-day frequency table:
# crosstab() counts occurrences directly, while pivot_table() gets there
# by counting the 'tip' entries of each group with aggfunc=len.
stat_crosstab = pd.crosstab(content['time'], content['day'], margins=True)
stat_pivottable = content.pivot_table(
    values='tip',
    index='time',
    columns='day',
    aggfunc=len,
    margins=True,
    fill_value=0,
)

# Sample output:
#
# day Fri Sat Sun Thur All
# time
# Dinner 12 87 76 1 176
# Lunch 7 0 0 61 68
# All 19 87 76 62 244