# -*- coding: utf-8 -*-
"""
Created on Sat Jun 1 15:46:44 2019
@author: User
"""
import pandas as pd
import numpy as np
storesales=pd.read_csv('data\ch4\storesales.csv')
print(storesales.head(10))
print("以上是数据准备----------------------------")
print("\n 设置标签:")
storesales['store']=storesales['store'].astype('category')
storesales['store'].cat.categories=['SANFORD','MILIENIA','OCOEE','KISSIMMEE']
storesales['store'].cat.set_categories=['SANFORD','MILIENIA','OCOEE','KISSIMMEE']
storesales['method']=storesales['method'].astype('category')
storesales['method'].cat.categories=['On Line','In Store']
storesales['method'].cat.set_categories=['On Line','In Store']
print(storesales.head(10))
print("\n 用 groupby 简单汇总表:")
storesales_g=storesales.groupby(storesales['method'])
print(storesales_g['sales','orders'].agg('sum'))
print("\n 用 pd.pivot_table 简单汇总表:")
print(pd.pivot_table(storesales,
index=['store'],
values=['orders'],
aggfunc=sum))
print("\n 用 pd.pivot_table 交叉分组统计:")
print(pd.pivot_table(storesales,
index=['store'],
columns=['method'],
values=['orders'],
aggfunc=sum,
fill_value=0))
print("\n 用 pd.pivot_table 复杂的交叉分组统计:")
print(pd.pivot_table(storesales,
index=['store'],
columns=['method'],
values=['orders','sales'],
aggfunc=[sum,np.mean,len],
fill_value=0))
print("\n 用 pd.pivot_table margins=True 交叉分组统计:")
print(pd.pivot_table(storesales,
index=['store'],
columns=['method'],
values=['sales'],
aggfunc=[sum],
fill_value=0,
margins=True))
print("\n 用 pd.crosstab margins=True 交叉分组统计:")
print(pd.crosstab(storesales['store'],
storesales['method'],
values=storesales['sales'],
aggfunc=[sum],
margins=True))
运行:
id store method orders sales
0 1001 1 1 78 89000
1 1023 2 1 87 98000
2 1234 2 2 67 78500
3 1002 3 2 87 77500
4 1001 3 1 56 67990
5 1234 1 1 98 78000
6 1003 2 2 87 98320
7 1101 4 2 59 76450
8 1023 2 1 87 74569
9 1234 2 1 78 79098
以上是数据准备----------------------------
设置标签:
id store method orders sales
0 1001 SANFORD On Line 78 89000
1 1023 MILIENIA On Line 87 98000
2 1234 MILIENIA In Store 67 78500
3 1002 OCOEE In Store 87 77500
4 1001 OCOEE On Line 56 67990
5 1234 SANFORD On Line 98 78000
6 1003 MILIENIA In Store 87 98320
7 1101 KISSIMMEE In Store 59 76450
8 1023 MILIENIA On Line 87 74569
9 1234 MILIENIA On Line 78 79098
用 groupby 简单汇总表:
sales orders
method
On Line 710645 760
In Store 586733 535
用 pd.pivot_table 简单汇总表:
orders
store
SANFORD 362
MILIENIA 406
OCOEE 392
KISSIMMEE 135
用 pd.pivot_table 交叉分组统计:
orders
method On Line In Store
store
SANFORD 362 0
MILIENIA 252 154
OCOEE 146 246
KISSIMMEE 0 135
用 pd.pivot_table 复杂的交叉分组统计:
sum ... len
orders sales ... orders sales
method On Line In Store On Line In Store ... On Line In Store On Line In Store
store ...
SANFORD 362 0 312643 0 ... 4 0 4 0
MILIENIA 252 154 251667 176820 ... 3 2 3 2
OCOEE 146 246 146335 244903 ... 2 3 2 3
KISSIMMEE 0 135 0 165010 ... 0 2 0 2
[4 rows x 12 columns]
用 pd.pivot_table margins=True 交叉分组统计:
sum
sales
method On Line In Store All
store
SANFORD 312643 0 312643
MILIENIA 251667 176820 428487
OCOEE 146335 244903 391238
KISSIMMEE 0 165010 165010
All 710645 586733 1297378
用 pd.crosstab margins=True 交叉分组统计:
sum
method On Line In Store All
store
SANFORD 312643.0 NaN 312643
MILIENIA 251667.0 176820.0 428487
OCOEE 146335.0 244903.0 391238
KISSIMMEE NaN 165010.0 165010
All 710645.0 586733.0 1297378