小计和
MultiIndex.from_arrays的解决方案.最后
concat和所有数据帧,
sort_index并添加所有总和:
#replace km/h and convert to int
df.windspeed = df.windspeed.str.replace('km/h','').astype(int)
print (df)
FID admin0 admin1 admin2 windspeed population
0 0 cntry1 state1 city1 60 700
1 1 cntry1 state1 city1 90 210
2 2 cntry1 state1 city2 60 100
3 3 cntry1 state2 city3 60 70
4 4 cntry1 state2 city4 60 180
5 5 cntry1 state2 city4 90 370
6 6 cntry2 state3 city5 60 890
7 7 cntry2 state3 city6 60 120
8 8 cntry2 state3 city6 90 420
9 9 cntry2 state3 city6 120 360
10 10 cntry2 state4 city7 60 740
#pivoting
table = pd.pivot_table(df,
index=["admin0","admin1","admin2"],
columns=["windspeed"],
values=["population"],
fill_value=0)
print (table)
population
windspeed 60 90 120
admin0 admin1 admin2
cntry1 state1 city1 700 210 0
city2 100 0 0
state2 city3 70 0 0
city4 180 370 0
cntry2 state3 city5 890 0 0
city6 120 420 360
state4 city7 740 0 0
#groupby and create sum dataframe by levels 0,1
df1 = table.groupby(level=[0,1]).sum()
df1.index = pd.MultiIndex.from_arrays([df1.index.get_level_values(0),
df1.index.get_level_values(1)+ '_sum',
len(df1.index) * ['']])
print (df1)
population
windspeed 60 90 120
admin0
cntry1 state1_sum 800 210 0
state2_sum 250 370 0
cntry2 state3_sum 1010 420 360
state4_sum 740 0 0
df2 = table.groupby(level=0).sum()
df2.index = pd.MultiIndex.from_arrays([df2.index.values + '_sum',
len(df2.index) * [''],
len(df2.index) * ['']])
print (df2)
population
windspeed 60 90 120
cntry1_sum 1050 580 0
cntry2_sum 1750 420 360
#concat all dataframes together, sort index
df = pd.concat([table, df1, df2]).sort_index(level=[0])
#add km/h to second level in columns
df.columns = pd.MultiIndex.from_arrays([df.columns.get_level_values(0),
df.columns.get_level_values(1).astype(str) + 'km/h'])
#add all sum
df.loc[('All_sum','','')] = table.sum().values
print (df)
population
60km/h 90km/h 120km/h
admin0 admin1 admin2
cntry1 state1 city1 700 210 0
city2 100 0 0
state1_sum 800 210 0
state2 city3 70 0 0
city4 180 370 0
state2_sum 250 370 0
cntry1_sum 1050 580 0
cntry2 state3 city5 890 0 0
city6 120 420 360
state3_sum 1010 420 360
state4 city7 740 0 0
state4_sum 740 0 0
cntry2_sum 1750 420 360
All_sum 2800 1000 360
编辑评论:
def f(x):
print (x)
if (len(x) > 1):
return x.sum()
df1 = table.groupby(level=[0,1]).apply(f).dropna(how='all')
df1.index = pd.MultiIndex.from_arrays([df1.index.get_level_values(0),
df1.index.get_level_values(1)+ '_sum',
len(df1.index) * ['']])
print (df1)
population
windspeed 60 90 120
admin0
cntry1 state1_sum 800.0 210.0 0.0
state2_sum 250.0 370.0 0.0
cntry2 state3_sum 1010.0 420.0 360.0