目标是获取groupby包含index在内的结果
方法是使用reset_index
细节:只有DataFrame可以使用reset_index,所以groupby后要花式索引保证结果符合要求
import numpy as np
import pandas as pd
num = np.random.randint(60,101,6)
data = {
'省份':['省1','省1','省1','省2','省2','省2'],
'市州':['市1','市2','市2','市1','市1','市2'],
'区县':['县1','县1','县2','县1','县2','县2'],
'num':num
}
df = pd.DataFrame(data = data)
df
| 省份 | 市州 | 区县 | num |
---|
0 | 省1 | 市1 | 县1 | 74 |
---|
1 | 省1 | 市2 | 县1 | 74 |
---|
2 | 省1 | 市2 | 县2 | 78 |
---|
3 | 省2 | 市1 | 县1 | 94 |
---|
4 | 省2 | 市1 | 县2 | 73 |
---|
5 | 省2 | 市2 | 县2 | 67 |
---|
res1 = df.groupby(by = ['省份','市州'])[['num']].sum()
res1.reset_index(inplace = True)
res1
| 省份 | 市州 | num |
---|
0 | 省1 | 市1 | 74 |
---|
1 | 省1 | 市2 | 152 |
---|
2 | 省2 | 市1 | 167 |
---|
3 | 省2 | 市2 | 67 |
---|
res2 = df.groupby(by = ['省份'])[['num']].sum()
res2.reset_index(inplace = True)
res2
route = './data/结果保存.xlsx'
with pd.ExcelWriter(route) as writer:
res1.to_excel(writer,sheet_name = '省份市州结果',index = False)
res2.to_excel(writer,sheet_name = '省份结果',index = False)