写法不算简洁,但这是我目前能想到的最好办法:
# Innermost level: index the rows by 'test' and turn each row's
# 'grade' and 'pass' values into a dict keyed by test name.
>>> def rollup1(x):
... return x.set_index('test')[['grade', 'pass']].to_dict(orient='index')
# Middle level: one rollup1 dict per 'course', collected into a dict.
>>> def rollup2(x):
... return x.groupby('course').apply(rollup1).to_dict()
# Outer level: one rollup2 dict per 'study', collected into a dict.
>>> def rollup3(x):
... return x.groupby('study').apply(rollup2).to_dict()
# Group by person; each group collapses to its nested 'study' dict.
>>> df = dat.groupby(['name','age','gender']).apply(rollup3)
# Name the result so that it becomes the 'study' column after reset_index.
>>> df.name = 'study'
# Move index levels 1 and 2 ('age', 'gender') back into columns, leaving
# 'name' as the index, then emit one dict per name.
>>> res = df.reset_index(level=[1,2]).to_dict(orient='index')
>>> pprint.pprint(res)
{'Henry': {'age': 31L,
'gender': 'Male',
'study': {'Physics': {'Quantum mechanics': {'Exam1': {'grade': 'C',
'pass': True},
'Exam2': {'grade': 'C',
'pass': True}}}}},
'John': {'age': 24L,
'gender': 'Male',
'study': {'Mathematics': {'Calculus 101': {'Essay': {'grade': 'A',
'pass': True},
'Exam': {'grade': 'A',
'pass': True}},
'Calculus 102': {'Exam': {'grade': 'B',
'pass': True}}},
'Philosophy': {'Aristotelean Ethics': {'Essay': {'grade': 'A',
'pass': True}}}}}}
思路是:在逐层分组的同时把数据汇总成字典,最终得到 `study` 这一列。
更新
我尝试写了一个更通用的解决方案,使它也适用于其他类似的问题(例如原回答中链接的另一个问题):
def rollup_to_dict_core(x, values, columns, d_columns=None):
    """Recursively roll a flat DataFrame up into nested dictionaries.

    Args:
        x: The DataFrame to roll up.
        values: Column names that form the leaves. With a single value
            column each leaf is the bare value; with several, each leaf is
            a ``{column: value}`` dict.
        columns: Column names defining the nesting, outermost first.
        d_columns: Optional descriptive columns reported alongside the
            outermost key instead of being nested. They are only used when
            ``columns`` has more than one entry and are not passed on to
            the recursive calls.

    Returns:
        A dict keyed by the values of ``columns[0]``. Without ``d_columns``
        each entry is the rollup of the remaining columns. With
        ``d_columns`` each entry is a dict holding the descriptive columns
        plus the nested rollup stored under the key ``columns[1]``.
    """
    d_columns = [] if d_columns is None else list(d_columns)

    if len(columns) == 1:
        # Innermost level: the last nesting column becomes the dict key.
        indexed = x.set_index(columns)
        if len(values) == 1:
            return indexed[values[0]].to_dict()
        return indexed[values].to_dict(orient='index')

    # Each group collapses to the nested dict built from the remaining columns.
    res = x.groupby([columns[0]] + d_columns).apply(
        lambda y: rollup_to_dict_core(y, values, columns[1:])
    )
    if not d_columns:
        return res.to_dict()

    # Name the Series so the nested dicts land in a column called columns[1].
    res.name = columns[1]
    # reset_index only treats a list/tuple as several levels; a bare range
    # object (Python 3) would be looked up as a single level and fail.
    res = res.reset_index(level=list(range(1, len(d_columns) + 1)))
    return res.to_dict(orient='index')
def rollup_to_dict(x, values, d_columns=None):
    """Roll a DataFrame up into nested dicts, inferring the nesting columns.

    Every column of ``x`` that is listed in neither ``values`` nor
    ``d_columns`` is used as a nesting level, in the DataFrame's column
    order; the work itself is delegated to ``rollup_to_dict_core``.
    """
    if d_columns is None:
        d_columns = []

    nesting = []
    for col in x.columns:
        # Leaf and descriptive columns never become nesting levels.
        if col in values or col in d_columns:
            continue
        nesting.append(col)

    return rollup_to_dict_core(x, values, nesting, d_columns)
>>> pprint(rollup_to_dict(dat, ['pass', 'grade'], ['age','gender']))
{'Henry': {'age': 31L,
'gender': 'Male',
'study': {'Physics': {'Quantum mechanics': {'Exam1': {'grade': 'C',
'pass': True},
'Exam2': {'grade': 'C',
'pass': True}}}}},
'John': {'age': 24L,
'gender': 'Male',
'study': {'Mathematics': {'Calculus 101': {'Essay': {'grade': 'A',
'pass': True},
'Exam': {'grade': 'A',
'pass': True}},
'Calculus 102': {'Exam': {'grade': 'B',
'pass': True}}},
'Philosophy': {'Aristotelean Ethics': {'Essay': {'grade': 'A',
'pass': True}}}}}}