import pandas as pd
# Contents of the test_groupby.csv dataset:
#   name,number,rank
#   A, 24, 12
#   B, 23, 109
#   C, 34, 108
#   A, 23, 1
#   A, 23, 1
#   C, 11, 1
# Output result df:
#   name,number,rank
#   A, 70, 1
#   B, 23, 109
#   C, 45, 1
def deal_with_single_uid(uid_info, argv=None):
    """Collapse the rows of one group into a single summary row.

    Args:
        uid_info: DataFrame holding the rows of one group. It must have a
            'name' column; the column at position 1 is summed and the column
            at position 2 is minimised.
        argv: Optional list of extra parameters supplied by the caller
            (e.g. through ``groupby(...).apply(..., argv=[...])``). It is
            accepted for interface compatibility and not used by this
            aggregation.

    Returns:
        A one-row DataFrame with columns ['name', 'number', 'rank'] holding
        the first 'name' value, the sum, and the minimum.

    Raises:
        IndexError: If ``uid_info`` has no rows.
    """
    # .iloc[0] replaces Series.as_matrix()[0]; as_matrix() no longer exists
    # in current pandas releases.
    who = uid_info['name'].iloc[0]
    # Columns are read by position (1 and 2) to mirror the original
    # row-tuple indexing rather than depending on their labels.
    number = uid_info.iloc[:, 1].sum()
    # Taking the real minimum avoids the old fixed 1000 sentinel, which gave
    # a wrong answer whenever every rank in the group was above 1000.
    rank = uid_info.iloc[:, 2].min()
    return pd.DataFrame([[who, number, rank]], columns=['name', 'number', 'rank'])
# Script entry point: load the CSV, aggregate each 'name' group, then
# materialise the groups for inspection.
if __name__ == '__main__':  # was '__name__', which never matched
    file_path = '../test_groupby.csv'
    data = pd.read_csv(file_path)
    # Placeholder for the extra parameters handed to deal_with_single_uid via
    # apply()'s keyword pass-through; populate as required.
    extra_params = []
    df = data.groupby('name').apply(deal_with_single_uid, argv=extra_params)
    # type(temp_data) = pandas.core.groupby.DataFrameGroupBy
    temp_data = data.groupby('name')
    # Iterating a GroupBy yields (group_key, sub-DataFrame) tuples.
    temp = list(temp_data)
# Contents of temp:
# [('A',   name  number  rank
#        0    A      24    12
#        3    A      23     1
#        4    A      23     1),
#  ('B',   name  number  rank
#        1    B      23   109),
#  ('C',   name  number  rank
#        2    C      34   108
#        5    C      11     1)]
# type(temp[0]) : tuple; type(temp[0][1]) : DataFrame
# temp[0][0] : 'A'
# temp[0][1] :
#      name  number  rank
#   0     A      24    12
#   3     A      23     1
#   4     A      23     1