问题出现前提条件
1、对DataFrame进行groupby分组后;
2、使用apply配合匿名函数(lambda)进行操作,且函数内部需要按行取值(例如 x.iloc[0]['A'])时。
# Reproduce the problem: for every 'start_time' group, subtract the 'A' value
# of the group's first row from the group's 'B' column.
try:
    prf = df.groupby('start_time').apply(lambda x: x['B'] - x.iloc[0]['A'])
except Exception as e:
    # repr() keeps the exception type; print(e) alone would show only the
    # bare message, which is hard to diagnose when the message is just "0".
    print(repr(e))
如果start_time只有一个取值,即groupby只得到一组结果时,上面的代码会抛出异常,异常信息为 0。
解决方案:
当groupby只有一组结果时,先在groupby之后选出需要用到的列(例如 [['B', 'A']]),再用apply执行匿名函数。
# Build the GroupBy once and count its groups. According to the note this
# snippet accompanies, applying the function directly failed when there was
# exactly one group, so each group count gets its own branch.
by_start = df.groupby('start_time')
group_num = len(by_start)
# Alternative: group_num = len(by_start.groups)  # .groups exposes the grouping result


def _b_minus_first_a(x):
    """Return column 'B' of group *x* minus the 'A' value of its first row."""
    return x['B'] - x.iloc[0]['A']


if group_num > 1:
    prf = by_start.apply(_b_minus_first_a)
    # Turn index level 0 (presumably the 'start_time' group key) back into a
    # regular column.
    prf = prf.reset_index(level=[0])
elif group_num == 1:
    # Single-group workaround: select the needed columns first, then apply.
    # NOTE(review): no reset_index in this branch, so the result layout may
    # differ from the multi-group branch -- confirm downstream handling.
    prf = by_start[['B', 'A']].apply(_b_minus_first_a)
else:
    # No groups at all (no trades occurred). Bind prf explicitly so that later
    # code can test for None instead of hitting an unbound name.
    print('出现次数为零')
    prf = None
附录:groupby相关用法
import pandas as pd
import numpy as np
# Build a sample DataFrame of team results.
# Note that 'kings' (lowercase) is a different group key from 'Kings'.
data = {
    'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings',
             'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
    'Rank': [1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
    'Year': [2014, 2015, 2014, 2015, 2014, 2015, 2016, 2017, 2016, 2014, 2015, 2017],
    'Points': [876, 789, 863, 673, 741, 812, 756, 7988, 64, 701, 804, 690],
}
df = pd.DataFrame(data)
print(df)

# Split the data into groups (this prints the GroupBy object itself).
print(df.groupby('Team'))

# Inspect the groups.
print(df.groupby('Team').groups)

# Group by several columns.
print(df.groupby(['Team', 'Year']).groups)

grouped = df.groupby('Year')

# Iterate over the groups: each item is a (key, sub-DataFrame) pair.
for name, group in grouped:
    print(name)
    print(group)

# Fetch a single group.
print('Get group 2014: ')
print(grouped.get_group(2014))

# Aggregation. String names are used instead of NumPy callables because
# passing np.mean / np.sum / np.std to agg is deprecated in recent pandas.
print('Group agg: test mean ')
print(grouped['Points'].agg('mean'))

# Several aggregation functions at once.
agg = grouped['Points'].agg(['sum', 'mean', 'std'])
print(agg)

# Filtering: keep only the rows of teams that have at least 3 rows.
# Named filtered_df so the builtin filter() is not shadowed.
filtered_df = df.groupby('Team').filter(lambda x: len(x) >= 3)
print(filtered_df)