# Count how many purchases each user made in each month.
# Rows are keyed by user_id; columns='month' splits the order_dt counts a
# second time, by month. Missing user/month combinations are filled with 0.
df_purchase = df.pivot_table(
    values='order_dt',
    index='user_id',
    columns='month',
    aggfunc='count',
    fill_value=0,
)
df_purchase.head()
# Flag whether each user purchased in each month: 1 when the monthly count is
# greater than 0, otherwise 0.
# A vectorized comparison is used instead of DataFrame.applymap, which is
# deprecated since pandas 2.1 and calls a Python lambda once per cell.
df_purchase = (df_purchase > 0).astype('int64')
df_purchase.head()
# Turn the raw 0/1 values of df_purchase into status labels (new, unactive,
# ...); the labelled result is used to build a new frame, df_purchase_new.
# The number of months is taken from the input itself, so nothing has to be
# edited when the number of month columns changes.
def active_status(data):
    """Label every month of one user's purchase flags with an activity status.

    Args:
        data: Iterable of per-month purchase flags for a single user, assumed
            to be ordered from the earliest month to the latest (for example
            one row of the 0/1 purchase table). A value equal to 0 means no
            purchase in that month; any other value counts as a purchase.

    Returns:
        A list with one label per input month:
            'unreg'    - no purchase in this month and none before it
            'new'      - first purchase (first month, or previous is 'unreg')
            'active'   - purchase in this month and in the previous month
            'unactive' - no purchase in this month, but one at some earlier
                         point
            'return'   - purchase in this month after an 'unactive' month
        An empty input yields an empty list.
    """
    status = []  # labels assigned so far, one per month already processed
    # Iterate over the values themselves instead of range(18) + data[i]:
    # this supports any number of months and does not depend on integer-key
    # lookups, which are label-based (not positional) on a pandas Series.
    for purchased in data:
        previous = status[-1] if status else None
        if purchased == 0:
            # No purchase this month: the user stays unregistered until the
            # first purchase, and is inactive afterwards.
            if previous is None or previous == 'unreg':
                status.append('unreg')
            else:
                status.append('unactive')
        elif previous is None or previous == 'unreg':
            # First purchase ever.
            status.append('new')
        elif previous == 'unactive':
            # Purchase right after an inactive month.
            status.append('return')
        else:
            # Previous label is 'new', 'active' or 'return'.
            status.append('active')
    return status
# Run active_status once per row of df_purchase (one row per user_id); the
# result holds one list of status labels for each user.
pivoted_status = df_purchase.apply(active_status, axis='columns')
pivoted_status.head()
# Output of pivoted_status.head():
# user_id
# 1    [new, unactive, unactive, unactive, unactive, ...
# 2    [new, unactive, unactive, unactive, unactive, ...
# 3    [new, unactive, return, active, unactive, unac...
# 4    [new, unactive, unactive, unactive, unactive, ...
# 5    [new, active, unactive, return, active, active...
# dtype: object
# Expand the per-user status lists into a table that reuses the row index
# and the column labels of df_purchase.
df_purchase_new = DataFrame(
    pivoted_status.tolist(),
    columns=df_purchase.columns,
    index=df_purchase.index,
)
df_purchase_new.head()