3.用户消费行为
- 用户第一次消费(首购)
- 用户最后一次消费
- 新老客消费比
- 用户分层
- 用户购买周期(按订单)
- 用户生命周期(按第一次&最后一次消费)
grouped_user.min().reset_index().order_dt.value_counts().plot()
grouped_user.max().order_dt.value_counts().plot()
user_life=grouped_user.order_dt.agg(['min','max'])
user_id | min | max |
---|
1 | 1997-01-01 | 1997-01-01 |
2 | 1997-01-12 | 1997-01-12 |
3 | 1997-01-02 | 1998-05-28 |
4 | 1997-01-01 | 1997-12-12 |
one=pd.DataFrame((user_life['min']==user_life['max']).value_counts())
a = pd.Series(one.iloc[1])
print("仅消费一次人数",a[0])
| user_id | order_dt | order_products | order_amount | month |
---|
0 | 1 | 1997-01-01 | 1 | 11.77 | 1997-01-01 |
1 | 2 | 1997-01-12 | 1 | 12.00 | 1997-01-01 |
2 | 2 | 1997-01-12 | 5 | 77.00 | 1997-01-01 |
3 | 3 | 1997-01-02 | 2 | 20.76 | 1997-01-01 |
4 | 3 | 1997-03-30 | 2 | 20.76 | 1997-03-01 |
每月新客占比
month_count=df.groupby(['month','user_id']).order_dt.agg(['min','max'])
month | user_id | min | max |
---|
1997-01-01 | 1 | 1997-01-01 | 1997-01-01 |
| 2 | 1997-01-12 | 1997-01-12 |
| 3 | 1997-01-02 | 1997-01-02 |
| 4 | 1997-01-01 | 1997-01-18 |
| 5 | 1997-01-01 | 1997-01-14 |
month_counts=pd.DataFrame(month_count['min']==month_count['max'])
month_new_oldd=month_counts.reset_index().groupby(['month',0]).count().reset_index()
month_new_oldd.columns=['month','TF','user_id']
month_newnum=month_new_oldd.loc[month_new_oldd.TF==True]
| month | TF | user_id |
---|
1 | 1997-01-01 | True | 7093 |
3 | 1997-02-01 | True | 8571 |
5 | 1997-03-01 | True | 8154 |
7 | 1997-04-01 | True | 2228 |
month_count=df.groupby(['month','user_id']).count().reset_index().groupby('month').count()
month_usercount=month_count.reset_index().iloc[:,:2]
| month | user_id |
---|
0 | 1997-01-01 | 7846 |
1 | 1997-02-01 | 9633 |
2 | 1997-03-01 | 9524 |
3 | 1997-04-01 | 2822 |
4 | 1997-05-01 | 2214 |
month_new_old=month_newnum.merge(right=month_usercount,on='month')
month_new_old['newper']=month_new_old.user_id_x/month_new_old.user_id_y
| month | TF | user_id_x | user_id_y | newper |
---|
0 | 1997-01-01 | True | 7093 | 7846 | 0.904028 |
1 | 1997-02-01 | True | 8571 | 9633 | 0.889754 |
2 | 1997-03-01 | True | 8154 | 9524 | 0.856153 |
RFM
rfm = df.pivot_table(index='user_id',
values={'order_products','order_dt','order_amount'},
aggfunc={'order_dt':'max',
'order_amount':'sum',
'order_products':'sum'})
user_id | order_amount | order_dt | order_products |
---|
1 | 11.77 | 1997-01-01 | 1 |
2 | 89.00 | 1997-01-12 | 6 |
3 | 156.46 | 1998-05-28 | 16 |
rfm['R'] = -(rfm.order_dt-rfm.order_dt.max())/np.timedelta64(1,'D')
rfm.rename(columns={'order_products':'F','order_amount':'M'},inplace='True')
rfm[['R','F','M']].apply(lambda x:x-x.mean())
user_id | R | F | M |
---|
1 | 177.778362 | -6.122656 | -94.310426 |
2 | 166.778362 | -1.122656 | -17.080426 |
3 | -334.221638 | 8.877344 | 50.379574 |
def rfm_func(x):
level = x.apply(lambda x:'1' if x >= 0 else '0')
label = level.R + level.F + level.M
d = {
'111':'重要价值客户',
'011':'重要保持客户',
'101':'重要挽留客户',
'001':'重要发展客户',
'110':'一般价值客户',
'010':'一般保持客户',
'100':'一般挽留客户',
'000':'一般发展客户',
}
result = d[label]
return result
rfm['label'] = rfm[['R','F','M']].apply(lambda x:x-x.mean()).apply(rfm_func,axis=1)
rfm.groupby('label').agg(['sum','count'])
| M | | F | | R | |
---|
label | sum | count | sum | count | sum | count |
– | – | – | – | – | – | – |
一般价值客户 | 7181.28 | 77 | 650 | 77 | 36295.0 | 77 |
一般保持客户 | 19937.45 | 206 | 1712 | 206 | 29448.0 | 206 |
一般发展客户 | 196971.23 | 3300 | 13977 | 3300 | 591108.0 | 3300 |
一般挽留客户 | 438291.81 | 14074 | 29346 | 14074 | 6951815.0 | 14074 |
rfm.loc[rfm.label=='重要价值客户','color']='g'
rfm.loc[~(rfm.label=='重要价值客户'),'color']='r'
rfm.plot.scatter('F','R',c=rfm.color)
新、老、活跃、回流、流失
pivoted_counts = df.pivot_table(index='user_id',
columns='month',
values='order_dt',
aggfunc='count').fillna(0)
df_purchase = pivoted_counts.applymap(lambda x: 1 if x > 0 else 0)
def active_status(data):
status = []
for i in range(18):
if data[i] == 0:
if len(status) > 0:
if status[i-1] == 'unreg':
status.append('unreg')
else:
status.append('unactive')
else:
status.append('unreg')
else:
if len(status) == 0:
status.append('new')
else:
if status[i-1]=='unactive':
status.append('return')
elif status[i-1] == 'unreg':
status.append('new')
else:
status.append('active')
return pd.Series(status)
若本月没有消费
若之前是未注册,则依旧为未注册
若之前有消费,则为流失/不活跃
其他情况,为未注册
若本月有消费
若是第一次消费,则为新用户
若之前有过消费,且上个月为不活跃,则为回流
若上个月为未注册,则为新用户
除此之外,为活跃
purchase_sta = df_purchase.apply(active_status,axis=1)
purchase_sta_ct = purchase_sta.replace('unreg',np.NaN).apply(lambda x:pd.value_counts(x))
purchase_sta_ct.fillna(0).T
purchase_sta_ct.fillna(0).T.plot.area()
purchase_sta_ct.fillna(0).T.apply(lambda x:x/x.sum(),axis=1)
order_diff = grouped_user.apply(lambda x:x.order_dt-x.order_dt.shift())
order_diff.describe()
(order_diff / np.timedelta64(1,'D')).hist(bins=20)
(user_life['max'] - user_life['min']).describe()
((user_life['max'] - user_life['min'])/np.timedelta64(1,'D')).hist(bins=20)
u_1=((user_life['max'] - user_life['min']).reset_index()[0]/np.timedelta64(1,'D'))
u_1[u_1>0].hist(bins=40)
4.复购率和回购率分析
purchase_r = pivoted_counts.applymap(lambda x: 1 if x > 1 else np.NaN if x == 0 else 0)
(purchase_r.sum()/purchase_r.count()).plot(figsize = (10,4))
def purchase_back(data):
status = []
for i in range(17):
if data[i] == 1:
if data[i+1] == 1:
status.append(1)
if data[i+1] == 0:
status.append(0)
else:
status.append(np.NaN)
status.append(np.NaN)
return pd.Series(status)
purchase_b = df_purchase.apply(purchase_back,axis=1)
(purchase_b.sum()/purchase_b.count()).plot(figsize=(10,4))