基本简介
根据美国数据库营销研究所Arthur Hughes的研究,客户数据库中有3个神奇的要素,这3个要素构成了数据分析最好的指标:
最近一次消费 (Recency)
消费频率 (Frequency)
消费金额 (Monetary)
——来源:百度百科
定义
Recency:最近一次消费,意指上一次购买距离现在的时间,一般按照天进行计算
Frequency:消费频率是顾客在限定的期间内所购买的次数,时间范围需要自己设定
Monetary:指的是一段时间(通常是1年)内的消费金额
实例:
# Helper functions
# Scoring functions for the three RFM metrics
def Recency(x):
    """Map a recency value to a score from 5 (most recent) to 1.

    Buckets are half-open: [0, 5) -> 5, [5, 15) -> 4, [15, 30) -> 3,
    [30, 90) -> 2, and 90 or more -> 1.  Any input that is not ``>= 0``
    (a negative number or NaN) yields None.
    """
    if not x >= 0:
        return None
    # Upper bounds are exclusive and tested in ascending order, so the
    # first bound that exceeds x identifies its bucket.
    for upper_bound, score in ((5, 5), (15, 4), (30, 3), (90, 2)):
        if x < upper_bound:
            return score
    return 1
def Frequency(x):
    """Map a purchase count to a frequency score from 1 to 5.

    Buckets are half-open: [0, 10) -> 1, [10, 30) -> 2, [30, 50) -> 3,
    [50, 100) -> 4, and 100 or more -> 5.  Any input that is not ``>= 0``
    (a negative number or NaN) yields None.
    """
    if not x >= 0:
        return None
    # From here on x is non-negative, so only the upper bound of each
    # bucket needs checking.
    if x < 10:
        return 1
    if x < 30:
        return 2
    if x < 50:
        return 3
    if x < 100:
        return 4
    return 5
def Monetory(x):
    """Map a spend amount to a monetary score from 1 to 5.

    Buckets are half-open: [0, 300) -> 1, [300, 1000) -> 2,
    [1000, 3000) -> 3, [3000, 10000) -> 4, and 10000 or more -> 5.
    Any input that is not ``>= 0`` (a negative number or NaN) yields None.
    """
    if 0 <= x < 300:
        return 1
    if 300 <= x < 1000:
        return 2
    if 1000 <= x < 3000:
        return 3
    if 3000 <= x < 10000:
        return 4
    # The top bucket starts where the previous one ends (10000), so each
    # branch is correct on its own and not only because of branch order.
    if x >= 10000:
        return 5
    return None
# Threshold test
def judge(x, threshold=None):
    """Return 1 if *x* is strictly greater than the threshold, else 0.

    Args:
        x: Value to test.
        threshold: Value to compare against.  When omitted, the
            module-level variable ``avg`` is read instead, so one-argument
            calls such as ``Series.apply(judge)`` keep working.

    Returns:
        1 when ``x > threshold``, otherwise 0.

    Raises:
        NameError: If ``threshold`` is omitted and no global ``avg`` exists.
    """
    if threshold is None:
        # Backward-compatible fallback: use the global set by the caller.
        threshold = avg
    return 1 if x > threshold else 0
# RFM segment lookup
def RFM(x):
    """Return the customer-segment label for a row of three 0/1 flags.

    The flags are read positionally from ``x.iloc[0]``, ``x.iloc[1]`` and
    ``x.iloc[2]`` and matched against every 0/1 combination.  None is
    returned when no combination matches.
    """
    # (first flag, second flag, third flag, label), checked in this order.
    segments = (
        (1, 1, 1, "重要价值客户"),
        (1, 1, 0, "重要潜力客户"),
        (1, 0, 1, "重要发展客户"),
        (1, 0, 0, "新客户"),
        (0, 1, 1, "重要保持客户"),
        (0, 1, 0, "一般客户"),
        (0, 0, 1, "重要挽回客户"),
        (0, 0, 0, "流失客户"),
    )
    for first, second, third, label in segments:
        if x.iloc[0] == first and x.iloc[1] == second and x.iloc[2] == third:
            return label
    return None
# Convert the raw R/F/M columns into scores
for metric, score_function in (("R", Recency), ("F", Frequency), ("M", Monetory)):
    data[metric + "-SCORE"] = data[metric].apply(score_function)

# Count rows per score value to check whether the bucket distribution is reasonable
r_scores = data["R-SCORE"]
f_scores = data["F-SCORE"]
m_scores = data["M-SCORE"]
grouped_r = r_scores.groupby(r_scores).count()
grouped_f = f_scores.groupby(f_scores).count()
grouped_m = m_scores.groupby(m_scores).count()

# Compute and show the mean of each score column
avg_r = r_scores.mean()
avg_f = f_scores.mean()
avg_m = m_scores.mean()
display(avg_r, avg_f, avg_m)
# pandas_profiling can also be used to preview the data.
# NOTE(review): the pandas-profiling project has since been renamed
# ydata-profiling — confirm which package is installed in this environment.
# The report object is neither assigned nor explicitly displayed here.
import pandas_profiling
pandas_profiling.ProfileReport(data)
# 0/1 classification: flag each score as above (1) or not above (0) its column mean.
# Each score column is paired explicitly with its own mean, so every flag
# column is created even when two of the means are equal.
# The loop variable must be named `avg`: judge() reads that global as its
# threshold when called with a single argument.
for score_column, avg in (
    ("R-SCORE", avg_r),
    ("F-SCORE", avg_f),
    ("M-SCORE", avg_m),
):
    data[score_column + "是否大于均值"] = data[score_column].apply(judge)
# Label every row with its RFM segment, derived from the three 0/1 flags
flag_columns = [
    "R-SCORE是否大于均值",
    "F-SCORE是否大于均值",
    "M-SCORE是否大于均值",
]
data["RFM"] = data[flag_columns].apply(RFM, axis=1)
data.head()
# Summarize: number of rows in each segment
result = data.groupby("RFM").agg({"RFM": "count"})
#数据展示
from pyecharts import options as opts
from pyecharts.charts import TreeMap
# Build one TreeMap node per segment that is actually present in `result`,
# however many there are, pairing each index label with its count.
# NOTE: `data` is rebound here from the DataFrame to the chart payload; the
# name is kept because the chart code that follows reads `data`.
data = [
    {"value": int(count), "name": segment}
    for segment, count in result["RFM"].items()
]
# Render the segment counts as a TreeMap with labels drawn inside each tile
label_style = opts.LabelOpts(font_size=12, position="inside")
chart_title = opts.TitleOpts(title="")
c = (
    TreeMap()
    .add("RFM模型", data, label_opts=label_style)
    .set_global_opts(title_opts=chart_title)
)
c.render_notebook()