数据分析技术用于营销策略(实践项目)

营销策略

# coding: utf-8

"""
Created on Thu Jul  2 22:22:10 2020

@author: xiaoyao
"""
'\nCreated on Thu Jul  2 22:22:10 2020\n\n@author: xiaoyao\n'
# 1.导入数据
import pandas as pd
# Read the raw transaction flow (decoded as GBK) and preview its first ten rows.
trad_flow = pd.read_csv(
    filepath_or_buffer=r'D:\python code\7spyder projects\Python数据科学技术详解与商业实践\2RFM_TRAD_FLOW.csv',
    encoding='gbk',
)
print(trad_flow.head(n=10))
   transID  cumid              time  amount type_label            type
0     9407  10001  14JUN09:17:58:34   199.0         正常          Normal
1     9625  10001  16JUN09:15:09:13   369.0         正常          Normal
2    11837  10001  01JUL09:14:50:36   369.0         正常          Normal
3    26629  10001  14DEC09:18:05:32   359.0         正常          Normal
4    30850  10001  12APR10:13:02:20   399.0         正常          Normal
5    32007  10001  04MAY10:16:45:58   269.0         正常          Normal
6    36637  10001  04JUN10:20:03:06     0.0         赠送       Presented
7    43108  10001  06JUL10:16:56:40   381.0         正常          Normal
8    43877  10001  10JUL10:20:41:54  -399.0         退货  returned_goods
9    46081  10001  23JUL10:16:35:45     0.0         赠送       Presented
# 2. 通过RFM方法建立模型
"""
RFM方法:RFM模型是衡量客户价值和客户创利能力的重要工具和手段。
在众多的客户关系管理(CRM)的分析模式中,RFM模型是被广泛提到的。
该机械模型通过一个客户的近期购买行为、购买的总体频率以及花了多少钱3项指标来描述该客户的价值状况。
"""
# 2.1 Compute F to reflect each customer's preference for discounted products.
# Group by customer id and transaction type, then count the transaction ids
# that fall into every (cumid, type) group.
F = trad_flow.groupby(by=['cumid', 'type']).agg({'transID': 'count'})
print(F.head())
                      transID
cumid type                   
10001 Normal               15
      Presented             8
      Special_offer         2
      returned_goods        2
10002 Normal               12
# Pivot the counts: one row per customer, one column per transaction type.
F_trans = F.pivot_table(values='transID', index='cumid', columns='type')
print(F_trans.head())
type   Normal  Presented  Special_offer  returned_goods
cumid                                                  
10001    15.0        8.0            2.0             2.0
10002    12.0        5.0            NaN             1.0
10003    15.0        8.0            1.0             1.0
10004    15.0       12.0            2.0             1.0
10005     8.0        5.0            NaN             1.0
# Missing-value handling: a customer with no special-offer rows gets a count of 0.
F_trans = F_trans.fillna({'Special_offer': 0})
print(F_trans.head())
type   Normal  Presented  Special_offer  returned_goods
cumid                                                  
10001    15.0        8.0            2.0             2.0
10002    12.0        5.0            0.0             1.0
10003    15.0        8.0            1.0             1.0
10004    15.0       12.0            2.0             1.0
10005     8.0        5.0            0.0             1.0
# Interest score: share of special-offer purchases among special-offer plus
# normal purchases.
F_trans = F_trans.assign(
    interest=lambda counts: counts['Special_offer']
    / (counts['Special_offer'] + counts['Normal'])
)
print(F_trans.head())
type   Normal  Presented  Special_offer  returned_goods  interest
cumid                                                            
10001    15.0        8.0            2.0             2.0  0.117647
10002    12.0        5.0            0.0             1.0  0.000000
10003    15.0        8.0            1.0             1.0  0.062500
10004    15.0       12.0            2.0             1.0  0.117647
10005     8.0        5.0            0.0             1.0  0.000000
# Compute M to reflect each customer's value: total amount per
# (cumid, type) group.
M = trad_flow.groupby(by=['cumid', 'type']).agg({'amount': 'sum'})
print(M.head())
                      amount
cumid type                  
10001 Normal          3608.0
      Presented          0.0
      Special_offer    420.0
      returned_goods  -694.0
10002 Normal          1894.0
# Pivot the amounts: one row per customer, one column per transaction type.
M_trans = M.pivot_table(values='amount', index='cumid', columns='type')

# Customers without special-offer or returned-goods rows contribute 0 there.
M_trans = M_trans.fillna({'Special_offer': 0, 'returned_goods': 0})

# Customer value = normal + special-offer + returned-goods amounts
# (the returned-goods amounts in the data are negative, so they reduce the total).
M_trans["value"] = (
    M_trans['Normal']
    .add(M_trans['Special_offer'])
    .add(M_trans['returned_goods'])
)
print(M_trans.head())
type   Normal  Presented  Special_offer  returned_goods   value
cumid                                                          
10001  3608.0        0.0          420.0          -694.0  3334.0
10002  1894.0        0.0            0.0          -242.0  1652.0
10003  3503.0        0.0          156.0          -224.0  3435.0
10004  2979.0        0.0          373.0           -40.0  3312.0
10005  2368.0        0.0            0.0          -249.0  2119.0
# 通过计算R反映客户是否为沉默客户
# 定义一个从文本转化为时间的函数
import time
def to_time(t):
    """Convert a timestamp string such as ``14JUN09:17:58:34`` to epoch seconds.

    The text is parsed with the format ``'%d%b%y:%H:%M:%S'`` and the parsed
    struct is passed to ``time.mktime``, which interprets it as local time.
    A numeric timestamp is returned (rather than a time struct) so that the
    value can be binned with ``qcut`` later on.

    Raises ``ValueError`` (from ``time.strptime``) if ``t`` does not match
    the expected format.
    """
    parsed = time.strptime(t, '%d%b%y:%H:%M:%S')
    return time.mktime(parsed)
# Quick check of to_time on a single sample timestamp string.
a="14JUN09:17:58:34"
print(to_time(a))
1244973514.0
# Add a numeric timestamp column derived from the textual ``time`` column.
trad_flow["time_new"] = trad_flow["time"].map(to_time)
print(trad_flow.head())
   transID  cumid              time  amount type_label    type      time_new
0     9407  10001  14JUN09:17:58:34   199.0         正常  Normal  1.244974e+09
1     9625  10001  16JUN09:15:09:13   369.0         正常  Normal  1.245136e+09
2    11837  10001  01JUL09:14:50:36   369.0         正常  Normal  1.246431e+09
3    26629  10001  14DEC09:18:05:32   359.0         正常  Normal  1.260785e+09
4    30850  10001  12APR10:13:02:20   399.0         正常  Normal  1.271049e+09
# R: the most recent transaction timestamp of every customer.
R = trad_flow.groupby(by=['cumid']).agg({'time_new': 'max'})
print(R.head())
           time_new
cumid              
10001  1.284699e+09
10002  1.278129e+09
10003  1.282983e+09
10004  1.283057e+09
10005  1.282127e+09
# 3.构建模型,筛选目标客户
from sklearn import preprocessing
def _binarize_at_median(series, column):
    """Return a one-column frame of 0/1 flags split at the series' 50% quantile.

    Parameters:
        series: numeric pandas Series to split; its index is kept on the result.
        column: name given to the single column of the returned DataFrame.

    Returns:
        DataFrame indexed like ``series`` whose only column holds the output
        of scikit-learn's ``Binarizer`` (1 where the value is strictly greater
        than the threshold, 0 otherwise).
    """
    # With two quantile bins the returned edges are [min, 50% quantile, max];
    # the middle edge is the split point.
    threshold = pd.qcut(series, 2, retbins=True)[1][1]
    binarizer = preprocessing.Binarizer(threshold=threshold)
    # Binarizer expects a 2-D array, hence the reshape to a single column.
    flags = binarizer.transform(series.values.reshape(-1, 1))
    return pd.DataFrame(flags, index=series.index, columns=[column])


# Binarise the three scores with the same rule.
interest_q = _binarize_at_median(F_trans['interest'], "interest")
value_q = _binarize_at_median(M_trans['value'], "value")
time_new_q = _binarize_at_median(R["time_new"], "time")

# One row per customer with the columns interest, value, time (in that order).
analysis = pd.concat([interest_q, value_q, time_new_q], axis=1)

# Readable segment name for every (interest, value, time) flag combination.
label = {
    (0, 0, 0): '无兴趣-低价值-沉默',
    (1, 0, 0): '有兴趣-低价值-沉默',
    (1, 0, 1): '有兴趣-低价值-活跃',
    (0, 0, 1): '无兴趣-低价值-活跃',
    (0, 1, 0): '无兴趣-高价值-沉默',
    (1, 1, 0): '有兴趣-高价值-沉默',
    (1, 1, 1): '有兴趣-高价值-活跃',
    (0, 1, 1): '无兴趣-高价值-活跃',
}
# Build each lookup key as a plain tuple of ints. This avoids integer
# position access (x[0], x[1], x[2]) on a label-indexed Series, which pandas
# has deprecated, and makes the keys match the int tuples used in ``label``
# without relying on float/int hash equality.
analysis['label'] = [
    label[flags]
    for flags in analysis[['interest', 'value', 'time']]
    .astype(int)
    .itertuples(index=False, name=None)
]
print(analysis.head())
       interest  value  time       label
cumid                                   
10001       1.0    1.0   1.0  有兴趣-高价值-活跃
10002       0.0    0.0   0.0  无兴趣-低价值-沉默
10003       0.0    1.0   0.0  无兴趣-高价值-沉默
10004       1.0    1.0   0.0  有兴趣-高价值-沉默
10005       0.0    0.0   0.0  无兴趣-低价值-沉默

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值