数据科学 案例1 商品促销(代码)


供自己查阅。

1. 导入数据

import pandas as pd
#import numpy as np
# Load the raw transaction flow, decoding the file as GBK.
# The relative path uses forward slashes so it resolves on POSIX systems as
# well as on Windows (a backslash is not a path separator on POSIX).
trad_flow = pd.read_csv('./data/RFM_TRAD_FLOW.csv', encoding='gbk')
trad_flow.head(10)
   transID  cumid  time              amount  type_label  type
0  9407     10001  14JUN09:17:58:34  199.0   正常        Normal
1  9625     10001  16JUN09:15:09:13  369.0   正常        Normal
2  11837    10001  01JUL09:14:50:36  369.0   正常        Normal
3  26629    10001  14DEC09:18:05:32  359.0   正常        Normal
4  30850    10001  12APR10:13:02:20  399.0   正常        Normal
5  32007    10001  04MAY10:16:45:58  269.0   正常        Normal
6  36637    10001  04JUN10:20:03:06  0.0     赠送        Presented
7  43108    10001  06JUL10:16:56:40  381.0   正常        Normal
8  43877    10001  10JUL10:20:41:54  -399.0  退货        returned_goods
9  46081    10001  23JUL10:16:35:45  0.0     赠送        Presented

完整程序

# Frequency (F): number of transactions per customer and transaction type,
# reshaped to one row per customer and one column per type.
F = trad_flow.groupby(['cumid', 'type'])[['transID']].count()
F_trad = pd.pivot_table(F, index='cumid', columns='type', values='transID')
# A customer with no special-offer purchase has no count after the pivot; use 0.
F_trad['Special_offer'] = F_trad['Special_offer'].fillna(0)
# Share of special-offer purchases among special-offer plus normal purchases.
F_trad['interest'] = F_trad['Special_offer'] / (F_trad['Special_offer'] + F_trad['Normal'])

# Monetary (M): total amount per customer and transaction type.
M = trad_flow.groupby(['cumid', 'type'])[['amount']].sum()
M_trad = pd.pivot_table(M, index='cumid', columns='type', values='amount')
# Missing special-offer / returned-goods totals are treated as 0; each column
# only needs to be filled once.
M_trad['Special_offer'] = M_trad['Special_offer'].fillna(0)
M_trad['returned_goods'] = M_trad['returned_goods'].fillna(0)
# Net value per customer: normal + special-offer + returned-goods amounts.
M_trad["value"] = M_trad['Normal'] + M_trad['Special_offer'] + M_trad['returned_goods']

from datetime import datetime
import time
def to_time(t):
    """Convert a timestamp string such as ``'14JUN09:17:58:34'`` to epoch seconds.

    The string is parsed with the format ``%d%b%y:%H:%M:%S`` and passed to
    ``time.mktime``, which interprets the fields as local time, so the
    returned float depends on the machine's timezone.
    """
    parsed = time.strptime(t, '%d%b%y:%H:%M:%S')
    return time.mktime(parsed)
# Try the parser on the row labelled 0 before converting the whole column.
to_time(trad_flow.loc[0, 'time'])
# Epoch-seconds version of every transaction time.
trad_flow['new_time'] = trad_flow['time'].map(to_time)
# Latest transaction time per customer; missing values are replaced by 0.
N = trad_flow.groupby(['cumid'])[['new_time']].max()
N['new_time'] = N['new_time'].fillna(0)

# Binarizer comes from scikit-learn; import it here so this listing does not
# depend on an import that only appears further down.
from sklearn import preprocessing

# For each metric, the middle edge of a 2-quantile cut is the threshold;
# Binarizer maps values greater than the threshold to 1 and the rest to 0.

# Interest in special offers.
threshold = pd.qcut(F_trad['interest'], 2, retbins=True)[1][1]
binarizer = preprocessing.Binarizer(threshold=threshold)
interest_q = pd.DataFrame(binarizer.transform(F_trad['interest'].values.reshape(-1, 1)))
interest_q.index = F_trad.index
interest_q.columns = ["interest"]

# Monetary value.
threshold = pd.qcut(M_trad['value'], 2, retbins=True)[1][1]
binarizer = preprocessing.Binarizer(threshold=threshold)
value_q = pd.DataFrame(binarizer.transform(M_trad['value'].values.reshape(-1, 1)))
value_q.index = M_trad.index
value_q.columns = ['value']

# Time of the most recent transaction.
threshold = pd.qcut(N["new_time"], 2, retbins=True)[1][1]
binarizer = preprocessing.Binarizer(threshold=threshold)
time_new_q = pd.DataFrame(binarizer.transform(N["new_time"].values.reshape(-1, 1)))
time_new_q.index = N.index
time_new_q.columns = ["time"]

# One row per customer with the three 0/1 flags side by side.
analysis = pd.concat([interest_q, value_q, time_new_q], axis=1)
analysis = analysis[['interest', 'value', 'time']]
# analysis.head()

# Segment name for every (interest, value, time) combination of 0/1 flags.
label = {
    (0,0,0):'无兴趣-低价值-沉默',
    (1,0,0):'有兴趣-低价值-沉默',
    (1,0,1):'有兴趣-低价值-活跃',
    (0,0,1):'无兴趣-低价值-活跃',
    (0,1,0):'无兴趣-高价值-沉默',
    (1,1,0):'有兴趣-高价值-沉默',
    (1,1,1):'有兴趣-高价值-活跃',
    (0,1,1):'无兴趣-高价值-活跃'
}
# Build the lookup key from the three flag columns selected by name. Indexing a
# row with integer keys (x[0], x[1], x[2]) relies on positional fallback for a
# string-labelled Series, which pandas has deprecated. The float flags
# (0.0 / 1.0) compare and hash equal to the integer keys of `label`.
analysis['label'] = analysis[['interest', 'value', 'time']].apply(
    lambda row: label[tuple(row)], axis=1
)
analysis.head()
interestvaluetimelabel
cumid
100011.01.01.0有兴趣-高价值-活跃
100020.00.00.0无兴趣-低价值-沉默
100030.01.00.0无兴趣-高价值-沉默
100041.01.00.0有兴趣-高价值-沉默
100050.00.00.0无兴趣-低价值-沉默
# Standalone version of the lookup: the first three items of x form the key
# into `label`; tried here on a plain tuple.
a =lambda x: label[(x[0],x[1],x[2])]
a((1,1,1))
'有兴趣-高价值-活跃'
# Inspect the cut point used for `value`: with retbins=True, qcut returns
# (binned, edges) and edges[1] is the middle edge of the 2-quantile cut.
# Indexing the already-extracted float `threshold` would raise a TypeError.
pd.qcut(M_trad['value'], 2, retbins=True)[1][1]
2779.0
# Column vector (n rows, 1 column) of the interest ratio, the 2-D input shape
# passed to Binarizer.transform. The column is named 'interest'.
F_trad['interest'].values.reshape(-1, 1)
array([[0.11764706],
       [0.        ],
       [0.0625    ],
       ...,
       [0.28571429],
       [0.05882353],
       [0.09090909]])

分步理解:

2.通过 RFM方法 建立模型

2.1 通过计算F反映客户对打折产品的偏好

# Count transID for every (cumid, type) pair; the result is indexed by both keys.
F=trad_flow.groupby(['cumid','type'])[['transID']].count()
F.head()
transID
cumidtype
10001Normal15
Presented8
Special_offer2
returned_goods2
10002Normal12
# Reshape to one row per cumid with one column per transaction type.
F_trans=pd.pivot_table(F,index='cumid',columns='type',values='transID')
F_trans.head()
typeNormalPresentedSpecial_offerreturned_goods
cumid
1000115.08.02.02.0
1000212.05.0NaN1.0
1000315.08.01.01.0
1000415.012.02.01.0
100058.05.0NaN1.0
# Customers with no Special_offer row show NaN after the pivot; count them as 0.
F_trans['Special_offer']= F_trans['Special_offer'].fillna(0)
F_trans.head()
typeNormalPresentedSpecial_offerreturned_goods
cumid
1000115.08.02.02.0
1000212.05.00.01.0
1000315.08.01.01.0
1000415.012.02.01.0
100058.05.00.01.0
# interest = special-offer purchases as a share of special-offer plus normal purchases.
F_trans["interest"]=F_trans['Special_offer']/(F_trans['Special_offer']+F_trans['Normal'])
F_trans.head()
typeNormalPresentedSpecial_offerreturned_goodsinterest
cumid
1000115.08.02.02.00.117647
1000212.05.00.01.00.000000
1000315.08.01.01.00.062500
1000415.012.02.01.00.117647
100058.05.00.01.00.000000

2.2 通过计算M反映客户的价值信息

# Sum the amount for every (cumid, type) pair.
M=trad_flow.groupby(['cumid','type'])[['amount']].sum()
M.head()
amount
cumidtype
10001Normal3608.0
Presented0.0
Special_offer420.0
returned_goods-694.0
10002Normal1894.0
# One row per cumid, one column of summed amount per transaction type.
M_trans=pd.pivot_table(M,index='cumid',columns='type',values='amount')
# Types a customer never had are NaN after the pivot; treat them as 0.
M_trans['Special_offer']= M_trans['Special_offer'].fillna(0)
M_trans['returned_goods']= M_trans['returned_goods'].fillna(0)
# Net value: normal + special-offer + returned-goods amounts.
M_trans["value"]=M_trans['Normal']+M_trans['Special_offer']+M_trans['returned_goods']
M_trans.head()
typeNormalPresentedSpecial_offerreturned_goodsvalue
cumid
100013608.00.0420.0-694.03334.0
100021894.00.00.0-242.01652.0
100033503.00.0156.0-224.03435.0
100042979.00.0373.0-40.03312.0
100052368.00.00.0-249.02119.0

2.3 通过计算R反映客户是否为沉默客户

  • 定义一个从文本转化为时间的函数
from datetime import datetime
import time
def to_time(t):
    """Return *t* (e.g. ``'14JUN09:17:58:34'``) as epoch seconds.

    The text is parsed with ``%d%b%y:%H:%M:%S`` and converted by
    ``time.mktime``, which treats the fields as local time. A numeric
    timestamp is returned so that ``qcut`` can bin the values later on.
    """
    fields = time.strptime(t, '%d%b%y:%H:%M:%S')
    out_t = time.mktime(fields)
    return out_t
# Quick check on a literal timestamp. time.mktime works in local time, so the
# number printed depends on the timezone of the machine running it.
a="14JUN09:17:58:34"
print(to_time(a))
1244973514.0
# Add an epoch-seconds column computed from the text in the time column.
trad_flow["time_new"]= trad_flow.time.apply(to_time)
trad_flow.head()
transIDcumidtimeamounttype_labeltypenew_timetime_new
094071000114JUN09:17:58:34199.0正常Normal1.244974e+091.244974e+09
196251000116JUN09:15:09:13369.0正常Normal1.245136e+091.245136e+09
2118371000101JUL09:14:50:36369.0正常Normal1.246431e+091.246431e+09
3266291000114DEC09:18:05:32359.0正常Normal1.260785e+091.260785e+09
4308501000112APR10:13:02:20399.0正常Normal1.271049e+091.271049e+09
# Latest transaction timestamp per cumid.
R=trad_flow.groupby(['cumid'])[['time_new']].max()
R.head()
time_new
cumid
100011.284699e+09
100021.278129e+09
100031.282983e+09
100041.283057e+09
100051.282127e+09

3.构建模型,筛选目标客户

Pandas —— qcut( )与cut( )的区别

from sklearn import preprocessing
# With retbins=True, qcut returns (binned values, bin edges); for 2 quantile
# bins, edges[1] is the middle edge, which is kept as the split threshold.
threshold = pd.qcut(F_trans['interest'], 2, retbins=True)[1][1]

信息二分

# Interest flag, using the threshold computed just before this cell.
binarizer = preprocessing.Binarizer(threshold=threshold)
interest_q = pd.DataFrame(
    binarizer.transform(F_trans['interest'].values.reshape(-1, 1)),
    index=F_trans.index,
    columns=["interest"],
)

# Value flag, split at the middle edge of a 2-quantile cut of `value`.
threshold = pd.qcut(M_trans['value'], 2, retbins=True)[1][1]
binarizer = preprocessing.Binarizer(threshold=threshold)
value_q = pd.DataFrame(
    binarizer.transform(M_trans['value'].values.reshape(-1, 1)),
    index=M_trans.index,
    columns=["value"],
)

# Time flag, split at the middle edge of a 2-quantile cut of `time_new`.
threshold = pd.qcut(R["time_new"], 2, retbins=True)[1][1]
binarizer = preprocessing.Binarizer(threshold=threshold)
time_new_q = pd.DataFrame(
    binarizer.transform(R["time_new"].values.reshape(-1, 1)),
    index=R.index,
    columns=["time"],
)

# Put the three flags side by side, one row per customer, in a fixed column order.
analysis = pd.concat([interest_q, value_q, time_new_q], axis=1)
analysis = analysis[['interest', 'value', 'time']]
analysis.head()

# Segment name for every (interest, value, time) combination of 0/1 flags.
label = {
    (0,0,0):'无兴趣-低价值-沉默',
    (1,0,0):'有兴趣-低价值-沉默',
    (1,0,1):'有兴趣-低价值-活跃',
    (0,0,1):'无兴趣-低价值-活跃',
    (0,1,0):'无兴趣-高价值-沉默',
    (1,1,0):'有兴趣-高价值-沉默',
    (1,1,1):'有兴趣-高价值-活跃',
    (0,1,1):'无兴趣-高价值-活跃'
}
# The key is the whole row of the three selected flag columns. Integer keys
# such as x[0] on a string-labelled Series rely on positional fallback, which
# pandas has deprecated. The float flags (0.0 / 1.0) compare and hash equal to
# the integer keys of `label`.
analysis['label'] = analysis[['interest', 'value', 'time']].apply(
    lambda row: label[tuple(row)], axis=1
)
analysis.head()
interestvaluetimelabel
cumid
100011.01.01.0有兴趣-高价值-活跃
100020.00.00.0无兴趣-低价值-沉默
100030.01.00.0无兴趣-高价值-沉默
100041.01.00.0有兴趣-高价值-沉默
100050.00.00.0无兴趣-低价值-沉默
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

irober

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值