RFM电商数据分析(3)

本文复现了和鲸社区作者 Phoenix 的分析过程,并沿用了其大部分代码,原文地址如下:
原文地址

RFM电商数据分析(1)

RFM电商数据分析(2)

RFM电商数据分析(4)

# Summary statistics of each user's total spend, with extra percentiles
# between 1% and 99% requested from describe().
per_user_spend = df.groupby('user_id').agg(**{'消费金额': ('amount', 'sum')})
s15 = per_user_spend.describe(
    percentiles=[0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99],
)
s15

请添加图片描述

# Round the summary to 2 decimals, then keep rows 4..-2 of it (the slice
# skips the first four rows and the last row) as [label, value] pairs.
s15['消费金额'] = s15['消费金额'].round(2)
data_pair = [
    [label, value]
    for label, value in zip(s15.index[4:-1], s15['消费金额'].values[4:-1])
]


def charts8():
    """Render the module-level ``data_pair`` as a funnel chart.

    Returns:
        Whatever ``render_notebook()`` returns for the configured chart.
    """
    chart = Funnel(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
    chart.add(
        series_name='消费金额漏斗',
        data_pair=data_pair,
        label_opts=opts.LabelOpts(position='left'),
    )
    chart.set_global_opts(
        toolbox_opts=opts.ToolboxOpts(is_show=True, pos_top='20%'),
    )
    return chart.render_notebook()


charts8()

请添加图片描述
结论:75%的用户消费金额在1151元以内,有一半的用户消费金额低于460元,而平均消费金额为1250元。均值远高于中位数,甚至高于75%分位数,说明消费金额分布明显右偏,少数高消费用户贡献了主要的消费额

# Total spend per user, bucketed into the spend ranges listed in ``bins``.
bins = [0, 10, 20, 50, 100, 150, 200, 500, 1000, 2000, 5000, 10000, 100000, 200000]
per_user_total = df.groupby('user_id').agg(**{'消费金额': ('amount', 'sum')})
s15 = per_user_total.reset_index()
s15['cost_bin'] = pd.cut(s15['消费金额'], bins)
s15

在这里插入图片描述

# Number of user rows falling into each total-spend bucket.
s15_data = (
    s15.groupby('cost_bin')
    .agg(**{'人数': ('user_id', 'count')})
    .reset_index()
)


def charts9():
    """Render the per-bucket user counts in ``s15_data`` as a bar chart.

    Returns:
        Whatever ``render_notebook()`` returns for the configured chart.
    """
    bucket_labels = list(map(str, s15_data.cost_bin))
    user_counts = list(s15_data.人数)

    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
    chart.add_xaxis(bucket_labels)
    chart.add_yaxis("人数", user_counts)
    chart.set_global_opts(
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        # Tilt the x-axis labels by -25 degrees.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-25)),
        title_opts=opts.TitleOpts(title="客户总消费金额的分布"),
    )
    return chart.render_notebook()


charts9()

请添加图片描述

# Bucket every order row by its own amount (right-closed intervals) and
# count the order ids in each bucket.
bins = [0, 10, 20, 50, 100, 150, 200, 500, 1000, 2000, 5000, 10000, 100000]
s16 = df.loc[:, ['order_id', 'amount']].copy()
s16['cost_bin'] = pd.cut(s16['amount'], bins, right=True)
s16_data = (
    s16.groupby('cost_bin')
    .agg(**{'订单数量': ('order_id', 'count')})
    .reset_index()
)
s16_data

请添加图片描述

def charts16():
    """Render a bar chart of order counts per single-order amount bucket.

    Reads the module-level ``s16_data`` (columns ``cost_bin`` and
    ``订单数量``).

    Returns:
        Whatever ``render_notebook()`` returns for the configured chart.
    """
    bar = (
        Bar(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
        .add_xaxis([str(x) for x in s16_data.cost_bin])
        # The plotted values are order counts, so the series is labelled
        # "订单数量" (order count) rather than "人数" (people).
        .add_yaxis("订单数量", [x for x in s16_data.订单数量])
        .set_global_opts(
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            # Tilt the x-axis labels by -25 degrees.
            xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-25)),
            title_opts=opts.TitleOpts(title="单笔订单消费金额的分布"),
        )
        .render_notebook()
    )
    return bar
charts16()

请添加图片描述
结论:单笔订单金额落在200~500元区间的订单数量最多,金额高于2000元的订单占比不足10%

# First-purchase time: the earliest `date` recorded for each user.
a = df.groupby('user_id').agg(**{'首购时间': ('date', 'min')}).reset_index()
s17 = df.merge(a, how='inner', on='user_id')
# A row counts as "new customer" only when it falls on the user's
# first-purchase time; every other row of that user is "old customer".
is_first_purchase = s17['date'] == s17['首购时间']
s17['新老客户'] = np.where(is_first_purchase, '新客户', '老客户')
s17.head()

请添加图片描述

# Monthly comparison of new vs. old customers: row counts, shares,
# units sold and revenue.
new_by_month = s17[s17['新老客户'] == '新客户'].groupby('month')
old_by_month = s17[s17['新老客户'] == '老客户'].groupby('month')

# NOTE(review): 'count' counts rows of s17, not distinct users; if s17 holds
# several rows per user and a headcount is intended, 'nunique' would be the
# right aggregation — confirm.
s18 = new_by_month.agg(新客户人数=('user_id', 'count'))

# A month that has new customers but no old ones would otherwise get NaN
# here and turn both shares into NaN, so missing months are filled with 0.
s18['老客户人数'] = old_by_month['user_id'].count().reindex(s18.index).fillna(0)
total_customers = s18['新客户人数'] + s18['老客户人数']
s18['新客户人数占比%'] = s18['新客户人数'] / total_customers * 100
s18['老客户人数占比%'] = s18['老客户人数'] / total_customers * 100

# Plain Series are assigned (instead of one-column DataFrames with
# copy-pasted aggregation labels) so each column's source is explicit.
s18['新客销量'] = new_by_month['buy_count'].sum()
s18['老客销量'] = old_by_month['buy_count'].sum().reindex(s18.index).fillna(0)
s18['新客销售额'] = new_by_month['amount'].sum()
s18['老客销售额'] = old_by_month['amount'].sum().reindex(s18.index).fillna(0)
s18 = s18.reset_index()
s18

请添加图片描述

# Axis data for the monthly new/old customer bar chart.
x = s18['month'].values.tolist()
y1 = s18['新客户人数'].values.tolist()
y2 = s18['老客户人数'].values.tolist()


def charts18():
    """Render monthly new vs. old customer counts as grouped bars.

    Reads the module-level ``x``, ``y1`` and ``y2``.

    Returns:
        Whatever ``render_notebook()`` returns for the configured chart.
    """
    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    chart.add_xaxis(x)
    chart.add_yaxis('新客户人数', y1)
    chart.add_yaxis('老客户人数', y2)
    chart.set_global_opts(
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        title_opts=opts.TitleOpts(title="每月新/老客户人数"),
    )
    return chart.render_notebook()


charts18()

请添加图片描述

# User value scoring inputs.
# Dimensions mentioned by the author: age | order count | order total | average order value.
user_order = df.groupby('user_id').agg(订单数量=('order_id', 'count'), 订单总额=('amount', 'sum')).reset_index()
# Average order value per user, rounded to 2 decimals.
user_order['客单均价'] = (user_order['订单总额'] / user_order['订单数量']).round(2)
# The original cell also called sort_values() here but discarded the result,
# so it had no effect; the dead statement is removed and row order is unchanged.
# Percentile rows only: the slice skips the first four rows and the last row
# of the describe() output.
user_order.客单均价.describe(percentiles=[0.01, 0.1, 0.25, 0.5, 0.75, 0.99])[4:-1]

在这里插入图片描述
结论:客单均价中位数为80元,75%的客户平均每单消费不到185元

# age/local contain repeated rows per user (the author attributes this to
# multiple shipping addresses), so only each user's first row is kept.
user_data1 = df[['user_id', 'age', 'sex', 'local']]
# De-duplicating before the merge keeps the same first row per user as
# de-duplicating afterwards, avoids expanding user_order to one row per order,
# and leaves a clean 0..n-1 index, so no reset_index() is needed and no stale
# 'index' column is added.
user_data = pd.merge(
    user_order,
    user_data1.drop_duplicates('user_id'),
    how='inner',
    on='user_id',
)
user_data

请添加图片描述

# Bin users by age in 5-year steps from 15 to 50, then aggregate per bin.
bins = list(range(15, 55, 5))
user_data['age_bin'] = pd.cut(user_data['age'], bins)
s20 = (
    user_data.groupby('age_bin')
    .agg(**{
        '订单数量': ('订单数量', 'sum'),
        '订单总额': ('订单总额', 'sum'),
        '客单均价': ('客单均价', 'mean'),
        '客均单量': ('订单数量', 'mean'),
    })
    .reset_index()
)
s20

请添加图片描述
建立模型
假设:订单数量,订单总额,客单均价,客均单量的权重相同
对四项指标分别做 min-max 归一化后相加得到分数 score,再按 score 从高到低排名(rank),得出最有价值的客户特征

# Min-max normalization | z-score standardization
def score_c(x):
    """Min-max normalize a pandas Series.

    Args:
        x: Numeric pandas Series.

    Returns:
        numpy array of ``(value - min) / (max - min)`` for every element.
        When all values are equal (zero range) an array of zeros is
        returned instead of the NaNs that 0/0 would produce, so a constant
        metric contributes nothing to a summed score rather than
        invalidating it.
    """
    low = x.min()
    span = x.max() - low
    if span == 0:
        # Every value equals the minimum, so the shifted values are all 0.
        return (x.values - low).astype(float)
    return (x.values - low) / span

def score_z(x):
    """Z-score standardize a pandas Series.

    Args:
        x: Numeric pandas Series.

    Returns:
        numpy array of ``(value - mean) / std`` for every element, using
        the Series' own ``mean()`` and ``std()``.
    """
    # The original referenced an undefined name `s` here, which raised
    # NameError on every call; the parameter `x` is what was meant.
    score = (x.values - x.mean()) / x.std()
    return score
# Score each age bin: normalize the four metrics with score_c, add them with
# equal weight, then rank so that the highest score gets rank 1.
score1, score2, score3, score4 = (
    score_c(s20[metric])
    for metric in ('订单数量', '订单总额', '客单均价', '客均单量')
)
s20['score'] = score1 + score2 + score3 + score4
s20['购买力rank'] = s20['score'].rank(ascending=False)
s20.sort_values(by='购买力rank')

请添加图片描述

# Split by sex: order count, order total, average order value, and average
# number of orders per user.
s21 = (
    user_data.groupby('sex')
    .agg(**{
        '订单数量': ('订单数量', 'sum'),
        '订单总额': ('订单总额', 'sum'),
        '客单均价': ('客单均价', 'mean'),
        '客均单量': ('订单数量', 'mean'),
    })
    .reset_index()
)
s21

请添加图片描述

# Score each sex group: normalize the four metrics with score_c, add them
# with equal weight, then rank so that the highest score gets rank 1.
score1, score2, score3, score4 = (
    score_c(s21[metric])
    for metric in ('订单数量', '订单总额', '客单均价', '客均单量')
)
s21['score'] = score1 + score2 + score3 + score4
s21['购买力rank'] = s21['score'].rank(ascending=False)
s21.sort_values(by='购买力rank')

请添加图片描述

# Same aggregation and scoring as above, grouped by the `local` column.
s22 = (
    user_data.groupby('local')
    .agg(**{
        '订单数量': ('订单数量', 'sum'),
        '订单总额': ('订单总额', 'sum'),
        '客单均价': ('客单均价', 'mean'),
        '客均单量': ('订单数量', 'mean'),
    })
    .reset_index()
)
score1, score2, score3, score4 = (
    score_c(s22[metric])
    for metric in ('订单数量', '订单总额', '客单均价', '客均单量')
)
s22['score'] = score1 + score2 + score3 + score4
s22['购买力rank'] = s22['score'].rank(ascending=False)
s22.sort_values(by='购买力rank')

请添加图片描述
时间维度上分析

客户消费周期分布情况
按周销量/销售额分布
按小时分析
每月复购率

# Daily totals of units sold and revenue.
data_all = (
    df.groupby('date')
    .agg(**{'销量': ('buy_count', 'sum'), '销售额': ('amount', 'sum')})
    .reset_index()
)
# Characters 5..9 of each date's string form are used as the axis label.
x = [str(day)[5:10] for day in data_all.date]
y1 = list(data_all.销量)
# Revenue is divided by 100 and plotted under the "百元" (hundreds) label.
y2 = [round(revenue / 100, 2) for revenue in data_all.销售额]


def charts20():
    """Render daily units sold and revenue as a two-series line chart.

    Reads the module-level ``x``, ``y1`` and ``y2``.

    Returns:
        Whatever ``render_notebook()`` returns for the configured chart.
    """
    chart = Line(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
    chart.add_xaxis(x)
    chart.add_yaxis('销量(件)', y1)
    chart.add_yaxis('销售额(百元)', y2)
    chart.set_global_opts(
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        tooltip_opts=opts.TooltipOpts(trigger='axis'),
        datazoom_opts=opts.DataZoomOpts(is_show=True),
        title_opts=opts.TitleOpts(title='销量/销售额走势图'),
    )
    # Hide the per-point value labels.
    chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    return chart.render_notebook()


charts20()

请添加图片描述

# Days between consecutive purchases of the same user, using only orders
# with amount > 0.
# groupby(...)['date'].diff() computes the same "date minus previous date"
# per user as the original apply/shift lambda, but always returns a Series
# (apply can return a DataFrame when there is a single group) and avoids the
# slow per-group Python call. The result keeps the order rows' own index
# instead of a (user_id, row) MultiIndex.
paid_orders = df[df['amount'] > 0].sort_values('date')
s23 = paid_orders.groupby('user_id')['date'].diff().dt.days
# `s23 > 0` drops each user's first order (no previous order, so NaN) and
# repeat orders with a 0-day gap.
s23[s23 > 0].describe(percentiles=[0.01, 0.1, 0.25, 0.75, 0.9, 0.99])

请添加图片描述

# Both tables below sum units sold and revenue; only the grouping key differs.
sales_totals = {'销量': ('buy_count', 'sum'), '销售额': ('amount', 'sum')}
# Grouped by the `week` column.
s24 = df.groupby('week').agg(**sales_totals).reset_index()
s24
# Grouped by the `hour` column (shown transposed).
s25 = df.groupby('hour').agg(**sales_totals).reset_index()
s25.T

每月的新客、老客人数分析:用户首次购买所在的月份记为新客户,从次月起记为老客户

# Monthly new vs. old customers: a row is "new" in the user's first purchase
# month and "old" in every other month.
# NOTE(review): taking min() of `month` as the first purchase month assumes
# month values sort chronologically (e.g. single-year data) — confirm.
s25_data1 = df.groupby('user_id').agg(首次购买月份=('month', 'min')).reset_index()
s25_data2 = pd.merge(df, s25_data1, how='inner', on='user_id')
s25_data2['月新老客户'] = np.where(s25_data2['month'] == s25_data2['首次购买月份'], '新客户', '老客户')

s25 = s25_data2[s25_data2['月新老客户'] == '新客户'].groupby('month').agg(新客户数量=('月新老客户', 'count'))
old_per_month = s25_data2[s25_data2['月新老客户'] == '老客户'].groupby('month')['月新老客户'].count()
# Months without any old customer (at least the earliest month) are missing
# from old_per_month. Fill them with 0 BEFORE computing the shares: the
# original filled NaN afterwards, which turned the new-customer share of
# such a month into 0 instead of 100.
s25['老客户数量'] = old_per_month.reindex(s25.index).fillna(0)
monthly_total = s25['新客户数量'] + s25['老客户数量']
s25['新客户占比'] = s25['新客户数量'] / monthly_total * 100
s25['老客户占比'] = s25['老客户数量'] / monthly_total * 100
s25 = s25.reset_index()
s25

请添加图片描述

# Raw counts as bars with the share percentages overlaid as lines
# (the author notes this combination is not very readable).
x = s25.month.values.tolist()
y1 = s25.新客户数量.values.tolist()
y2 = s25.老客户数量.values.tolist()
y3 = [round(share, 2) for share in s25.新客户占比.values]
y4 = [round(share, 2) for share in s25.老客户占比.values]
def charts25():
    """Render monthly new/old customer counts (bars) with their shares (lines).

    Reads the module-level ``x`` and ``y1``..``y4``.

    Returns:
        Whatever ``render_notebook()`` returns for the combined chart.
    """
    bar = (
        Bar(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
        .add_xaxis(x)
        .add_yaxis('新客户数量', y1)
        # Label fixed: this series holds the old-customer counts (the
        # original label '新老户数量' was a typo).
        .add_yaxis('老客户数量', y2)
        .set_global_opts(
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            title_opts=opts.TitleOpts(title='月新/老客户数量'),
        )
    )
    line = (
        Line()
        .add_xaxis(x)
        .add_yaxis('新客户占比%', y3)
        .add_yaxis('老客户占比%', y4)
    )
    # Draw the percentage lines on top of the bar chart.
    bar.overlap(line)
    return bar.render_notebook()
charts25()

请添加图片描述

# Stacked bar chart variant.
from pyecharts.commons.utils import JsCode

x = s25.month.values.tolist()
# Missing counts are treated as 0 so every month has a usable total.
new_counts = s25.新客户数量.fillna(0).values.tolist()
old_counts = s25.老客户数量.fillna(0).values.tolist()


def _share(part, whole):
    """Return ``part`` as a percentage of ``whole``, rounded to 2 decimals (0.0 if ``whole`` is 0)."""
    return round(part / whole * 100, 2) if whole else 0.0


# The shares are derived from the counts here, so a month without old
# customers shows 100% new customers without the hard-coded override
# (`y1[0] = {'value': 14270, 'percent': 100.0}`) the original needed.
# Both series cast `value` to int for consistency.
y1 = [{'value': int(n), 'percent': _share(n, n + o)} for n, o in zip(new_counts, old_counts)]
y2 = [{'value': int(o), 'percent': _share(o, n + o)} for n, o in zip(new_counts, old_counts)]
def charts25_5():
    """Render monthly new/old customer counts as stacked bars labelled with their share.

    Reads the module-level ``x``, ``y1`` and ``y2``; each data point is a
    dict with ``value`` (bar height) and ``percent`` (read by the label
    formatter).

    Returns:
        Whatever ``render_notebook()`` returns for the configured chart.
    """
    bar = (
        Bar(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
        .add_xaxis(x)
        # The same `stack` id puts both series into one bar per month.
        .add_yaxis('新客户', y1, stack='stack1',category_gap="50%")
        .add_yaxis('老客户', y2, stack='stack1',category_gap="50%")
        .set_global_opts(
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            title_opts=opts.TitleOpts(title='月新/老客户数量'),
        )
        .set_series_opts(
            label_opts=opts.LabelOpts(
                position="right",
                # Show each point's `percent` field, formatted by toFixed(), plus '%'.
                formatter=JsCode(
                    "function(x){return Number(x.data.percent).toFixed() + '%';}"
                ),
            )
        )
        .render_notebook()
    )
    return bar
charts25_5()

请添加图片描述

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值