【数据分析案例】pandas + matplotlib 人货场+RFM+用户复购分析 电商水果销售

使用 Python 中的 pandas 和 matplotlib 简单实现描述性分析。由于数据是随机生成的,该案例更多体现的是分析思路以及实现过程。


 

导入相关库

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime

中文设置

# Chinese text settings
plt.rcParams['font.sans-serif'] = ['SimHei']  # use the SimHei font so Chinese labels are displayed
plt.rcParams['axes.unicode_minus'] = False  # keeps the minus sign '-' from showing as a box in saved figures

数据源

初始指标:订单时间、水果名称、客户姓名、水果重量、发货地区、订单金额

# Order dates: the two years that end yesterday.
# The clock is read once so that year/month/day cannot come from different days.
today = datetime.date.today()
try:
    star_time = today.replace(year=today.year - 2)
except ValueError:
    # Today is Feb 29; the year two years back has no Feb 29, so fall back to Feb 28.
    star_time = today.replace(year=today.year - 2, day=28)
end_time = today - datetime.timedelta(days=1)
time_range = pd.date_range(star_time, end_time)
order_count = len(time_range)  # as many random orders as there are days in the range
time_list = np.random.choice(time_range, size=order_count, replace=True)
# Fruits
fruits = ['香蕉', '苹果', '葡萄', '橙子', '西瓜', '哈密瓜', '梨', '草莓']
fruits_list = np.random.choice(fruits, size=order_count, replace=True)
# Customers
client = ['张三', '李四', '王五', '赵六', '孙七', '周八', '吴九']
client_list = np.random.choice(client, size=order_count, replace=True)

# Order table columns:
#   time    order date
#   fruits  fruit name
#   client  customer name
#   weight  fruit weight (random integer in [50, 100))
order = pd.DataFrame({
    'time'  : time_list,
    'fruits': fruits_list,
    'client': client_list,
    'weight': np.random.choice(np.arange(50, 100), size=order_count, replace=True)
})
# Fruit reference table columns:
#   price   unit price (multiplied by weight below to get the order amount)
#   regoin  shipping region (spelling kept because later code uses this column name)
information = pd.DataFrame({
    'fruits': fruits,
    'price' : [3.8, 8.9, 12.8, 6.8, 15.8, 4.9, 5.8, 7],
    'regoin': ['华南', '华北', '西北', '华中', '西北', '华南', '华北', '华中']
})
# Attach price/region to every order. A left join keeps exactly one row per
# order; an outer join would add a row without an order date for any fruit
# that happened not to be drawn.
df = (pd.merge(order, information, on='fruits', how='left')
        .sort_values('time')
        .reset_index(drop=True))

# Derived column: order amount
df['amount'] = df['weight'] * df['price']

人货场

分析维度:时间

连续两年销售额走势

# Calendar columns used by the pivots in this section and the following ones.
df['year'] = df['time'].dt.year
df['month'] = df['time'].dt.month
df['year_month'] = df['time'].dt.strftime('%Y%m')

# Monthly sales amount over the whole data range.
df2 = df.pivot_table(index='year_month',
                     values='amount',
                     aggfunc='sum')
plt.figure(figsize=(20, 10), dpi=80)
plt.plot(df2.index, df2['amount'], 'r-.')
# The data range moves with the run date, so the years in the title are read from the data.
plt.title('{}-{}每月销售额走势'.format(df['year'].min(), df['year'].max()),
          fontsize='xx-large',
          fontweight='light',
          horizontalalignment='center',
          verticalalignment='top')
# Label every point with its amount, rounded to a whole number for readability.
for a, b in zip(df2.index, df2['amount']):
    plt.text(a, b + 0.05, '{:.0f}'.format(b), ha='center')
print(df2.head(5))
plt.show()

年度销量、销售额和平均销售额

# Per-year total weight, total amount and mean order amount.
df3 = df.pivot_table(index='year',
                     values=['weight', 'amount'],
                     aggfunc={'weight': 'sum', 'amount': ['mean', 'sum']}).round(0)
fig, axis = plt.subplots(nrows=1, ncols=3, figsize=(20, 10), dpi=80)
# One panel per metric: (column of df3, panel title).
panels = [
    (('weight', 'sum'), '年度销量对比'),
    (('amount', 'sum'), '年度销售额对比'),
    (('amount', 'mean'), '年度平均销售额对比'),
]
for panel_ax, (column, title) in zip(axis, panels):
    panel_ax.bar(df3.index, df3[column].values)
    # Years are integers; pin the ticks so the axis does not show fractional years.
    panel_ax.set_xticks(df3.index)
    panel_ax.set_title(title,
                       fontsize='xx-large',
                       horizontalalignment='center',
                       verticalalignment='top')
    # Write each bar's value on top of the bar.
    for a, b in zip(df3.index, df3[column]):
        panel_ax.text(a, b, b, ha='center', fontsize='xx-large')
print(df3)
plt.show()

分析维度:商品

水果年度销量占比

# Share of each fruit in the yearly sold weight (every row sums to 1).
# fill_value=0 keeps a fruit that was not sold in some year from becoming NaN.
df4 = df.pivot_table(index='year',
                     values='weight',
                     columns='fruits',
                     aggfunc='sum',
                     fill_value=0)
df4 = df4.div(df4.sum(axis=1), axis=0)
# One pie per year actually present in the data (the count depends on the run date).
fig, ax = plt.subplots(nrows=1, ncols=len(df4), figsize=(20, 10), dpi=80, squeeze=False)
ax = ax.ravel()
# Wedges follow the pivot's column order, so the legend must use that order too.
labels = list(df4.columns)
for year_ax, (year, shares) in zip(ax, df4.iterrows()):
    year_ax.pie(list(shares.values),
                autopct='%1.1f%%',
                startangle=90,
                wedgeprops={'lw': 5, 'width': 0.3, 'edgecolor': 'w'})
    year_ax.set_title('{}年'.format(year),
                      bbox=dict(boxstyle='round,pad=0.5', fc='w', ec='k', lw=1, alpha=0.5))
    # Legend
    year_ax.legend(loc=0, ncol=1, labels=labels, frameon=False)
    # Equal aspect ratio so the pie is drawn as a circle
    year_ax.axis('equal')
print(df4)
plt.show()

各水果年度销售金额对比

# Sales amount per fruit for every year in the data.
df5 = df.pivot_table(index=['year'],
                     values='amount',
                     columns='fruits',
                     aggfunc='sum',
                     fill_value=0).round(0)
# One panel per year present in the data; the years depend on the run date,
# so neither the panel count nor the titles are hard-coded.
fig, ax = plt.subplots(nrows=1, ncols=len(df5), figsize=(20, 10), dpi=80, squeeze=False)
ax = ax.ravel()
bar_colors = ['r', 'g', 'b']
for i, (year, amounts) in enumerate(df5.iterrows()):
    ax[i].bar(df5.columns, amounts.values, color=bar_colors[i % len(bar_colors)], alpha=0.4)
    ax[i].set_title('{}年各水果销售金额对比'.format(year),
                    fontsize='xx-large',
                    horizontalalignment='center',
                    verticalalignment='top')
    # Write each bar's value on top of the bar.
    for a, b in zip(df5.columns, amounts.values):
        ax[i].text(a, b, b, ha='center', fontsize='xx-large')
print(df5)
plt.show()

商品月度销量变化

# Sold weight per calendar month and fruit, drawn as a stacked bar chart.
df6 = df.pivot_table(index=['month'],
                     values='weight',
                     columns='fruits',
                     aggfunc='sum',
                     fill_value=0)
fig = plt.figure(figsize=(20, 10), dpi=80, facecolor='white', edgecolor=None)
# Eight distinct colours, one per fruit ('k' and 'black' would be the same colour).
color_list = ['c', 'm', 'y', 'k', 'r', 'g', 'b', 'orange']
# Running top of the stack; each fruit is drawn on top of the previous ones.
stack_top = np.zeros(len(df6))
for fruit, color in zip(df6.columns, color_list):
    heights = df6[fruit].values
    plt.bar(df6.index, heights, bottom=stack_top, color=color, alpha=0.4, label=fruit)
    stack_top = stack_top + heights
plt.xticks(df6.index)
plt.title('商品月度销量变化堆积图')
# Legend entries come from the label= passed to each bar call.
plt.legend()
print(df6)
plt.show()

分析维度:地区

# Sold weight per shipping region: bar chart on the left, share ring on the right.
df7 = df.pivot_table(index='regoin', values='weight', aggfunc='sum')
fig, axis = plt.subplots(nrows=1, ncols=2, figsize=(20, 10), dpi=80)
axis[0].bar(df7.index, df7['weight'], color=['b', 'm', 'g', 'y'])
# Value labels, centred over their bars like in the other charts.
for a, b in zip(df7.index, df7['weight']):
    axis[0].text(a, b, b, ha='center', fontsize='xx-large')
# Each region's fraction of the total weight, rounded to two decimals.
size = [round(i / df7['weight'].sum(), 2) for i in df7['weight'].values]
label = list(df7.index.values)
axis[1].pie(size, startangle=90, labels=label, wedgeprops={'lw': 5, 'width': 0.3, 'edgecolor': 'w'})

axis[1].set_title('饼图(pie)', bbox=dict(boxstyle='round,pad=0.5', fc='w', ec='k', lw=1, alpha=0.5))
# Legend
axis[1].legend(loc=0, ncol=1, labels=label, frameon=False)
# Equal aspect ratio so the pie is drawn as a circle
axis[1].axis('equal')
print(df7)
plt.show()

分析维度:客户

# Order count and total amount per client, sorted by order count ascending.
df9 = df.pivot_table(index='client',
                     values='amount',
                     aggfunc=['count', 'sum']).sort_values(by=('count', 'amount')).reset_index(drop=False)
color_list = ['c', 'm', 'y', 'k', 'r', 'g', 'b']
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 10), dpi=80, facecolor='w', edgecolor=None, alpha=0.4)
client_names = list(df9['client'].values)
order_counts = df9.loc[:, ('count', 'amount')]
ax[0].bar(df9.index, order_counts, color=color_list)
# Bars sit at positions 0..n-1; put one tick under each bar and name it after
# the client so that labels cannot drift away from their bars.
ax[0].set_xticks(list(df9.index))
ax[0].set_xticklabels(client_names)
for a, b in zip(df9.index, order_counts):
    ax[0].text(a, b, b, ha='center', fontsize='xx-large')
# Each client's fraction of the total amount, rounded to two decimals.
total_amount = df9.loc[:, ('sum', 'amount')].sum()
size = [round(i / total_amount, 2) for i in df9.loc[:, ('sum', 'amount')].values]

# Wedges are labelled with the fraction; the legend maps them to client names (same row order).
ax[1].pie(size, labels=size, wedgeprops={'lw': 5, 'width': 0.3, 'edgecolor': 'w'})
ax[1].set_title('饼图(pie)', bbox=dict(boxstyle='round,pad=0.5', fc='w', ec='k', lw=1, alpha=0.5))
ax[1].legend(loc=0, ncol=1, labels=client_names, frameon=False)
ax[1].axis('equal')
print(df9)
plt.show()

RFM用户分层

RFM数据模型

# R per order: whole days between now and the order date.
now = datetime.datetime.now()
df['R'] = (now - df['time']).dt.days
# Most recent order of every client (smallest R first within each client).
R = df.sort_values(by=['client', 'R']).drop_duplicates(['client'], keep='first')
# F = number of orders, M = total amount, one row per client.
RFM = df.groupby('client').agg({'fruits': 'count', 'amount': 'sum'}).reset_index().rename(
    columns={'fruits': 'F', 'amount': 'M'})
# Look recency up by client name; a positional assignment would pair the
# values with the wrong clients whenever the two tables are ordered differently.
RFM['R'] = RFM['client'].map(R.set_index('client')['R'])
print(RFM)

RFM打分表

def quantile_score(values):
    """Score each value from 1 to 5 by how many quintile cut points it exceeds.

    A value strictly above the 80% quantile scores 5, above the 60% quantile 4,
    above the 40% quantile 3, above the 20% quantile 2, and anything else 1.
    """
    cut_points = values.quantile([0.2, 0.4, 0.6, 0.8])  # computed once, not once per row
    score = pd.Series(1, index=values.index)
    for cut in cut_points:
        score += (values > cut).astype(int)
    return score


# Recency score from the day count: <3 -> 5, [3, 7) -> 4, [7, 14) -> 3, [14, 21) -> 2, else 1.
RFM['R'] = pd.cut(RFM['R'],
                  bins=[-np.inf, 3, 7, 14, 21, np.inf],
                  labels=[5, 4, 3, 2, 1],
                  right=False).astype(int)
# Monetary and frequency scores are relative to the other clients (quintiles).
RFM['M'] = quantile_score(RFM['M'])
RFM['F'] = quantile_score(RFM['F'])
# Scores are integers from the start, so no string-to-number replace step is needed.
RFM['total'] = RFM[['R', 'F', 'M']].sum(axis=1)
RFM = RFM.sort_values(by='total', ascending=False)
RFM.set_index(['client'], inplace=True)
print(RFM)

打分表计算

# Integer part of the mean of the first three columns, one value per client.
first_three_mean = RFM.iloc[:, :3].sum(1) / 3
RFM['t_avg'] = first_three_mean.astype(np.int64)

# RFM tiers: flag each of R, F, M with '1' when it is above the client's
# t_avg and '0' otherwise, then concatenate the flags in R, F, M order.
above_avg = RFM[['R', 'F', 'M']].gt(RFM['t_avg'], axis=0).astype(int).astype(str)
flag_string = above_avg['R'] + above_avg['F'] + above_avg['M']
RFM = flag_string.reset_index(drop=False)
RFM.columns = ['client', 'score']
# Lookup table from the three-flag string to the tier name.
d = {
    'score': pd.Series(['111', '110', '101', '100', '011', '010', '001', '000']),
    'scale': pd.Series(['重要价值客户', '一般价值客户', '重要发展客户', '一般发展客户',
                        '重要保持客户', '一般保持客户', '重要挽留客户', '一般挽留客户']),
}
client_scale = pd.DataFrame(d)
RFM = pd.merge(RFM, client_scale, on=['score'])
print(RFM)

各用户层级销售金额及其占比

# Attach each client's tier to the orders, then total the amount per tier.
df = pd.merge(df, RFM, on=['client'])
df11 = df.groupby('scale')['amount'].sum().reset_index(drop=False)
df11['ratio'] = (df11['amount'] / df['amount'].sum() * 100).round(2).map('{}%'.format)
# Rank tiers by amount: 'ratio' holds formatted strings, which would sort
# alphabetically, and the sorted frame has to be assigned back to take effect.
df11 = df11.sort_values(by=['amount'], ascending=False).reset_index(drop=True)
print(df11)

用户复购分析

各用户各类水果的复购金额和复购次数

# Number each (client, fruit) order chronologically: 0 is the first purchase,
# anything after that is a repurchase. Numbering the orders avoids matching on
# (time, amount), which would also discard unrelated orders that share a date
# and an amount.
orders_by_time = df.sort_values('time', kind='stable')
purchase_number = orders_by_time.groupby(['client', 'fruits']).cumcount()
df12 = orders_by_time[purchase_number == 0].reset_index(drop=True)  # first purchases
df13 = orders_by_time[purchase_number > 0].reset_index(drop=True)   # repurchases
# Repurchase count and repurchase amount per client and fruit.
df13 = df13.groupby(['client', 'fruits'])['amount'].agg(['count', 'sum'])
print(df13)

各用户最爱复购水果信息

# One row per (client, fruit) with its repurchase count and amount.
per_pair = df13.reset_index(drop=False)
# Share of the client's total repurchase amount that this fruit accounts for.
client_total = per_pair.groupby('client')['sum'].transform('sum')
per_pair['ratio'] = (per_pair['sum'] / client_total * 100).round(2).map('{}%'.format)
# Keep, for every client, the fruit with the highest repurchase count.
ranked = per_pair.sort_values(by=['client', 'count'], ascending=[False, False])
df14 = ranked.drop_duplicates(['client'], keep='first').reset_index(drop=True)
print(df14)

复购周期

# Orders of each client in chronological order.
df15 = df[['client', 'time']].sort_values(['client', 'time'], ascending=[False, True])
# Pair every order with the same client's previous order date; a client's
# first order has no predecessor and is dropped.
previous_time = df15.groupby('client')['time'].shift(1)
df16 = df15.assign(time1=previous_time).dropna()
# Whole days between consecutive orders of the same client.
df16['timediff'] = (df16['time'] - df16['time1']).dt.days
# Per-client summary of the gaps.
df17 = df16.pivot_table(index='client',
                        values='timediff',
                        aggfunc=['min', 'max', 'mean', 'median', 'count'])
print(df17)

源代码

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime

# Data source
# Order dates: the two years that end yesterday.
# The clock is read once so that year/month/day cannot come from different days.
today = datetime.date.today()
try:
    star_time = today.replace(year=today.year - 2)
except ValueError:
    # Today is Feb 29; the year two years back has no Feb 29, so fall back to Feb 28.
    star_time = today.replace(year=today.year - 2, day=28)
end_time = today - datetime.timedelta(days=1)
time_range = pd.date_range(star_time, end_time)
order_count = len(time_range)  # as many random orders as there are days in the range
time_list = np.random.choice(time_range, size=order_count, replace=True)
# Fruits
fruits = ['香蕉', '苹果', '葡萄', '橙子', '西瓜', '哈密瓜', '梨', '草莓']
fruits_list = np.random.choice(fruits, size=order_count, replace=True)
# Customers
client = ['张三', '李四', '王五', '赵六', '孙七', '周八', '吴九']
client_list = np.random.choice(client, size=order_count, replace=True)

# Order table columns:
#   time    order date
#   fruits  fruit name
#   client  customer name
#   weight  fruit weight (random integer in [50, 100))
order = pd.DataFrame({
    'time'  : time_list,
    'fruits': fruits_list,
    'client': client_list,
    'weight': np.random.choice(np.arange(50, 100), size=order_count, replace=True)
})
# Fruit reference table columns:
#   price   unit price (multiplied by weight below to get the order amount)
#   regoin  shipping region (spelling kept because later code uses this column name)
information = pd.DataFrame({
    'fruits': fruits,
    'price' : [3.8, 8.9, 12.8, 6.8, 15.8, 4.9, 5.8, 7],
    'regoin': ['华南', '华北', '西北', '华中', '西北', '华南', '华北', '华中']
})
# Attach price/region to every order. A left join keeps exactly one row per
# order; an outer join would add a row without an order date for any fruit
# that happened not to be drawn.
df = (pd.merge(order, information, on='fruits', how='left')
        .sort_values('time')
        .reset_index(drop=True))

# Derived column: order amount
df['amount'] = df['weight'] * df['price']

# Chinese text settings
plt.rcParams['font.sans-serif'] = ['SimHei']  # use the SimHei font so Chinese labels are displayed
plt.rcParams['axes.unicode_minus'] = False  # keeps the minus sign '-' from showing as a box in saved figures

# "People, goods, place" (人货场) model

# Dimension 1: time
# Calendar columns used as pivot keys below.
df['year'] = df['time'].dt.year
df['month'] = df['time'].dt.month
df['year_month'] = df['time'].dt.strftime('%Y%m')

# Monthly sales amount over the whole data range
# ('sum' as a string: passing the builtin sum is deprecated in recent pandas).
df2 = df.pivot_table(index='year_month',
                     values='amount',
                     aggfunc='sum')
# plt.figure(figsize=(20, 10), dpi=80)
# plt.plot(df2.index, df2['amount'], 'r-.')
# plt.title('连续两年销量走势',
#           fontsize='xx-large',
#           fontweight='light',
#           horizontalalignment='center',
#           verticalalignment='top')
# for a,b in zip(df2.index,df2['amount']):
#     plt.text(a,b+0.05,b,ha='center')
# print(df2.head(5))
# plt.show()

# Per-year total weight, total amount and mean order amount
# (aggregations named by string: passing the builtin sum is deprecated in recent pandas).
df3 = df.pivot_table(index='year',
                     values=['weight', 'amount'],
                     aggfunc={'weight': 'sum', 'amount': ['mean', 'sum']}).round(0)
# fig, axis = plt.subplots(nrows=1, ncols=3, figsize=(20, 10), dpi=80)
# axis[0].bar(df3.index, df3[('weight', 'sum')].values)
# axis[1].bar(df3.index, df3[('amount', 'sum')].values)
# axis[2].bar(df3.index, df3[('amount', 'mean')].values)
# axis[0].set_title('年度销量对比',
#                   fontsize='xx-large',
#                   horizontalalignment='center',
#                   verticalalignment='top')
# axis[1].set_title('年度销售额对比',
#                   fontsize='xx-large',
#                   horizontalalignment='center',
#                   verticalalignment='top')
# axis[2].set_title('年度平均销售额对比',
#                   fontsize='xx-large',
#                   horizontalalignment='center',
#                   verticalalignment='top')
# for a,b in zip(df3.index,df3[('weight', 'sum')]):
#     axis[0].text(a,b,b,ha='center',fontsize='xx-large')
# for a,b in zip(df3.index,df3[('amount', 'sum')]):
#     axis[1].text(a,b,b,ha='center',fontsize='xx-large')
# for a,b in zip(df3.index,df3[('amount', 'mean')]):
#     axis[2].text(a,b,b,ha='center',fontsize='xx-large')
# print(df3)
# plt.show()


# Dimension 2: product
# Share of each fruit in the yearly sold weight (every row sums to 1).
# fill_value=0 keeps a fruit that was not sold in some year from becoming NaN.
df4 = df.pivot_table(index='year',
                     values='weight',
                     columns='fruits',
                     aggfunc='sum',
                     fill_value=0)
df4 = df4.div(df4.sum(axis=1), axis=0)
# fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 10), dpi=80)
# ax[0].pie(list(df4.iloc[0].values),
#           autopct='%1.1f%%',
#           startangle=90,
#           wedgeprops={'lw': 5, 'width': 0.3, 'edgecolor': 'w'})
# ax[1].pie(list(df4.iloc[1].values),
#           autopct='%1.1f%%',
#           startangle=90,
#           wedgeprops={'lw': 5, 'width': 0.3, 'edgecolor': 'w'})
# ax[2].pie(list(df4.iloc[2].values),
#           autopct='%1.1f%%',
#           startangle=90,
#           wedgeprops={'lw': 5, 'width': 0.3, 'edgecolor': 'w'})
# labels = fruits
# for i in range(3):
#     ax[i].set_title('{}年'.format((df4.index.values)[i]),
#                     bbox=dict(boxstyle='round,pad=0.5', fc='w', ec='k', lw=1, alpha=0.5))
#     ax[i].legend(loc=0, ncol=1, labels=labels, frameon=False)
#     ax[i].axis('equal')
# print(df4)
# plt.show()

# Sales amount per fruit for every year in the data
# (fill_value=0: a fruit not sold in some year counts as 0 instead of NaN).
df5 = df.pivot_table(index=['year'],
                     values='amount',
                     columns='fruits',
                     aggfunc='sum',
                     fill_value=0).round(0)
# fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 10), dpi=80)
# ax[0].bar(df5.columns, df5.iloc[0].values, color='r', alpha=0.4)
# ax[1].bar(df5.columns, df5.iloc[1].values, color='g', alpha=0.4)
# ax[2].bar(df5.columns, df5.iloc[2].values, color='b', alpha=0.4)
# ax[0].set_title('2020年各水果销售金额对比',
#                 fontsize='xx-large',
#                 horizontalalignment='center',
#                 verticalalignment='top')
# ax[1].set_title('2021年各水果销售金额对比',
#                 fontsize='xx-large',
#                 horizontalalignment='center',
#                 verticalalignment='top')
# ax[2].set_title('2022年各水果销售金额对比',
#                 fontsize='xx-large',
#                 horizontalalignment='center',
#                 verticalalignment='top')
# for a, b in zip(df5.columns, df5.iloc[0].values):
#     ax[0].text(a, b, b, ha='center', fontsize='xx-large')
# for a, b in zip(df5.columns, df5.iloc[1].values):
#     ax[1].text(a, b, b, ha='center', fontsize='xx-large')
# for a, b in zip(df5.columns, df5.iloc[2].values):
#     ax[2].text(a, b, b, ha='center', fontsize='xx-large')
# print(df5)
# plt.show()

# Sold weight per calendar month and fruit
# (fill_value=0: a fruit not sold in some month counts as 0 instead of NaN).
df6 = df.pivot_table(index=['month'],
                     values='weight',
                     columns='fruits',
                     aggfunc='sum',
                     fill_value=0)
# fig = plt.figure(figsize=(20,10),dpi=80,facecolor='white',edgecolor=None)
# color_list = ['c', 'm', 'y', 'k', 'r', 'g', 'b', 'black']
# for i in range(len(fruits)):
#     plt.bar(df6.index,df6[fruits[i]].values,color=color_list[i],alpha=0.4)
# plt.legend('商品月度销量变化堆积图')
# print(df6)
# plt.show()


# Dimension 3: region
# Sold weight per shipping region
# ('sum' as a string: passing the builtin sum is deprecated in recent pandas).
df7 = df.pivot_table(index='regoin', values='weight', aggfunc='sum')
# fig, axis = plt.subplots(nrows=1, ncols=2, figsize=(20, 10), dpi=80)
# axis[0].bar(df7.index, df7['weight'], color=['b', 'm', 'g', 'y'])
# for a, b in zip(df7.index, df7['weight']):
#     axis[0].text(a, b, b, fontsize='xx-large')
# size = [round(i / df7['weight'].sum(), 2) for i in df7['weight'].values]
# label = list(df7.index.values)
# axis[1].pie(size,startangle=90,labels=label,wedgeprops={'lw': 5, 'width': 0.3, 'edgecolor': 'w'})
#
# axis[1].set_title('饼图(pie)',bbox=dict(boxstyle='round,pad=0.5', fc='w', ec='k', lw=1, alpha=0.5))
# # 设置图例
# axis[1].legend(loc=0, ncol=1, labels=label, frameon=False)
# # 设置等比例轴
# axis[1].axis('equal')
# plt.show()
# print(df7)

# Mean order amount per region and year (mean is pivot_table's default aggregation).
region_year_mean = df.pivot_table(index=['regoin', 'year'],
                                  values='amount',
                                  aggfunc='mean')
df8 = region_year_mean.round(2)
# print(df8)

# Dimension 4: customer
# Order count and total amount per client, sorted by order count ascending
# ('sum' as a string: passing the builtin sum is deprecated in recent pandas).
df9 = df.pivot_table(index='client',
                     values='amount',
                     aggfunc=['count', 'sum']).sort_values(by=('count', 'amount')).reset_index(drop=False)
# color_list = ['c', 'm', 'y', 'k', 'r', 'g', 'b']
# fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 10), dpi=80, facecolor='w', edgecolor=None,alpha=0.4)
# ax[0].bar(df9.index, df9.loc[:, ('count', 'amount')],color=color_list)
# client_bar_list = [''] + list(df9['client'].values)
# ax[0].xticks = list([0] + df9.index)
# ax[0].set_xticklabels(client_bar_list)
# for a, b in zip(df9.index, df9.loc[:, ('count', 'amount')]):
#     ax[0].text(a, b, b, fontsize='xx-large')
# size = [round(i / df9.loc[:, ('sum', 'amount')].sum(), 2) for i in df9.loc[:, ('sum', 'amount')].values]
#
# ax[1].pie(size,labels=size,wedgeprops={'lw': 5, 'width': 0.3, 'edgecolor': 'w'})
# ax[1].set_title('饼图(pie)',bbox=dict(boxstyle='round,pad=0.5', fc='w', ec='k', lw=1, alpha=0.5))
# ax[1].legend(loc=0, ncol=1, labels=list(df9['client'].values), frameon=False)
# ax[1].axis('equal')
# print(df9)
# plt.show()


# RFM customer segmentation
# RFM base table
# R per order: whole days between now and the order date.
now = datetime.datetime.now()
df['R'] = (now - df['time']).dt.days
# Most recent order of every client (smallest R first within each client).
R = df.sort_values(by=['client', 'R']).drop_duplicates(['client'], keep='first')
# F = number of orders, M = total amount, one row per client.
RFM = df.groupby('client').agg({'fruits': 'count', 'amount': 'sum'}).reset_index().rename(
    columns={'fruits': 'F', 'amount': 'M'})
# Look recency up by client name; a positional assignment would pair the
# values with the wrong clients whenever the two tables are ordered differently.
RFM['R'] = RFM['client'].map(R.set_index('client')['R'])
# RFM score table
def quantile_score(values):
    """Score each value from 1 to 5 by how many quintile cut points it exceeds.

    A value strictly above the 80% quantile scores 5, above the 60% quantile 4,
    above the 40% quantile 3, above the 20% quantile 2, and anything else 1.
    """
    cut_points = values.quantile([0.2, 0.4, 0.6, 0.8])  # computed once, not once per row
    score = pd.Series(1, index=values.index)
    for cut in cut_points:
        score += (values > cut).astype(int)
    return score


# Recency score from the day count: <3 -> 5, [3, 7) -> 4, [7, 14) -> 3, [14, 21) -> 2, else 1.
RFM['R'] = pd.cut(RFM['R'],
                  bins=[-np.inf, 3, 7, 14, 21, np.inf],
                  labels=[5, 4, 3, 2, 1],
                  right=False).astype(int)
# Monetary and frequency scores are relative to the other clients (quintiles).
RFM['M'] = quantile_score(RFM['M'])
RFM['F'] = quantile_score(RFM['F'])
# Scores are integers from the start, so no string-to-number replace step is needed.
RFM['total'] = RFM[['R', 'F', 'M']].sum(axis=1)
RFM = RFM.sort_values(by='total', ascending=False)
RFM.set_index(['client'], inplace=True)
# print(RFM)
# RFM score table calculation
# Integer part of the mean of the first three columns, one value per client.
first_three_mean = RFM.iloc[:, :3].sum(1) / 3
RFM['t_avg'] = first_three_mean.astype(np.int64)

# RFM tiers: flag each of R, F, M with '1' when it is above the client's
# t_avg and '0' otherwise, then concatenate the flags in R, F, M order.
above_avg = RFM[['R', 'F', 'M']].gt(RFM['t_avg'], axis=0).astype(int).astype(str)
flag_string = above_avg['R'] + above_avg['F'] + above_avg['M']
RFM = flag_string.reset_index(drop=False)
RFM.columns = ['client', 'score']
# Lookup table from the three-flag string to the tier name.
d = {
    'score': pd.Series(['111', '110', '101', '100', '011', '010', '001', '000']),
    'scale': pd.Series(['重要价值客户', '一般价值客户', '重要发展客户', '一般发展客户',
                        '重要保持客户', '一般保持客户', '重要挽留客户', '一般挽留客户']),
}
client_scale = pd.DataFrame(d)
RFM = pd.merge(RFM, client_scale, on=['score'])
# print(RFM)

# Sales amount and share per customer tier
# Attach each client's tier to the orders, then total the amount per tier.
df = pd.merge(df, RFM, on=['client'])
df11 = df.groupby('scale')['amount'].sum().reset_index(drop=False)
df11['ratio'] = (df11['amount'] / df['amount'].sum() * 100).round(2).map('{}%'.format)
# Rank tiers by amount: 'ratio' holds formatted strings, which would sort
# alphabetically, and the sorted frame has to be assigned back to take effect.
df11 = df11.sort_values(by=['amount'], ascending=False).reset_index(drop=True)
# print(RFM)

# Repurchase analysis
# Repurchase count and amount per client and fruit
# Number each (client, fruit) order chronologically: 0 is the first purchase,
# anything after that is a repurchase. Numbering the orders avoids matching on
# (time, amount), which would also discard unrelated orders that share a date
# and an amount.
orders_by_time = df.sort_values('time', kind='stable')
purchase_number = orders_by_time.groupby(['client', 'fruits']).cumcount()
df12 = orders_by_time[purchase_number == 0].reset_index(drop=True)  # first purchases
df13 = orders_by_time[purchase_number > 0].reset_index(drop=True)   # repurchases
df13 = df13.groupby(['client', 'fruits'])['amount'].agg(['count', 'sum'])
# print(df13)

# Each client's most repurchased fruit
# One row per (client, fruit) with its repurchase count and amount.
per_pair = df13.reset_index(drop=False)
# Share of the client's total repurchase amount that this fruit accounts for.
client_total = per_pair.groupby('client')['sum'].transform('sum')
per_pair['ratio'] = (per_pair['sum'] / client_total * 100).round(2).map('{}%'.format)
# Keep, for every client, the fruit with the highest repurchase count.
ranked = per_pair.sort_values(by=['client', 'count'], ascending=[False, False])
df14 = ranked.drop_duplicates(['client'], keep='first').reset_index(drop=True)
# print(df14)

# Repurchase cycle
# Orders of each client in chronological order.
df15 = df[['client', 'time']].sort_values(['client', 'time'], ascending=[False, True])
# Pair every order with the same client's previous order date; a client's
# first order has no predecessor and is dropped.
previous_time = df15.groupby('client')['time'].shift(1)
df16 = df15.assign(time1=previous_time).dropna()
# Whole days between consecutive orders of the same client.
df16['timediff'] = (df16['time'] - df16['time1']).dt.days
# Per-client summary of the gaps.
df17 = df16.pivot_table(index='client',
                        values='timediff',
                        aggfunc=['min', 'max', 'mean', 'median', 'count'])
# print(df17)

文末结语

本篇文章仅使用 pandas 和 matplotlib 做简单的数据处理;由于数据是随机生成的,文中只提供分析思路,没有展示打印结果并逐项解读。

具体数据的变化需根据打印结果来做分析(每次运行都会重新生成随机数据)。读者可继续细化各类分析,例如:人货场中针对历史销售情况做销量预测;RFM 中针对各类用户群体输出运营策略;客户复购分析可根据复购情况推出捆绑套餐,拉动销量较低的水果的销量。


参考文章

Pandas数据分析经典案例 - 云+社区 - 腾讯云

数据分析方法-RFM用户价值分层模型_inganxu的博客-CSDN博客

  • 2
    点赞
  • 14
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

inganxu

感谢您的支持!!!!

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值