本文实现的是以下链接中的内容,链接如下:预测用户流失
本文代码主要包含三部分:
- 生成模拟用户活跃档案数据
- 生成模拟用户连续几个月不活跃的档案数据
- 计算用户流失率
# -*- coding: utf-8 -*-
"""Generate a simulated user activity profile and write it to CSV.

Each row is one user and each column is one month ('1月' .. '12月').
Every cell is a random 0 or 1.  The inactivity-streak script that reads
this file treats 0 as "active" and 1 as "inactive" for that month.
"""
import pandas as pd
import random

user_num = 5  # number of users to generate

# Column labels of the generated file; the churn script looks columns up
# by these exact names.
MONTH_COLUMNS = ['1月', '2月', '3月', '4月', '5月', '6月',
                 '7月', '8月', '9月', '10月', '11月', '12月']

# Must be the same file the inactivity-streak script reads.  The original
# wrote "data.csv", which that script never opens.
OUTPUT_PATH = "C:\\Users\\yang\\Desktop\\activity_data.csv"


def generate_activity(n_users, columns=MONTH_COLUMNS):
    """Return a DataFrame of random 0/1 flags with one row per user.

    Args:
        n_users: number of rows (users) to generate; 0 gives an empty
            frame that still carries the month columns.
        columns: column labels, one per month.

    Returns:
        A DataFrame of shape (n_users, len(columns)) holding 0 or 1.
    """
    columns = list(columns)
    # Draw user by user, month by month, so a seeded `random` yields the
    # same sequence as a nested for-loop over users and months.
    rows = [[random.randint(0, 1) for _ in columns] for _ in range(n_users)]
    return pd.DataFrame(rows, columns=columns)


if __name__ == "__main__":
    df = generate_activity(user_num)
    df.to_csv(OUTPUT_PATH, encoding='utf-8', index=False)
# -*- coding: utf-8 -*-
"""Generate the inactive-user profile.

Each number in the output is the count of consecutive inactive months:
0 means active, 1 means inactive for one month, 2 means inactive for two
consecutive months, and so on.
"""
import pandas as pd

file_path = "C:\\Users\\yang\\Desktop\\activity_data.csv"
output_path = "C:\\Users\\yang\\Desktop\\inactivity_data.csv"


def count_inactive_streaks(activity):
    """Turn per-month inactivity flags into running streak lengths.

    Every column of `activity` is treated as one month, in chronological
    order, and a cell equal to 1 marks an inactive month.  Going left to
    right, each such cell becomes "value of the previous month + 1"; all
    other cells, and the whole first column, are left as they are.

    Args:
        activity: DataFrame with one row per user and one column per
            month.  It is not modified.

    Returns:
        A new DataFrame with the same index and columns holding the
        streak lengths.
    """
    values = activity.to_numpy(copy=True)
    # Columns must be processed in order: a month's streak builds on the
    # already-updated value of the month before it.
    for col in range(1, values.shape[1]):
        inactive = values[:, col] == 1
        values[inactive, col] = values[inactive, col - 1] + 1
    return pd.DataFrame(values, index=activity.index,
                        columns=activity.columns)


if __name__ == "__main__":
    file_data = pd.read_csv(file_path, encoding='utf-8')
    file_data = count_inactive_streaks(file_data)
    file_data.to_csv(output_path, encoding='utf-8', index=False)
# -*- coding: utf-8 -*-
"""Estimate user churn from the consecutive-inactivity profile.

Steps:
1. Count, for every month, how many users sit at each streak length.
2. For each streak length n >= 2, estimate how likely a user who had been
   inactive n-1 months is to be inactive for an n-th month as well.
3. Multiply those estimates together to get the churn figures printed at
   the end.
"""
import pandas as pd
from collections import Counter

MONTH_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


def build_streak_counts(inactivity):
    """Tabulate how many users have each streak length in each month.

    Args:
        inactivity: DataFrame with columns '1月' .. '12月' whose cells are
            consecutive-inactive-month counts (0 .. 12).

    Returns:
        A DataFrame with rows 0..12 (streak length) and columns Jan..Dec.
        Cell [n, m] is the number of users whose streak is n in month m;
        combinations nobody is in are 0 rather than NaN, so they can be
        used in arithmetic later.
    """
    n_months = len(MONTH_NAMES)
    counts = pd.DataFrame(0, index=list(range(n_months + 1)),
                          columns=MONTH_NAMES)
    for col in range(n_months):
        # One tally per month column instead of re-counting for every key.
        tally = Counter(inactivity[str(col + 1) + '月'])
        for streak, users in tally.items():
            counts.iloc[int(streak), col] = users
    return counts


def continuation_probabilities(counts):
    """Estimate P(streak reaches n | streak was n-1 the month before).

    For each streak length n from 2 up to the number of months, take every
    month in which a streak of n is possible, compute

        users at streak n this month / users at streak n-1 last month

    and average the ratios.  Months in which nobody was at streak n-1 the
    month before carry no information and are left out of the average;
    without that, a single empty cell would make the whole estimate NaN.

    Args:
        counts: table produced by `build_streak_counts`.

    Returns:
        A list of floats rounded to 4 places, ordered by n = 2, 3, ...
        An entry is NaN when no month had any user at streak n-1.
    """
    n_months = counts.shape[1]
    probability = []
    for streak in range(2, n_months + 1):
        ratios = []
        for col in range(streak - 1, n_months):
            at_risk = int(counts.iloc[streak - 1, col - 1])
            if at_risk > 0:
                ratios.append(int(counts.iloc[streak, col]) / at_risk)
        if ratios:
            probability.append(round(sum(ratios) / len(ratios), 4))
        else:
            probability.append(float('nan'))
    return probability


def churn_rates(probability):
    """Chain the continuation probabilities into churn figures.

    Entry i of the result is the product of probability[i+1] up to and
    including probability[len(probability) - 2], rounded to 8 places; an
    empty product is 1.  The result has len(probability) - 1 entries.

    NOTE(review): the last element of `probability` is never used, and
    probability[i] itself is not part of entry i.  That matches the
    original loop bounds; confirm against the intended churn definition.
    """
    last = len(probability) - 1
    user_churn = []
    for i in range(last):
        churn = 1
        for j in range(i + 1, last):
            churn = churn * probability[j]
        user_churn.append(round(churn, 8))
    return user_churn


if __name__ == "__main__":
    df = pd.read_csv("C:\\Users\\yang\\Desktop\\inactivity_data.csv")
    continue_inactivity_df = build_streak_counts(df)
    # Share of users who stay inactive for an n-th month, n = 2 .. 12.
    probability = continuation_probabilities(continue_inactivity_df)
    # Churn figures for streaks of 2 to 11 months.
    user_churn = churn_rates(probability)
    print(user_churn)