import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Read the data into a pandas DataFrame.
churn = pd.read_csv('churn.csv', sep=',', header=0)

# Tidy the column headings: spaces become underscores, apostrophes are
# removed, leading/trailing question marks are stripped, and the result is
# lower-cased.
churn.columns = [
    column_name.lower()
    for column_name in (
        churn.columns
        .str.replace(' ', '_')
        .str.replace("'", "")
        .str.strip('?')
    )
]

# Numeric indicator: 1.0 where the 'churn' column equals 'True.', else 0.0.
churn['churn01'] = (churn['churn'] == 'True.').astype(float)

# Quick look at the first rows and the overall summary statistics.
print(churn.head())
print(churn.describe())

# Calculate descriptive statistics for grouped data: the same three
# statistics for every selected column, split by the value of 'churn'.
print(
    churn.groupby(['churn'])[
        [
            'day_charge',
            'eve_charge',
            'night_charge',
            'intl_charge',
            'account_length',
            'custserv_calls',
        ]
    ].agg(['count', 'mean', 'std'])
)
# Specify different statistics for different variables: mean and standard
# deviation for the charge columns, count/min/max for account length and
# customer-service calls, each computed per value of 'churn'.
print(churn.groupby(['churn']).agg({
    'day_charge': ['mean', 'std'],
    'eve_charge': ['mean', 'std'],
    'night_charge': ['mean', 'std'],
    'intl_charge': ['mean', 'std'],
    'account_length': ['count', 'min', 'max'],
    'custserv_calls': ['count', 'min', 'max'],
}))
对电信用户流失的逻辑斯提回归分析
最新推荐文章于 2024-05-08 14:20:53 发布
本文通过逻辑斯提回归模型对电信用户流失进行深入分析，揭示了影响用户流失的关键因素，包括服务满意度、费用、产品竞争力等，为企业制定精准的用户保留策略提供依据。
摘要由CSDN通过智能技术生成