Predicting Customer Churn with an ANN

This time we use Python to predict customer churn with an artificial neural network.
The complete code and the CSV file can be downloaded from my GitHub repository:
https://github.com/liuzuoping/Deep_Learning_note

Reading the customer churn data

import pandas
df = pandas.read_csv('../data/customer_churn.csv', index_col=0, header = 0)
df.head()


Data preprocessing

# encode the yes/no categorical columns (including the churn target) as 1/0
cat_var = ['international_plan', 'voice_mail_plan', 'churn']

for var in cat_var:
    df[var] = df[var].map(lambda e: 1 if e == 'yes' else 0)
df.head()
df.info()

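Before splitting the data it is also worth checking how imbalanced the target is. The short sketch below is my own addition, not part of the original post:

# churn is already 0/1 after the mapping above; normalize=True gives fractions
print(df['churn'].value_counts(normalize=True))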

Splitting into training and test sets

y = df.iloc[:,-1]   # target: churn (last column)
x = df.iloc[:,:-1]  # features: everything else
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.33, random_state = 123)

Feature scaling

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)  # reuse the training-set mean/std; do not refit on the test set
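As a quick sanity check (my own addition, not in the original post), the scaled training features should now have mean roughly 0 and standard deviation roughly 1 in every column:

import numpy as np
# column-wise statistics of the standardized training set
print(np.round(x_train.mean(axis=0), 3))
print(np.round(x_train.std(axis=0), 3))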

Training the ANN

import keras
from keras.models import Sequential
from keras.layers import Dense

def trainProcess(optimizer):
    # one hidden layer with 8 units, sigmoid output for binary churn prediction
    classifier = Sequential()
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu', input_dim = 16))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    history = classifier.fit(x_train, y_train,
                        batch_size=10,
                        epochs=100,
                        verbose=0,
                        validation_data=(x_test, y_test))
    return history

history1 = trainProcess('sgd')
history2 = trainProcess('RMSprop')
history3 = trainProcess('Adagrad')
history4 = trainProcess('Adadelta')
history5 = trainProcess('Adam')
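The five History objects can then be compared, for example by plotting validation accuracy per epoch. This is a minimal sketch of such a comparison (the metric key differs between Keras versions, so the code checks for both names):

import matplotlib.pyplot as plt

histories = {'SGD': history1, 'RMSprop': history2, 'Adagrad': history3,
             'Adadelta': history4, 'Adam': history5}
for name, history in histories.items():
    # older Keras releases log 'val_acc', newer ones 'val_accuracy'
    key = 'val_acc' if 'val_acc' in history.history else 'val_accuracy'
    plt.plot(history.history[key], label=name)
plt.xlabel('epoch')
plt.ylabel('validation accuracy')
plt.legend()
plt.show()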

K-fold cross-validation

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
def buildClassifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu', input_dim = 16))
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return classifier
classifier = KerasClassifier(build_fn = buildClassifier, batch_size = 10, epochs = 100, verbose = 0, optimizer = 'adam')
accuracies = cross_val_score(estimator = classifier, X = x_train, y = y_train, cv = 5)
mean = accuracies.mean()
variance = accuracies.std()  # note: std() returns the standard deviation across the 5 folds


accuracies
mean
variance

array([0.91051453, 0.91498882, 0.88143176, 0.85874438, 0.9304933 ])
0.8992345571517945
0.02572522922509814

Dropout

from keras.layers import Dropout
def buildClassifierWithDropout(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu', input_dim = 16))
    classifier.add(Dropout(rate=0.1))  # randomly drop 10% of the previous layer's units during training
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return classifier
classifier = KerasClassifier(build_fn = buildClassifierWithDropout, batch_size = 10, epochs = 100, verbose = 0, optimizer='adam' )
accuracies = cross_val_score(estimator = classifier, X = x_train, y = y_train, cv = 5)
mean = accuracies.mean()
variance = accuracies.std()
accuracies
mean
variance

array([0.85458612, 0.89038032, 0.87695748, 0.85201794, 0.86547083])
0.8678825378417969
0.01430245638705941

Grid Search

from sklearn.model_selection import GridSearchCV
classifier = KerasClassifier(build_fn = buildClassifierWithDropout, epochs = 100)
parameters = {'batch_size': [10, 15],
              'optimizer': ['adam', 'rmsprop']}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 5)
grid_search     = grid_search.fit(x_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy   = grid_search.best_score_
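The winning hyperparameter combination and its cross-validated accuracy can then be inspected (the original post showed this output as a screenshot; the test-set score is my own optional check):

print(best_parameters)
print(best_accuracy)

# GridSearchCV refits the best model on the full training set by default,
# so it can also be scored on the held-out test set
print(grid_search.score(x_test, y_test))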

