# 使用bayes_XGB快速调参 (quick hyperparameter tuning with bayes_XGB)
from sklearn.metrics import roc_curve, auc, recall_score, accuracy_score, roc_auc_score, precision_score, f1_score
import matplotlib as plt
def model_metrics(clf, X_train, X_test, y_train, y_test):
    """Print train/test classification metrics and draw both ROC curves.

    Accuracy, precision, recall, F1 and ROC-AUC are printed one metric per
    line (training value first, then test value).  Afterwards the training
    and test ROC curves plus a diagonal reference line are drawn on the
    current matplotlib axes.  The figure is neither shown nor saved here.

    Args:
        clf: Fitted classifier exposing ``predict`` and ``predict_proba``;
            column 1 of ``predict_proba`` is used as the positive-class score.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels (label ``1`` is the positive class for the
            ROC curves).
        y_test: Test labels.

    Returns:
        None.
    """
    # The module-level ``plt`` alias is bound to the top-level ``matplotlib``
    # package, which does not expose plot/xlabel/legend/title.  Those live in
    # ``matplotlib.pyplot``, so bind the right module locally.
    import matplotlib.pyplot as plt

    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    y_train_proba = clf.predict_proba(X_train)[:, 1]
    y_test_proba = clf.predict_proba(X_test)[:, 1]

    # (heading, scorer, train input, test input) -- label-based metrics take
    # the hard predictions, AUC takes the positive-class probabilities.
    report = [
        ('[准确率]', accuracy_score, y_train_pred, y_test_pred),
        ('[精准率]', precision_score, y_train_pred, y_test_pred),
        ('[召回率]', recall_score, y_train_pred, y_test_pred),
        ('[f1-score]', f1_score, y_train_pred, y_test_pred),
        ('[auc值]', roc_auc_score, y_train_proba, y_test_proba),
    ]
    for heading, scorer, train_output, test_output in report:
        print(heading, end=' ')
        print('训练集:', '%.4f' % scorer(y_train, train_output), end=' ')
        print('测试集:', '%.4f' % scorer(y_test, test_output))

    fpr_train, tpr_train, _ = roc_curve(y_train, y_train_proba, pos_label=1)
    fpr_test, tpr_test, _ = roc_curve(y_test, y_test_proba, pos_label=1)

    label = ["Train - AUC:{:.4f}".format(auc(fpr_train, tpr_train)),
             "Test - AUC:{:.4f}".format(auc(fpr_test, tpr_test))]

    plt.plot(fpr_train, tpr_train)
    plt.plot(fpr_test, tpr_test)
    # Diagonal reference line (diamond markers, dashed); it is drawn last so
    # the two legend entries below attach to the train/test curves.
    plt.plot([0, 1], [0, 1], 'd--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(label, loc=4)
    plt.title("ROC curve")
import numpy as np
import pandas as pd
import matplotlib as plt
def plot_ks(y_test, y_score, positive_flag):
    """Plot a decile-based KS (Kolmogorov-Smirnov) chart and show it.

    Scores are ranked in descending order and the score found at each of the
    10%, 20%, ..., 90% rank positions is used as a threshold.  For every
    threshold the true-positive rate (share of positives scoring strictly
    above it) and the false-positive rate (1 - share of negatives scoring at
    or below it) are computed.  Both curves are plotted against the cut
    fraction and the largest gap between them is annotated as the KS value.

    Args:
        y_test: True labels (Series, array or list).  Not modified.
        y_score: Predicted scores, positionally aligned with ``y_test``.
        positive_flag: Label value that marks the positive class; every other
            label is treated as negative.

    Returns:
        None.  The chart is displayed via ``plt.show()``.

    Raises:
        ValueError: If the inputs differ in length, or if ``y_test`` does not
            contain both positive and negative samples.
    """
    # The module-level ``plt`` alias is bound to the top-level ``matplotlib``
    # package; the plotting functions used below live in ``matplotlib.pyplot``.
    import matplotlib.pyplot as plt

    # Work on plain arrays: alignment is positional and the caller's objects
    # (in particular the index of a Series passed as y_test) stay untouched.
    labels = np.asarray(y_test).ravel()
    scores = np.asarray(y_score).ravel()
    if labels.shape[0] != scores.shape[0]:
        raise ValueError(
            'y_test and y_score must have the same length, got %d and %d'
            % (labels.shape[0], scores.shape[0]))

    is_positive = labels == positive_flag
    is_negative = labels != positive_flag
    positive = int(np.count_nonzero(is_positive))
    negative = int(np.count_nonzero(is_negative))
    if positive == 0 or negative == 0:
        raise ValueError(
            'y_test must contain both positive and negative samples '
            '(positive=%d, negative=%d)' % (positive, negative))

    # Thresholds are the scores sitting at each decile of the descending rank.
    cuts = np.arange(0.1, 1, 0.1)
    ranked = pd.Series(scores).sort_values(ascending=False)
    positions = (len(ranked) * cuts).astype('int')
    thresholds = ranked.iloc[positions].to_numpy()

    # One row per threshold, one column per sample.
    above = scores > thresholds[:, None]
    at_or_below = scores <= thresholds[:, None]
    sensitivity = (above & is_positive).sum(axis=1) / positive
    specificity = (at_or_below & is_negative).sum(axis=1) / negative

    fpr = 1 - specificity
    tpr = sensitivity
    ks = tpr - fpr
    best = int(np.argmax(ks))

    x_axis = [0] + cuts.tolist() + [1]
    # 1 - specificity is the false-positive rate and sensitivity is the
    # true-positive rate.
    plt.plot(x_axis, [0] + fpr.tolist() + [1], label='1-Specificity(FPR)')
    plt.plot(x_axis, [0] + tpr.tolist() + [1], label='Sensitivity(TPR)')
    plt.vlines(cuts[best], ymin=fpr[best], ymax=tpr[best], linestyles='--')
    plt.text(x=cuts[best] + 0.01,
             y=fpr[best] + ks[best] / 2,
             s='KS= %.2f' % ks[best])
    plt.legend()
    plt.show()
def XGB_predict(X_train, y_train, X_test, y_test):
    """Tune an XGBoost binary classifier with Bayesian optimisation and report it.

    A Bayesian search over ``eta``, ``gamma``, ``max_depth`` and
    ``n_estimators`` maximises the mean cross-validated ROC-AUC on the
    training data.  A classifier is then fitted on the full training set
    with the best parameters, its metrics/ROC curves are reported through
    ``model_metrics`` and a KS chart is drawn through ``plot_ks``.

    Relies on ``xgb`` (xgboost), ``cross_val_score`` (scikit-learn),
    ``BayesianOptimization`` (bayes_opt) and ``pd`` (pandas) being available
    at module level.

    Args:
        X_train: Training features.
        y_train: Training labels (binary; ``1`` is the positive class).
        X_test: Test features.
        y_test: Test labels.

    Returns:
        tuple: ``(xgb_model, params)`` where ``xgb_model`` is the fitted
        ``xgb.XGBClassifier`` and ``params`` is the best parameter dict found
        (keys ``eta``, ``gamma``, ``max_depth``, ``n_estimators``; the last
        two cast to ``int``).
    """
    def build_classifier(eta, gamma, max_depth, n_estimators):
        """Create an unfitted classifier from (possibly float) search values."""
        return xgb.XGBClassifier(
            objective='binary:logistic',  # ':' is the separator xgboost expects
            learning_rate=max(eta, 0),
            gamma=max(gamma, 0),
            # The optimiser proposes floats; tree depth and count are integers.
            max_depth=int(max_depth),
            n_estimators=int(n_estimators),
            n_jobs=-1,
        )

    def xgb_cv(eta, gamma, max_depth, n_estimators):
        """Objective for the optimiser: mean cross-validated ROC-AUC.

        The signature must accept every key of the search bounds because the
        optimiser calls it with all of them as keyword arguments.
        """
        candidate = build_classifier(eta, gamma, max_depth, n_estimators)
        # AUC is requested through ``scoring``; early stopping is not used
        # because cross_val_score supplies no evaluation set to stop on.
        return cross_val_score(candidate, X=X_train, y=y_train,
                               scoring='roc_auc').mean()

    opt = BayesianOptimization(xgb_cv, {'eta': (0.001, 0.1),
                                        'gamma': (0, 1),
                                        'max_depth': (1, 15),
                                        'n_estimators': (40, 80)})
    opt.maximize()

    params = dict(opt.max['params'])
    params.update({'max_depth': int(params['max_depth']),
                   'n_estimators': int(params['n_estimators'])})

    # Use the scikit-learn wrapper for the final model: the reporting helpers
    # need ``predict`` / ``predict_proba``, which a raw Booster from
    # ``xgb.train`` does not provide.
    xgb_model = build_classifier(params['eta'], params['gamma'],
                                 params['max_depth'], params['n_estimators'])
    xgb_model.fit(X_train, y_train)

    # model_metrics expects (clf, X_train, X_test, y_train, y_test).
    model_metrics(xgb_model, X_train, X_test, y_train, y_test)

    # Start a fresh figure so the KS chart is not drawn over the ROC curves
    # that model_metrics left on the current axes.
    import matplotlib.pyplot as plt
    plt.figure()

    y_score = pd.Series(xgb_model.predict_proba(X_test)[:, 1])
    plot_ks(y_test, y_score, 1)
    return xgb_model, params