【T1.2 - 模型构建之集成模型】
说明:本篇是对上一篇《T1.1 模型构建》(预测贷款用户是否会逾期)代码的补充,需在上一篇代码之后运行,并沿用其中已定义的 pd、metrics、x_train、x_test、y_train、y_test。
构建随机森林、GBDT、XGBoost和LightGBM这4个模型,并对每一个模型进行评分。
模型构建
#导入库
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt
# The four ensemble classifiers, each built with its library-default
# hyperparameters and then fitted on the training split.
rf = RandomForestClassifier()
rf.fit(x_train, y_train)

GBDT = GradientBoostingClassifier()
GBDT.fit(x_train, y_train)

XGBoost = XGBClassifier()
XGBoost.fit(x_train, y_train)

LightGBM = LGBMClassifier()
LightGBM.fit(x_train, y_train)
4个模型对应的评分
# Keep the fitted classifiers in a dict so they can all be scored in one loop.
models = {
    'RF': rf,
    'GBDT': GBDT,
    'XGBoost': XGBoost,
    'LightGBM': LightGBM,
}

result_columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1 score', 'AUC', 'AUC1']

# Collect one record per model and build the DataFrame once at the end,
# rather than enlarging it row by row through .loc with a manual counter.
result_rows = []
for name, clf in models.items():
    # Metrics computed from the hard class predictions on the test split.
    y_test_pred = clf.predict(x_test)
    acc = metrics.accuracy_score(y_test, y_test_pred)
    p = metrics.precision_score(y_test, y_test_pred)
    r = metrics.recall_score(y_test, y_test_pred)
    f1 = metrics.f1_score(y_test, y_test_pred)

    # The second predict_proba column is used as the positive-class score.
    y_test_proba = clf.predict_proba(x_test)
    positive_score = y_test_proba[:, 1]

    # AUC, method 1: integrate the ROC curve explicitly.
    fpr, tpr, _ = metrics.roc_curve(y_test, positive_score)
    auc = metrics.auc(fpr, tpr)
    # AUC, method 2: let scikit-learn compute it directly from the scores.
    auc1 = metrics.roc_auc_score(y_test, positive_score)

    result_rows.append([name, acc, p, r, f1, auc, auc1])

df_result = pd.DataFrame(result_rows, columns=result_columns)
print(df_result)
Model Accuracy Precision Recall F1 score AUC AUC1
0 RF 0.770848 0.601266 0.264624 0.367505 0.718992 0.718992
1 GBDT 0.780659 0.611650 0.350975 0.446018 0.763828 0.763828
2 LightGBM 0.770147 0.570136 0.350975 0.434483 0.757402 0.757402
3 XGBoost 0.785564 0.630542 0.356546 0.455516 0.771363 0.771363
4个模型的ROC曲线
def plot_roc_curve(fpr, tpr, label=None):
    """Plot one ROC curve with pyplot.

    Args:
        fpr: False positive rates, used as the x values.
        tpr: True positive rates, used as the y values.
        label: Optional label attached to the plotted line.
    """
    plt.plot(
        fpr,
        tpr,
        label=label,
    )
# Draw the test-set ROC curve of every fitted model on a single figure.
plt.figure()
for name, model in models.items():
    # The second predict_proba column is used as the positive-class score.
    proba = model.predict_proba(x_test)[:, 1]
    fpr, tpr, thresholds = metrics.roc_curve(y_test, proba)
    plot_roc_curve(fpr, tpr, label=name)

plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
plt.axis([0, 1, 0, 1])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Each curve is plotted with label=name; without a legend those labels are
# never displayed and the curves cannot be told apart.
plt.legend(loc='lower right')
plt.show()