阿里云天池金融风控-Task5

最新推荐文章于 2022-07-19 19:57:46 发布

!sTorm

最新推荐文章于 2022-07-19 19:57:46 发布

阅读量483

点赞数

本文链接：https://blog.csdn.net/weixin_45720616/article/details/116406523

版权

模型融合

本文采用教程中的 Stacking 集成方法进行训练：基分类器为 KNN、朴素贝叶斯和随机森林，它们的预测标签作为输入特征交给元分类器（逻辑斯蒂回归）。
在这里插入图片描述
上图为 Stacking 模型融合的示意图。

import warnings
warnings.filterwarnings('ignore')
import itertools
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from mlxtend.plotting import plot_learning_curves
from mlxtend.plotting import plot_decision_regions

先用各列的中位数填充缺失值：

# Impute missing values with per-column medians.
# The medians are computed once, on the training set only, and reused for the
# test set: both sets are then filled with the same values, and no statistic
# is derived from test data.
# NOTE(review): assumes X_train / X_test are pandas DataFrames sharing the
# same columns -- confirm against the code that loads them.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Level-0 (base) learners: 1-nearest-neighbour, a seeded random forest and
# Gaussian naive Bayes.
clf1, clf2, clf3 = (
    KNeighborsClassifier(n_neighbors=1),
    RandomForestClassifier(random_state=1),
    GaussianNB(),
)

# Level-1 (meta) learner that is stacked on top of the base learners.
lr = LogisticRegression()

# Models to evaluate: the three base learners first, then the stacked
# ensemble built from them.
clf_list = [clf1, clf2, clf3]
sclf = StackingClassifier(classifiers=list(clf_list), meta_classifier=lr)
clf_list.append(sclf)

# Display names, in the same order as clf_list.
label = ['KNN', 'Random Forest', 'Naive Bayes', 'Stacking Classifier']

# NOTE(review): `grid` is not needed for scoring; it is kept (and no longer
# consumed by the loop below) in case later plotting code reads it -- confirm.
grid = itertools.product([0, 1], repeat=2)

# Per-model mean / standard deviation of the cross-validated ROC AUC,
# in the same order as clf_list.
clf_cv_mean = []
clf_cv_std = []

# Evaluate every model with 5-fold cross-validation.
# The loop variable is called `name` so that it does not overwrite the
# module-level `label` list while iterating over it.
for clf, name in zip(clf_list, label):
    scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc')
    clf_cv_mean.append(scores.mean())
    clf_cv_std.append(scores.std())
    # The metric requested above is ROC AUC, so report it under that name.
    print("ROC AUC: %.2f (+/- %.2f) [%s]" % (scores.mean(), scores.std(), name))