DataWhale一周算法进阶3---模型融合

一 任务

用你目前评分最高的模型作为基准模型,和其他模型进行 stacking 融合,得到最终模型及评分结果。

二 代码

import pandas as pd 
import numpy as np
import sys
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from xgboost import XGBClassifier
import lightgbm as lgb
from lightgbm import LGBMClassifier

from sklearn import metrics
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold 
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_validate
from mlxtend.classifier import StackingClassifier
# Load the data set (the CSV is read with GBK encoding).
data_all = pd.read_csv('./data_all.csv', encoding='gbk')

# Separate the feature columns from the 'status' label, then hold out 30% of
# the rows as a test partition with a fixed seed.
from sklearn.model_selection import train_test_split
features = [col for col in data_all.columns if col != 'status']
X = data_all[features]
y = data_all['status']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=2018)

# Standardise the features. The scaler is fitted on the training partition
# only and then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_stand = scaler.transform(X_train)
X_test_stand = scaler.transform(X_test)

# Base learners and meta-learners used by the stacking ensembles below.
lr = LogisticRegression(random_state=2018, C=0.1)
# Bound to `lgbm_clf` rather than `lgb` so the `import lightgbm as lgb` module
# alias is not overwritten by an estimator instance. The boosting type uses the
# lowercase spelling ('gbdt') that the LightGBM documentation specifies.
lgbm_clf = LGBMClassifier(boosting_type='gbdt', random_state=2018, silent=0)
gbdt = GradientBoostingClassifier(random_state=2018, max_depth=3, n_estimators=50)
xgbc = XGBClassifier(random_state=2018, max_depth=3, eta=0.1, subsample=0.6)
rf = RandomForestClassifier(n_estimators=500, oob_score=True, random_state=2018)
svm = SVC(random_state=2018, tol=0.01)

# Stacking ensembles: each combines a list of base classifiers with one
# meta-classifier.
sclf = StackingClassifier(classifiers=[lr, gbdt, xgbc, rf, svm], meta_classifier=lgbm_clf)
sclf1 = StackingClassifier(classifiers=[gbdt, xgbc, svm], meta_classifier=lgbm_clf)
sclf2 = StackingClassifier(classifiers=[gbdt, xgbc, svm], meta_classifier=lr)
sclf3 = StackingClassifier(classifiers=[svm], meta_classifier=lr)
def _positive_class_scores(model, X):
    """Return one continuous score per row of X for AUC / ROC computation.

    ``decision_function`` is preferred; if the model (or an estimator it
    delegates to) does not provide it, the second column of
    ``predict_proba`` is used instead.
    """
    # EAFP instead of hasattr(): a wrapper can define decision_function and
    # still raise AttributeError when the estimator it forwards to lacks it.
    # NOTE(review): believed to apply to mlxtend's StackingClassifier with a
    # meta-classifier that has no decision_function -- confirm.
    try:
        return model.decision_function(X)
    except AttributeError:
        return model.predict_proba(X)[:, 1]


def get_scores(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` and report its metrics on the train and test partitions.

    The model is fitted on ``(X_train, y_train)``. Accuracy, precision,
    recall, F1 and ROC AUC are computed for both partitions, the two ROC
    curves are plotted with matplotlib, and every score is printed.

    Args:
        model: Estimator exposing ``fit``, ``predict`` and either
            ``decision_function`` or ``predict_proba``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        None. Results are shown via ``plt.show()`` and ``print``.
    """
    model.fit(X_train, y_train)

    # Hard class predictions feed the threshold-based metrics.
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Continuous scores feed the ranking metrics (AUC / ROC curve).
    y_train_proba = _positive_class_scores(model, X_train)
    y_test_proba = _positive_class_scores(model, X_test)

    # accuracy
    train_accuracy = metrics.accuracy_score(y_train, y_train_pred)
    test_accuracy = metrics.accuracy_score(y_test, y_test_pred)
    # precision
    train_precision = metrics.precision_score(y_train, y_train_pred)
    test_precision = metrics.precision_score(y_test, y_test_pred)
    # recall
    train_recall = metrics.recall_score(y_train, y_train_pred)
    test_recall = metrics.recall_score(y_test, y_test_pred)
    # f1-score
    train_f1 = metrics.f1_score(y_train, y_train_pred)
    test_f1 = metrics.f1_score(y_test, y_test_pred)
    # auc
    train_auc = metrics.roc_auc_score(y_train, y_train_proba)
    test_auc = metrics.roc_auc_score(y_test, y_test_proba)

    # ROC curves for both partitions, plus the diagonal as a chance baseline.
    train_fprs, train_tprs, train_thresholds = metrics.roc_curve(y_train, y_train_proba)
    test_fprs, test_tprs, test_thresholds = metrics.roc_curve(y_test, y_test_proba)
    plt.plot(train_fprs, train_tprs)
    plt.plot(test_fprs, test_tprs)
    plt.plot([0,1], [0,1],"--")
    plt.title("ROC curve")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.legend(labels=["Train AUC:"+str(round(train_auc, 5)),"Test AUC:"+str(round(test_auc,5))], loc="lower right")
    plt.show()

    # Print every score, grouped by metric.
    print("训练集准确率:", train_accuracy)
    print("测试集准确率:", test_accuracy)
    print("==================================")
    print("训练集精准率:", train_precision)
    print("测试集精准率:", test_precision)
    print("==================================")
    print("训练集召回率:", train_recall)
    print("测试集召回率:", test_recall)
    print("==================================")
    print("训练集F1-score:", train_f1)
    print("测试集F1-score:", test_f1)
    print("==================================")
    print("训练集AUC:", train_auc)
    print("测试集AUC:", test_auc)
# Evaluate the five-model stacking ensemble on the standardised features.
# The scaled matrices are computed earlier in the script; the ensemble's
# logistic-regression and SVM base learners are sensitive to feature scale,
# so they should be trained on these rather than on the raw columns.
get_scores(sclf, X_train_stand, X_test_stand, y_train, y_test)

问题

1. 代码本身没有问题,但每次运行 kernel 都挂掉了(kernel dead),下次用 Google Colab 试一下。

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值