数据分析 | 选择最佳的Stacking堆叠模型、单层结构双层结构 | Python代码

目录

一、单层结构

二、 双层结构

三、运行结果


        一、单层Stacking结构,基学习器为:朴素贝叶斯、随机梯度下降(SGD)、随机森林、决策树、AdaBoost、GBDT、XGBoost,7个任选3个作为基学习器组合,Meta函数固定为MLP。

import itertools
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, confusion_matrix
from sklearn.ensemble import StackingClassifier

# --- Load data ---
# BUGFIX: the original called pd.read_excel() / to_excel() with no file path
# (paths were lost when the code was published), which raises TypeError.
# Fill in the real .xlsx paths below.
TRAIN_PATH = "train.xlsx"             # TODO: actual training-set path
VALIDATION_PATH = "validation.xlsx"   # TODO: actual validation-set path
TEST_PATH = "test.xlsx"               # TODO: actual test-set path
RESULTS_PATH = "stacking_single_layer_results.xlsx"  # TODO: output path

data_train_lasso = pd.read_excel(TRAIN_PATH)
data_validation_lasso = pd.read_excel(VALIDATION_PATH)
data_test_lasso = pd.read_excel(TEST_PATH)

# --- Split features / target: the last column is assumed to be the label ---
X_train = data_train_lasso.iloc[:, 0:-1]
y_train = data_train_lasso.iloc[:, -1]
# NOTE(review): the validation split is loaded but never used below — confirm
# whether model selection was meant to happen on it instead of the test set.
X_validation = data_validation_lasso.iloc[:, 0:-1]
y_validation = data_validation_lasso.iloc[:, -1]
X_test = data_test_lasso.iloc[:, 0:-1]
y_test = data_test_lasso.iloc[:, -1]

# --- Candidate base learners (fixed random_state for reproducibility) ---
base_classifiers = {
    'nb': GaussianNB(),
    'sgd': SGDClassifier(random_state=514),
    'rf': RandomForestClassifier(random_state=514),
    'dt': DecisionTreeClassifier(random_state=514),
    'ada': AdaBoostClassifier(random_state=514),
    'gbdt': GradientBoostingClassifier(random_state=514),
    'xgb': XGBClassifier(random_state=514)
}

# Meta learner is fixed to an MLP.
meta_classifier = MLPClassifier(random_state=514)

# Every way of choosing 3 of the 7 base learners: C(7, 3) = 35 combinations.
base_combinations = list(itertools.combinations(base_classifiers.keys(), 3))

results = []

for base_combination in base_combinations:
    current_classifiers = [(name, base_classifiers[name]) for name in base_combination]

    # StackingClassifier clones its estimators, so reusing the same
    # meta_classifier instance across iterations is safe.
    stacking_classifier = StackingClassifier(
        estimators=current_classifiers, final_estimator=meta_classifier
    )
    stacking_classifier.fit(X_train, y_train)

    y_pred = stacking_classifier.predict(X_test)
    # BUGFIX: ROC-AUC must be computed from positive-class probabilities,
    # not from hard 0/1 predictions (the original passed y_pred).
    # Assumes a binary classification task — labels {0, 1}.
    y_score = stacking_classifier.predict_proba(X_test)[:, 1]

    accuracy_stacking = accuracy_score(y_test, y_pred)
    auc_stacking = roc_auc_score(y_test, y_score)
    f1_stacking = f1_score(y_test, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    fpr_stacking = fp / (fp + tn)  # false-positive rate

    results.append({
        'Classifiers': base_combination,
        'Accuracy': accuracy_stacking,
        'AUC': auc_stacking,
        'F1 Score': f1_stacking,
        'FPR': fpr_stacking,
    })

# Collect and report all 35 combinations.
results_df = pd.DataFrame(results)
print(results_df)

# BUGFIX: supply the output path (the original to_excel() had no argument).
results_df.to_excel(RESULTS_PATH, index=False)
       二、双层Stacking结构,基学习器为:朴素贝叶斯、随机梯度下降(SGD)、随机森林、决策树、AdaBoost、GBDT、XGBoost,7个中每层任选2个作为基学习器组合,Meta函数固定为MLP。

from itertools import combinations
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, confusion_matrix
import pandas as pd

# --- Load data ---
# BUGFIX: the original called pd.read_excel() / to_excel() with no file path
# (paths were lost when the code was published), which raises TypeError.
TRAIN_PATH = "train.xlsx"             # TODO: actual training-set path
VALIDATION_PATH = "validation.xlsx"   # TODO: actual validation-set path
TEST_PATH = "test.xlsx"               # TODO: actual test-set path
RESULTS_PATH = "stacking_double_layer_results.xlsx"  # TODO: output path

data_train_lasso = pd.read_excel(TRAIN_PATH)
data_validation_lasso = pd.read_excel(VALIDATION_PATH)
data_test_lasso = pd.read_excel(TEST_PATH)

# --- Split features / target: the last column is assumed to be the label ---
X_train = data_train_lasso.iloc[:, 0:-1]
y_train = data_train_lasso.iloc[:, -1]
# NOTE(review): the validation split is loaded but never used below.
X_validation = data_validation_lasso.iloc[:, 0:-1]
y_validation = data_validation_lasso.iloc[:, -1]
X_test = data_test_lasso.iloc[:, 0:-1]
y_test = data_test_lasso.iloc[:, -1]

# --- Candidate base learners (fixed random_state for reproducibility) ---
base_classifiers = [
    ('naive_bayes', GaussianNB()),
    ('sgd', SGDClassifier(random_state=514)),
    ('random_forest', RandomForestClassifier(random_state=514)),
    ('decision_tree', DecisionTreeClassifier(random_state=514)),
    ('adaboost', AdaBoostClassifier(random_state=514)),
    ('gbdt', GradientBoostingClassifier(random_state=514)),
    ('xgboost', XGBClassifier(random_state=514))
]

# Meta learner is fixed to an MLP.
meta_classifier = MLPClassifier(random_state=514)

# --- Evaluate every two-layer stack: 2 learners per layer, MLP on top ---
results = {'combo': [], 'accuracy': [], 'auc': [], 'f1': [], 'fpr': []}

# BUGFIX: the original inner loop was combinations(combo_first_layer, 2) —
# all 2-element combinations of a 2-tuple is just that tuple itself, so the
# "second layer" was always identical to the first and the model built was a
# single-layer stack. A true double-layer structure nests StackingClassifiers:
# layer-1 outputs feed the layer-2 learners, whose outputs feed the MLP meta.
for combo_first_layer in combinations(base_classifiers, 2):
    for combo_second_layer in combinations(base_classifiers, 2):
        # Second layer + meta learner, used as the final estimator of the
        # outer (first-layer) stack. Estimators are cloned at fit time, so
        # sharing instances across iterations is safe.
        second_layer_stack = StackingClassifier(
            estimators=list(combo_second_layer), final_estimator=meta_classifier
        )
        stacking_classifier = StackingClassifier(
            estimators=list(combo_first_layer), final_estimator=second_layer_stack
        )

        stacking_classifier.fit(X_train, y_train)

        y_pred = stacking_classifier.predict(X_test)
        # BUGFIX: ROC-AUC from positive-class probabilities, not hard labels.
        # Assumes a binary classification task — labels {0, 1}.
        y_score = stacking_classifier.predict_proba(X_test)[:, 1]

        accuracy_stacking = accuracy_score(y_test, y_pred)
        auc_stacking = roc_auc_score(y_test, y_score)
        f1_stacking = f1_score(y_test, y_pred)
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
        fpr_stacking = fp / (fp + tn)  # false-positive rate

        # Record which learners sat in each layer (layer order matters now).
        combo_key = (
            tuple(name for name, _ in combo_first_layer),
            tuple(name for name, _ in combo_second_layer),
        )
        results['combo'].append(combo_key)
        results['accuracy'].append(accuracy_stacking)
        results['auc'].append(auc_stacking)
        results['f1'].append(f1_stacking)
        results['fpr'].append(fpr_stacking)

# Collect and persist all layer combinations.
results_df = pd.DataFrame(results)

# BUGFIX: supply the output path (the original to_excel() had no argument).
results_df.to_excel(RESULTS_PATH, index=False)
        三、运行结果如下:
Classifiers | Accuracy | AUC | F1 Score | FPR
('nb', 'sgd', 'rf')0.8979905110.8996831780.8948050080.127034121
('nb', 'sgd', 'dt')0.8979905110.8996831780.8948050080.127034121
('nb', 'sgd', 'ada')0.8979905110.8996831780.8948050080.127034121
('nb', 'sgd', 'gbdt')0.8979905110.8996831780.8948050080.127034121
('nb', 'sgd', 'xgb')0.8979905110.8996831780.8948050080.127034121
('nb', 'rf', 'dt')0.9911387110.9913027240.9905709410.011286089
('nb', 'rf', 'ada')0.9908596150.9909692440.9902652890.010761155
('nb', 'rf', 'gbdt')0.9911387110.9912938480.990569540.011154856
('nb', 'rf', 'xgb')0.9912084850.9913683410.9906444910.011154856
('nb', 'dt', 'ada')0.9898130060.9899051020.9891482090.011548556
('nb', 'dt', 'gbdt')0.9908596150.9909692440.9902652890.010761155
('nb', 'dt', 'xgb')0.9915573540.9916609180.9910070610.009973753
('nb', 'ada', 'gbdt')0.9855567960.9856273010.9846130970.015485564
('nb', 'ada', 'xgb')0.9905107450.990721050.9899109790.012598425
('nb', 'gbdt', 'xgb')0.9905805190.9907955430.9899859060.012598425
('sgd', 'rf', 'dt')0.5316773650.500
('sgd', 'rf', 'ada')0.8979905110.8996831780.8948050080.127034121
('sgd', 'rf', 'gbdt')0.5316773650.500
('sgd', 'rf', 'xgb')0.5316773650.500
('sgd', 'dt', 'ada')0.8979905110.8996831780.8948050080.127034121
('sgd', 'dt', 'gbdt')0.5316773650.500
('sgd', 'dt', 'xgb')0.5316773650.500
('sgd', 'ada', 'gbdt')0.8979905110.8996831780.8948050080.127034121
('sgd', 'ada', 'xgb')0.8979905110.8996831780.8948050080.127034121
('sgd', 'gbdt', 'xgb')0.5316773650.500
('rf', 'dt', 'ada')0.9911387110.9913027240.9905709410.011286089
('rf', 'dt', 'gbdt')0.9905107450.990641160.9898974890.011417323
('rf', 'dt', 'xgb')0.9913480320.9914818210.9907888870.010629921
('rf', 'ada', 'gbdt')0.9911387110.9912760940.9905667380.010892388
('rf', 'ada', 'xgb')0.9911387110.9912405880.990561130.010367454
('rf', 'gbdt', 'xgb')0.9912084850.9913683410.9906444910.011154856
('dt', 'ada', 'gbdt')0.9902316490.9903786930.9896034460.011942257
('dt', 'ada', 'xgb')0.9905107450.990694420.9899064870.012204724
('dt', 'gbdt', 'xgb')0.9907898410.9909391340.9901975350.011417323
('ada', 'gbdt', 'xgb')0.9906502930.990861160.9900593470.012467192
  • 10
    点赞
  • 14
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值