stacking技术图示:
完整代码:
# -*- coding: utf-8 -*-
from sklearn.datasets import load_iris
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import pandas as pd
# 显示所有列
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import lightgbm as lgb
# Show every column when printing a DataFrame (None removes the limit).
pd.set_option('display.max_columns', None)
# Show every row when printing a DataFrame (None removes the limit).
pd.set_option('display.max_rows', None)
# Allow up to 100 characters per cell before truncation (pandas default is 50).
# The fully qualified option name is used because abbreviated names rely on
# pattern matching and can break if pandas adds a similarly named option.
pd.set_option('display.max_colwidth', 100)
def stacking(model, train_data, train_target, test_data, n_fold):
"""
:param model: 模型算法
:param train_data: 训练集(不含带预测的目标特征)
:param train_target: 需要预测的目标特征
:param test_data: 测试集
:param n_fold: 交叉验证的折数
:return:
"""
skf = StratifiedKFold(n_splits=n_fold, random_state=1) # StratifiedKFold 默认分层采样
train_pred = np.zeros((train_data.shape[0], 1), int) # 存储训练集预测结果
test_pred = np.zeros((test_data.shape[0], 1), int) # 存储测试集预测结果 行数:len(test_data) ,列数:1列
for skf_index, (train_index, val_index) in enumerate(skf.split(train_data, train_target)):
print('第 ', skf_index+1, ' 折交叉验证开始... ')
# 训练集划分
x_train, x_val = train_data.iloc[train_index], train_da