好文章:
https://zhuanlan.zhihu.com/p/61705517
一个stacking过程:
_N_FOLDS = 5 # 采用5折交叉验证
kf = KFold(n_splits=_N_FOLDS, random_state=42) # sklearn的交叉验证模块,用于划分数据
def get_oof(clf, X_train, y_train, X_test):
# X_train: 1000 * 10
# y_train: 1 * 1000
# X_test : 500 * 10
oof_train = np.zeros((X_train.shape[0], 1)) # 1000 * 1 Stacking后训练数据的输出
oof_test_skf = np.empty((_N_FOLDS, X_test.shape[0], 1)) # 5 * 500 * 1,oof_test_skf[i]代表第i折交叉验证产生的模型对测试集预测结果
for i, (train_index, test_index) in enumerate(kf.split(X_train)): # 交叉验证划分此时的训练集和验证集
kf_X_train = X_train[train_index] # 800 * 10 训练集
kf_y_train = y_train[train_index] # 1 * 800 训练集对应的输出
kf_X_val = X_train[test