# Source: https://www.kaggle.com/eikedehling/stack-of-svm-elasticnet-xgboost-rf-0-55/code
class Ensemble(object):
    """Two-level stacking ensemble.

    Every base model is fitted on K-fold training splits. Its predictions on
    the held-out rows fill one column of the stacker's training matrix, and
    its predictions on the test set, averaged over the folds, fill the
    matching column of the stacker's test matrix.
    """

    def __init__(self, n_splits, stacker, base_models):
        """Store the ensemble configuration.

        Args:
            n_splits: Number of K-fold splits used to build the
                out-of-fold predictions.
            stacker: Second-level estimator; ``fit`` and ``predict`` are
                called on it.
            base_models: Iterable of first-level estimators; ``fit`` and
                ``predict`` are called on each of them.
        """
        self.base_models = base_models
        self.stacker = stacker
        self.n_splits = n_splits

    def fit_predict(self, X, y, T):
        """Fit the base models and the stacker, then predict for ``T``.

        Args:
            X: Training features (converted with ``np.array``).
            y: Training targets (converted with ``np.array``).
            T: Test features (converted with ``np.array``).

        Returns:
            The stacker's predictions for the test-set meta-features.
        """
        X, y, T = np.array(X), np.array(y), np.array(T)

        # The fold assignment is fixed by the seed and shared by all models.
        splitter = KFold(n_splits=self.n_splits, shuffle=True, random_state=2016)
        folds = list(splitter.split(X, y))

        n_models = len(self.base_models)
        meta_train = np.zeros((X.shape[0], n_models))
        meta_test = np.zeros((T.shape[0], n_models))

        for model_no, model in enumerate(self.base_models):
            # One column of test-set predictions per fold for this model.
            per_fold_test = np.zeros((T.shape[0], self.n_splits))

            for fold_no, (fit_rows, holdout_rows) in enumerate(folds):
                y_holdout = y[holdout_rows]

                # The same estimator object is refitted on every fold.
                model.fit(X[fit_rows], y[fit_rows])
                holdout_pred = model.predict(X[holdout_rows])[:]

                fold_score = r2_score(y_holdout, holdout_pred)
                print("Model %d fold %d score %f" % (model_no, fold_no, fold_score))

                # Out-of-fold predictions become the stacker's training input.
                meta_train[holdout_rows, model_no] = holdout_pred
                per_fold_test[:, fold_no] = model.predict(T)[:]

            # Average the per-fold test predictions into a single feature.
            meta_test[:, model_no] = np.mean(per_fold_test, axis=1)

        # Optional diagnostic, left disabled:
        # results = cross_val_score(self.stacker, S_train, y, cv=5, scoring='r2')
        # print("Stacker score: %.4f (%.4f)" % (results.mean(), results.std()))
        # exit()

        self.stacker.fit(meta_train, y)
        stacked_pred = self.stacker.predict(meta_test)[:]
        return stacked_pred
# Build the stacked ensemble: four base models blended by an ElasticNet.
# Alternative stacker that was tried:
#   stacker=ElasticNetCV(l1_ratio=[x/10.0 for x in range(1,10)])
stack = Ensemble(
    base_models=(svm_pipe, en_pipe, xgb_pipe, rf_model),
    stacker=ElasticNet(alpha=1.4, l1_ratio=0.1),
    n_splits=5,
)

# Fit on the training data and produce the stacked predictions for `test`.
y_test = stack.fit_predict(X=train, y=y_train, T=test)