一、自己实现:切片
# k-fold cross-validation: manual implementation using slicing
k = 5
# Base fold size plus the number of samples left over. The first `remainder`
# folds take one extra sample, so every sample lands in exactly one test fold
# even when len(X) is not a multiple of k.
per_group_samples, remainder = divmod(len(X), k)
start = 0
for i in range(k):
    stop = start + per_group_samples + (1 if i < remainder else 0)
    # The i-th contiguous slice is held out as the test fold.
    X_test = X[start:stop]
    y_test = y[start:stop]
    # axis=0 joins along rows: everything before and after the test fold
    # becomes the training set.
    X_train = np.concatenate((X[:start], X[stop:]), axis=0)
    y_train = np.concatenate((y[:start], y[stop:]), axis=0)
    start = stop
二、直接调用:随机下标
# k-fold cross-validation: calling scikit-learn's KFold directly
# shuffle=True makes the fold indices random; with the default shuffle=False
# KFold hands back contiguous index ranges. random_state pins the shuffle so
# the same splits are produced on every run.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X):
    # Index arrays select the rows belonging to each side of the split.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
# k-fold cross-validation with a stochastic gradient descent regressor
# Shuffle the folds (seeded for reproducibility) so each test fold is a
# random sample of the data rather than a contiguous slice.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
# The fold counter is named `fold` rather than `k` so the loop does not
# overwrite a fold-count variable `k` defined elsewhere in the script.
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # A fresh model per fold; random_state makes the SGD updates repeatable.
    clf = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
    clf.fit(X_train, y_train)
    # Report the mean squared error on both the training and held-out data.
    score_train = mean_squared_error(y_train, clf.predict(X_train))
    score_test = mean_squared_error(y_test, clf.predict(X_test))
    print(fold, " 折 SGDRegressor train MSE: ", score_train)
    print(fold, " 折 SGDRegressor test MSE: ", score_test)
三、完整代码
from sklearn.model_selection import KFold
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import SGDRegressor
import numpy as np
# Load the iris dataset and pull out the feature matrix and target vector
iris = load_iris()
X, y = iris.data, iris.target
# k-fold cross-validation: manual implementation using slicing
k = 5
# Base fold size plus the number of samples left over. The first `remainder`
# folds take one extra sample, so every sample lands in exactly one test fold
# even when len(X) is not a multiple of k.
per_group_samples, remainder = divmod(len(X), k)
start = 0
for i in range(k):
    stop = start + per_group_samples + (1 if i < remainder else 0)
    # The i-th contiguous slice is held out as the test fold.
    X_test = X[start:stop]
    y_test = y[start:stop]
    # axis=0 joins along rows: everything before and after the test fold
    # becomes the training set.
    X_train = np.concatenate((X[:start], X[stop:]), axis=0)
    y_train = np.concatenate((y[:start], y[stop:]), axis=0)
    start = stop
# k-fold cross-validation: calling scikit-learn's KFold directly
# shuffle=True makes the fold indices random; with the default shuffle=False
# KFold hands back contiguous index ranges. random_state pins the shuffle so
# the same splits are produced on every run.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X):
    # Index arrays select the rows belonging to each side of the split.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
# k-fold cross-validation with a stochastic gradient descent regressor
# Shuffle the folds (seeded for reproducibility) so each test fold is a
# random sample of the data rather than a contiguous slice.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
# The fold counter is named `fold` rather than `k` so the loop does not
# overwrite the fold count `k` set for the manual implementation.
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # A fresh model per fold; random_state makes the SGD updates repeatable.
    clf = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
    clf.fit(X_train, y_train)
    # Report the mean squared error on both the training and held-out data.
    score_train = mean_squared_error(y_train, clf.predict(X_train))
    score_test = mean_squared_error(y_test, clf.predict(X_test))
    print(fold, " 折 SGDRegressor train MSE: ", score_train)
    print(fold, " 折 SGDRegressor test MSE: ", score_test)