# K折交叉验证
from sklearn.model_selection import KFold
# Imported here because the RMSE below is built on mean squared error.
from sklearn.metrics import mean_squared_error

# 10-fold cross-validation of an ordinary least-squares linear regression.
# NOTE(review): assumes X and y (defined earlier in the file) are NumPy arrays
# that accept integer-array indexing, and that y is strictly positive so that
# np.log(y) is finite -- confirm against the data-loading code.
kf = KFold(n_splits=10)  # 10 folds
rmse_scores = []  # one score per fold
for train_indices, test_indices in kf.split(X):  # index arrays for each fold
    X_train, X_test = X[train_indices], X[test_indices]  # features: train / validation
    y_train, y_test = y[train_indices], y[test_indices]  # targets: train / validation

    # `normalize=True` is intentionally not passed: the argument was removed
    # from LinearRegression in scikit-learn 1.2, and for plain least squares
    # with an intercept the predictions do not depend on feature scaling
    # (up to floating-point error), so dropping it keeps the results.
    LR = LinearRegression()
    LR.fit(X_train, y_train)  # train on this fold's training split
    y_pred = LR.predict(X_test)  # predict the held-out split

    # Root-mean-squared error between log-targets and log-predictions.
    # The previous code took sqrt(mean_absolute_error(...)), which is not an
    # RMSE. np.abs() guards np.log() against negative predictions, which a
    # linear model can produce.
    rmse = np.sqrt(mean_squared_error(np.log(y_test), np.log(np.abs(y_pred))))
    rmse_scores.append(rmse)  # collect the per-fold score

print("rmse scores : ", rmse_scores)
print(f'average rmse score : {np.mean(rmse_scores)}')
# 2 Random forest (regression)
from sklearn.ensemble import RandomForestRegressor
# Imported here because the RMSE below is built on mean squared error.
from sklearn.metrics import mean_squared_error

# 10-fold cross-validation of a random-forest regressor.
# NOTE(review): assumes X and y (defined earlier in the file) are NumPy arrays
# that accept integer-array indexing, and that y is strictly positive so that
# np.log(y) is finite -- confirm against the data-loading code.
kf = KFold(n_splits=10)
rmse_scores = []  # one score per fold
for train_indices, test_indices in kf.split(X):
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    # Base model with default hyper-parameters. No random_state is set, so
    # scores vary slightly from run to run.
    RFR = RandomForestRegressor()
    RFR.fit(X_train, y_train)  # train on this fold's training split
    y_pred = RFR.predict(X_test)  # predict the held-out split

    # Root-mean-squared error between log-targets and log-predictions, so the
    # value matches the "rmse" label printed below. The previous code stored a
    # raw mean absolute error under that name. np.ravel() keeps both sides 1-D
    # in case y is stored as a column vector.
    rmse = np.sqrt(
        mean_squared_error(
            np.ravel(np.log(y_test)),
            np.ravel(np.log(np.abs(y_pred))),
        )
    )
    rmse_scores.append(rmse)  # collect the per-fold score

print("rmse scores : ", rmse_scores)
print(f'average rmse scores : {np.mean(rmse_scores)}')