Sure, here is the code for Task 4. It covers training and evaluation of the Lasso, rpart, random forest, and XGBoost models, with the Task 2 and Task 3 neural networks included so the script runs end to end:
```R
library(caret)
library(keras)      # neural networks for Tasks 2-3
library(glmnet)
library(rpart)
library(randomForest)
library(xgboost)
library(modeldata)  # provides the cars2018 dataset
# Load data (cars2018 ships with the modeldata package)
data("cars2018")
# glmnet, xgboost, and keras all need a numeric matrix, so drop the free-text
# columns (e.g. the model name) and dummy-code the factors, keeping mpg as the
# first column so that `[, -1]` below always selects the predictors
cars2018 <- as.data.frame(cars2018)
cars2018 <- cars2018[, !sapply(cars2018, is.character)]
cars2018 <- data.frame(mpg = cars2018$mpg,
                       model.matrix(mpg ~ ., data = cars2018)[, -1])
# Task 2 - Fit a neural network model with one hidden layer
set.seed(123)
# 80/20 train/test split, stratified on mpg by createDataPartition
train_index <- createDataPartition(cars2018$mpg, p = 0.8, list = FALSE)
train_data <- cars2018[train_index, ]
test_data <- cars2018[-train_index, ]
model1 <- keras_model_sequential() %>%
  layer_dense(units = 10, activation = "relu", input_shape = ncol(train_data) - 1) %>%
  layer_dense(units = 1)  # single linear output for the regression target
compile(model1, optimizer = "adam", loss = "mean_squared_error", metrics = "mean_squared_error")
history1 <- fit(model1, as.matrix(train_data[, -1]), train_data$mpg, epochs = 50, batch_size = 32,
                validation_split = 0.2, verbose = 0)
plot(history1)
# Task 3 - Fit a neural network model with two hidden layers
model2 <- keras_model_sequential() %>%
  layer_dense(units = 10, activation = "relu", input_shape = ncol(train_data) - 1) %>%
  layer_dense(units = 5, activation = "relu") %>%
  layer_dense(units = 1)
compile(model2, optimizer = "adam", loss = "mean_squared_error", metrics = "mean_squared_error")
history2 <- fit(model2, as.matrix(train_data[, -1]), train_data$mpg, epochs = 50, batch_size = 32,
                validation_split = 0.2, verbose = 0)
plot(history2)
# Task 4 - Compare the model performance with other models
# Lasso model (uses the train/test split created above)
set.seed(123)  # fixes the cross-validation fold assignment
glmnet_fit <- cv.glmnet(as.matrix(train_data[, -1]), train_data$mpg, alpha = 1, nfolds = 10)
lasso_pred <- predict(glmnet_fit, newx = as.matrix(test_data[, -1]), s = "lambda.min")
lasso_mse <- mean((lasso_pred - test_data$mpg) ^ 2)
lasso_mae <- mean(abs(lasso_pred - test_data$mpg))
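# Optional: inspect the CV-chosen penalty and the coefficients it keeps
# (assumes glmnet_fit from above)
# glmnet_fit$lambda.min
# coef(glmnet_fit, s = "lambda.min")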
# rpart model (same train/test split; rpart is deterministic, so no reseeding needed)
rpart_fit <- rpart(mpg ~ ., data = train_data, method = "anova")
rpart_pred <- predict(rpart_fit, newdata = test_data)
rpart_mse <- mean((rpart_pred - test_data$mpg) ^ 2)
rpart_mae <- mean(abs(rpart_pred - test_data$mpg))
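# Optional: the complexity-parameter table shows the pruning sequence
# (assumes rpart_fit from above)
# printcp(rpart_fit)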
# Random forest model (same train/test split)
set.seed(123)  # the forest's bootstrap sampling is stochastic
rf_fit <- randomForest(mpg ~ ., data = train_data, ntree = 500)
rf_pred <- predict(rf_fit, newdata = test_data)
rf_mse <- mean((rf_pred - test_data$mpg) ^ 2)
rf_mae <- mean(abs(rf_pred - test_data$mpg))
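# Optional: variable importance from the forest (assumes rf_fit from above)
# importance(rf_fit)
# varImpPlot(rf_fit)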
# XGBoost model (same train/test split)
set.seed(123)  # row/column subsampling (subsample, colsample_bytree) is stochastic
xgb_train <- xgb.DMatrix(data = as.matrix(train_data[, -1]), label = train_data$mpg)
xgb_test <- xgb.DMatrix(data = as.matrix(test_data[, -1]), label = test_data$mpg)
xgb_params <- list(objective = "reg:squarederror", max_depth = 3, eta = 0.1, subsample = 0.5, colsample_bytree = 0.5)
xgb_fit <- xgb.train(params = xgb_params, data = xgb_train, nrounds = 100)
xgb_pred <- predict(xgb_fit, newdata = xgb_test)
xgb_mse <- mean((xgb_pred - test_data$mpg) ^ 2)
xgb_mae <- mean(abs(xgb_pred - test_data$mpg))
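# Evaluate the Task 2-3 neural networks on the same held-out test set so they
# can join the comparison (assumes model1 and model2 were trained above)
nn1_pred <- predict(model1, as.matrix(test_data[, -1]))
nn1_mse <- mean((nn1_pred - test_data$mpg) ^ 2)
nn1_mae <- mean(abs(nn1_pred - test_data$mpg))
nn2_pred <- predict(model2, as.matrix(test_data[, -1]))
nn2_mse <- mean((nn2_pred - test_data$mpg) ^ 2)
nn2_mae <- mean(abs(nn2_pred - test_data$mpg))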
# Compare model performance (the NN metrics come from the block just above)
model_perf <- data.frame(
  Model = c("NN (1 hidden layer)", "NN (2 hidden layers)",
            "Lasso", "rpart", "Random Forest", "XGBoost"),
  MSE = c(nn1_mse, nn2_mse, lasso_mse, rpart_mse, rf_mse, xgb_mse),
  MAE = c(nn1_mae, nn2_mae, lasso_mae, rpart_mae, rf_mae, xgb_mae)
)
print(model_perf)
```
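One small follow-up that can make the comparison easier to read: RMSE is on the same scale as mpg, unlike MSE. A minimal sketch, assuming the `model_perf` data frame built above:
```R
# Add RMSE (same units as mpg) and sort so the best model comes first
model_perf$RMSE <- sqrt(model_perf$MSE)
print(model_perf[order(model_perf$RMSE), ])
```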
That covers Task 4: training and evaluation of the Lasso, rpart, random forest, and XGBoost models, compared against the two neural networks. Hope this helps!