R语言随机森林模型回归randomForest

基于《The influence of the neighbourhood environment on peer-to-peer accommodations: A random forest regression analysis》文章，原文链接：https://doi.org/10.1016/j.jhtm.2022.02.028

 

Multiple linear regression and Random forest regression

# Use the software RStudio 4.0.5

# -*- coding: UTF-8 -*-

# This code uses "All room" as the example. The code for the other Airbnb listing types (entire home/apt, private room, shared room) is analogous and is omitted to avoid redundancy.

## Loading packages and data------------------------------------------------------------

library(randomForest)
library(pheatmap)

library(extrafont)

library(corrplot)

library(car)

# Switch the session to the project folder so the relative file name below
# resolves against it.
setwd(dir = "C:/Users/Desktop/Airbnb")

# Load the comma-separated Airbnb data set into a data frame.
Data_Airbnb <- read.csv(
  file = "Airbnb_data.csv",
  sep = ","
)

## Multiple linear regression ----

# Fit Airbnb on the twelve neighbourhood predictors. The predictors are
# written as formula terms joined by `+`; a character vector of column names
# on the right-hand side is not a valid way to name model terms.
Lm_Airbnb <- lm(
  Airbnb ~ PopDen + PGDP + HPrice + Distance + BusDen + MetroDen +
    CaterDen + ShopDen + RecrDen + UnivDen + HotelDen + AttrDen,
  data = Data_Airbnb
)

summary(Lm_Airbnb) # View fitting results

# Fitted values for the same rows the model was trained on.
lm.pred_Airbnb <- predict(Lm_Airbnb, Data_Airbnb)

# Linear-model predictions side by side with the observed data.
lm.pred_Airbnb1 <- data.frame(lm.pred_Airbnb, Data_Airbnb)

# Multicollinearity test: variance inflation factor (VIF) per predictor.
# Rounding is applied explicitly because vif() does not use a `digits`
# argument.
round(vif(Lm_Airbnb), digits = 3)

## Random forest regression ----

set.seed(1234) # Fix the random seed so the forest is reproducible

# Grow 500 regression trees on the same twelve predictors as the linear
# model. `importance = TRUE` stores the permutation importance needed for the
# %IncMSE plot. Predictors are formula terms joined by `+`, and the first one
# is spelled `PopDen` to match its use elsewhere in this script.
Rf_Airbnb <- randomForest(
  Airbnb ~ PopDen + PGDP + HPrice + Distance + BusDen + MetroDen +
    CaterDen + ShopDen + RecrDen + UnivDen + HotelDen + AttrDen,
  data = Data_Airbnb,
  ntree = 500,
  importance = TRUE
)

# Cross-validation

set.seed(1234)

# rfcv() is randomForest's cross-validation routine for feature selection:
# it refits the forest on successively fewer predictors (the count is
# multiplied by `step` each round) and records the cross-validated error.
# Predictors are selected by name so that all twelve modelled variables are
# included regardless of their column positions in the CSV.
result <- rfcv(
  trainx = Data_Airbnb[, c(
    "PopDen", "PGDP", "HPrice", "Distance", "BusDen", "MetroDen",
    "CaterDen", "ShopDen", "RecrDen", "UnivDen", "HotelDen", "AttrDen"
  )],
  trainy = Data_Airbnb$Airbnb,
  cv.fold = 2,
  scale = "log",
  step = 0.5
)

result$error.cv # Cross-validated error for each number of predictors tried

# Random forest predictions

# Predict for every row of the data set the forest was fitted on.
# NOTE(review): these are predictions on the training rows, not held-out
# data; keep that in mind when reading the accuracy figures derived from them.
forest.pred_Airbnb <- predict(
  object = Rf_Airbnb,
  newdata = Data_Airbnb
)

# Place the predictions next to the observed data for inspection.
forest.pred_Airbnb1 <- data.frame(
  forest.pred_Airbnb,
  Data_Airbnb
)

# Cross-validation error curve

# Save the current graphics parameters so they can be restored afterwards.
opar <- par(no.readonly = TRUE)

par(lwd = 2, cex = 1, cex.axis = 1, font = 2, cex.lab = 1, tck = -.02)

# Plot the cross-validated error against the number of predictors used at
# each rfcv() step. The axes describe the cross-validation result, so the
# data come from `result`, not from the prediction vector.
plot(
  result$n.var, result$error.cv,
  type = "o", main = " ", lwd = 2, font.lab = 2, font = 2,
  ann = FALSE, family = "Times"
)

title(xlab = "Number of feature", ylab = "Cross-validation error", font.lab = 2)

par(opar) # Restore the saved graphics parameters

# Variable importance - %IncMSE

# varImpPlot() needs the fitted randomForest object (which carries the
# importance measures), not the vector of predictions.
varImpPlot(Rf_Airbnb, family = "Times")

dev.off() # Close the current graphics device

# The partial dependencies of variables

# Snapshot of the graphics parameters; nothing in this section changes them,
# the assignment is kept so `opar` stays available to later code.
opar <- par(no.readonly = TRUE)

# partialPlot() takes the fitted randomForest object, the data used to
# evaluate it, and the predictor of interest. The `which.class` argument only
# applies to classification forests, so it is not passed for this regression.
partialPlot(Rf_Airbnb, Data_Airbnb, PopDen, main = " ", xlab = " ", ylab = " ", col = "black")

partialPlot(Rf_Airbnb, Data_Airbnb, PGDP, main = " ", xlab = " ", ylab = " ", col = "black") # The same applies to the other variables "Distance", "BusDen", etc.

## Comparison of multiple linear regression and random forest regression results ----

# R-value: Pearson correlation between predicted and observed Airbnb values.

# Multiple linear regression
cor(x = lm.pred_Airbnb, y = Data_Airbnb$Airbnb)

# Random forest regression
cor(x = forest.pred_Airbnb, y = Data_Airbnb$Airbnb)

# Mean absolute error (MAE)

#' Mean absolute error between two numeric vectors
#'
#' @param actual Numeric vector of observed values.
#' @param predicted Numeric vector of predicted values.
#' @return The mean of the element-wise absolute differences. Missing values
#'   are not removed, so any `NA` in the inputs yields `NA`.
MAE <- function(actual, predicted) {
  abs_residuals <- abs(actual - predicted)
  mean(abs_residuals)
}

# Mean absolute error of multiple linear regression
MAE(actual = Data_Airbnb$Airbnb, predicted = lm.pred_Airbnb)

# Mean absolute error of random forest regression
MAE(actual = Data_Airbnb$Airbnb, predicted = forest.pred_Airbnb)

# Close the current graphics device.
dev.off()

  • 0
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值