R实现的LM和KNN结果的对比

该代码片段在R中实现了knn和线性回归两种拟合方法,对不同数量的预测变量(puse)进行训练和测试。通过计算bias平方、方差和MSE来评估模型性能。knn的误差分析考虑了不同邻居数(k)的影响,而线性回归则基于所有预测变量。结果存储在out_knn和out_lm数据框中,但未进行可视化展示。
摘要由CSDN通过智能技术生成
  1. 分别实现了两种拟合方式,并将结果存储到out_knn, out_lm;

  2. 结果可以进一步进行可视化,不过这里懒得处理了;

  3. 对于R中的一些数据结构尚不熟悉,计算bias的时候直接转成dataframe来操作实现的。

library(FNN)
library(ggplot2)
# Simulation setup ----
# Bias-variance comparison of linear regression vs. knn regression when only
# the first of p predictors carries signal. (Removed `rm(list = ls())`:
# wiping the caller's workspace is a side effect a script should not have.)
set.seed(1)  # Monte Carlo study: fix the seed so results are reproducible

ntrain <- 50   # training sample size
ntest <- 500   # test sample size
nrep <- 100    # number of Monte Carlo repetitions
p <- 20        # total number of candidate predictors
puse <- c(1, 2, 3, 4, 10, 20)  # numbers of predictors actually used in each fit
k <- 1:9       # neighborhood sizes tried for knn

sigma <- 0.025 # sd of the additive noise on the training response

# Fixed design: predictors uniform on [-1, 1]; only X1 affects the mean.
Xtrain <- matrix(runif(ntrain * p, -1, 1), ntrain, p)
Xtest <- matrix(runif(ntest * p, -1, 1), ntest, p)
y0 <- sin(2 * Xtrain[, 1])   # noiseless training mean (function of X1 only)
ytest <- sin(2 * Xtest[, 1]) # noiseless test target

out_knn <- data.frame() # accumulated knn results (error, component, K, p)
out_lm <- data.frame()  # accumulated lm results (error, component, p)

# For each number of predictors puse[i]: repeat the fit nrep times on freshly
# noised responses, then estimate squared bias, variance, and MSE of the lm
# and knn predictions at the fixed test points.
#
# FIX: the original estimated E(f^hat) with a single grand mean over all test
# points and repetitions. The bias-variance decomposition requires the
# POINTWISE mean over repetitions (rowMeans), one value per test observation.
# The knn variance additionally recycled a length-9 vector along the wrong
# array dimension, silently misaligning the per-k means.
for (i in seq_along(puse)) {
  yhat_lm <- matrix(0, ntest, nrep)                        # lm predictions: ntest x nrep
  yhat_knn <- replicate(length(k), matrix(0, ntest, nrep)) # knn predictions: ntest x nrep x length(k)

  for (l in seq_len(nrep)) {
    y <- y0 + rnorm(ntrain, 0, sigma)  # noisy training response for this repetition

    # Linear regression on the first puse[i] predictors. Put y inside the
    # data frame so lm() does not have to find it in the calling environment;
    # the auto-generated column names X1..Xp match between train and test.
    train_df <- data.frame(y = y, Xtrain[, 1:puse[i], drop = FALSE])
    test_df <- data.frame(Xtest[, 1:puse[i], drop = FALSE])
    fit_lm <- lm(y ~ ., data = train_df)
    yhat_lm[, l] <- predict(fit_lm, newdata = test_df)

    # knn regression for every neighborhood size k[j].
    for (j in seq_along(k)) {
      fit_knn <- knn.reg(Xtrain[, 1:puse[i], drop = FALSE],
                         Xtest[, 1:puse[i], drop = FALSE],
                         y, k = k[j])
      yhat_knn[, l, j] <- fit_knn$pred
    }

    cat(i, "-th p, ", l, "-th repetition finished. \n")
  }

  # Bias/variance decomposition for lm ----
  ybar_lm <- rowMeans(yhat_lm)               # pointwise E(f^hat), length ntest
  biasSQ_lm <- mean((ytest - ybar_lm)^2)     # E[(f - E f^hat)^2]
  variance_lm <- mean((yhat_lm - ybar_lm)^2) # ybar_lm recycles down columns: row-aligned
  err_lm <- mean((ytest - yhat_lm)^2)        # total MSE (ytest also recycles by row)

  out_lm <- rbind(out_lm, data.frame(error = biasSQ_lm, component = "squared-bias", p = paste0("p = ", puse[i])))
  out_lm <- rbind(out_lm, data.frame(error = variance_lm, component = "variance", p = paste0("p = ", puse[i])))
  out_lm <- rbind(out_lm, data.frame(error = err_lm, component = "MSE", p = paste0("p = ", puse[i])))

  # Bias/variance decomposition for knn, one value per neighborhood size ----
  biasSQ_knn <- numeric(length(k))
  variance_knn <- numeric(length(k))
  err_knn <- numeric(length(k))
  for (j in seq_along(k)) {
    ybar_j <- rowMeans(yhat_knn[, , j])  # pointwise E(f^hat) for this k
    biasSQ_knn[j] <- mean((ytest - ybar_j)^2)
    variance_knn[j] <- mean((yhat_knn[, , j] - ybar_j)^2)
    err_knn[j] <- mean((ytest - yhat_knn[, , j])^2)
  }

  out_knn <- rbind(out_knn, data.frame(error = biasSQ_knn, component = "squared-bias", K = 1/k, p = paste0("p = ", puse[i])))
  out_knn <- rbind(out_knn, data.frame(error = variance_knn, component = "variance", K = 1/k, p = paste0("p = ", puse[i])))
  out_knn <- rbind(out_knn, data.frame(error = err_knn, component = "MSE", K = 1/k, p = paste0("p = ", puse[i])))
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值