# Simple linear regression (一元回归) ----
library(rio)

# Read the data; do NOT call it "lm" — that name shadows stats::lm()
# and makes the next line needlessly confusing.
lm1_data <- import('lm1.xlsx')

# Simple linear regression: weight as response, N as the single predictor.
fitlm <- lm(weight ~ N, data = lm1_data)

# One summary is enough (the original called it twice, with doubled parens).
summary(fitlm)
# R^2 = 0.911; intercept and slope are both highly significant.
# Fitted equation: weight = 3.45 * N - 87.51667
# (original comment said "3N" but the estimated slope is 3.45 — see output below)
# Console output from summary(fitlm):
#   Call:
#   lm(formula = weight ~ N, data = lm)
#   Residuals:
#       Min      1Q  Median      3Q     Max
#   -1.7333 -1.1333 -0.3833  0.7417  3.1167
#   Coefficients:
#                Estimate Std. Error t value Pr(>|t|)
#   (Intercept) -87.51667    5.93694  -14.74 1.71e-09 ***
#   N             3.45000    0.09114   37.85 1.09e-14 ***
# Multiple regression (多元回归) ----
library(rio)

# Load the multiple-regression data set.
dat2 <- import('lm2.xlsx')

# y ~ . regresses y on every other column of the data frame.
full_fit <- lm(y ~ ., data = dat2)
summary(full_fit)

# AIC-based stepwise selection starting from the full model.
step_fit <- step(full_fit)
summary(step_fit)
# R^2 ~ 0.94; selected equation:
#   y = 13.206*x1 + 3.229*x3 + 5.826*x4 - 574.208
# x3 and x4 do not reach significance and could also be dropped.
# Console output from summary(steplm2):
#   Coefficients:
#               Estimate Std. Error t value Pr(>|t|)
#   (Intercept) -574.208    144.483  -3.974  0.01648 *
#   x1            13.206      2.842   4.646  0.00969 **
#   x3             3.229      2.986   1.081  0.34046
#   x4             5.826      2.700   2.158  0.09714 .
# Regression with collinear predictors (存在共线性的回归) ----
# Ridge regression (岭回归)
library(carData)
library(car)

# Cement data; `lm3` is reused by the ridge and PLS sections below,
# so the name is kept.
lm3 <- import('./R语言与农业建模/R_statistics/cement.csv')

# Ordinary least squares on all four predictors.
ols_fit <- lm(y ~ x1 + x2 + x3 + x4, data = lm3)
summary(ols_fit)

# Variance inflation factors: all exceed 9, i.e. strong multicollinearity.
vif(ols_fit)

# Stepwise selection does not fully fix it: x2 and x4 remain collinear.
ols_step <- step(ols_fit)
summary(ols_step)
vif(ols_step)

# Visual and numeric check of the pairwise predictor relationships.
plot(lm3[, 2:5])
cor(lm3[, 2:5])
# Ridge regression: shrinks the coefficients to cope with collinearity.
library(ridge)

ridge_fit <- linearRidge(y ~ x1 + x2 + x3 + x4, data = lm3)
summary(ridge_fit)
# Fitted equation:
#   y = 75.45461 + 1.30872*x1 + 0.39175*x2 - 0.09997*x3 - 0.24820*x4
# Partial least squares (偏最小二乘) ----
# Partial least squares regression — another remedy for collinearity.
library(pls)

# Leave-one-out cross-validation; jackknife = TRUE is required for
# the jack.test() call further down.
pls_fit <- plsr(y ~ x1 + x2 + x3 + x4, data = lm3, ncomp = 4,
                validation = 'LOO', jackknife = TRUE)
summary(pls_fit)

# Prediction-error summaries.
RMSEP(pls_fit)  # root mean squared error of prediction
MSEP(pls_fit)   # mean squared error of prediction
R2(pls_fit)

# NOTE(review): these coefficients may be on a standardized scale —
# confirm whether the data need standardizing first.
coef(pls_fit)
#   y = 1.48052476*x1 + 0.49183630*x2 + 0.05099729*x3 - 0.15627481*x4

# Jackknife significance test of the regression coefficients.
jack.test(pls_fit)
plot(pls_fit)