R语言笔记4_模型诊断(关于残差)

R语言笔记4_模型诊断(关于残差)及模型补救(Box-Cox变换)

检验线性关系

# test linear relation about residuals
rm (list=ls())
x = seq(1,30,by=1)
n = length(x)
y = x^2 - 10*x + 30 + 25*rnorm(n)
a100 = data.frame(x,y)

fit1 = lm(y~x, data = a100)
anova(fit1)

## Scatterplot with regrssion line
plot(y~x, data = a100)
abline(fit1)

## scatterplot with smoothed curve
scatter.smooth(x,y)

## residual plot
scatter.smooth(x,fit1$residuals)
abline(h=0)

检验方差齐性

# test homogeneous variance
x = seq(1,100,by=1)
n = length(x)
y = 30 + 100*x + 10*x*rnorm(n)
a100a = data.frame(x,y)

fit2 = lm(y~x,data=a100a)
anova(fit2)
summary(fit2)

## scatterplot with smoothed curve
scatter.smooth(x,y)

## residual plot
scatter.smooth(x,fit2$residuals)
abline(h=0)

检验残差正态性

# test normality of residuals
hours = c(75,76,77,78,79,80,81,82,83,84,85,86,87)
lotsize = c(642,644,656,667,673,688,696,698,713,717,725,742,757)
toluca = data.frame(hours,lotsize)
reg =lm(hours~lotsize, data = toluca)
resid = resid(reg)

## normal qq plot
qqp <- qqnorm(resid)
cor(qqp$x,qqp$y)

## shapiro-wilk test
shapiro.test(resid)

检验离群值

# test outliers of residuals
x = seq(1,100,by=5)
n = length(x)
y = 30 + 50*x + 200*rnorm(n)
x = c(x,100)
y = c(y,30+50*100-10000)#the last pair is the outlier
a100c1 = data.frame(x,y)

## with outlier
fit5 = lm(y~x,data = a100c1)
anova(fit5)
summary(fit5)

## without outlier
fit6 = lm(y~x,data = a100c1[-(n+1),])
anova(fit6)
summary(fit6)

plot(y~x, data =a100c1)
abline(fit5)
abline(fit6,col = 'red')

plot(fit5$residuals ~ x, data = a100c1)
abline(h=0)

模型补救(Box-Cox 变换)

#### 模型补救措施
## make transformations
library(MASS)
age = c(0,0,1,1,2,3,3,4,4)
plasma = c(13.44,12.84,10.11,11.38,9.83,7.94,6.01,4.86,6.23)
lplasma = c(1.1284,1.1086,1.0048,1.0561,0.9926,0.8998,0.7789,0.6866,0.7945)
a1 = data.frame(age,plasma,lplasma)
hist(a1$plasma)
plot(a1$age,a1$plasma)

## box-cox transformation 
boxcox(plasma ~ age, data = a1, lambda = seq(-1.5, 05, length = 10))
loglike <- boxcox(plasma~age, data=a1, lambda = seq(-1.5,.5,length=10))
loglike$x[which.max(loglike$y)]

## first, get the geometric mean
n <- nrow(a1)
k2 <- (prod(a1$plasma))^(1/n)
lambda <- seq(-1,1,by=0.1)
# or k2 = exp(mean(log(a1$plasma)))

## get the transformed data set a2
transformed <- NULL
for(i in 1:length(lambda)){
  k1 <- 1/(lambda[i]*k2^(lambda[i]-1))
  trans_y <- if(lambda[i] == 0)
    {k2*(log(a1$plasma))}else
    {k1*((a1$plasma)^lambda[i]-1)}
  a2 <- cbind(a1, lambda = rep(lambda[i],n), trans_y)
  transformed <- rbind(transformed, a2)
}

## extract SSE's of linear regressions by lambda
sse <- by(transformed, transformed[,"lambda"], function(x) anova(lm(trans_y~age,data=x))[,2][2])
plot(sse~lambda, type = "l")

  • 0
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值