回归的一般问题

The response variable
(a) Continuous Normal regression, ANOVA or ANCOVA
(b) Proportion Logistic regression
(c) Count Log-linear models
(d) Binary Binary logistic analysis

(e) Time at death Survival analysis


The explanatory variables
(a) All explanatory variables continuous Regression
(b) All explanatory variables categorical Analysis of variance (ANOVA)

(c) Explanatory variables both continuous and categorical Analysis of covariance (ANCOVA)


library(MASS)

windows(7,7)

boxcox(volume~log(girth)+log(height))

boxcox(volume~log(girth)+log(height),lambda=seq(-0.5,0.5,0.01)) 

boxcox(volume~girth+height)

boxcox(volume~girth+height,lambda=seq(0.1,0.6,0.01)) 




9.13 Model checking

For instance, we should routinely plot the residuals against:
the fitted values (to look for heteroscedasticity);
the explanatory variables (to look for evidence of curvature);
the sequence of data collection (to look for temporal correlation);

standard normal deviates (to look for non-normality of errors)



The idea is to see what patterns are generated in normal plots by the different kinds
of non-normality. In real applications we would use the generic plot(model) rather than mcheck (see
below). First, we write the function mcheck. The idea is to produce two plots, side by side: a plot of the
residuals against the fitted values on the left, and a plot of the ordered residuals against the quantiles of the

normal distribution on the right



mcheck <- function (obj, ...){
rs <- obj$resid
fv <- obj$fitted
windows(7,4)
par(mfrow=c(1,2))
plot(fv,rs,xlab="Fitted values",ylab="Residuals",pch=16,col="red")
abline(h=0, lty=2)
qqnorm(rs,xlab="Normal scores",ylab="Ordered residuals",main="",pch=16)
qqline(rs,lty=2,col="green")
par(mfrow=c(1,1))

invisible(NULL) }


x <- 0:30


e <- rnorm(31,mean=0,sd=5)
yn <- 10+x+e
mn <- lm(yn~x)

mcheck(mn)


eu <- 20*(runif(31)-0.5)
yu <- 10+x+eu
mu <- lm(yu~x)

mcheck(mu)


enb <- rnbinom(31,2,.3)
ynb <- 10+x+enb
mnb <- lm(ynb~x)

mcheck(mnb)


enb <- rnbinom(31,2,.3)
ynb <- 10+x+enb
mnb <- lm(ynb~x)

mcheck(mnb)


eg <- rgamma(31,1,1/x)
yg <- 10+x+eg

mg <- lm(yg~x)

mcheck(mg)


x <- c(2,3,3,3,4)

y <- c(2,3,2,1,2)


windows(7,4)
par(mfrow=c(1,2))

plot(x,y,xlim=c(0,8),ylim=c(0,8))


x1 <- c(x,7)
y1 <- c(y,6)
plot(x1,y1,xlim=c(0,8),ylim=c(0,8))

abline(lm(y1~x1),col="blue")


reg <- lm(y1~x1)

summary(reg)


influence.measures(reg)

influence.measures(reg)$is.inf

lm.influence(reg)

summary.aov(lm(y1[-6]~x1[-6]))


data <- read.table("c:\\temp\\ipomopsis.txt",header=T)
attach(data)

names(data)

model <- lm(Fruit[Grazing=="Grazed"]~Root[Grazing=="Grazed"])

model <- lm(Fruit~Root,subset=(Grazing=="Grazed"))

weights = rep(1, n.observations)

model <- lm(Fruit~Grazing,weights=Root)

summary(model)

model <- lm(Fruit~Grazing)

summary(model)

Root[37] <- NA
model <- lm(Fruit~Grazing*Root)

model <- lm(Fruit~Grazing*Root,na.action=na.fail)

leverage <- function(x){1/length(x)+(x-mean(x))ˆ2/sum((x-mean(x))ˆ2)}

plot(leverage(x),type="h",ylim=c(0,1),col="blue")

abline(h=4/6,lty=2,col="green")


Decay <- read.table("c:\\temp\\Decay.txt",header=T)

attach(Decay)

names(Decay)

model <- lm(amount~time)

par(mfrow=c(2,2))

plot(model)

data <- read.table("c:\\temp\\regression.txt",header=T)
attach(data)

names(data)

model <- lm(growth~tannin)

summary(model)


coef(model)

fitted(model)

resid(model)

vcov(model)


x <- 0:100

y <- 17+0.2*x+3*rnorm(101)

model0 <- lm(y~1)
model1 <- lm(y~x)

model2 <- lm(y~x+I(xˆ2))


models <- list(model0,model1,model2)

lapply(models,coef)

as.vector(unlist(lapply(models,coef)))[c(1,2,4)]

lapply(models,AIC)



Aliasing

data <- read.table("c:\\temp\\poly.txt",header=T)

attach(data)

names(data)

tapply(response,treatment,mean)

summary.aov(model)

is.factor(treatment)

is.ordered(treatment)

treatment <- ordered(treatment,levels=c("none","low","medium","high"))

levels(treatment)


model2 <- lm(response~treatment)

summary.aov(model2)

summary.lm(model2)

tapply(response,treatment,mean)

yv <- as.vector(tapply(response,treatment,mean))
x <- 1:4
model <- lm(yv~x+I(xˆ2)+I(xˆ3))

summary(model)

x <- 1:4
x2 <- xˆ2
x3 <- xˆ3

cor(cbind(x,x2,x3))

t(contrasts(treatment))


y <- as.vector(tapply(response,treatment,mean))
model <- lm(y~poly(x,3))

summary(model)


xv <- seq(1,4,0.1)

yv <- predict(model,list(x=xv))

(bar.x <- barplot(y))

barplot(y,names=levels(treatment))
xs <- -0.5 + 1.2 * xv

lines(xs,yv,col="red")



9.28 Summary of statistical modelling
The steps in the statistical analysis of data are always the same, and should always be done in the following
order:
(1) data inspection (plots and tabular summaries, identifying errors and outliers);
(2) model specification (picking an appropriate model from many possibilities);
(3) ensure that there is no pseudoreplication, or specify appropriate random effects;
(4) fit a maximal model with an appropriate error structure;
(5) model simplification (by deletion from a complex initial model);
(6) model criticism (using diagnostic plots, influence tests, etc.);
(7) repeat steps 2 to 6 as often as necessary



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值