library(nlme)
## Warning: package 'nlme' was built under R version 3.6.3
library(lme4)
## Warning: package 'lme4' was built under R version 3.6.3
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 3.6.3
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(AED)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
使用示例数据集,比较不同协变量调整下线性回归模型的效能
data("RIKZ")
mod_lm1<-lm(Richness~NAP,data=RIKZ)
mod_lm2<-lm(Richness~NAP+Exposure,data=RIKZ)
mod_lm3<-lm(Richness~NAP+Exposure+Beach,data=RIKZ)
summary(mod_lm1)
##
## Call:
## lm(formula = Richness ~ NAP, data = RIKZ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0675 -2.7607 -0.8029 1.3534 13.8723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.6857 0.6578 10.164 5.25e-13 ***
## NAP -2.8669 0.6307 -4.545 4.42e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.16 on 43 degrees of freedom
## Multiple R-squared: 0.3245, Adjusted R-squared: 0.3088
## F-statistic: 20.66 on 1 and 43 DF, p-value: 4.418e-05
summary(mod_lm2)
##
## Call:
## lm(formula = Richness ~ NAP + Exposure, data = RIKZ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3083 -1.7107 -0.8489 0.7674 13.3264
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2909 5.1878 7.188 7.83e-09 ***
## NAP -2.7252 0.4716 -5.779 8.26e-07 ***
## Exposure -2.9988 0.5060 -5.926 5.07e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.106 on 42 degrees of freedom
## Multiple R-squared: 0.6321, Adjusted R-squared: 0.6146
## F-statistic: 36.09 on 2 and 42 DF, p-value: 7.577e-10
summary(mod_lm3)
##
## Call:
## lm(formula = Richness ~ NAP + Exposure + Beach, data = RIKZ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0393 -1.6114 -0.5544 0.7196 13.5599
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.3706 5.1212 7.102 1.18e-08 ***
## NAP -2.5119 0.4810 -5.223 5.46e-06 ***
## Exposure -2.7647 0.5170 -5.348 3.64e-06 ***
## Beach -0.3095 0.1906 -1.623 0.112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.048 on 41 degrees of freedom
## Multiple R-squared: 0.6543, Adjusted R-squared: 0.6291
## F-statistic: 25.87 on 3 and 41 DF, p-value: 1.484e-09
同时调整NAP,Exposure及Beach时,Beach与Richness的关联无统计学意义
绘制model1的诊断性曲线图
par(mfrow=c(2,2))
plot(mod_lm1)
model1的诊断性曲线提示model1中自变量不符合正态性及方差齐性,比较各个model的AIC值
AIC(mod_lm1,mod_lm2,mod_lm3)
## df AIC
## mod_lm1 3 259.9535
## mod_lm2 4 234.6083
## mod_lm3 5 233.8053
mod_lm3的AIC值最小,其中Beach无统计学意义,按Beach进行分组,绘制回归曲线
ggplot(RIKZ,aes(NAP,Richness,group=Beach,color=Beach))+
geom_point(size=3)+
geom_smooth(formula = "y~x",method = "lm",se=F,size=0.5)
发现不同Beach分组的回归曲线斜率与截距不一致,提示Beach仍然是Richness的影响因素,单独调整Beach时,结果如下
mod_lm4<-lm(Richness~NAP+Beach,data = RIKZ)
summary(mod_lm4)
##
## Call:
## lm(formula = Richness ~ NAP + Beach, data = RIKZ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8980 -2.4944 -0.4541 1.2401 14.2386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.5053 1.2793 7.430 3.55e-09 ***
## NAP -2.4363 0.6188 -3.937 0.000305 ***
## Beach -0.5939 0.2357 -2.520 0.015622 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.923 on 42 degrees of freedom
## Multiple R-squared: 0.4133, Adjusted R-squared: 0.3853
## F-statistic: 14.79 on 2 and 42 DF, p-value: 1.372e-05
Beach仍有统计学意义,提示Exposure与Beach可能存在交互作用
par(mfrow=c(2,2))
plot(mod_lm4)
使用lme函数,添加Beach的随机截距项
mod_lmm1<-lme(Richness ~NAP,random = ~1|Beach,data=RIKZ)
summary(mod_lmm1)
## Linear mixed-effects model fit by REML
## Data: RIKZ
## AIC BIC logLik
## 247.4802 254.525 -119.7401
##
## Random effects:
## Formula: ~1 | Beach
## (Intercept) Residual
## StdDev: 2.944065 3.05977
##
## Fixed effects: Richness ~ NAP
## Value Std.Error DF t-value p-value
## (Intercept) 6.581893 1.0957618 35 6.006682 0
## NAP -2.568400 0.4947246 35 -5.191574 0
## Correlation:
## (Intr)
## NAP -0.157
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.4227495 -0.4848006 -0.1576462 0.2518966 3.9793918
##
## Number of Observations: 45
## Number of Groups: 9
预测Richness的值,为fit1
RIKZ$fit1<-predict(mod_lmm1)
head(RIKZ)
## Sample Richness Exposure NAP Beach fit1
## 1 1 11 10 0.045 1 9.087834
## 2 2 10 10 -1.036 1 11.864274
## 3 3 13 10 -1.336 1 12.634793
## 4 4 11 10 0.616 1 7.621278
## 5 5 10 10 -0.684 1 10.960197
## 6 6 8 8 1.190 2 8.725105
绘制线性回归图
ggplot(RIKZ,aes(NAP,Richness,color=factor(Beach)))+
geom_point(size=3,alpha=0.25)+
geom_abline(aes(intercept=6.5819),slope=-2.5684)+
geom_point(aes(y=fit1),size=5,alpha=0.5)+
geom_line(aes(y=fit1),size=1)+
theme_bw()+
theme(legend.position = "none")+
scale_color_brewer(palette = "Set1")
## Warning: geom_abline(): Ignoring `mapping` because `slope` and/or `intercept`
## were provided.
斜率相同,但截距不同,与线性回归模型进行比较
RIKZ$fit0<-predict(mod_lm1)
head(RIKZ)
## Sample Richness Exposure NAP Beach fit1 fit0
## 1 1 11 10 0.045 1 9.087834 6.556653
## 2 2 10 10 -1.036 1 11.864274 9.655722
## 3 3 13 10 -1.336 1 12.634793 10.515778
## 4 4 11 10 0.616 1 7.621278 4.919680
## 5 5 10 10 -0.684 1 10.960197 8.646589
## 6 6 8 8 1.190 2 8.725105 3.274107
一般线性回归图
ggplot(RIKZ,aes(NAP,Richness,color=factor(Beach)))+
geom_point(size=3,alpha=0.25)+
geom_abline(aes(intercept=6.5819),slope=-2.5684)+
geom_point(aes(y=fit0),size=5,alpha=0.5)+
geom_line(aes(y=fit0),size=1)+
theme_bw()+
theme(legend.position = "none")+
scale_color_brewer(palette = "Set1")
## Warning: geom_abline(): Ignoring `mapping` because `slope` and/or `intercept`
## were provided.
线性混合模型更能体现数据分布特征,添加随机斜率项目
head(RIKZ,10)
## Sample Richness Exposure NAP Beach fit1 fit0
## 1 1 11 10 0.045 1 9.087834 6.556653
## 2 2 10 10 -1.036 1 11.864274 9.655722
## 3 3 13 10 -1.336 1 12.634793 10.515778
## 4 4 11 10 0.616 1 7.621278 4.919680
## 5 5 10 10 -0.684 1 10.960197 8.646589
## 6 6 8 8 1.190 2 8.725105 3.274107
## 7 7 9 8 0.820 2 9.675413 4.334842
## 8 8 8 8 0.635 2 10.150567 4.865210
## 9 9 19 8 0.061 2 11.624829 6.510784
## 10 10 17 8 -1.334 2 15.207746 10.510044
mod_lmm2<-lme(Richness~NAP,random=~1+NAP|Beach,data=RIKZ)
summary(mod_lmm2)
## Linear mixed-effects model fit by REML
## Data: RIKZ
## AIC BIC logLik
## 244.3839 254.9511 -116.1919
##
## Random effects:
## Formula: ~1 + NAP | Beach
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 3.549064 (Intr)
## NAP 1.714963 -0.99
## Residual 2.702820
##
## Fixed effects: Richness ~ NAP
## Value Std.Error DF t-value p-value
## (Intercept) 6.588706 1.264761 35 5.209448 0e+00
## NAP -2.830028 0.722940 35 -3.914610 4e-04
## Correlation:
## (Intr)
## NAP -0.819
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.8213326 -0.3411043 -0.1674617 0.1921216 3.0397049
##
## Number of Observations: 45
## Number of Groups: 9
绘制混合线性模型回归曲线
RIKZ$fit2<-predict(mod_lmm2)
ggplot(RIKZ,aes(NAP,Richness,color=factor(Beach)))+
geom_point(size=3,alpha=0.25)+
geom_abline(aes(intercept=6.5819),slope=-2.5684)+
geom_point(aes(y=fit2),size=5,alpha=0.5)+
geom_line(aes(y=fit2),size=1)+
theme_bw()+
theme(legend.position = "none")+
scale_color_brewer(palette = "Set1")
## Warning: geom_abline(): Ignoring `mapping` because `slope` and/or `intercept`
## were provided.
可以看到不同NAP组的斜率及结局都不尽相同,与其他模型相比,更能体现数据特征
补充 lme4包实现
library(lme4)
mod_lmm4<-lmer(Richness~NAP+(1+NAP|Beach),REML=F,data = RIKZ)
## boundary (singular) fit: see ?isSingular
summary(mod_lmm4)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: Richness ~ NAP + (1 + NAP | Beach)
## Data: RIKZ
##
## AIC BIC logLik deviance df.resid
## 246.7 257.5 -117.3 234.7 39
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7985 -0.3418 -0.1827 0.1749 3.1389
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Beach (Intercept) 10.949 3.309
## NAP 2.502 1.582 -1.00
## Residual 7.174 2.678
## Number of obs: 45, groups: Beach, 9
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.5818 1.1883 8.8936 5.539 0.000377 ***
## NAP -2.8293 0.6849 7.9217 -4.131 0.003366 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## NAP -0.810
## convergence code: 0
## boundary (singular) fit: see ?isSingular
AIC(mod_lmm1,mod_lmm2,mod_lmm4)
## df AIC
## mod_lmm1 4 247.4802
## mod_lmm2 6 244.3839
## mod_lmm4 6 246.6561
mod_lmm2的AIC最小,模型最好 注意:lme和lmer函数添加随机项目的表达式有点差别,同时,lmer函数中默认REML=TRUE)