泊松回归适用于响应变量(因变量 y)为计数数据的情形,并假设响应变量的取值服从泊松分布。
1. 数据集载入
### Load the data set
library(AER)

# List only the data sets shipped with AER. A bare data() call would list the
# data sets of every attached package, not just AER's.
data(package = "AER")

# Load the Affairs data set from AER into the workspace.
data("Affairs", package = "AER")

# Inspect the data set.
summary(Affairs)
str(Affairs)

# View() opens a spreadsheet-style viewer, so only call it when a user is
# actually sitting at an interactive session.
if (interactive()) {
  View(Affairs)
}
2. 模型拟合
# Fit a Poisson regression of affairs on every other column of Affairs.
poisson_fit <- glm(affairs ~ ., family = poisson, data = Affairs)
summary(poisson_fit)
coef(poisson_fit)
plot(poisson_fit)

## Check for overdispersion
# Ratio of residual deviance to residual degrees of freedom; a value well
# above 1 suggests overdispersion.
deviance(poisson_fit) / df.residual(poisson_fit)

# Install qcc only when it is missing, instead of reinstalling it on every run.
if (!requireNamespace("qcc", quietly = TRUE)) {
  install.packages("qcc")
}
library(qcc)
qcc.overdispersion.test(Affairs$affairs, type = "poisson")

# A p-value below 0.05 indicates overdispersion, so refit with the
# quasipoisson family.
poisson_fit2 <- glm(affairs ~ ., family = quasipoisson(), data = Affairs)
summary(poisson_fit2)

## Keep only the predictors that are significantly associated with the
## response (judged by their significance levels) and refit the model.
poisson_fit3 <- glm(
  affairs ~ age + yearsmarried + religiousness + rating,
  family = quasipoisson,
  data = Affairs
)
summary(poisson_fit3)

# The quasipoisson dispersion is estimated, so request the F test explicitly
# for the analysis-of-deviance table.
anova(poisson_fit3, test = "F")

# Fitted mean counts for the observations used in the fit.
fitted(poisson_fit3)
3. 预测
## Build a prediction grid: hold the other predictors at their sample means
## and vary rating from 1 to 5 to see its effect on the predicted value.
new_data <- data.frame(
  age = mean(Affairs$age),
  yearsmarried = mean(Affairs$yearsmarried),
  religiousness = mean(Affairs$religiousness),
  # 1:5 directly; seq(1:5) only produced the same values by coincidence,
  # because seq() on a vector behaves like seq_along().
  rating = 1:5
)

# See ?predict.glm. type = "response" returns predictions on the scale of the
# response variable rather than on the link (linear predictor) scale.
pred_value <- predict(poisson_fit3, newdata = new_data, type = "response")