logistic regression在R中实现案例

r 同时被 2 个专栏收录
14 篇文章 0 订阅
6 篇文章 0 订阅
rm(list = ls())
install.packages("AER")
library(AER)
library(survival)
library(ROCR)
data(Affairs,papackage = "AER")
mydata <- Affairs
summary(mydata)

affairs          gender         age         yearsmarried    children  religiousness     education       occupation
Min.   : 0.000   female:315   Min.   :17.50   Min.   : 0.125   no :171   Min.   :1.000   Min.   : 9.00   Min.   :1.000
1st Qu.: 0.000   male  :286   1st Qu.:27.00   1st Qu.: 4.000   yes:430   1st Qu.:2.000   1st Qu.:14.00   1st Qu.:3.000
Median : 0.000                Median :32.00   Median : 7.000             Median :3.000   Median :16.00   Median :5.000
Mean   : 1.456                Mean   :32.49   Mean   : 8.178             Mean   :3.116   Mean   :16.17   Mean   :4.195
3rd Qu.: 0.000                3rd Qu.:37.00   3rd Qu.:15.000             3rd Qu.:4.000   3rd Qu.:18.00   3rd Qu.:6.000
Max.   :12.000                Max.   :57.00   Max.   :15.000             Max.   :5.000   Max.   :20.00   Max.   :7.000
rating
Min.   :1.000
1st Qu.:3.000
Median :4.000
Mean   :3.932
3rd Qu.:5.000
Max.   :5.000

table(mydata$affairs) 0 1 2 3 7 12 451 34 17 19 42 38 mydata$affairs[mydata$affairs>0]<- 1 table(mydata$affairs)
0   1
451 150
mydata$affairs <- factor(mydata$affairs,levels = c(0,1),labels = c("NO","YES"))
table(mydata$affairs) NO YES 451 150 model <-glm(affairs~gender + age +yearsmarried + children +religiousness+ education +occupation +rating,data = mydata,family = binomial()) summary(model)$coefficients
Estimate Std. Error    z value     Pr(>|z|)
(Intercept)    1.37725816 0.88775566  1.5513933 1.208075e-01
gendermale     0.28028665 0.23909454  1.1722838 2.410831e-01
age           -0.04425502 0.01824811 -2.4251835 1.530065e-02
yearsmarried   0.09477302 0.03221445  2.9419413 3.261617e-03
childrenyes    0.39767213 0.29150776  1.3641905 1.725076e-01
religiousness -0.32472063 0.08975394 -3.6178985 2.970049e-04
education      0.02105086 0.05051032  0.4167636 6.768513e-01
occupation     0.03091971 0.07177644  0.4307780 6.666298e-01
rating        -0.46845426 0.09090644 -5.1531469 2.561511e-07


#coef 为回归系数

coef <- summary(model)$coefficients[,1] se <- summary(model)$coefficients[,2]
pvalue <- summary(model)$coefficients[,4]  #第一种方法计算OR及可信区间 results <- cbind(exp(coef),exp(coef-1.96*se),exp(coef + 1.96*se),pvalue) dimnames(results)[[2]] <- c("OR","LL","UL","P value") results OR LL UL P value (Intercept) 3.9640180 0.6957653 22.5843967 1.208075e-01 gendermale 1.3235091 0.8283341 2.1146979 2.410831e-01 age 0.9567099 0.9230967 0.9915472 1.530065e-02 yearsmarried 1.0994093 1.0321383 1.1710647 3.261617e-03 childrenyes 1.4883560 0.8405632 2.6353799 1.725076e-01 religiousness 0.7227292 0.6061436 0.8617389 2.970049e-04 education 1.0212740 0.9250113 1.1275545 6.768513e-01 occupation 1.0314027 0.8960473 1.1872047 6.666298e-01 rating 0.6259691 0.5238076 0.7480559 2.561511e-07  #第二种方法计算OR及可信区间 exp(cbind("OR" = coef(model),confint(model))) Waiting for profiling to be done... OR 2.5 % 97.5 % (Intercept) 3.9640180 0.7013467 22.9148790 gendermale 1.3235091 0.8294178 2.1209988 age 0.9567099 0.9223032 0.9908864 yearsmarried 1.0994093 1.0326203 1.1718726 childrenyes 1.4883560 0.8451473 2.6584834 religiousness 0.7227292 0.6049441 0.8605325 education 1.0212740 0.9254481 1.1284991 occupation 1.0314027 0.8964089 1.1884105 rating 0.6259691 0.5227302 0.7470069  #逐步回归 step(model,direction = "backward") Start: AIC=627.51 affairs ~ gender + age + yearsmarried + children + religiousness + education + occupation + rating Df Deviance AIC - education 1 609.68 625.68 - occupation 1 609.70 625.70 - gender 1 610.89 626.89 - children 1 611.40 627.40 <none> 609.51 627.51 - age 1 615.67 631.67 - yearsmarried 1 618.34 634.34 - religiousness 1 622.92 638.92 - rating 1 636.75 652.75 Step: AIC=625.68 affairs ~ gender + age + yearsmarried + children + religiousness + occupation + rating Df Deviance AIC - occupation 1 610.15 624.15 - gender 1 611.29 625.29 - children 1 611.62 625.62 <none> 609.68 625.68 - age 1 615.78 629.78 - yearsmarried 1 618.46 632.46 - religiousness 1 623.27 637.27 - rating 1 636.93 650.93 Step: AIC=624.15 affairs ~ gender + age + yearsmarried + children + religiousness + rating Df Deviance AIC - children 1 611.86 623.86 <none> 610.15 624.15 - gender 1 613.41 625.41 - age 1 616.00 628.00 - yearsmarried 1 619.07 631.07 - religiousness 1 623.98 635.98 - rating 1 637.23 649.23 Step: AIC=623.86 affairs ~ gender + age + yearsmarried + religiousness + rating Df Deviance AIC <none> 611.86 623.86 - gender 1 615.36 625.36 - age 1 618.05 628.05 - religiousness 1 625.57 635.57 - yearsmarried 1 626.23 636.23 - rating 1 639.93 649.93 Call: glm(formula = affairs ~ gender + age + yearsmarried + religiousness + rating, family = binomial(), data = mydata) Coefficients: (Intercept) gendermale age yearsmarried religiousness 1.94760 0.38612 -0.04393 0.11133 -0.32714 rating -0.46721 Degrees of Freedom: 600 Total (i.e. Null); 595 Residual Null Deviance: 675.4 Residual Deviance: 611.9 AIC: 623.9  #AIC越小，模型拟合越高，接下来根据显著影响的因素，列入回归模型 model2 <-glm(formula = affairs~ gender + age +yearsmarried +religiousness+rating, family = binomial(),data = mydata) summary(model2)$coefficients
Estimate Std. Error   z value     Pr(>|z|)
(Intercept)    1.94760307 0.61233521  3.180616 1.469624e-03
gendermale     0.38612217 0.20702802  1.865072 6.217131e-02
age           -0.04392545 0.01806068 -2.432104 1.501138e-02
yearsmarried   0.11132715 0.02982799  3.732304 1.897360e-04
religiousness -0.32714238 0.08947345 -3.656307 2.558752e-04
rating        -0.46721157 0.08928317 -5.232919 1.668543e-07


#对新模型和旧模型进行检验

anova(model,model2)
Analysis of Deviance Table

Model 1: affairs ~ gender + age + yearsmarried + children + religiousness +
education + occupation + rating
Model 2: affairs ~ gender + age + yearsmarried + religiousness + rating
Resid. Df Resid. Dev Df Deviance
1       592     609.51
2       595     611.86 -3  -2.3486


#预测,用testdata尝试预测

testdata <- data.frame(rating = rep(c(1,2,3,4,5),2),
gender =rep(c("male","female"),each = 5),
age =mean(mydata$age), yearsmarried = mean(mydata$yearsmarried),
religiousness = mean(mydata$religiousness)) testdata$prob <- predict(model2,newdata = testdata,type = "response")
testdata
rating gender      age yearsmarried religiousness      prob
1       1   male 32.48752     8.177696      3.116473 0.5818455
2       2   male 32.48752     8.177696      3.116473 0.4658389
3       3   male 32.48752     8.177696      3.116473 0.3534133
4       4   male 32.48752     8.177696      3.116473 0.2551596
5       5   male 32.48752     8.177696      3.116473 0.1767546
6       1 female 32.48752     8.177696      3.116473 0.4860616
7       2 female 32.48752     8.177696      3.116473 0.3721557
8       3 female 32.48752     8.177696      3.116473 0.2708743
9       4 female 32.48752     8.177696      3.116473 0.1888649
10      5 female 32.48752     8.177696      3.116473 0.1273479


#对affairs这个数据集进行增加预测值与实际结果的值

Affairs$yn_affairs[Affairs$affairs == 0] <- 0
Affairs$yn_affairs[Affairs$affairs > 0] <- 1
Affairs$yn_affairs <- factor(Affairs$yn_affairs,
levels = c(0,1),
labels = c("No","Yes"))


#使用pROC绘制AUC曲线

pred <- predict(model2,newdata = Affairs[,-c(1,10)],type = "response")
modelroc <- roc(Affairs[,10],pred)
plot(modelroc, print.auc = TRUE,auc.polygon = TRUE,grild = c(0.1,0.2),
grid.col =c("green","red"),max.auc.polygon =TRUE,
auc.polygon.col = "skyblue",print.three = TRUE)


• 0
点赞
• 0
评论
• 0
收藏
• 一键三连
• 扫一扫，分享海报

05-29 120

06-21
06-17 3万+
07-22 879
06-14 4万+
10-19 2729