应用统计分析作业四：数据框中因子型数据的处理

最新推荐文章于 2022-10-26 00:36:45 发布

自律的萱小主

最新推荐文章于 2022-10-26 00:36:45 发布

阅读量248

点赞数

分类专栏：零基础　文章标签：r语言

本文链接：https://blog.csdn.net/weixin_43917150/article/details/122183504

版权

零基础专栏收录该内容

12 篇文章 1 订阅

订阅专栏

library(foreign)
# Read the SPSS file; to.data.frame = TRUE returns a data frame instead of a
# list. The stray trailing space in the file name has been removed so that the
# path matches the file on disk.
df <- read.spss("meph.sav", to.data.frame = TRUE)
### 1. Merge the "others" and "white" categories of RACE1 into a single level
# attach() is kept only because a later step calls detach(df). Every statement
# below addresses columns explicitly as df$..., since attach() exposes a copy
# that does not follow later changes to df.
attach(df)

contrasts(df$RACE1) # contrasts() only works on factor columns
# Assigning a named list to levels() maps old level names (right-hand side)
# onto new ones (left-hand side); "others" and "white" both become "WHITE".
levels(df$RACE1) <- list(
  "WHITE" = c("others", "white"),
  "BLACK" = "Black",
  "NATIVE" = "Native",
  "ASIAN" = "Asian"
)
contrasts(df$RACE1)

### 2. 挑出因子变量合并为一个数据集

# List every column that is available in the raw data.
names(df)
# Keep only the categorical columns used in the rest of the analysis.
df1 <- df[
  ,
  c(
    "GENDER", "RACE1", "REGION1", "EDUC1", "PHSTAT1",
    "MNHPOOR", "ANYLIMIT", "INCOME1", "insure", "poisexp"
  ),
  drop = FALSE
]

### 3. 查看各变量的因子赋值

# Remove the attached copy of df if it is still on the search path. df1 is not
# attached: every later statement addresses its columns as df1$... explicitly.
if ("df" %in% search()) {
  detach(df)
}

# Contrast (dummy-coding) matrix of every column, as a list named by column.
# contrasts() only works on factor columns.
box <- lapply(df1, contrasts)
box

### 4. 调整GENDER和RACE1的参考值

# Show the dummy coding that is in effect before the baselines are changed.
contrasts(df1$GENDER)
contrasts(df1$RACE1)
# relevel() makes the given level the reference category, i.e. the level whose
# dummy variables are all 0.
df1 <- transform(
  df1,
  GENDER = relevel(GENDER, ref = "male"),
  RACE1 = relevel(RACE1, ref = "ASIAN")
)

### 5. 计算percentage of data

# Share of observations in each RACE1 category.
prop.table(table(df1$RACE1))

# Relative-frequency table of every column, as a list named by column
# (intended for factor columns). deparse.level = 0 keeps the helper's argument
# name out of the table header.
box1 <- lapply(df1, function(col) prop.table(table(col, deparse.level = 0)))
box1

### 6. 计算在每一个level上正的有多少

# Share of positive outcomes within each level of a factor.
#
# Arguments:
#   it       - factor; one proportion is computed per level.
#   outcome  - vector of the same length as `it` holding the outcome labels.
#              Defaults to df1$poisexp, looked up when the function is called.
#   positive - label in `outcome` that counts as a positive case.
#
# Returns a character matrix with 2 rows and one column per level of `it`:
# row 1 holds the level names, row 2 the proportions (stored as text because a
# matrix has a single type). The matrix has zero columns when `it` has no
# levels.
PPE <- function(it, outcome = df1$poisexp,
                positive = "health expenditure is positive") {
  stopifnot(length(outcome) == length(it))
  level_names <- levels(it)
  n_levels <- nlevels(it)
  m <- matrix(NA_character_, nrow = 2, ncol = n_levels)
  # seq_len() skips the loop entirely when there are no levels.
  for (i in seq_len(n_levels)) {
    # Rows with a missing factor value belong to no level.
    in_level <- !is.na(it) & it == level_names[i]
    # Missing outcomes are ignored instead of turning the proportion into NA.
    prob <- mean(outcome[in_level] == positive, na.rm = TRUE)
    m[, i] <- c(level_names[i], prob)
  }
  m
}
# Print the per-level positive share for every column of df1.
# seq_along() stays empty for a data frame without columns, and [[i]] always
# extracts a single column as a vector.
for (i in seq_along(df1)) {
  print(PPE(df1[[i]]))
}

### 7. 导出数据

# Save the selected columns as a CSV file in the working directory.
write.csv(x = df1, file = "meph.csv")

### 8. 比较两个模型的适用性

# Full model: logistic regression of poisexp on all nine selected predictors.
# (The earlier bare glm() call was removed: it fails because no formula is
# supplied.)
fit1 <- glm(
  poisexp ~ GENDER + RACE1 + REGION1 + EDUC1 + PHSTAT1 + MNHPOOR + ANYLIMIT +
    INCOME1 + insure,
  family = binomial(link = "logit"), data = df1
)
# Reduced model: the same specification without MNHPOOR.
fit2 <- glm(
  poisexp ~ GENDER + RACE1 + REGION1 + EDUC1 + PHSTAT1 + ANYLIMIT + INCOME1 +
    insure,
  family = binomial(link = "logit"), data = df1
)
# Compare the nested models with an explicit chi-square (likelihood-ratio)
# test so the table reports a p-value. The author's reading of the result is
# that the reduced model, fit2, should be used.
# NOTE(review): both fits must use the same rows; confirm that MNHPOOR has no
# missing values beyond those of the other predictors.
anova(fit1, fit2, test = "Chisq")

### 9. 预测正确率

# Fitted probabilities from the reduced model for every row of df1.
df1$prob <- predict(fit2, newdata = df1, type = "response")
# Show how the outcome factor is coded.
contrasts(df1$poisexp)
# For a factor response, binomial glm() treats the first level as failure and
# models the probability of the other level, so the predicted labels are taken
# from the factor itself instead of being typed in by hand.
# NOTE(review): assumes poisexp has exactly two levels - confirm.
outcome_levels <- levels(df1$poisexp)
df1$ppE <- ifelse(df1$prob > 0.5, outcome_levels[2], outcome_levels[1])
# 1 when the predicted label equals the observed one, 0 otherwise.
df1$Right <- ifelse(df1$ppE == df1$poisexp, 1, 0)
# Share of wrong (0) and correct (1) predictions.
prop.table(table(df1$Right))

自律的萱小主

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
应用统计分析作业四：数据框中因子型数据的处理

library(foreign)df<-read.spss("meph.sav ",to.data.frame=TRUE)###1. 将white与others合并为一个变量attach(df)contrasts(df$RACE1)# 语句只能用于factor(因子型)levels(df$RACE1) <- list("WHITE"=c("others","white"),"BLACK"="Black","NATIVE"="Native","ASIAN"="Asian")contr
复制链接

扫一扫