setwd("C:/Users/me/Desktop/8月22") ###### 加载需要的包 ####### library(CTT) library(openxlsx) library(dplyr) library(tidyr) library(difR) source('C:\\Users\\me\\Desktop\\8月22\\difmh.R',local=TRUE) source('C:\\Users\\me\\Desktop\\8月22\\mantel.R',local=TRUE) source('C:\\Users\\me\\Desktop\\8月22\\distractor.R',local=TRUE)
以上是需要的包,和预写的函数
form <- read.xlsx("form.xlsx",sheet=1,colNames = T) dataQ<-read.xlsx("识别结果.xlsx",sheet=4,colName
读取form和结果数据
kk <- colnames(dataQ)[grep("VR",colnames(dataQ))] formVR <- data.frame(a=1) for(i in kk){ c <- grep(i,form$ItemID)[1] formVR <-data.frame(formVR,i=t(form[c,])) } formVR <- as.data.frame(t(formVR[,-1]),stringsAsFactors=F) kk2 <- colnames(dataQ)[grep("NR",colnames(dataQ))] formNR <- data.frame(a=1) for(i in kk2){ c <- grep(i,form$ItemID)[1] formNR <-data.frame(formNR,i=t(form[c,])) } formNR <- as.data.frame(t(formNR[,-1]),stringsAsFactors=F) keyV <- formVR$Key keyN <- formNR$Key
抽取分量表数据和分量表key
dataNR <- dataQ[,grep("NR",colnames(dataQ))] ##MR数据 dataVR <- dataQ[,grep("VR",colnames(dataQ))] ##VR数据 info <- dataQ[,1:5] ##提取信息 dataVR[which(dataVR!="A"&dataVR!="B"&dataVR!="C"&dataVR!="D"&dataVR!="O",arr.ind=T)] <- "O"##将其他乱七八糟的选项替换为O dataNR[which(dataNR!="A"&dataNR!="B"&dataNR!="C"&dataNR!="D"&dataNR!="O",arr.ind=T)] <- "O"##将其他乱七八糟的选项替换为O
data1 <- dataVR key <- keyV form1 <- formVR
以上数据需要根据分量表进行修改,以下code dif分析处需修改文件
JG <- score(data1,key,output.scored=TRUE,rel=TRUE) scored <- as.data.frame(JG$scored) ##0,1计分 score <- as.matrix(JG$score) ##总分 JGreli <- as.data.frame(unlist(JG$reliability)) allJG <- data.frame(info,scored,score) #write.csv(allJG,"VR01计分.csv") ##输出0,1分+总分 itemn <- ncol(data1) alpha <- round(JGreli[3,],2) ########################################### distractor analysis ########################################### #distractor.analysis(dataVR,keyV,p.table=F,write.csv="C:/Users/me/Desktop/BEO/11中结果/VR计数.csv")##计数 distractor(dataNR,keyN,p.table=T,write.csv="NRdis.csv") fourNR <- read.csv("NRdis.csv",header=F) #distractor.analysis(dataVR,keyV,p.table=T,write.csv="C:/Users/me/Desktop/BEO/11中结果/VR计数.csv")##百分比 distractor(dataVR,keyV,p.table=T,write.csv="VRdis.csv") fourVR <- read.csv("VRdis.csv",header=F) ###################################### 均值标准差等 ####################################### four <- fourVR four <- fourNR qfd <- round(t(as.matrix(apply(scored,2,function(dat)cor(dat,score)))),2) ##皮尔逊 pbc <- round(apply(scored,2,function(dat)((mean(score[dat==1,])-mean(score[dat==0,]))/ sd(score))*sqrt(mean(dat)*(1-mean(dat)))),2) mean1 <- matrix(1,itemn,1) for(i in c("A","B","C","D","O")){ A <- as.numeric(t(round(apply(data1,2,function(dat)mean(score[dat==i,])),2))) ##求均值 B <- as.numeric(t(round(apply(data1,2,function(dat)sd(score[dat==i,])),2))) ##求标准差 C <- apply(data1,2,function(dat)sum(dat==i)) ##求人数 D <- as.numeric(round(t(C)/nrow(data1),4)*100) ##求人数占比 mean1<- data.frame(mean1,i=A) mean1 <- data.frame(mean1,i=B) mean1 <- data.frame(mean1,i=C) mean1 <- data.frame(mean1,i=D) } result <- mean1[,-1] colnames(result) <- c("mean_A","sd_A","nopt_A","popt_A","mean_B","sd_B","nopt_B","popt_B", "mean_C","sd_C","nopt_C","popt_C","mean_D","sd_D","nopt_D","popt_D", "mean_O","sd_O","nopt_O","popt_O") result1 <- as.data.frame(t(result)) result1<- tibble::rownames_to_column(result1) ############################################## 选项干扰项分析 ############################################# corout <- matrix(NA,itemn,5) colnames(corout)<-c("A","B","C","D","O") for(i in 1:ncol(data1)){ zong <- score-scored[,i] #去除第i道题目的总分 for(j in c("A","B","C","D","O")){ c <- data1[,i] # 第i道题目abcd c[which(c==j)] <- 1 #找第i题中 是abcd的选项 替换为1 c[which(c!=1)] <- 0 #其他为0 core <- cor(as.numeric(c),zong) corout[i,j] <- core } } rownames(corout) <- t(colnames(data1)) ############################################## DIF ############################################# group <- info$性别 group[group=="M"] <- 0 group[group=="F"] <- 1 r <- difmh(scored,group,focal.name=1,purify=T,save.output = TRUE) rr <- read.csv("NRMH.csv",sep = ",",stringsAsFactors=F) #m <- mantelhaen.test(scored,as.numeric(group)) dif <- 1+(r$DIFitems-1)*5 plot(r) ############################################## 整合数据 ######################################### RESULT <- as.data.frame(matrix(NA,itemn*5,20)) corr <- as.data.frame(t(corout)) corr <- tibble::rownames_to_column(corr) #将行名转为显式向量 mean2<- result1[grep("mean",result1[,1]),] sd2 <- result1[grep("sd",result1[,1]),] nopt <- result1[grep("nopt",result1[,1]),] popt <- result1[grep("popt",result1[,1]),] cor <- gather(corr,corr,xx,-1) #干扰项相关 mean2<- gather(mean2,mean2,mean,-rowname) sd2 <- gather(sd2,sd2,sd,-rowname) nopt<- gather(nopt,nopt,nopt,-rowname) popt<- gather(popt,popt,popt,-rowname) RESULT <- data.frame(itemid=NA,key=NA,run_key=NA,ais=NA,rbi=NA, rbi2=NA,flag_p=NA,flag_r=NA,α信度=alpha,MH_chisq=NA,MH_delta=NA, DIF_grade=NA,DIF_sig=NA,dis_rbi=round(cor[,3],2),nopt=nopt[,3],popt=popt[,3], mean=mean2[,3],sd=round(as.numeric(sd2[,3]),2)) RESULT$key <- four[,1] #key RESULT[which(RESULT$key=="*A"|RESULT$key==" A"),1] <- as.character(rownames(corout)) #itemid RESULT <- separate(RESULT,key,c("key","option"),sep = " ") RESULT$run_key[which(RESULT$dis_rbi>0)] <- "*" #run_key RESULT$rbi[-which(is.na(RESULT$itemid))] <- t(qfd) #rbi RESULT$rbi2[-which(is.na(RESULT$itemid))] <- pbc #rbi2 RESULT$flag_r[which(RESULT$rbi<0.2)] <- "low_r" #flag_r RESULT$ais[-which(is.na(RESULT$itemid))] <- round(as.numeric(RESULT$popt[which(is.na(RESULT$option))])/100,2) #ais RESULT$flag_p[which(RESULT[,5]>=0.95)] <- "high_p" #flag_p RESULT$MH_chisq[-which(is.na(RESULT$itemid))] <- as.numeric(rr$alphaMH[c(1:40)]) RESULT$MH_delta[-which(is.na(RESULT$itemid))] <- as.numeric(rr$deltaMH[c(1:40)]) RESULT$DIF_grade[-which(is.na(RESULT$itemid))] <- as.character(rr$DIF_grade [c(1:40)]) RESULT$DIF_sig[dif] <- "*" distrac <- as.data.frame(round(four[,c(2:5)],4)*100) colnames(distrac) <- c("low_25","p25_50","p_50-75","top25") RESULT2 <- data.frame(RESULT,distrac) RESULT2[is.na(RESULT2)] <- " " write.csv(RESULT2,"NRresult.csv",row.names = F)
########################################### 分维度 ########################################### dimension <- unique(form1$Strand3) ##提取维度 design <- as.data.frame(matrix(0,itemn,length(dimension))) ##定义设计矩阵 colnames(design) <- dimension for(i in dimension){ id <- form1$ItemID[grep(i,form1$Strand3)] design[,i][grep(i,form1$Strand3)] <- 1 ##填充设计矩阵 } sub <- subscales(scored,design,dimension,F,F) ##分维度计算 frac_dim <- lapply(sub,function(x)x$score/ncol(x$scored)) ##分维度计算通过率 frac_dim <- round(as.data.frame(frac_dim),4) vr_score <-as.data.frame(lapply(sub,function(x)x$score)) vrscore <- apply(vr_score,1,function(x)sum(x)) ##分维度计算总分 prr1 <- ((100*rank(vrscore)-50)/length(vrscore)) ##计算百分等级 prr1[prr1<=1] <- 1 prr2 <- floor(prr1) ##向下取整 #pr1 <- rank(vrscore)/length(vrscore)*100 #pr2 <- floor(pr1) prr <- prr2 ##计算标准九 prr[prr<4] <- 1 prr[prr>=4&prr<=10] <- 2 prr[prr>=11&prr<=22] <- 3 prr[prr>=23&prr<=39] <- 4 prr[prr>=40&prr<=59] <- 5 prr[prr>=60&prr<=76] <- 6 prr[prr>=77&prr<=88] <- 7 prr[prr>=89&prr<=95] <- 8 prr[prr>=96&prr<100] <- 9 transfer <- data.frame(info,vr_score=vrscore,pr_vr=prr1,pr_vrw=floor(prr1),vr标准九=prr,frac_dim) write.csv(transfer,"VRtransfer.csv",row.names = F)
以上是根据数据形态进行地量表分转换(后期会有根据已知常模进行量表分的转换)
这一轮code的优化主要体现在,灵活应用data.frame和各种apply族的函数。
初步使用了dplyr包,后期会重点写dplyr包的使用。