R置换检验
(1) R函数代码
##################
## function
##################
############
treatment_effects <- function(df, var){
# length of variables
lenVar <- length(var)
# the result vector
ret <- rep(NA, lenVar)
for(i in 1:lenVar){
varI <- var[i]
# data of variable i where treatment is drug
drugDat <- df[df$treatment == "drug", varI]
# data of variable i where treatment is placebo
placeboDat <- df[df$treatment == "placebo", varI]
# the effect value
ret[i] <- mean(drugDat) - mean(placeboDat)
names(ret)[i] <- varI
}
return(ret)
}
############
############
# number of permutations---numP
permutation_pvalues <- function(df, var, numP=1000){
# effects result
varEffects <- treatment_effects(df, var)
lenVar <- length(var)
# the result data.frame for each permutation
ret <- matrix(NA, nrow=numP, ncol=lenVar)
ret <- as.data.frame(ret)
names(ret) <- var
n <- nrow(df)
for(k in 1:numP){
sampk <- sample(1:n)
df$treatment <- df$treatment[sampk]
for(i in 1:lenVar){
varI <- var[i]
drugDat <- df[df$treatment == "drug", varI]
placeboDat <- df[df$treatment == "placebo", varI]
ret[k, varI] <- mean(drugDat) - mean(placeboDat)
}
}
## compute p-value for each variable
pvalue <- rep(NA, lenVar)
for(i in 1:lenVar){
varI <- var[i]
# random permutation result
randomDiff <- ret[, varI]
# real data result
observedDiff <- varEffects[varI]
pvalue[i] = sum(abs(randomDiff) >= abs(observedDiff)) / numP
names(pvalue)[i] <- varI
}
return(pvalue)
}
############
############
print_effects_with_pvalues <- function(df, var, numP1=1000){
# effect value
efft <- treatment_effects(df, var)
# p value
pval <- permutation_pvalues(df, var, numP=numP1)
efft <- efft[var]
pval <- pval[var]
# print result data
printMat <- matrix(NA, nrow=2, ncol=length(var))
row.names(printMat) <- c("effect", "pvalue")
colnames(printMat) <- var
printMat[1, ] <- efft
printMat[2, ] <- pval
print(printMat)
}
############
(2) Rmd文件结果检验
---
title: "entire_data_output"
date: "Wednesday, April 01, 2015"
output: html_document
---
### (a) entire data
```{r,echo=TRUE,message=FALSE,warning=FALSE}
# set random seed
set.seed(100)
# load function
source("function.R")
dat <- read.table("http://www.cs.utoronto.ca/~radford/csc120/a3data",
header=TRUE)
# create a new column in the data frame called BMD_change that is BMD2 minus BMD1
dat$BMD_change <- with(dat, BMD2 - BMD1)
head(dat)
print_effects_with_pvalues(dat, c("BMD2", "BMD_change", "headache", "stomachache"))
```
### (b1) sub1--just the women
```{r,echo=TRUE,message=FALSE,warning=FALSE}
sub1 <- subset(dat, sex == "F")
print_effects_with_pvalues(sub1, c("BMD2", "BMD_change", "headache", "stomachache"))
```
### (b2) sub2--just the women
```{r,echo=TRUE,message=FALSE,warning=FALSE}
sub2 <- subset(dat, sex == "M")
print_effects_with_pvalues(sub2, c("BMD2", "BMD_change", "headache", "stomachache"))
```
### (b3) sub3--just the women younger than 50
```{r,echo=TRUE,message=FALSE,warning=FALSE}
sub3 <- subset(dat, sex == "F" & age < 50)
print_effects_with_pvalues(sub3, c("BMD2", "BMD_change", "headache", "stomachache"))
```
### (b4) sub4--just the women 50 or more years old
```{r,echo=TRUE,message=FALSE,warning=FALSE}
sub4 <- subset(dat, sex == "F" & age < 50)
print_effects_with_pvalues(sub4, c("BMD2", "BMD_change", "headache", "stomachache"))
```