生物医学统计第三次作业

howell（Python）

已于 2024-05-27 23:36:08 修改

阅读量335

点赞数 2

分类专栏：生物医学统计文章标签： r语言

于 2024-05-27 21:59:27 首次发布

本文链接：https://blog.csdn.net/howell55/article/details/139249026

版权

生物医学统计专栏收录该内容

2 篇文章 1 订阅

订阅专栏

注意！！！！！

第一行的路径用自己文件保存位置的路径

不知道的话，右键文件点开属性，复制在？？？？处就行了。记得把"\"改成"/"


path_file<- "????????/data_for_assignment4.rds"
  list_data<-readRDS(path_file)

amess <- list_data$q1_4[[1]]
bcStats <- list_data$q1_4[[2]]
trials <-list_data$q1_4[[3]]

#Q1
mod <- aov(folate~factor(treatmnt),data=amess)
mod
summary(aov(mod))

# 为什么更有效？
# 控制I型错误：当比较多个组时，单独的t检验会增加I型错误（错误地拒绝了正确的零假设）的风险。方差分析通过一次分析所有组来控制这种风险。
# 统计功效：方差分析通常比单独的t检验具有更高的统计功效，因为它使用所有可用数据来估计变异性。
# 多重比较问题：如果进行多个单独的t检验，需要进行多重比较校正。方差分析本身就包含了对多重比较的校正，因此不需要额外的校正步骤。

#Q2
pairwise.t.test(amess$folate, amess$treatmnt,p.adjust.method = "holm")

#乳腺癌发病率
pacman::p_load("rmarkdown")
paged_table(bcStats)
#Q3
pairwise.wilcox.test(bcStats$NewCasesOfBreastCancerIn2002, 
                     bcStats$continent, p.adjust.method = "bonferroni",exact= FALSE)

#美洲和非洲，欧洲和非洲，欧洲和美洲，欧洲和亚洲有显著性差异

#Linear Regression
paged_table(trials)

x <- trials$Drug.concentration
y <- trials$Cell.Count

plot(x,y,cex=0.8)
text(x,y+0.2,labels=1:length(x))

model <- lm(y ~x)

par(mfrow=c(1,1))
plot(x,y,pch=16)
points(x,model$fitted.values,pch=16,col="grey")
arrows(x,model$fitted.values,x,y,code=0,col="grey")
abline(model,col="red",lty=2)
text(x,y+0.2,labels=1:length(x),cex=0.8,col="steelblue")

# Q4 summary(model)
par(mfrow=c(2,2))
plot(model)
#对数据进行分析后发现，其中10,15两个数据点误差较大。
#在Residuals vs Fitted中，3号、10号和15号残差均比较大，且10,15离拟合线较远
# 在Standardized residuals vs Observations中，看可以看出来，数据间相对不独立，有线性逻辑关系，但是10,15两个点离拟合出来的线较远
# Cook's distance vs Observations用于检测数据中的异常值，很清晰的我们可以发现，10,15两个点不分布在红线周围
# Residuals vs Leverage用于识别有高杠杆值的点，我们发现3点距离其他点远，杠杆值高，对数据影响大。

#Q5
pacman::p_load("ggpubr")
pacman::p_load("factoextra")

trials$Drug.concentration <- as.numeric(trials$Drug.concentration)
trials$Cell.Count <- as.numeric(trials$Cell.Count)
trials <- na.omit(trials)  # 移除含有 NA 的行

set.seed(123)
res.km <- kmeans(scale(trials[,c(2,3)]), 3, nstart = 25)
fviz_cluster(res.km, data = trials[2-3])

#Q6

rowInd <- match(c("PILRA", "NPFFR2", "GPATCH3", "IFIT3", "KCNH1", "AUTS2", "ITGA9", "ARR3"), geneAnnotation$HUGO.gene.symbol)
geneAnnotation[rowInd,]
pacman::p_load('genefilter')
pacman::p_load('matrixStats')
# the rowSds function will calculate the standard deviation for each row in a numeric matrix
geneVar <- rowSds(as.matrix(normalizedValues))
library(RColorBrewer)

sampcol <- rep("blue", ncol(normalizedValues))

sampcol[patientMetadata$er == 1 ] <- "yellow"

rbPal <- brewer.pal(10, "RdBu")


genelist<-geneAnnotation$HUGO.gene.symbol


normalizedValues_matrix<-as.matrix(normalizedValues)

heatmap(normalizedValues_matrix,
        ColSideColors = sampcol,
        col=rbPal,
        labRow = genelist,
        labCol="")

genelist <- c("PILRA", "NPFFR2", "GPATCH3", "IFIT3", "KCNH1", "AUTS2", "ITGA9", "ARR3")
probes   <- geneAnnotation$probe[match(genelist, geneAnnotation$HUGO.gene.symbol)]
exprows  <- match(probes, rownames(normalizedValues))
heatmap(as.matrix(normalizedValues[exprows,]))

library(RColorBrewer)

sampcol <- rep("blue", ncol(normalizedValues))

sampcol[patientMetadata$er == 1 ] <- "yellow"

rbPal <- brewer.pal(10, "RdBu")

heatmap(as.matrix(normalizedValues[exprows,]),
        ColSideColors = sampcol,
        col=rbPal,
        labRow = genelist,labCol="")

# hint: 
chr8Genes <- geneAnnotation[geneAnnotation$Chromosome=="chr8",]
head(chr8Genes)
knitr::kable(chr8Genes)

chr8GenesOrd <- chr8Genes[order(chr8Genes$Start),]
head(chr8GenesOrd)

match(chr8GenesOrd$probe, rownames(normalizedValues))

chr8Expression <- normalizedValues[match(chr8GenesOrd$probe, rownames(normalizedValues)),]
dim(chr8Expression)

ngenes <- nrow(chr8Expression)
pdf("Chromosome8Genes.pdf")
pvals <- NULL

# hint:

for(i in 1:nrow(chr8Expression)) {
  tmp <- t.test(as.numeric(chr8Expression[i,]) ~  factor(patientMetadata$er))
  pvals[i] <- tmp$p.value

   if(tmp$p.value < 0.05){
    boxplot(as.numeric(chr8Expression[i,]) ~ factor(patientMetadata$er),
            main=chr8Genes$HUGO.gene.symbol[i])
   }

  }

pvals
dev.off()

chr8Genes[pvals<0.05,]$HUGO.gene.symbol

pvals

sum(pvals < 0.05)

第六题我跑不出来，那个库下不下来，希望有大神指点