TCGA数据差异分析整理
安装软件
对于差异基因分析,我们有三个R包可用:DESeq2、edgeR和limma,三个包都可以。作者更倾向于DESeq2包,但这个包运行很慢,建议睡前开始跑,醒来时就跑完了。
# Install any missing dependency, then attach all of them.
# requireNamespace() only checks availability; attaching is done explicitly
# afterwards so that a package installed during this run is usable right away.
cran_pkgs <- c("ggplotify", "patchwork", "cowplot")
bioc_pkgs <- c("DESeq2", "edgeR", "limma")

for (pkg in cran_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# BiocManager is itself a CRAN package and must exist before
# BiocManager::install() can be called.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
for (pkg in bioc_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    BiocManager::install(pkg)
  }
}

# Attach in the same order as before; library() fails loudly if an
# installation did not succeed instead of continuing silently.
for (pkg in c(cran_pkgs, bioc_pkgs)) {
  library(pkg, character.only = TRUE)
}
DESeq2
# DESeq2 differential expression on the TCGA count data.
# Starts from an empty workspace (kept from the original script).
rm(list = ls())
# The code below reads `exp`, `group_list` and `cancer_type`; they are
# presumably provided by this file -- TODO confirm its contents.
load("TCGA-stamgdc.Rdata")
table(group_list)
#deseq2----
library(DESeq2)
# One row per sample (column of `exp`) with its group label.
colData <- data.frame(
  row.names = colnames(exp),
  condition = group_list
)
# DESeq() is slow, so the fitted object is cached on disk and reused.
dds_file <- paste0(cancer_type, "dd.Rdata")
if (!file.exists(dds_file)) {
  dds <- DESeqDataSetFromMatrix(
    countData = exp,
    colData = colData,
    design = ~ condition
  )
  dds <- DESeq(dds)
  save(dds, file = dds_file)
} else {
  # Restore the cached `dds`; without this, `dds` is undefined whenever the
  # cache file already exists, because the workspace was cleared above.
  load(dds_file)
}
# Contrast is c("condition", <levels of group_list in reverse order>).
# NOTE(review): assumes group_list is a factor with exactly two levels,
# giving last level vs. first level -- confirm against the input data.
res <- results(dds, contrast = c("condition", rev(levels(group_list))))
# Sort by adjusted p-value, ascending (order() places NA last by default).
resOrdered <- res[order(res$padj), ]
DEG <- as.data.frame(resOrdered)
head(DEG)
# Fold-change threshold for labelling genes as up-/down-regulated:
# mean + 2 * sd of |log2FoldChange|. NA fold changes are ignored so the
# cutoff does not itself become NA.
logFC_cutoff <- with(
  DEG,
  mean(abs(log2FoldChange), na.rm = TRUE) +
    2 * sd(abs(log2FoldChange), na.rm = TRUE)
)
#logFC_cutoff DEG$change = as.factor(ifelse(DEG$padj < 0.05 & abs(DEG$log2$log2FoldChaFoldChange) > logFC_cutoff,i