#### 1. data collection
library(tidyverse)
library(GSVA)
library(limma)
library(readr)
annotations <- read_csv("~/Program/mitten-crab/data/annotations.csv")
library(readr)
Tpm <- read_csv("~/Program/mitten-crab/data/Tpm.csv")
gene_exp = TPM %>% column_to_rownames(var = "gene_id")
#### 2. handle data
# 将基因与GO的对应关系整理出来
go2gene <- dplyr::select(annotations,GO = GOs,GID = gene_id) %>%
separate_rows(GO, sep = ',', convert = F) %>%
filter(GO!="-",!is.na(GO)) %>%
unique()
go2gene %>% head()
go = go2gene$GO %>% unique()
# 使用 mclapply 并行处理成list用于gsva输入
numCores <- detectCores() - 1 # 使用系统核心数减一,留一个核心给系统其他任务
go_gsva <- mclapply(go, function(i) {
go2gene %>%
filter(GO == i) %>%
pull(var = "GID") %>%
unique()
}, mc.cores = numCores)
#### 3. GSVA分析
gsva_exp <- gsva(as.matrix(log2(gene_exp+1)),
go_gsva,
kcdf="Gaussian" ,#"Gaussian" for logCPM,logRPKM,logTPM, "Poisson" for counts
verbose=T,
parallel.sz = parallel::detectCores())#调用所有核
#### 4. limma进行差异分析
group_list = c(rep("Small",3),rep("Big",3))
design <- model.matrix(~ factor(group_list)) # 创建设计矩阵,用于线性模型分析
colnames(design)=levels(factor(group_list)) # 设置设计矩阵的列名
row.names(design)<- sample_info %>% filter(site == "TE") %>% rownames()
contrast.matrix<-makeContrasts("Big - Small", #大相较于小
levels = design) # 创建一个对比矩阵,指定差异表达分析的比较方向
fit <- lmFit(gsva_exp[,c(sample_info %>% filter(site == "TE") %>% rownames())],
design) # 对每个基因应用线性模型,es是表达数据矩阵
fit2 <- contrasts.fit(fit, contrast.matrix) # 应用对比矩阵到拟合的线性模型上
fit <- eBayes(fit2) # 使用经验贝叶斯方法对标准误进行调整,改善统计推断
allGeneSets <- topTable(fit,
coef="Big - Small",
number=Inf) %>% # 获取所有基因的统计摘要,coef参数需要与对比矩阵中的对比名称一致
as.data.frame() %>%
rownames_to_column(var = "GO_id")
非模式生物GSVA分析+limma差异分析
最新推荐文章于 2024-07-22 16:18:06 发布