# GSEA分析====
# ①「ES折线图」,离垂直距离x=0轴最远的峰值便是基因集的ES值,峰出现在排序基因集的前端(ES值大于0)则说明通路上调,出现在后端(ES值小于0)则说明通路下调。
# ②「基因集成员位置图」,若竖线集中分布在基因排序列表的前端或后端,说明该基因集通路上调或下调;若竖线较均匀分布在基因排序列表中,则说明该基因集通路在比较的两个数据中无明显变化。红色部分对应的基因在实验组中高表达,蓝色部分对应的基因在对照组中高表达,「leading edge subset」 是(0,0)到曲线峰值ES出现对应的这部分基因成员。
# ③「所有基因rank值」(由log2FoldChang值计算得出)的分布,以棕红色面积图显展示。
# 1.1) GSEA_GO分析====
rm(list = ls())
setwd('E:\\01R_action\\RNA_seq_analysis\\output')
## 创建富集分析结果文件夹
dir.create("./6.GSEA")
load("./5.enrich_plot/enrich_comparedwith2304.rdata")
library(dplyr)
library(tibble)
library(clusterProfiler)
library(enrichplot)
DEG <- mergeID
rm(allID);rm(gene_list);rm(mergeID)
# 保留log2FoldChange值中的最大值
DEG1 <- DEG[,c('ENTREZID','log2FoldChange')]
DEG1 <- DEG1 %>%
group_by(ENTREZID) %>%
summarise(log2FoldChange = max(log2FoldChange, na.rm = TRUE))
DEG1 <- as.data.frame(DEG1)
geneList = DEG1[,'log2FoldChange']#log2FoldChange列!!!
names(geneList) = as.character(DEG1[,'ENTREZID'])
head(geneList,5)
geneList = sort(geneList, decreasing = TRUE)# 排序是必须的,记住排序方式
# GSEA分析,gseGO与gseKEGG
# 1.gseGO分析
gseago <- gseGO(geneList,
ont="ALL", # "BP","MF","CC"and ALL
OrgDb='org.At.tair.db',
keyType='ENTREZID',
eps = 0,#当实际P值小于eps值时,会收到警告,参数设置为0来获得更准确的P值估
nPermSimple = 200000,#控制置换测试的次数,增加后可以提高准确性,这次中2000时报错最少
verbose=F)
# 修改 gseago 对象中 p.adjust 和pvalue列的格式
# gseago@result$p.adjust <- formatC(gseago@result$p.adjust, format = "e", digits = 2)
# gseago@result$pvalue <- formatC(gseago@result$pvalue, format = "e", digits = 2)
#转换成数据框
gseago_res <- as.data.frame(gseago)#查看富集到了多少,即gseago_res行数,参数设置会影响富集行数
write.table(gseago_res,file="./6.GSEA/GSEA_GO/gseago_res.txt",sep = "\t",row.names = F,quote = F)
save(gseago,gseago_res,file = "./6.GSEA/GSEA_GO/GSEA_GO_comparedwith2304.rdata")
#根据自己研究内容选择想要呈现的基因集
growth <- c('morphogenesis','development','differentiation',
'maturation','senescence','death')
filtered <- gseago@result[grep(paste(growth, collapse = "|"),gseago@result$Description, ignore.case = TRUE),]
#根据结果,剔除无关部分
outter <- c('trichoblast','root','autophagosome','symbiont')
filtered <- filtered[grep(paste(outter, collapse = "|"),filtered$Description, ignore.case = TRUE, invert = TRUE),]
# 查找目标基因集的数值索引
location <- which(rownames(gseago@result) %in% filtered$ID)
related_sets <- gseago@geneSets[names(gseago@geneSets) %in% filtered$ID]
name <- names(related_sets)
#传统gseplot2绘图====
# 绘制感兴趣基因集图
color_vector <- c('forestgreen','#238392','#08306B','#FABC6B','#FB6A4A','#874AAA')
gsea_growth <- gseaplot2(gseago,
geneSetID = location,
color= color_vector,
pvalue_table = T,#显示了路径的P值表格
base_size = 24,
rel_heights = c(1.5, 0.6, 1),
subplots = 1:3,
ES_geom = "line")
# 保存图像
pdf(file = "./6.GSEA/GSEA_GO/gsea_go_growth.pdf",width = 12,height = 12)
print(gsea_growth)
dev.off()
name <- names(related_sets)
#新包GseaVis绘图更好看,用这个画====
plot_list <- lapply(name, function(geneSetID) {#如果用位置而不是具体每个list名字来画,p值会出问题
GseaVis::gseaNb(object = gseago,
subPlot =3,
lineSize =1,
geneSetID=geneSetID,
rmSegment = FALSE,
segCol ='red',
pvalX=0.7,pvalY =0.5,pHjust =0,
geneCol ="black",
curveCol= c("#76BA99", "#EB4747", "#996699"),
addPval = TRUE,pCol ='black',
markTopgene = TRUE,addGene = TRUE,topGeneN=5)
})
combined_plot <- cowplot::plot_grid(plotlist = plot_list, ncol = 3)
# 保存图像
pdf(file = "./6.GSEA/GSEA_GO/gsea_go_growth1.pdf",width = 20,height = 14)
print(combined_plot)
dev.off()
#GO_ridgeplot山脊图====
load('./6.GSEA/GSEA_GO/GSEA_GO_comparedwith2304.rdata')
gseago@result$p.adjust <- as.numeric(as.character(gseago@result$p.adjust))
# 定义关键词列表
growth <- c('morphogenesis','development','differentiation',
'maturation','senescence','death')
outter <- c('trichoblast','root','autophagosome','symbiont')
# 筛选出包含 growth 中任何一个关键词的条目
growth_pattern <- paste(growth, collapse = "|")
filtered_growth <- grepl(growth_pattern, gseago@result$Description, ignore.case = TRUE)
# 排除包含 outter 中任何一个关键词的条目
outter_pattern <- paste(outter, collapse = "|")
filtered_outter <- !grepl(outter_pattern, gseago@result$Description, ignore.case = TRUE)
# 筛选数据
gseago_filtered <- gseago
gseago_filtered@result <- gseago@result[filtered_growth & filtered_outter, ]
ridgep <- ridgeplot(gseago_filtered,
fill = "p.adjust",
core_enrichment = TRUE,
label_format = 60, #设置轴标签文字的每行字符数长度,过长则会自动换行。
orderBy = "NES",
decreasing = FALSE) +
ggtitle('Expression distribution of core enrichment genes of GSEA_GO')+
theme(plot.title = element_text(size =12, face = "bold"))
pdf(file = "./6.GSEA/GSEA_GO/gsea_go_ridgeplot.pdf",width = 9,height = 6)
print(ridgep)
dev.off()
#GSEA_KEGG====
DEG <- mergeID
rm(allID);rm(gene_list);rm(mergeID)
# 保留log2FoldChange值中的最大值
DEG1 <- DEG[,c('TAIR','log2FoldChange')]
DEG1 <- DEG1 %>%
group_by(TAIR) %>%
summarise(log2FoldChange = max(log2FoldChange, na.rm = TRUE))
DEG1 <- as.data.frame(DEG1)
geneList = DEG1[,'log2FoldChange']#log2FoldChange列!!!
names(geneList) = as.character(DEG1[,'TAIR'])
head(geneList,5)
geneList = sort(geneList, decreasing = TRUE)# 排序是必须的,记住排序方式
# 运行GSEA
gseKegg <- gseKEGG(geneList, organism = "ath", pvalueCutoff = 1)
gseKegg@result[["Description"]] <- sub(" -.*", "", gseKegg@result[["Description"]])
#转换成数据框
gseakegg_res <- as.data.frame(gseKegg)#查看富集到了多少,即gseago_res行数,参数设置会影响富集行数
write.table(gseakegg_res,file="./6.GSEA/GSEA_Kegg/gseakegg_res.txt",sep = "\t",row.names = F,quote = F)
save(gseKegg,gseakegg_res,file = "./6.GSEA/GSEA_Kegg/GSEA_KEGG.rdata")
#使用gseaNb函数绘制GSEA图====
#单个通路可视化====
library(GseaVis)
gsea_kegg <- GseaVis::gseaNb(
object = gseKegg,
subPlot =3,
lineSize =1,
geneSetID='ath00195',
rmSegment = FALSE,
segCol ='red',
pvalX=0.7,pvalY =0.5,pHjust =0,
geneCol ="black",
curveCol= c("#76BA99", "#EB4747", "#996699"),
addPval = TRUE,
pCol ='black',
markTopgene = TRUE,
addGene = TRUE,
topGeneN=5)#显示的是gseKegg@geneSets[["ath00195"]]这里面的,不是gseKegg@result[["core_enrichment"]]中的
# 保存图像
pdf(file = "./6.GSEA/GSEA_Kegg/gsea_kegg_1.pdf",width = 12,height = 8)
print(gsea_kegg)
dev.off()
#2.根据自己研究内容选择想要呈现的基因集
growth <- c('Photosynthesis','Starch','Nitrogen','Carbon','glycan')#Galactose乳糖
filtered <- gseKegg@result[grep(paste(growth, collapse = "|"),gseKegg@result$Description, ignore.case = TRUE),]
#根据结果,剔除无关部分
outter <- c('O-glyca','degradation','folate','various')
filtered <- filtered[grep(paste(outter, collapse = "|"),filtered$Description, ignore.case = TRUE, invert = TRUE),]
# 查找目标基因集的数值索引
location <- which(rownames(gseKegg@result) %in% filtered$ID)
related_sets <- gseKegg@geneSets[names(gseKegg@geneSets) %in% filtered$ID]
name <- names(related_sets)
plot_list <- lapply(name, function(geneSetID) {#如果用位置而不是具体每个list名字来画,p值会出问题
GseaVis::gseaNb(object = gseKegg,
subPlot =3,
lineSize =1,
geneSetID=geneSetID,
rmSegment = FALSE,
segCol ='red',
pvalX=0.7,pvalY =0.5,pHjust =0,
geneCol ="black",
curveCol= c("#76BA99", "#EB4747", "#996699"),
addPval = TRUE,pCol ='black',
markTopgene = TRUE,addGene = TRUE,topGeneN=5)
})
combined_plot <- cowplot::plot_grid(plotlist = plot_list, ncol = 3)
# 保存图像
pdf(file = "./6.GSEA/GSEA_Kegg/gsea_kegg_pathway.pdf",width = 20,height = 14)
print(combined_plot)
dev.off()
#KEGG_ridgeplot山脊图====
load('./6.GSEA/GSEA_Kegg/GSEA_KEGG.rdata')
gseKegg@result$p.adjust <- as.numeric(as.character(gseKegg@result$p.adjust))
# 定义关键词列表
growth <- c('Photosynthesis','Starch','Nitrogen','Carbon','glycan')#Galactose乳糖
outter <- c('O-glyca','degradation','folate','various')
# 筛选出包含 growth 中任何一个关键词的条目
growth_pattern <- paste(growth, collapse = "|")
filtered_growth <- grepl(growth_pattern, gseKegg@result$Description, ignore.case = TRUE)
# 排除包含 outter 中任何一个关键词的条目
outter_pattern <- paste(outter, collapse = "|")
filtered_outter <- !grepl(outter_pattern, gseKegg@result$Description, ignore.case = TRUE)
# 筛选数据
gseKegg_filtered <- gseKegg
gseKegg_filtered@result <- gseKegg@result[filtered_growth & filtered_outter, ]
#Photosynthesis有两个,将p.adjust > 0.05命名为Photosynthesis_2,否则会画图失败
library(dplyr)
gseKegg_filtered <- gseKegg_filtered %>%
mutate(Description = case_when(
Description == "Photosynthesis" & p.adjust > 0.05 ~ "Photosynthesis_2", # 条件匹配
TRUE ~ Description # 其余保持不变
))
ridgep1 <- ridgeplot(gseKegg_filtered,
fill = "p.adjust",
core_enrichment = TRUE,
label_format = 60, #设置轴标签文字的每行字符数长度,过长则会自动换行。
orderBy = "NES",
decreasing = FALSE) +
ggtitle('Expression distribution of core enrichment genes of GSEA_KEGG')+
theme(plot.title = element_text(size =12, face = "bold"))
pdf(file = "./6.GSEA/GSEA_Kegg/gsea_kegg_ridgeplot.pdf",width = 9,height = 6)
print(ridgep1)
dev.off()
GSEA分析 转录组
于 2025-03-16 16:59:01 首次发布