GSEA富集分析需要一个基因排序列表(a ranked list of genes),通常按logFC排序。
1. 导入包,读入差异表达数据
rm(list=ls())
setwd("working_dir")
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("fgsea")
library(msigdb) # The Molecular Signatures Database (MSigDB)
library(fgsea)
#ls("package: fgsea")
#ls("package:msigdb")
# read the file
mydata <- read.csv("your_file",row.names=1)
head(mydata)
mydata <- mydata[order(mydata$logFC,decreasing=TRUE),]
FCgenelist <- mydata$logFC
names(FCgenelist) <- mydata$symbol
head(FCgenelist)
2. 准备基因集
The MSigDB gene sets are divided into 9 major collections.
# https://www.gsea-msigdb.org/gsea/msigdb/index.jsp
# "hs" for human and "mm" for mouse
msigdb.hs = getMsigdb(org = 'hs',id = c("SYM", "EZID"))
# Downloading and integrating KEGG gene sets
msigdb.hs = appendKEGG(msigdb.hs)
length(msigdb.hs)
# 可以根据需求选择子基因集
listCollections(msigdb.hs)
#hallmarks = subsetCollection(msigdb.hs, 'h')
#c3<- subsetCollection(msigdb.hs, 'c3')
msigdb_ids <- geneIds(msigdb.hs)
class(msigdb_ids) # list
3. fgsea 富集分析并作图
fgseaRes <- fgsea(pathways = msigdb_ids,
stats = FCgenelist,
minSize=15,
maxSize=500,
nperm=10000)
head(fgseaRes[order(pval), ])
sum(fgseaRes[, padj < 0.01])
topPathwaysUp <- fgseaRes2[ES > 0][head(order(pval), n=10), pathway]
topPathwaysDown <- fgseaRes2[ES < 0][head(order(pval), n=10), pathway]
topPathways <- c(topPathwaysUp, rev(topPathwaysDown))
# 画table图
tiff('enriched_pathway.tiff', units="in", width=8, height=6, res=600, compression = 'lzw')
plotGseaTable(msigdb_ids[topPathways], FCgenelist, fgseaRes,
gseaParam=0.5)
dev.off()
# 画通基因集的富集图, HALLMARK_HYPOXIA 为一种基因集名称。
plotEnrichment(msigdb_ids[["HALLMARK_HYPOXIA"]],FCgenelist)+
labs(title="HALLMARK_HYPOXIA")