setwd('D:\\11_NM_DNAmeth\\02_DMs_DMR_gene\\04_new_DMR_DEG_GO\\rapid/')
library('clusterProfiler')
library(topGO)
library(AnnotationHub)
library(dbplyr)
library(ggplot2)
library(pathview)
library(biomaRt)
hub <- AnnotationHub()
query(hub, 'Oryza sativa')
rice <- hub[["AH85565"]]
rice2<-hub[["AH85566"]]
keytypes(rice)#查看可使用的key
keytypes(rice2)#查看可使用的key
data<-read.table('./down_hyper_no.id.txt',header = 0)
# #在ensemble plants上能看到所有已提交的物种信息
# ensembl = useMart(biomart = "plants_mart",host = "http://plants.ensembl.org")
# #查看ensemble plants都有哪些物种信息,并设置为该物种信息。
# dataset <- listDatasets(mart = ensembl)
# head(dataset)
ensembl = useMart(biomart = "plants_mart",host = "http://plants.ensembl.org",
dataset="osativa_eg_gene")
entrez_id_df <- getBM(attributes =c("ensembl_gene_id",'entrezgene_id'),
filters = "ensembl_gene_id",values = data,mart = ensembl)
gene<- as.character(na.omit(entrez_id_df$entrezgene_id))
enrich.go <- enrichGO(gene = gene, #基因列表文件中的基因名称
OrgDb = rice, #指定物种的基因数据库,示例物种是绵羊(sheep)
keyType = 'ENTREZID', #指定给定的基因名称类型,例如这里以 entrze id 为例
ont = 'ALL', #可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者
pAdjustMethod = 'fdr', #指定 p 值校正方法
pvalueCutoff = 0.05, #指定 p 值阈值,不显著的值将不显示在结果中
qvalueCutoff = 0.2,
# minGSSize = 1,#指定 q 值阈值,不显著的值将不显示在结果中
readable = FALSE)
write.table(enrich.go, 'kegg.txt', sep = '\t', quote = FALSE, row.names = FALSE)
barplot(enrich.go)
data<-read.table('./BCD_vs_CK-up.id.txt',header = 0)
entrez_id_df <- getBM(attributes =c("ensembl_gene_id",'entrezgene_id'),
filters = "ensembl_gene_id",values = data,mart = ensembl)
write.table(entrez_id_df, 'BCD_vs_CK-up.zhuan.xls', sep = '\t', quote = FALSE, row.names = FALSE)
gene<- as.character(na.omit(entrez_id_df$entrezgene_id))
kegg <- enrichKEGG(
gene = gene, #基因列表文件中的基因名称
keyType = 'kegg', #kegg 富集
organism = 'osa', #例如,oas 代表绵羊,其它物种更改这行即可
pAdjustMethod = 'fdr', #指定 p 值校正方法
pvalueCutoff = 0.05,
minGSSize = 1,#指定 p 值阈值,不显著的值将不显示在结果中
qvalueCutoff = 0.2 #指定 q 值阈值,不显著的值将不显示在结果中
)
dotplot(kegg)
write.table(kegg, 'BCD_vs_CK-up.kegg.txt', sep = '\t', quote = FALSE, row.names = FALSE)
data<-read.table('./BCD_vs_CK-down.kegg.txt',sep = '\t',header = 1)
head(data)
data$GeneRatio=as.numeric(data$GeneRatio)
ggplot(data, aes(x=GeneRatio, y=Description)) +
geom_point(aes( size= Count , colour = -log10(p.adjust)) ) +
# scale_y_discrete(limits=Edata$Description)+
# ggtitle("GO enrichment") +
labs(x = "",y = "")+
scale_color_gradient(low = 'blue', high = 'red',name="-log10(p.adjust)") +
# theme_minimal()+
# xlim(range(Edata$type)) +
theme(strip.text.x = element_text( colour = 'black',size = 13),#size=8,angle=75
strip.text.y = element_text( colour = 'black',size = 13),
strip.background = element_rect(colour="white", fill="grey"),
axis.title = element_text(face = "bold",
size = "13",color = "black"),
axis.text.x = element_text(face = "bold",color = "black",
size = 12, hjust = 0.5, vjust = 0.5),
axis.text.y = element_text(face = "bold",size = 12,color = "black"),
legend.text = element_text(size = 10,face = "bold"),
legend.title = element_text(size = 10,
face = "bold"),
legend.position ='right',
# legend.key.size =
panel.background = element_rect(fill="transparent"),
panel.border =element_rect(fill='transparent', color='black'),
# panel.grid.major.y = element_blank(),
# panel.grid.minor = element_line(colour = "grey95", size = 0.25),
panel.grid.major.y = element_line(colour = "grey95", size = 1),
# panel.grid.major.x = element_blank(),
panel.grid.major.x = element_line(colour = "grey95", size = 1),
plot.title = element_text(lineheight=.8, face="bold", hjust=0.5, size =14),)
参考博文:
GO分析-植物部分-水稻
使用biomaRt做水稻ID转换及相关信息的填充
使用clusterProfiler进行GO、KEGG富集分析(有参情况)