library(clusterProfiler)
library(dplyr)
library(stringr)
# Work inside the directory holding the eggNOG-mapper output and GO files.
# NOTE(review): this absolute path is machine-specific; adjust it before
# running the script anywhere else.
setwd("D:\\zhuomian\\基因家族进化\\GO")
options(stringsAsFactors = FALSE)

# Annotation table; the code below relies on its `query` and `GOs` columns.
egg <- read.delim("out.emapper.annotations", header = TRUE, sep = "\t")
# Normalise empty cells to NA so missing values are represented one way only.
egg[egg == ""] <- NA

# Gene / GO-list pairs for rows where both fields are present.
gterms <- egg %>%
  dplyr::select(query, GOs) %>%
  na.omit()

gene_ids <- egg$query
# Rows that really carry GO terms. Empty cells are NA by this point, so a
# plain `GOs != ""` test would put NA into the mask and produce NA gene/term
# rows further down. "-" is also excluded: presumably the placeholder that
# eggNOG-mapper v2 writes for unannotated fields (confirm against your file);
# left in, it would become a bogus GO term.
eggnog_lines_with_go <- !is.na(egg$query) &
  !is.na(egg$GOs) &
  egg$GOs != "-"
# One character vector of GO IDs per annotated gene.
eggnog_annoations_go <- str_split(
  as.character(egg$GOs[eggnog_lines_with_go]),
  ","
)
# Long format: one row per (gene, GO term) pair. lengths() stays an integer
# vector even when no gene is annotated, unlike sapply(..., length).
gene2go <- data.frame(
  gene = rep(
    gene_ids[eggnog_lines_with_go],
    times = lengths(eggnog_annoations_go)
  ),
  term = as.character(unlist(eggnog_annoations_go)),
  stringsAsFactors = FALSE
)
# GO dictionary read without a header row; its three columns are labelled
# ID, Description and Ontology here.
go2name <- read.delim("GO.library", header = FALSE, stringsAsFactors = FALSE)
colnames(go2name) <- c("ID", "Description", "Ontology")

# Genes of interest: first column of a header-less text file.
gene_select <- read.delim(
  "gene.txt",
  header = FALSE,
  stringsAsFactors = FALSE
)[["V1"]]

# Enrichment of the selected genes against the custom GO annotation.
# Both cutoffs are 1, so no term is filtered out at this stage; p-values are
# adjusted with the Benjamini-Hochberg method.
go_rich <- enricher(
  gene = gene_select,
  pvalueCutoff = 1,
  qvalueCutoff = 1,
  pAdjustMethod = "BH",
  TERM2GENE = gene2go[c("term", "gene")],
  TERM2NAME = go2name[c("ID", "Description")]
)
# enricher() yields NULL when it cannot test anything (e.g. none of the
# selected genes occur in the annotation); fail with a clear message rather
# than an obscure merge() error.
if (is.null(go_rich)) {
  stop(
    "enricher() returned no result; check that the IDs in gene.txt ",
    "match the annotated gene IDs",
    call. = FALSE
  )
}

# Attach the ontology category of every enriched term.
tmp <- merge(
  as.data.frame(go_rich),
  go2name[c("ID", "Ontology")],
  by = "ID"
)
# Put Ontology first and keep every other column in its existing order.
# Selecting by name keeps this correct whatever number of columns the
# enrichment result has.
tmp <- tmp[c("Ontology", setdiff(names(tmp), "Ontology"))]
# Most significant terms first.
tmp <- tmp[order(tmp$pvalue), ]
# Tab-separated text, despite the .xls extension.
write.table(
  tmp,
  "GO_enrichment.xls",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)