# go注释！_go annotations-CSDN博客

# 本文链接：https://blog.csdn.net/SUMPLUSS/article/details/139479814

library(clusterProfiler)
library(dplyr)
library(stringr)

setwd("D:\\zhuomian\\基因家族进化\\GO")  # set this to your own working directory

## Build the GO annotation tables ----
# Reads the eggNOG-mapper annotation table and reshapes its comma-separated
# `GOs` column into `gene2go`, a long table with one row per (gene, GO term)
# pair. Also loads `go2name`, the GO term ID -> description/ontology lookup.
options(stringsAsFactors = FALSE)
egg <- read.delim("out.emapper.annotations", header = TRUE, sep = "\t")
stopifnot(all(c("query", "GOs") %in% names(egg)))

# Blank fields become NA. The is.na() guard keeps cells that are already NA
# from producing NA entries in the logical index.
egg[!is.na(egg) & egg == ""] <- NA
# NOTE(review): newer eggNOG-mapper releases appear to write "-" instead of an
# empty field when a query has no GO terms; treat it as missing so it is not
# carried into gene2go as a bogus term. Confirm against your emapper version.
egg$GOs[!is.na(egg$GOs) & egg$GOs == "-"] <- NA

# Two-column view of the annotated queries only.
gterms <- egg %>%
  dplyr::select(query, GOs) %>%
  na.omit()

gene_ids <- egg$query
# Empty strings were converted to NA above, so the rows carrying GO terms must
# be found with is.na(); comparing against "" would yield NA for the rest and
# inject NA gene/term rows into gene2go.
eggnog_lines_with_go <- !is.na(egg$GOs) & !is.na(gene_ids)
eggnog_annoations_go <- str_split(egg$GOs[eggnog_lines_with_go], ",")

# A gene may carry several GO terms: repeat each gene ID once per term so the
# table holds 1:1 gene/term pairs.
gene2go <- data.frame(
  gene = rep(gene_ids[eggnog_lines_with_go],
             times = lengths(eggnog_annoations_go)),
  # as.character() keeps the `term` column present even when no row has GO
  # terms (unlist() of an empty list is NULL, which data.frame() would drop).
  term = as.character(unlist(eggnog_annoations_go)),
  stringsAsFactors = FALSE
)

# GO term lookup table; the file has no header row, so name the columns here.
go2name <- read.delim('GO.library', header = FALSE, stringsAsFactors = FALSE)
names(go2name) <- c('ID', 'Description', 'Ontology')

# GO enrichment ----
# Runs an over-representation test on the gene IDs in the first column of
# gene.txt (read without a header), using gene2go / go2name as the custom
# annotation, then writes the result table, sorted by p-value and with the
# ontology as the leading column, to GO_enrichment.xls (tab-separated text).
gene_select <- read.delim(
  file = 'gene.txt', stringsAsFactors = FALSE, header = FALSE
)$V1
go_rich <- enricher(
  gene = gene_select,
  # Column order matters for both tables: term first, then gene / name.
  TERM2GENE = gene2go[c('term', 'gene')],
  TERM2NAME = go2name[c('ID', 'Description')],
  pvalueCutoff = 1,
  pAdjustMethod = 'BH',
  qvalueCutoff = 1
)
# enricher() can return NULL instead of a result object; fail here with a
# clear message rather than with an unrelated merge() error further down.
if (is.null(go_rich)) {
  stop(
    "enricher() returned no result; check that the IDs in gene.txt match ",
    "the gene IDs in the annotation table.",
    call. = FALSE
  )
}

# Attach the ontology of each term. This is an inner join, so terms absent
# from go2name are not written to the output.
tmp <- merge(as.data.frame(go_rich), go2name[c('ID', 'Ontology')], by = 'ID')
# Move Ontology to the front by name; a positional index such as c(10, 1:9)
# only works when the enrichment table has exactly nine columns.
tmp <- tmp[c('Ontology', setdiff(names(tmp), 'Ontology'))]
tmp <- tmp[order(tmp$pvalue), ]
write.table(tmp, 'GO_enrichment.xls', sep = '\t', row.names = FALSE, quote = FALSE)