library(clusterProfiler)
library(tidyverse)
library(AnnotationForge)
##### 1. Data preparation
library(readr)
# Read the tab-separated annotation table and derive one display name per
# gene: Preferred_name when it is set, otherwise the PFAMs value.
# Rows whose resulting name is still the "-" placeholder are dropped; rows
# where the comparison yields NA are dropped by filter() as well.
# NOTE(review): with comment = "#", readr ignores all text after a "#",
# which would also swallow a header line that starts with "#" -- confirm the
# input file's header row is not prefixed that way.
annotations <- readr::read_delim(
  file = "~/Program/pan_genome/data/annotation/Penaeus_chinensis_annotations.tsv",
  delim = "\t",
  comment = "#",
  escape_double = FALSE,
  trim_ws = TRUE
) %>%
  dplyr::mutate(
    Gene_Name = if_else(Preferred_name == "-", PFAMs, Preferred_name)
  ) %>%
  dplyr::filter(Gene_Name != "-")
##### 2. Prepare gene_info / gene-to-GO / gene-to-KEGG tables
# gene_info: gene id (GID, taken from the `query` column) and its name.
# Rows without a name are removed, and distinct() collapses repeated rows so
# this table is de-duplicated like the other mapping tables before it is
# handed to makeOrgPackage().
gene_info <- annotations %>%
  dplyr::select(GID = query, Gene_Name) %>%
  dplyr::filter(!is.na(Gene_Name)) %>%
  dplyr::distinct()
# Gene-to-GO mapping: split the comma-separated GOs field into one row per
# (gene, GO id) pair, drop the "-" placeholder and missing values, tag every
# row with a constant EVIDENCE code, and remove duplicate rows.
gene2go <- annotations %>%
  dplyr::select(GID = query, GOs) %>%
  # Spell out FALSE: the `F` shorthand is an ordinary variable that can be
  # reassigned elsewhere in a session.
  tidyr::separate_rows(GOs, sep = ",", convert = FALSE) %>%
  dplyr::filter(GOs != "-", !is.na(GOs)) %>%
  dplyr::mutate(EVIDENCE = "A") %>%
  dplyr::distinct()
# Gene-to-pathway mapping: split the comma-separated KEGG_Pathway field into
# one row per (gene, pathway) pair and keep only entries containing "map".
# Duplicate rows and rows with any remaining NA are removed.
gene2pathway <- annotations %>%
  dplyr::select(GID = query, Pathway = KEGG_Pathway) %>%
  # Spell out FALSE: the `F` shorthand is an ordinary variable that can be
  # reassigned elsewhere in a session.
  tidyr::separate_rows(Pathway, sep = ",", convert = FALSE) %>%
  dplyr::filter(stringr::str_detect(Pathway, "map")) %>%
  dplyr::distinct() %>%
  na.omit()
## Prepare the GO term descriptions
library(GO.db)
# Term names for every entry in GOTERM; the element names become row names
# below and are then moved into the GO_id column.
goterms <- Term(GOTERM)
GOlist <- as.data.frame(goterms)
go2name <- GOlist %>%
  rownames_to_column(var = "GO_id")
## Two ways of fetching KEGG pathway names are shown here; the KEGG service
## is often unreliable (the result is not used in this section).
library(magrittr)

#' Fetch the KEGG pathway id-to-name table.
#'
#' Calls clusterProfiler's internal (non-exported) `kegg_list()` helper, so
#' it needs network access and may break across clusterProfiler versions.
#'
#' @return A two-column table with `path_id` (the pathway id with its leading
#'   "map" / "path:map" prefix removed) and `path_name`.
get_path2name <- function() {
  path2name <- clusterProfiler:::kegg_list("pathway")
  # Strip the prefix whether or not the id carries the "path:" database tag;
  # anchoring at the start keeps the rest of the id untouched.
  path2name[[1]] <- sub("^(path:)?map", "", path2name[[1]])
  colnames(path2name) <- c("path_id", "path_name")
  path2name
}
# NOTE(review): gene2pathway keeps the "map" prefix on its Pathway values,
# while path_id here has it removed -- align the two before joining them.
pathway2name <- get_path2name()
#library(KEGGREST)
#listDatabases()
#map = keggList("pathway") %>% as.data.frame()
##### 3. Build the OrgDb package
# One definition of the output directory, shared by the build and the install
# step so the two paths cannot drift apart. The directory is created up front
# because the package database file is written into it.
# NOTE(review): this path is relative to the working directory, whereas the
# input table is read from under "~" -- confirm the intended location.
org_output_dir <- "./Program/Penaeus_chinensis"
dir.create(org_output_dir, recursive = TRUE, showWarnings = FALSE)

# NOTE(review): tax_id "88888" looks like a placeholder -- confirm the real
# NCBI taxonomy id for this species.
AnnotationForge::makeOrgPackage(
  gene_info = gene_info,
  # goTable expects a table with exactly the columns GID, GO and EVIDENCE,
  # so the GOs column is renamed here.
  go = dplyr::select(gene2go, GID, GO = GOs, EVIDENCE),
  maintainer = "rongchen.liu <rongchen.liu@foxmail.com>",
  author = "rongchen.liu",
  version = "0.1",
  outputDir = org_output_dir,
  tax_id = "88888",
  genus = "Penaeus",
  species = "chinensis",
  # Register the GO table so the package exposes GO / GOALL mappings, which
  # GO enrichment against an OrgDb relies on.
  goTable = "go"
)

# Install the generated source package.
install.packages(
  file.path(org_output_dir, "org.Pchinensis.eg.db"),
  repos = NULL,
  type = "source"
)
# Build your own OrgDb package from eggNOG results
# (Source page note: latest recommended article published 2024-07-19 23:23:16)