This script retrieves data from the KEGG website (via its REST API) for
downstream gene or gene-set enrichment analysis.
# NOTE(review): hard-coded, machine-specific working directory. Relative paths
# used later in the script (e.g. "./KEGG_hsa.gmt") resolve against it, so this
# path has to exist on the machine running the script.
setwd("D:/R_wordir/API_ret/KEGG_API/")
Load the required packages:
library("httr")
library("readr")
library("curl")
## Using libcurl 7.64.1 with Schannel
##
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
##
## parse_date
## The following object is masked from 'package:httr':
##
## handle_reset
Build the KEGG REST API request URLs
## Base URL shared by every KEGG REST request in this script.
baseurl <- "https://rest.kegg.jp/"
## Obtain the organism abbreviations used in the KEGG database.
## The response is read as a headerless table, so the columns receive the
## default names V1, V2, ... (see the printed output below).
KEGG_org <- read.delim(paste0(baseurl, "list/organism"), header = FALSE)
class(KEGG_org)
## [1] "data.frame"
head(KEGG_org)
## V1 V2 V3
## 1 T01001 hsa Homo sapiens (human)
## 2 T01005 ptr Pan troglodytes (chimpanzee)
## 3 T02283 pps Pan paniscus (bonobo)
## 4 T02442 ggo Gorilla gorilla gorilla (western lowland gorilla)
## 5 T01416 pon Pongo abelii (Sumatran orangutan)
## 6 T03265 nle Nomascus leucogenys (northern white-cheeked gibbon)
## V4
## 1 Eukaryotes;Animals;Vertebrates;Mammals
## 2 Eukaryotes;Animals;Vertebrates;Mammals
## 3 Eukaryotes;Animals;Vertebrates;Mammals
## 4 Eukaryotes;Animals;Vertebrates;Mammals
## 5 Eukaryotes;Animals;Vertebrates;Mammals
## 6 Eukaryotes;Animals;Vertebrates;Mammals
## Obtain the names of all KEGG pathways for human (organism code "hsa").
## Read as a headerless table: V1 holds the pathway ID and V2 the pathway
## name followed by the organism name (see the printed output below).
hsa_pathway <- read.delim(paste0(baseurl, "list/pathway/hsa"), header = FALSE)
class(hsa_pathway)
## [1] "data.frame"
head(hsa_pathway)
## V1 V2
## 1 path:hsa00010 Glycolysis / Gluconeogenesis - Homo sapiens (human)
## 2 path:hsa00020 Citrate cycle (TCA cycle) - Homo sapiens (human)
## 3 path:hsa00030 Pentose phosphate pathway - Homo sapiens (human)
## 4 path:hsa00040 Pentose and glucuronate interconversions - Homo sapiens (human)
## 5 path:hsa00051 Fructose and mannose metabolism - Homo sapiens (human)
## 6 path:hsa00052 Galactose metabolism - Homo sapiens (human)
## Obtain the pathway-to-gene links for human.
## Read as a headerless table with one row per (pathway, gene) pair:
## V1 is the pathway ID and V2 the organism-prefixed gene ID.
PATHWAYID2GENEID <- read.delim(paste0(baseurl, "link/hsa/pathway"), header = FALSE)
class(PATHWAYID2GENEID)
## [1] "data.frame"
head(PATHWAYID2GENEID)
## V1 V2
## 1 path:hsa00010 hsa:10327
## 2 path:hsa00010 hsa:124
## 3 path:hsa00010 hsa:125
## 4 path:hsa00010 hsa:126
## 5 path:hsa00010 hsa:127
## 6 path:hsa00010 hsa:128
Integrate the pathway table with the pathway-to-gene-ID table and transform the result into a list.
First, convert the pathway-to-gene-ID table into a list of gene IDs per pathway.
## Pathway IDs in order of first appearance in the link table.
pathIDs <- unique(PATHWAYID2GENEID$V1)
## Drop the organism prefix from each gene ID: everything up to and including
## the first ":" (e.g. "hsa:10327" -> "10327"). Matching on the colon instead
## of a fixed character count also works for prefixes of other lengths.
gene_ids <- sub("^[^:]*:", "", as.character(PATHWAYID2GENEID$V2))
## Group the genes by pathway in a single pass instead of re-scanning the whole
## table once per pathway. Fixing the factor levels to pathIDs keeps the
## pathways in their original order; unname() keeps the list unnamed, because
## the readable pathway names are attached in a later step.
PATHWAYIDs_GENEIDs_list <- unname(split(
  gene_ids,
  factor(as.character(PATHWAYID2GENEID$V1), levels = as.character(pathIDs))
))
head(PATHWAYIDs_GENEIDs_list)
## [[1]]
## [1] "10327" "124" "125" "126" "127" "128" "130" "130589"
## [9] "131" "160287" "1737" "1738" "2023" "2026" "2027" "217"
## [17] "218" "219" "2203" "221" "222" "223" "224" "226"
## [25] "229" "230" "2538" "2597" "26330" "2645" "2821" "3098"
## [33] "3099" "3101" "387712" "3939" "3945" "3948" "441531" "501"
## [41] "5105" "5106" "5160" "5161" "5162" "5211" "5213" "5214"
## [49] "5223" "5224" "5230" "5232" "5236" "5313" "5315" "55276"
## [57] "55902" "57818" "669" "7167" "80201" "83440" "84532" "8789"
## [65] "92483" "92579" "9562"
##
## [[2]]
## [1] "1431" "1737" "1738" "1743" "2271" "3417" "3418" "3419" "3420"
## [10] "3421" "4190" "4191" "47" "48" "4967" "50" "5091" "5105"
## [19] "5106" "5160" "5161" "5162" "55753" "6389" "6390" "6391" "6392"
## [28] "8801" "8802" "8803"
##
## [[3]]
## [1] "132158" "2203" "221823" "226" "229" "22934" "230" "2539"
## [9] "25796" "2821" "414328" "51071" "5211" "5213" "5214" "5226"
## [17] "5236" "55276" "5631" "5634" "6120" "64080" "6888" "7086"
## [25] "729020" "8277" "84076" "8789" "9104" "9563"
##
## [[4]]
## [1] "10327" "10720" "10941" "231" "27294" "2990" "51084" "51181"
## [9] "54490" "54575" "54576" "54577" "54578" "54579" "54600" "54657"
## [17] "54658" "54659" "55277" "57016" "574537" "6120" "6652" "729020"
## [25] "729920" "7358" "7360" "7363" "7364" "7365" "7366" "7367"
## [33] "79799" "9365" "9942"
##
## [[5]]
## [1] "197258" "2203" "226" "229" "230" "231" "26007" "2762"
## [9] "29925" "29926" "3098" "3099" "3101" "3795" "4351" "5207"
## [17] "5208" "5209" "5210" "5211" "5213" "5214" "5372" "5373"
## [25] "55556" "57016" "57103" "6652" "7167" "7264" "80201" "8789"
## [33] "8790"
##
## [[6]]
## [1] "130589" "231" "2538" "2548" "2582" "2584" "2592" "2595"
## [9] "2645" "2683" "2717" "2720" "3098" "3099" "3101" "3906"
## [17] "3938" "5211" "5213" "5214" "5236" "55276" "57016" "57818"
## [25] "6476" "7360" "80201" "8704" "8972" "92579" "93432"
## Check whether the pathway table and the pathway-to-gene table list the same
## pathway IDs in the same order.
identical(pathIDs,hsa_pathway$V1)
## [1] TRUE
## Add names to the list.
## Names are looked up by pathway ID rather than taken by position, so a
## reordered or differently sized pathway table cannot mislabel a gene set.
## An optional "path:" prefix is ignored on both sides of the lookup.
path_keys <- sub("^path:", "", as.character(pathIDs))
name_keys <- sub("^path:", "", as.character(hsa_pathway$V1))
## Strip only the trailing " - <organism>" suffix. The greedy capture runs up
## to the LAST " - ", so a pathway title that itself contains " - " is kept
## whole, no trailing blank is left behind, and a title without any " - " is
## returned unchanged instead of becoming an empty string.
path_names <- sub("^(.*) - .*$", "\\1", as.character(hsa_pathway$V2))
set_names <- path_names[match(path_keys, name_keys)]
## Fall back to the pathway ID when the pathway table has no name for it.
missing_name <- is.na(set_names)
set_names[missing_name] <- path_keys[missing_name]
names(PATHWAYIDs_GENEIDs_list) <- set_names
head(PATHWAYIDs_GENEIDs_list)
## $`Glycolysis / Gluconeogenesis `
## [1] "10327" "124" "125" "126" "127" "128" "130" "130589"
## [9] "131" "160287" "1737" "1738" "2023" "2026" "2027" "217"
## [17] "218" "219" "2203" "221" "222" "223" "224" "226"
## [25] "229" "230" "2538" "2597" "26330" "2645" "2821" "3098"
## [33] "3099" "3101" "387712" "3939" "3945" "3948" "441531" "501"
## [41] "5105" "5106" "5160" "5161" "5162" "5211" "5213" "5214"
## [49] "5223" "5224" "5230" "5232" "5236" "5313" "5315" "55276"
## [57] "55902" "57818" "669" "7167" "80201" "83440" "84532" "8789"
## [65] "92483" "92579" "9562"
##
## $`Citrate cycle (TCA cycle) `
## [1] "1431" "1737" "1738" "1743" "2271" "3417" "3418" "3419" "3420"
## [10] "3421" "4190" "4191" "47" "48" "4967" "50" "5091" "5105"
## [19] "5106" "5160" "5161" "5162" "55753" "6389" "6390" "6391" "6392"
## [28] "8801" "8802" "8803"
##
## $`Pentose phosphate pathway `
## [1] "132158" "2203" "221823" "226" "229" "22934" "230" "2539"
## [9] "25796" "2821" "414328" "51071" "5211" "5213" "5214" "5226"
## [17] "5236" "55276" "5631" "5634" "6120" "64080" "6888" "7086"
## [25] "729020" "8277" "84076" "8789" "9104" "9563"
##
## $`Pentose and glucuronate interconversions `
## [1] "10327" "10720" "10941" "231" "27294" "2990" "51084" "51181"
## [9] "54490" "54575" "54576" "54577" "54578" "54579" "54600" "54657"
## [17] "54658" "54659" "55277" "57016" "574537" "6120" "6652" "729020"
## [25] "729920" "7358" "7360" "7363" "7364" "7365" "7366" "7367"
## [33] "79799" "9365" "9942"
##
## $`Fructose and mannose metabolism `
## [1] "197258" "2203" "226" "229" "230" "231" "26007" "2762"
## [9] "29925" "29926" "3098" "3099" "3101" "3795" "4351" "5207"
## [17] "5208" "5209" "5210" "5211" "5213" "5214" "5372" "5373"
## [25] "55556" "57016" "57103" "6652" "7167" "7264" "80201" "8789"
## [33] "8790"
##
## $`Galactose metabolism `
## [1] "130589" "231" "2538" "2548" "2582" "2584" "2592" "2595"
## [9] "2645" "2683" "2717" "2720" "3098" "3099" "3101" "3906"
## [17] "3938" "5211" "5213" "5214" "5236" "55276" "57016" "57818"
## [25] "6476" "7360" "80201" "8704" "8972" "92579" "93432"
Generate a GMT file for GSEA or other gene-set analyses.
We build a function, write_gmt,
that converts the pathway-to-gene-ID list
into GMT format.
# write_gmt: write a list of gene sets to a file in GMT format.
# Each gene set becomes one tab-separated line:
#   <set name> <TAB> <description> <TAB> <gene 1> <TAB> <gene 2> ...
#
# set_ls: gene sets as a named list; each element holds the genes of one set.
#         Sets are written in list order. An unnamed list writes an empty file.
# out:    output file name and directory.
# desc:   descriptions of the gene sets as a list keyed by set name. A set
#         without an entry gets the placeholder "KEGG_website"; only the first
#         element of an entry is used.
# Returns NULL invisibly; called for its side effect of writing `out`.
write_gmt <- function(set_ls, out, desc = list()) {
  set_names <- names(set_ls)
  con <- file(description = out, open = "wt")
  # Close the connection even when writing fails part-way through.
  on.exit(close(con), add = TRUE)
  # Iterate by position, not by name: looking a set up by name returns the
  # first match, which would write the wrong genes for duplicated set names.
  for (i in seq_along(set_names)) {
    set_name <- set_names[[i]]
    set_desc <- desc[[set_name]]
    descp <- if (is.null(set_desc)) "KEGG_website" else set_desc[1]
    outline <- paste0(c(set_name, descp, set_ls[[i]]), collapse = "\t")
    writeLines(outline, con = con)
  }
  invisible(NULL)
}
## Export the named pathway gene sets as a GMT file in the working directory.
write_gmt(
  set_ls = PATHWAYIDs_GENEIDs_list,
  out = "./KEGG_hsa.gmt"
)