# 快速提取 TCMSP(Traditional Chinese Medicine Systems Pharmacology Database and Analysis Platform)
# 中所搜索中药成分对应基因靶点的 R 程序,可同时提取多味药物对应的基因靶点。
library(rvest)
library(httr)
library(jsonlite)
library(dplyr)
library(data.table)
library(tidyverse)
# User configuration ----

# Working directory: replace the quoted path below with the directory that
# holds this script. Every "\" in the path must be written as "\\".
setwd("E:\\code\\R")

# Lower bounds applied to the `ob` and `dl` columns when ingredients are
# filtered further down in this script.
obThresholdValue <- 30
dlThresholdValue <- 0.18

# Herb names (one or more entries).
names <- c(
  "Aiye"
)

# Result-page URL for each herb, listed in the same order as `names`.
urls <- c(
  "https://tcmsp-e.com/tcmspsearch.php?qr=Folium%20Artemisiae%20Argyi&qsr=herb_en_name&token=a321854fbffcb9d42104d5c55dd71c15"
)
# Scrape and export ----
# For every URL in `urls`: download the page, read the ingredient and target
# tables embedded in its <script> elements, keep the ingredients whose `ob`
# and `dl` values reach the configured thresholds, and collect their targets.
# The combined table is written to AllDrugTarget.txt (tab-separated).

# `names` and `urls` are paired by position; a length mismatch would label
# targets with the wrong herb name or with NA.
stopifnot(length(names) == length(urls))

# Build the filtered ingredient -> target table for a single herb.
#   name: label stored in the `Drug` column of the result.
#   url:  result-page URL to download.
# Returns a tibble with the columns Drug, MOL_ID, molecule_name, target_name.
# Stops with an explicit error when the HTTP request fails or when the page
# does not contain the two expected `data: [...]` arrays.
fetch_drug_targets <- function(name, url) {
  # Position of the <script> element that carries the data arrays, and the
  # pattern of the inline `data: [...]` JSON literals inside it.
  script_index <- 12
  data_pattern <- "data:\\s\\[.*\\]"

  # TLS certificate verification is switched off by `ssl_verifypeer = FALSE`.
  # NOTE(review): `encoding` is not a documented GET() argument; it was
  # probably meant for read_html(). Left unchanged so the request and the
  # page decoding stay exactly as before -- confirm before moving it.
  response <- GET(url, encoding = "UTF-8", config(ssl_verifypeer = FALSE))
  stop_for_status(response, task = paste("fetch the TCMSP page for", name))

  script_texts <- read_html(response) %>%
    html_elements("script") %>%
    html_text()
  arrays_per_script <- str_extract_all(script_texts, data_pattern)

  data_arrays <- character(0)
  if (length(arrays_per_script) >= script_index) {
    data_arrays <- arrays_per_script[[script_index]]
  }
  if (length(data_arrays) < 2) {
    stop(
      "Could not find the ingredient/target data arrays on the page for '",
      name, "'. The page layout may have changed or the token in the URL ",
      "may no longer be valid.",
      call. = FALSE
    )
  }

  # First array: ingredient table (carries the `ob` and `dl` columns).
  ingredients <- str_replace(data_arrays[1], "data:", "") %>%
    fromJSON(simplifyVector = TRUE) %>%
    mutate(across(c(ob, dl), as.numeric)) %>%
    filter(ob >= obThresholdValue, dl >= dlThresholdValue)

  # Second array: target table; keep only targets of retained ingredients.
  str_replace(data_arrays[2], "data:", "") %>%
    fromJSON(simplifyVector = TRUE) %>%
    semi_join(ingredients, by = "MOL_ID") %>%
    as_tibble() %>%
    add_column(Drug = name, .before = 1) %>%
    select(Drug, MOL_ID, molecule_name, target_name)
}

# Collect one table per herb in a preallocated list instead of growing a
# tibble row by row inside the loop.
target_tables <- vector("list", length(urls))
for (i in seq_along(urls)) {
  print(names[i])
  target_tables[[i]] <- fetch_drug_targets(names[i], urls[i])
}

if (length(target_tables) > 0) {
  all_drug_target <- bind_rows(target_tables)
} else {
  # No herbs configured: keep the four output columns so that the exported
  # file still gets its header line.
  all_drug_target <- tibble(
    Drug = character(),
    MOL_ID = character(),
    molecule_name = character(),
    target_name = character()
  )
}

# Drop incomplete rows and exact duplicates before exporting.
all_drug_target <- all_drug_target %>%
  filter(
    !is.na(Drug),
    !is.na(MOL_ID),
    !is.na(molecule_name),
    !is.na(target_name)
  ) %>%
  distinct(target_name, molecule_name, Drug, MOL_ID, .keep_all = TRUE)

write.table(
  all_drug_target,
  file = "AllDrugTarget.txt",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)