STRINGdb包进行蛋白质相互作用网络分析

最新推荐文章于 2024-10-16 16:42:38 发布

qq_27390023

最新推荐文章于 2024-10-16 16:42:38 发布

阅读量3.3k

点赞数 1

文章标签： r语言生物信息

本文链接：https://blog.csdn.net/qq_27390023/article/details/128674707

版权

STRINGdb 包提供了 STRING 蛋白质-蛋白质相互作用数据库（https://string-db.org）的 R 接口，并支持富集分析。STRING 蛋白互作网络数据库不仅包含直接物理作用的互作关系，还包含蛋白之间通过间接作用形成的互作关系。除了有实验证据支持的数据外，它还整合了其他数据库中的互作数据，以及利用生物信息学方法预测得到的互作数据。蛋白相互作用的分数（combined score）的定义为：The combined score is computed by combining the probabilities from the different evidence channels and corrected for the probability of randomly observing an interaction.

STRING 帮助文档：https://cn.string-db.org/cgi/help

STRING 数据下载：https://cn.string-db.org/cgi/download

# Install the dependencies only when they are missing.
# requireNamespace() tests availability without attaching the package, and
# guarding BiocManager::install() avoids re-installing STRINGdb on every run.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("STRINGdb", quietly = TRUE)) {
  BiocManager::install("STRINGdb")
}
library(STRINGdb)
# showClass("STRINGdb")
### 1. Initialization
# The available STRING data versions are listed at:
# https://cn.string-db.org/cgi/access?footer_active_subpage=archive
# Create the STRINGdb object for human (taxonomy id 9606), STRING version 11.5,
# keeping interactions with a combined score of at least 200.
# NOTE(review): an empty input_directory is presumably the package default
# (temporary download directory) -- confirm against STRINGdb$help("initialize").
string_db <- STRINGdb$new(
  version = "11.5",
  species = 9606,
  score_threshold = 200,
  network_type = "functional",
  input_directory = ""
)
# `species` is a field of the instance created above; the class generator
# `STRINGdb` has no such field, so it must be read from `string_db`.
string_db$species
# STRINGdb$help("initialize")
# Looking up a species (NCBI taxonomy) id:
# https://www.ncbi.nlm.nih.gov/search/all/?term=Homo+sapiens+%5Borganism%5D
# Homo sapiens Taxonomy ID: 9606
# Mus musculus Taxonomy ID: 10090
class(string_db) # [1] "STRINGdb"
STRINGdb$show()
STRINGdb$methods()              # To list all the methods available.
STRINGdb$help("get_graph")      # To visualize their documentation.
### 2. Load the example data
data(diff_exp_example1)
head(diff_exp_example1)
class(diff_exp_example1) # [1] "data.frame"
# Map the identifiers in the "gene" column to STRING identifiers.
# The result is the input data frame with an additional "STRING_id" column;
# rows that cannot be mapped are removed (removeUnmappedRows = TRUE).
example1_mapped <- string_db$map(
  diff_exp_example1,
  "gene",
  removeUnmappedRows = TRUE
)
# The same call can be used on a hand-built data frame:
# gene_df <- data.frame(
#   gene = c("TPSD1", "LINC00635", "POTEKP", "LOC100505759", "MST1R",
#            "RBM24", "DGKI", "ABCA3", "LACTB2"),
#   rank = 1:9
# )
#
# example2_mapped <- string_db$map(gene_df, "gene",
#                                  removeUnmappedRows = TRUE)
### 3. Plotting
# diff_exp_example1$gene
# Plot margins. The margins are set with a direct par() call instead of being
# routed through options(SweaveHooks = ...), which only has a purpose inside a
# Sweave vignette and needlessly changes a global option in a plain script.
# The previous graphical parameters are saved so they can be restored below.
plot_margins <- c(2.1, 0.1, 4.1, 2.1)
old_par_network <- par(mar = plot_margins)
# Take at most the first 200 mapped proteins. head() is used instead of
# [1:200] so that no NA ids are produced when fewer than 200 rows were mapped.
hits <- head(example1_mapped$STRING_id, 200)
# Draw the STRING network for the selected proteins.
string_db$plot_network(hits)
# string_db$plot_network(c("TP53","MDM2","MDM4","ATM","CHEK2","RPS6KA3"))
# Filter by p-value and add a color column
# (green: down-regulated genes, red: up-regulated genes).
example1_mapped_pval05 <- string_db$add_diff_exp_color(
  subset(example1_mapped, pvalue < 0.05),
  logFcColStr = "logFC"
)
# Post the payload information (ids and their colors) to the STRING server.
payload_id <- string_db$post_payload(
  example1_mapped_pval05$STRING_id,
  colors = example1_mapped_pval05$color
)
# Re-apply the margins before the second plot, as the original script did.
par(mar = plot_margins)
# Display a STRING network png with the colored "halo".
string_db$plot_network(hits, payload_id = payload_id)
# Restore the graphical parameters that were active before this section.
par(old_par_network)
### 4. Enrichment analysis
# Fetch the STRING annotations (e.g. GO terms, KEGG pathways, domain
# databases) for a vector of STRING ids and keep them in `annotations`.
annotations <- string_db$get_annotations(hits)
head(annotations, 2)
# Enrichment results for the same set of STRING ids.
enrichment <- string_db$get_enrichment(hits)
head(enrichment, 20)
### 5. Clustering and plotting
# Cluster at most the first 600 mapped proteins. head() avoids the NA ids
# that [1:600] would produce when fewer than 600 rows were mapped.
clustersList <- string_db$get_clusters(head(example1_mapped$STRING_id, 600))
# 2 x 2 panel layout with the same margins as the earlier plots; the previous
# graphical parameters are saved so they can be restored afterwards.
old_par_clusters <- par(mar = c(2.1, 0.1, 4.1, 2.1), mfrow = c(2, 2))
# Plot up to four clusters. Bounding the loop by length(clustersList) prevents
# a "subscript out of bounds" error when fewer than four clusters are returned.
for (i in seq_len(min(4L, length(clustersList)))) {
  string_db$plot_network(clustersList[[i]])
}
# Restore the layout and margins so later plots are not drawn in a 2 x 2 grid.
par(old_par_clusters)
### 6. Protein information held by STRINGdb
# Retrieve the STRING proteins data frame.
string_proteins <- string_db$get_proteins()
head(string_proteins)
# string_ids <- string_proteins$protein_external_id
# Look up the STRING ids of individual protein symbols.
tp53 <- string_db$mp("tp53")
# string_proteins[which(string_proteins$preferred_name=="TP53"),'protein_external_id']
atm <- string_db$mp("atm")
# string_proteins[which(string_proteins$preferred_name=="ATM"),'protein_external_id']
### 7. Network neighbours of the given proteins (STRING ids)
string_db$get_neighbors(tp53)
string_db$get_neighbors(c(tp53, atm))
# Literal STRING ids can be passed as well. The first id previously carried
# embedded double-quote characters ('"9606...."'), which made it an invalid id.
string_db$get_neighbors(c("9606.ENSP00000269305", "9606.ENSP00000278616"))
### 8. Interactions among the given proteins in string_db
# Resolve the two symbols to STRING ids and list the interactions between them.
tp53 <- string_db$mp("tp53")
atm <- string_db$mp("atm")
string_db$get_interactions(c(tp53, atm))
# Same query for the first 100 ids of the STRING proteins table.
string_proteins <- string_db$get_proteins()
string_ids <- string_proteins$protein_external_id[seq_len(100)]
string_interaction_df <- string_db$get_interactions(string_ids)

参考：

https://bioconductor.org/packages/release/bioc/html/STRINGdb.html

https://www.jianshu.com/p/1ae2be18ca70