#install.packages("cgdsr",repos="http://cran.us.r-project.org")
library(cgdsr)
#help(package=cgdsr)
#ls("package:cgdsr")
#getCancerStudies,getGeneticProfiles,getCaseLists,getProfileData
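# The portal hosts many cancer research projects (CancerStudies); each study contains several genomic data types,
# such as mutation / mRNA expression / CNA (GeneticProfiles),
# the sets of samples that were assayed for a given kind of omics data (CaseLists),
# and the omics data of individual genes within a sample set (ProfileData).
# getGeneticProfiles: returns a data frame describing the data types (mutation / mRNA expression / CNA, etc.)
# getCaseLists: returns the sample IDs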
# Construct the CGDS connection object used by every query below.
mycgds <- CGDS("http://www.cbioportal.org/")

# Retrieve the table of cancer studies and inspect it: first rows,
# spreadsheet-style viewer, then its dimensions.
study_df <- getCancerStudies(mycgds)
head(study_df)
View(study_df)
dim(study_df)
# Select the study by its identifier instead of by row position: the list of
# studies returned by the server changes as studies are added, so a fixed row
# number (previously 300) can silently end up pointing at a different study.
# The identifier is looked up in the first column of the study table, the same
# column the positional lookup read from.
target_study_id <- "stad_tcga_pub"
if (!target_study_id %in% as.character(study_df[, 1])) {
  stop("Cancer study not found on the server: ", target_study_id,
       call. = FALSE)
}
mycancerstudy <- target_study_id

# Request the case lists of the study once and reuse the result for both
# the viewer and the selection.
study_case_lists <- getCaseLists(mycgds, mycancerstudy)
View(study_case_lists)

# Use the case list in the first row. The second row
# (stad_tcga_pub_3way_complete) is an alternative:
# mycaselist <- study_case_lists[2, 1]
mycaselist <- study_case_lists[1, 1]
## getGeneticProfiles(mycgds,mycancerstudy)[,1]
# [1] "stad_tcga_pub_gistic"
# [2] "stad_tcga_pub_linear_CNA"
# [3] "stad_tcga_pub_mutations"
# [4] "stad_tcga_pub_fusion"
# [5] "stad_tcga_pub_rna_seq_v2_mrna"
# [6] "stad_tcga_pub_rna_seq_v2_mrna_median_Zscores"
# [7] "stad_tcga_pub_rna_seq_v2_mrna_median_all_sample_Zscores"
# Request the genetic profiles of the study once; the profile ID is in the
# first column. Only one profile is used downstream, so a single assignment
# replaces the earlier chain of overwritten ones.
# NOTE(review): selection is by row position and therefore depends on the
# order in which the server lists the profiles -- confirm with
# View(study_profiles) if the listing changes.
study_profiles <- getGeneticProfiles(mycgds, mycancerstudy)

# Row 6: mRNA data. Alternatives:
# mygeneticprofile <- study_profiles[2, 1] # CNV data
# mygeneticprofile <- study_profiles[3, 1] # mutation data
mygeneticprofile <- study_profiles[6, 1]
# View(study_profiles)

# Clinical attribute data for the selected case list.
myclinicalData <- getClinicalData(mycgds, mycaselist)
# Query the CGDS API and return data for the given genes, genetic profile
# and case list.
gene_list <- c("BRCA1", "BRCA2", "EGFR", "PTEN")

# Take the case_ids field of the first case list and split it on single
# spaces to obtain a vector of individual case IDs.
first_case_ids <- getCaseLists(mycgds, mycancerstudy)[1, "case_ids"]
case_list <- unlist(strsplit(first_case_ids, " "))
# case_list <- case_list[1:8] # keep only the first eight cases

# All arguments after the connection object are passed by name.
data <- getProfileData(
  mycgds,
  genes = gene_list,
  geneticProfiles = mygeneticprofile,
  caseList = mycaselist,
  cases = case_list
)
head(data)
# To query and explore the data through the web interface, see: