TCGA数据下载推荐:R语言easyTCGA包

# Fetch TCGA data with easyTCGA
# Start from an empty workspace and trigger a garbage collection
rm(list = ls())
gc()

# Bioconductor packages, installed through the Tsinghua mirror.
# BiocManager itself comes from CRAN and has to be available first.
options(BioC_mirror = "https://mirrors.tuna.tsinghua.edu.cn/bioconductor")
if (!require("BiocManager")) {
  install.packages("BiocManager")
}
invisible(lapply(
  c("TCGAbiolinks", "SummarizedExperiment", "DESeq2", "edgeR", "limma"),
  function(pkg) {
    # require() attaches the package when present and returns FALSE otherwise
    if (!require(pkg, character.only = TRUE)) {
      BiocManager::install(pkg)
    }
  }
))

# CRAN packages
invisible(lapply(
  c("survival", "broom", "devtools", "cli"),
  function(pkg) {
    if (!require(pkg, character.only = TRUE)) {
      install.packages(pkg)
    }
  }
))

# easyTCGA is installed from GitHub (uncomment on first use)
# devtools::install_github("ayueme/easyTCGA")
library(easyTCGA)
help(package = "easyTCGA")
# Switch to the project directory before downloading anything
setwd("F:\\TCGA\\TCGA-COAD")

# mRNA, lncRNA and clinical information
# (the downloaded counts, TPM and FPKM values are NOT log2-transformed)
COAD <- getmrnaexpr("TCGA-COAD")

# miRNA expression
COAD_miRNA <- getmirnaexpr("TCGA-COAD")

# Copy number variation data
COAD_cnv <- getcnv("TCGA-COAD")

# Masked somatic mutation data
COAD_snv <- getsnvmaf("TCGA-COAD")

# DNA methylation beta values (the return value is not kept)
getmethybeta("TCGA-COAD")

 

# Open the saved data from the download directory before running this step.
# NOTE(review): mrna_expr_counts is not created anywhere in this script; it is
# presumably load()ed from the files written by getmrnaexpr() -- confirm the
# file name against the easyTCGA output directory.
# Differential expression analysis
# NOTE(review): the result name `diff` masks base::diff as a variable in the
# global environment; kept unchanged so downstream code still finds it.
diff <- diff_analysis(
  exprset = mrna_expr_counts, # raw counts, not log2-transformed
  project = "TCGA-COAD",
  save = FALSE # spelled out: `F` is an ordinary variable and can be reassigned
)

# Batch survival analysis over the expression matrix
# NOTE(review): mrna_expr_counts and clin_info are not created in this script;
# presumably loaded from the easyTCGA download output -- confirm.
surv <- batch_survival(
  exprset = mrna_expr_counts,
  clin = clin_info,
  is_count = TRUE, # spelled out instead of the reassignable alias `T`
  optimal_cut = TRUE,
  project = "TCGA-COAD",
  save_data = FALSE,
  min_sample_size = 5,
  print_index = TRUE
)

 

# Mutation analysis: oncoplot (waterfall plot)
# BiocManager::install("maftools")
library(maftools)
# NOTE(review): snv and clin_snv are not defined in this script; presumably
# loaded from the output of getsnvmaf() -- confirm.
maf <- read.maf(snv, clinicalData = clin_snv)
plotmafSummary(maf)
# List the clinical columns that can be passed as clinicalFeatures below
colnames(clin_snv)
oncoplot(
  maf = maf,
  clinicalFeatures = c("ajcc_pathologic_stage", "vital_status"),
  top = 10,
  sortByAnnotation = TRUE # spelled out instead of the reassignable alias `T`
)

 

 

# Kaplan-Meier curve for a single gene
dim(mrna_expr_counts)
set.seed(123)
colnames(clin_info)
# Build the survival table passed to plot_KM (columns: time, event).
# Overall-survival time is days_to_death for deceased patients and
# days_to_last_follow_up for everyone else; using only the follow-up column
# gives missing or wrong times for patients who died.
# NOTE(review): assumes clin_info carries a days_to_death column, as the GDC
# clinical data normally does -- confirm with the colnames() output above.
clin <- data.frame(
  time = ifelse(
    clin_info$vital_status == "Dead",
    clin_info$days_to_death,
    clin_info$days_to_last_follow_up
  ),
  event = clin_info$vital_status
)
# Event indicator: 1 = dead, 0 = alive. Any other status (e.g. "Not Reported")
# becomes NA instead of being silently counted as a death.
clin$event <- ifelse(
  clin$event == "Dead", 1,
  ifelse(clin$event == "Alive", 0, NA)
)
plot_KM(
  exprset = mrna_expr_counts,
  marker = "CHPF", # gene of interest
  clin = clin,
  optimal_cut = TRUE,
  return_data = TRUE
)

 

# Box plot comparing one gene's expression between normal and tumour tissue
# Print the available row names (gene identifiers) of the expression matrix
rownames(mrna_expr_counts)
plot_gene_paired(
  exprset = mrna_expr_counts,
  marker = "CHPF", # gene of interest
  return_data = TRUE
)

 

# Compare gene expression between two groups
set.seed(123)
# Random 0/1 group label for every sample. The length follows the number of
# columns of the expression matrix instead of a hard-coded 524, so the example
# keeps working when the cohort size differs.
# NOTE(review): assumes samples are the columns of mrna_expr_counts -- confirm.
group <- sample(c(0, 1), ncol(mrna_expr_counts), replace = TRUE)
plot_gene(
  exprset = mrna_expr_counts,
  marker = c("CHPF", "MAOA"),
  group = group,
  return_data = TRUE
)

 

  • 0
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是TCGA数据下载及处理的R语言脚本: 首先,需要安装以下R包:TCGAbiolinks,tidyverse,ggplot2,survival,survminer。 ```R # 安装TCGAbiolinks包 if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("TCGAbiolinks") # 安装其他必要的包 install.packages(c("tidyverse", "ggplot2", "survival", "survminer")) ``` 接下来,下载TCGA数据。例如,我们下载肺鳞癌(LUSC)的RNA-seq和临床数据。 ```R library(TCGAbiolinks) # Set working directory setwd("your_working_directory") # Download RNA-seq data query <- GDCquery(project = "TCGA-LUSC", data.category = "Transcriptome Profiling", data.type = "Gene Expression Quantification", workflow.type = "HTSeq - FPKM", legacy = TRUE, platform = "Illumina HiSeq", file.type = "results", experimental.strategy = "RNA-Seq") GDCdownload(query) # Download clinical data query <- GDCquery(project = "TCGA-LUSC", data.category = "Clinical", file.type = "xml") GDCdownload(query) ``` 接下来,我们可以将下载的RNA-seq数据导入到R中,并进行预处理。例如,我们可以通过log2转换标准化数据并删除低表达基因。 ```R # Load RNA-seq data LUSC_rnaseq <- GDCprepare(query, save = TRUE, save.filename = "LUSC_rnaseq") # Log2 transformation and normalization LUSC_rnaseq$log2 <- log2(LUSC_rnaseq$counts+1) LUSC_rnaseq_norm <- normalizeBetweenArrays(LUSC_rnaseq$log2, method = "quantile") # Remove low expressed genes LUSC_rnaseq_norm_filter <- LUSC_rnaseq_norm[rowSums(LUSC_rnaseq_norm > 1) >= 20,] ``` 最后,我们可以使用survival和survminer包对临床数据进行生存分析和可视化。 ```R # Load clinical data LUSC_clinical <- GDCprepare_clinic(query, clinical.info = "patient") # Merge RNA-seq and clinical data LUSC_data <- merge(LUSC_rnaseq_norm_filter, LUSC_clinical, by = "bcr_patient_barcode") # Survival analysis fit <- survfit(Surv(time, vital_status) ~ 1, data = LUSC_data) ggsurvplot(fit, data = LUSC_data, pval = TRUE, conf.int = TRUE) # Cox proportional hazards model model <- coxph(Surv(time, vital_status) ~ gene1 + gene2 + gene3, data = LUSC_data) summary(model) ```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值