思路
- 选择一种疾病,从TCGA数据库获得其案例。
- 获取各案例中lncRNA表达量。
- 对各lncRNA做差异分析。
- 考察差异表达的lncRNA编码肽链的可能性。
- 实验验证肽链对疾病的影响。
环境
- TCGA数据库
- R studio
实验步骤
从TCGA获取案例
获取各案例中lncRNA表达量
#
# "TCGA_Dataset_lncRNA_Selector"
# "Version" 1.0
# "June 4, 2021"
#
###########################################
# #
# Copyright (c) 2020-2021 by WLS Studio. #
# Written by abhhba #
# Function: #
# Get lnc-RNA data from TCGA dataset. #
# #
###########################################
#
#All Rights Reserved.
#BiocManager::install("TCGAbiolinks")
#BiocManager::install("rtracklayer")
library(TCGAbiolinks)
library(Biobase)
library(SummarizedExperiment)
library(dplyr)
library(rtracklayer)
library(stringr)
#设定索引参数
query = GDCquery(project = "TCGA-LIHC",
legacy = FALSE,
experimental.strategy = "RNA-Seq",
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
workflow.type = "HTSeq - FPKM-UQ")
#下载数据并加载
GDCdownload(query)
dataAssy = GDCprepare(query, summarizedExperiment = F)
#处理版本号
dataAssy$X1=gsub('\\..*','',dataAssy$X1)
#将第一列设为行名
f = t(dataAssy[,1])
dataAssy2 = dataAssy[,-1]
rownames(dataAssy2) = f
dataAssy=dataAssy2
#筛选列名
colnames(dataAssy) = str_match(colnames(dataAssy), "(TCGA-[^-]*-[^-]*-[^-]*)")[,2]
#write.table(dataAssy, "RNA_FPKM_UQ_data.txt", row.names = F, sep = "\t", quote = F)
#加载包含lnc-RNA的集合
AnnoData=import('gencode.v27.long_noncoding_RNAs.gtf')
index=which(AnnoData$type=='gene')
#生成对应表
Target=data.frame(Ensembl_ID=AnnoData$gene_id[index],
Symbol=AnnoData$gene_name[index],
Biotype=AnnoData$gene_type[index])
Target$Ensembl_ID=gsub('\\..*','',Target$Ensembl_ID)
#寻找交集
common=intersect(Target$Ensembl_ID,rownames(dataAssy))
#选出Lnc-RNA的基因
dataAssy_match=dataAssy[common,]
rownames(dataAssy_match)=common
#释放对象
rm(f,dataAssy2)
对各lncRNA做差异分析
获得lncRNA的编码可能性
获得lncRNA的fasta文件
采用ensembl的api获得。
library(httr)
library(jsonlite)
library(xml2)
#row_name=rownames(dataAssy_match)[1:10]
row_name=common
str_all=""
for (i in row_name){
r <- GET(paste("https://rest.ensembl.org", "/sequence/id/",i,"?", sep = ""))
p=content(r)
str=paste0(">",p$id,"\n",p$seq,"\n")
str_all=paste0(str_all,str)
}
stop_for_status(r)
#生成提交CPC用的fa文件
#提交至http://cpc2.gao-lab.org/batch.php
#获得result_cpc2.txt
write(str_all,file = "submit.fa")
result=read.csv("/Users/abhhba/Desktop/result_cpc2.txt",sep = "\t")
获取lncRNA编码能力预测值
###CPC2
#http://cpc2.cbi.pku.edu.cn/download.php
source /your_soft_dir/biosoft/conda/etc/profile.d/conda.sh
conda activate python27 #use python 2.7 env
human_fa_linear=/data/fasta/Fed_Fasting_combined.human.isoforms.line.fa
data_dir=/data/fasta
input_dir=/data/fasta/total_novel
out_dir=/data/fasta/cpc2_results
cpc2_file=/your_soft_dir/Coding_potential/CPC2/CPC2-beta/bin/CPC2.py
python ${cpc2_file} -i /data/fasta/total_novel.fa -o ${out_dir}/total_novel.cpc2_results
###CNCI
#https://github.com/www-bioinfo-org/CNCI download and use directly
data_dir=/data/fasta
sof_dir=/your_soft_dir/Coding_potential/CNCI-master
out_dir=/data/fasta/cnci_results
python $sof_dir/CNCI.py -f $data_dir/total_novel.fa -o $out_dir/total_novel -m ve -p 4
###CPAT
#https://sourceforge.net/projects/rna-cpat/files/?source=navbar website
#https://github.com/likelet/LncPipe/tree/master/bin/cpat_model cpat modules download
#https://blog.csdn.net/u013241595/article/details/101160741 pip3 aconda
#conda install r-base==3.5.1 --force-reinstall
source /your_soft_dir/biosoft/conda/etc/profile.d/conda.sh
conda activate flair_env #use python 3.5 env
data_dir=/data/fasta
out_dir=/data/fasta/CPAT_results
modle_dir=/your_soft_dir/Coding_potential/LncPipe-master/bin/cpat_model
cpat.py -g /data/fasta/total_novel.fa -d ${modle_dir}/Human_logitModel.RData \
-x ${modle_dir}/Human_Hexamer.tsv -o ${out_dir}/total_novel
###plek
#https://sourceforge.net/projects/plek/files/
source /your_soft_dir/biosoft/conda/etc/profile.d/conda.sh
conda activate python27 #python 2.7 env
plek_dir=/your_soft_dir/Coding_potential/PLEK.1.2/PLEK.py
out_dir=/data/fasta/plek_results
python ${plek_dir} -fasta /data/fasta/total_novel.fa -out ${out_dir}/total_novel -thread 15 -minlength 20
###FEELnc
#https://github.com/tderrien/FEELnc
#use conda to install
#cd ${conda_dir}
#find ./ -name FEELnc_codpot.pl in python27 env
#then follow the install guide to creat and source
#source deactivate firstly
#then re conda activate python27 then it works
source /your_soft_dir/biosoft/conda/etc/profile.d/conda.sh
#source activate /your_soft_dir/biosoft/conda/pkgs/feelnc-0.1.1-pl526_5
conda activate python27
cd /data/fasta/FEELnc_results
codpot_dir=/your_soft_dir/biosoft/conda/pkgs/feelnc-0.1.1-pl526_5/bin/FEELnc_codpot.pl
FEELnc_codpot.pl -i /data/fasta/total_novel.fa -a /your_soft_dir/index/ori_genomic/hg38/fasta/gencode.v33.pc_transcripts.fa --mode=shuffle -p 18