# RNA splicing analysis with SUPPA
# Environment setup
# Create a dedicated conda environment named "suppa", activate it, and install
# the three tools used by this workflow: salmon, gffread and suppa.
conda create -n suppa
source activate suppa
for tool in salmon gffread suppa; do
  conda install "bioconda::${tool}"
done
# Step 1: generate splicing events, extract transcripts, and build the salmon index
# Generate alternative-splicing event definitions from the GTF annotation.
# Output files are written in ioe format with the prefix "Danio_splicing_events";
# the requested event types are SE, SS, MX, RI and FL.
suppa.py generateEvents \
  -i /share/pub/liangjc/genome/bmy/Danio_rerio.GRCz11.110.gtf \
  -o Danio_splicing_events \
  -f ioe \
  -e SE SS MX RI FL
# Extract the transcript sequences described by the GTF from the genome FASTA
# and write them to a transcript FASTA file (gffread -w).
genome_dir=/share/pub/liangjc/genome/bmy
gffread \
  -w Danio_rerio.GRCz11.110.transcripts.fasta \
  -g "${genome_dir}/Danio_rerio.GRCz11.dna.primary_assembly.fa" \
  "${genome_dir}/Danio_rerio.GRCz11.110.gtf"
# Merge several .ioe files into one, keeping the header line of the first
# file only.
# Assumes every .ioe file starts with exactly one header line.
# Arguments: $1 - merged output file
#            $2... - input .ioe files
# Returns:   exit status of awk
merge_ioe_files() {
  local merged=$1
  shift
  # FNR == 1 is the first line of each input file; NR != 1 exempts the very
  # first line overall, so only the repeated headers are dropped.
  awk 'FNR == 1 && NR != 1 { next } { print }' "$@" > "${merged}"
}

# Merge all per-event-type .ioe files produced by generateEvents.
# The glob is restricted to the generateEvents output prefix so that a
# Danio.events.ioe left over from a previous run is never read back as input.
ioe_parts=(Danio_splicing_events_*.ioe)
if [[ -e "${ioe_parts[0]}" ]]; then
  merge_ioe_files Danio.events.ioe "${ioe_parts[@]}"
else
  # An unmatched glob stays literal; do not create an empty merged file.
  printf 'no Danio_splicing_events_*.ioe files found\n' >&2
fi
# Build a salmon index from the extracted transcript FASTA.
salmon index \
  -t Danio_rerio.GRCz11.110.transcripts.fasta \
  -i Danio_rerio.GRCz11.110.transcripts.index
# Step 2: quantify transcript expression with salmon
# Estimate transcript-level expression (TPM) for one paired-end sample.
# Uses library type ISF, GC-bias correction and 10 threads; results are
# written to the directory "test_output".
reads_dir=/share/pub/liangjc/bmy/RNA-seqW
salmon quant \
  -i /share/pub/liangjc/RNA_splicing/Danio_rerio.GRCz11.110.transcripts.index/ \
  -l ISF \
  --gcBias \
  -1 "${reads_dir}/CN-1-0.25.R1.fq.gz" \
  -2 "${reads_dir}/CN-1-0.25.R2.fq.gz" \
  -p 10 \
  -o test_output
# Step 3: build the TPM table and compute PSI values
#得到符合要求的tpm文件
multipleFieldSelection.py -i /share/pub/liangjc/RNA_splicing/test_output/quant.sf -k 1 -f 4 -o iso_tpm.txt
perl -alne ‘{/(|.*|)\t/; ;s/$1//g;s/|//g;print}’ iso_tpm.txt > iso_tpm_formatted.txt
# Compute per-event PSI values from the merged event file and the formatted
# TPM table; "test" is the output prefix.
# The -i path must be a single word: a space between the directory and the
# file name makes suppa.py see a directory plus a stray extra argument.
suppa.py psiPerEvent \
  -i /share/pub/liangjc/RNA_splicing/Danio.events.ioe \
  -e /share/pub/liangjc/RNA_splicing/iso_tpm_formatted.txt \
  -o test
# Step 4: differential splicing analysis
# Differential splicing analysis between two conditions (empirical method).
# TODO: replace the four placeholder file names below with the real
# per-condition PSI and expression files before running. They are held in
# variables because literal <...> placeholders are parsed by the shell as
# redirections.
cond1_psi=Cond1.psi
cond2_psi=Cond2.psi
cond1_tpm=Cond1_expression-file
cond2_tpm=Cond2_expression-file
# NOTE(review): the original command ended in a bare "-o"; the output prefix
# below is a placeholder -- rename as needed.
suppa.py diffSplice \
  --method empirical \
  --input /share/pub/liangjc/RNA_splicing/Danio.events.ioe \
  --psi "${cond1_psi}" "${cond2_psi}" \
  --tpm "${cond1_tpm}" "${cond2_tpm}" \
  --area 1000 \
  --lower-bound 0.05 \
  -gc \
  -o Danio_diffSplice