环境配置(这里的环境配置和 RNA 流程的环境配置一样)
# Create a dedicated conda environment named "rna" and install the tool chain
# into it. `conda create` needs an explicit -n NAME (or -p PREFIX); without
# one it aborts and nothing gets created.
# NOTE(review): assumes the bioconda / conda-forge channels are already
# configured and that conda >= 4.4 is in use (`conda activate`) - confirm.
conda create -y -n rna
# Activate the environment so the installs below go into "rna" rather than
# into whatever environment happens to be active.
conda activate rna
conda install -y sra-tools
conda install -y trimmomatic
conda install -y cutadapt multiqc
conda install -y trim-galore
conda install -y star hisat2 bowtie2
conda install -y subread tophat htseq bedtools deeptools
conda install -y salmon
conda deactivate  # leave the current rna environment
下载 sra 数据
# Download the .sra files with prefetch (this route is extremely slow).
# The accession list is read line by line; `|| [[ -n "$i" ]]` keeps the last
# accession even when the file has no trailing newline, and blank lines are
# skipped so prefetch is never invoked without an accession.
while IFS= read -r i || [[ -n "$i" ]]; do
  [[ -n "$i" ]] || continue
  prefetch "$i" -O "$PWD" && echo "** ${i}.sra done **"
done < SRR_Acc_List-2586-4.txt
# Download FASTQ files from the ENA Aspera server with ascp.

#######################################
# Print the ENA FASTQ directory of a run accession.
# Layout: /vol1/fastq/<first 6 chars>/[<zero-padded sub-dir>/]<accession>/
#   9-character accession  (SRR123456)   -> no sub-directory
#   10-character accession (SRR1234567)  -> "00" + last digit
#   11-character accession (SRR12345678) -> "0"  + last two digits
# Arguments: $1 - run accession
# Outputs:   absolute server-side path on stdout
#######################################
ena_fastq_dir() {
  local acc=$1
  local prefix=${acc:0:6}
  # Everything after the 9th character decides the extra sub-directory.
  local extra=${acc:9}
  local sub=''
  case ${#extra} in
    0) sub='' ;;
    1) sub="00${extra}/" ;;
    2) sub="0${extra}/" ;;
    *) sub="${extra}/" ;;
  esac
  printf '/vol1/fastq/%s/%s%s/\n' "$prefix" "$sub" "$acc"
}

# ${wkd} must point at the directory holding the Aspera key file.
while IFS= read -r id || [[ -n "$id" ]]; do
  [[ -n "$id" ]] || continue
  echo "$id"
  ascp -QT -l 300m -P33001 -i \
    "${wkd}/asperaweb_id_dsa.openssh" \
    "era-fasp@fasp.sra.ebi.ac.uk:$(ena_fastq_dir "$id")" ./
done < SRR_Acc_List.txt
sra 转换为 fastq
# Convert every .sra file under $wkd/dataset to FASTQ, writing into ./fq.
# --gzip compresses the resulting FASTQ files.
# The loop only runs once we are really inside fq, so output never lands in
# the wrong directory.
if mkdir -p fq && cd fq; then
  for i in "$wkd"/dataset/*sra; do
    # Skip the literal pattern when the glob matched nothing.
    [[ -e "$i" ]] || continue
    echo "$i"
    # Pass the path as-is: prefixing "./" breaks it when $wkd is absolute.
    time fastq-dump --gzip --split-files "$i"
  done
fi
# Rename the fastq-dump output files.

#######################################
# Rename <acc>_1/_2/_3 FASTQ files in the current directory to the
# <acc>_S1_L001_{I1,R1,R2}_001.fastq.gz scheme:
#   _1 -> I1, _2 -> R1, _3 -> R2
# NOTE(review): presumably the naming Cell Ranger expects - confirm.
# Arguments: $1 - file with one run accession per line
#######################################
rename_10x_fastq() {
  local list=$1
  local acc
  while IFS= read -r acc || [[ -n "$acc" ]]; do
    [[ -n "$acc" ]] || continue
    # One command per line, no trailing backslashes: a backslash before the
    # newline that precedes `done` glues `done` onto the file name and leaves
    # the loop unterminated.
    mv -- "${acc}"_1*.gz "${acc}_S1_L001_I1_001.fastq.gz"
    mv -- "${acc}"_2*.gz "${acc}_S1_L001_R1_001.fastq.gz"
    mv -- "${acc}"_3*.gz "${acc}_S1_L001_R2_001.fastq.gz"
  done < "$list"
}

rename_10x_fastq SRR_Acc_List-9245-3.txt
质控
# Quality control with FastQC, using sample P2586-4 as the example.
# Everything runs only once we are really inside $wkd/qc, so the list files
# and reports never end up in an unrelated directory.
if mkdir -p "$wkd/qc" && cd "$wkd/qc"; then
  # Collect the R1 and R2 read files separately, then merge both lists.
  find "$wkd/raw/P2586-4" -name '*R1*.gz' > P2586-4-id-1.txt
  find "$wkd/raw/P2586-4" -name '*R2*.gz' > P2586-4-id-2.txt
  cat P2586-4-id-1.txt P2586-4-id-2.txt > P2586-4-id-all.txt
  # Only launch FastQC when at least one file was found: with an empty list
  # xargs would still run `fastqc` with no file arguments.
  if [[ -s P2586-4-id-all.txt ]]; then
    xargs fastqc -t 20 -o ./ < P2586-4-id-all.txt
  else
    echo "no R1/R2 .gz files found under $wkd/raw/P2586-4" >&2
  fi
fi
cellranger 软件下载
请从下面的官网页面下载最新版本的 Cell Ranger:
https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest
软件检测
# Check the installation by running Cell Ranger in test mode (run id: tiny).
cellranger testrun --id=tiny
# I used 12 CPUs; the check took roughly 20 minutes to finish.
# If the installation is complete and working, a report like the one below is printed at the end:
cellranger testrun (2.0.2)
Copyright (c) 2017 10x Genomics, Inc. All rights reserved.
-------------------------------------------------------------------------------
Running Cell Ranger in test mode...
Martian Runtime - 2.0.2-2.2.2
Running preflight checks (please wait)...
[runtime] (ready) ID.tiny.SC_RNA_COUNTER_CS.SC_RNA_COUNTER.SETUP_CHUNKS
[runtime] (split_complete) ID.tiny.SC_RNA_COUNTER_CS.SC_RNA_COUNTER.SETUP_CHUNKS
...
Pipestance completed successfully!
下载注释信息
# Download the GRCh38 reference package (v1.2.0) and unpack it.
# -f: fail on an HTTP error instead of saving the error page as the tarball;
# -L: follow redirects. Extraction runs only if the download succeeded.
curl -fLO http://cf.10xgenomics.com/supp/cell-exp/refdata-cellranger-GRCh38-1.2.0.tar.gz \
  && tar -xzvf refdata-cellranger-GRCh38-1.2.0.tar.gz