run_line.wes-ngs-pipeline

#cd /data/lijing/WES_test && mkdir {raw,clean,align,genome,hg19_VCF}

## step 1 预处理
outpath=/data/lijing/WES_test
mkdir -p ${outpath}/{raw,clean,align,genome,hg19_VCF}
path=/data/lijing/WES_test/raw


ls ${path}/*.gz |while read id
do
str1=$(basename $id)
echo ${str1%%.*}>>${outpath}/name.txt
done

# ls ${path}/*.gz |while read id; do echo $(basename $id)>>${outpath}/name.txt;done
cut -d'.' -f 1 ${outpath}/name.txt |sort|uniq > ${outpath}/name.uniq.txt

## step2 原始数据质控
for item in `cat ${outpath}/name.uniq.txt`
do
/home/biosoft/fastp/fastp \
	-i ${path}/${item}.1.fq.gz \
	-o ${outpath}/clean/${item}.1.clean.fq.gz \
	-I ${path}/${item}.2.fq.gz \
	-O ${outpath}/clean/${item}.2.clean.fq.gz
done

## step3 比对
for item in `cat ${outpath}/name.uniq.txt`
do
/usr/bin/bwa \
	-t 4 -M -R "@RG\tID:lane1\tPL:illumina\tLB:library\tSM:wes" \
	/data/lijing/WES_test/genome/hg19.fa \
	${outpath}/clean/${item}.1.clean.fq.gz \
	${outpath}/clean/${item}.2.clean.fq.gz > \
	${outpath}/align/${item}.sam \
	2 > ${outpath}/align/${item}.bwa.align.log && \
	/usr/bin/samtools view -b -S ${outpath}/align/${item}.sam > ${outpath}/align/${item}.bam && \
	/usr/bin/samtools sort ${outpath}/align/${item}.bam -o ${outpath}/align/${item}.sorted.bam && \
	/usr/bin/samtools flagstat ${outpath}/align/${item}.sorted.bam > ${outpath}/align/${item}.sorted.bam.flagstat
done

## step4 根据bed找到外显子覆盖度信息
/home/biosoft/gatk-4.0.12.0/gatk CreateSequenceDictionary -R /data/lijing/WES_test/genome/hg19.fa -O /data/lijing/WES_test/genome/hg19.dict

/home/biosoft/gatk-4.0.12.0/gatk BedToIntervalList -I /data/lijing/WES_test/hg19_VCF/S31285117_Regions.bed -O /data/lijing/WES_test/hg19_VCF/Exon.Interval.bed -SD /data/lijing/WES_test/genome/hg19.dict

for item in `cat ${outpath}/name.uniq.txt`
do
/home/biosoft/gatk-4.0.12.0/gatk CollectHsMetrics \
	-BI /data/lijing/WES_test/hg19_VCF/Exon.Interval.bed \
	-TI /data/lijing/WES_test/hg19_VCF/Exon.Interval.bed \
	-I ${outpath}/align/${item}.sorted.bam \
	-O ${outpath}/align/${item}.stat.txt
done

## 标记PCR重复序列并建立索引
for item in `cat ${outpath}/name.uniq.txt`
do
/home/biosoft/gatk-4.0.12.0/gatk --java-options \
	"-Xmx10G -Djava.io.tmpdir=${outpath}/align" \
	MarkDuplicates -I ${outpath}/align/${item}.sorted.bam \
	-O ${outpath}/align/${item}.sorted.MarkDuplicates.bam \
	-M ${outpath}/align/${item}.sorted.bam.metrics > ${outpath}/align/${item}.log.mark \
	2 > ${outpath}/align/${item}.err.mark && \
	/usr/bin/samtools index ${outpath}/align/${item}.sorted.MarkDuplicates.bam
done
## /usr/bin/samtools view -f 1024 wes.sorted.MarkDuplicates.bam | less -SN

wget -c -r ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg19 -P /data/lijing/WES_test/hg19_VCF

gunzip /data/lijing/WES_test/hg19_VCF/ftp.broadinstitute.org/bundle/hg19/*.gz

/usr/bin/samtools faidx /data/lijing/WES_test/genome/hg19.fa

## 重新校正碱基质量值
for item in `cat ${outpath}/name.uniq.txt`
do
/home/biosoft/gatk-4.0.12.0/gatk --java-options \
	"-Xmx10G -Djava.io.tmpdir=${outpath}/align" \
	BaseRecalibrator -R /data/lijing/WES_test/genome/hg19.fa \
	-I ${outpath}/align/${item}.sorted.MarkDuplicates.bam \
	--known-sites /data/lijing/WES_test/hg19_VCF/ftp.broadinstitute.org/bundle/hg19/1000G_phase1.indels.hg19.sites.vcf \
	--known-sites /data/lijing/WES_test/hg19_VCF/ftp.broadinstitute.org/bundle/hg19/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf \
	--known-sites /data/lijing/WES_test/hg19_VCF/ftp.broadinstitute.org/bundle/hg19/dbsnp_138.hg19.vcf \
	-L /data/lijing/WES_test/hg19_VCF/S31285117_Regions.bed \
	-O ${outpath}/align/${item}.recal_data.table && \
	/home/biosoft/gatk-4.0.12.0/gatk --java-options \
	"-Xmx10G -Djava.io.tmpdir=${outpath}/align" \
	ApplyBQSR -R /data/lijing/WES_test/genome/hg19.fa \
	-I ${outpath}/align/${item}.sorted.MarkDuplicates.bam \
	-bqsr ${outpath}/align/${item}.recal_data.table \
	-L /data/lijing/WES_test/hg19_VCF/S31285117_Regions.bed \
	-O ${outpath}/align/${item}.sorted.MarkDuplicates.BQSR.bam && \
	
done











  • 7
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值