UMI(unique molecular identifier,唯一分子标识符):UMI 建库可用于检测稀有突变,并校正测序错误与 PCR 偏差等。
目标:得到包含 UMI 分子标签信息的 BAM 文件。
1.提取UMI
# Step 1a: pack the paired FASTQ files into an unaligned BAM (uBam).
# FastqToSam can also handle .gz-compressed FASTQ files.
picard FastqToSam \
  F1=test_read1.fq \
  F2=test_read2.fq \
  O=test.uBam \
  SM=testsample

# Step 1b: move the UMI bases out of the read sequence and into SAM tags.
# One read structure is given per read; in fgbio read-structure notation
# 5M2S+T means 5 molecular-barcode (UMI) bases, 2 skipped bases, and the
# remainder of the read as template.
# The per-read UMIs are stored in ZA and ZB; RX receives the combined UMI.
fgbio -Xmx50G ExtractUmisFromBam \
  --input=test.uBam \
  --output=test.umi.uBam \
  --read-structure 5M2S+T 5M2S+T \
  --single-tag=RX \
  --molecular-index-tags ZA ZB
2. 比对去除 UMI 后的序列
# Step 2: align the UMI-stripped reads.
# samtools fastq streams the uBam as FASTQ on stdout; bwa mem reads it from
# /dev/stdin, with -p telling bwa the stream is interleaved paired-end data.
# The trailing "-" makes samtools view read stdin explicitly instead of
# relying on the implicit default, matching the consensus re-alignment
# pipeline later in this file.
samtools fastq test.umi.uBam \
  | bwa mem -t 50 -p /data/ref/hg38/hg38 /dev/stdin \
  | samtools view -b - \
  > test.umi.Bam
3. 合并uBam 和 Bam 得到带有UMI信息的比对文件
# Step 3: combine the aligned BAM with the UMI-tagged unaligned BAM so the
# resulting alignment file carries the UMI information.
# Output is coordinate-sorted and indexed; the XS attribute from the aligned
# BAM is retained.
picard MergeBamAlignment \
  R=/data/ref/hg38/hg38.fa \
  UNMAPPED_BAM=test.umi.uBam \
  ALIGNED_BAM=test.umi.Bam \
  O=test.umi.merged.Bam \
  SO=coordinate \
  CREATE_INDEX=true \
  MAX_GAPS=-1 \
  ALIGNER_PROPER_PAIR_FLAGS=true \
  ATTRIBUTES_TO_RETAIN=XS \
  VALIDATION_STRINGENCY=SILENT
4. Call Consensus Reads
# Step 4a: group reads by the raw UMI stored in the RX tag.
# Reads with mapping quality below 20 are not grouped, and UMIs within
# 1 edit of each other are treated as the same molecule.
# NOTE(review): --strategy=paired is fgbio's duplex-UMI strategy, yet the
# next step in this file runs CallMolecularConsensusReads rather than
# CallDuplexConsensusReads -- confirm single-strand consensus is intended.
fgbio GroupReadsByUmi \
  --input=test.umi.merged.Bam \
  --output=test.umi.group.Bam \
  --strategy=paired \
  --min-map-q=20 \
  --edits=1 \
  --raw-tag=RX
# Step 4b: collapse each UMI group into consensus reads.
# --min-reads=1 keeps groups backed by a single read at this stage;
# input bases below quality 20 are ignored when building the consensus.
# The result is written as an unaligned BAM (test.consensus.uBam).
fgbio CallMolecularConsensusReads \
  --input=test.umi.group.Bam \
  --output=test.consensus.uBam \
  --min-reads=1 \
  --min-input-base-quality=20
# Step 4c: re-align the consensus reads.
# The consensus uBam is streamed as FASTQ into bwa mem (-p: interleaved
# paired-end input on /dev/stdin) and the alignments are written as BAM.
samtools fastq test.consensus.uBam \
  | bwa mem -t 50 -p /data/ref/hg38/hg38 /dev/stdin \
  | samtools view -b - \
  > test.consensus.Bam
# Step 4d: combine the re-aligned consensus BAM with the unaligned consensus
# BAM, using the same MergeBamAlignment settings as the earlier merge step.
# Output is coordinate-sorted and indexed.
picard MergeBamAlignment \
  R=/data/ref/hg38/hg38.fa \
  UNMAPPED_BAM=test.consensus.uBam \
  ALIGNED_BAM=test.consensus.Bam \
  O=test.consensus.merge.Bam \
  SO=coordinate \
  CREATE_INDEX=true \
  MAX_GAPS=-1 \
  ALIGNER_PROPER_PAIR_FLAGS=true \
  ATTRIBUTES_TO_RETAIN=XS \
  VALIDATION_STRINGENCY=SILENT
# Step 4e: filter the aligned consensus reads.
# Thresholds used here: at least 2 supporting reads, per-read error rate
# <= 0.05, per-base error rate <= 0.1, base quality >= 30, and no more than
# 20% no-call bases per read.
# NOTE(review): the input was written with SO=coordinate by the preceding
# MergeBamAlignment; recent fgbio documentation asks for queryname-sorted or
# query-grouped input for this tool -- confirm against the installed version.
fgbio FilterConsensusReads \
  --input=test.consensus.merge.Bam \
  --output=test.consensus.merge.filter.Bam \
  --ref=/data/ref/hg38/hg38.fa \
  --min-reads=2 \
  --max-read-error-rate=0.05 \
  --max-base-error-rate=0.1 \
  --min-base-quality=30 \
  --max-no-call-fraction=0.20
# Step 4f: clip overlapping portions of read pairs in the filtered consensus
# BAM so that overlapping bases from the two mates are not both kept.
fgbio ClipBam \
  --ref=/data/ref/hg38/hg38.fa \
  --input=test.consensus.merge.filter.Bam \
  --output=test.consensus.merge.filter.clip.Bam \
  --clip-overlapping-reads=true
参考:
http://fulcrumgenomics.github.io/fgbio/tools/latest/
https://broadinstitute.github.io/picard/command-line-overview.html#MergeBamAlignment