重测序通用代码-生物信息学pipeline3

重测序数据分析,假设一个基因组,15个个体~

Samtools安装与使用-samtools-v1.17(bioinfomatics tools-007)-CSDN博客

bwa安装及使用v0.7.17(生物信息学工具-018)-CSDN博客

# Paths and tools
REFERENCE="genome.fasta"
SAMPLES=("sample1" "sample2" "sample3" "sample4" "sample5" "sample6" "sample7" "sample8" "sample9" "sample10" "sample11" "sample12" "sample13" "sample14" "sample15")
THREADS=8  # Adjust based on your system resources
GATK="gatk"  # Adjust the path if GATK is not in your PATH
BWA="bwa"
SAMTOOLS="samtools"
BCFTOOLS="bcftools"

# Step 1: Quality control using FastQC
echo "Step 1: Quality control of raw reads..."
mkdir -p QC_reports
for SAMPLE in "${SAMPLES[@]}"; do
  fastqc ${SAMPLE}_R1.fastq.gz ${SAMPLE}_R2.fastq.gz -o QC_reports/
done

# Step 2: Align reads to the reference genome using BWA
echo "Step 2: Aligning reads to the reference genome..."
for SAMPLE in "${SAMPLES[@]}"; do
  $BWA mem -t $THREADS $REFERENCE ${SAMPLE}_R1.fastq.gz ${SAMPLE}_R2.fastq.gz | $SAMTOOLS view -Sb - > ${SAMPLE}.bam
done

# Step 3: Sort and mark duplicates
echo "Step 3: Sorting and marking duplicates..."
for SAMPLE in "${SAMPLES[@]}"; do
  $SAMTOOLS sort -@ $THREADS -o ${SAMPLE}_sorted.bam ${SAMPLE}.bam
  $GATK MarkDuplicates -I ${SAMPLE}_sorted.bam -O ${SAMPLE}_sorted.markdup.bam -M ${SAMPLE}_markdup_metrics.txt
  $SAMTOOLS index ${SAMPLE}_sorted.markdup.bam
done

# Step 4: Base Quality Score Recalibration (BQSR)
echo "Step 4: Base quality score recalibration..."
for SAMPLE in "${SAMPLES[@]}"; do
  $GATK BaseRecalibrator -I ${SAMPLE}_sorted.markdup.bam -R $REFERENCE --known-sites known_variants.vcf -O ${SAMPLE}_recal_data.table
  $GATK ApplyBQSR -I ${SAMPLE}_sorted.markdup.bam -R $REFERENCE --bqsr-recal-file ${SAMPLE}_recal_data.table -O ${SAMPLE}_recalibrated.bam
done

# Step 5: Call variants using GATK HaplotypeCaller for each individual
echo "Step 5: Variant calling..."
for SAMPLE in "${SAMPLES[@]}"; do
  $GATK HaplotypeCaller -R $REFERENCE -I ${SAMPLE}_recalibrated.bam --emit-ref-confidence GVCF -O ${SAMPLE}.g.vcf.gz
done

# Step 6: Combine GVCFs
echo "Step 6: Combining GVCFs..."
$GATK CombineGVCFs -R $REFERENCE $(for SAMPLE in "${SAMPLES[@]}"; do echo "-V ${SAMPLE}.g.vcf.gz"; done) -O combined.g.vcf.gz

# Step 7: Joint Genotyping
echo "Step 7: Joint genotyping..."
$GATK GenotypeGVCFs -R $REFERENCE -V combined.g.vcf.gz -O joint_genotyped.vcf.gz

# Step 8: Variant filtering
echo "Step 8: Filtering variants..."
$GATK VariantFiltration -R $REFERENCE -V joint_genotyped.vcf.gz --filter-expression "QD < 2.0 || FS > 60.0 || MQ < 40.0" --filter-name "basic_snp_filter" -O filtered_variants.vcf.gz

# Step 9: Index the final VCF
echo "Step 9: Indexing the final VCF..."
$BCFTOOLS index filtered_variants.vcf.gz

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值