################################ Section 1:FASTQ to BAM ###############################
## mapping
#bwa mem -t 32 -M /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta H11_CCACTC_L007_R1_001.fastq H11_CCACTC_L007_R2_001.fastq > mem.sam
################################## Section 2:Calling Variants #################################
## add read groups
#java -Xmx2g -jar /leofs/noncode/softwares/picard-tools-1.112/AddOrReplaceReadGroups.jar
# INPUT=mem.sam OUTPUT=addrg_mem.bam
# SORT_ORDER=coordinate
# RGID=group1 RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM=sample1
## dedupping
#java -Xmx2g -jar /leofs/noncode/softwares/picard-tools-1.112/MarkDuplicates.jar
# INPUT=addrg_mem.bam OUTPUT=dedup_mem.bam
# METRICS_FILE=metrics.mem.txt READ_NAME_REGEX=".*?([0-9]+):([0-9]+):([0-9]+)$"
## validate bam formate
#java -Xmx2g -jar /leofs/noncode/softwares/picard-tools-1.112/ValidateSamFile.jar
# INPUT=dedup_mem.bam OUTPUT=validate_mem.txt
## build index form bam file
#java -Xmx4g -jar /leofs/noncode/softwares/picard-tools-1.112/BuildBamIndex.jar
# INPUT=dedup_mem.bam
## Local Realignment around Indels (~6h)
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T RealignerTargetCreator
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -I dedup_mem.bam
# -known /leofs/noncode/xcl/references/human/hg19/1000G_phase1.indels.hg19.sites.vcf
# -o target_interval.list
#(If this step goes wrong,it may be the reference FASTA not sorted as chr1..chrY..chrM.Just run again,it maybe do well);
#
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T IndelRealigner
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -I dedup_mem.bam
# -targetIntervals target_interval.list
# -known /leofs/noncode/xcl/references/human/hg19/1000G_phase1.indels.hg19.sites.vcf
# -o realigned_mem.bam
## Base quality scores recalibration (BQSR)
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T BaseRecalibrator
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -I realigned_mem.bam
# -knownSites /leofs/noncode/xcl/references/human/hg19/dbsnp_138.hg19.vcf
# -knownSites /leofs/noncode/xcl/references/human/hg19/1000G_phase1.indels.hg19.sites.vcf
# -o recal_data_mem.table
#
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T BaseRecalibrator
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -I realigned_mem.bam
# -knownSites /leofs/noncode/xcl/references/human/hg19/dbsnp_138.hg19.vcf
# -knownSites /leofs/noncode/xcl/references/human/hg19/1000G_phase1.indels.hg19.sites.vcf
# -BQSR recal_data_mem.table
# -o post_recal_data_mem.table
#
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T AnalyzeCovariates
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -before recal_data_mem.table
# -after post_recal_data_mem.table
# -plots recalibration_plots.pdf
#
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T PrintReads
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -I realigned_mem.bam
# -BQSR recal_data_mem.table
# -o recal_mem.bam
## Variant Discovery
# Calling Variants (~6h)
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T HaplotypeCaller
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -I recal_mem.bam
# --dbsnp /leofs/noncode/xcl/references/human/hg19/dbsnp_138.hg19.vcf
# --emitRefConfidence GVCF
# --variant_index_type LINEAR
# --variant_index_parameter 128000
# -o gvcf_mem.vcf
# -nct 16
#(If this step goes wrong with the ERROR MESSAGE:"There was a failure because you did not provide enough memory to run this program. See the -Xmx JVM argument to adjust the maximum heap size provided to Java".You could change the parameter "-nct 16" to "-nct 2" or lower.)
# Joint Genotyping
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T GenotypeGVCFs
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# --variant gvcf_mem.vcf
# -o raw.snps.indels.vcf
# -nt 16
############################################ Section 3:Filtering Variants###########################
# Variant Filtering
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T SelectVariants
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -V gvcf_mem.vcf
# -selectType SNP
# -o raw_snps.vcf
# -nt 16
#
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T VariantFiltration
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -V raw_snps.vcf
# --filterExpression "QD < 2.0 || FS > 60.0 || MQ < 40.0 || HaplotypeScore > 13.0 ||MappingQualityRankSum < -12.5|| ReadPosRankSum < -8.0"
# --filterName "my_snp_filter"
# -o filtered_snps.vcf
#
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T SelectVariants
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -V gvcf_mem.vcf
# -selectType INDEL
# -o raw_indels.vcf
# -nt 16
#
#java -Xmx4g -jar /leofs/noncode/softwares/gatk/GenomeAnalysisTK.jar
# -T VariantFiltration
# -R /leofs/noncode/reference/gatk/genome/hg19/ucsc.hg19.fasta
# -V raw_indels.vcf
# --filterExpression "QD < 2.0 || FS > 200.0 || MQ < 40.0 || ReadPosRankSum < -20.0"
# --filterName "my_snp_filter"
# -o filtered_indels.vcf
转载本文请联系原作者获取授权,同时请注明本文来自熊朝亮科学网博客。
链接地址:http://blog.sciencenet.cn/blog-1509670-918904.html
上一篇:EditPlus的安装、设置和使用
下一篇:md5的使用