1、snpeff
1.1、snpeff软件下载
# Download the latest snpEff core bundle, unpack it, and enter the snpEff
# directory. The steps are chained with && so that a failed download or
# extraction does not fall through to the next command.
wget https://snpeff.blob.core.windows.net/versions/snpEff_latest_core.zip \
  && unzip snpEff_latest_core.zip \
  && cd snpEff
解压后即可使用。
1.2、在snpeff文件夹下新建一个命名为data的文件夹,data文件夹下面再新建一个要生成数据库的文件夹,命名为maize,在maize里面下载参考基因组序列和基因组的GTF文件,分别重命名为sequences.fa和genes.gtf。
1.3、定义snpEff.config文件
用 vi snpEff.config 打开snpEff.config文件,在文件中添加以下两行:
# maize genome, version maizev5
# The key prefix ("maize" in maize.genome) is the database name; it has to
# match the data/maize directory that holds sequences.fa and genes.gtf, and it
# is the genome name to pass on the snpEff command line.
maize.genome : maize
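如图所示:
配置完成后,需要先在snpEff目录下构建数据库,否则后续注释会因找不到maize数据库而报错:java -jar snpEff.jar build -gtf22 -v maize (如果没有准备cds.fa和protein.fa,较新版本的snpEff可能还需要加上 -noCheckCds -noCheckProtein 参数)。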
1.4、SNP、Indel注释
# Annotate SNPs with snpEff.
# The genome argument must be the database name registered in snpEff.config
# (maize.genome) and used for the data/maize directory, i.e. "maize". The
# previous value "maizev4" does not match the database set up in 1.2/1.3,
# so snpEff could not locate it.
#   -ud 2000    upstream/downstream interval length, in bases
#   -csvStats   write summary statistics as CSV
#   -htmlStats  write summary statistics as HTML
#   -o vcf      emit annotated VCF on stdout
java -Xmx4g -jar /ipm1/hang/pl/program/snp_indel_snpeff/snpEff/snpEff.jar \
  -c /ipm1/hang/pl/program/snp_indel_snpeff/snpEff/snpEff.config \
  -ud 2000 \
  -csvStats maizesnp.csv \
  -htmlStats maizesnp.html \
  -o vcf \
  maize maize.snp.filed.vcf > maize.filtered.snp.ann.vcf

# Annotate indels with the same settings and the same "maize" database.
java -Xmx4g -jar /ipm1/hang/pl/program/snp_indel_snpeff/snpEff/snpEff.jar \
  -c /ipm1/hang/pl/program/snp_indel_snpeff/snpEff/snpEff.config \
  -ud 2000 \
  -csvStats maizeindel.csv \
  -htmlStats maizeindel.html \
  -o vcf \
  maize maize.indel.filed.vcf > maize.filtered.indel.ann.vcf
2、annovar
2.1、软件安装
annovar下载地址:见ANNOVAR官方文档的“Download ANNOVAR”页面(https://annovar.openbioinformatics.org/en/latest/user-guide/download/),按页面说明注册后获取下载链接。
解压后设置环境变量,将annovar所在目录加入PATH:
# Put the ANNOVAR install directory at the front of the command search path.
export PATH="/ipm1/hang/pl/program/snp_indel_snpeff/annovar:${PATH}"
annovar的使用需要gtfToGenePred软件包
下载地址为:http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/
下载后运行 chmod +x ./gtfToGenePred 增加运行权限,并将gtfToGenePred所在目录加入PATH(或在调用时写完整路径),以便后面可以直接使用gtfToGenePred命令。
2.2、将GTF文件转换成refGene格式
# Convert the GTF annotation (genes.gtf) into extended genePred format and
# write it to maize_refGene.txt; groups without exons are ignored.
gtfToGenePred \
  -genePredExt \
  -ignoreGroupsWithoutExons \
  genes.gtf \
  maize_refGene.txt
2.3、建立注释库
# Extract the transcript sequences described by maize_refGene.txt from the
# reference genome (sequences.fa) and write them to maize_refGeneMrna.fa.
/ipm1/hang/pl/program/snp_indel_snpeff/annovar/retrieve_seq_from_fasta.pl \
  --format refGene \
  --seqfile sequences.fa \
  maize_refGene.txt \
  --out maize_refGeneMrna.fa
2.4、将VCF文件转换成表格格式输入文件
# Convert the SNP VCF and then the indel VCF into ANNOVAR input tables.
# Both conversions use identical options; only the file names differ.
for variant_type in snp indel; do
  /ipm1/hang/pl/program/snp_indel_snpeff/annovar/convert2annovar.pl \
    -format vcf4 -allsample -withfreq \
    "maize.${variant_type}.filed.vcf" > "maize.${variant_type}.vcf.annovar.input"
done
2.5、SNP、Indel注释
# Gene-based annotation of the SNP table and then the indel table.
# The database files for build "maize" are looked up in the current
# directory (./). Results go to maizesnp.anno.* and maizeindel.anno.*.
for variant_type in snp indel; do
  /ipm1/hang/pl/program/snp_indel_snpeff/annovar/annotate_variation.pl \
    -geneanno --neargene 2000 \
    -buildver maize -dbtype refGene \
    -outfile "maize${variant_type}.anno" \
    -exonsort "maize.${variant_type}.vcf.annovar.input" ./
done
2.6、统计变异信息、查看变异类型等
#######################################
# Count how often each variant region type occurs in an ANNOVAR
# .variant_function file.
# Arguments: $1 - path to a .variant_function file
# Outputs:   one "count type" line per distinct type (uniq -c format)
#######################################
count_variant_types() {
  # Column 1 can hold several ';'-separated types; put each on its own line
  # before counting. cut reads the file directly (no pager needed), and tr
  # is used instead of sed's '\n' replacement, which is GNU-specific.
  cut -f 1 -- "$1" | tr ';' '\n' | sort | uniq -c
}

# SNP annotation summary
count_variant_types maizesnp.anno.variant_function
# Indel annotation summary
count_variant_types maizeindel.anno.variant_function