解压序列文件
gzip -d *.gz
重命名fq文件为fastq
rename 's/fq/fastq/' *.fq
rename 's/\.1\.fq/\_R1\.fastq/' *.fq
rename 's/\.2\.fq/\_R2\.fastq/' *.fq
rename 's/raw\.split\.//' *.fastq
拼接序列
usearch -fastq_mergepairs *R1*.fastq -relabel @ -fastq_maxdiffs 10 \
-fastq_pctid 80 -fastqout merged.fq
删除引物和barcode序列
usearch -fastx_truncate merged.fq -stripleft 26 -stripright 27 -fastqout stripped.fq
下载RDP训练集
https://www.mothur.org/wiki/RDP_reference_files#Version_14
训练集小写转成大写
cat trainset16_022016.pds.fasta | tr a-z A-Z >trainset16.fasta
将互补连同一方向到同一条链(ITS序列用UNITE99作为参考序列)
usearch -orient stripped.fq -db /home/kxf/Documents/database/trainset16.fasta -fastqout orient.fq #细菌甲烷菌
质量控制并删除质量信息(设定maximum expected error为1)
usearch -fastq_filter orient.fq -fastq_maxee 1.0 -fastaout filtered.fa
去除嵌合体
usearch -uchime2_ref filtered.fa -db /home/kxf/Documents/database/trainset16.fasta -uchimeout out.txt -strand plus -mode sensitive #细菌甲烷菌原虫
usearch -uchime2_ref filtered.fa -db reference=/home/kxf/Documents/database/UNITEv6_sh_97_s.fasta -uchimeout out.txt -strand plus -mode sensitive #真菌
vsearch -uchime_ref filtered.fa -nonchimeras outvsearch.fa -db /home/kxf/Documents/database/SILVA_132_SSURef_Nr99_tax_silva.fasta
查看序列长度分度
usearch -fastq_eestats2 stripped.fq -output eestats2.txt -length_cutoffs 100,300,10
mothur > summary.seqs(fasta=outvsearch.fa,processors=88)
序列修剪到相同长度(配对ITS不需要修剪)
usearch -fastx_truncate outvsearch.fa -trunclen 250 -fastaout reads250.fa
mothur > screen.seqs(fasta=outvsearch.fa,start=95%value,end=5%value, maxhomop=10)
序列去冗余
usearch -fastx_uniques outvsearch.good.fa -minuniquesize 8 -fastaout uniques.fasta -sizeout -relabel Uniq # mothur 修剪
usearch -fastx_uniques reads250.fa -minuniquesize 4 -fastaout uniques.fasta -sizeout -relabel Uniq
删除数量少的序列
usearch -sortbysize uniques.fasta -fastaout uniques.fasta -minsize 2
聚成OTU
usearch -cluster_otus uniques.fasta -otus otus.fa -relabel Otu
生成OTU表(输入文件要用未去冗余的序列)
usearch -otutab filtered.fa -otus otus.fa -mothur_shared_out otutab.txt -mapout map.txt #mothur
usearch -otutab reads250.fa -otus otus.fa -otutabout otutab.txt -mapout map.txt
多样性指数
OTU的树文件
计算距离矩阵
usearch -calc_distmx otus.fa -tabbedout mx.txt -maxdist 0.2 -termdist 0.3
利用距离矩阵生成tree文件
usearch -cluster_aggd mx.txt -treeout clusters.tree -clusterout clusters.txt \
-id 0.80 -linkage min
注释信息
mothur >classify.seqs(fasta=otus.fa, reference=/home/kxf/Documents/database/silva.nr_v132.align, taxonomy=/home/kxf/Documents/database/silva.nr_v132.tax, cutoff=60, processors=88) #细菌,原虫
mothur >classify.seqs(fasta=otus.fa, reference=/home/kxf/Documents/database/RIM_DB_14_07.fasta, taxonomy=/home/kxf/Documents/database/RIM_DB_14_07_c.txt, processors=88) #产甲烷菌
mothur >classify.seqs(fasta=otus.fa, reference=/home/kxf/Documents/database/UNITEv6_sh_97_s.fasta, taxonomy=/home/kxf/Documents/database/UNITEv6_sh_97_s.tax, processors=88) #真菌ITS
稀释性曲线
rarefaction.single(shared=otutab.txt)
计算α多样性指数
usearch -alpha_div otutab.txt -output alpha.txt #32位不够
summary.single(shared=current)
计算β多样性指数
dist.shared(shared=current, calc=thetayc-jclass-braycurtis)
count.seqs(shared=current)
unifrac.unweighted(tree=clusters.tree, count=current, distance=lt, processors=88, random=F)
unifrac.weighted(tree=clusters.tree, count=current, distance=lt, processors=88, random=F)
pcoa(phylip=clusters.tree1.unweighted.phylip.dist)
pcoa(phylip=clusters.tree1.weighted.phylip.dist)
pcoa(phylip=otutab.braycurtis.usearch.lt.dist)
pcoa(phylip=otutab.jclass.usearch.lt.dist)
pcoa(phylip=otutab.thetayc.usearch.lt.dist)
system(rm -f *.rabund)
system(mv *.dist *.loadings *.axes -t result/pcoa/)
system(mv *.tree otus.fa *.rarefaction otutab.txt otutab.usearch.count_table otutab.groups.summary *.taxonomy otutab.groups.rarefaction -t result/)
建立带注释信息的OTU表
create.database(shared=otutab.txt,constaxonomy=otus.nr_v132.wang.taxonomy, repfasta=otus.fa,count=otutab.usearch.count_table)