run_line.last.vsearch-ngs-pipeline

## step 1
path=/data/lijing/data
outpath=/data/lijing/test

#ls ${path}/*.gz |while read id; do echo $(basename $id)>>${outpath}/name.txt;done
#cut -d'.' -f 1 ${outpath}/name.txt |sort|uniq > ${outpath}/name.uniq.txt
cd ${path}
ls *.gz|awk '{split($0,a,"_");print a[1]"_"a[2]"_"a[3]}' |sort|uniq > ${outpath}/name.uniq.txt

## 序列合并
for item in `cat ${outpath}/name.uniq.txt`
do
vsearch --fastq_mergepairs ${path}/${item}_R1_001.fastq.gz \
	--reverse ${path}/${item}_R2_001.fastq.gz \
	--fastqout ${outpath}/${item}.merged.fq \
	--fastaout ${outpath}/${item}.merged.fa \
	--relabel ${item}.
done

cat ${outpath}/*.merged.fq > ${outpath}/all.fq

##去除引物和质控
vsearch --fastx_filter ${outpath}/all.fq \
	--fastq_stripleft 0 --fastq_stripright 0 \
	--fastq_maxee_rate 0.01 \
	--fastaout ${outpath}/filtered.fa

##序列去冗余
vsearch --derep_fulllength ${outpath}/filtered.fa \
	--minuniquesize 10 --sizeout --relabel Uni_ \
	--output ${outpath}/uniques.fa

##特征挑选
##聚类
vsearch --cluster_fast ${outpath}/uniques.fa\
	--id 0.97 \
	--centroids ${outpath}/otu.fa \
	--relabel OTU_ \
	--sizeout
##去噪
vsearch --cluster_unoise ${outpath}/uniques.fa \
	--minsize 10 \
	--centroids ${outpath}/asv.fa \
	--relabel ASV_ \
	--sizeout

## 去除嵌合体
##基于自身去除
#vsearch --uchime3_denovo ${outpath}/otu.fa\
#	--nonchimeras ${outpath}/otus.fa

#vsearch --uchime3_denovo ${outpath}/asv.fa \
#	--nonchimeras ${outpath}/asvs.fa
##基于数据库去除
vsearch --uchime_ref ${outpath}/otu.fa\
	--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
	--nonchimeras ${outpath}/otus.fa

vsearch --uchime_ref ${outpath}/asv.fa \
	--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
	--nonchimeras ${outpath}/asvs.fa

##生成特征表
vsearch --usearch_global ${outpath}/filtered.fa \
	--db ${outpath}/asvs.fa \
	--id 0.97 \
	--query_cov 0.97 \
	--strand both \
	--threads 20 \
	--otutabout ${outpath}/asvs.otutab.txt

vsearch --usearch_global ${outpath}/filtered.fa \
        --db ${outpath}/otus.fa \
        --id 0.97 \
	--query_cov 0.97 \
	--strand both \
        --threads 20 \
        --otutabout ${outpath}/otus.otutab.txt

## 物种注释
vsearch --sintax ${outpath}/asvs.fa \
	--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
	--sintax_cutoff 0.1 \
	-tabbedout ${outpath}/asvs.sintax

vsearch --sintax ${outpath}/otus.fa \
        --db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
        --sintax_cutoff 0.1 \
        -tabbedout ${outpath}/otus.sintax

#vsearch --usearch_global ${outpath}/asvs.fa \
#	--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
#	--id 0.97 \
#	--query_cov 0.97 \
#	--strand both \
#	--biomout ${outpath}/asvs_tax.txt \
#	--fastapairs \
#	--userfields query+target+id+qcov+tcov \
#	--userout ${outpath}/asvs_stat.xls
#
#vsearch --usearch_global ${outpath}/outs.fa \
#	--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
#        --id 0.97 \
#        --query_cov 0.97 \
#        --strand both \
#	--biomout ${outpath}/outs_tax.txt \
#	--fastapairs \
#        --userfields query+target+id+qcov+tcov \
#        --userout ${outpath}/outs_stat.xls

python3 /data/lijing/use_script/20230616_otu_match_tax.py \
	-ip1 ${outpath}/asvs.sintax \
	-ip2 ${outpath}/asvs.otutab.txt \
	-op1 ${outpath}/asvs.result.xlsx

python3 /data/lijing/use_script/20230616_otu_match_tax.py \
	-ip1 ${outpath}/otus.sintax \
	-ip2 ${outpath}/otus.otutab.txt \
	-op1 ${outpath}/otus.result.xlsx











### step 2
## for item in `cat ${outpath}/name.uniq.txt`; do echo $item; done
#for item in `cat ${outpath}/name.uniq.txt`; do /home/biosoft/fastp/fastp \
#        --thread 20 -W 4 -q 20 -l 150 -y 30 --detect_adapter_for_pe -x \
#        -i ${path}/${item}_R1_001.fastq.gz \
#        -I ${path}/${item}_R2_001.fastq.gz \
#        -o ${outpath}/${item}.R1.out.fq.gz \
#        -O ${outpath}/${item}.R2.out.fq.gz \
#        -j ${outpath}/${item}.json \
#        -h ${outpath}/${item}.html && \
#        /usr/bin/python3 /data/lijing/download/20230329_estimate_qc.py \
#        -i ${outpath}/${item}.json \
#        -o ${outpath}/${item}.qc.xls; done
#
### step 3 assemble not good (optional)
#for j in `cat ${outpath}/name.uniq.txt`
#do
#/data/lijing/download_software/MEGAHIT-1.2.9-Linux-x86_64-static/bin/megahit \
#        -1 ${outpath}/${j}.R1.out.fq.gz \
#        -2 ${outpath}/${j}.R2.out.fq.gz \
#        -o ${outpath}/${j}
#done
#
### step 4 ### judge contig good/bad (optional)
#for j in `cat ${outpath}/name.uniq.txt`
#do
#python /data/lijing/download_software/quast/quast-5.0.2/quast.py \
#        ${outpath}/${j}/final.contigs.fa \
#        -t 15 -o ${outpath}/${j}
#done
#
#for j in `cat ${outpath}/name.uniq.txt`
#do
#/data/lijing/download_software/ncbi-blast/bin/blastn \
#	-db /data/lijing/assemble_capture_test/silva_database/SILVA_138.1_SSURef_NR99_tax_silva.fa \
#	-query ${outpath}/${j}/final.contigs.fa \
#	-out ${outpath}/${j}.blastn.xls \
#	-outfmt 6 -num_threads 20 && \
#	/usr/bin/python3 /data/lijing/assemble_capture_test/last_step1.py \
#	-ip1 ${outpath}/${j}.blastn.xls \
#	-r /data/lijing/assemble_capture_test/silva_database/taxmap_slv_ssu_ref_nr_138.1.xls \
#	-op1 ${outpath}/${j}.blastn.result.xls && \
#	/usr/bin/python3 /data/lijing/assemble_capture_test/last_step2.py \
#	-ip1 ${outpath}/${j}/final.contigs.fa \
#	-ip2 ${outpath}/${j}.blastn.result.xls \
#	-op1 ${outpath}/${j}/${j}.final.contigs.fa
#done
#
##for j in `cat ${outpath}/name.uniq.txt`
##do
##/home/biosoft/bwa-0.7.17/bwa index -a is ${outpath}/${j}/${j}.final.contigs.fa -p ${outpath}/${j}/${j}.final.contigs.fa
##done
#
##for j in `cat ${outpath}/name.uniq.txt`
##do
##/home/biosoft/bwa-0.7.17/bwa mem -t 20 ${outpath}/${j}/${j}.final.contigs.fa 
#for item in `cat ${outpath}/name.uniq.txt`; do /home/biosoft/bwa-0.7.17/bwa mem -t 20 \
#        /data/lijing/assemble_capture_test/silva_database/SILVA_138.1_SSURef_NR99_tax_silva.fa \
#        ${outpath}/${item}.R1.out.fq.gz \
#        ${outpath}/${item}.R2.out.fq.gz > \
#        ${outpath}/${item}.out.sam && \
#       /usr/bin/samtools view -@ 20 -F 4 ${outpath}/${item}.out.sam > \
#        ${outpath}/${item}.out.mapped.sam ; done
#
#for j in `cat ${outpath}/name.uniq.txt`
#do
#/usr/bin/python3 /data/lijing/assemble_capture_test/last_step3.py \
#	-ip1 ${outpath}/${j}.out.mapped.sam \
#	-ip2 ${outpath}/${j}.blastn.result.xls \
#	-r /data/lijing/assemble_capture_test/silva_database/taxmap_slv_ssu_ref_nr_138.1.xls \
#	-op1 ${outpath}/${j}.genus.count.xls \
#	-op2 ${outpath}/${j}.species.count.xls \
#	-ml 150
#done


  • 6
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值