## step 1
path=/data/lijing/data
outpath=/data/lijing/test
#ls ${path}/*.gz |while read id; do echo $(basename $id)>>${outpath}/name.txt;done
#cut -d'.' -f 1 ${outpath}/name.txt |sort|uniq > ${outpath}/name.uniq.txt
cd ${path}
ls *.gz|awk '{split($0,a,"_");print a[1]"_"a[2]"_"a[3]}' |sort|uniq > ${outpath}/name.uniq.txt
## 序列合并
for item in `cat ${outpath}/name.uniq.txt`
do
vsearch --fastq_mergepairs ${path}/${item}_R1_001.fastq.gz \
--reverse ${path}/${item}_R2_001.fastq.gz \
--fastqout ${outpath}/${item}.merged.fq \
--fastaout ${outpath}/${item}.merged.fa \
--relabel ${item}.
done
cat ${outpath}/*.merged.fq > ${outpath}/all.fq
##去除引物和质控
vsearch --fastx_filter ${outpath}/all.fq \
--fastq_stripleft 0 --fastq_stripright 0 \
--fastq_maxee_rate 0.01 \
--fastaout ${outpath}/filtered.fa
##序列去冗余
vsearch --derep_fulllength ${outpath}/filtered.fa \
--minuniquesize 10 --sizeout --relabel Uni_ \
--output ${outpath}/uniques.fa
##特征挑选
##聚类
vsearch --cluster_fast ${outpath}/uniques.fa\
--id 0.97 \
--centroids ${outpath}/otu.fa \
--relabel OTU_ \
--sizeout
##去噪
vsearch --cluster_unoise ${outpath}/uniques.fa \
--minsize 10 \
--centroids ${outpath}/asv.fa \
--relabel ASV_ \
--sizeout
## 去除嵌合体
##基于自身去除
#vsearch --uchime3_denovo ${outpath}/otu.fa\
# --nonchimeras ${outpath}/otus.fa
#vsearch --uchime3_denovo ${outpath}/asv.fa \
# --nonchimeras ${outpath}/asvs.fa
##基于数据库去除
vsearch --uchime_ref ${outpath}/otu.fa\
--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
--nonchimeras ${outpath}/otus.fa
vsearch --uchime_ref ${outpath}/asv.fa \
--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
--nonchimeras ${outpath}/asvs.fa
##生成特征表
vsearch --usearch_global ${outpath}/filtered.fa \
--db ${outpath}/asvs.fa \
--id 0.97 \
--query_cov 0.97 \
--strand both \
--threads 20 \
--otutabout ${outpath}/asvs.otutab.txt
vsearch --usearch_global ${outpath}/filtered.fa \
--db ${outpath}/otus.fa \
--id 0.97 \
--query_cov 0.97 \
--strand both \
--threads 20 \
--otutabout ${outpath}/otus.otutab.txt
## 物种注释
vsearch --sintax ${outpath}/asvs.fa \
--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
--sintax_cutoff 0.1 \
-tabbedout ${outpath}/asvs.sintax
vsearch --sintax ${outpath}/otus.fa \
--db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
--sintax_cutoff 0.1 \
-tabbedout ${outpath}/otus.sintax
#vsearch --usearch_global ${outpath}/asvs.fa \
# --db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
# --id 0.97 \
# --query_cov 0.97 \
# --strand both \
# --biomout ${outpath}/asvs_tax.txt \
# --fastapairs \
# --userfields query+target+id+qcov+tcov \
# --userout ${outpath}/asvs_stat.xls
#
#vsearch --usearch_global ${outpath}/outs.fa \
# --db /data/lijing/download_software/vsearch_data/silva_16s_v123.fa \
# --id 0.97 \
# --query_cov 0.97 \
# --strand both \
# --biomout ${outpath}/outs_tax.txt \
# --fastapairs \
# --userfields query+target+id+qcov+tcov \
# --userout ${outpath}/outs_stat.xls
python3 /data/lijing/use_script/20230616_otu_match_tax.py \
-ip1 ${outpath}/asvs.sintax \
-ip2 ${outpath}/asvs.otutab.txt \
-op1 ${outpath}/asvs.result.xlsx
python3 /data/lijing/use_script/20230616_otu_match_tax.py \
-ip1 ${outpath}/otus.sintax \
-ip2 ${outpath}/otus.otutab.txt \
-op1 ${outpath}/otus.result.xlsx
### step 2
## for item in `cat ${outpath}/name.uniq.txt`; do echo $item; done
#for item in `cat ${outpath}/name.uniq.txt`; do /home/biosoft/fastp/fastp \
# --thread 20 -W 4 -q 20 -l 150 -y 30 --detect_adapter_for_pe -x \
# -i ${path}/${item}_R1_001.fastq.gz \
# -I ${path}/${item}_R2_001.fastq.gz \
# -o ${outpath}/${item}.R1.out.fq.gz \
# -O ${outpath}/${item}.R2.out.fq.gz \
# -j ${outpath}/${item}.json \
# -h ${outpath}/${item}.html && \
# /usr/bin/python3 /data/lijing/download/20230329_estimate_qc.py \
# -i ${outpath}/${item}.json \
# -o ${outpath}/${item}.qc.xls; done
#
### step 3 assemble not good (optional)
#for j in `cat ${outpath}/name.uniq.txt`
#do
#/data/lijing/download_software/MEGAHIT-1.2.9-Linux-x86_64-static/bin/megahit \
# -1 ${outpath}/${j}.R1.out.fq.gz \
# -2 ${outpath}/${j}.R2.out.fq.gz \
# -o ${outpath}/${j}
#done
#
### step 4 ### judge contig good/bad (optional)
#for j in `cat ${outpath}/name.uniq.txt`
#do
#python /data/lijing/download_software/quast/quast-5.0.2/quast.py \
# ${outpath}/${j}/final.contigs.fa \
# -t 15 -o ${outpath}/${j}
#done
#
#for j in `cat ${outpath}/name.uniq.txt`
#do
#/data/lijing/download_software/ncbi-blast/bin/blastn \
# -db /data/lijing/assemble_capture_test/silva_database/SILVA_138.1_SSURef_NR99_tax_silva.fa \
# -query ${outpath}/${j}/final.contigs.fa \
# -out ${outpath}/${j}.blastn.xls \
# -outfmt 6 -num_threads 20 && \
# /usr/bin/python3 /data/lijing/assemble_capture_test/last_step1.py \
# -ip1 ${outpath}/${j}.blastn.xls \
# -r /data/lijing/assemble_capture_test/silva_database/taxmap_slv_ssu_ref_nr_138.1.xls \
# -op1 ${outpath}/${j}.blastn.result.xls && \
# /usr/bin/python3 /data/lijing/assemble_capture_test/last_step2.py \
# -ip1 ${outpath}/${j}/final.contigs.fa \
# -ip2 ${outpath}/${j}.blastn.result.xls \
# -op1 ${outpath}/${j}/${j}.final.contigs.fa
#done
#
##for j in `cat ${outpath}/name.uniq.txt`
##do
##/home/biosoft/bwa-0.7.17/bwa index -a is ${outpath}/${j}/${j}.final.contigs.fa -p ${outpath}/${j}/${j}.final.contigs.fa
##done
#
##for j in `cat ${outpath}/name.uniq.txt`
##do
##/home/biosoft/bwa-0.7.17/bwa mem -t 20 ${outpath}/${j}/${j}.final.contigs.fa
#for item in `cat ${outpath}/name.uniq.txt`; do /home/biosoft/bwa-0.7.17/bwa mem -t 20 \
# /data/lijing/assemble_capture_test/silva_database/SILVA_138.1_SSURef_NR99_tax_silva.fa \
# ${outpath}/${item}.R1.out.fq.gz \
# ${outpath}/${item}.R2.out.fq.gz > \
# ${outpath}/${item}.out.sam && \
# /usr/bin/samtools view -@ 20 -F 4 ${outpath}/${item}.out.sam > \
# ${outpath}/${item}.out.mapped.sam ; done
#
#for j in `cat ${outpath}/name.uniq.txt`
#do
#/usr/bin/python3 /data/lijing/assemble_capture_test/last_step3.py \
# -ip1 ${outpath}/${j}.out.mapped.sam \
# -ip2 ${outpath}/${j}.blastn.result.xls \
# -r /data/lijing/assemble_capture_test/silva_database/taxmap_slv_ssu_ref_nr_138.1.xls \
# -op1 ${outpath}/${j}.genus.count.xls \
# -op2 ${outpath}/${j}.species.count.xls \
# -ml 150
#done
run_line.last.vsearch-ngs-pipeline
最新推荐文章于 2024-06-14 17:59:23 发布