## step 1: build the list of unique sample prefixes
# path:    directory the later steps read the *_R1_001/_R2_001 FASTQ files from
# outpath: directory the sample list and all later result files are written to
path=/data/lijing/data
outpath=/data/lijing/result
#ls ${path}/*.gz |while read id; do echo $(basename $id)>>${outpath}/name.txt;done
#cut -d'.' -f 1 ${outpath}/name.txt |sort|uniq > ${outpath}/name.uniq.txt
# NOTE(review): the names are collected from /data/lijing/FFPE_16S, not from
# ${path} -- confirm that both directories hold the same files.
# Only build the list when the cd succeeded; otherwise the glob would silently
# pick up *.gz files from whatever directory the shell happens to be in.
if cd /data/lijing/FFPE_16S; then
  # Glob instead of parsing `ls`; the -e test drops the literal "*.gz" that is
  # left behind when nothing matches, so the list is then simply empty.
  # A sample prefix is the first three "_"-separated fields of the file name.
  for fq in *.gz; do
    [ -e "${fq}" ] || continue
    printf '%s\n' "${fq}"
  done \
    | awk -F '_' '{print $1"_"$2"_"$3}' \
    | sort -u > "${outpath}/name.uniq.txt"
else
  printf 'error: cannot cd to /data/lijing/FFPE_16S; sample list not rebuilt\n' >&2
fi
## step 2: run fastp on every sample, then post-process its JSON report
# for item in `cat ${outpath}/name.uniq.txt`; do echo $item; done
# The sample list is read line by line on fd 3: this avoids the word-splitting
# and globbing of `for x in $(cat file)` and leaves stdin of the called tools
# untouched. Blank lines are skipped.
# NOTE(review): in fastp `-y` is a switch and the complexity threshold is `-Y`;
# confirm whether `-y 30` was meant to be `-y -Y 30`.
while IFS= read -r item <&3; do
  [ -n "${item}" ] || continue
  # The QC script only runs when fastp exited successfully.
  /home/biosoft/fastp/fastp \
    --thread 20 -W 4 -q 20 -l 150 -y 30 --detect_adapter_for_pe -x \
    -i "${path}/${item}_R1_001.fastq.gz" \
    -I "${path}/${item}_R2_001.fastq.gz" \
    -o "${outpath}/${item}.R1.out.fq.gz" \
    -O "${outpath}/${item}.R2.out.fq.gz" \
    -j "${outpath}/${item}.json" \
    -h "${outpath}/${item}.html" \
    && /usr/bin/python3 /data/lijing/download/20230329_estimate_qc.py \
      -i "${outpath}/${item}.json" \
      -o "${outpath}/${item}.qc.xls"
done 3< "${outpath}/name.uniq.txt"
## step 3 assemble not good (optional)
# Runs MEGAHIT on the paired fastp outputs of every sample; the assembly is
# written to the per-sample directory ${outpath}/<sample>.
# The sample list is read line by line on fd 3 (no word-splitting/globbing of
# the names, stdin of the tool left untouched); blank lines are skipped.
while IFS= read -r j <&3; do
  [ -n "${j}" ] || continue
  /data/lijing/download_software/MEGAHIT-1.2.9-Linux-x86_64-static/bin/megahit \
    -1 "${outpath}/${j}.R1.out.fq.gz" \
    -2 "${outpath}/${j}.R2.out.fq.gz" \
    -o "${outpath}/${j}"
done 3< "${outpath}/name.uniq.txt"
## step 4 ### judge contig good/bad (optional)
# Runs QUAST on each sample's final.contigs.fa.
# NOTE(review): -o points at the same ${outpath}/<sample> directory that holds
# the assembly, so the QUAST report lands next to the contigs -- confirm this
# is intended.
# The sample list is read line by line on fd 3 (no word-splitting/globbing of
# the names, stdin of the tool left untouched); blank lines are skipped.
while IFS= read -r j <&3; do
  [ -n "${j}" ] || continue
  python /data/lijing/download_software/quast/quast-5.0.2/quast.py \
    "${outpath}/${j}/final.contigs.fa" \
    -t 15 -o "${outpath}/${j}"
done 3< "${outpath}/name.uniq.txt"
# For every sample: blastn the assembled contigs against the SILVA 138.1 SSU
# NR99 FASTA (tabular output, -outfmt 6), then run last_step1.py on the hits
# with the SILVA taxmap, then last_step2.py on the contigs plus that result to
# produce ${outpath}/<sample>/<sample>.final.contigs.fa.
# Each stage only runs when the previous one exited successfully; a failure
# moves on to the next sample.
# The sample list is read line by line on fd 3 (no word-splitting/globbing of
# the names, stdin of the tools left untouched); blank lines are skipped.
while IFS= read -r j <&3; do
  [ -n "${j}" ] || continue
  /data/lijing/download_software/ncbi-blast/bin/blastn \
    -db /data/lijing/assemble_capture_test/silva_database/SILVA_138.1_SSURef_NR99_tax_silva.fa \
    -query "${outpath}/${j}/final.contigs.fa" \
    -out "${outpath}/${j}.blastn.xls" \
    -outfmt 6 -num_threads 20 \
    && /usr/bin/python3 /data/lijing/assemble_capture_test/last_step1.py \
      -ip1 "${outpath}/${j}.blastn.xls" \
      -r /data/lijing/assemble_capture_test/silva_database/taxmap_slv_ssu_ref_nr_138.1.xls \
      -op1 "${outpath}/${j}.blastn.result.xls" \
    && /usr/bin/python3 /data/lijing/assemble_capture_test/last_step2.py \
      -ip1 "${outpath}/${j}/final.contigs.fa" \
      -ip2 "${outpath}/${j}.blastn.result.xls" \
      -op1 "${outpath}/${j}/${j}.final.contigs.fa"
done 3< "${outpath}/name.uniq.txt"
# Disabled alternative kept for reference: index and map against each sample's
# own filtered contigs instead of the SILVA reference (the second loop below
# was left incomplete).
#for j in `cat ${outpath}/name.uniq.txt`
#do
#/home/biosoft/bwa-0.7.17/bwa index -a is ${outpath}/${j}/${j}.final.contigs.fa -p ${outpath}/${j}/${j}.final.contigs.fa
#done
#for j in `cat ${outpath}/name.uniq.txt`
#do
#/home/biosoft/bwa-0.7.17/bwa mem -t 20 ${outpath}/${j}/${j}.final.contigs.fa
# For every sample: align the trimmed read pairs to the SILVA 138.1 SSU NR99
# FASTA with bwa mem, then keep only the records without the "unmapped" flag
# (samtools view -F 4). samtools only runs when bwa mem exited successfully.
# The sample list is read line by line on fd 3 (no word-splitting/globbing of
# the names, stdin of the tools left untouched); blank lines are skipped.
while IFS= read -r item <&3; do
  [ -n "${item}" ] || continue
  /home/biosoft/bwa-0.7.17/bwa mem -t 20 \
    /data/lijing/assemble_capture_test/silva_database/SILVA_138.1_SSURef_NR99_tax_silva.fa \
    "${outpath}/${item}.R1.out.fq.gz" \
    "${outpath}/${item}.R2.out.fq.gz" \
    > "${outpath}/${item}.out.sam" \
    && /usr/bin/samtools view -@ 20 -F 4 "${outpath}/${item}.out.sam" \
      > "${outpath}/${item}.out.mapped.sam"
done 3< "${outpath}/name.uniq.txt"
# For every sample: run last_step3.py on the mapped SAM records, the blastn
# result table and the SILVA taxmap; it is given two output paths,
# <sample>.genus.count.xls and <sample>.species.count.xls.
# NOTE(review): -ml 150 presumably is a minimum length matching fastp's -l 150;
# confirm against last_step3.py.
# The sample list is read line by line on fd 3 (no word-splitting/globbing of
# the names, stdin of the script left untouched); blank lines are skipped.
while IFS= read -r j <&3; do
  [ -n "${j}" ] || continue
  /usr/bin/python3 /data/lijing/assemble_capture_test/last_step3.py \
    -ip1 "${outpath}/${j}.out.mapped.sam" \
    -ip2 "${outpath}/${j}.blastn.result.xls" \
    -r /data/lijing/assemble_capture_test/silva_database/taxmap_slv_ssu_ref_nr_138.1.xls \
    -op1 "${outpath}/${j}.genus.count.xls" \
    -op2 "${outpath}/${j}.species.count.xls" \
    -ml 150
done 3< "${outpath}/name.uniq.txt"
# run_line.last.bwa.screen-ngs-pipeline
# (web-page residue, not part of the pipeline) Latest recommended article published on 2024-06-17 10:26:55