# Prerequisites: create a container from the idseq image and note the container's name; build the human host-removal and alignment databases (STAR, bowtie2, gsnap) in advance.
import click
import pandas as pd
import os
def _run_step(command):
    """Run one shell command line and abort the pipeline if it fails.

    Every step consumes files written by the previous one, so continuing
    after a failure would only process missing or stale intermediates.

    Raises:
        click.ClickException: if ``os.system`` reports a non-zero status.
    """
    status = os.system(command)
    if status != 0:
        raise click.ClickException(
            "pipeline step failed (status %s): %s" % (status, command))


@click.command()
@click.option('-i', '--inputfile',
              help='raw_data(/home/lijing/nt_nr_database_division1/K200004870_L01_60.fq.gz |...)',
              required=True)
@click.option('-d', '--docker', help='docker_name(823fa78ed660 |...)', required=True)
@click.option('-dp', '--docker_path',
              help='docker_path(823fa78ed660:/idseqflow-dockerfile-container-share/20211109/testdata/ |...)',
              required=True)
# @click.option('-v', '--validate', help='docker_path(823fa78ed660:/idseqflow-dockerfile-container-share/20211109/testdata/...)', required=True)
@click.option('-dpgf', '--docker_path_gzfile',
              help='docker_path_gzfile(/idseqflow-dockerfile-container-share/20211109/testdata/K200004870_L01_60.fq.gz |...)',
              required=True)
@click.option('-dpf', '--docker_path_file',
              help='docker_path_file(/idseqflow-dockerfile-container-share/20211109/testdata/K200004870_L01_60.fq |...)',
              required=True)
@click.option('-sh', '--star_hostdatabase',
              help='star_hostdatabase_name(/idseqflow-dockerfile-container-share/20211109/star_data/genomeDir |...)',
              required=True)
@click.option('-bh', '--bowtie2_hostdatabase',
              help='bowtie2_hostdatabase_name(/idseqflow-dockerfile-container-share/20211109/bowtie2/Homo_sapiens.GRCh38.dna.primary_assembly |...)',
              required=True)
@click.option('-gh', '--gsnap_hostdatabase',
              help='gsnap_hostdatabase_name(/idseqflow-dockerfile-container-share/20211109/GMAP-GSNAP_data/gsnap |...)',
              required=True)
@click.option('-hqp', '--hostfilter_qc_outpath',
              help='hostfilter_qc_outpath_name(/home/lijing/all_test/test_202112/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2_gsnap.fasta |...)',
              required=True)
def get_hostfilter(inputfile, docker_path, docker, docker_path_gzfile, star_hostdatabase, docker_path_file,
                   bowtie2_hostdatabase, gsnap_hostdatabase, hostfilter_qc_outpath):
    """Run the host-filtering / QC steps inside a Docker container, stopping at the first failed step."""
    # Parameter notes are kept as comments because click prints the
    # docstring as the command's --help text.
    #   inputfile             host-side file copied into the container with `docker cp`
    #   docker_path           `docker cp` destination for inputfile
    #   docker                container name/ID used for `docker exec` and `docker cp`
    #   docker_path_gzfile    path inside the container passed to `gunzip`
    #   docker_path_file      path inside the container passed to STAR --readFilesIn
    #   star_hostdatabase     value for STAR --genomeDir
    #   bowtie2_hostdatabase  value for bowtie2 -x
    #   gsnap_hostdatabase    value for gsnap -D
    #   hostfilter_qc_outpath host-side output path of the final sam_to_fasta_single.py call
    #
    # NOTE(review): arguments are interpolated into shell command lines
    # unquoted; values containing spaces or quotes will break the commands.
    # NOTE(review): `docker exec -it` requests a TTY — presumably this script
    # is always started from an interactive terminal; confirm before running
    # it from cron/CI.

    # Copy the input file into the container (copy rather than move; a
    # "move and change permissions" variant was left disabled by the author).
    _run_step("echo '复制文件进入docker' && docker cp %s %s" % (inputfile, docker_path))

    # Decompress the sequencing file inside the container.
    _run_step("echo '解压测序文件' && docker exec -it %s /bin/bash -c 'gunzip %s' " % (docker, docker_path_gzfile))

    # STAR host removal; unmapped reads are written with the star_tmp prefix.
    _run_step(
        "echo 'star去宿主' && docker exec -it %s /bin/bash -c 'STAR --outFilterMultimapNmax 99999 --outFilterScoreMinOverLread 0.5 --outFilterMatchNminOverLread 0.5 --outReadsUnmapped Fastx --outFilterMismatchNmax 999 --outSAMmode None --clip3pNbases 0 --runThreadN 15 --genomeDir %s --readFilesIn %s --outFileNamePrefix star_tmp' " % (
            docker, star_hostdatabase, docker_path_file))

    # Trimmomatic adapter trimming; more Nuoyin adapters can be added to the
    # adapter FASTA referenced here.
    _run_step(
        "echo 'Trimmomatic去接头' && docker exec -it %s /bin/bash -c 'java -jar /usr/local/bin/trimmomatic-0.38.jar SE -phred33 star_tmpUnmapped.out.mate1 tmp_star_trimmomatic.fastq ILLUMINACLIP:/idseqflow-dockerfile-container-share/20211109/testdata/nuoyin_adapter.fasta:2:30:10:8:true MINLEN:35' " % (
            docker,))

    # PriceSeqFilter low-quality read removal.
    _run_step(
        "echo 'PriceSeq去低质量' && docker exec -it %s /bin/bash -c 'PriceSeqFilter -a 12 -rnf 90 -log c -f tmp_star_trimmomatic.fastq -o tmp_star_trimmomatic_priceseq.fastq -rqf 85 0.98' " % (
            docker,))

    # idseq-dedup duplicate removal.
    _run_step(
        "echo 'Dedup去重复' && docker exec -it %s /bin/bash -c 'idseq-dedup -l 70 -i tmp_star_trimmomatic_priceseq.fastq -o tmp_star_trimmomatic_priceseq_dedup.fastq' " % (
            docker,))

    # fastp low-complexity filtering.
    _run_step(
        "echo 'fastp去低复杂' && docker exec -it %s /bin/bash -c 'fastp -y 30 -i tmp_star_trimmomatic_priceseq_dedup.fastq -o tmp_star_trimmomatic_priceseq_dedup_lzw.fastq' " % (
            docker,))

    # bowtie2 host removal: convert FASTQ to FASTA, align, copy the SAM back
    # to the host and convert it to FASTA with sam_to_fasta_single.py.
    _run_step(
        "echo 'bowtie2去宿主' && docker exec -it %s /bin/bash -c 'fastq_to_fasta -i tmp_star_trimmomatic_priceseq_dedup_lzw.fastq -o tmp_star_trimmomatic_priceseq_dedup_lzw.fasta && bowtie2 -q -x %s -f --very-sensitive-local -S tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2.sam --seed random_seed -p 15 -U tmp_star_trimmomatic_priceseq_dedup_lzw.fasta' && docker cp %s:/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2.sam /home/lijing/all_test/test_202112/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2.sam && python3 /home/lijing/all_test/test_202112/sam_to_fasta_single.py -i /home/lijing/all_test/test_202112/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2.sam -o /home/lijing/all_test/test_202112/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2.fasta " % (
            docker, bowtie2_hostdatabase, docker))

    # TODO (from the original author): subsample 1,000,000 sequences here.

    # gsnap host removal: copy the bowtie2-filtered FASTA into the container,
    # align, copy the SAM back and convert it to the final FASTA output.
    _run_step(
        "echo 'gsnap去宿主' && docker cp /home/lijing/all_test/test_202112/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2.fasta %s:/ && docker exec -it %s /bin/bash -c 'gsnap -A sam --batch=0 --use-shared-memory=0 --gmap-mode=all --npaths=1 --ordered -t 32 --max-mismatches=40 -D %s -d Homo_sapiens.GRCh38.dna.primary_assembly_k16 -o tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2_gsnap.sam tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2.fasta' && docker cp %s:/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2_gsnap.sam /home/lijing/all_test/test_202112/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2_gsnap.sam && python3 /home/lijing/all_test/test_202112/sam_to_fasta_single.py -i /home/lijing/all_test/test_202112/tmp_star_trimmomatic_priceseq_dedup_lzw_bowtie2_gsnap.sam -o %s" % (
            docker, docker, gsnap_hostdatabase, docker, hostfilter_qc_outpath))
# Script entry point: click parses the command-line options and invokes the command.
if __name__ == '__main__':
    get_hostfilter()
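# Disabled leftover: a status-reporting helper that is never called.
# def judge(val):
#     if val == 0:
#         print("taxid匹配上lineage成功")  # "taxid matched to lineage: success"
#     else:
#         print("taxid匹配上lineage失败")  # "taxid matched to lineage: failure"
#
# if __name__ == '__main__':
#     judge()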
# idseq workflow (up to the host-removal step, before alignment output)_20211217
# (web-page residue) Latest recommended article published 2024-06-14 02:02:23