geNomad，DeepVirFinder，VirSorter2，VIBRANT，PPR-Meta，viralVerify安装与使用

CAAS_IFR_zp

已于 2024-05-28 03:25:09 修改

阅读量1.8k

点赞数 13

文章标签：数据分析

于 2024-01-11 22:20:54 首次发布

本文链接：https://blog.csdn.net/m0_53945548/article/details/135521940

版权

geNomad安装(conda)

GitHub - apcamargo/genomad: geNomad: Identification of mobile genetic elements

mamba create -n genomad -c conda-forge -c bioconda genomad=1.7.4
mamba activate genomad
genomad download-database {dir}
genomad end-to-end --cleanup --threads 180 contig.fa out_dir database(完整路径)

主要看summary文件夹里面的结果就行

DeepVirFinder安装(conda)

GitHub - jessieren/DeepVirFinder: Identifying viruses from metagenomic data by deep learning

conda create --name dvf python=3.6 numpy theano=1.0.3 keras=2.2.4 scikit-learn Biopython h5py=2.10.0
source activate dvf
conda install mkl-service
git clone https://github.com/jessieren/DeepVirFinder
cd DeepVirFinder
chmod 755 *.py
~/DeepVirFinder/dvf.py -i contig.fa -m (完整地址of model dir) -o out_dir -l 500 -c 180

得分 > 0.9 且p < 0.05 可以识别为病毒contig（因人而异）

VirSorter2安装(conda)

GitHub - jiarong/VirSorter2: customizable pipeline to identify viral sequences from (meta)genomic data

mamba create -n vs2 -c conda-forge -c bioconda "python>=3.6,<=3.10" scikit-learn=0.22.1 imbalanced-learn pandas seaborn hmmer==3.3 prodigal screed ruamel.yaml "snakemake>=5.18,<=5.26" click "conda-package-handling<=1.9"
mamba activate vs2
mkdir VirSorter2
cd VirSorter2
git clone https://github.com/jiarong/VirSorter2.git
cd VirSorter2
pip install -e .
#下载数据库
wget -O db.tgz https://osf.io/v46sc/download
tar -xzf db.tgz
virsorter config --init-source --db-dir=./db
virsorter run -w {out_dir} -i contig.fa --min-length 500 --min-score 0.5 --include-groups dsDNAphage,NCLDV,RNA,ssDNA,lavidaviridae --keep-original-seq --hallmark-required-on-short --rm-tmpdir -j 180 all

VIBRANT安装(conda)

GitHub - AnantharamanLab/VIBRANT: Virus Identification By iteRative ANnoTation

conda create -n vibrant python=3 pandas numpy=1.19 scikit-learn=0.21.3 prodigal hmmer biopython matplotlib seaborn
conda activate vibrant
conda install -c bioconda vibrant==1.2.1
download-db.sh {dir}
~/mambaforge/envs/vibrant/bin/VIBRANT_run.py
~/mambaforge/envs/vibrant/bin/VIBRANT_run.py -i contig.fa -t 180 -folder {out_dir} -l 1000 -d (完整路径)/VIBRANT/databases/ -m (完整路径)/VIBRANT/files/

PPR-Meta安装(docker)

GitHub - zhenchengfang/PPR-Meta: A tool for identifying phages and plasmids from metagenomic fragments using deep learning

cd PPR-meta
docker pull shufangwu/ppr-meta:1.0
docker images
docker run -it shufangwu/ppr-meta:1.0 bash
cd /home/PPR-Meta
./PPR_Meta example.fna result.csv
exit
如果你只想退出并保持容器运行，你可以按Ctrl+P+Q
#共享文件夹
docker run -it -v {宿主机}:/home/PPR-Meta shufangwu/ppr-meta:1.0 bash
cd /home/PPR-Meta
./PPR_Meta M8_corrected_contig_NC_clean.fa M8_PPR-Meta.csv -t 0.7
# 进入已经打开的容器
docker exec -it 号码 bash
# 关闭docker容器
docker stop 号码

viralVerify安装

mkdir viralVerify
cd viralVerify
git clone https://github.com/ablab/viralVerify/
cd viralVerify/script
chmod 755 *.py
~/viralVerify/bin/viralverify -h
wget -O nbc_hmms.hmm.gz "https://figshare.com/ndownloader/files/17904323?private_link=f897d463b31a35ad7bf0"
gunzip nbc_hmms.hmm.gz
~/viralVerify/bin/viralverify -f contig.fa -o viralverify_dir --hmm {nbc_hmms.hmm的路径} -t 180

自己写了个代码把各种结果汇总到一起，大家看着改改，应该也能用

#!/usr/bin/env python3
#########################################################
# Vir_result_handle
# written by PeiZhong in IFR of CAAS

import argparse
from Bio import SeqIO

# Command-line interface: thirteen required positional arguments, registered
# in the order they must be given on the command line.
parser = argparse.ArgumentParser(description='Vir_result_handle')
for arg_name, arg_help in (
    ('OperaPath', 'Path that contain your Vir result file'),
    ('viralverify_result_txt', 'viralverify_result_txt or None'),
    ('genomad_result_txt', 'genomad_result_txt or None'),
    ('DeepVirFinder_result_txt', 'DeepVirFinder_result_txt or None'),
    ('DeepVirFinder_Score', 'DeepVirFinder_Score,Values higher than or equal to this are considered virus [0-1] , 0.9 is good'),
    ('DeepVirFinder_pvalue', 'DeepVirFinder_pvalue,Values less than or equal to this are considered virus [0-1] , 0.05 is good'),
    ('VIBRANT_result_txt', 'VIBRANT_result_txt or None'),
    ('VirSorter2_result_txt', 'VirSorter2_result_txt or None'),
    ('PPR_meta_result_txt', 'PPR_meta_result_txt or None'),
    ('Sample_name', 'For outputting files'),
    ('contig_fa_file_path', 'file path of contig.fa'),
    ('contig_fa', 'contig.fa file'),
    ('Length', 'Length of filter'),
):
    parser.add_argument(arg_name, help=arg_help)

args = parser.parse_args()

# Unpack the parsed values into module-level names used by the functions
# below. Every value is still a string at this point.
OperaPath = args.OperaPath
Sample_name = args.Sample_name
contig_fa_file_path = args.contig_fa_file_path
contig_fa = args.contig_fa
Length = args.Length
viralverify_result_txt = args.viralverify_result_txt
genomad_result_txt = args.genomad_result_txt
DeepVirFinder_result_txt = args.DeepVirFinder_result_txt
DeepVirFinder_Score = args.DeepVirFinder_Score
DeepVirFinder_pvalue = args.DeepVirFinder_pvalue
VIBRANT_result_txt = args.VIBRANT_result_txt
VirSorter2_result_txt = args.VirSorter2_result_txt
PPR_meta_result_txt = args.PPR_meta_result_txt

# Both directories are later joined to file names by plain concatenation,
# so make sure each one ends with a slash.
if not OperaPath.endswith("/"):
    OperaPath += "/"

if not contig_fa_file_path.endswith("/"):
    contig_fa_file_path += "/"

# Union of all contig names selected by any tool (name -> 0; used as a set).
db_contig = {}

def viralverify_result(file):
    """Collect viralVerify contigs predicted as virus that pass the length filter.

    Reads the tab-separated table ``OperaPath + file``. Lines containing
    "Contig name" (the header) and blank lines are skipped. A contig is kept
    when its second column contains "Virus" and its third column, read as a
    number, is greater than or equal to the global ``Length``.

    Kept names are appended, prefixed with ">", to
    ``OperaPath + Sample_name + "_viralverify_name.txt"`` and are also
    registered in the module-level ``db_contig`` dict.

    Args:
        file: Name of the viralVerify result table, relative to ``OperaPath``.

    Raises:
        ValueError: If ``Length`` or a length column is not numeric.
        IndexError: If a data line has fewer than three tab-separated columns.
    """
    # Parse the numbers with float() instead of eval(): the text comes from the
    # command line and from a data file and must never be executed as code.
    min_length = float(Length)
    db = {}
    with open(OperaPath + file, 'r') as f1:
        for line in f1:
            if "Contig name" in line or not line.strip():
                continue
            fields = line.rstrip("\n").split("\t")
            contig_name, prediction, contig_length = fields[0], fields[1], fields[2]
            if "Virus" in prediction and float(contig_length) >= min_length:
                db[contig_name] = 0
    with open(OperaPath + Sample_name + "_viralverify_name.txt", 'a') as f2:
        for key in db:
            print(">" + key, file=f2)
            db_contig[key] = 0

def genomad_result(file):
    """Collect geNomad contigs that pass the length filter.

    Reads the tab-separated table ``OperaPath + file``. Lines containing
    "seq_name" (the header) and blank lines are skipped. A contig is kept
    when its second column, read as a number, is greater than or equal to the
    global ``Length``.

    NOTE(review): the previous code kept contigs with a value *less than* or
    equal to ``Length``, which contradicts the minimum-length filter applied
    in ``viralverify_result``; the comparison is now ``>=``. The second
    column is presumably geNomad's ``length`` column -- confirm against your
    summary file.

    Kept names are appended, prefixed with ">", to
    ``OperaPath + Sample_name + "_genomad_name.txt"`` and are also registered
    in the module-level ``db_contig`` dict.

    Args:
        file: Name of the geNomad result table, relative to ``OperaPath``.

    Raises:
        ValueError: If ``Length`` or a length column is not numeric.
        IndexError: If a data line has fewer than two tab-separated columns.
    """
    # float() instead of eval(): never execute text taken from a data file.
    min_length = float(Length)
    db = {}
    with open(OperaPath + file, 'r') as f1:
        for line in f1:
            if "seq_name" in line or not line.strip():
                continue
            fields = line.rstrip("\n").split("\t")
            contig_name, contig_length = fields[0], fields[1]
            if float(contig_length) >= min_length:
                db[contig_name] = 0
    with open(OperaPath + Sample_name + "_genomad_name.txt", 'a') as f2:
        for key in db:
            print(">" + key, file=f2)
            db_contig[key] = 0

def DeepVirFinder_result(file):
    """Collect DeepVirFinder contigs that pass the score and p-value cut-offs.

    Reads the tab-separated table ``OperaPath + file``. A line whose first
    column is exactly "name" is treated as the header; blank lines are
    skipped. A contig is kept when its third column (score) is greater than
    or equal to the global ``DeepVirFinder_Score`` and its fourth column
    (p-value) is less than or equal to the global ``DeepVirFinder_pvalue``.

    The header test looks only at the first column: the previous substring
    test (``"name" not in line``) silently dropped every contig whose row
    happened to contain the text "name".

    Kept names are appended, prefixed with ">", to
    ``OperaPath + Sample_name + "_DeepVirFinder_name.txt"`` and are also
    registered in the module-level ``db_contig`` dict.

    Args:
        file: Name of the DeepVirFinder result table, relative to ``OperaPath``.

    Raises:
        ValueError: If a threshold or a score/p-value column is not numeric.
        IndexError: If a data line has fewer than four tab-separated columns.
    """
    # float() instead of eval(): never execute text taken from a data file.
    min_score = float(DeepVirFinder_Score)
    max_pvalue = float(DeepVirFinder_pvalue)
    db = {}
    with open(OperaPath + file, 'r') as f1:
        for line in f1:
            if not line.strip():
                continue
            fields = line.rstrip("\n").split("\t")
            if fields[0] == "name":
                continue
            contig_name, score, pvalue = fields[0], fields[2], fields[3]
            if float(score) >= min_score and float(pvalue) <= max_pvalue:
                db[contig_name] = 0
    with open(OperaPath + Sample_name + "_DeepVirFinder_name.txt", 'a') as f2:
        for key in db:
            print(">" + key, file=f2)
            db_contig[key] = 0

def VIBRANT_result(file):
    """Collect every contig name listed in a VIBRANT result file.

    Reads ``OperaPath + file`` and treats each non-blank line (minus its
    newline) as one contig name; there is no header handling and no
    filtering. Blank lines are skipped so that an empty name is never
    recorded.

    Names are appended, prefixed with ">", to
    ``OperaPath + Sample_name + "_VIBRANT_name.txt"`` and are also registered
    in the module-level ``db_contig`` dict.

    Args:
        file: Name of the VIBRANT name list, relative to ``OperaPath``.
    """
    db = {}
    with open(OperaPath + file, 'r') as f1:
        for line in f1:
            contig_name = line.strip("\n")
            # A trailing or stray blank line would otherwise become the key "".
            if not contig_name.strip():
                continue
            db[contig_name] = 0
    with open(OperaPath + Sample_name + "_VIBRANT_name.txt", 'a') as f2:
        for key in db:
            print(">" + key, file=f2)
            db_contig[key] = 0

def VirSorter2_result(file):
    """Collect the contig names listed in a VirSorter2 result table.

    Reads the tab-separated table ``OperaPath + file``. Lines containing
    "seqname" (the header) and blank lines are skipped. For every other line
    the first column is cut at the first "|" and the part before it is used
    as the contig name, so several rows that share a prefix collapse into a
    single entry.

    Names are appended, prefixed with ">", to
    ``OperaPath + Sample_name + "_VirSorter2_name.txt"`` and are also
    registered in the module-level ``db_contig`` dict.

    Args:
        file: Name of the VirSorter2 result table, relative to ``OperaPath``.
    """
    db = {}
    with open(OperaPath + file, 'r') as f1:
        for line in f1:
            # Blank lines would otherwise be recorded as the contig name "".
            if "seqname" in line or not line.strip():
                continue
            seqname = line.split("\t")[0].strip("\n")
            db[seqname.split('|')[0]] = 0
    with open(OperaPath + Sample_name + "_VirSorter2_name.txt", 'a') as f2:
        for key in db:
            print(">" + key, file=f2)
            db_contig[key] = 0

def PPR_meta_result(file):
    """Collect PPR-Meta contigs labelled as phage that pass the length filter.

    Reads the tab-separated table ``OperaPath + file``. Lines containing
    "Header" and blank lines are skipped. A contig is kept when its sixth
    column contains "phage" but no underscore, and its second column, read as
    a number, is greater than or equal to the global ``Length``.

    NOTE(review): the previous code kept contigs with a value *less than* or
    equal to ``Length``, which contradicts the minimum-length filter applied
    in ``viralverify_result``; the comparison is now ``>=``. The second
    column is presumably PPR-Meta's ``Length`` column -- confirm against your
    result file.

    Kept names are appended, prefixed with ">", to
    ``OperaPath + Sample_name + "_PPR_meta_name.txt"`` and are also
    registered in the module-level ``db_contig`` dict.

    Args:
        file: Name of the PPR-Meta result table, relative to ``OperaPath``.

    Raises:
        ValueError: If ``Length`` or a length column is not numeric.
        IndexError: If a data line has fewer than six tab-separated columns.
    """
    # float() instead of eval(): never execute text taken from a data file.
    min_length = float(Length)
    db = {}
    with open(OperaPath + file, 'r') as f1:
        for line in f1:
            if "Header" in line or not line.strip():
                continue
            fields = line.rstrip("\n").split("\t")
            contig_name, contig_length, source = fields[0], fields[1], fields[5]
            if "phage" in source and "_" not in source and float(contig_length) >= min_length:
                db[contig_name] = 0
    with open(OperaPath + Sample_name + "_PPR_meta_name.txt", 'a') as f2:
        for key in db:
            print(">" + key, file=f2)
            db_contig[key] = 0

# Run each per-tool parser, in a fixed order, unless its result-file argument
# contains the text "None" (the command-line convention for "no result").
for result_txt, handler in (
    (viralverify_result_txt, viralverify_result),
    (genomad_result_txt, genomad_result),
    (DeepVirFinder_result_txt, DeepVirFinder_result),
    (VIBRANT_result_txt, VIBRANT_result),
    (VirSorter2_result_txt, VirSorter2_result),
    (PPR_meta_result_txt, PPR_meta_result),
):
    if "None" in result_txt:
        continue
    handler(result_txt)

# Echo the merged contig table to stdout, then append one name per line to
# the combined list file.
print(db_contig)
with open(OperaPath + Sample_name + "_vir_all.txt", 'a') as f_out:
    f_out.writelines(key + "\n" for key in db_contig)

# Load every record of the contig FASTA into memory, keyed by record id.
with open(contig_fa_file_path + contig_fa, 'r') as fasta_file:
    sequences = SeqIO.to_dict(SeqIO.parse(fasta_file, 'fasta'))

# Keep the selected contigs that exist in the FASTA; names without a matching
# record are dropped silently.
Vir_sequences = {key: sequences[key] for key in db_contig if key in sequences}

# Append the selected records to the per-sample viral FASTA.
with open(OperaPath + Sample_name + "_Virsq.fa", 'a') as Virsq_file:
    SeqIO.write(Vir_sequences.values(), Virsq_file, 'fasta')

# Batch driver: run Vir_seq_take_and_handle.py once for every
# *_corrected_contig_NC_clean.fa file in the current directory.
# Placeholders -- replace both with real directories. "~" is NOT expanded
# inside double quotes, so $HOME is used for the home directory.
Vir_result_dir="$HOME"
fa_path="$HOME"
for i in *_corrected_contig_NC_clean.fa
do
	# With no matching file the glob stays literal; skip that case.
	[ -e "$i" ] || continue
	# Sample prefix = file name without the fixed suffix.
	num=${i%%_corrected_contig_NC_clean.fa}
	# Run inside a subshell so the cd never changes the directory the loop
	# itself runs in (the old "cd .." did not return to the starting point).
	(
		cd "${Vir_result_dir}" || exit 1
		/home/zhongpei/hard_disk_sda2/zhongpei/Software/my_script/Vir_seq_take_and_handle.py "${Vir_result_dir}" "${num}_viralverify.txt" "${num}_genomad.txt" "${num}_DeepVirFinder.txt" 0.9 0.05 "${num}_VIBRANT.txt" "${num}_VirSorter2.txt" "${num}_PPR-Meta.txt" "${num}" "${fa_path}" "${i}" 1000
	)
done