本地 blast

最新推荐文章于 2024-02-06 12:33:55 发布

mszjaas00

最新推荐文章于 2024-02-06 12:33:55 发布

阅读量195

点赞数

本文链接：https://blog.csdn.net/qq_37517281/article/details/110431254

版权

下载 blast+
https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.9.0/ncbi-blast±2.9.0-src.tar.gz
安装后

使用命令（比对protein序列与菌种dna）

makeblastdb
    -in D:\QQ聊天记录\909344426\FileRecv\MobileFile\sspBCD.fasta
    -dbtype  nucl
    -out testdb
tblastn
    -query D:\QQ聊天记录\909344426\FileRecv\sspBCD_query.fasta
    -db C:\Users\Administrator\testdb
    -out D:\QQ聊天记录\909344426\FileRecv\query_result

也可用python脚本

import os
cur = os.path.abspath('.')

def gen_db(sp): # 为菌生成数据库
    sp_path = os.path.join(cur, 'sp', sp)
    dbpath = os.path.join(cur, 'temp', 'tempdb')
    os.system('makeblastdb -in {} -dbtype  nucl -out {}'.format(sp_path, dbpath))

def comp(sp, target): # 对比菌和蛋白
    sp_path = os.path.join(cur, 'sp', sp)
    target_path = os.path.join(cur, 'target', target)
    dbpath = os.path.join(cur, 'temp', 'tempdb')
    temppath = os.path.join(cur, 'temp', '{}-{}'.format(sp, target))
    out_path = os.path.join(cur, 'result', '{}.fasta'.format(target))
    os.system('tblastn -query {} -db {} -out {}'.format(target_path, dbpath, temppath))
    with open(temppath, 'r') as f:
        with open(out_path, 'a') as f2:
            for line in f.readlines():
                if line.startswith('>'):
                    f2.write('\n>{}_{}\n'.format(sp, target))
                    result_num = 0
                elif line.startswith(' Score'):
                    result_num += 1
                elif line.startswith('Sbjct') and result_num == 1:
                    s = line.split(' ')[4].replace('-', '')
                    f2.write(s + '\n')
# 遍历菌与蛋白
for sp in os.listdir(os.path.join(cur, 'sp')):
    gen_db(sp)
    for target in os.listdir(os.path.join(cur, 'target')):
        comp(sp, target)