下载 blast+
https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.9.0/ncbi-blast±2.9.0-src.tar.gz
安装后
使用命令(比对protein序列与菌种dna)
makeblastdb
-in D:\QQ聊天记录\909344426\FileRecv\MobileFile\sspBCD.fasta
-dbtype nucl
-out testdb
tblastn
-query D:\QQ聊天记录\909344426\FileRecv\sspBCD_query.fasta
-db C:\Users\Administrator\testdb
-out D:\QQ聊天记录\909344426\FileRecv\query_result
也可用python脚本
import os
cur = os.path.abspath('.')
def gen_db(sp): # 为菌生成数据库
sp_path = os.path.join(cur, 'sp', sp)
dbpath = os.path.join(cur, 'temp', 'tempdb')
os.system('makeblastdb -in {} -dbtype nucl -out {}'.format(sp_path, dbpath))
def comp(sp, target): # 对比菌和蛋白
sp_path = os.path.join(cur, 'sp', sp)
target_path = os.path.join(cur, 'target', target)
dbpath = os.path.join(cur, 'temp', 'tempdb')
temppath = os.path.join(cur, 'temp', '{}-{}'.format(sp, target))
out_path = os.path.join(cur, 'result', '{}.fasta'.format(target))
os.system('tblastn -query {} -db {} -out {}'.format(target_path, dbpath, temppath))
with open(temppath, 'r') as f:
with open(out_path, 'a') as f2:
for line in f.readlines():
if line.startswith('>'):
f2.write('\n>{}_{}\n'.format(sp, target))
result_num = 0
elif line.startswith(' Score'):
result_num += 1
elif line.startswith('Sbjct') and result_num == 1:
s = line.split(' ')[4].replace('-', '')
f2.write(s + '\n')
# 遍历菌与蛋白
for sp in os.listdir(os.path.join(cur, 'sp')):
gen_db(sp)
for target in os.listdir(os.path.join(cur, 'target')):
comp(sp, target)