# Python学习日志-Linux下Python多进程输出MSA-生物信息学
# (Python study log: generating MSAs with multiple Python processes on Linux - bioinformatics)
"""
Created on Jan 4 14:37 2020
@author: Xue-Qiang
"""
import os, re
import time
import gc
import subprocess
from multiprocessing import Pool
def msa_batch_generate(inputfile):
    """Split a paired-sequence text file into one small file per sequence.

    Every line of ``inputfile`` that contains ``>`` is treated as a pair
    header of the form ``>NAME1-NAME2``.  The two lines that follow it are
    taken as the sequences of NAME1 and NAME2 respectively.  For each header
    a directory ``msa_file/NAME1-NAME2`` is created (relative to the current
    working directory) holding two files named ``NAME1`` and ``NAME2``; each
    file contains a ``>NAME`` header line followed by the stripped sequence.

    Unlike the previous implementation this never calls ``os.chdir``, so the
    process working directory is untouched even when an error interrupts the
    run.

    Args:
        inputfile: Path of the text file to split.

    Raises:
        FileExistsError: If ``msa_file`` or one of the pair directories
            already exists.
        ValueError: If the file contains a blank line (previously an
            ``assert``, which is silently skipped under ``python -O``).
        IndexError: If a header has no ``-`` separator or is not followed by
            two further lines.
    """
    out_root = 'msa_file'
    os.mkdir(out_root)

    # Read-only access is enough; the input is never modified.
    with open(inputfile, 'r') as handle:
        lines = [raw.strip() for raw in handle]

    for index, line in enumerate(lines):
        if not line:
            raise ValueError(
                'blank line {} in {!r}'.format(index + 1, inputfile))
        if '>' not in line:
            # Sequence lines are consumed together with their header below.
            continue

        # The pair directory is the header text with every '>' removed.
        pair_dir = os.path.join(out_root, line.replace('>', ''))
        os.mkdir(pair_dir)

        parts = line.split('-')
        # The first part keeps its own '>' from the header line; the second
        # part needs one added to become a header of its own.
        headers = (parts[0], '>' + parts[1])
        sequences = (lines[index + 1], lines[index + 2])

        for header, sequence in zip(headers, sequences):
            target = os.path.join(pair_dir, header.replace('>', ''))
            with open(target, 'w') as out:
                out.write(header + '\n' + sequence)
# Default locations of the hhblits executable and of the sequence database
# prefix it searches; both can be overridden per call.
_HHBLITS_BIN = '/data0/hujun_stu/software/HHsuite/hhsuite-3.0-beta.3-Linux/bin/hhblits'
_HHBLITS_DB = '/data0/hujun_stu/uniprot20_2016_02/uniprot20_2016_02'


def hhblits(fasta_file, *, hhblits_bin=_HHBLITS_BIN, database=_HHBLITS_DB):
    """Run hhblits on one query file and wait for it to finish.

    The search options (``-n 3 -id 99 -e 0.001 -cov 50``) are unchanged and
    the output alignment is requested at ``fasta_file + '.a3m'`` via
    ``-oa3m``.

    The call blocks until the child process exits.  That is what makes a
    worker pool actually limit how many searches run at once; the former
    fire-and-forget ``Popen`` returned immediately and left an unread stdout
    pipe behind.

    Args:
        fasta_file: Path of the query file passed to ``-i``.
        hhblits_bin: Path (or name on ``PATH``) of the hhblits executable.
        database: Database prefix passed to ``-d``.

    Returns:
        The exit status of the hhblits process (0 on success).

    Raises:
        FileNotFoundError: If ``hhblits_bin`` cannot be found.
    """
    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters from being reinterpreted.
    command = [
        hhblits_bin,
        '-i', fasta_file,
        '-d', database,
        '-n', '3',
        '-id', '99',
        '-e', '0.001',
        '-cov', '50',
        '-oa3m', fasta_file + '.a3m',
    ]
    # stdout was previously captured and never read; discard it outright so
    # the child can never stall on a full pipe.
    completed = subprocess.run(command, stdout=subprocess.DEVNULL, check=False)
    return completed.returncode
def main():
    """Split ``test.txt`` into per-sequence files and run hhblits on each.

    Every file found under ``msa_file`` is submitted to a pool of six worker
    processes.  Exceptions raised inside a worker are reported after the pool
    has drained instead of being silently dropped, and the elapsed time in
    seconds is printed at the end.
    """
    start = time.perf_counter()  # monotonic clock, meant for elapsed time
    msa_batch_generate(r'test.txt')

    with Pool(6) as pool:
        submitted = []
        for root, _dirs, files in os.walk('msa_file'):
            for name in files:
                path = os.path.join(root, name)
                # Keep the handle: it is the only way to see a worker error.
                submitted.append(
                    (path, pool.apply_async(hhblits, args=(path,))))
        pool.close()
        pool.join()

        for path, result in submitted:
            try:
                result.get()
            except Exception as exc:  # top-level boundary: report, carry on
                print('hhblits failed for {}: {!r}'.format(path, exc))

    print(time.perf_counter() - start)


if __name__ == "__main__":
    main()