前言
基于分子文件构建分子碎片库。脚本内置 BRICS、RECAP、MacFrag 三种碎片化算法。
- RECAP(Retrosynthetic Combinatorial Analysis Procedure):RECAP 是一种基于化学反应规则的分子切割方法,通过将分子沿特定的化学键进行断裂,生成更小的碎片。这些碎片可以帮助研究者更好地了解分子的结构和活性关系。
- BRICS(Breaking of Retrosynthetically Interesting Chemical Substructures):BRICS 是一种基于规则的分子切割方法,用于生成合成可行的分子片段。通过这种方法,研究者可以从现有化合物中构建新的化学结构,并预测它们的性质。
- MacFrag:由华东理工大学李洪林课题组发布,在BRICS规则基础上剔除了一些规则。此外引入了Simple算法,将分子片段的提取转换为子图的提取。
一、相关文件与环境依赖
RDKit
igraph
MacFrag.py https://github.com/yydiao1025/MacFrag/blob/main/MacFrag.py
二、脚本
import os, time
import threading
from functools import wraps
from rdkit import Chem
from rdkit.Chem import BRICS, Recap
import argparse
def read_molecules(file_path):
    """Read molecules from a file with RDKit.

    The reader is chosen from the file extension (case-insensitive):
    ``sdf`` and ``smi`` are read through multi-molecule suppliers, while
    ``mol``, ``mol2`` and ``pdb`` are read with the single-molecule
    ``Chem.MolFrom*File`` functions.

    Parameters
    ----------
    file_path : str
        Path of the molecular file.

    Returns
    -------
    mols : list
        Non-empty list of RDKit molecules; entries RDKit failed to parse
        (returned as ``None``) are dropped.

    Raises
    ------
    ValueError
        If the extension is not supported or no molecule could be read.
    """
    # splitext only looks at the final path component, so a dot inside a
    # directory name is not mistaken for the extension.
    file_type = os.path.splitext(file_path)[1].lstrip('.').lower()

    if file_type == 'sdf':
        candidates = Chem.SDMolSupplier(file_path)
    elif file_type == 'smi':
        # NOTE(review): the supplier is used with its default arguments;
        # confirm they match the layout of the .smi files (header line,
        # delimiter) this script is fed.
        candidates = Chem.SmilesMolSupplier(file_path)
    elif file_type == 'mol':
        # RDKit has no Chem.MolSupplier; a MOL file is read directly.
        candidates = [Chem.MolFromMolFile(file_path)]
    elif file_type == 'mol2':
        # No Mol2 supplier is imported by this script; read the file with
        # the Chem-level Mol2 reader instead.
        candidates = [Chem.MolFromMol2File(file_path)]
    elif file_type == 'pdb':
        candidates = [Chem.MolFromPDBFile(file_path)]
    else:
        raise ValueError(f'Unsupported file type: {file_type}')

    # Parse failures come back as None for every reader above.
    mols = [mol for mol in candidates if mol is not None]
    if not mols:
        raise ValueError('No molecules found in file!')
    return mols
def timeout_decorator(timeout):
    """Build a decorator that gives up waiting on a call after ``timeout`` seconds.

    The wrapped function runs once in a worker thread. If it finishes in
    time, its return value is returned (or its exception re-raised) in the
    caller's thread. If it is still running after ``timeout`` seconds, a
    message is printed and ``None`` is returned; the worker thread itself
    is not interrupted and keeps running in the background.

    Parameters
    ----------
    timeout : float
        Maximum number of seconds to wait for the wrapped call.

    Returns
    -------
    decorator : callable
        Decorator applying the timeout to a function.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # The thread's return value is otherwise lost, so the worker
            # stores its result (or exception) here for the caller.
            outcome = {}

            def target_func():
                try:
                    outcome['value'] = func(*args, **kwargs)
                except Exception as exc:
                    outcome['error'] = exc

            # Daemon thread: a call that never finishes must not keep the
            # interpreter alive after the main program is done.
            thread = threading.Thread(target=target_func, daemon=True)
            thread.start()
            thread.join(timeout)
            if thread.is_alive():
                print("Function execution timed out.")
                return None
            if 'error' in outcome:
                raise outcome['error']
            # Return the result computed in the thread instead of calling
            # the function a second time without any timeout.
            return outcome['value']
        return wrapper
    return decorator
@timeout_decorator(timeout=5)
def Brics_frag(mol):
    """Decompose one molecule with RDKit's BRICS rules.

    Parameters
    ----------
    mol : RDKit molecule
        Molecule to fragment.

    Returns
    -------
    The result of ``BRICS.BRICSDecompose(mol)``. The call is wrapped by
    ``timeout_decorator`` with a 5 second limit.
    """
    return BRICS.BRICSDecompose(mol)
@timeout_decorator(timeout=10)
def Recap_frag(mol):
    """Decompose one molecule with RDKit's RECAP rules.

    Parameters
    ----------
    mol : RDKit molecule
        Molecule to fragment.

    Returns
    -------
    list
        Keys of the ``children`` mapping on the root node returned by
        ``Recap.RecapDecompose(mol)``. The call is wrapped by
        ``timeout_decorator`` with a 10 second limit.
    """
    hierarchy = Recap.RecapDecompose(mol)
    # Only the root node's direct children are collected.
    return [key for key in hierarchy.children.keys()]
def MacFrac(smi_path, output):
    """Run the external MacFrag.py script on a molecule file.

    Parameters
    ----------
    smi_path : str
        Path of the input molecule file, passed to MacFrag.py as ``-i``.
    output : str
        Output location, passed to MacFrag.py as ``-o``.

    Returns
    -------
    None
        A message is printed if the script exits with a non-zero status.
    """
    import subprocess
    import sys

    func = '/path/to/your/MacFrag.py'  # change this to the location of your own copy
    # Arguments are passed as a list (no shell), so paths containing spaces
    # or shell metacharacters reach MacFrag.py unchanged.
    command = [
        sys.executable or 'python',  # same interpreter as this script
        func,
        '-i', smi_path,
        '-o', output,
        '-maxBlocks', '6',
        '-maxSR', '8',
        '-asMols', 'False',
        '-minFragAtoms', '1',
    ]
    completed = subprocess.run(command, check=False)
    if completed.returncode != 0:
        print(f'MacFrag exited with status {completed.returncode}')
def _write_fragments(mols, fragmenter, out_path):
    """Write the fragments of every molecule to ``out_path``, one per line."""
    with open(out_path, 'w') as f:
        for mol in mols:
            fragments = fragmenter(mol)
            # The fragmenters return None when they time out; an empty
            # result would otherwise add a blank line to the file.
            if not fragments:
                continue
            f.write('\n'.join(fragments) + '\n')


def main(smi_path, output, methods):
    """Fragment the molecules in ``smi_path`` with the selected method(s).

    Parameters
    ----------
    smi_path : str
        Path of the input molecule file.
    output : str
        Directory receiving ``brics_frag.smi`` / ``recap_frag.smi``; it is
        created if missing. Also passed on to ``MacFrac``.
    methods : str or collection of str
        Selected method(s); tested with ``in`` against 'Brics', 'Recap'
        and 'Macfrag'.

    Molecules whose fragmentation times out or yields nothing are skipped.
    The runtime of each stage is printed.
    """
    if ('Brics' in methods) or ('Recap' in methods):
        start = time.time()
        mols = read_molecules(smi_path)
        runtime = time.time() - start
        print(f'read mols runtime: {runtime}')
        # The fragment files are opened inside `output`, so it must exist.
        os.makedirs(output, exist_ok=True)

    if 'Brics' in methods:
        start = time.time()
        _write_fragments(mols, Brics_frag, os.path.join(output, 'brics_frag.smi'))
        runtime = time.time() - start
        print(f'brics runtime: {runtime}')

    if 'Recap' in methods:
        start = time.time()
        _write_fragments(mols, Recap_frag, os.path.join(output, 'recap_frag.smi'))
        runtime = time.time() - start
        print(f'recap runtime: {runtime}')

    if 'Macfrag' in methods:
        start = time.time()
        MacFrac(smi_path, output)
        runtime = time.time() - start
        print(f'macfrag runtime: {runtime}')
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    cli = argparse.ArgumentParser(description='make fragments for smiles file')
    option_specs = (
        (('-i', '--smi_path'), {'required': True}),
        (('-o', '--output'), {'default': os.getcwd()}),
        (('-m', '--methods'),
         {'default': 'Brics', 'choices': ['Brics', 'Recap', 'Macfrag']}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    args = cli.parse_args()
    smi_path, output, methods = args.smi_path, args.output, args.methods
    main(smi_path, output, methods)
三、脚本使用方法
python 脚本文件名.py -i 分子文件路径 -o 输出文件夹路径 -m [Brics, Recap, Macfrag 三选一]