利用python构建分子碎片库


前言

基于分子文件构建分子碎片库。脚本内置 BRICS、RECAP、MacFrag 三种分子切割算法。

  1. RECAP(Retrosynthetic Combinatorial Analysis Procedure):RECAP 是一种基于化学反应规则的分子切割方法,通过将分子沿特定的化学键进行断裂,生成更小的碎片。这些碎片可以帮助研究者更好地了解分子的结构和活性关系。
  2. BRICS(Breaking of Retrosynthetically Interesting Chemical Substructures):BRICS 是一种基于逆合成规则的分子切割方法,用于生成合成可行的分子片段。通过这种方法,研究者可以从现有化合物中获得片段并构建新的化学结构,并预测它们的性质。
  3. MacFrag:由上海科技大学李洪林课题组发布,在BRICS规则基础上剔除了一些规则。此外引入了Simple算法,将分子片段的提取转换为子图的提取。

一、相关文件与环境依赖

RDKit
igraph(python-igraph,MacFrag.py 依赖)
MacFrag.py https://github.com/yydiao1025/MacFrag/blob/main/MacFrag.py

二、脚本

import os, time
import threading
from functools import wraps
from rdkit import Chem
from rdkit.Chem import BRICS, Recap
import argparse

def read_molecules(file_path):
    """Read molecules from a file with RDKit.

    The reader is chosen from the file extension (compared
    case-insensitively): ``sdf``, ``smi``, ``mol``, ``mol2`` or ``pdb``.

    Parameters
    ----------
    file_path : str
        the molecular file path

    Returns
    -------
    mols : list
        RDKit molecule list; entries that RDKit could not parse (``None``)
        are dropped

    Raises
    ------
    ValueError
        If the extension is not supported, or if no molecule could be read.

    """
    # Pick the reader from the file extension.
    extension = file_path.split('.')[-1]
    file_type = extension.lower()
    if file_type == 'sdf':
        candidates = Chem.SDMolSupplier(file_path)
    elif file_type == 'smi':
        # NOTE(review): SmilesMolSupplier is used with its defaults here;
        # confirm that the default header-line handling matches the input
        # files (see the RDKit documentation for ``titleLine``).
        candidates = Chem.SmilesMolSupplier(file_path)
    elif file_type == 'mol':
        # A MOL file is read with MolFromMolFile; wrap the single result in
        # a list so every branch yields an iterable of candidates.
        candidates = [Chem.MolFromMolFile(file_path)]
    elif file_type == 'mol2':
        # Presumably only one molecule is parsed per Mol2 file -- verify
        # against the RDKit documentation if multi-molecule files are used.
        candidates = [Chem.MolFromMol2File(file_path)]
    elif file_type == 'pdb':
        candidates = [Chem.MolFromPDBFile(file_path)]
    else:
        raise ValueError(f'Unsupported file type: {extension}')

    # RDKit signals a parse failure with None instead of raising.
    mols = [mol for mol in candidates if mol is not None]

    # Fail loudly when nothing usable was read.
    if not mols:
        raise ValueError('No molecules found in file!')

    return mols

def timeout_decorator(timeout):
    """Build a decorator that stops waiting for a call after ``timeout`` seconds.

    The decorated function is executed exactly once, in a worker thread.
    The caller waits at most ``timeout`` seconds for it:

    * finished in time -> its return value is returned;
    * raised in time   -> the same exception is re-raised in the caller;
    * still running    -> a message is printed and ``None`` is returned.

    Parameters
    ----------
    timeout : float or None
        Maximum number of seconds to wait (passed to ``Thread.join``).

    Returns
    -------
    decorator : callable
        A decorator applying the behaviour above to a function.

    Notes
    -----
    A Python thread cannot be killed, so after a timeout the worker keeps
    running in the background. It is a daemon thread so that it does not
    keep the interpreter alive at exit.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Filled in by the worker: either 'value' or 'error'.
            outcome = {}

            def target_func():
                try:
                    outcome['value'] = func(*args, **kwargs)
                except BaseException as exc:
                    # Not swallowed: handed back to the caller below.
                    outcome['error'] = exc

            thread = threading.Thread(target=target_func, daemon=True)
            thread.start()
            thread.join(timeout)

            if thread.is_alive():
                print("Function execution timed out.")
                return None
            if 'error' in outcome:
                raise outcome['error']
            # Reuse the result computed by the worker instead of calling
            # func a second time (which would also bypass the timeout).
            return outcome.get('value')

        return wrapper

    return decorator

@timeout_decorator(timeout=5)
def Brics_frag(mol):
    """Return the result of ``BRICS.BRICSDecompose`` for ``mol``.

    The call is wrapped by ``timeout_decorator`` with ``timeout=5``.
    """
    return BRICS.BRICSDecompose(mol)

@timeout_decorator(timeout=10)
def Recap_frag(mol):
    """Return the keys of the direct children of the RECAP tree of ``mol``.

    The tree comes from ``Recap.RecapDecompose``; the call is wrapped by
    ``timeout_decorator`` with ``timeout=10``.
    """
    hierarchy = Recap.RecapDecompose(mol)
    child_nodes = hierarchy.children
    return list(child_nodes.keys())

def MacFrac(smi_path, output, macfrag_script='/path/to/your/MacFrag.py'):
    """Run the external MacFrag.py script on a molecule file.

    The script is started with the current Python interpreter and the fixed
    options ``-maxBlocks 6 -maxSR 8 -asMols False -minFragAtoms 1``.

    Parameters
    ----------
    smi_path : str
        Input file handed to MacFrag.py via ``-i``.
    output : str
        Output location handed to MacFrag.py via ``-o``.
    macfrag_script : str, optional
        Location of MacFrag.py. The default is a placeholder: change it to
        the location of your own copy of the script, or pass the path here.

    Returns
    -------
    None
        A message is printed when the script exits with a non-zero status.
    """
    import subprocess
    import sys

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and rules out command injection.
    command = [
        sys.executable, macfrag_script,
        '-i', smi_path,
        '-o', output,
        '-maxBlocks', '6',
        '-maxSR', '8',
        '-asMols', 'False',
        '-minFragAtoms', '1',
    ]
    completed = subprocess.run(command, check=False)
    if completed.returncode != 0:
        # Report the failure instead of silently ignoring the exit status.
        print(f'MacFrag exited with status {completed.returncode}')

def _write_fragments(mols, fragmenter, out_path):
    """Write the fragments of every molecule to ``out_path``, one per line."""
    with open(out_path, 'w') as f:
        for mol in mols:
            fragments = fragmenter(mol)
            # None means the fragmenter timed out; an empty result would
            # only add a blank line to the file. Skip both.
            if not fragments:
                continue
            f.write('\n'.join(fragments) + '\n')


def main(smi_path, output, methods):
    """Fragment the molecules of a file with the selected method(s).

    Parameters
    ----------
    smi_path : str
        Path of the input molecule file.
    output : str
        Directory receiving ``brics_frag.smi`` / ``recap_frag.smi``; it is
        created when missing. It is also passed on to ``MacFrac``.
    methods : str or container of str
        Selected method(s); each of ``'Brics'``, ``'Recap'`` and
        ``'Macfrag'`` is tested with ``in``.

    The time spent in each step is printed.
    """
    if ('Brics' in methods) or ('Recap' in methods):
        start = time.time()
        mols = read_molecules(smi_path)
        runtime = time.time() - start
        print(f'read mols runtime: {runtime}')
        # The fragment files are opened for writing inside this directory.
        os.makedirs(output, exist_ok=True)

    if 'Brics' in methods:
        start = time.time()
        _write_fragments(mols, Brics_frag, os.path.join(output, 'brics_frag.smi'))
        runtime = time.time() - start
        print(f'brics runtime: {runtime}')

    if 'Recap' in methods:
        start = time.time()
        _write_fragments(mols, Recap_frag, os.path.join(output, 'recap_frag.smi'))
        runtime = time.time() - start
        print(f'recap runtime: {runtime}')

    if 'Macfrag' in methods:
        start = time.time()
        MacFrac(smi_path, output)
        runtime = time.time() - start
        print(f'macfrag runtime: {runtime}')

if __name__ == '__main__':
    # Command-line entry point: collect the options and hand them to main().
    cli = argparse.ArgumentParser(description='make fragments for smiles file')
    cli.add_argument('-i', '--smi_path', required=True)
    cli.add_argument('-o', '--output', default=os.getcwd())
    cli.add_argument(
        '-m', '--methods',
        default='Brics',
        choices=['Brics', 'Recap', 'Macfrag'],
    )
    options = cli.parse_args()

    main(options.smi_path, options.output, options.methods)

脚本使用方法

python 脚本文件名.py -i 分子文件路径 -o 输出文件夹路径 -m [Brics, Recap, Macfrag 三选一]

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值