化学结构处理

该博客介绍了一种使用RDKit库来检测和处理分子结构异常的方法,包括价键异常和净电荷检查。通过遍历吡唑环互变异构体,识别手性中心并生成异构体SMILES,将异常结构写入CSV文件。同时,它还涉及到了TautomerEnumerator在分子异构体生成中的应用。
摘要由CSDN通过智能技术生成

通过 RDKit识别异常结构并输出到CSV

import csv
from rdkit import Chem
from rdkit.Chem.rdchem import Atom
from rdkit.Chem import MolStandardize,rdmolops
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.Draw import rdMolDraw2D,IPythonConsole
from IPython.display import SVG
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers
import codecs

def data_write_csv(file_name, datas):
    """Write ``datas`` to ``file_name`` as space-delimited, UTF-8 encoded rows.

    Args:
        file_name: Path of the output file; it is created or truncated.
        datas: Iterable of rows. A row is normally an iterable of field
            values; a bare string is written as one single-field row.

    Prints a confirmation message once the file has been written and closed.
    """
    # newline='' lets the csv module control line endings itself.
    # The context manager guarantees the file is flushed and closed, even if
    # writing a row raises.
    with open(file_name, 'w', encoding='utf-8', newline='') as file_csv:
        # The quote character is left at its default: using a space for both
        # the delimiter and the quote character is ambiguous, and newer
        # CPython releases reject such a dialect with ValueError.
        writer = csv.writer(file_csv, delimiter=' ', quoting=csv.QUOTE_MINIMAL)
        for data in datas:
            # A str is itself iterable, so writerow() would emit one field per
            # character; wrap it so the whole string stays in one field.
            writer.writerow([data] if isinstance(data, str) else data)
    print("保存文件成功,处理结束")

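# Abnormal structures are usually identified by two checks: whether the valences are valid and whether the net charge is zero. Valence problems are detected automatically by MolFromSmiles().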

def get_total_charge(mol):
    """Return the net formal charge of ``mol``.

    The result is the sum of ``GetFormalCharge()`` over every atom yielded by
    ``mol.GetAtoms()``; a molecule without atoms gives 0.

    Args:
        mol: Molecule object exposing ``GetAtoms()``, whose atoms expose
            ``GetFormalCharge()``.

    Returns:
        The summed formal charge as an integer.
    """
    # Iterate the atoms directly instead of indexing by position; the manual
    # index increment inside the original for-loop had no effect.
    return sum(atom.GetFormalCharge() for atom in mol.GetAtoms())

遍历手性原子的立体构型,输出立体异构体的SMILES列表,并筛选出问题结构

# Read SMILES from the third column of the input CSV and sort each row into
# one of three outcomes:
#   * unparsable SMILES (MolFromSmiles returned None) -> data_valence_error
#   * non-zero net formal charge                      -> charge_error2
#   * otherwise, if any chiral centre is unassigned ('?'), write every
#     enumerated stereoisomer as a CXSMILES row to the output CSV.
# Relies on names defined outside this snippet: input_file_name,
# output_file_name, out_file_name1, out_file_name2, data_valence_error and
# charge_error2.
with open(input_file_name,'r') as readfile:
    k=0  # number of input rows processed
    csv_reader = csv.reader(readfile)
    with open(output_file_name,'w',encoding='utf-8', newline='') as writefile:
        # One writer for the whole file; it does not need rebuilding per row.
        writer = csv.writer(writefile)
        for row in csv_reader:
            k+=1
            mol = Chem.MolFromSmiles(row[2])
            if mol is None:
                data_valence_error.append(row[2])
            elif get_total_charge(mol)!=0:
                charge_error2.append(row[2])
            else:
                chiral_list=Chem.FindMolChiralCenters(mol,includeUnassigned=True)
                # Write the enumeration once per molecule. Writing it once per
                # unassigned centre would duplicate the full isomer list for
                # molecules with several unassigned centres.
                if any(label=='?' for _, label in chiral_list):
                    writer.writerows(
                        [Chem.MolToCXSmiles(isomer)]
                        for isomer in EnumerateStereoisomers(mol)
                    )
                # NOTE: molecules whose chiral centres are all assigned (or
                # that have none) are not written to the output file.
        print(k)

        data_write_csv(out_file_name1,data_valence_error)
        data_write_csv(out_file_name2,charge_error2)

遍历吡唑环互变异构体

# Substructure query: a five-membered aromatic ring (each atom in exactly one
# ring) with one N-H nitrogen and one further ring nitrogen.
# NOTE(review): in this SMARTS the two nitrogens are separated by a carbon
# (1,3-arrangement, i.e. an imidazole-type ring), whereas the surrounding text
# calls it a pyrazole ring (adjacent nitrogens) - confirm which is intended.
patt = Chem.MolFromSmarts('[c;R1]1[c;R1][n;R1;H1][c;R1][n;R1]1')
pharms=rdMolStandardize.CleanupParameters()
TautomerEnumerator=rdMolStandardize.TautomerEnumerator(pharms)

# For every input row whose SMILES (third column) parses, has zero net formal
# charge and matches `patt`, write each enumerated tautomer SMILES as its own
# row. Unparsable and charged inputs are appended to the error lists.
# Relies on names defined outside this snippet: input_file_name,
# output_file_name2, data_valence_error and charge_error2.
j=0  # number of molecules that matched the ring pattern
with open(input_file_name,'r') as readfile:
    csv_reader = csv.reader(readfile)
    with open(output_file_name2,'w',encoding='utf-8', newline='') as writefile:
        # One writer for the whole file; it does not need rebuilding per row.
        writer = csv.writer(writefile)
        for row in csv_reader:
            mol = Chem.MolFromSmiles(row[2])
            if mol is None:
                data_valence_error.append(row[2])
            elif get_total_charge(mol)!=0:
                charge_error2.append(row[2])
            elif mol.HasSubstructMatch(patt):
                j+=1
                tautomers=TautomerEnumerator.Enumerate(mol).smiles
                # One single-column row per tautomer SMILES.
                writer.writerows([smiles] for smiles in tautomers)
        print(j)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值