import csv
from rdkit import Chem
from rdkit.Chem.rdchem import Atom
from rdkit.Chem import MolStandardize,rdmolops
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.Draw import rdMolDraw2D,IPythonConsole
from IPython.display import SVG
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers
import codecs
def data_write_csv(file_name, datas):
    """Write rows to a space-delimited, UTF-8 encoded text file.

    Args:
        file_name: Path of the file to create; an existing file is overwritten.
        datas: Iterable of rows. Each row is an iterable of fields; a bare
            string is treated as a single-field row.
    """
    # The context manager closes the handle even if a row fails to serialise;
    # newline='' leaves line endings to the csv module.
    with open(file_name, 'w', encoding='utf-8', newline='') as file_csv:
        # The quote character must differ from the delimiter: sharing one
        # character makes quoted output ambiguous, and recent Python releases
        # reject such a dialect with ValueError. The default '"' is used.
        writer = csv.writer(file_csv, delimiter=' ', quoting=csv.QUOTE_MINIMAL)
        for data in datas:
            # csv would otherwise split a bare string into one field per
            # character (e.g. a SMILES "CCO" would become "C C O").
            writer.writerow([data] if isinstance(data, str) else data)
    print("保存文件成功,处理结束")
# Abnormal structures usually show up as either invalid valences or a non-zero net charge; invalid valences are detected automatically by MolFromSmiles().
def get_total_charge(mol):
    """Return the net charge of a molecule (net-charge check).

    Args:
        mol: Molecule whose ``GetAtoms()`` yields atoms exposing
            ``GetFormalCharge()``.

    Returns:
        The sum of the formal charges of all atoms; 0 when the molecule has
        no atoms.
    """
    # Iterate the atoms directly instead of indexing by position.
    return sum(atom.GetFormalCharge() for atom in mol.GetAtoms())
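# Enumerate the stereoisomers of molecules with unassigned chiral centers and write out their SMILES list; also filter out problem structures.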
# Read the input CSV, take the SMILES from the third column of every row and:
#   * record SMILES that MolFromSmiles() cannot parse in data_valence_error,
#   * record structures with a non-zero net charge in charge_error2,
#   * for the remaining structures that have at least one unassigned chiral
#     centre, write every enumerated stereoisomer (as CXSMILES) to the output.
with open(input_file_name, 'r', newline='') as readfile:
    k = 0  # number of input rows read
    csv_reader = csv.reader(readfile)
    with open(output_file_name, 'w', encoding='utf-8', newline='') as writefile:
        # One writer serves the whole file; no need to rebuild it per row.
        writer = csv.writer(writefile)
        for row in csv_reader:
            k += 1
            # Blank lines come back from csv.reader as empty rows; skip
            # anything without a third column instead of raising IndexError.
            if len(row) < 3:
                continue
            mol = Chem.MolFromSmiles(row[2])
            if mol is None:
                data_valence_error.append(row[2])
            elif get_total_charge(mol) != 0:
                charge_error2.append(row[2])
            else:
                chiral_list = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
                # '?' marks an unassigned centre. EnumerateStereoisomers()
                # already expands all of them at once, so enumerate a single
                # time per molecule; doing it once per '?' centre duplicated
                # the output rows.
                if any(label == '?' for _, label in chiral_list):
                    writer.writerows(
                        [Chem.MolToCXSmiles(isomer)]
                        for isomer in EnumerateStereoisomers(mol)
                    )
                # NOTE: molecules without unassigned centres are deliberately
                # not written to the output.
# NOTE(review): the original indentation was lost; the row count is assumed to
# be reported once, after the whole file has been processed.
print(k)
data_write_csv(out_file_name1, data_valence_error)
data_write_csv(out_file_name2, charge_error2)
# Enumerate the tautomers of pyrazole rings.
# SMARTS for an aromatic five-membered ring c-c-[nH]-c-n in which every atom
# belongs to exactly one ring.
# NOTE(review): the two nitrogens in this pattern are separated by a carbon
# (an imidazole-type ring), while the section heading says pyrazole, whose
# nitrogens are adjacent -- confirm which one is intended.
patt = Chem.MolFromSmarts('[c;R1]1[c;R1][n;R1;H1][c;R1][n;R1]1')
pharms = rdMolStandardize.CleanupParameters()
TautomerEnumerator = rdMolStandardize.TautomerEnumerator(pharms)
j = 0  # number of structures that contain the ring pattern
with open(input_file_name, 'r', newline='') as readfile:
    csv_reader = csv.reader(readfile)
    with open(output_file_name2, 'w', encoding='utf-8', newline='') as writefile:
        # One writer serves the whole file; no need to rebuild it per row.
        writer = csv.writer(writefile)
        for row in csv_reader:
            # Blank lines come back from csv.reader as empty rows; skip
            # anything without a third column instead of raising IndexError.
            if len(row) < 3:
                continue
            mol = Chem.MolFromSmiles(row[2])
            if mol is None:
                data_valence_error.append(row[2])
            elif get_total_charge(mol) != 0:
                charge_error2.append(row[2])
            elif mol.HasSubstructMatch(patt):
                j += 1
                tautomers = TautomerEnumerator.Enumerate(mol).smiles
                # One single-column row per tautomer SMILES.
                tautomers_list = [[smiles] for smiles in tautomers]
                writer.writerows(tautomers_list)
print(j)