Jupyter快捷键
ctrl+Enter: 运行
Shift+Enter:运行当前单元并选中下一单元(若当前已是最后一个单元,则在下方新建一个)
Tab:补齐面板,需要Enter确认
Esc / Enter:切换模式(Esc进入命令模式,Enter进入编辑模式)
RDkit基本概念
SMILES
简化分子线性输入规范(SMILES),是一种用ASCII字符串明确描述分子结构的规范,即化学结构的字符串表示
mol
将描述符转化为计算机能识别的文件格式
分子的书写
# SMILES string of the example molecule (lowercase letters denote aromatic atoms).
smi = 'c1nccc2n1ccc2'
# Parse the SMILES string into an RDKit Mol object.
# NOTE: assumes `from rdkit import Chem` has already been executed in the session.
mol = Chem.MolFromSmiles(smi)
闭环的书写
# The digits are ring-closure labels: two atoms carrying the same digit are bonded to close a ring.
m = Chem.MolFromSmiles('OC1C2C1CC2')
可以这样理解:先将原子沿着一条链依次编号(从0开始),再把需要成键闭环的原子对用相同的数字标记出来。如下图:1号和3号C原子成键闭环,所以在SMILES中都标记为C1;4号C原子不参与闭环连接,所以没有特殊标记;2号和5号C原子也要成键闭环,所以都标记为C2
描述符
RDK描述符
MACCS指纹
摩根指纹
分子性质
The topological polar surface area (TPSA) descriptor
logP
油水分配系数(通常指正辛醇/水分配系数)的对数值,是判断化合物是否适合作为药物的重要指标
RMSD
Root Mean Square Deviation,均方根偏差
RDkit基本指令
Chem & AllChem
mol = Chem.MolFromSmiles(smi)
# Imports first, so the cell can be run on its own.
from rdkit import Chem
from rdkit.Chem import AllChem

# SMILES string of the example molecule used throughout these notes.
smi = 'CC(C)OC(=O)C(C)NP(=O)(OCC1C(C(C(O1)N2C=CC(=O)NC2=O)(C)F)O)OC3=CC=CC=C3'

# Parse the SMILES string and show the type of the resulting object.
mol = Chem.MolFromSmiles(smi)
print(type(mol))
# Output:
# <class 'rdkit.Chem.rdchem.Mol'>
读入文件
from rdkit import Chem
# Load a molecule from the MDL mol file './ligand.mol' (path relative to the working directory).
mol = Chem.MolFromMolFile('./ligand.mol')
# Show the type of the loaded object.
print(type(mol))
写入文件
# Build the molecule from its SMILES string.
smi = 'CC(C)OC(=O)C(C)NP(=O)(OCC1C(C(C(O1)N2C=CC(=O)NC2=O)(C)F)O)OC3=CC=CC=C3'
mol = Chem.MolFromSmiles(smi)
# Regenerate the SMILES string from the parsed molecule.
smi = Chem.MolToSmiles(mol)
# MolToMolBlock returns the molecule as MDL mol-block text, i.e. the content of a .mol file.
molblock = Chem.MolToMolBlock(mol)
print(molblock)
# Write the mol block to disk; the context manager guarantees the file handle
# is flushed and closed instead of being left open until garbage collection.
with open('ligand.mol', 'w') as fh:
    print(molblock, file=fh)
操作原子
from rdkit import Chem
smi = 'CC(C)OC(=O)C(C)NP(=O)(OCC1C(C(C(O1)N2C=CC(=O)NC2=O)(C)F)O)OC3=CC=CC=C3'
mol = Chem.MolFromSmiles(smi)
# GetAtoms() returns a sequence of the molecule's atoms that supports indexing.
atoms = mol.GetAtoms()
# Print the sequence and its type, then the first atom and its type.
print(atoms)
print(type(atoms))
print(atoms[0])
print(type(atoms[0]))
# Output (object addresses vary between runs):
#<rdkit.Chem.rdchem._ROAtomSeq object at 0x000002405C620160>
#<class 'rdkit.Chem.rdchem._ROAtomSeq'>
#<rdkit.Chem.rdchem.Atom object at 0x000002405C6200A0>
#<class 'rdkit.Chem.rdchem.Atom'>
操作bond
from rdkit import Chem
smi = 'CC(C)OC(=O)C(C)NP(=O)(OCC1C(C(C(O1)N2C=CC(=O)NC2=O)(C)F)O)OC3=CC=CC=C3'
mol = Chem.MolFromSmiles(smi)
# GetBonds() returns a sequence of the molecule's bonds that supports indexing.
bonds = mol.GetBonds()
print(type(bonds))
print(type(bonds[0]))
# NOTE(review): the same expression is printed twice; possibly another index was intended.
print(type(bonds[0]))
# Output:
#<class 'rdkit.Chem.rdchem._ROBondSeq'>
#<class 'rdkit.Chem.rdchem.Bond'>
#<class 'rdkit.Chem.rdchem.Bond'>
操作环(ring)
from rdkit import Chem
m = Chem.MolFromSmiles('OC1C2C1CC2')
# Fetch the atom with index 2.
atom2 = m.GetAtomWithIdx(2)
print("atom2 in ring:", atom2.IsInRing()) # whether the atom is part of a ring
from rdkit import Chem

m = Chem.MolFromSmiles('C1CC1CC1CC1')

# Ring perception: each entry of `ssr` is one ring, given as a sequence of atom indices.
ssr = Chem.GetSymmSSSR(m)
num_ring = len(ssr)
print("num of ring", num_ring)

# List the atom indices that make up each ring.
for ring in ssr:
    print("ring consisted of atomsid:", list(ring))
# Lowercase atom symbols in SMILES denote aromatic (conjugated) atoms.
m = Chem.MolFromSmiles('c1ccccc1')

# Print the bond type of every bond; each one is reported as AROMATIC.
for bond in m.GetBonds():
    print(bond.GetBondType())
绘图
以Grid形式画图
from rdkit import Chem
# AllChem is used below (Compute2DCoords, GenerateDepictionMatching2DStructure),
# so import it here; otherwise this cell fails with NameError when run on its own.
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

# Molecules to draw; one entry is intentionally disabled.
smis = [
    'COC1=C(C=CC(=C1)NS(=O)(=O)C)C2=CN=CN3C2=CC=C3',
    # 'CCN(CC1=C(C=CC(=C1)C(F)(F)F)C2=CC(=C3N2C=NC=C3)CC(=O)O)C(=O)C4CC4',
    'C1=CC2=C(C(=C1)C3=CN=CN4C3=CC=C4)ON=C2C5=CC=C(C=C5)F',
    'COC(=O)C1=CC2=CC=CN2C=N1',
    'C1=C2C=C(N=CN2C(=C1)Cl)C(=O)O',
]

# Shared scaffold; its 2D coordinates serve as the reference layout.
template = Chem.MolFromSmiles('c1nccc2n1ccc2')
AllChem.Compute2DCoords(template)

mols = []
for smi in smis:
    mol = Chem.MolFromSmiles(smi)
    # Lay out each molecule so that the shared scaffold is drawn in the same orientation.
    AllChem.GenerateDepictionMatching2DStructure(mol, template)
    mols.append(mol)

# `legends` holds one caption per molecule; empty strings mean no caption.
img = Draw.MolsToGridImage(mols, molsPerRow=4, subImgSize=(200, 200), legends=[''] * len(mols))
img  # last expression of the cell: Jupyter renders the image
输出结果
给原子标号
# Requires `from rdkit import Chem` to have been run earlier in the session.
m = Chem.MolFromSmiles('OC1C2C1CC2')

# Store each atom's index in its 'atomLabel' property.
for atom in m.GetAtoms():
    atom.SetProp('atomLabel', str(atom.GetIdx()))

m  # last expression of the cell: Jupyter renders the molecule
输出结果
匹配片段
# Molecule to search in.
m = Chem.MolFromSmiles('c1ccccc1OC')
# SMARTS query: an aliphatic O bonded to an aliphatic C.
# NOTE: this is broader than a methoxy group; it matches any such O-C pair.
patt = Chem.MolFromSmarts('OC')

# First test for a match, then fetch the indices of the matching atoms.
flag = m.HasSubstructMatch(patt)
if flag:
    atomids = m.GetSubstructMatch(patt)
    print("matched atom id:", atomids)
else:
    print("molecule m doesn't contain group -OCH3")

m  # last expression of the cell: Jupyter renders the molecule
# Output:
# matched atom id: (6, 7)
8530

被折叠的 条评论
为什么被折叠?



