在python平台上利用pymol来查找PDB文件中蛋白质的相互作用位点

小吉喆

已于 2022-04-28 07:33:19 修改

阅读量1.9w

点赞数 20

分类专栏：生物信息学文章标签： python

于 2020-08-23 21:03:31 首次发布

本文链接：https://blog.csdn.net/weixin_45832250/article/details/108188517

版权

生物信息学专栏收录该内容

3 篇文章

订阅专栏

这篇代码写的可能有点问题，大家如果有需要，去看另一篇吧。如果有啥不足，请批评指正。

在python平台上利用pymol来查找PDB文件中蛋白质的相互作用位点

关于蛋白质结合位点，查阅了很多篇文献，大多数都是关于如何定义结合位点的，却没有说明该如何去找结合位点。大多数都是说从PDB库中下载蛋白质结构数据，但是却没有说下载过数据之后该怎么做。偶然间从论坛上看到利用pymol软件可以解析PDB文件中的原子坐标，也可以查找选中片段范围内的氨基酸，比如3.5A内的氨基酸。经过半个多月的摸索，发现可以直接在python上利用pymol包来找结合位点。

1. 利用pymol来找结合位点

下图是利用pymol来找结合位点，只要选中链中的配体，然后在命令窗口输入select 2，byres sele around 3.5。2中就会显示出来3.5A内的作用位点
pdb文件在pymol中的显示
这样可以一个一个的来找结合位点，如果数据不多，可以通过该方法来找结合位点。

2. 代码实现

对于数据比较多的情况来说，我们可以利用pymol包，在python上面来寻找结合位点，并输出为列表。下面直接上代码

from pymol import cmd

import os
import re

sumlist = []

outfile = open('C:\\Users\\Desktop\\找到的结合位点.txt','a')#该文件为找到的结合位点
#####
aa_codes = {
     'ALA':'A', 'CYS':'C', 'ASP':'D', 'GLU':'E',
     'PHE':'F', 'GLY':'G', 'HIS':'H', 'LYS':'K',
     'ILE':'I', 'LEU':'L', 'MET':'M', 'ASN':'N',
     'PRO':'P', 'GLN':'Q', 'ARG':'R', 'SER':'S',
     'THR':'T', 'VAL':'V', 'TYR':'Y', 'TRP':'W'}#定义一个字典，将三字母氨基酸转换为单字母

DNA = ['DT','DG','DA','DC']##定义一个DNA列表

RNA = ['G', 'C', 'U', 'A']##定义一个RNA列表

#####

####定义函数来找配体


def search(lig,sym,chainID):
    srlig =[]
    cmd.load(file_path + '\\' + i)
    sss = lig
    cmd.remove('solvent')
    cmd.select('1', ' resn %s in chain %s' % (sss,chainID))
    # cmd.set_name('')
    cmd.select('2', 'byres 1 around 3.5')#3.5原子间的距离
    srlig.append(i[0:4])
    for a in cmd.get_model("2").atom:
        if a.resn in aa_codes:
            aazong = aa_codes[a.resn] + a.resi
            if aazong not in srlig:
                srlig.append(aazong)
    srlig.insert(1, sym)
    if len(srlig)>2 and srlig not in sumlist: #剔除没有配体的列表
        sumlist.append(srlig)
    cmd.delete('all')


#######函数结束



file_path = 'C:\\Users\\Desktop\\蛋白结合位点质\\protein_site'###这个地方是pdb文件的路径

file = os.listdir(file_path)

for i in file:
    f = open(file_path+'\\'+i)
    ls = []

    otherlig = []  # 定义从序列中找出的配体列表 otherlig 是小分子，金属离子
    DNAlig = [] ###与DNA相互作用
    RNAlig = []#与RNA相互作用
    proteinlig = []# 与蛋白质相互作用
    HET = []
    AAA=[]
    '''
    判断分辨率是否大于3.5A
    '''
    g = open(file_path + '\\' + i)
    for reso in g:
         if reso[10:15] =='X-RAY':
             for reso in g:
                 if reso[0:22] == 'REMARK   2 RESOLUTION.':
                    AA = re.findall('\d+',reso[22:])
                    Aa = ''.join(AA)
                    AAA.append(Aa)
    if AAA:
        if int(AAA[0])<=350:###定义分辨率大于3.5A
            for j in f:
                if j[0:6]=='SEQRES':
                    colume = j.split()
                    if [colume[2],colume[3]] not in ls:
                        ls.append([colume[2],colume[3]])
            f.close()
            # print(ls)
            minls = []
            for m in ls:
                minls.append(int(m[1]))
            minnum = min(minls)
            chainls = []
            chainssssssla = []
            for m in ls:
                if int(m[1])==minnum:
                    chainls.append(m[0])
            n = open(file_path+'\\'+i)
            for j in n:
                if j[0:6]=='SEQRES':
                    colume = j.split()
                    if colume[2] in chainls and len(ls)>1:
                        for col in colume[4:]:
                            if col in DNA and col not in DNAlig:
                                DNAlig.append(col)
                            elif col in RNA and col not in RNAlig:
                                RNAlig.append(col)
                            elif col in aa_codes and col not in proteinlig:
                                proteinlig.append(col)
                            elif col not in otherlig and col not in aa_codes and col not in RNA and col not in DNA:
                                otherlig.append(col)
                HETlist = ['HETNAM','HETSYN','HETATM']
                if j[0:3]=='HET' and j[0:6] not in HETlist:
                    colume = j.split()
                    if colume[1] not in HETlist:
                        HET.append(colume[1])
                    if colume[2] not in chainssssssla:
                        chainssssssla.append(colume[2])
            if proteinlig:
                ligand = '+'.join(proteinlig)
                chainID = '+'.join(chainls)
                search(ligand,'III',chainID)
            if DNAlig:
                ligand = '+'.join(DNAlig)
                chainID = '+'.join(chainls)
                search(ligand,'DNA',chainID)
            if RNAlig:
                ligand = '+'.join(RNAlig)
                chainID = '+'.join(chainls)
                search(ligand,'RNA',chainID)
            if otherlig:
                chainID = '+'.join(chainls)
                for ligand in otherlig:
                    search(ligand,ligand,chainID)
            if HET:
                chainID = '+'.join(chainssssssla)
                for ligand in HET:
                    search(ligand,ligand,chainID)
for list in sumlist:
    print(list)