这段代码适用于如下的文件组织形式:每个以蛋白质命名的文件夹下,都有对应的药物分子 sdf 文件和蛋白质 pdb 文件:
import sys
import torch
import os
from rdkit import Chem
from pathlib import Path
import glob
import csv
import ast
from ast import literal_eval
# Top-level dataset folder; each entry inside it is expected to be a
# per-protein folder that holds the ligand .sdf (and the protein .pdb).
DATA_ROOT = './PDBbind_v2020_refined/refined-set/'
# One SMILES string per row is written to this file.
OUTPUT_CSV = 'out1.csv'


def _iter_ligand_smiles(root):
    """Yield a SMILES string for every readable molecule below *root*.

    Every entry of *root* is searched for ``*.sdf`` files and each file is
    opened on its own.  Entries with no SDF file (plain files, or folders
    without a ligand) simply contribute nothing, and a folder with several
    SDF files has all of them read.  Directory and glob order are kept
    as-is so the row order of the output is not changed.
    """
    for name in os.listdir(root):
        entry_path = os.path.join(root, name)
        for sdf_path in glob.glob(os.path.join(entry_path, "*.sdf")):
            # sanitize=False keeps molecules that would fail RDKit's
            # chemistry checks; records that cannot be parsed at all still
            # come back as None and must not reach MolToSmiles.
            for mol in Chem.SDMolSupplier(sdf_path, sanitize=False):
                if mol is None:
                    print(f"skipping unreadable record in {sdf_path}",
                          file=sys.stderr)
                    continue
                yield Chem.MolToSmiles(mol)


def main(root=DATA_ROOT, out_path=OUTPUT_CSV):
    """Convert every ligand SDF under *root* to SMILES and write a CSV.

    Args:
        root: Folder whose entries are the per-protein folders.
        out_path: Destination CSV; it is overwritten and receives one
            SMILES string per row.

    Returns:
        The number of SMILES rows written.

    Raises:
        OSError: If *root* cannot be listed or *out_path* cannot be written.
    """
    # Collect first so a failure while reading leaves any existing output
    # file untouched instead of truncated.
    smiles = [smi for smi in _iter_ligand_smiles(root)]
    # newline='' is what the csv module requires to avoid blank rows on
    # Windows; the context manager closes the file even on error.
    with open(out_path, 'w', newline='') as out_file:
        csv.writer(out_file).writerows([smi] for smi in smiles)
    return len(smiles)


if __name__ == "__main__":
    main()