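# Use the backbone N atom as the marker for advancing the amino-acid residue number; each N atom increases the number by 1.
# Two numbering parameters are set up: count and res_num.
# Two comparison parameters are set up: cur_res and nex_res.
# Note that both the receptor and the ligand must be numbered starting from 1, hence the two parameters start and tar_start.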
import os
URL = r"./pdb_modify/val_pdb_modify"  # directory that is scanned for input PDB files
save_path = r"./pdb_modify/val_pdb"  # directory the renumbered files are written to


def renumber_chains(lines, receptor_id, target_id):
    """Renumber the residues of the receptor and target chains from 1.

    Only ``ATOM`` records whose chain identifier (column 22, ``line[21]``)
    equals ``receptor_id`` or ``target_id`` are kept; every other line is
    dropped. Each chain has its own counter. The counter advances whenever
    a backbone ``N`` atom is read, and the current value is written into
    the residue-number field (columns 23-26, ``line[22:26]``) of every
    atom of that chain. Atoms that appear before the first ``N`` atom of
    their chain therefore receive residue number 0.

    Args:
        lines: Iterable of PDB text lines (line terminators are preserved).
        receptor_id: Chain identifier of the receptor.
        target_id: Chain identifier of the target/ligand.

    Returns:
        A list with the renumbered ``ATOM`` lines in their input order.
    """
    # One counter per chain; if both identifiers are equal they share it.
    counters = {receptor_id: 0, target_id: 0}
    renumbered = []
    for line in lines:
        # The record name sits at the start of the line. A substring test
        # would also pick up e.g. REMARK lines that merely mention "ATOM".
        # Lines too short to carry a residue number are ignored.
        if not line.startswith("ATOM") or len(line) < 26:
            continue
        chain_id = line[21]
        if chain_id not in counters:
            continue
        # Compare the stripped atom-name field (columns 13-16) so that the
        # check does not depend on the exact padding of the literal.
        if line[12:16].strip() == "N":
            counters[chain_id] += 1
        renumbered.append(f"{line[:22]}{counters[chain_id]:4d}{line[26:]}")
    return renumbered


def main():
    """Renumber every file found in ``URL`` and write it to ``save_path``.

    The chain identifiers are read from the file name: the second
    underscore-separated field is the receptor chain and the third field
    (taken from the name up to its first dot) is the target chain. The
    output file is named after that same dot-free stem with a ``.pdb``
    suffix.
    """
    filenames = os.listdir(URL)
    # exist_ok lets the script be re-run without failing on the existing
    # output directory.
    os.makedirs(save_path, exist_ok=True)
    for file in filenames:
        file_path = os.path.join(URL, file)
        file_name = file.split('.')[0]
        receptor_id = file.split('_')[1]
        target_id = file_name.split('_')[2]
        with open(file_path, 'r') as f:
            rows = f.readlines()
        output_ = renumber_chains(rows, receptor_id, target_id)
        with open(f'{save_path}/{file_name}.pdb', 'w') as f:
            f.writelines(output_)


if __name__ == "__main__":
    main()