1 #!/usr/bin/env python3 2 3 ## README ##################################################################### 4 # 5 # This script is meant to measure pi-pi stacking between the solute and solvent 6 # atoms. It will evaluate only those solvent atoms within a certain cutoff, 7 # defined by the user (below). 8 9 cutoff = 10.0 10 11 # This cutoff distance is based upon the approximated center of the ring 12 # containing the pi system. The center is approximated by using two of the 13 # three coordinates given by the user. The two atoms which will approximate the 14 # center need to be the second and third atoms defined in the lists below. The 15 # atoms are defined by the atom labels as they appear in the pdb file. Please 16 # note, this script works with pdb files ONLY. 17 18 solute_atoms = ['C0C', 'C0E', 'C09'] 19 solvent_atoms = ['C02', 'N04', 'C05'] 20 21 # Please ensure that all three atoms are in the aromatic ring, as these three 22 # atoms will be used to form the plane in which the pi system lies. 23 # 24 # The script will find the angles between the planes involved in the (hopefully 25 # found) pi-pi stacking, as well as take an average and standard deviation. 26 # Output for every solvent molecule evaluated is printed in an output file 27 # named 'output_pi-pi.txt'. 28 # 29 # !!The above two variables should be all that is required for the user to 30 # change!! 31 # 32 ## INVOCATION ################################################################# 33 # 34 # Ensure that the script is marked executable or explicitly invoke python 35 # (version 3.2 minimal) to run the script. Any pdb file which you'd like to 36 # analyze should be given as an argument. Shell expansion is handled 37 # appropriately. 38 # 39 # Example: 40 # ./pipi.py *pdb 41 # ./pipi.py d50plt5 d50plt10 d50plt15 42 # 43 # Note that the filenames do not need a pdb suffix, but the script relies on 44 # the pdb format. 45 ############################################################################## 46 # 47 # Author: Billy Wayne McCann 48 # email : thebillywayne@gmail.com 49 # NOTE: My code is purposefully verbose. don't hate. 50 ############################################################################### 51 52 from sys import argv, exit 53 from math import pow, degrees, sqrt, acos 54 from sysconfig import get_python_version 55 56 # make sure we're using at least version 3.2 of Python 57 if float(get_python_version()) < 3.2: 58 print("Requires Python 3.2 or higher.") 59 exit(0) 60 61 # make sure arguments have been given to the script 62 if len(argv[1:]) == 0: 63 print("Give pdb files as an argument to this script.") 64 exit(0) 65 else: 66 pdbs = argv[1:] 67 68 # species are separated in the pdb file by 'TER ' 69 ter = 'TER ' 70 71 # file in which to place output 72 output = open('output_pi-pi.txt', 'w') 73 74 ### FUNCTIONS ############################################################### 75 76 def find_TERs(content): 77 ''' Find where the TER's in the pdb file occur''' 78 79 terlines = [] 80 terline = content.index(ter) 81 terlines.append(terline) 82 while True: 83 try: 84 terline = content.index(ter, terline+1) 85 terlines.append(terline) 86 except: 87 return terlines 88 89 def get_coordinates(species, atoms): 90 ''' Find coordinates of atoms of interest of the species of interest, 91 whether the solute or the solvent. The atoms should have been defined in 92 the solute_atoms and solvent_atoms variables''' 93 94 atom_arrays = [] 95 coordinates = [] 96 97 # scan through species for specific atom entry and store into atom_arrays 98 for entry in species: 99 for atom in atoms: 100 if atom in entry: 101 atom_arrays.append(entry) 102 103 # extract only the desired elements from the atom_arrays and store them in 104 # coordinates list. 105 for element in atom_arrays: 106 array = element.split() 107 coordinates.append(array[4]) 108 coordinates.append(array[5:8]) 109 110 # in binary solvents, the coordinates will sometimes not be found in a 111 # particular solvent molecule. return None and test for it in the main 112 # body. 113 if not coordinates: 114 return [None,None] 115 116 # the molecular id is the first entry in the coordinates list 117 mol_id = coordinates[0] 118 119 # the actual coordinates are the even entries in the coordinates list 120 coordinates = [coordinates[i] for i in range(len(coordinates)) if float(i) 121 % 2 != 0] 122 return mol_id, coordinates 123 124 def vectorize(coordinate1, coordinate2): 125 '''Take coordinates and return a vector''' 126 127 # extract x, y, z values from coordinates 128 # make them floats for the subtraction operations in the return statement 129 x1 = float(coordinate1[0]) 130 x2 = float(coordinate2[0]) 131 y1 = float(coordinate1[1]) 132 y2 = float(coordinate2[1]) 133 z1 = float(coordinate1[2]) 134 z2 = float(coordinate2[2]) 135 return [x2-x1, y2-y1, z2-z1] 136 137 def dotproduct(vector1, vector2): 138 '''Return the dot product between two vectors''' 139 140 return vector1[0]*vector2[0]+vector1[1]*vector2[1]+vector1[2]*vector2[2] 141 142 def crossproduct(vector1, vector2): 143 '''Find the cross product between two vectors''' 144 145 return [vector1[1]*vector2[2]-vector1[2]*vector2[1], 146 vector1[2]*vector2[0]-vector1[0]*vector2[2], 147 vector1[0]*vector2[1]-vector1[1]*vector2[0]] 148 149 def magnitude(vector): 150 '''Return the magnitude of a vector''' 151 152 return sqrt(vector[0]*vector[0]+vector[1]*vector[1]+ 153 vector[2]*vector[2]) 154 155 def unit(vector): 156 '''Return the unit vector of a vector''' 157 158 mag = magnitude(vector) 159 unit_vector = [] 160 for scalar in vector: 161 unit_vector.append(scalar/mag) 162 return unit_vector 163 164 def center(coordinate1, coordinate2): 165 ''' Given two coordinates, find the midpoint between them. 166 This function is used to approximate the center of a species. ''' 167 x1 = float(coordinate1[0]) 168 x2 = float(coordinate2[0]) 169 y1 = float(coordinate1[1]) 170 y2 = float(coordinate2[1]) 171 z1 = float(coordinate1[2]) 172 z2 = float(coordinate2[2]) 173 174 return [(x2+x1)/2, (y2+y1)/2, (z2+z1)/2] 175 176 def distance(coordinate1, coordinate2): 177 ''' Find the distance between two points''' 178 x1 = float(coordinate1[0]) 179 x2 = float(coordinate2[0]) 180 y1 = float(coordinate1[1]) 181 y2 = float(coordinate2[1]) 182 z1 = float(coordinate1[2]) 183 z2 = float(coordinate2[2]) 184 185 return sqrt((x2-x1)**2 + (y2-y1)**2 + (z2-z1)**2) 186 187 def normal(coordinates): 188 ''' Given three coordinates, find the normal to the plane created by the 189 three coordinates''' 190 191 vector1 = vectorize(coordinates[0], coordinates[1]) 192 vector2 = vectorize(coordinates[1], coordinates[2]) 193 194 return crossproduct(vector1, vector2) 195 196 def calculate_angle(normal1, normal2): 197 ''' Calculate the angle between two planes.''' 198 199 # make normals into unit vectors first 200 normal1 = unit(normal1) 201 normal2 = unit(normal2) 202 203 # the angle between the two planes of atomic coordinates 204 # is the arccosine of dot product of the two normals. 205 return degrees(acos(dotproduct(normal1, normal2))) 206 207 def stddev(all_values, average_value): 208 '''Find the standard deviation of the angles''' 209 210 # create a new list which is the difference between the average of the 211 # values and each individual value. 212 square_diff_from_average = [pow((i-average_value), 2) for i in all_values] 213 214 # the standard deviation is square root of the sum of the differences 215 # between each each value and the average squared divided by the number of 216 # values in the list ('population' standard deviation). 217 return sqrt(sum(square_diff_from_average)/len(square_diff_from_average)) 218 219 ## MAIN ###################################################################### 220 221 # initialize dictionary to store solvent data into 222 solvent_data = {} 223 224 print("Using a cutoff of {0} Angstroms.".format(cutoff)) 225 output.write("Using a cutoff of {0} Angstroms.\n\n".format(cutoff)) 226 227 for pdb in pdbs: 228 with open(pdb, 'r') as pdb_file: 229 contents = pdb_file.read().split('\n') 230 TERlines = find_TERs(contents) 231 232 # find solutes coordinates, center, and normal vector 233 solute = contents[0:TERlines[0]] 234 solute_coordinates = get_coordinates(solute, solute_atoms)[1] 235 solute_center = center(solute_coordinates[1], solute_coordinates[2]) 236 solute_normal = normal(solute_coordinates) 237 238 solvents = [] 239 for i in range(len(TERlines)-1): 240 solvent = contents[TERlines[i]:TERlines[i+1]] 241 solvents.append(solvent) 242 243 # keep track of how many solvents are within cutoff in each pdb 244 j = 0 245 for solvent in solvents: 246 solvent_name, solvent_coordinates = get_coordinates(solvent, solvent_atoms) 247 248 if solvent_coordinates is None: 249 continue 250 251 solvent_name = int(solvent_name) 252 solvent_center = center(solvent_coordinates[1], 253 solvent_coordinates[2]) 254 radius = distance(solute_center, solvent_center) 255 256 if radius < cutoff: 257 solvent_normal = normal(solvent_coordinates) 258 angle = calculate_angle(solute_normal, solvent_normal) 259 if solvent_name not in solvent_data: 260 solvent_data[solvent_name] = {'pdb': [], 261 'distance': [], 'angle': []} 262 solvent_data[solvent_name]['pdb'].append(pdb) 263 solvent_data[solvent_name]['distance'].append(radius) 264 solvent_data[solvent_name]['angle'].append(angle) 265 j += 1 266 267 else: 268 continue 269 270 print("PDB {0} contains {1} solvent(s) within the cutoff.".format(pdb, j)) 271 output.write("PDB {0} contains {1} solvent(s) within the cutoff.\n".format(pdb, j)) 272 273 # end of for pdb loop 274 275 output.write("\n") 276 277 # analyze the accepted solvent data. 278 for accepted in sorted(solvent_data): 279 output.write('Solvent ID: {0}\n'.format(accepted)) 280 281 for index in range(len(solvent_data[accepted]['pdb'])): 282 output.write('+ PDB name: {0}\t\t'.format(solvent_data[accepted]['pdb'][index])) 283 output.write('Distance: {0:.2f}\t'.format(solvent_data[accepted]['distance'][index])) 284 output.write('Angle: {0:.2f}\t\n'.format(solvent_data[accepted]['angle'][index])) 285 286 output.write('== AVERAGES ==\n') 287 average_distance = sum(solvent_data[accepted]['distance'])/len(solvent_data[accepted]['distance']) 288 distance_stddev = stddev(solvent_data[accepted]['distance'], 289 average_distance) 290 average_angle = sum(solvent_data[accepted]['angle'])/len(solvent_data[accepted]['angle']) 291 angle_stddev = stddev(solvent_data[accepted]['angle'], average_angle) 292 output.write("Distance:\t{0:.2f} +- ".format(average_distance)) 293 output.write("{0:.2f}.\n".format(distance_stddev)) 294 output.write("Angle: \t{0:.2f} +- ".format(average_angle)) 295 output.write("{0:.2f}.\n\n".format(angle_stddev)) 296 297 output.close() 298 exit(0)