Projects_ILs Parameterization

最新推荐文章于 2024-09-24 09:40:31 发布
weixin_30569001
最新推荐文章于 2024-09-24 09:40:31 发布
阅读量58
点赞数
文章标签：人工智能 shell python
原文链接：http://www.cnblogs.com/touchdown/p/5174295.html
版权
  1 #!/usr/bin/env python3
  2 
  3 ## README #####################################################################
  4 # 
  5 # This script is meant to measure pi-pi stacking between the solute and solvent
  6 # atoms. It will evaluate only those solvent atoms within a certain cutoff,
  7 # defined by the user (below).
  8 
  9 cutoff = 10.0
 10 
 11 # This cutoff distance is based upon the approximated center of the ring
 12 # containing the pi system. The center is approximated by using two of the
 13 # three coordinates given by the user. The two atoms which will approximate the
 14 # center need to be the second and third atoms defined in the lists below. The
 15 # atoms are defined by the atom labels as they appear in the pdb file. Please
 16 # note, this script works with pdb files ONLY.
 17 
 18 solute_atoms = ['C0C', 'C0E', 'C09']
 19 solvent_atoms = ['C02', 'N04', 'C05']
 20 
 21 # Please ensure that all three atoms are in the aromatic ring, as these three
 22 # atoms will be used to form the plane in which the pi system lies.   
 23 # 
 24 # The script will find the angles between the planes involved in the (hopefully
 25 # found) pi-pi stacking, as well as take an average and standard deviation.
 26 # Output for every solvent molecule evaluated is printed in an output file
 27 # named 'output_pi-pi.txt'.
 28 # 
 29 # !!The above two variables should be all that is required for the user to
 30 # change!!
 31 #
 32 ## INVOCATION #################################################################
 33 # 
 34 # Ensure that the script is marked executable or explicitly invoke python
 35 # (version 3.2 minimal) to run the script.  Any pdb file which you'd like to
 36 # analyze should be given as an argument. Shell expansion is handled
 37 # appropriately.
 38 # 
 39 # Example:
 40 # ./pipi.py *pdb
 41 # ./pipi.py d50plt5 d50plt10 d50plt15
 42 #
 43 # Note that the filenames do not need a pdb suffix, but the script relies on
 44 # the pdb format.  
 45 ##############################################################################
 46 #
 47 # Author: Billy Wayne McCann
 48 # email : thebillywayne@gmail.com
 49 # NOTE:  My code is purposefully verbose. don't hate. 
 50 ###############################################################################
 51 
 52 from sys import argv, exit
 53 from math import pow, degrees, sqrt, acos
 54 from sysconfig import get_python_version
 55 
 56 # make sure we're using at least version 3.2 of Python
 57 if float(get_python_version()) < 3.2:
 58     print("Requires Python 3.2 or higher.")
 59     exit(0)
 60 
 61 # make sure arguments have been given to the script
 62 if len(argv[1:]) == 0:
 63     print("Give pdb files as an argument to this script.")
 64     exit(0)
 65 else:
 66     pdbs = argv[1:]
 67 
 68 # species are separated in the pdb file by 'TER   '
 69 ter = 'TER   '
 70 
 71 # file in which to place output
 72 output = open('output_pi-pi.txt', 'w')
 73 
 74 ###  FUNCTIONS  ###############################################################
 75 
 76 def find_TERs(content):
 77     ''' Find where the TER's in the pdb file occur'''
 78 
 79     terlines = []
 80     terline = content.index(ter)
 81     terlines.append(terline)
 82     while True:
 83         try:
 84             terline = content.index(ter, terline+1)
 85             terlines.append(terline)
 86         except:
 87             return terlines
 88 
 89 def get_coordinates(species, atoms):
 90     ''' Find coordinates of atoms of interest of the species of interest,
 91     whether the solute or the solvent.  The atoms should have been defined in
 92     the solute_atoms and solvent_atoms variables'''
 93 
 94     atom_arrays = []
 95     coordinates = []
 96 
 97     # scan through species for specific atom entry and store into atom_arrays
 98     for entry in species:
 99         for atom in atoms:
100             if atom in entry:
101                 atom_arrays.append(entry)
102 
103     # extract only the desired elements from the atom_arrays and store them in
104     # coordinates list.
105     for element in atom_arrays:
106         array = element.split()
107         coordinates.append(array[4])
108         coordinates.append(array[5:8])
109 
110     # in binary solvents, the coordinates will sometimes not be found in a
111     # particular solvent molecule. return None and test for it in the main
112     # body.
113     if not coordinates:
114         return [None,None] 
115 
116     # the molecular id is the first entry in the coordinates list
117     mol_id = coordinates[0]
118 
119     # the actual coordinates are the even entries in the coordinates list
120     coordinates = [coordinates[i] for i in range(len(coordinates)) if float(i)
121                     % 2 != 0]
122     return mol_id, coordinates
123 
124 def vectorize(coordinate1, coordinate2):
125     '''Take coordinates and return a vector'''
126 
127     # extract x, y, z values from coordinates
128     # make them floats for the subtraction operations in the return statement
129     x1 = float(coordinate1[0])
130     x2 = float(coordinate2[0])
131     y1 = float(coordinate1[1])
132     y2 = float(coordinate2[1])
133     z1 = float(coordinate1[2])
134     z2 = float(coordinate2[2])
135     return [x2-x1, y2-y1, z2-z1]
136 
137 def dotproduct(vector1, vector2):
138     '''Return the dot product between two vectors'''
139 
140     return vector1[0]*vector2[0]+vector1[1]*vector2[1]+vector1[2]*vector2[2]
141 
142 def crossproduct(vector1, vector2):
143     '''Find the cross product between two vectors'''
144 
145     return [vector1[1]*vector2[2]-vector1[2]*vector2[1],
146             vector1[2]*vector2[0]-vector1[0]*vector2[2],
147             vector1[0]*vector2[1]-vector1[1]*vector2[0]]
148 
149 def magnitude(vector):
150     '''Return the magnitude of a vector'''
151 
152     return sqrt(vector[0]*vector[0]+vector[1]*vector[1]+
153             vector[2]*vector[2])
154 
155 def unit(vector):
156     '''Return the unit vector of a vector'''
157 
158     mag = magnitude(vector)
159     unit_vector = []
160     for scalar in vector:
161         unit_vector.append(scalar/mag)
162     return unit_vector
163 
164 def center(coordinate1, coordinate2):
165     ''' Given two coordinates, find the midpoint between them. 
166     This function is used to approximate the center of a species. '''
167     x1 = float(coordinate1[0])
168     x2 = float(coordinate2[0])
169     y1 = float(coordinate1[1])
170     y2 = float(coordinate2[1])
171     z1 = float(coordinate1[2])
172     z2 = float(coordinate2[2])
173 
174     return [(x2+x1)/2, (y2+y1)/2, (z2+z1)/2]
175 
176 def distance(coordinate1, coordinate2):
177     ''' Find the distance between two points'''
178     x1 = float(coordinate1[0])
179     x2 = float(coordinate2[0])
180     y1 = float(coordinate1[1])
181     y2 = float(coordinate2[1])
182     z1 = float(coordinate1[2])
183     z2 = float(coordinate2[2])
184 
185     return sqrt((x2-x1)**2 + (y2-y1)**2 + (z2-z1)**2)
186 
187 def normal(coordinates):
188     ''' Given three coordinates, find the normal to the plane created by the
189     three coordinates'''
190 
191     vector1 = vectorize(coordinates[0], coordinates[1])
192     vector2 = vectorize(coordinates[1], coordinates[2])
193 
194     return crossproduct(vector1, vector2)
195 
196 def calculate_angle(normal1, normal2):
197     ''' Calculate the angle between two planes.'''
198 
199     # make normals into unit vectors first
200     normal1 = unit(normal1)
201     normal2 = unit(normal2)
202 
203     # the angle between the two planes of atomic coordinates
204     # is the arccosine of dot product of the two normals.
205     return  degrees(acos(dotproduct(normal1, normal2)))
206 
207 def stddev(all_values, average_value):
208     '''Find the standard deviation of the angles'''
209 
210     # create a new list which is the difference between the average of the
211     # values and each individual value.
212     square_diff_from_average = [pow((i-average_value), 2) for i in all_values]
213 
214     # the standard deviation is square root of the sum of the differences
215     # between each each value and the average squared divided by the number of
216     # values in the list ('population' standard deviation).
217     return sqrt(sum(square_diff_from_average)/len(square_diff_from_average))
218 
219 ## MAIN  ######################################################################
220 
221 # initialize dictionary to store solvent data into
222 solvent_data = {} 
223 
224 print("Using a cutoff of {0} Angstroms.".format(cutoff))
225 output.write("Using a cutoff of {0} Angstroms.\n\n".format(cutoff))
226 
227 for pdb in pdbs:
228     with open(pdb, 'r') as pdb_file:
229         contents = pdb_file.read().split('\n')
230         TERlines = find_TERs(contents)
231 
232         # find solutes coordinates, center, and normal vector
233         solute = contents[0:TERlines[0]]
234         solute_coordinates = get_coordinates(solute, solute_atoms)[1]
235         solute_center = center(solute_coordinates[1], solute_coordinates[2]) 
236         solute_normal = normal(solute_coordinates)
237 
238         solvents = []
239         for i in range(len(TERlines)-1):
240             solvent = contents[TERlines[i]:TERlines[i+1]]
241             solvents.append(solvent)
242 
243         # keep track of how many solvents are within cutoff in each pdb
244         j = 0
245         for solvent in solvents:
246             solvent_name, solvent_coordinates = get_coordinates(solvent, solvent_atoms)
247             
248             if solvent_coordinates is None:
249                 continue
250 
251             solvent_name = int(solvent_name)
252             solvent_center = center(solvent_coordinates[1],
253                                     solvent_coordinates[2])
254             radius = distance(solute_center, solvent_center)
255 
256             if radius < cutoff:
257                 solvent_normal = normal(solvent_coordinates)
258                 angle = calculate_angle(solute_normal, solvent_normal)
259                 if solvent_name not in solvent_data:
260                     solvent_data[solvent_name] = {'pdb': [], 
261                             'distance': [], 'angle': []}
262                 solvent_data[solvent_name]['pdb'].append(pdb)
263                 solvent_data[solvent_name]['distance'].append(radius)
264                 solvent_data[solvent_name]['angle'].append(angle)
265                 j += 1
266 
267             else:
268                 continue
269 
270         print("PDB {0} contains {1} solvent(s) within the cutoff.".format(pdb, j))
271         output.write("PDB {0} contains {1} solvent(s) within the cutoff.\n".format(pdb, j))
272 
273 # end of for pdb loop
274 
275 output.write("\n")
276 
277 # analyze the accepted solvent data.
278 for accepted in sorted(solvent_data):
279     output.write('Solvent ID: {0}\n'.format(accepted))
280 
281     for index in range(len(solvent_data[accepted]['pdb'])):
282         output.write('+ PDB name: {0}\t\t'.format(solvent_data[accepted]['pdb'][index]))
283         output.write('Distance: {0:.2f}\t'.format(solvent_data[accepted]['distance'][index]))
284         output.write('Angle: {0:.2f}\t\n'.format(solvent_data[accepted]['angle'][index]))
285 
286     output.write('== AVERAGES ==\n')
287     average_distance = sum(solvent_data[accepted]['distance'])/len(solvent_data[accepted]['distance'])
288     distance_stddev = stddev(solvent_data[accepted]['distance'],
289                                 average_distance)
290     average_angle = sum(solvent_data[accepted]['angle'])/len(solvent_data[accepted]['angle'])
291     angle_stddev = stddev(solvent_data[accepted]['angle'], average_angle)
292     output.write("Distance:\t{0:.2f} +- ".format(average_distance))
293     output.write("{0:.2f}.\n".format(distance_stddev))
294     output.write("Angle:  \t{0:.2f} +- ".format(average_angle))
295     output.write("{0:.2f}.\n\n".format(angle_stddev))
296 
297 output.close()
298 exit(0)