Package biana :: Package BianaObjects :: Module PDB'
[hide private]
[frames] | no frames]

Source Code for Module biana.BianaObjects.PDB'

  1  """ 
  2      BIANA: Biologic Interactions and Network Analysis 
  3      Copyright (C) 2009  Javier Garcia-Garcia, Emre Guney, Baldo Oliva 
  4   
  5      This program is free software: you can redistribute it and/or modify 
  6      it under the terms of the GNU General Public License as published by 
  7      the Free Software Foundation, either version 3 of the License, or 
  8      (at your option) any later version. 
  9   
 10      This program is distributed in the hope that it will be useful, 
 11      but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13      GNU General Public License for more details. 
 14   
 15      You should have received a copy of the GNU General Public License 
 16      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 17   
 18  """ 
 19   
 20  from Sequence import ProteinSequence, RNASequence, DNASequence 
 21  import biana.biana_globals as biana_globals 
 22   
 23  import traceback 
 24  import math 
 25  import gzip 
 26  import sys 
 27  import os 
 28  import re 
 29   
 30   
 31  temp_acc = 0 
32 -class PDB(object):
33
34 - def __init__(self, name, resolution=None):
35 36 self.name = name 37 self.resolution = resolution 38 39 self.num_atoms = 0 40 41 self.chains = {} # A dictionary of chain objects. Key: chain_name 42 # Value: Dictionary: 43 # Key: residue_num 44 # Value: Residue objects list 45 46 self.sorted_residues = {} # A list with the residues ordered by numeration. Key: chain_name 47 self.sorted_chains = None 48 49 self.C_structure = None 50 51 self.executed_dssp = False
52 53 #self.dssp_accessibility = None 54 #self.rel_dssp_accessibility = None 55 #self.dssp_ss = None 56 57 58 # METHOD TO CHECK 59 # BETTER IF IT WAS NOT DEPENDANT ON BIOPYTHON... TOO MANY DEPENDENCES
60 - def read_pdb_file(pdbFile, fragments=[], merge_fragments=False, chain_value='A'):
61 """ 62 merge_fragments is used to reenumerate residues in fragments and consider all the fragments as a single chain 63 64 "chain_value" is used only when merging fragments 65 """ 66 67 #print "Reading %s" %pdbFile 68 69 pdbObject = PDB(name=pdbFile) 70 71 # specify the chains to obtain 72 requested_chains = [ x.get_chain() for x in fragments ] 73 74 # ATOM 2331 CE1 HIS B 154 15.127 96.397 74.300 1.00100.00 C 75 # ATOM 3395 O GLY Y 221A 22.992 34.279 -11.344 1.00 26.95 2PKA35 76 # ATOM 18 CE1 TYR 2 12.696 -11.556 -8.351 1.00 45.77 1PPI 2 77 # ATOM 2084 NH2 ARG A 256 -5.028 36.209 68.974 1.00 47.90 N 78 # ATOM 2714 OD1 ASN B1076 59.163 -6.192 27.994 1.00 94.43 O 79 # ATOM 3229 CE LYS H 217 20.184 -2.298-100.965 1.00 28.60 1GIG33 80 # ATOM 1975 O HIS B 246 45.930 10.812 85.198 81 # ATOM 4 O GLY A -1 57.715 -4.038 -11.555 1.00 39.51 O 82 #atom_regex = re.compile("ATOM\s+(\d+)\s+(\w{1,4})\s*(\w+)\s+(\w{0,1})\s*(\-{0,1}\d+)\w*\s+(\-*\d+\.\d{3})\s*(\-*\d+\.\d{3})\s*(\-*\d+\.\d{3})\s+(\d+\.\d{2})\s*(\d*\.\d+)\s+") 83 84 #atom_regex = re.compile("ATOM\s+(\d+)\s+(\w{1,4})\s*(\w+)\s+(\w{0,1})\s*(\-{0,1}\d+)\w*\s+(\-*\d*\.\d{3})\s*(\-*\d*\.\d{3})\s*(\-*\d*\.\d{3})\s*") 85 atom_regex = re.compile("ATOM\s+(\d+)\s+(\w{1,4})\s*(\w+)\s+(\w{0,1})\s+(\-{0,1}\d+)\w*\s+(\-*\d*\.\d{3})\s*(\-*\d*\.\d{3})\s*(\-*\d*\.\d{3})\s*") 86 87 88 if pdbFile.endswith(".gz"): 89 pdb_fd = gzip.open(pdbFile) 90 else: 91 pdb_fd = open(pdbFile) 92 93 94 control_num = 0 95 96 residue_num_value = 0 97 98 current_residue_num = None 99 100 for line in pdb_fd: 101 102 if line.startswith("ATOM"): 103 m = atom_regex.match(line) 104 105 if m: 106 107 chain_id = m.group(4) 108 109 if chain_id.strip() == '': 110 chain_id = " " 111 112 residue_num = int(m.group(5)) 113 114 if len(fragments)>0: 115 if True in [ current_fragment.includes( chain=chain_id, res_num = residue_num ) for current_fragment in fragments ]: 116 selected_residue = True 117 else: 118 continue 119 if merge_fragments: 120 if residue_num != current_residue_num: 121 residue_num_value += 1 122 current_residue_num = residue_num 123 chain_id = chain_value 124 else: 125 residue_num_value = residue_num 126 127 128 current_atom = PDBAtom( atom_num = int(m.group(1)), 129 atom_type = m.group(2).strip(), # TO CHECK!!! type==name?? 130 atom_name = m.group(2).strip(), 131 x = float(m.group(6)), 132 y = float(m.group(7)), 133 z = float(m.group(8)) ) 134 control_num += 1 135 pdbObject.add_atom( atomObject = current_atom, chain_name = chain_id, residue_num = residue_num_value, residue_type = m.group(3) ) 136 137 else: 138 sys.stderr.write("Alert. Following atom does not match with regular expression\n%s" %line) 139 140 if control_num == 0: 141 raise ValueError("No residues were added") 142 143 return pdbObject 144 145 # Using BioPython strategy: 146 # Commented because it needed many libraries and crashed in some ocasions when running pprc 147 148 from Bio.PDB.PDBParser import PDBParser 149 150 parser = PDBParser() 151 152 temp_struct = parser.get_structure(pdbFile,pdbFile) 153 154 if len(temp_struct.get_list())>1: 155 print "ALERT. PDB %s has more than a single model" %pdbFile 156 157 pdbObject = PDB(name=pdbFile) 158 159 # specify the chains to obtain 160 requested_chains = [ x.get_chain() for x in fragments ] 161 162 #if chain=='-': 163 # chain_list = temp_struct[0].get_list() 164 #else: 165 # chain_list = [temp_struct[0][chain]] 166 chain_list = temp_struct[0].get_list() 167 168 residue_num_value = 0 169 170 control_num = 0 171 172 for current_chain in chain_list: 173 174 #if current_chain.get_id() not in requested_chains: 175 # continue 176 for current_residue in current_chain.get_list(): 177 if current_residue.get_resname() == 'HOH' or current_residue.get_resname() == 'GDP' or current_residue.get_resname()==" MG": 178 continue 179 180 #Determine if residue is requested or not 181 residue_num = int(current_residue.get_id()[1]) 182 183 if merge_fragments: 184 chain_id = 'A' 185 else: 186 chain_id = current_chain.get_id() 187 188 if len(fragments)>0: 189 if True in [ current_fragment.includes( chain=current_chain.get_id(), res_num = residue_num ) for current_fragment in fragments ]: 190 selected_residue = True 191 else: 192 continue 193 194 #print "Adding residue %s from chain %s" %(current_residue.get_resname(),current_chain.get_id()) 195 196 #print "Residue selected!" 197 control_num = control_num + 1 198 199 if merge_fragments: 200 residue_num_value += 1 201 else: 202 residue_num_value = residue_num 203 204 205 for current_atom in current_residue.get_list(): 206 current_atom = PDBAtom( atom_num = current_atom.get_serial_number(), 207 atom_type = current_atom.get_name(), 208 atom_name = current_atom.get_name(), 209 x = current_atom.get_coord()[0], 210 y = current_atom.get_coord()[1], 211 z = current_atom.get_coord()[2]) 212 pdbObject.add_atom( atomObject = current_atom, chain_name = chain_id, residue_num = residue_num_value, residue_type = current_residue.get_resname()) 213 214 if control_num == 0: 215 raise ValueError("No residues were added") 216 217 #print "Added %s residues" %control_num 218 return pdbObject
219 220 read_pdb_file = staticmethod(read_pdb_file) 221 222
223 - def get_rel_dssp_accessibility(self, chain_name):
224 225 if self.executed_dssp is False: 226 self._get_dssp_results() 227 228 return [ self.chains[chain_name][x].dssp_rel_accessibility for x in self._get_sorted_residues(chain_name=chain_name) ]
229 230 #if self.rel_dssp_accessibility is None: 231 232 # dssp_results = self._get_dssp_results() 233 234 # self.dssp_accessibility = dssp_results[0] 235 # self.rel_dssp_accessibility = dssp_results[1] 236 # self.dssp_ss = dssp_results[2] 237 238 #return self.rel_dssp_accessibility 239
240 - def get_dssp_accessibility(self, chain_name):
241 242 if self.executed_dssp is False: 243 self._get_dssp_results() 244 245 return [ self.chains[chain_name][x].dssp_accessibility for x in self._get_sorted_residues(chain_name=chain_name) ]
246 247 #if self.dssp_accessibility is None: 248 249 # dssp_results = self._get_dssp_results() 250 251 # self.dssp_accessibility = dssp_results[0] 252 # self.rel_dssp_accessibility = dssp_results[1] 253 # self.dssp_ss = dssp_results[2] 254 255 #return self.dssp_accessibility 256
257 - def get_dssp_ss(self, chain_name):
258 259 if self.executed_dssp is False: 260 self._get_dssp_results() 261 262 return [ self.chains[chain_name][x].dssp_ss for x in self._get_sorted_residues(chain_name=chain_name) ]
263 264 #if self.dssp_ss is None: 265 266 # dssp_results = self._get_dssp_results() 267 268 # self.dssp_accessibility = dssp_results[0] 269 # self.rel_dssp_accessibility = dssp_results[1] 270 # self.dssp_ss = dssp_results[2] 271 272 #return self.dssp_ss 273
274 - def set_resolution(self, resolution):
275 self.resolution = float(resolution)
276
277 - def _get_sorted_residues(self, chain_name):
278 if self.sorted_residues[chain_name] is None: 279 self.sorted_residues[chain_name] = self.chains[chain_name].keys() 280 self.sorted_residues[chain_name].sort() 281 return self.sorted_residues[chain_name]
282
283 - def _get_sorted_chains(self):
284 """ 285 Returns a list with the chain names sorted 286 """ 287 if self.sorted_chains is None: 288 self.sorted_chains = self.chains.keys() 289 self.sorted_chains.sort() 290 291 return self.sorted_chains
292
293 - def get_chain_names(self):
294 return self.chains.keys()
295
296 - def get_num_atoms(self):
297 return self.num_atoms
298
299 - def get_chain_num_atoms(self, chain_name):
300 temp = [ x.get_num_atoms() for x in self.chains[chain_name].values() ] 301 return sum(temp)
302
303 - def get_residues(self,chain_name):
304 return self.chains[chain_name].values()
305
306 - def get_num_residues(self,chain_name=None):
307 if chain_name is None: 308 return sum([len(x) for x in self.chains.values()]) 309 else: 310 return len(self.chains[chain_name])
311
312 - def get_name(self):
313 return self.name
314
315 - def add_residue(self, chain_name, residue_num=0, atoms_initial_list=[], residue_type=None,hssp_conservation=None, hssp_entropy=None, hssp_exposure=None, hssp_norm_entropy=None, hssp_variability=None, dssp=None, residue_object = None):
316 """ 317 To add residue. Chain_name is mandatory always. residue_num and atoms_initial_list are mandatory only if residue_object is not specified 318 319 if residue_object is specified, the rest of parameters except chain_name are not used 320 """ 321 322 residue_num = int(residue_num) 323 324 if residue_object is None: 325 residue_object = PDBResidue(residue_num = residue_num, residue_type=residue_type, atoms_initial_list = atoms_initial_list, 326 hssp_conservation=hssp_conservation, hssp_entropy=hssp_entropy, hssp_exposure=hssp_exposure, 327 hssp_norm_entropy=hssp_norm_entropy, hssp_variability=hssp_variability, dssp=dssp) 328 else: 329 residue_num = residue_object.residue_num 330 331 try: 332 if self.chains[chain_name].has_key(residue_num): 333 print "Trying to insert twice the same residue... error?" 334 #[ self.chains[chain_name][residue_num].add_atom(atomObject) for atomObject in atoms_list ] 335 else: 336 self.chains[chain_name][residue_num] = residue_object 337 self.sorted_residues[chain_name]=None 338 except: 339 self.chains[chain_name] = {residue_num: residue_object} 340 self.sorted_residues[chain_name]=None
341 342
343 - def add_atom(self, chain_name, residue_num, residue_type=None, atomObject=None):
344 """ 345 Atom object can be None in order to indicate that it must be added a residue but no atoms information 346 """ 347 #self.atoms.append(atomObject) 348 self.num_atoms += 1 349 residue_num = int(residue_num) 350 try: 351 if self.chains[chain_name].has_key(residue_num): 352 self.chains[chain_name][residue_num].add_atom(atomObject) 353 else: 354 self.chains[chain_name][residue_num] = PDBResidue(residue_num = residue_num, residue_type=residue_type, atoms_initial_list = [atomObject]) 355 self.sorted_residues[chain_name]=None 356 except: 357 self.chains[chain_name] = {residue_num: PDBResidue(residue_num = residue_num, residue_type=residue_type, atoms_initial_list = [atomObject])} 358 self.sorted_residues[chain_name]=None
359 360
361 - def get_pdb_summary(self):
362 363 return "PDB with id %s, with %s chains, %s residues and %s atoms" %(self.get_name(), 364 len(self.chains), 365 sum([len(x) for x in self.chains.values()]), 366 self.num_atoms)
367 - def get_in_pdb_format(self):
368 """ 369 Gets a string with the pdb atoms in PDB format 370 """ 371 372 lines = [] 373 374 lines.append("HEADER") 375 lines.append("COMPND compount") 376 lines.append("SOURCE") 377 lines.append("AUTHOR") 378 379 for actual_chain in self.chains.keys(): 380 sorted_residues = self.chains[actual_chain].keys() 381 sorted_residues.sort() 382 for actual_residue in sorted_residues: 383 residue_object = self.chains[actual_chain][actual_residue] 384 lines.extend( ["ATOM%s %s%s%s%s%s%s%s" %(str(actual_atom.get_num()).rjust(7), 385 str(actual_atom.get_name()).ljust(3), 386 residue_object.get_residue_type().rjust(4), 387 str(actual_chain).rjust(2), 388 str(residue_object.get_residue_num()).rjust(4), 389 ("%.3f" %float(actual_atom.get_x())).rjust(12), 390 ("%.3f" %float(actual_atom.get_y())).rjust(8), 391 ("%.3f" %float(actual_atom.get_z())).rjust(8)) for actual_atom in residue_object.get_atoms() ] ) 392 393 return "\n".join(lines)+"\nTER"
394 395
396 - def get_sequence(self, chain_name=None):
397 """ 398 Returns a dictionary with chains as keys and sequence string as values 399 """ 400 401 if chain_name is not None: 402 return {chain_name: Sequence.ProteinSequence("".join( [ ProteinSequence.get_aminoacid_code_3to1(self.chains[chain_name][x].get_residue_type()) for x in self._get_sorted_residues(chain_name=chain_name)] ), sequenceID=self.name+"_"+chain_name)} 403 else: 404 if len(self.chains)>1: 405 print "ALERT: Trying to get the sequence from a PDB with more than a single chain." 406 407 chains = self.chains.keys() 408 409 return dict( [(chain_name, Sequence.ProteinSequence("".join( [ ProteinSequence.get_aminoacid_code_3to1(self.chains[chain_name][x].get_residue_type()) for x in self._get_sorted_residues(chain_name=chain_name)] ), sequenceID=self.name+"_"+chain_name)) for chain_name in chains ] )
410 411
412 - def get_conservation(self, chain_name):
413 414 return [ self.chains[chain_name][x].get_hssp_conservation() for x in self._get_sorted_residues(chain_name=chain_name) ]
415
416 - def get_entropy(self, chain_name):
417 418 return [ self.chains[chain_name][x].get_hssp_entropy() for x in self._get_sorted_residues(chain_name=chain_name) ]
419
420 - def get_accessibility(self, chain_name):
421 422 return [ self.chains[chain_name][x].get_hssp_accessibility() for x in self._get_sorted_residues(chain_name=chain_name) ]
423
424 - def get_norm_entropy(self, chain_name):
425 426 return [ self.chains[chain_name][x].get_hssp_norm_entropy() for x in self._get_sorted_residues(chain_name=chain_name) ]
427
428 - def get_variability(self, chain_name):
429 430 return [ self.chains[chain_name][x].get_hssp_variability() for x in self._get_sorted_residues(chain_name=chain_name) ]
431
432 - def get_dssp(self, chain_name):
433 434 return "".join([ self.chains[chain_name][x].dssp for x in self._get_sorted_residues(chain_name=chain_name) ])
435
436 - def get_dssp_as_horiz(self, chain_name):
437 """ 438 Returns the dssp result in psi-pred nomenclature 439 """ 440 441 return self.get_dssp(chain_name=chain_name).replace('T','C').replace('G','C').replace('S','C').replace(' ','C').replace('B','E')
442 443 444
445 - def filter_residues(self, binary_list):
446 """ 447 Returns a new PDB object with the residues filtered by binary_list 448 449 The PDB must contain a single chain 450 """ 451 452 if len(self.chains) != 1: 453 raise ValueError("Trying to filter a PDB with a more than a single chain") 454 455 chain_name = self.chains.keys()[0] 456 457 new_pdb = PDB(name="filtered_%s" %self.name) 458 459 sorted_residues = self._get_sorted_residues(chain_name) 460 461 if len(sorted_residues) != len(binary_list): 462 raise ValueError("PDB has a distinct number of residues (%s) than binary list (%s)" %(self.get_num_residues(chain_name),len(binary_list))) 463 464 for actual_residue_x in xrange(len(sorted_residues)): 465 if binary_list[actual_residue_x]==1: 466 residue_object = self.chains[chain_name][sorted_residues[actual_residue_x]] 467 new_pdb.add_residue( chain_name = chain_name, residue_object = residue_object ) 468 469 return new_pdb
470 471
472 - def get_contacting_pairs_indices(self, pdb2, chains1, chains2, type = "min", cutoff=5.0):
473 """ 474 Returns the indices!!! Not the residue nums!!!! 475 """ 476 # TODO!!!! to check because copied directly from get_distances... 477 478 if type.lower() == "min": 479 method = PDBResidue.get_min_distance 480 elif type.lower() == "mean": 481 method = PDBResidue.get_mean_distance 482 elif type.lower() == "ca": 483 method = PDBResidue.get_ca_distance 484 elif type.lower() == "cb": 485 method = PDBResidue.get_cb_distance 486 else: 487 raise "Trying to calculate distances with a method unknown (%s)" %type 488 489 distances_list = [] 490 491 distances = {} 492 493 for actual_chain1 in chains1: 494 residues1 = self._get_sorted_residues(chain_name = actual_chain1) 495 distances[actual_chain1] = {} 496 distances_list = [] 497 for actual_chain2 in chains2: 498 residues2 = pdb2._get_sorted_residues(chain_name = actual_chain2) 499 contacting_pairs = [] 500 res_index1 = 0 501 for actual_residue1 in residues1: 502 res_index2 = 0 503 for actual_residue2 in residues2: 504 if method(self.chains[actual_chain1][actual_residue1],residue2=pdb2.chains[actual_chain2][actual_residue2]) <= cutoff: 505 contacting_pairs.append((res_index1,res_index2)) 506 res_index2 += 1 507 res_index1 += 1 508 509 return contacting_pairs
510
511 - def get_distances(self, pdb2, chains1, chains2, type="min", fragments1=None, fragments2=None):
512 """ 513 "type" can be "min", "mean", "ca" or "cb" 514 515 "fragments" is used to determine which fragments are going to be used to calculate distances 516 TODO!!! 517 518 For the moment, it returns a dictionary with the wcm of the different chain combinations 519 """ 520 521 #chains1 = self._get_sorted_chains() 522 #chains2 = pdb2._get_sorted_chains() 523 524 global temp_acc 525 temp_acc = 0 526 527 try: 528 import pprc 529 except: 530 raise ValueError("It is necessary pprc module to execute this method") 531 532 if type.lower() == "min": 533 method = PDBResidue.get_min_distance 534 elif type.lower() == "mean": 535 method = PDBResidue.get_mean_distance 536 elif type.lower() == "ca": 537 method = PDBResidue.get_ca_distance 538 elif type.lower() == "cb": 539 method = PDBResidue.get_cb_distance 540 else: 541 raise "Trying to calculate distances with a method unknown (%s)" %type 542 543 distances_list = [] 544 545 distances = {} 546 547 for actual_chain1 in chains1: 548 residues1 = self._get_sorted_residues(chain_name = actual_chain1) 549 distances[actual_chain1] = {} 550 distances_list = [] 551 for actual_chain2 in chains2: 552 #a=1 553 #print "CHAIN %s - %s" %(actual_chain1, actual_chain2) 554 residues2 = pdb2._get_sorted_residues(chain_name = actual_chain2) 555 #counter+=len(self.chains[actual_chain1])*len(pdb2.chains[actual_chain2].keys()) 556 distances_list.extend( [ method(self.chains[actual_chain1][actual_residue1],residue2=pdb2.chains[actual_chain2][actual_residue2]) 557 for actual_residue1 in residues1 for actual_residue2 in residues2 ] ) 558 559 #if actual_chain1 != actual_chain2: 560 #contacting_pairs = [] 561 #for actual_residue1 in residues1: 562 # for actual_residue2 in residues2: 563 # if method(self.chains[actual_chain1][actual_residue1],residue2=pdb2.chains[actual_chain2][actual_residue2]) < 5.0: 564 # #print "Contact between residue %s[%s] - %s[%s]: %s" %(self.chains[actual_chain1][actual_residue1].residue_num,actual_chain1, 565 # # pdb2.chains[actual_chain2][actual_residue2].residue_num,actual_chain2, 566 # # method(self.chains[actual_chain1][actual_residue1],residue2=pdb2.chains[actual_chain2][actual_residue2])) 567 568 distances[actual_chain1][actual_chain2] = pprc.wcm(rows = self.get_num_residues(chain_name = actual_chain1), 569 columns = pdb2.get_num_residues(chain_name = actual_chain2), 570 values = distances_list, 571 description = "Distances between cb atoms between structure %s chain %s and %s chain %s" %(self.name, 572 actual_chain1, 573 pdb2.name, 574 actual_chain2 ), 575 type="DISTANCE_TYPE") 576 577 return distances
578 579 580
581 - def _parse_dssp_results(self, fp):
582 583 # relative_accessibilities = [ None for x in self.] 584 # accessibilities = [] 585 # ss = [] 586 587 surface = {'A': 115, 588 'C': 149, 589 'D': 170, 590 'E': 207, 591 'F': 230, 592 'G': 86, 593 'H': 206, 594 'I': 187, 595 'K': 222, 596 'L': 192, 597 'M': 210, 598 'N': 184, 599 'P': 140, 600 'Q': 208, 601 'R': 263, 602 'S': 140, 603 'T': 164, 604 'V': 161, 605 'W': 269, 606 'Y': 257} 607 608 609 start_regex = re.compile(" # RESIDUE AA STRUCTURE BP1 BP2") 610 in_results = None 611 612 temp_seq = [] 613 614 for line in fp: 615 616 if in_results is None: 617 if re.search(start_regex,line): 618 in_results = 1 619 continue 620 621 #Skipping chain breaks? 622 if line[13]=="!": 623 continue 624 625 temp_seq.append(line[13]) 626 acc = int(line[35:38]) 627 #accessibilities.append(acc) 628 chain = line[11] 629 res_num = int(line[5:10].strip()) 630 res_type = line[13] 631 632 residue_object = self.chains[chain][res_num] 633 634 residue_object.dssp_accessibility = acc 635 if ProteinSequence.get_aminoacid_code_3to1(residue_object.get_residue_type())!=res_type and res_type!="X": 636 print ProteinSequence.get_aminoacid_code_3to1(residue_object.get_residue_type()) 637 print res_type 638 sys.stderr.write("DSSP LINE: %s" %line) 639 raise ValueError("Discordance between PDB and DSSP") 640 #if( float(acc)*100/surface[line[13]] > 100 ): 641 # print "How can be a percentage greater than 100? residue: %s. Accessible area: %s, total surface: %s" %(line[13],acc,surface[line[13]]) 642 try: 643 residue_object.dssp_rel_accessibility = float(acc)*100/surface[line[13]] 644 #relative_accessibilities.append(float(acc)*100/surface[line[13]]) 645 except: 646 print "The code arrives here? if yes... why??" 647 pass 648 #print line 649 #traceback.print_exc() 650 #relative_accessibilities.append(0) #Added becuase if not it produces an error 651 652 if line[16]==' ': 653 residue_object.dssp_ss = 'N' 654 #ss.append('N') 655 else: 656 residue_object.dssp_ss = line[16]
657 #ss.append(line[16]) 658 659 #print "DSSP SEQ: %s" %"".join(temp_seq) 660 #return (accessibilities,relative_accessibilities,ss) 661
662 - def safe_traceback(self):
663 # Child processes catch exceptions so that they can exit using 664 # os._exit() without fanfare. They use this function to print 665 # the traceback to stderr before dying. 666 #import traceback 667 sys.stderr.write("Error in child process, pid %d.\n" %os.getpid()) 668 sys.stderr.flush() 669 traceback.print_exc() 670 sys.stderr.flush()
671
672 - def _get_dssp_results(self):
673 """ 674 Executes the program dssp to get the accessibility 675 """ 676 677 import popen2 678 import readline 679 680 dssp_program = biana_globals.DSSP_EXEC 681 args = ["--"] 682 683 print dssp_program 684 685 # Prepare pipes 686 p_readfd, c_writefd = os.pipe() 687 c_readfd, p_writefd = os.pipe() 688 689 if os.fork(): 690 # Parent 691 for fd in (c_readfd, c_writefd, p_writefd): 692 os.close(fd) 693 # Convert the pipe fd to a file object, so we can use its 694 # read() method to read all data. 695 fp = os.fdopen(p_readfd, 'r') 696 697 #results = 698 self._parse_dssp_results(fp) 699 fp.close() # Will close p_readfd. 700 701 #return results 702 703 else: 704 # Child 705 try: 706 if os.fork(): 707 # Still the same child 708 os.write(p_writefd,self.get_in_pdb_format()) 709 else: 710 # Grandchild 711 try: 712 # Redirect the pipe to stdin. 713 os.close(0) 714 os.dup(c_readfd) 715 # Redirect stdout to the pipe. 716 os.close(1) 717 os.dup(c_writefd) 718 719 # Trying to close stderr 720 os.close(2) 721 722 # Now close unneeded descriptors. 723 for fd in (c_readfd, c_writefd, p_readfd, p_writefd): 724 os.close(fd) 725 # Finally, execute the external command. 726 print dssp_program 727 print args 728 os.execv(dssp_program, [dssp_program] + args) 729 except: 730 self.safe_traceback() 731 os._exit(127) 732 except: 733 self.safe_traceback() 734 os._exit(127) 735 else: 736 os._exit(0) 737 738 self.executed_dssp = True
739 740
741 -class PDBResidue(object):
742
743 - def __init__(self, residue_num, residue_type, atoms_initial_list=[], hssp_conservation=None, hssp_entropy=None, hssp_exposure=None, hssp_norm_entropy=None, hssp_variability=None, dssp=None):
744 self.residue_num = int(residue_num) 745 self.residue_type = residue_type 746 747 self.atoms = [] 748 749 for current_atom in atoms_initial_list: 750 self.add_atom(current_atom) 751 752 self.ca_index = None 753 self.cb_index = None 754 755 #hssp related 756 self.hssp_conservation = hssp_conservation 757 self.hssp_entropy = hssp_entropy 758 self.hssp_exposure = hssp_exposure 759 self.hssp_norm_entropy = hssp_norm_entropy 760 self.hssp_variability = hssp_variability 761 self.dssp = dssp 762 763 self.dssp_accessibility = None 764 self.dssp_rel_accessibility = None 765 self.dssp_ss = None
766
767 - def get_dssp(self):
768 return self.dssp
769
770 - def get_hssp_conservation(self):
771 return self.hssp_conservation
772
773 - def get_hssp_entropy(self):
774 return self.hssp_entropy
775
776 - def get_hssp_accessibility(self):
777 return self.hssp_exposure
778
779 - def get_hssp_norm_entropy(self):
780 return self.hssp_norm_entropy
781
782 - def get_hssp_variability(self):
783 return self.hssp_variability
784
785 - def add_atom(self, atomObject):
786 if atomObject.atom_name == "CA": 787 self.ca_index = len(self.atoms) 788 elif atomObject.atom_name == "CB": 789 self.cb_index = len(self.atoms) 790 self.atoms.append(atomObject)
791
792 - def get_number_atoms(self):
793 return len(self.atoms)
794
795 - def get_residue_num(self):
796 return self.residue_num
797
798 - def get_atoms(self):
799 return self.atoms
800
801 - def get_residue_type(self):
802 return self.residue_type
803
804 - def get_num_atoms(self):
805 return len(self.atoms)
806
807 - def get_min_distance(self, residue2):
808 809 return min([x.get_distance(y) for x in self.atoms for y in residue2.atoms])
810
811 - def get_mean_distance(self, residue2):
812 813 raise "TODO" 814 return sum([ x.get_distance(atom2=y) for x in self.atoms for y in residue2.atoms ])/(len(self.atoms)*len(residue2.atoms))
815
816 - def get_ca_distance(self, residue2):
817 if self.ca_index and residue2.ca_index: 818 return self.atoms[self.ca_index].get_distance(atom2 = residue2.atoms[residue2.ca_index]) 819 else: 820 print "Trying to calculate distances between alpha atoms and coordinates are not known" 821 return None
822
823 - def get_cb_distance(self, residue2):
824 825 if self.cb_index and residue2.cb_index: 826 distance= self.atoms[self.cb_index].get_distance(atom2 = residue2.atoms[residue2.cb_index]) 827 elif self.ca_index and residue2.cb_index: 828 distance= self.atoms[self.ca_index].get_distance(atom2 = residue2.atoms[residue2.cb_index]) 829 elif self.cb_index and residue2.ca_index: 830 distance= self.atoms[self.cb_index].get_distance(atom2 = residue2.atoms[residue2.ca_index]) 831 elif self.ca_index and residue2.ca_index: 832 distance= self.atoms[self.ca_index].get_distance(atom2 = residue2.atoms[residue2.ca_index]) 833 else: 834 print "Trying to calculate distances between beta atoms and coordinates are not known for cb and ca: Residue %s %s" %(self.residue_num, self.residue_type) 835 distance= None 836 837 global temp_acc 838 839 #if distance <= 8: 840 #print "COntact %s(%s) - %s(%s): %s" %(self.residue_num,self.residue_type,residue2.residue_num, residue2.residue_type, distance) 841 842 if( distance <= 8 ): 843 #print "Contact %s(%s) - %s(%s): %s" %(self.residue_num,self.residue_type,residue2.residue_num, residue2.residue_type, distance) 844 temp_acc = temp_acc+1 845 846 return distance
847 848
849 -class PDBAtom(object):
850
851 - def __init__(self, atom_num, atom_type, atom_name, x, y, z):
852 853 self.atom_num = int(atom_num) 854 self.atom_type = atom_type 855 self.atom_name = atom_name 856 self.x = x 857 self.y = y 858 self.z = z
859
860 - def get_coordinates(self):
861 return (self.x,self.y,self.z)
862
863 - def get_x(self):
864 return self.x
865
866 - def get_y(self):
867 return self.y
868
869 - def get_z(self):
870 return self.z
871
872 - def get_name(self):
873 return self.atom_name
874
875 - def get_type(self):
876 return self.atom_type
877
878 - def get_num(self):
879 return self.atom_num
880
881 - def get_distance(self, atom2):
882 dx = self.x - atom2.x 883 dy = self.y - atom2.y 884 dz = self.z - atom2.z 885 return math.sqrt(dx*dx+dy*dy+dz*dz)
886 887
888 -class PDBFragment(object):
889 """ 890 Class to represent a pdb fragment 891 """ 892
893 - def __init__(self, start_residue=None, end_residue=None, chain=None):
894 895 896 self.start_residue = start_residue 897 self.end_residue = end_residue 898 self.chain = chain
899
900 - def __str__(self):
901 return "PDB Fragment. Chain: %s. Start: %s. End: %s" %(self.chain, self.start_residue, self.end_residue)
902
903 - def __repr__(self):
904 return self.__str__()
905
906 - def get_start_residue(self):
907 return self.start_residue
908
909 - def get_end_residue(self):
910 return self.end_residue
911
912 - def get_chain(self):
913 return self.chain
914
915 - def includes(self, chain, res_num):
916 """ 917 Determines if the residue belongs to this fragment or not 918 """ 919 920 if self.chain is None or self.chain==chain: 921 if self.start_residue is None and self.end_residue is None: 922 return True 923 elif self.start_residue is None and self.end_residue>=res_num: 924 return True 925 elif self.end_residue is None and self.start_residue<=res_num: 926 return True 927 928 #print "Not included" 929 #print "\tChain:\t%s (%s)" %(self.chain,chain) 930 #print "\Range: %s-%s (%s)" %(self.start_residue,self.end_residue,res_num) 931 return False
932 933
934 - def fragment_parser(fragment_str, separator=';'):
935 936 frag_regex = re.compile("(\w*)\:*(\-*\d*)\-*(\-*\d*)") 937 938 939 splitted = fragment_str.split(separator) 940 941 942 fragments = [] 943 944 for current_splitted in splitted: 945 946 if current_splitted.strip()=='': 947 continue 948 949 if current_splitted=="-": 950 current_splitted='' 951 fragments.append(PDBFragment( chain = None, start_residue = None, end_residue = None )) 952 continue 953 954 m = frag_regex.match(current_splitted) 955 956 if m: 957 if m.group(1) == '': 958 chain = None 959 else: 960 chain = m.group(1) 961 962 if m.group(2)=='': 963 start_residue = None 964 else: 965 start_residue = int(m.group(2)) 966 967 if m.group(3)=='': 968 end_residue = None 969 else: 970 end_residue = int(m.group(3)) 971 972 fragments.append(PDBFragment( chain = chain, start_residue = start_residue, end_residue = end_residue )) 973 else: 974 print "Fragment %s does not match with regular expresion" %current_splitted 975 976 return fragments
977 978 fragment_parser = staticmethod(fragment_parser)
979