Package biana :: Package BianaParser :: Module biopaxLevel2Parser
[hide private]
[frames] | no frames]

Source Code for Module biana.BianaParser.biopaxLevel2Parser

  1  """ 
  2      BIANA: Biologic Interactions and Network Analysis 
  3      Copyright (C) 2009  Javier Garcia-Garcia, Emre Guney, Baldo Oliva 
  4   
  5      This program is free software: you can redistribute it and/or modify 
  6      it under the terms of the GNU General Public License as published by 
  7      the Free Software Foundation, either version 3 of the License, or 
  8      (at your option) any later version. 
  9   
 10      This program is distributed in the hope that it will be useful, 
 11      but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13      GNU General Public License for more details. 
 14   
 15      You should have received a copy of the GNU General Public License 
 16      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 17   
 18  """ 
 19   
 20  """ 
 21  File        : biopaxLevel2Parser.py 
 22  Author      : Javier Garcia Garcia 
 23  Creation    : June 2008 
 24  Contents    : fills up tables in database biana with information from a BIOPAX formatted database 
 25  Called from :  
 26   
 27  ======================================================================================================= 
 28  """ 
 29   
 30  from bianaParser import * 
 31  from xml.sax import saxutils, handler, make_parser 
 32  from XMLNode import XMLNode 
 33  import copy 
 34  import sets 
 35   
 36   
class BiopaxEntity(object):
    """
    Common base of the BioPAX Level 2 element wrappers used by this parser.

    An instance is built from a parsed XML node, keeps the generic BioPAX
    properties found on it (name, short name, synonyms, comments, xrefs,
    organism) and stores itself in BIANA through toBiana(). Subclasses
    provide the concrete BIANA object by overriding _get_biana_object().
    """

    # Class-level state shared by all elements; it is assigned from outside
    # this class before any element is converted.
    resources = None               # "#<rdf:ID>" -> Biopax object
    controlled_relations = None    # controlled rdf:resource -> list of control objects

    database = None                # passed as source_database to created BIANA objects
    dbaccess = None                # its insert_new_external_entity() stores the entity
    _not_recognized = set()        # xref database names already reported as unknown

    datatype_to_biana_type = { "uniprot": "uniprotaccession",
                               "ncbi_taxonomy": "taxID",
                               "tigr": "TIGR",
                               "tigr cna": "TIGR",
                               "tigr eha": "TIGR",
                               "tigr osa": "TIGR",
                               "reactome": "Reactome",
                               "chebi": "CHEBI",
                               "go": "GO",
                               "pubchem compound" : "PubchemCompound",
                               "glycan" : "keggCode",
                               "compound" : "keggCode",
                               #"entrez": "gi",
                               "entrez": "refseq",    # Previous sept 08 it was gi...
                               "pubmed": "pubmed",
                               "embl": "AccessionNumber",
                               "ensembl": "ensembl",
                               "wormbase": "wormbasesequencename",
                               "sgd": "sgd",
                               "flybase": "flybase" }

    # The three functions below are plain functions stored in
    # datatype_operations while the class body executes; they are always
    # called through that dictionary, never as methods.
    def _identity(a):
        return a

    def _entrez_funct(a):
        # Returns a list: [accession] or [accession, version, ...]
        # for gi:
        #return str(a).replace("gi|","")
        return str(a).split(".")

    def _reactome_funct(a):
        if a.startswith("REACT_"):
            return a[6:]
        else:
            return a

    # Every known database uses the identity conversion unless overridden.
    datatype_operations = dict.fromkeys(datatype_to_biana_type, _identity)
    datatype_operations["entrez"] = _entrez_funct
    datatype_operations["reactome"] = _reactome_funct

    # Patterns used when mining names and comments (at least for Reactome).
    _COMMENT_SECTION_REGEX = re.compile("(FUNCTION|CATALYTIC ACTIVITY|DISEASE|SUBCELLULAR LOCATION|SIMILARITY|DATABASE)")
    # NOTE(review): the last "." of the EC pattern is unescaped in the
    # original code and is kept as is -- confirm whether "\." was intended.
    _EC_REGEX = re.compile(r"EC\s+(\d*\.\d*\.\d*.\d*)")
    _MIM_REGEX = re.compile(r"\[MIM\:(\d+)\]")

    # Comment section label -> BIANA attribute identifier. Sections that are
    # recognised by the regex but absent here are ignored
    # (e.g. "Where insert catalytic activity?" was left open by the authors).
    _COMMENT_SECTION_ATTRIBUTES = { "FUNCTION": "function",
                                    "DISEASE": "disease",
                                    "SUBCELLULAR LOCATION": "SubcellularLocation" }

    def __init__(self, XMLNode):
        """
        Reads the rdf:ID of the node and parses its children with
        set_attributes() (which subclasses may override).
        """
        self.rdf_id = XMLNode.attrs["rdf:ID"]

        self.synonyms = []
        self.comments = []
        self.data_source = None
        self.short_name = None
        self.availability = None
        self.name = None
        self.xrefs = []

        self.organism = None
        self.set_attributes(XMLNode)

        self.biana_object = None

    def set_attributes(self, XMLNode):
        """
        Stores the generic BioPAX properties found among the node children.
        Single-valued properties keep the last value seen; XREF, SYNONYMS
        and COMMENT values are accumulated in lists.
        """
        for current_child in XMLNode.getChilds():
            if current_child.name == "bp:NAME":
                self.name = current_child.getValue()
            elif current_child.name == "bp:ORGANISM":
                self.organism = current_child.attrs["rdf:resource"]
            elif current_child.name == "bp:XREF":
                self.xrefs.append(current_child.attrs["rdf:resource"])
            elif current_child.name == "bp:SYNONYMS":
                self.synonyms.append(current_child.getValue())
            elif current_child.name == "bp:SHORT_NAME":
                self.short_name = current_child.getValue()
            elif current_child.name == "bp:COMMENT":
                self.comments.append(current_child.getValue())

    def _get_biana_object(self):
        """
        Must be overridden: returns the BIANA object for this element, or
        None when the element is not inserted by itself.
        """
        raise ValueError("%s has not implemented _get_biana_object" %self)

    @staticmethod
    def _split_comment_sections(comment):
        """
        Splits a comment on the known section labels and returns a list of
        (label, text following the label) tuples, in order of appearance.
        """
        tokens = BiopaxEntity._COMMENT_SECTION_REGEX.split(comment)
        # With a capturing group, split() yields text, label, text, label,
        # ..., text: labels sit at odd positions and are always followed by
        # a (possibly empty) text token.
        return [ (tokens[i], tokens[i+1]) for i in range(1, len(tokens)-1, 2) ]

    def _add_descriptive_attributes(self, externalEntity):
        """Adds name, short name, synonyms, organism and raw comments."""

        # PROBLEM: What to do with very large names? There are names with more than 255 characters...
        # As this happens, name is inserted as a description, and short name as name
        if self.name is not None:
            externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "description", value = self.name ) )

        if self.short_name is not None:
            externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "name", value = self.short_name, type = "synonym" ) )

        # In reactome, synonyms seem to be geneSymbols.... Should they be inserted as gene symbols?
        for current_synonym in self.synonyms:
            externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "name", value = current_synonym, type = "synonym" ) )

        # Add organism if defined
        if self.organism:
            organism_obj = BiopaxEntity.resources[self.organism]
            if organism_obj.tax_ref is not None:
                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "taxID",
                                                                       value = BiopaxEntity.resources[organism_obj.tax_ref].id,
                                                                       type = "unique" ) )

        # Add comments as a description
        for current_comment in self.comments:
            externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "description", value = current_comment ) )

    def _add_comment_section_attributes(self, externalEntity):
        """Adds function / disease / subcellular location found in comments."""
        for current_comment in self.comments:
            for label, text in BiopaxEntity._split_comment_sections(current_comment):
                attribute_identifier = BiopaxEntity._COMMENT_SECTION_ATTRIBUTES.get(label)
                # Skip unmapped sections and labels with nothing after them
                if attribute_identifier is None or not text.strip():
                    continue
                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = attribute_identifier, value = text ) )

    def _add_ec_and_mim_attributes(self, externalEntity):
        """
        Searches for EC and MIM codes in name, short name and comments
        (maybe not necessary because they are defined as unificationXrefs?)
        """
        for current_text in (self.name, self.short_name):
            if current_text is not None:
                for current_ec in BiopaxEntity._EC_REGEX.findall(current_text):
                    externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "ec", value = current_ec, type = "cross-reference" ) )

        for current_comment in self.comments:
            for current_ec in BiopaxEntity._EC_REGEX.findall(current_comment):
                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "ec", value = current_ec, type = "cross-reference" ) )
            for current_mim in BiopaxEntity._MIM_REGEX.findall(current_comment):
                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "mim", value = current_mim, type = "cross-reference" ) )

    def _add_xref_attributes(self, externalEntity):
        """Adds every xref whose database is known as a cross-reference."""
        for current_xref in self.xrefs:
            xref_object = BiopaxEntity.resources[current_xref]
            if xref_object.id is None or xref_object.db is None:
                continue

            db_key = xref_object.db.lower()
            if db_key not in BiopaxEntity.datatype_to_biana_type:
                # Report each unknown database only once
                if xref_object.db not in BiopaxEntity._not_recognized:
                    print("%s  not recognized" % xref_object.db)
                    BiopaxEntity._not_recognized.add(xref_object.db)
                continue

            attribute_identifier = BiopaxEntity.datatype_to_biana_type[db_key]
            value = BiopaxEntity.datatype_operations[db_key](xref_object.id)

            if isinstance(value,list) and len(value) > 1:
                # Versioned identifier: [accession, version, ...]
                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = attribute_identifier,
                                                                       value = value[0],
                                                                       version = value[1],
                                                                       type = "cross-reference" ) )
            else:
                # A one-element list is an identifier without version
                if isinstance(value,list):
                    value = value[0]
                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = attribute_identifier,
                                                                       value = value,
                                                                       type = "cross-reference" ) )

    def _add_controllers(self, externalEntity):
        """
        Checks if this external entity (should be a relation) is catalyzed
        or modulated by other entities and adds them as participants with
        their role. If it is not a relation it will give an exception.
        """
        relation_key = '#'+self.rdf_id
        if relation_key not in BiopaxEntity.controlled_relations:
            return

        for current_controller in BiopaxEntity.controlled_relations[relation_key]:
            control_obj = BiopaxEntity.resources['#'+current_controller.rdf_id]

            if control_obj.controller_xref is None:
                print("control object %s has no contoller_xref" % control_obj.rdf_id)
                continue

            controller = BiopaxEntity.resources[control_obj.controller_xref]
            participant_eEid = controller.toBiana()

            if participant_eEid is None:
                raise ValueError("In BiopaxEntity. %s" %controller)
            externalEntity.add_participant( externalEntityID = participant_eEid )
            if control_obj.control_type is None:
                raise ValueError("How is it possible to not have a controller role?")
            externalEntity.add_participant_attribute( externalEntityID = participant_eEid,
                                                      participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "role",
                                                                                                                         value = control_obj.control_type ) )

    def toBiana( self ):
        """
        Add general attributes to external entity and adds it to the database

        returns the external entity id assigned to it, or None when the
        element has no BIANA object of its own. An entity that already has
        an id is not inserted again.
        """
        externalEntity = self._get_biana_object()

        if externalEntity is None:
            return

        if externalEntity.get_id() is not None:
            return externalEntity.get_id()

        self._add_descriptive_attributes(externalEntity)
        self._add_comment_section_attributes(externalEntity)
        self._add_ec_and_mim_attributes(externalEntity)
        self._add_xref_attributes(externalEntity)
        self._add_controllers(externalEntity)

        BiopaxEntity.dbaccess.insert_new_external_entity( externalEntity = externalEntity )

        return externalEntity.get_id()
264 265 266
267 -class BiopaxPhysicalEntityParticipant(BiopaxEntity):
268 """ 269 Biopax definition: any additional special characteristics of a physical entity in the context of an interaction or complex. These currently include stoichiometric coefficient and cellular location, but this list may be expanded in later levels. 270 """ 271
272 - def __init__(self, XMLNode):
273 274 self.cellular_location_xref = None 275 self.stoichiometric_coefficient = None 276 self.physical_entity_xref = None 277 self.sequence_features_list = [] 278 BiopaxEntity.__init__(self, XMLNode)
279
280 - def set_attributes(self, XMLNode):
281 for current_child in XMLNode.getChilds(): 282 if current_child.name == "bp:CELLULAR-LOCATION": 283 self.cellular_location_xref = current_child.attrs["rdf:resource"] 284 elif current_child.name == "bp:PHYSICAL-ENTITY": 285 self.physical_entity_xref = current_child.attrs["rdf:resource"] 286 elif current_child.name == "bp:STOICHIOMETRIC-COEFFICIENT": 287 self.stoichiometric_coefficient = current_child.getValue() 288 elif current_child.name == "bp:SEQUENCE-FEATURE-LIST": 289 # TO CHECK WHAT TO DO 290 pass 291 BiopaxEntity.set_attributes(self,XMLNode)
292
293 - def _get_biana_object(self):
294 if self.biana_object is None: 295 BiopaxEntity.toBiana(BiopaxEntity.resources[self.physical_entity_xref]) 296 self.biana_object = BiopaxEntity.resources[self.physical_entity_xref]._get_biana_object() 297 return self.biana_object
298
300 """ 301 """ 302 if self.stoichiometric_coefficient is not None and self.stoichiometric_coefficient!="1" and self.stoichiometric_coefficient!=1: 303 eEr.add_participant_attribute( externalEntityID = self.toBiana(), 304 participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "cardinality", 305 value = self.stoichiometric_coefficient ) ) 306 307 if self.cellular_location_xref is not None: 308 309 cellular_location_xref = BiopaxEntity.resources[self.cellular_location_xref] 310 xref = BiopaxEntity.resources[cellular_location_xref.xref] 311 if xref.db.lower()=="go": 312 eEr.add_participant_attribute( externalEntityID = self.toBiana(), 313 participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "go", value = xref.id ) ) 314 else: 315 print "If cellular location is not in GO... in with controlled vocabulary is? It is %s" %xref.db
316 317
class BiopaxPathway(BiopaxEntity):
    """
    BioPAX pathway, stored in BIANA as a relation of type "pathway" whose
    participants are the ids obtained from its PATHWAY-COMPONENTS.
    """

    def __init__(self, XMLNode):
        self.evidence = None
        self.pathway_components = []
        BiopaxEntity.__init__(self, XMLNode)

    def set_attributes(self, XMLNode):
        """Collects the PATHWAY-COMPONENTS references, then the generic attributes."""
        for current_child in XMLNode.getChilds():
            if current_child.name == "bp:PATHWAY-COMPONENTS":
                self.pathway_components.append(current_child.attrs["rdf:resource"])
        BiopaxEntity.set_attributes(self,XMLNode)

    def _get_biana_object(self):
        """
        Builds (once) and returns the "pathway" relation.

        A component's toBiana() may return a single id, a list of ids
        (pathway steps) or None; None values are skipped.
        """
        if self.biana_object is None:

            eEr = ExternalEntityRelation( source_database = BiopaxEntity.database, relation_type = "pathway" )

            for current_component in self.pathway_components:
                component_obj = BiopaxEntity.resources[current_component]
                eE_ids_list = component_obj.toBiana()
                if eE_ids_list is not None:
                    if isinstance(eE_ids_list,list):
                        for current_id in eE_ids_list:
                            if current_id is not None:
                                eEr.add_participant( externalEntityID = current_id )
                    else:
                        eEr.add_participant( externalEntityID = eE_ids_list )

                # Only pathway steps define add_participants_to_eEr; calling
                # it unconditionally raised AttributeError for any other
                # kind of component (interactions, nested pathways).
                add_participants = getattr(component_obj, "add_participants_to_eEr", None)
                if add_participants is not None:
                    add_participants( eEr = eEr )

            self.biana_object = eEr

        return self.biana_object
355 356
class BiopaxPathwayStep(BiopaxEntity):
    """
    A step in a pathway
    Multiple interactions may occur in a pathway step, each should be listed in the STEP-INTERACTIONS property.

    A step is not inserted in BIANA by itself (_get_biana_object returns
    None); it only gives access to the ids of its interactions.
    """

    def __init__(self, XMLNode):
        self.step_interactions_xref = []
        self.next_xref = None
        # BiopaxEntity.__init__ already calls self.set_attributes(XMLNode);
        # calling it here as well stored every step interaction twice.
        BiopaxEntity.__init__(self, XMLNode)

    def set_attributes(self, XMLNode):
        """Reads NEXT-STEP and STEP-INTERACTIONS; generic attributes are not parsed."""
        for current_child in XMLNode.getChilds():
            if current_child.name == "bp:NEXT-STEP":   # NOT USED FOR THE MOMENT...
                self.next_xref = current_child.attrs["rdf:resource"]
            elif current_child.name == "bp:STEP-INTERACTIONS":
                self.step_interactions_xref.append(current_child.attrs["rdf:resource"])

    def toBiana(self):
        """Returns the list of toBiana() results of the step interactions (items may be None)."""
        return [ BiopaxEntity.resources[current_step].toBiana() for current_step in self.step_interactions_xref ]

    def add_participants_to_eEr(self, eEr):
        """Adds to eEr every step interaction that yields an id."""
        for current_interaction in self.step_interactions_xref:
            participant_id = BiopaxEntity.resources[current_interaction].toBiana()
            # None is returned, for example, by control elements: skip them
            if participant_id is not None:
                eEr.add_participant( externalEntityID = participant_id )

    def _get_biana_object(self):
        return None
393 394 #def _get_biana_object(self): 395 396 # return [ BiopaxEntity.resources[current_step]._get_biana_object() for current_step in self.step_interactions_xref ] 397 398
class BiopaxInteraction(BiopaxEntity):
    """
    Generic BioPAX interaction, stored in BIANA as a relation of type
    "interaction".
    """

    def __init__(self, XMLNode):
        self.participants = None
        self.evidence = None
        # The node must be handed to the base initialiser (it was omitted,
        # which raised TypeError for every bp:interaction element). The base
        # initialiser calls self.set_attributes(XMLNode) itself.
        BiopaxEntity.__init__(self, XMLNode)

    def set_attributes(self, XMLNode):
        # TODO: nothing is read from the node yet (not even the generic
        # attributes handled by BiopaxEntity.set_attributes)
        pass

    def _get_biana_object(self):
        """Creates the relation on first use and returns the cached object afterwards."""
        if self.biana_object is None:
            self.biana_object = ExternalEntityRelation( source_database = BiopaxEntity.database, relation_type = "interaction" )
        return self.biana_object
416 417
class BiopaxPhysicalInteraction(BiopaxEntity):
    """Interaction stored in BIANA as a relation of type "interaction"."""

    def _get_biana_object(self):
        """Returns the cached relation, creating it on first use."""
        if self.biana_object is not None:
            return self.biana_object

        relation = ExternalEntityRelation(source_database=BiopaxEntity.database,
                                          relation_type="interaction")
        self.biana_object = relation
        return self.biana_object
425 426
class BiopaxConversion(BiopaxPhysicalInteraction):
    """Conversion stored in BIANA as a relation of type "reaction"."""

    def _get_biana_object(self):
        """
        Returns the cached relation, creating it on first use. A warning is
        printed on every call because conversions are only partly supported.
        """
        print("CONVERSION NOT WELL IMPLEMENTED YET!!!")
        if self.biana_object is not None:
            return self.biana_object

        relation = ExternalEntityRelation(source_database=BiopaxEntity.database,
                                          relation_type="reaction")
        self.biana_object = relation
        return self.biana_object
434 435
class BiopaxComplexAssembly(BiopaxConversion):
    """Complex assembly conversion. Not supported yet."""

    def toBiana(self):
        """
        Always raises ValueError: complex assemblies are not implemented.

        TODO: decide how to store it (as a "reaction" relation? complex as
        an interaction?). The unreachable draft that followed the raise was
        removed; it called BiopaxEntity.toBiana with an extra argument that
        method does not accept.
        """
        raise ValueError("COMPLEX ASSEMBLY NOT IMPLEMENTED YET")
446 447
class BiopaxTransport(BiopaxConversion):
    """Transport conversion. Not supported yet."""

    def toBiana(self):
        """
        Always raises ValueError: transports are not implemented.

        TODO: decide how to store it (as a "reaction" relation?). The
        unreachable draft that followed the raise was removed; it called
        BiopaxEntity.toBiana with an extra argument that method does not
        accept.
        """
        raise ValueError("TRANSPORT NOT IMPLEMENTED YET")
457 458
class BiopaxBiochemicalReaction(BiopaxConversion):
    """
    Biochemical reaction, stored in BIANA as a relation of type "reaction"
    whose LEFT participants get the role "substrate" and whose RIGHT
    participants get the role "product".
    """

    def __init__(self, XMLNode):
        self.left_xrefs = []
        self.right_xrefs = []
        self.ec_number = []
        # BiopaxEntity.__init__ already calls self.set_attributes(XMLNode);
        # the second explicit call duplicated every left/right participant,
        # EC number, xref, synonym and comment.
        BiopaxEntity.__init__(self, XMLNode)

    def set_attributes(self, XMLNode):
        """Reads LEFT, RIGHT and EC-NUMBER children, then the generic attributes."""
        for current_child in XMLNode.getChilds():
            if current_child.name == "bp:LEFT":
                self.left_xrefs.append(current_child.attrs["rdf:resource"])
            elif current_child.name == "bp:RIGHT":
                self.right_xrefs.append(current_child.attrs["rdf:resource"])
            elif current_child.name == "bp:EC-NUMBER":
                self.ec_number.append(current_child.getValue())
        BiopaxEntity.set_attributes(self,XMLNode)

    def _add_participants_with_role(self, eEr, xrefs, role):
        """
        Adds to eEr the participants referenced by xrefs, tagging each one
        with the given role. Raises ValueError when a participant cannot be
        converted to a BIANA id.
        """
        for current_xref in xrefs:
            participant_obj = BiopaxEntity.resources[current_xref]
            eEid = participant_obj.toBiana()
            if eEid is None:
                raise ValueError(participant_obj)
            eEr.add_participant( externalEntityID = eEid )
            eEr.add_participant_attribute( externalEntityID = eEid,
                                           participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "role", value = role ) )
            participant_obj.add_participant_attributes_to_relation( eEr = eEr )

    def _get_biana_object(self):
        """Builds (once) and returns the "reaction" relation."""
        if self.biana_object is None:

            eEr = ExternalEntityRelation( source_database = BiopaxEntity.database, relation_type = "reaction" )

            # ADD EC TO A REACTION? ADD A TYPE AS CROSS-REFERENCE?
            for current_ec in self.ec_number:
                eEr.add_attribute( ExternalEntityRelationAttribute( attribute_identifier = "ec", value = current_ec ) )

            self._add_participants_with_role( eEr, self.left_xrefs, "substrate" )
            self._add_participants_with_role( eEr, self.right_xrefs, "product" )

            self.biana_object = eEr

        return self.biana_object
512 513
class BiopaxControl(BiopaxPhysicalInteraction):
    """
    An interaction in which one entity regulates, modifies, or otherwise influences another. Two types of control interactions are defined: activation and inhibition
    """

    # CONTROL Instances are not inserted as a relation itself, but forming part of the reactions they control

    # BioPAX CONTROL-TYPE value -> role stored for the controller participant
    _CONTROL_TYPE_TO_ROLE = { "ACTIVATION": "activates",
                              "INHIBITION": "inhibits",
                              "INHIBITION-ALLOSTERIC": "allosteric_inhibition",
                              "INHIBITION-COMPETITIVE": "competitive_inhibition",
                              "INHIBITION-IRREVERSIBLE": "irreversible_inhibition",
                              "INHIBITION-NONCOMPETITIVE": "non_competitive_inhibition",
                              "INHIBITION-OTHER": "inhibits",
                              "INHIBITION-UNCOMPETITIVE": "uncompetitive_inhibition",
                              "ACTIVATION-NONALLOSTERIC": "nonallosteric_activation",
                              "ACTIVATION-ALLOSTERIC": "allosteric_activation" }

    def __init__(self, XMLNode):
        """
        Parses the node and registers this control under the reference of
        the relation it controls in BiopaxEntity.controlled_relations.
        """
        self.controller_xref = None
        self.controlled_xref = None
        self.control_type = None
        # BiopaxEntity.__init__ already calls self.set_attributes(XMLNode);
        # the second explicit call duplicated the xrefs, synonyms and
        # comments accumulated by BiopaxEntity.set_attributes.
        BiopaxEntity.__init__(self, XMLNode)

        if self.controlled_xref is not None:
            BiopaxEntity.controlled_relations.setdefault(self.controlled_xref,[]).append(self)

    def set_attributes(self, XMLNode):
        """
        Reads CONTROLLER, CONTROLLED and CONTROL-TYPE, then the inherited
        attributes. Raises ValueError for an unknown CONTROL-TYPE value.
        """
        for current_child in XMLNode.getChilds():
            if current_child.name == "bp:CONTROLLER":
                self.controller_xref = current_child.attrs["rdf:resource"]
            elif current_child.name == "bp:CONTROLLED":
                self.controlled_xref = current_child.attrs["rdf:resource"]
            elif current_child.name == "bp:CONTROL-TYPE":
                control_type = current_child.getValue()
                try:
                    self.control_type = BiopaxControl._CONTROL_TYPE_TO_ROLE[control_type]
                except KeyError:
                    raise ValueError("Control type %s not recognized" %control_type)

        BiopaxPhysicalInteraction.set_attributes(self,XMLNode)

    def _get_biana_object(self):
        return None
568 569
class BiopaxCatalysis(BiopaxControl):
    """
    A control interaction in which a physical entity (a catalyst) increases the rate of a conversion interaction by lowering its activation energy. Instances of this class describe a pairing between a catalyzing entity and a catalyzed conversion
    """

    def __init__(self,XMLNode):
        self.cofactor = None
        self.direction = None
        # The initialiser chain already parses the node through
        # self.set_attributes(XMLNode); parsing it again here appended the
        # accumulated list attributes (xrefs, synonyms, comments) once more.
        BiopaxControl.__init__(self,XMLNode)

    def set_attributes(self, XMLNode):
        """Reads COFACTOR and DIRECTION (stored but not used yet), then the control attributes."""
        for current_child in XMLNode.getChilds():
            if current_child.name == "bp:COFACTOR":
                self.cofactor = current_child.attrs["rdf:resource"]
                # NOT IMPLEMENTED YET!!!
            elif current_child.name == "bp:DIRECTION":
                self.direction = current_child.getValue()
                # NOT IMPLEMENTED YET!!!
        BiopaxControl.set_attributes(self,XMLNode)
590 591
class BiopaxModulation(BiopaxControl):
    """Control element used for bp:modulation; adds nothing to BiopaxControl."""
595 596
class BiopaxPhysicalEntity(BiopaxEntity):
    """Generic physical entity, stored in BIANA as an entity of type "compound"."""

    def _get_biana_object(self):
        """Returns the cached external entity, creating it on first use."""
        if self.biana_object is not None:
            return self.biana_object

        entity = ExternalEntity(source_database=BiopaxEntity.database, type="compound")
        self.biana_object = entity
        return self.biana_object
603 604
class BiopaxComplex(BiopaxPhysicalEntity):
    """BIOPAX Complex element; it is inserted as a relation of type "complex"."""

    def __init__(self, XMLNode):
        # The list must exist before the base initialiser parses the node
        self.components = []
        BiopaxPhysicalEntity.__init__(self, XMLNode)

    def set_attributes(self, XMLNode):
        """Collects the COMPONENTS references, then the inherited attributes."""
        for child in XMLNode.getChilds():
            if child.name == "bp:COMPONENTS":
                self.components.append(child.attrs["rdf:resource"])
        BiopaxPhysicalEntity.set_attributes(self, XMLNode)

    def _get_biana_object(self):
        """
        Builds (once) and returns the "complex" relation: each component is
        sent to BIANA, added as participant and asked to add its participant
        attributes to the relation.
        """
        if self.biana_object is not None:
            return self.biana_object

        complex_relation = ExternalEntityRelation(source_database=BiopaxEntity.database,
                                                  relation_type="complex")
        for component_ref in self.components:
            component = BiopaxEntity.resources[component_ref]
            component_id = component.toBiana()
            complex_relation.add_participant(component_id)
            component.add_participant_attributes_to_relation(eEr=complex_relation)

        self.biana_object = complex_relation
        return self.biana_object
634 635
636 -class BiopaxProtein(BiopaxPhysicalEntity):
637
638 - def __init__(self, XMLNode):
640
641 - def _get_biana_object(self):
642 if self.biana_object is None: 643 self.biana_object = ExternalEntity( source_database = BiopaxEntity.database, type = "protein" ) 644 return self.biana_object
645
class BiopaxDNA(BiopaxPhysicalEntity):
    """DNA physical entity, stored in BIANA as an entity of type "dna"."""

    def _get_biana_object(self):
        """Returns the cached external entity, creating it on first use."""
        if self.biana_object is not None:
            return self.biana_object

        entity = ExternalEntity(source_database=BiopaxEntity.database, type="dna")
        self.biana_object = entity
        return self.biana_object
652
class BiopaxRNA(BiopaxPhysicalEntity):
    """RNA physical entity, stored in BIANA as an entity of type "rna"."""

    def _get_biana_object(self):
        """Returns the cached external entity, creating it on first use."""
        if self.biana_object is not None:
            return self.biana_object

        entity = ExternalEntity(source_database=BiopaxEntity.database, type="rna")
        self.biana_object = entity
        return self.biana_object
659
class BiopaxSmallMolecule(BiopaxPhysicalEntity):
    """Small molecule, stored in BIANA as an entity of type "compound"."""

    def _get_biana_object(self):
        """Returns the cached external entity, creating it on first use."""
        if self.biana_object is not None:
            return self.biana_object

        entity = ExternalEntity(source_database=BiopaxEntity.database, type="compound")
        self.biana_object = entity
        return self.biana_object
666 667
class BiopaxXREF(BiopaxEntity):
    """
    Cross-reference element: a database name (db) and an identifier (id).
    It is never inserted in BIANA by itself.
    """

    def __init__(self, XMLNode):
        # Defaults must exist before the base initialiser parses the node
        self.id = None
        self.db = None
        BiopaxEntity.__init__(self, XMLNode)

    def set_attributes(self, XMLNode):
        """Reads the DB and ID children; generic attributes are not parsed."""
        for child in XMLNode.getChilds():
            tag = child.name
            if tag == "bp:DB":
                self.db = child.getValue()
            elif tag == "bp:ID":
                self.id = child.getValue()

    def _get_biana_object(self):
        return None
684 685
class BiopaxBioSource(BiopaxEntity):
    """
    Organism description: a name and a reference to its taxonomy xref.
    It is never inserted in BIANA by itself.
    """

    def __init__(self, XMLNode):
        # Defaults must exist before the base initialiser parses the node
        self.tax_ref = None
        self.name = None
        self.tissue = None
        self.celltype = None
        BiopaxEntity.__init__(self, XMLNode)

    def set_attributes(self, XMLNode):
        """Reads NAME and TAXON-XREF; any other child is reported on stdout."""
        for child in XMLNode.getChilds():
            tag = child.name
            if tag == "bp:NAME":
                self.name = child.getValue()
            elif tag == "bp:TAXON-XREF":
                self.tax_ref = child.attrs["rdf:resource"]
            else:
                print("%s  not recognized" % child.name)

    def _get_biana_object(self):
        return None
706
class BiopaxDataSource(object):
    """Placeholder for data source elements; it has no behaviour yet."""
711 712
class BiopaxOpenControlledVocabulary(BiopaxEntity):
    """
    Controlled vocabulary element: a term and a reference to its xref.
    It is never inserted in BIANA by itself.
    """

    def __init__(self, XMLNode):
        # Defaults must exist before the base initialiser parses the node
        self.xref = None
        self.term = None
        BiopaxEntity.__init__(self, XMLNode)

    def set_attributes(self, XMLNode):
        """Reads the TERM and XREF children (the last XREF wins); generic attributes are not parsed."""
        for child in XMLNode.getChilds():
            tag = child.name
            if tag == "bp:TERM":
                self.term = child.getValue()
            elif tag == "bp:XREF":
                self.xref = child.attrs["rdf:resource"]

    def _get_biana_object(self):
        return None
729 730
731 -class BiopaxLevel2Parser(BianaParser):
732 """ 733 734 """ 735 736 name = "biopax_level_2" 737 description = "This file implements a program that fills up tables in database biana with information of a BIOPAX Level 2 formatted database" 738 external_entity_definition = "" 739 external_entity_relations = "" 740
741 - def __init__(self):
742 743 # Start with the default values 744 745 BianaParser.__init__(self, default_db_description = "Biopax formatted database", 746 default_script_name = "biopaxLevel2Parser.py", 747 default_script_description = BiopaxLevel2Parser.description, 748 additional_compulsory_arguments = [("default-attribute=",None,"Name of the default identifier that this database gives (such as reactome)")])
749 750
751 - class BiopaxLevel2Handler(handler.ContentHandler):
752 """ 753 Class to handle content in Biopax Level2 XML files 754 """ 755
756 - def _identity(a):
757 return a
758 - def _entrez_funct(a):
759 return str(a).replace("gi|","")
760 761 datatype_operations = { "uniprot": _identity, 762 "ncbi_taxonomy": _identity, 763 "tigr": _identity, 764 "reactome": _identity, 765 "chebi": _identity, 766 "go": _identity, 767 "pubchem compound" : _identity, 768 "glycan" : _identity, 769 "compound" : _identity, 770 "entrez": _entrez_funct } 771 772 biopax_objects_dict = { "bp:unificationxref": BiopaxXREF, 773 "bp:relationshipxref": BiopaxXREF, 774 "bp:publicationxref": BiopaxXREF, 775 "bp:opencontrolledvocabulary": BiopaxOpenControlledVocabulary, 776 "bp:biosource": BiopaxBioSource, 777 "bp:protein": BiopaxProtein, 778 "bp:complex": BiopaxComplex, 779 "bp:dna": BiopaxDNA, 780 "bp:rna": BiopaxRNA, 781 "bp:smallmolecule": BiopaxSmallMolecule, 782 "bp:physicalentity": BiopaxSmallMolecule, 783 "bp:pathway": BiopaxPathway, 784 "bp:interaction": BiopaxInteraction, 785 "bp:physcialinteraction": BiopaxPhysicalInteraction, 786 "bp:conversion": BiopaxConversion, 787 "bp:control": BiopaxControl, 788 "bp:biochemicalreaction": BiopaxBiochemicalReaction, 789 "bp:complexassembly": BiopaxComplexAssembly, 790 "bp:transport": BiopaxTransport, 791 "bp:catalysis": BiopaxCatalysis, 792 "bp:modulation": BiopaxModulation, 793 "bp:sequenceparticipant": BiopaxPhysicalEntityParticipant, 794 "bp:physicalentityparticipant": BiopaxPhysicalEntityParticipant, 795 "bp:pathwaystep": BiopaxPathwayStep } 796 #"bp:transportwithbiochemicalreaction":, 797 # "bp:physicalentityparticipant":, 798 # "bp:proteinparticipant":, 799 # "bp:complexparticipant":, 800 # "bp:rnaparticipant":, 801 # "bp:dnaparticipant":, 802 # "bp:smallmoleculeparticipant": } 803
804 - def get_biana_data_type(self, type):
806
807 - def __init__(self):
808 809 print "initalizing BiopaxLevel2Handler" 810 811 self.current_XMLNode = None 812 self.step = 0 813 self.xmlnode_hierarchylist = [] 814 self.biopaxElements = {} 815 816 BiopaxEntity.resources = self.biopaxElements 817 BiopaxEntity.controlled_relations = {} 818 819 handler.ContentHandler.__init__(self)
820
821 - def _get_cross_ref(self, xref_id):
822 823 if xref_id[0] == '#': 824 xref_id = xref_id[1:] 825 826 try: 827 return self.unification_xrefs[xref_id] 828 except: 829 return self._get_cross_ref( xref_id = self.recursive_xref[xref_id] )
830 837 838 839 # ContentHandler methods
840 - def startDocument(self):
841 return
842
843 - def endDocument(self):
844 self.step += 1
845
846 - def startElement(self, name, attrs):
847 if self.current_XMLNode is None: 848 self.current_XMLNode = XMLNode(name = name, attrs = attrs) 849 else: 850 t = XMLNode(name = name, attrs = attrs) 851 self.current_XMLNode.addChild(t) 852 self.xmlnode_hierarchylist.append(self.current_XMLNode) 853 self.current_XMLNode = t
854
855 - def endElement(self, name):
856 857 name = name.lower() 858 859 if name!=self.current_XMLNode.name.lower(): 860 raise ValueError("ERROR IN XML FILE") 861 862 if BiopaxLevel2Parser.BiopaxLevel2Handler.biopax_objects_dict.has_key(name): 863 self.biopaxElements['#'+self.current_XMLNode.attrs["rdf:ID"]] = BiopaxLevel2Parser.BiopaxLevel2Handler.biopax_objects_dict[name](self.current_XMLNode) 864 865 # Sets the current object the parent 866 if len(self.xmlnode_hierarchylist)>0: 867 self.current_XMLNode = self.xmlnode_hierarchylist.pop() 868 else: 869 self.current_XMLNode = None 870 871 return
872 873
874 - def characters(self, text):
875 #print text 876 self.current_XMLNode.addValue(text.replace('.&lt;','<').replace('br&gt;','>').encode("ascii","ignore"))
877 #self.current_XMLNode.addValue(text.replace('.&lt;','<').replace('br&gt;','>').decode("ascii","ignore")) 878 879
880 - def toBiana(self):
881 for current_element in self.biopaxElements.values(): 882 current_element.toBiana()
883 884 885
886 - class BiopaxLevel2XMLParser(object):
887 """ 888 Class for parsing individual XML files obeying BIOPAX Level 2 standards 889 """ 890
891 - def __init__(self, flagVerbose=False): #, fileName=None, listEntry=None):
892 self.fileName = None 893 self.file = None 894 self.listEntry = [] 895 self.handler = BiopaxLevel2Parser.BiopaxLevel2Handler() 896 self.saxParser = make_parser() 897 self.saxParser.setContentHandler(self.handler) 898 return
899
900 - def __del__(self):
901 if self.file is not None and not self.file.closed: 902 self.file.close() 903 return
904
905 - def __str__(self):
906 return ""
907
908 - def parseFile(self, fileName=None):
909 self.__init__() # first reset old contents 910 if fileName is not None: 911 self.fileName = fileName 912 self.file = open(fileName) 913 self.saxParser.parse(self.fileName) 914 self.handler.toBiana() 915 if not self.file.closed: 916 self.file.close() 917 return
918 919
920 - def parse_database(self):
921 """ 922 """ 923 BiopaxEntity.database = self.database 924 BiopaxEntity.dbaccess = self.biana_access 925 926 927 # Speficy that this database has relations hierarchies 928 self.biana_access.store_relations_hierarchy = True 929 930 parser = self.BiopaxLevel2XMLParser(self.verbose) 931 932 if os.path.isdir(self.input_file): 933 files_list = os.listdir(self.input_file) 934 if not self.input_file.endswith(os.sep): 935 self.input_file += os.sep 936 files_list = [ self.input_file+x for x in files_list ] 937 else: 938 files_list = [self.input_file] 939 940 for current_file in files_list: 941 if current_file.endswith(".owl"): 942 print "Parsing file %s" %current_file 943 it = time.time() 944 parser.parseFile(current_file) 945 if self.time_control: 946 print "Done in %s seconds" %(time.time()-it)
947