1 """
2 File : psi_MiXMLParser.py
3 Author : Javier Garcia & Emre Guney
4 Creation : December 2007
5 Contents :
6 - Psi_MiXMLParser object to parse an XML file in PSI-MI (PSI molecular interaction format)
7 - Psi_MiEntry object which corresponds to the "entry" in the file (container for interactors & interactions)
8 - Psi_MiInteractor object
9 - Psi_MiInteraction object
10 - Psi_MiParticipant object
11 - Psi_MiNames object
12 - Psi_MiXRef object
13 - DBReference object
14 - Psi_MiHandler object (Content Handler for XML parsing with SAX)
15
16 Called from : psi_MiFormattedDB2piana.py
17
18 =======================================================================================================
19
20 Generic parser aiming to read data available in PSI-MI XML format in various databases
21
22 """
23
24
25 from xml.sax import saxutils, handler, make_parser
26
27
28
29
30 """
31 =======================================================================================================
32 Psi_MiXMLParser OBJECT
33 """
34
36 """
37 Class for parsing individual XML files obeying PSI-MI standards
38 """
39
41 self.fileName = None
42 self.file = None
43 self.listEntry = []
44 self.handler = Psi_MiHandler(flagVerbose)
45 self.saxParser = make_parser()
46 self.saxParser.setContentHandler(self.handler)
47 return
48
50 if self.file is not None and not self.file.closed:
51 self.file.close()
52 return
53
56
58 self.__init__()
59 if fileName is not None:
60 self.fileName = fileName
61 self.file = open(fileName)
62 self.saxParser.parse(self.fileName)
63
64
65 self.listEntry = self.handler.listEntry
66 if not self.file.closed:
67 self.file.close()
68 return
69
70 - def addEntry(self, objPsi_MiEntry):
71 self.listEntry.append(objPsi_MiEntry)
72 return
73
76
77 """
78 =======================================================================================================
79 Psi_MiHandler OBJECT
80 """
82 """
83 Class to handle content in PSI-MI XML files
84 """
86 handler.ContentHandler.__init__(self)
87 self.listTagStack = []
88 self.listObjectStack = []
89
90
91 self.strCurrent = None
92 self.tagCurrent = None
93 self.strAttributeCurrent = None
94 self.listEntry = []
95 self.flagVerbose = flagVerbose
96 return
97
99 print self.listObjectStack
100 return
101
102 - def _getAttribute(self, attrs, nameAttribute, flagReportInExistance=False):
103 strAttribute = None
104 for (name, value) in attrs.items():
105 if name == nameAttribute:
106 strAttribute = saxutils.escape(value)
107 if strAttribute is None and flagReportInExistance:
108 if self.flagVerbose:
109 print "Warning: Attribute not found - %s", nameAttribute
110 return strAttribute
111
112
115
117
118 if name == Psi_MiEntry.PSI_MI_TAG_ENTRY:
119 self.listObjectStack.append(Psi_MiEntry())
120 elif name == Psi_MiExperiment.PSI_MI_TAG_EXPERIMENT_DESCRIPTION:
121 id = self._getAttribute(attrs, Psi_MiExperiment.PSI_MI_TAG_EXPERIMENT_ATTRIBUTE_ID, True)
122 self.listObjectStack.append(Psi_MiExperiment(id))
123 elif name == Psi_MiInteractor.PSI_MI_TAG_INTERACTOR:
124 id = self._getAttribute(attrs, Psi_MiInteractor.PSI_MI_TAG_INTERACTOR_ATTRIBUTE_ID, True)
125
126 self.listObjectStack.append(Psi_MiInteractor(id))
127 elif name == Psi_MiInteraction.PSI_MI_TAG_INTERACTION:
128 id = self._getAttribute(attrs, Psi_MiInteraction.PSI_MI_TAG_INTERACTION_ATTRIBUTE_ID, True)
129 self.listObjectStack.append(Psi_MiInteraction(id))
130 elif name == Psi_MiParticipant.PSI_MI_TAG_PARTICIPANT:
131 if self.listTagStack[-1] == Psi_MiInteraction.PSI_MI_TAG_PARTICIPANT_LIST:
132 id = self._getAttribute(attrs, Psi_MiParticipant.PSI_MI_TAG_PARTICIPANT_ATTRIBUTE_ID, True)
133 self.listObjectStack.append(Psi_MiParticipant(id))
134 elif name == Psi_MiNames.PSI_MI_TAG_NAMES:
135
136 self.listObjectStack.append(Psi_MiNames())
137 elif name == Psi_MiNames.PSI_MI_TAG_ALIAS:
138 if self.listTagStack[-1] == Psi_MiNames.PSI_MI_TAG_NAMES:
139 self.strAttributeCurrent = self._getAttribute(attrs, Psi_MiNames.PSI_MI_TAG_ALIAS_ATTRIBUTE_TYPE)
140 elif name == Psi_MiXRef.PSI_MI_TAG_XREF:
141 self.listObjectStack.append(Psi_MiXRef())
142 elif name == Psi_MiXRef.PSI_MI_TAG_REF_PRIMARY:
143
144 objPsi_MiXRef = self.listObjectStack[-1]
145 db = self._getAttribute(attrs, Psi_MiXRef.PSI_MI_TAG_REF_ATTRIBUTE_DB, True)
146 id = self._getAttribute(attrs, Psi_MiXRef.PSI_MI_TAG_REF_ATTRIBUTE_ID, True)
147 type = self._getAttribute(attrs, Psi_MiXRef.PSI_MI_TAG_REF_ATTRIBUTE_TYPE)
148 secondary = self._getAttribute(attrs, Psi_MiXRef.PSI_MI_TAG_REF_ATTRIBUTE_SECONDARY)
149 objPsi_MiXRef.refPrimary = DBReference(db, id, type, secondary)
150 elif name == Psi_MiXRef.PSI_MI_TAG_REF_SECONDARY:
151 objPsi_MiXRef = self.listObjectStack[-1]
152 db = self._getAttribute(attrs, Psi_MiXRef.PSI_MI_TAG_REF_ATTRIBUTE_DB)
153 id = self._getAttribute(attrs, Psi_MiXRef.PSI_MI_TAG_REF_ATTRIBUTE_ID)
154 type = self._getAttribute(attrs, Psi_MiXRef.PSI_MI_TAG_REF_ATTRIBUTE_TYPE)
155 secondary = self._getAttribute(attrs, Psi_MiXRef.PSI_MI_TAG_REF_ATTRIBUTE_SECONDARY)
156 objPsi_MiXRef.listRefSecondary.append(DBReference(db, id, type, secondary))
157 elif name == Psi_MiInteractor.PSI_MI_TAG_ORGANISM:
158 if self.listTagStack[-1] == Psi_MiInteractor.PSI_MI_TAG_INTERACTOR:
159 objPsi_MiInteractor = self.listObjectStack[-1]
160 objPsi_MiInteractor.taxId = self._getAttribute(attrs, Psi_MiInteractor.PSI_MI_TAG_ORGANISM_ATTRIBUTE_TAX_ID, True)
161
162 self.listTagStack.append(name)
163 self.tagCurrent = name
164 return
165
285
287
288
289 if self.tagCurrent == self.listTagStack[-1]:
290
291
292 if content.lstrip() != "":
293 if self.strCurrent is None:
294 self.strCurrent = content.lstrip()
295 else:
296 self.strCurrent += content.lstrip()
297 else:
298
299 if content.lstrip() != "":
300 self.strCurrent = content.lstrip()
301 return
302
306
308
309 print "Skipping Processing Instruction: %s %s" % (target, data)
310 return
311
313 print "Skipping Entity: %s" % name
314 return
315
316
317 """
318 =======================================================================================================
319 Psi_MiEntry OBJECT
320 """
322 """
323 Class representing an entry
324 """
325 PSI_MI_TAG_ENTRY = "entry"
326 PSI_MI_TAG_EXPERIMENT_LIST = "experimentList"
327 PSI_MI_TAG_INTERACTOR_LIST = "interactorList"
328 PSI_MI_TAG_INTERACTION_LIST = "interactionList"
329
330 - def __init__(self, dictExperiment=None, dictInteractor=None, dictInteraction=None):
331 if dictExperiment is None:
332 self.dictExperiment = {}
333 else:
334 self.dictExperiment = dictExperiment
335 if dictInteractor is None:
336 self.dictInteractor = {}
337 else:
338 self.dictInteractor = dictInteractor
339 if dictInteraction is None:
340 self.dictInteraction = {}
341 else:
342 self.dictInteraction = dictInteraction
343 return
344
347
349 return "%s" % [k for k in self.dictInteractor.iterkeys()]
350
351 - def addExperiment(self, objPsi_MiExperiment):
352 self.dictExperiment[objPsi_MiExperiment.id] = objPsi_MiExperiment
353 return True
354
355 - def addInteractor(self, objPsi_MiInteractor):
356 self.dictInteractor[objPsi_MiInteractor.id] = objPsi_MiInteractor
357 return True
358
359 - def addInteraction(self, objPsi_MiInteraction):
360 self.dictInteraction[objPsi_MiInteraction.id] = objPsi_MiInteraction
361 return True
362
363 - def getExperiments(self):
364 return self.dictExperiment
365
366 - def getInteractors(self):
367 return self.dictInteractor
368
369 - def getInteractions(self):
370 return self.dictInteraction
371
372 """
373 =======================================================================================================
374 Psi_MiExperiment OBJECT
375 """
377 """
378 Class representing an experiment
379 """
380
381 PSI_MI_TAG_EXPERIMENT_DESCRIPTION = "experimentDescription"
382 PSI_MI_TAG_EXPERIMENT_ATTRIBUTE_ID = "id"
383 PSI_MI_TAG_BIB_REF = "bibref"
384 PSI_MI_TAG_INTERACTION_DETECTION_METHOD = "interactionDetectionMethod"
385 PSI_MI_TAG_PARTICIPANT_IDENTIFICATION_METHOD = "participantIdentificationMethod"
386
387 - def __init__(self, id=None, objPsi_MiNames=None, objPsi_MiXRefBib=None, objPsi_MiXRef=None, objPsi_MiXRefMethodInteraction=None, objPsi_MiXRefMethodParticipant=None, objPsi_MiNamesType=None):
388 """
389 id: in PSI_MI_TAG_EXPERIMENT_ATTRIBUTE_ID
390 objPsi_MiXRefBib: inside PSI_MI_TAG_BIBREF inside PSI_MI_TAG_XREF
391 objPsi_MiXRef: inside PSI_MI_TAG_XREF
392 objPsi_MiXRefMethodInteraction: inside PSI_MI_TAG_INTERACTION_DETECTION_METHOD
393 objPsi_MiXRefMethodParticipant: inside PSI_MI_TAG_PARTICIPANT_IDENTIFICATION_METHOD
394
395 """
396 self.id = id
397 self.description = objPsi_MiNames
398 self.xRefBib = objPsi_MiXRefBib
399 self.xRef = objPsi_MiXRef
400 self.xRefMethodInteraction = objPsi_MiXRefMethodInteraction
401 self.xRefMethodParticipant = objPsi_MiXRefMethodParticipant
402 self.nameMethodInteraction = objPsi_MiNamesType
403 return
404
407
409 return "{%s: %s, %s, %s, %s}" % (self.id, self.xRefBib, self.xRef, self.xRefMethodInteraction, self.xRefMethodParticipant)
410
411
412 """
413 =======================================================================================================
414 Psi_MiInteractor OBJECT
415 """
417 """
418 Class representing an interactor
419 """
420
421 PSI_MI_TAG_INTERACTOR = "interactor"
422 PSI_MI_TAG_INTERACTOR_ATTRIBUTE_ID = "id"
423 PSI_MI_TAG_TYPE = "interactorType"
424 PSI_MI_TAG_ORGANISM = "organism"
425 PSI_MI_TAG_ORGANISM_ATTRIBUTE_TAX_ID = "ncbiTaxId"
426 PSI_MI_TAG_SEQUENCE = "sequence"
427
428 - def __init__(self, id=None, objPsi_MiNames=None, objPsi_MiXRef=None, objPsi_MiNamesType=None, taxId=None, sequence=None):
429 """
430 id: in PSI_MI_TAG_INTERACTOR_ATTRIBUTE_ID
431 objPsi_MiNames: inside PSI_MI_TAG_NAMES
432 objPsi_MiXRef: inside PSI_MI_TAG_XREF
433 type: inside PSI_MI_TAG_TYPE
434 taxId: in PSI_MI_TAG_ORGANISM_ATTRIBUTE_TAX_ID
435 sequence: inside PSI_MI_TAG_SEQUENCE
436 """
437 self.id = id
438 self.name = objPsi_MiNames
439 self.xRef = objPsi_MiXRef
440 self.type = objPsi_MiNamesType
441 self.taxId = taxId
442 self.sequence = sequence
443 return
444
447
449 return "{%s: %s, %s, %s, %s, %s}" % (self.id, self.name, self.xRef, self.type, self.taxId, self.sequence)
450
451 """
452 =======================================================================================================
453 Psi_MiInteraction OBJECT
454 """
456 """
457 Class representing an interaction
458 """
459 PSI_MI_TAG_INTERACTION = "interaction"
460 PSI_MI_TAG_INTERACTION_ATTRIBUTE_ID = "id"
461 PSI_MI_TAG_EXPERIMENT_LIST = "experimentList"
462 PSI_MI_TAG_EXPERIMENT_REFERENCE = "experimentRef"
463 PSI_MI_TAG_PARTICIPANT_LIST = "participantList"
464 PSI_MI_TAG_TYPE = "interactionType"
465 PSI_MI_TAG_NEGATIVE = "negative"
466
467 - def __init__(self, id=None, objPsi_MiNames=None, objPsi_MiXRef=None, listExperimentId=None, listObjPsi_MiParticipant=None, objPsi_MiXRefType=None, flagNegative=False):
468 """
469 id: in PSI_MI_TAG_INTERACTION_ATTRIBUTE_ID
470 objPsi_MiNames: inside PSI_MI_TAG_NAMES
471 objPsi_MiXRef: inside PSI_MI_TAG_XREF
472 listExperimentId: inside PSI_MI_TAG_EXPERIMENT_LIST inside PSI_MI_TAG_EXPERIMENT_REFERENCE
473 listObjPsi_MiParticipant: inside PSI_MI_TAG_PARTICIPANT_LIST
474 type: inside PSI_MI_TAG_TYPE
475 """
476 self.id = id
477 self.name = objPsi_MiNames
478 self.xRef = objPsi_MiXRef
479 if listExperimentId is None:
480 self.listExperimentId = []
481 else:
482 self.listExperimentId = listExperimenId
483 if listObjPsi_MiParticipant is None:
484 self.listParticipant = []
485 else:
486 self.listParticipant = listObjPsi_MiParticipant
487 self.type = objPsi_MiXRefType
488 self.negative = flagNegative
489 return
490
493
495 return "{%s: %s, %s, %s}" % (self.id, self.name, self.xRef, self.type, self.negative)
496
498 self.listParticipant.append(objPsi_MiParticipant)
499 return
500
501
502 """
503 =======================================================================================================
504 Psi_MiParticipant OBJECT
505 """
507 """
508 Class representing a participant
509 """
510 PSI_MI_TAG_PARTICIPANT = "participant"
511 PSI_MI_TAG_PARTICIPANT_ATTRIBUTE_ID = "id"
512 PSI_MI_TAG_INTERACTOR_REFERENCE = "interactorRef"
513 PSI_MI_TAG_BIOLOGICAL_ROLE = "biologicalRole"
514 PSI_MI_TAG_EXPERIMENTAL_ROLE_LIST = "experimentalRoleList"
515 PSI_MI_TAG_EXPERIMENTAL_ROLE = "experimentalRole"
516
517 - def __init__(self, id=None, interactorReference=None, objPsi_MiNamesRoleBiological=None, listObjPsi_MiNamesRoleExperimental=None):
518 """
519 id: in PSI_MI_TAG_INTERACTOR_ATTRIBUTE_ID
520 interactorId: inside PSI_MI_TAG_INTERACTOR_REFERENCE
521 objPsi_MiXRefRoleBiological: inside PSI_MI_TAG_BIOLOGICAL_ROLE inside PSI_MI_TAG_XREF
522 listObjPsi_MiXRefExperimentalRole: inside PSI_MI_TAG_EXPERIMENTAL_ROLE_LIST inside PSI_MI_TAG_EXPERIMENTAL_ROLE inside PSI_MI_TAG_XREF
523 """
524 self.id = id
525 self.interactorId = interactorReference
526 self.nameRoleBiological = objPsi_MiNamesRoleBiological
527 if listObjPsi_MiNamesRoleExperimental is None:
528 self.listNameRoleExperimental = []
529 else:
530 self.listNameRoleExperimental = listObjPsi_MiNamesRoleExperimental
531 return
532
535
537 return "{%s: %s, %s}" % (self.id, self.interactorId, self.nameRoleBiological)
538
539
540 """
541 =======================================================================================================
542 Psi_MiNames OBJECT
543 """
545 """
546 Class representing information encapsulated within names XML tags
547 """
548 PSI_MI_TAG_NAMES = "names"
549 PSI_MI_TAG_LABEL = "shortLabel"
550 PSI_MI_TAG_NAME = "fullName"
551 PSI_MI_TAG_ALIAS = "alias"
552 PSI_MI_TAG_ALIAS_ATTRIBUTE_TYPE = "type"
553
554 - def __init__(self, label=None, name=None, listAlias=None):
555 """
556 shortLabel: inside PSI_MI_TAG_LABEL
557 fullName: inside PSI_MI_TAG_NAME
558 listAlias: [(in PSI_MI_TAG_ALIAS_ATTRIBUTE_TYPE, inside PSI_MI_TAG_ALIAS), ] ---> i.e. [('gene name', 'Caf1'), ]
559 PSI_MI_TAG_ALIAS_ATTRIBUTE_TYPE can be: gene name | gene name snonym | orf name
560
561 """
562
563
564
565 self.label = label
566 self.name = name
567 if listAlias is None:
568 self.listAlias = []
569 else:
570 self.listAlias = listAlias
571 return
572
575
577
578 return "%s" % self.label
579
580
581
582 """
583 =======================================================================================================
584 Psi_MiXRef OBJECT
585 """
587 """
588 Class representing information encapsulated within xref XML tags
589 """
590 PSI_MI_TAG_XREF = "xref"
591 PSI_MI_TAG_REF_PRIMARY = "primaryRef"
592 PSI_MI_TAG_REF_SECONDARY = "secondaryRef"
593 PSI_MI_TAG_REF_ATTRIBUTE_DB = "db"
594 PSI_MI_TAG_REF_ATTRIBUTE_ID = "id"
595 PSI_MI_TAG_REF_ATTRIBUTE_TYPE = "refType"
596 PSI_MI_TAG_REF_ATTRIBUTE_SECONDARY = "secondary"
597
598 - def __init__(self, objDBReferenceRefPrimary=None, listObjDBReferenceRefSecondary=None):
599 """
600 refPrimary: (in PSI_MI_TAG_REF_ATTRIBUTE_DB, in PSI_MI_TAG_REF_ATTRIBUTE_ID)
601 listRefSecondary: [(in PSI_MI_TAG_REF_ATTRIBUTE_DB, in PSI_MI_TAG_REF_ATTRIBUTE_ID), ] ---> i.e. [('go', 'GO:0035098')]
602 """
603 self.refPrimary = objDBReferenceRefPrimary
604 if listObjDBReferenceRefSecondary is None:
605 self.listRefSecondary = []
606 else:
607 self.listRefSecondary = listObjDBReferenceRefSecondary
608 return
609
612
614 return "%s" % (self.refPrimary)
615
617 """
618 Class representing reference information to a database with db, id and type fields
619 """
620 - def __init__(self, db=None, id=None, type=None, secondary=None):
621 self.db = db
622 self.id = id
623 self.type = type
624 self.secondary = secondary
625 return
626
629
631 return "(%s, %s)" % (self.db, self.id)
632