Package biana :: Package BianaParser :: Module psi_Mi25Parser
[hide private]
[frames | no frames]

Source Code for Module biana.BianaParser.psi_Mi25Parser

  1  """ 
  2      BIANA: Biologic Interactions and Network Analysis 
  3      Copyright (C) 2009  Javier Garcia-Garcia, Emre Guney, Baldo Oliva 
  4   
  5      This program is free software: you can redistribute it and/or modify 
  6      it under the terms of the GNU General Public License as published by 
  7      the Free Software Foundation, either version 3 of the License, or 
  8      (at your option) any later version. 
  9   
 10      This program is distributed in the hope that it will be useful, 
 11      but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13      GNU General Public License for more details. 
 14   
 15      You should have received a copy of the GNU General Public License 
 16      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 17   
 18  """ 
 19   
 20  """ 
 21  File        : psi_MiFormattedDB2biana.py 
 22  Author      : Javier Garcia & Emre Guney 
 23  Creation    : December 2007 
 24  Contents    : inserts information from a PSI-MI formatted XML file database into biana 
 25  Called from :  
 26   
 27  ======================================================================================================= 
 28   
 29  This file implements a program that fills up tables in database biana with information from a PSI-MI formatted database 
 30   
 31  --> Databases must be in PSI-MI XML format 
 32   
 33  """ 
 34   
 35  import os 
 36  from bianaParser import * 
 37  from psi_MiXMLParser import * 
 38  import sets 
 39   
# Maps the interaction-method labels used by "grid" experiment references to
# PSI-MI method identifiers. parse_database() strips the leading "MI:" before
# storing the value as a "method_id" attribute.
# Trailing "#i" comments keep the alternative identifiers noted by the
# original authors ("~" marks the ones they flagged as approximate).
DICT_METHOD_CONVERSION_GRID_TO_PSI_MI = { 'Biochemical Activity': "MI:0401",
                                          'Co-crystal Structure': "MI:0114",
                                          'FRET': "MI:0055",
                                          'Co-localization': "MI:0403",
                                          'Co-purification': "MI:0025",
                                          'Invitro': "MI:0492",
                                          'Two-hybrid': "MI:0018",
                                          'Far Western': "MI:0047",
                                          'Invivo': "MI:0493",
                                          'Phenotypic Enhancement': "MI:0802",
                                          'Phenotypic Suppression': "MI:0796",
                                          'Affinity Capture-Western': "MI:0004", #i "MI:0113",
                                          'Co-fractionation': "MI:0027",
                                          'Affinity Capture-RNA': "MI:0004", #i ~"MI:0709"
                                          'Affinity Capture-MS': "MI:0004", #i "MI:0427"
                                          'Synthetic Rescue': "MI:0262",
                                          'Reconstituted Complex': "MI:0492",
                                          'Dosage Rescue': "MI:0261",
                                          'Protein-peptide': "MI:0084",
                                          'Affinity Capture-Luminescence': "MI:0004", #i ~"MI:0729"
                                          'Protein-RNA': "MI:0316",
                                          'Synthetic Lethality': "MI:0441",
                                          'Dosage Growth Defect': "MI:0274",
                                          'Dosage Lethality': "MI:0441",
                                          'Synthetic Growth Defect': "MI:0274",
                                          'PCA': "MI:0090", # Protein Complementation assay
                                          'AffinityCapture-MS': "MI:0004" } # Only assigned affinity chromatography... should be inserted MS too?

# Names and labels longer than this are stored as a "description" attribute
# instead of a name attribute (see addNameAttributesToExternalEntityObject).
MAX_NAME_LENGTH = 100
 69   
class Psi_MiFormattedDBParser(BianaParser):
    """
    PSI-MI formatted DB Parser Class

    Parses PSI-MI 2.5 XML files with Psi_MiXMLParser and inserts, through
    self.biana_access, one ExternalEntity per interactor and one
    ExternalEntityRelation per interaction.

    NOTE(review): self.input_file, self.verbose, self.time_control,
    self.initial_time, self.database, self.biana_access and
    self.sourcedb_name are read here but never assigned in this class;
    presumably BianaParser provides them -- confirm.
    """

    name = "psi_mi_2.5"
    description = "This parser inserts psi-mi 2.5 formated information to biana database"
    external_entity_definition = "Each relation participant is considered as a distinct External Entity"
    external_entity_relations = "External Entity Relations"

    # First identifier prefix seen for each converted database name.
    # Class attribute: shared by every parser instance in the process.
    dictDBNameToPrefix = {}

    # Interactor type (label or name) -> BIANA external entity type
    _INTERACTOR_TYPE_CONVERSION = {
        "protein": "protein",
        "peptide": "protein",
        "dna": "DNA",
        "rna": "RNA",
        "nucleic acid": "DNA",
        "small molecule": "compound",
    }

    # Interactor type (label or name) -> sequence type
    _SEQUENCE_TYPE_CONVERSION = {
        "protein": "peptide",
        "peptide": "peptide",
        "dna": "dna",
        "rna": "rna",
        "nucleic acid": "dna",
    }

    # Participant role label -> stored role ("ignore" means: do not store)
    _ROLE_CONVERSION = {
        "bait": "bait",
        "prey": "prey",
        "neutral component": "neutral",
        "unspecified role": "ignore",
        "unspecifiedrole": "ignore",
        "fluorescence acceptor": "acceptor",
        "fluorescence accept": "acceptor",
        "fluorescence donor": "donor",
        "self": "self",
        "ancillary": "ancillary",
        "enzyme": "enzyme",
        "enzyme target": "enzyme target",
        "inhibitor": "inhibitor",
        "cofactor": "cofactor",
        "electron acceptor": "acceptor",
        "electron donor": "donor",
        "stimulator": "stimulator",
    }

    # Alias type -> attribute name ("ignore" means: do not store)
    _ALIAS_TYPE_CONVERSION = {
        "gene name": "GeneSymbol",
        "gene name synonym": "GeneSymbol",
        "orf name": "orfName",
        "locus name": "OrderedLocusName",
        "isoform synonym": "ignore",
    }

    # Upper-cased database name -> attribute name, identifier stored unchanged
    _DB_NAME_CONVERSION = {
        "INTENZ": "EC",
        "ENSEMBL": "ensembl",
        "ENCODE": "encode",
        "MIPS": "MIPS",
        "PROTEIN GI": "GI",
        "RCSB PDB": "pdb",
        "PDB": "pdb",
        "WWPDB": "pdb",
        "IPI": "IPI",
        "RGD": "rgd",
        "SGD": "SGD",
        "CYGD": "cygd",
        "FLYBASE": "FlyBase",
        "OMIM": "MIM",
        "MIM": "MIM",
        "ENTREZGENE": "geneID",
        "ENTREZ GENE/LOCUSLINK": "geneID",
        "HPRD": "HPRD",
        "HGNC": "HGNC",
        "MGI": "MGI",
        "TAIR": "TAIR",
        "RATMAP": "Ratmap",
        "IMGT/GENE-DB": "IMGT",
        "PSI-MI": "method_id",
    }

    # Upper-cased database name -> (attribute name, length of the identifier
    # prefix that is stripped from the value and registered with
    # checkOrInsertDBNamePrefix)
    _DB_PREFIXED_CONVERSION = {
        "INTERPRO": ("interpro", 3),
        "MINT": ("MINT", 5),
        "HUGE": ("Huge", 4),
        "DIP": ("DIP", 4),
        "WORMBASE": ("wormbasegeneid", 6),
        "UNIPARC": ("uniparc", 3),
        "CHEBI": ("chebi", 6),
    }

    # Upper-cased database names that are deliberately not stored
    _DB_IGNORED = frozenset([
        "DOI", "CAMJEDB", "ECOGENE", "NEWT", "IMEX", "AFCS", "PRIDE", "SO",
        "GRID", "GRID_LEGACY", "CDNA GI", "CDNA ACCESSION", "N/A",
    ])

    def __init__(self):
        """
        Registers the parser defaults and the compulsory "default-attribute"
        command line argument with BianaParser.
        """
        # Start with the default values
        BianaParser.__init__(self, default_db_description = "PSI-MI formatted protein-protein interaction database",
                             default_script_name = "psi_Mi25Parser.py",
                             default_script_description = "",
                             additional_compulsory_arguments = [("default-attribute=",None,"Name of the default identifier that this database gives (such as intact/mint/biogrid/dip/hprd/bind/mpact...)")])

    def parse_database(self):
        """
        Method that implements the specific operations of PSI-MI formatted database parser

        self.input_file may be a directory, a single ".xml" file, or a
        ".zip"/".gz" archive (extracted with unzip/gunzip next to the archive,
        after which every file of that directory is considered). Files not
        ending in ".xml" or ".mif25" are skipped.
        """
        self.not_recognized_cross_refs = set()

        located = self._locateInputFiles()
        if located is None:
            return
        (directoryData, listFileName) = located

        self.file_number = 0
        parser = Psi_MiXMLParser(self.verbose)

        for fileName in listFileName:

            sys.stderr.write("Parsing file %s\n" %fileName)

            if not fileName.endswith((".xml", ".mif25")):
                sys.stderr.write("Ignoring file: %s\n" % fileName)
                continue

            if self.time_control and self.file_number%10 == 0:
                sys.stderr.write("%s files done in %s seconds\n" %(self.file_number, time.time()-self.initial_time))

            self.file_number += 1

            if self.verbose:
                sys.stderr.write("\n------- %s\n" %fileName)

            try:
                parser.parseFile(os.path.join(directoryData, fileName))
            except Exception as inst:
                # Best effort: report the problem and still process whatever
                # the parser returns.
                # NOTE(review): after a failure getEntries() may return partial
                # or stale entries -- depends on Psi_MiXMLParser, confirm.
                sys.stderr.write("%s\n" %inst)

            for objEntry in parser.getEntries():
                self._insertEntry(objEntry)

    def _locateInputFiles(self):
        """
        Resolves self.input_file to (directory, list of file names), extracting
        archives when needed. Returns None (after a warning on stderr) when the
        input cannot be used.
        """
        import subprocess # function-scope import: only needed for archives

        # Work on an absolute path: the process changes directory before
        # extracting, which would invalidate a relative path, and a bare file
        # name has an empty dirname.
        inputPath = os.path.abspath(self.input_file)

        if os.path.isdir(inputPath):
            return (inputPath, os.listdir(inputPath))

        if not os.path.isfile(inputPath):
            sys.stderr.write("Warning: Input file %s not found\n" % self.input_file)
            return None

        directoryData = os.path.dirname(inputPath)

        if inputPath.endswith(".xml"):
            return (directoryData, [os.path.basename(inputPath)])

        if inputPath.endswith(".zip"):
            command = "unzip"
        elif inputPath.endswith(".gz"):
            command = "gunzip"
        else:
            sys.stderr.write("Warning: Input file extension (%s) not recognized by parser\n" % self.input_file[-3:])
            return None

        # Extract next to the archive; the working directory stays changed
        # afterwards, as before.
        os.chdir(directoryData)
        try:
            # Argument list instead of a shell string: paths containing spaces
            # or shell metacharacters are passed through untouched.
            subprocess.call([command, inputPath])
        except OSError as inst:
            sys.stderr.write("Warning: could not run %s: %s\n" % (command, inst))
        return (directoryData, os.listdir(directoryData))

    def _insertEntry(self, objEntry):
        """
        Inserts all interactors of a parsed entry and then all its interactions.
        """
        dictExperiment = objEntry.getExperiments()
        dictInteractor = objEntry.getInteractors()
        dictInteraction = objEntry.getInteractions()

        if self.verbose:
            sys.stderr.write("\nInteractors:\n")

        # Create external entities for interactors, remembering the id each
        # interactor received so that participants can be resolved later
        dictIdInteractorToIdExternal = {}
        for objInteractor in dictInteractor.values():
            dictIdInteractorToIdExternal[objInteractor.id] = self._insertInteractor(objInteractor)

        if self.verbose:
            sys.stderr.write("\nInteractions:\n")

        # Create external entity relations for interactions
        for objInteraction in dictInteraction.values():
            self._insertInteraction(objInteraction, dictExperiment, dictIdInteractorToIdExternal)

    def _insertInteractor(self, objInteractor):
        """
        Inserts one interactor as an ExternalEntity and returns the value of
        its get_id() after insertion.
        """
        if self.verbose:
            sys.stderr.write("%s\n" %objInteractor.id)

        # The type label is tried first, the type name is the fallback
        interactorType = self.decideInteractorTypeSpecificConversions(objInteractor.type.label)
        if interactorType is None:
            interactorType = self.decideInteractorTypeSpecificConversions(objInteractor.type.name)
        externalEntity = ExternalEntity( source_database = self.database, type=interactorType )

        # Fill name
        self.addNameAttributesToExternalEntityObject(objInteractor.name, externalEntity)
        # Fill xRef
        self.addXRefAttributesToExternalEntityObject(objInteractor.xRef, externalEntity)
        # Fill taxId (negative values are not stored)
        if objInteractor.taxId is not None and int(objInteractor.taxId) >= 0:
            externalEntity.add_attribute(ExternalEntityAttribute("taxid",objInteractor.taxId))
        # objInteractor.sequence is not inserted: the insertion was already
        # disabled (commented out) in the original code.

        # Insert the entry to the database
        # NOTE(review): indentation was lost in this listing; the insertion is
        # assumed to happen for every interactor, not only for those with a
        # sequence.
        self.biana_access.insert_new_external_entity( externalEntity = externalEntity )
        return externalEntity.get_id()

    def _insertInteraction(self, objInteraction, dictExperiment, dictIdInteractorToIdExternal):
        """
        Inserts one interaction as an ExternalEntityRelation together with its
        experiment attributes and its participants.
        """
        if self.verbose:
            sys.stderr.write("%s\n" %objInteraction.id)

        # Start new entry relation
        if objInteraction.negative:
            typeRelation = "no_interaction"
        else:
            typeRelation = "interaction"
        relation = ExternalEntityRelation( source_database=self.database, relation_type=typeRelation )

        # Fill xRef
        if objInteraction.xRef is not None:
            self.addXRefAttributesToExternalEntityObject( objPsi_MiXRef = objInteraction.xRef,
                                                          psi_MiFormatted_object = relation,
                                                          attribute_class = ExternalEntityRelationAttribute )
        # Fill name
        if objInteraction.name is not None:
            self.addNameAttributesToExternalEntityObject(objInteraction.name, relation)

        # Fill experimentList
        listObjXRefMethodParticipantIdentification = self._addExperimentsToRelation(objInteraction, relation, dictExperiment)
        # Fill participantList
        self._addParticipantsToRelation(objInteraction, relation, dictIdInteractorToIdExternal,
                                        listObjXRefMethodParticipantIdentification)

        # Interaction type is ignored for now.
        # Insert the entry to the database
        self.biana_access.insert_new_external_entity( externalEntity = relation )

    def _addExperimentsToRelation(self, objInteraction, relation, dictExperiment):
        """
        Adds bibliographic/experiment references and the interaction detection
        method of every experiment of the interaction to the relation.

        Returns the list of participant identification method xrefs found
        (the method is the same for all participants of the interaction).
        """
        listObjXRefMethodParticipantIdentification = []
        for idExperiment in objInteraction.listExperimentId:
            experiment = dictExperiment[idExperiment]
            # Experiment description is ignored for now.
            # Fill experiment bibref - secondary references are ignored
            self.addXRefAttributesToExternalEntityObject(experiment.xRefBib, relation, flagIgnoreRefSecondary=True)
            # Fill experiment xref - secondary references are ignored
            if experiment.xRef is not None:
                self.addXRefAttributesToExternalEntityObject(experiment.xRef, relation, flagIgnoreRefSecondary=True)
            # Fill experiment identification method; the first three characters
            # of the identifier ("MI:") are dropped
            refMethod = experiment.xRefMethodInteraction.refPrimary
            if refMethod.db == "psi-mi":
                relation.add_attribute(ExternalEntityRelationAttribute( attribute_identifier = "method_id",
                                                                        value = refMethod.id[3:] ))
            elif refMethod.db == "grid":
                labelMethod = experiment.nameMethodInteraction.label
                if labelMethod in DICT_METHOD_CONVERSION_GRID_TO_PSI_MI:
                    relation.add_attribute(ExternalEntityRelationAttribute( attribute_identifier = "method_id",
                                                                            value = DICT_METHOD_CONVERSION_GRID_TO_PSI_MI[labelMethod][3:] ))
                else:
                    sys.stderr.write("Method %s not recognized\n" %labelMethod)
            # Store participant identification method as xref in a list
            if experiment.xRefMethodParticipant is not None:
                listObjXRefMethodParticipantIdentification.append(experiment.xRefMethodParticipant)
        return listObjXRefMethodParticipantIdentification

    def _addParticipantsToRelation(self, objInteraction, relation, dictIdInteractorToIdExternal, listObjXRefMethodParticipantIdentification):
        """
        Adds every participant of the interaction to the relation, with its
        detection methods, roles and cardinality (number of times the same
        external entity appears in the participant list).
        """
        dictIdExternalToCardinality = {}
        for participant in objInteraction.listParticipant:
            try:
                idExternal = dictIdInteractorToIdExternal[participant.interactorId]
            except KeyError:
                sys.stderr.write("Warning: Unassigned interactor %s\n" %participant.interactorId)
                continue
            # Repeated participants only increase the cardinality.
            # NOTE(review): indentation was lost in this listing; roles are
            # assumed to be filled only the first time a participant is seen,
            # as the "need not to repeat same participant information" comment
            # of the original suggests -- confirm.
            if not insertKeyIntoHistogramDictionary(dictIdExternalToCardinality, idExternal):
                continue
            # Add new participant
            relation.add_participant( externalEntityID = idExternal )
            # Fill participant identification methods using the given list
            for objXRefMethodIdentification in listObjXRefMethodParticipantIdentification:
                relation.add_participant_attribute(externalEntityID = idExternal,
                                                   participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "detection_method",
                                                                                                                      value = objXRefMethodIdentification.refPrimary.id[3:] ))
            # Fill biological role first, then experimental roles
            listObjNameRole = []
            if participant.nameRoleBiological is not None:
                listObjNameRole.append(participant.nameRoleBiological)
            listObjNameRole.extend(participant.listNameRoleExperimental)
            for objNameRole in listObjNameRole:
                nameRoleConverted = self.decideRoleSpecificConversions(objNameRole.label)
                if nameRoleConverted != "ignore":
                    relation.add_participant_attribute(externalEntityID = idExternal,
                                                       participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "role",
                                                                                                                          value = nameRoleConverted ))
        for (idExternal, cardinality) in dictIdExternalToCardinality.items():
            relation.add_participant_attribute(externalEntityID = idExternal,
                                               participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "cardinality",
                                                                                                                  value = cardinality ))

    def addNameAttributesToExternalEntityObject(self, objPsi_MiNames, psi_MiFormatted_object, nameAttribute="name", flagIgnoreAlias=False):
        """
        Adds the name, label and (unless flagIgnoreAlias) the aliases of
        objPsi_MiNames as attributes of psi_MiFormatted_object.

        Names and labels longer than MAX_NAME_LENGTH are stored under
        "description" instead of nameAttribute. Aliases whose type converts to
        "ignore", or whose name is None, are skipped.
        """
        listValue = []
        if objPsi_MiNames.name is not None:
            # Characters outside ASCII are replaced by "?"
            listValue.append(objPsi_MiNames.name.encode("ascii", "replace"))
        if objPsi_MiNames.label is not None:
            listValue.append(objPsi_MiNames.label)
        for value in listValue:
            if len(value) > MAX_NAME_LENGTH:
                psi_MiFormatted_object.add_attribute(ExternalEntityAttribute("description", value))
            else:
                psi_MiFormatted_object.add_attribute(ExternalEntityAttribute(nameAttribute, value))

        if flagIgnoreAlias or objPsi_MiNames.listAlias is None:
            return
        for (aliasType, aliasName) in objPsi_MiNames.listAlias:
            attribute = self.decideAliasTypeSpecificConversions(aliasType)
            if attribute != "ignore" and aliasName is not None:
                psi_MiFormatted_object.add_attribute(ExternalEntityAttribute(attribute, aliasName))

    def addXRefAttributesToExternalEntityObject(self, objPsi_MiXRef, psi_MiFormatted_object, flagIgnoreRefSecondary=False, attribute_class=ExternalEntityAttribute):
        """
        Adds the primary reference and (unless flagIgnoreRefSecondary) the
        secondary references of objPsi_MiXRef to psi_MiFormatted_object as
        attribute_class instances. References whose database converts to
        "ignore" are skipped.
        """
        listObjDBReference = []
        if objPsi_MiXRef.refPrimary is not None:
            listObjDBReference.append(objPsi_MiXRef.refPrimary)
        if not flagIgnoreRefSecondary and objPsi_MiXRef.listRefSecondary is not None:
            listObjDBReference.extend(objPsi_MiXRef.listRefSecondary)

        for objDBReference in listObjDBReference:
            (dbNameConverted, dictFieldName, dictFieldValue, dictFieldType) = self.decideDBReferenceSpecificConversions(objDBReference)
            if dbNameConverted != "ignore":
                psi_MiFormatted_object.add_attribute( attribute_class( attribute_identifier = dbNameConverted,
                                                                       value = dictFieldValue,
                                                                       type = dictFieldType ) )

    def decideInteractorTypeSpecificConversions(self, interactorType):
        """
        Converts a PSI-MI interactor type into an external entity type.
        Returns None for None and for unknown types (with a warning on stderr).
        """
        if interactorType is None:
            return None
        interactorTypeConverted = self._INTERACTOR_TYPE_CONVERSION.get(interactorType)
        if interactorTypeConverted is None:
            # Covers qualified types ending in "dna"/"rna" (e.g. "mrna")
            if interactorType.endswith("dna"):
                interactorTypeConverted = "DNA"
            elif interactorType.endswith("rna"):
                interactorTypeConverted = "RNA"
            else:
                sys.stderr.write("Warning: Unkown interactor type: %s\n" %interactorType)
        return interactorTypeConverted

    def decideSequenceTypeSpecificConversions(self, sequenceType):
        """
        Converts a PSI-MI interactor type into a sequence type
        ("peptide", "dna" or "rna"). Returns None for None and for unknown
        types (with a warning on stderr).
        """
        if sequenceType is None:
            return None
        sequenceTypeConverted = self._SEQUENCE_TYPE_CONVERSION.get(sequenceType)
        if sequenceTypeConverted is None:
            # Covers qualified types ending in "dna"/"rna" (e.g. "mrna")
            if sequenceType.endswith("dna"):
                sequenceTypeConverted = "dna"
            elif sequenceType.endswith("rna"):
                sequenceTypeConverted = "rna"
            else:
                sys.stderr.write("Warning: Unkown sequence type: %s\n" %sequenceType)
        return sequenceTypeConverted

    def decideRoleSpecificConversions(self, nameRole):
        """
        Converts a participant role label into the stored role name.
        Returns "ignore" for unspecified roles and for unknown roles (the
        latter with a warning on stderr).
        """
        nameRoleConverted = self._ROLE_CONVERSION.get(nameRole)
        if nameRoleConverted is None:
            sys.stderr.write("Warning: decideRoleSpecificConversions - Unknown type identifier: %s\n" %nameRole)
            nameRoleConverted = "ignore"
        return nameRoleConverted

    def decideAliasTypeSpecificConversions(self, type):
        """
        Converts an alias type into the attribute name used to store the alias.
        Returns "ignore" for aliases that are not stored and for unknown types
        (the latter with a warning on stderr).
        """
        # NOTE(review): the "def" line of this method was missing from the
        # listing; the signature is reconstructed from the call in
        # addNameAttributesToExternalEntityObject and from the body -- confirm.
        attributeName = self._ALIAS_TYPE_CONVERSION.get(type)
        if attributeName is None:
            sys.stderr.write("Warning: decideAliasTypeSpecificConversions - Unknown type identifier: %s\n" %type)
            attributeName = "ignore"
        return attributeName

    def getIndexOfFirstOccurenceOfDigit(self, strDBName):
        """
        Returns the index of the first ASCII digit in strDBName, or -1 when
        there is none.
        """
        for (index, char) in enumerate(strDBName):
            if 48 <= ord(char) <= 57: # "0".."9"
                return index
        return -1

    def _isAsciiDigit(self, char):
        """
        True when char is a single ASCII digit; False otherwise, including for
        the empty string.
        """
        return "0" <= char <= "9"

    def decideDBReferenceSpecificConversions(self, objDBReference):
        """
        Decides how a database reference (db, id, type, secondary) is stored.

        Returns (dbNameConverted, dictFieldName, dictFieldValue, dictFieldType):
        - dbNameConverted: attribute name, or "ignore" when the reference must
          not be stored
        - dictFieldName: always "value"
        - dictFieldValue: the identifier, possibly without prefix/version
        - dictFieldType: "unique" for references of type "identity",
          "cross-reference" otherwise

        Unknown databases are reported once each on stderr and ignored.
        """
        db = objDBReference.db
        refId = objDBReference.id

        dbNameConverted = "ignore"
        dictFieldName = "value" # the default
        dictFieldValue = refId # the default
        if objDBReference.type == "identity":
            dictFieldType = "unique"
        else:
            dictFieldType = "cross-reference" # the default

        # A reference without database or identifier cannot be converted
        if db is None or refId is None:
            return (dbNameConverted, dictFieldName, dictFieldValue, dictFieldType)

        dbUpper = db.upper()

        if dbUpper in self._DB_NAME_CONVERSION:
            dbNameConverted = self._DB_NAME_CONVERSION[dbUpper]
        elif dbUpper in self._DB_PREFIXED_CONVERSION:
            (dbNameConverted, lengthPrefix) = self._DB_PREFIXED_CONVERSION[dbUpper]
            dictFieldValue = refId[lengthPrefix:]
            self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:lengthPrefix])
        elif dbUpper in self._DB_IGNORED:
            dbNameConverted = "ignore"
        elif dbUpper in ("UNIPROTKB", "UNIPROT", "UNIPROT KNOWLEDGE BASE", "SWISSPROT", "TREMBL"):
            if not refId.startswith("unknown"):
                dbNameConverted = "uniprotaccession"
                index = refId.find("-PRO_")
                if index != -1:
                    # Keep the accession, drop the "-PRO_..." suffix
                    dictFieldValue = refId[:index]
                else:
                    index = refId.find("NP_")
                    if index != -1:
                        dictFieldValue = refId[index+3:]
                        # NOTE(review): indentation lost in the listing; the
                        # prefix is assumed to be registered only when "NP_"
                        # is present.
                        self.checkOrInsertDBNamePrefix(dbNameConverted, "NP_")
        elif dbUpper == "GO":
            dbNameConverted = "GO"
            dictFieldValue = refId[3:]
            flagInconsistency = self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:3])
            # To correct the cases where the prefix is missing
            if flagInconsistency:
                if self._isAsciiDigit(refId[:1]):
                    dictFieldValue = refId
                elif refId == "CC":
                    dbNameConverted = "ignore"
                elif refId.startswith("GO ") and self._isAsciiDigit(refId[3:4]):
                    dictFieldValue = refId[3:]
        elif dbUpper == "INTACT":
            # MINT identifiers are sometimes given under the intact database
            if refId.find("MINT-") == -1:
                dbNameConverted = "IntAct"
                dictFieldValue = refId[4:]
                self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:4])
            else:
                dbNameConverted = "MINT"
                dictFieldValue = refId[5:]
                self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:5])
        elif dbUpper == "PROTEIN ACCESSION":
            # The position of the first digit tells the identifier kind apart;
            # any other position leaves the reference ignored
            indexDigit = self.getIndexOfFirstOccurenceOfDigit(refId)
            if indexDigit == 3:
                dbNameConverted = "AccessionNumber"
            elif indexDigit == 1:
                dbNameConverted = "UniprotAccession"
        elif dbUpper in ("REACTOME COMPLEX", "REACTOME PROTEIN", "REACTOME"):
            dbNameConverted = "Reactome"
            # Drop the 6 character prefix and whatever follows the last "."
            index = refId.rfind('.')
            if index == -1:
                dictFieldValue = refId[6:]
            else:
                dictFieldValue = refId[6:index]
            self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:6])
        elif dbUpper in ("DDBJ-EMBL-GENBANK", "DDBJ/EMBL/GENBANK", "GENBANK_NUCLEOTIDE_G"):
            dbNameConverted = "AccessionNumber"
            if self.sourcedb_name == "mint":
                dictFieldValue = objDBReference.secondary
        elif dbUpper == "GENBANK_PROTEIN_GI":
            if refId.lower().startswith("gi:"):
                dictFieldValue = refId[3:]
            # NOTE(review): indentation lost in the listing; "GI" is assumed to
            # be assigned whether or not the "gi:" prefix is present -- confirm.
            dbNameConverted = "GI"
        elif dbUpper == "PUBMED":
            # "unassigned..." and "missing_pmid" are placeholders
            if refId.startswith("unassigned") or refId.startswith("missing"):
                dbNameConverted = "ignore"
            else:
                dbNameConverted = "pubmed"
        elif dbUpper == "REFSEQ":
            dbNameConverted = "refseq"
            # Drop whatever follows the last "."
            index = refId.rfind('.')
            if index != -1:
                dictFieldValue = refId[:index]
        else:
            if db not in self.not_recognized_cross_refs:
                sys.stderr.write("Warning: decideDBReferenceSpecificConversions - Unknown database identifier: %s\n" %db)
                self.not_recognized_cross_refs.add(db)
            dbNameConverted = "ignore"

        return (dbNameConverted, dictFieldName, dictFieldValue, dictFieldType)

    def checkOrInsertDBNamePrefix(self, dbName, prefix):
        """
        Registers prefix as the identifier prefix of dbName the first time
        dbName is seen. On later calls returns True (after a warning on stderr)
        when prefix differs from the registered one, False otherwise.
        """
        dictDBNameToPrefix = Psi_MiFormattedDBParser.dictDBNameToPrefix
        if dbName not in dictDBNameToPrefix:
            dictDBNameToPrefix[dbName] = prefix
            return False
        if dictDBNameToPrefix[dbName] != prefix:
            sys.stderr.write("Warning: Database name prefix inconsistency: %s\t%s\n" %(dbName, prefix))
            return True
        return False
676
def insertKeyIntoHistogramDictionary(dictHistogram, key):
    """
    Inserts a key to a dictionary which is designated to be used as histogram.

    Increments the count stored under key (starting at 1) and returns True if
    this is the first occurrence of the key, False otherwise.
    """
    # "in" replaces dict.has_key(), which no longer exists in Python 3
    if key in dictHistogram:
        dictHistogram[key] += 1
        return False
    dictHistogram[key] = 1
    return True
685