| Home | Trees | Indices | Help |
|
|---|
|
|
1 """
2 BIANA: Biologic Interactions and Network Analysis
3 Copyright (C) 2009 Javier Garcia-Garcia, Emre Guney, Baldo Oliva
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 """
19
20 """
21 File : psi_MiFormattedDB2biana.py
22 Author : Javier Garcia & Emre Guney
23 Creation : December 2007
24 Contents : inserts information from a PSI-MI formatted XML file database into biana
25 Called from :
26
27 =======================================================================================================
28
29 This file implements a program that fills up tables in database biana with information from a PSI-MI formatted database
30
31 --> Databases must be in PSI-MI XML format
32
33 """
34
35 import os
36 from bianaParser import *
37 from psi_MiXMLParser import *
38 import sets
39
# Lookup used for experiments whose interaction detection method is given as a
# "grid" reference: the method label is the key, the PSI-MI accession the value.
DICT_METHOD_CONVERSION_GRID_TO_PSI_MI = {
    # Every affinity capture flavour is stored with the generic affinity
    # chromatography accession; the narrower accession that was considered
    # for each one is kept as a note.
    'Affinity Capture-Western': "MI:0004",       # narrower candidate: "MI:0113"
    'Affinity Capture-RNA': "MI:0004",           # narrower candidate: ~"MI:0709"
    'Affinity Capture-MS': "MI:0004",            # narrower candidate: "MI:0427"
    'Affinity Capture-Luminescence': "MI:0004",  # narrower candidate: ~"MI:0729"
    # Spelling variant without the blank. Only affinity chromatography is
    # assigned; open question whether MS should be recorded as well.
    'AffinityCapture-MS': "MI:0004",

    'Biochemical Activity': "MI:0401",
    'Co-crystal Structure': "MI:0114",
    'Co-fractionation': "MI:0027",
    'Co-localization': "MI:0403",
    'Co-purification': "MI:0025",
    'Far Western': "MI:0047",
    'FRET': "MI:0055",
    'Invitro': "MI:0492",
    'Invivo': "MI:0493",
    'PCA': "MI:0090",                            # protein complementation assay
    'Protein-peptide': "MI:0084",
    'Protein-RNA': "MI:0316",
    'Reconstituted Complex': "MI:0492",
    'Two-hybrid': "MI:0018",

    # Genetic interaction labels
    'Dosage Growth Defect': "MI:0274",
    'Dosage Lethality': "MI:0441",
    'Dosage Rescue': "MI:0261",
    'Phenotypic Enhancement': "MI:0802",
    'Phenotypic Suppression': "MI:0796",
    'Synthetic Growth Defect': "MI:0274",
    'Synthetic Lethality': "MI:0441",
    'Synthetic Rescue': "MI:0262",
}

# Names or labels longer than this are stored as "description" instead of as a name.
MAX_NAME_LENGTH = 100
69
71 """
72 PSI-MI formatted DB Parser Class
73 """
74
75 name = "psi_mi_2.5"
76 description = "This parser inserts psi-mi 2.5 formated information to biana database"
77 external_entity_definition = "Each relation participant is considered as a distinct External Entity"
78 external_entity_relations = "External Entity Relations"
79
80
81 dictDBNameToPrefix = {}
82 #dictPrefixToDBName = {}
84 # Start with the default values
85 BianaParser.__init__(self, default_db_description = "PSI-MI formatted protein-protein interaction database",
86 default_script_name = "psi_Mi25Parser.py",
87 default_script_description = "",
88 additional_compulsory_arguments = [("default-attribute=",None,"Name of the default identifier that this database gives (such as intact/mint/biogrid/dip/hprd/bind/mpact...)")])
89
90
91 return
92
94 """
95 Method that implements the specific operations of PSI-MI formatted database parser
96 """
97
98 self.not_recognized_cross_refs = sets.Set()
99
100 #directoryData = self.input_file[:self.input_file.rfind("/")+1]
101 directoryData = os.path.dirname(self.input_file)
102 command = None
103 onlyOneFileFlag = False
104
105 if os.path.isdir(self.input_file):
106 command = None
107 directoryData = os.path.dirname(self.input_file+os.sep)+os.sep
108 elif os.path.isfile(self.input_file):
109 directoryData = os.path.dirname(self.input_file)+os.sep
110 if self.input_file.endswith(".zip"):
111 command = "unzip"
112 elif self.input_file.endswith(".gz"):
113 command = "gunzip"
114 elif self.input_file.endswith(".xml"):
115 command = None
116 onlyOneFileFlag = True
117 else:
118 sys.stderr.write("Warning: Input file extension (%s) not recognized by parser\n" % self.input_file[-3:])
119 return
120
121 if command is not None:
122 os.chdir(directoryData)
123 os.system("%s %s" % (command, self.input_file))
124
125 if onlyOneFileFlag:
126 listFileName = [self.input_file[self.input_file.rfind(os.sep)+1:]]
127 else:
128 #print directoryData
129 listFileName = os.listdir(directoryData)
130
131 self.file_number = 0
132
133 parser = Psi_MiXMLParser(self.verbose)
134
135
136 flagContinuePointReached = False
137
138
139 for fileName in listFileName:
140
141 sys.stderr.write("Parsing file %s\n" %fileName)
142
143 if not (fileName.endswith(".xml") or fileName.endswith(".xsd.xml") or fileName.endswith(".mif25")):
144 sys.stderr.write("Ignoring file: %s\n" % fileName)
145 continue
146 # if not flagContinuePointReached:
147 # if fileName == "BIOGRID-ORGANISM-Caenorhabditis_elegans-2.0.37.psi25.xml":
148 # flagContinuePointReached = True
149 # continue
150
151 if self.time_control:
152 if self.file_number%10==0:
153 sys.stderr.write("%s files done in %s seconds\n" %(self.file_number, time.time()-self.initial_time))
154
155 self.file_number += 1
156
157 if self.verbose:
158 sys.stderr.write("\n------- %s\n" %fileName)
159
160 # continue # to print just names
161 try:
162 parser.parseFile(directoryData+fileName)
163 except Exception, inst:
164 sys.stderr.write("%s\n" %inst)
165 listEntry = parser.getEntries()
166
167 psi_MiFormatted_object_number = 0
168 for objEntry in listEntry:
169 dictIdInteractorToIdExternal = {}
170 dictExperiment = objEntry.getExperiments()
171 dictInteractor = objEntry.getInteractors()
172 dictInteraction = objEntry.getInteractions()
173
174 if self.verbose:
175 sys.stderr.write("\nInteractors:\n")
176
177 # Create external entities for interactors
178 for objInteractor in dictInteractor.itervalues():
179 if self.verbose:
180 sys.stderr.write("%s\n" %objInteractor.id)
181
182 ###if objInteractor.id != "350":
183 ### continue
184 # Start new entry
185 #print objInteractor.type.label
186 interactorType = self.decideInteractorTypeSpecificConversions(objInteractor.type.label)
187 if interactorType is None:
188 interactorType = self.decideInteractorTypeSpecificConversions(objInteractor.type.name)
189 psi_MiFormatted_object = ExternalEntity( source_database = self.database, type=interactorType) # "protein")
190 psi_MiFormatted_object_number += 1
191 # Fill the new entry
192 # Fill name
193 self.addNameAttributesToExternalEntityObject(objInteractor.name, psi_MiFormatted_object)
194 # Fill xRef
195 self.addXRefAttributesToExternalEntityObject(objInteractor.xRef, psi_MiFormatted_object)
196 # Fill taxId
197 if objInteractor.taxId is not None and int(objInteractor.taxId) >= 0:
198 psi_MiFormatted_object.add_attribute(ExternalEntityAttribute("taxid",objInteractor.taxId))
199 # Fill sequence
200 if objInteractor.sequence is not None:
201 sequenceType = self.decideSequenceTypeSpecificConversions(objInteractor.type.label)
202 if sequenceType == None:
203 sequenceType = self.decideSequenceTypeSpecificConversions(objInteractor.type.name)
204 #psi_MiFormatted_object.add_attribute(ExternalEntityAttribute("sequence","".join(objInteractor.sequence),"type" : sequenceType})
205 # Insert the entry to the database
206 self.biana_access.insert_new_external_entity( externalEntity = psi_MiFormatted_object )
207 dictIdInteractorToIdExternal[objInteractor.id] = psi_MiFormatted_object.get_id()
208 #dictIdInteractorToIdExternal[objInteractor.id] = 1 #!
209
210 if self.verbose:
211 sys.stderr.write("\nInteractions:\n")
212
213 # Create external entity relations for interactions
214 for objInteraction in dictInteraction.itervalues():
215 if self.verbose:
216 sys.stderr.write("%s\n" %objInteraction.id)
217
218 # Start new entry relation
219 if objInteraction.negative:
220 typeRelation = "no_interaction"
221 else:
222 typeRelation = "interaction"
223 psi_MiFormatted_object = ExternalEntityRelation( source_database=self.database, relation_type=typeRelation )
224 # Fill xRef
225 if objInteraction.xRef is not None:
226 self.addXRefAttributesToExternalEntityObject( objPsi_MiXRef= objInteraction.xRef, psi_MiFormatted_object=psi_MiFormatted_object, attribute_class=ExternalEntityRelationAttribute )
227 # Fill name
228 if objInteraction.name is not None:
229 self.addNameAttributesToExternalEntityObject(objInteraction.name, psi_MiFormatted_object)
230 # Fill experimentList
231 listObjXRefMethodParticipantIdentification = []
232 for idExperiment in objInteraction.listExperimentId:
233 experiment = dictExperiment[idExperiment]
234 # Fill experiment description - for now ignored --> add_common_attribute(intactExperiment) would return internal id assigned for each exp desription which would then be inserted as an attribute like methodID
235 #if experiment.description.name is not None: # description has no type ###self.addNameAttributesToExternalEntityObject(experiment.description, psi_MiFormatted_object, nameAttribute="description", flagIgnoreAlias=True)
236 # psi_MiFormatted_object.add_attribute(attributeName="description", attributeFields={"value": experiment.description.name})
237 # Fill experiment bibref
238 self.addXRefAttributesToExternalEntityObject(experiment.xRefBib, psi_MiFormatted_object, flagIgnoreRefSecondary=True)
239 # Fill experiment xref - secondary references are ignored
240 if experiment.xRef is not None:
241 self.addXRefAttributesToExternalEntityObject(experiment.xRef, psi_MiFormatted_object, flagIgnoreRefSecondary=True)
242 # Fill experiment identification method
243 ###self.addXRefAttributesToExternalEntityObject(experiment.xRefMethodInteraction, psi_MiFormatted_object, flagIgnoreRefSecondary=True)
244 if experiment.xRefMethodInteraction.refPrimary.db == "psi-mi":
245 psi_MiFormatted_object.add_attribute(ExternalEntityRelationAttribute( attribute_identifier = "method_id",
246 value = experiment.xRefMethodInteraction.refPrimary.id[3:] ) )
247 if experiment.xRefMethodInteraction.refPrimary.db == "grid":
248 if DICT_METHOD_CONVERSION_GRID_TO_PSI_MI.has_key(experiment.nameMethodInteraction.label):
249 psi_MiFormatted_object.add_attribute(ExternalEntityRelationAttribute( attribute_identifier="method_id",
250 value = DICT_METHOD_CONVERSION_GRID_TO_PSI_MI[experiment.nameMethodInteraction.label][3:] ))
251 else:
252 sys.stderr.write("Method %s not recognized\n" %experiment.nameMethodInteraction.label)
253 ###else:
254 ### print "Warning interaction type is not provided as psi-mi db reference:", experiment.xRefMethodInteraction.refPrimary.db
255 # Store participant identification method as xref in a list (method is the same for all participants in this interaction)
256 if experiment.xRefMethodParticipant is not None:
257 listObjXRefMethodParticipantIdentification.append(experiment.xRefMethodParticipant)
258 # Fill participantList
259 dictIdExternalToCardinality = {}
260 for participant in objInteraction.listParticipant:
261 try:
262 idExternal = dictIdInteractorToIdExternal[participant.interactorId]
263 except:
264 sys.stderr.write("Warning: Unassigned interactor %s\n" %participant.interactorId)
265 continue
266 flagFirstTime = insertKeyIntoHistogramDictionary(dictIdExternalToCardinality, idExternal)
267 if flagFirstTime: # need not to repeat same participant information
268 # Add new participant
269 psi_MiFormatted_object.add_participant( externalEntityID = idExternal )
270 # Fill participant identification methods using above created list
271 for objXRefMethodIdentification in listObjXRefMethodParticipantIdentification:
272 psi_MiFormatted_object.add_participant_attribute(externalEntityID = idExternal,
273 participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "detection_method",
274 value = objXRefMethodIdentification.refPrimary.id[3:]))
275 # Fill biological role
276 if participant.nameRoleBiological is not None:
277 nameRoleConverted = self.decideRoleSpecificConversions(participant.nameRoleBiological.label)
278 if nameRoleConverted != "ignore":
279 psi_MiFormatted_object.add_participant_attribute(externalEntityID = idExternal,
280 participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "role",
281 value = nameRoleConverted ))
282 # Fill experimental roles
283 for objNameRoleExperimental in participant.listNameRoleExperimental:
284 nameRoleConverted = self.decideRoleSpecificConversions(objNameRoleExperimental.label)
285 if nameRoleConverted != "ignore":
286 psi_MiFormatted_object.add_participant_attribute(externalEntityID = idExternal,
287 participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "role",
288 value = nameRoleConverted ))
289 for (idExternal, cardinality) in dictIdExternalToCardinality.iteritems():
290 psi_MiFormatted_object.add_participant_attribute(externalEntityID = idExternal,
291 participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = "cardinality",
292 value = cardinality ))
293 # Fill interactionType - physical interaction for each - ignored for now
294 ###self.addXRefAttributesToExternalEntityObject(objInteraction.type, psi_MiFormatted_object, flagIgnoreRefSecondary=True)
295 # Insert the entry to the database
296 self.biana_access.insert_new_external_entity( externalEntity = psi_MiFormatted_object ) #!
297
298 return
299
def addNameAttributesToExternalEntityObject(self, objPsi_MiNames, psi_MiFormatted_object, nameAttribute="name", flagIgnoreAlias=False):
    """
    Copy the names carried by a PSI-MI names object onto a BIANA object.

    objPsi_MiNames must expose the attributes name, label and listAlias
    (an iterable of (type, name) pairs, or None).

    The full name is converted to ASCII (unencodable characters replaced) and,
    like the label, is stored under nameAttribute; values longer than
    MAX_NAME_LENGTH are stored under "description" instead. Unless
    flagIgnoreAlias is set, each alias with a name is stored under the
    attribute chosen by decideAliasTypeSpecificConversions, skipping those
    mapped to "ignore".
    """
    # Full name
    fullName = objPsi_MiNames.name
    if fullName is not None:
        asciiName = fullName.encode("ascii", "replace")
        if len(asciiName) <= MAX_NAME_LENGTH:
            psi_MiFormatted_object.add_attribute(ExternalEntityAttribute(nameAttribute, asciiName))
        else:
            psi_MiFormatted_object.add_attribute(ExternalEntityAttribute(attribute_identifier="description", value=asciiName))

    # Short label
    shortLabel = objPsi_MiNames.label
    if shortLabel is not None:
        if len(shortLabel) <= MAX_NAME_LENGTH:
            psi_MiFormatted_object.add_attribute(ExternalEntityAttribute(nameAttribute, shortLabel))
        else:
            psi_MiFormatted_object.add_attribute(ExternalEntityAttribute("description", shortLabel))

    # Aliases
    if flagIgnoreAlias or objPsi_MiNames.listAlias is None:
        return
    for (aliasType, aliasName) in objPsi_MiNames.listAlias:
        # The conversion is looked up even for nameless aliases so that
        # unknown alias types are still reported.
        attribute = self.decideAliasTypeSpecificConversions(aliasType)
        if attribute == "ignore" or aliasName is None:
            continue
        psi_MiFormatted_object.add_attribute(ExternalEntityAttribute(attribute, aliasName))
    return
320
def addXRefAttributesToExternalEntityObject(self, objPsi_MiXRef, psi_MiFormatted_object, flagIgnoreRefSecondary=False, attribute_class=ExternalEntityAttribute):
    """
    Attach the database references of a PSI-MI xref object to a BIANA object.

    The primary reference (when present) is handled first, followed by the
    secondary references unless flagIgnoreRefSecondary is set. Each reference
    is translated by decideDBReferenceSpecificConversions; references mapped
    to "ignore" are dropped, the others are wrapped in attribute_class and
    added to psi_MiFormatted_object.
    """
    def attachReference(objDBReference):
        # The field name element of the conversion result is not needed here.
        (dbNameConverted, _fieldName, fieldValue, fieldType) = self.decideDBReferenceSpecificConversions(objDBReference)
        if dbNameConverted == "ignore":
            return
        psi_MiFormatted_object.add_attribute(attribute_class(attribute_identifier=dbNameConverted,
                                                             value=fieldValue,
                                                             type=fieldType))

    if objPsi_MiXRef.refPrimary is not None:
        attachReference(objPsi_MiXRef.refPrimary)

    if flagIgnoreRefSecondary or objPsi_MiXRef.listRefSecondary is None:
        return
    for objDBReference in objPsi_MiXRef.listRefSecondary:
        attachReference(objDBReference)
    return
337
def decideInteractorTypeSpecificConversions(self, interactorType):
    """
    Convert a PSI-MI interactor type into the external entity type to store.

    Returns "protein", "DNA", "RNA" or "compound". Returns None when
    interactorType is None or not recognized; in the latter case a warning
    is written to stderr.
    """
    if interactorType is None:
        return None

    exactMatches = {
        "protein": "protein",
        "peptide": "protein",
        "dna": "DNA",
        "rna": "RNA",
        "nucleic acid": "DNA",
        "small molecule": "compound",
    }
    if interactorType in exactMatches:
        return exactMatches[interactorType]

    # Any other type whose name ends in dna / rna is treated as DNA / RNA.
    if interactorType.endswith("dna"):
        return "DNA"
    if interactorType.endswith("rna"):
        return "RNA"

    sys.stderr.write("Warning: Unkown interactor type: %s\n" % interactorType)
    return None
363
def decideSequenceTypeSpecificConversions(self, sequenceType):
    """
    Convert a PSI-MI interactor type into a sequence type.

    Returns "peptide", "dna" or "rna". Returns None when sequenceType is None
    or not recognized; in the latter case a warning is written to stderr.
    """
    if sequenceType is None:
        return None

    exactMatches = {
        "protein": "peptide",
        "peptide": "peptide",
        "dna": "dna",
        "rna": "rna",
        "nucleic acid": "dna",
    }
    if sequenceType in exactMatches:
        return exactMatches[sequenceType]

    # Any other type whose name ends in dna / rna is treated as dna / rna.
    if sequenceType.endswith("dna"):
        return "dna"
    if sequenceType.endswith("rna"):
        return "rna"

    sys.stderr.write("Warning: Unkown sequence type: %s\n" % sequenceType)
    return None
389
def decideRoleSpecificConversions(self, nameRole):
    """
    Convert a PSI-MI participant role label into the role value to store.

    Returns the converted role, or "ignore" for the unspecified-role labels
    and for labels that are not recognized. Unrecognized labels are also
    reported on stderr.
    """
    roleByLabel = {
        "bait": "bait",
        "prey": "prey",
        "neutral component": "neutral",
        "unspecified role": "ignore",
        "unspecifiedrole": "ignore",
        "fluorescence acceptor": "acceptor",
        "fluorescence accept": "acceptor",
        "fluorescence donor": "donor",
        "self": "self",
        "ancillary": "ancillary",
        "enzyme": "enzyme",
        "enzyme target": "enzyme target",
        "inhibitor": "inhibitor",
        "cofactor": "cofactor",
        "electron acceptor": "acceptor",
        "electron donor": "donor",
        "stimulator": "stimulator",
    }
    try:
        return roleByLabel[nameRole]
    except (KeyError, TypeError):
        # TypeError covers unhashable labels, which are reported like any
        # other unknown label.
        sys.stderr.write("Warning: decideRoleSpecificConversions - Unknown type identifier: %s\n" % nameRole)
        return "ignore"
430
def decideAliasTypeSpecificConversions(self, type):
    """
    Convert a PSI-MI alias type into the attribute name used to store it.

    Returns "GeneSymbol", "orfName" or "OrderedLocusName", or "ignore" for
    alias types that are not stored. Alias types that are not recognized are
    also reported on stderr.
    """
    attributeByAliasType = {
        "gene name": "GeneSymbol",
        "gene name synonym": "GeneSymbol",
        "orf name": "orfName",
        "locus name": "OrderedLocusName",
        # Known alias type that is deliberately not stored.
        "isoform synonym": "ignore",
    }
    try:
        return attributeByAliasType[type]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, reported like any unknown type.
        sys.stderr.write("Warning: decideAliasTypeSpecificConversions - Unknown type identifier: %s\n" % type)
        return "ignore"
450
def getIndexOfFirstOccurenceOfDigit(self, strDBName):
    """
    Return the position of the first ASCII digit (0-9) in strDBName,
    or -1 when it contains none.
    """
    for (index, char) in enumerate(strDBName):
        # Plain character comparison keeps the test restricted to ASCII
        # digits, exactly as the code point range 48-57.
        if "0" <= char <= "9":
            return index
    return -1
458
def decideDBReferenceSpecificConversions(self, objDBReference):
    """
    Translate a PSI-MI database reference into the fields of a BIANA attribute.

    objDBReference must expose the attributes db, id, type and secondary.

    Returns the tuple (dbNameConverted, dictFieldName, dictFieldValue, dictFieldType):
      dbNameConverted -- attribute identifier to use, or "ignore" when the
                         reference must not be stored
      dictFieldName   -- always "value"
      dictFieldValue  -- the identifier, with the database specific prefix or
                         version suffix removed where one is handled
      dictFieldType   -- "unique" when the reference type is "identity",
                         "cross-reference" otherwise

    Database names are compared case-insensitively. A database name that is
    not recognized is reported on stderr the first time it is seen, recorded
    in self.not_recognized_cross_refs, and ignored.
    """
    db = objDBReference.db
    refId = objDBReference.id
    refType = objDBReference.type
    secondary = objDBReference.secondary

    dbNameConverted = "ignore"
    dictFieldName = "value"            # the default
    dictFieldValue = refId             # the default
    dictFieldType = "cross-reference"  # the default
    if refType == "identity":
        dictFieldType = "unique"

    # A missing database name is handled like any other unrecognized one.
    dbUpper = None
    if db is not None:
        dbUpper = db.upper()

    # Databases whose identifier is stored unchanged under a fixed attribute name.
    plainNames = {
        "INTENZ": "EC",
        "ENSEMBL": "ensembl",
        "ENCODE": "encode",
        "MIPS": "MIPS",
        "PROTEIN GI": "GI",
        "RCSB PDB": "pdb",
        "PDB": "pdb",
        "WWPDB": "pdb",
        "IPI": "IPI",
        "RGD": "rgd",
        "SGD": "SGD",
        "CYGD": "cygd",
        "FLYBASE": "FlyBase",
        "OMIM": "MIM",
        "MIM": "MIM",
        "ENTREZGENE": "geneID",
        "ENTREZ GENE/LOCUSLINK": "geneID",
        "HPRD": "HPRD",
        "HGNC": "HGNC",
        "MGI": "MGI",
        "TAIR": "TAIR",
        "RATMAP": "Ratmap",
        "IMGT/GENE-DB": "IMGT",
        "PSI-MI": "method_id",
    }
    # Databases whose identifier starts with a fixed-width prefix:
    # database name -> (attribute name, prefix length). The prefix is cut off
    # and registered through checkOrInsertDBNamePrefix.
    prefixedNames = {
        "INTERPRO": ("interpro", 3),
        "MINT": ("MINT", 5),
        "HUGE": ("Huge", 4),
        "DIP": ("DIP", 4),
        "WORMBASE": ("wormbasegeneid", 6),
        "UNIPARC": ("uniparc", 3),
        "CHEBI": ("chebi", 6),
    }
    # Known databases that are deliberately not stored.
    ignoredNames = ("DOI", "CAMJEDB", "ECOGENE", "NEWT", "IMEX", "AFCS", "PRIDE", "SO",
                    "GRID", "GRID_LEGACY", "CDNA GI", "CDNA ACCESSION", "N/A")

    if dbUpper in plainNames:
        dbNameConverted = plainNames[dbUpper]
    elif dbUpper in prefixedNames:
        (dbNameConverted, prefixLength) = prefixedNames[dbUpper]
        dictFieldValue = refId[prefixLength:]
        self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:prefixLength])
    elif dbUpper in ignoredNames:
        dbNameConverted = "ignore"
    elif dbUpper in ("UNIPROTKB", "UNIPROT", "UNIPROT KNOWLEDGE BASE", "SWISSPROT", "TREMBL"):
        if refId.startswith("unknown"):
            dbNameConverted = "ignore"
        else:
            dbNameConverted = "uniprotaccession"
            index = refId.find("-PRO_")
            if index != -1:
                # Keep only the part in front of the "-PRO_" suffix.
                dictFieldValue = refId[:index]
            else:
                index = refId.find("NP_")
                if index != -1:
                    dictFieldValue = refId[index+3:]
                    self.checkOrInsertDBNamePrefix(dbNameConverted, "NP_")
    elif dbUpper == "GO":
        dbNameConverted = "GO"
        dictFieldValue = refId[3:]
        flagInconsistency = self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:3])
        # To correct the cases where the prefix is missing. Slices are used
        # instead of indexing so that very short ids cannot raise IndexError.
        if flagInconsistency:
            if "0" <= refId[:1] <= "9":
                dictFieldValue = refId
            elif refId == "CC":
                dbNameConverted = "ignore"
            # Ids of the form "GO <digits>" need no correction: the value
            # already is the id without its first three characters.
    elif dbUpper == "INTACT":
        # MINT identifiers can show up under the intact database name.
        if refId.find("MINT-") == -1:
            dbNameConverted = "IntAct"
            dictFieldValue = refId[4:]
            self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:4])
        else:
            dbNameConverted = "MINT"
            dictFieldValue = refId[5:]
            self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:5])
    elif dbUpper == "PROTEIN ACCESSION":
        # The position of the first digit decides which kind of accession
        # this is; any other position leaves the reference ignored.
        indexDigit = self.getIndexOfFirstOccurenceOfDigit(refId)
        if indexDigit == 3:
            dbNameConverted = "AccessionNumber"
        elif indexDigit == 1:
            dbNameConverted = "UniprotAccession"
    elif dbUpper in ("REACTOME COMPLEX", "REACTOME PROTEIN", "REACTOME"):
        dbNameConverted = "Reactome"
        index = refId.rfind('.')
        if index == -1:
            dictFieldValue = refId[6:]
        else:
            # Drop everything from the last dot onwards.
            dictFieldValue = refId[6:index]
        self.checkOrInsertDBNamePrefix(dbNameConverted, refId[:6])
    elif dbUpper in ("DDBJ-EMBL-GENBANK", "DDBJ/EMBL/GENBANK", "GENBANK_NUCLEOTIDE_G"):
        dbNameConverted = "AccessionNumber"
        if self.sourcedb_name == "mint":
            dictFieldValue = secondary
    elif dbUpper == "GENBANK_PROTEIN_GI":
        if refId.lower().startswith("gi:"):
            dictFieldValue = refId[3:]
        dbNameConverted = "GI"
    elif dbUpper == "PUBMED":
        # Placeholders such as "unassigned..." or "missing_pmid" are not stored.
        if refId.startswith("unassigned") or refId.startswith("missing"):
            dbNameConverted = "ignore"
        else:
            dbNameConverted = "pubmed"
    elif dbUpper == "REFSEQ":
        dbNameConverted = "refseq"
        index = refId.rfind('.')
        if index != -1:
            # Drop everything from the last dot onwards.
            dictFieldValue = refId[:index]
    else:
        if db not in self.not_recognized_cross_refs:
            sys.stderr.write("Warning: decideDBReferenceSpecificConversions - Unknown database identifier: %s\n" % db)
            self.not_recognized_cross_refs.add(db)
        dbNameConverted = "ignore"

    return (dbNameConverted, dictFieldName, dictFieldValue, dictFieldType)
659
def checkOrInsertDBNamePrefix(self, dbName, prefix):
    """
    Remember the identifier prefix used by a database and flag deviations.

    The first prefix seen for dbName is stored in the class-level dictionary
    Psi_MiFormattedDBParser.dictDBNameToPrefix. Later calls with a different
    prefix write a warning to stderr; the stored prefix is not replaced.

    Returns True when prefix differs from the stored one, False otherwise
    (including the first time dbName is seen).
    """
    knownPrefixes = Psi_MiFormattedDBParser.dictDBNameToPrefix

    if dbName not in knownPrefixes:
        knownPrefixes[dbName] = prefix
        return False

    if knownPrefixes[dbName] != prefix:
        sys.stderr.write("Warning: Database name prefix inconsistency: %s\t%s\n" % (dbName, prefix))
        return True
    return False
676
685
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue May 26 17:06:37 2009 | http://epydoc.sourceforge.net |