Package biana :: Package BianaParser :: Module GenericParser
[hide private]
[frames] | no frames]

Source Code for Module biana.BianaParser.GenericParser

  1   
  2  """ 
  3  File        : GenericParser.py 
  4  Author      : Ramon Aragues, Javier Garcia Garcia 
  5  Creation    : 16.3.2004 
  6  Modified    : Javier Garcia Garcia December 2007 
  7  Contents    : fills up tables in the BIANA database with data from a generic tabulated file 
  8  Called from :  
  9   
 10  ======================================================================================================= 
 11   
 12  This file implements a program that fills up tables in the BIANA database from data in a tabulated file 
 13   
 14  This parser uses biopython libraries and methods 
 15   
 16  Command line option '--help' describes usage of this program 
 17   
 18  For more details on how to use it, read piana/README.populate_piana_db 
 19  """ 
 20   
 21   
 22  from bianaParser import * 
 23  import re, sys, sets 
 24   
class GenericParser(BianaParser):
    """
    Generic Parser Class

    Loads external entities, and the relations between them, into a BIANA
    database from a tab-separated text file.

    The input file is read line by line (blank lines are ignored) and is
    organised in two sections, each one introduced by a marker line:

    ``@EXTERNAL_ENTITY_DATA``
        The first line after the marker is the header.  It must contain the
        columns listed in ``mandatory_columns`` (``id`` and ``type``).  Every
        other column name is used as the attribute identifier of the values
        found in that column.

    ``@EXTERNAL_ENTITY_RELATION_DATA``
        The first line after the marker is the header.  It must contain the
        columns listed in ``mandatory_relation_columns`` (``id``,
        ``interactor_id_list`` and ``type``).  A column called
        ``participants:<attribute>`` holds per-participant values written as
        ``<participant id>: <value>``; any other column is stored as an
        attribute of the relation itself.

    Header names are lower-cased.  Columns are separated by one or more tabs,
    a cell may hold several values separated by ``|`` and a cell equal to
    ``-`` means "no value".
    """

    name = "generic"
    description = "This file implements a program that fills up tables in BIANA database from data in a tabulated file"
    external_entity_definition = ""
    external_entity_relations = ""

    # Built-in sets replace the deprecated ``sets.Set``; membership tests and
    # iteration (the only operations used here) behave the same.
    mandatory_columns = set(["id", "type"])
    mandatory_relation_columns = set(["id", "interactor_id_list", "type"])

    # Separator between the values stored in a single cell
    _VALUE_SEPARATOR = "|"
    # Content of a cell that carries no value
    _EMPTY_VALUE = "-"
    # Prefix of the relation columns that describe participant attributes
    _PARTICIPANT_PREFIX = "participants:"
    # One or more tabs separate two columns
    _COLUMN_SPLIT_RE = re.compile("\t+")
    # "<participant id>: <value>".  The first group is greedy, so identifiers
    # that contain ':' themselves are kept whole.
    _PARTICIPANT_RE = re.compile(r"(.+):\s*(.+)")

    def __init__(self):
        """
        Initializes the parser with its default values and declares the
        additional compulsory command line argument ``default-attribute``.
        """

        # Start with the default values

        BianaParser.__init__(self, default_db_description = "Generic Tabulated parser",
                             default_script_name = "GenericParser.py",
                             default_script_description = GenericParser.description,
                             additional_compulsory_arguments = [("default-attribute=",None,"Name of the default identifier that this database gives (such as uniprotentry)")])

    def parse_database(self):
        """
        Method that implements the specific operations of a general tabulated file

        Reads ``self.input_file_fd`` and inserts every external entity and
        every external entity relation it describes through
        ``self.biana_access``.  The database identifier returned for each
        inserted object is stored in ``self.external_entity_ids_dict`` under
        the (stripped) value of its ``id`` column, so that later relation
        lines can refer to it.

        Lines found before any section marker are reported on stderr and
        skipped.

        Raises:
            Exception: a section header lacks one of its mandatory columns.
            IndexError: a data line has fewer columns than its header.
            KeyError: a relation refers to an id that has not been inserted.
        """

        # Specify that this database has relations hierarchies
        self.biana_access.store_relations_hierarchy = True

        self.initialize_input_file_descriptor()

        self.in_external_entities = False
        self.external_entity_fields = None
        self.external_entity_ids_dict = {}

        self.in_external_entity_relations = False
        self.external_entity_relation_fields = None

        for line in self.input_file_fd:

            line = line.strip()

            if line == "":
                continue

            # Section markers only switch the parsing state
            if line.startswith("@EXTERNAL_ENTITY_DATA"):
                self.in_external_entities = True
                self.in_external_entity_relations = False
                continue
            if line.startswith("@EXTERNAL_ENTITY_RELATION_DATA"):
                self.in_external_entities = False
                self.in_external_entity_relations = True
                continue

            if self.in_external_entities:
                values = self._COLUMN_SPLIT_RE.split(line)
                if self.external_entity_fields is None:
                    # The first line of the section is its header
                    self.external_entity_fields = self._read_header(values,
                                                                    self.mandatory_columns,
                                                                    "External Entity %s column not found")
                else:
                    self._insert_external_entity(values)

            elif self.in_external_entity_relations:
                values = self._COLUMN_SPLIT_RE.split(line)
                if self.external_entity_relation_fields is None:
                    # The first line of the section is its header
                    self.external_entity_relation_fields = self._read_header(values,
                                                                             self.mandatory_relation_columns,
                                                                             "External Entity Relation %s column not found")
                else:
                    self._insert_external_entity_relation(values)

            else:
                # Data found before any section marker
                sys.stderr.write("Format error, check file format!\n")

    def _read_header(self, values, mandatory, error_template):
        """
        Maps every lower-cased column name in "values" to its position and
        checks that all "mandatory" columns are present.

        Raises Exception(error_template % column) for a missing column.
        """

        column_to_index = dict((column.lower(), index) for index, column in enumerate(values))
        for column in mandatory:
            if column not in column_to_index:
                raise Exception(error_template % column)
        return column_to_index

    def _insert_external_entity(self, values):
        """
        Builds an ExternalEntity from the columns of one data line, inserts
        it and remembers the identifier assigned by the database.
        """

        fields = self.external_entity_fields

        new_external_entity = ExternalEntity( source_database = self.database,
                                              type = values[fields["type"]].strip() )

        for attribute_identifier, index in fields.items():
            if attribute_identifier in self.mandatory_columns:
                continue
            cell = values[index].strip()
            if cell == self._EMPTY_VALUE:
                continue
            for current_value in cell.split(self._VALUE_SEPARATOR):
                current_value = current_value.strip()
                # Header names are already lower-cased by _read_header
                if attribute_identifier == "proteinsequence":
                    current_value = ProteinSequence(current_value)
                new_external_entity.add_attribute( ExternalEntityAttribute( attribute_identifier = attribute_identifier,
                                                                            value = current_value,
                                                                            type = "cross-reference" ) )

        # The id is stripped so that it matches the stripped ids used when
        # relations refer to this entity
        entity_key = values[fields["id"]].strip()
        self.external_entity_ids_dict[entity_key] = self.biana_access.insert_new_external_entity( externalEntity = new_external_entity )

    def _insert_external_entity_relation(self, values):
        """
        Builds an ExternalEntityRelation from the columns of one data line,
        inserts it and remembers the identifier assigned by the database.
        """

        fields = self.external_entity_relation_fields
        ids_dict = self.external_entity_ids_dict

        new_external_entity_relation = ExternalEntityRelation( source_database = self.database,
                                                               relation_type = values[fields["type"]].strip() )

        for interactor_id in values[fields["interactor_id_list"]].split(self._VALUE_SEPARATOR):
            new_external_entity_relation.add_participant( externalEntityID = ids_dict[interactor_id.strip()] )

        for current_attribute, index in fields.items():
            if current_attribute in self.mandatory_relation_columns:
                continue
            cell = values[index].strip()
            if cell == self._EMPTY_VALUE:
                continue

            if current_attribute.startswith(self._PARTICIPANT_PREFIX):
                participant_attribute = current_attribute.replace(self._PARTICIPANT_PREFIX, '')
                for current_value in cell.split(self._VALUE_SEPARATOR):
                    match = self._PARTICIPANT_RE.search(current_value.strip())
                    if match is None:
                        # Skip the malformed value: there is no participant
                        # id or attribute value to store for it
                        sys.stderr.write("Format error, check file format!\n")
                        continue
                    participant_id = match.group(1).strip()
                    attribute_value = match.group(2)
                    new_external_entity_relation.add_participant_attribute( externalEntityID = ids_dict[participant_id],
                                                                            participantAttribute = ExternalEntityRelationParticipantAttribute( attribute_identifier = participant_attribute,
                                                                                                                                               value = attribute_value ) )
            else:
                for current_value in cell.split(self._VALUE_SEPARATOR):
                    new_external_entity_relation.add_attribute( ExternalEntityRelationAttribute( attribute_identifier = current_attribute,
                                                                                                 value = current_value.strip() ) )

        # Relations are registered as well, so that other relations can use
        # them as participants
        relation_key = values[fields["id"]].strip()
        ids_dict[relation_key] = self.biana_access.insert_new_external_entity( externalEntity = new_external_entity_relation )
164