Package biana :: Package BianaParser :: Module taxonomyParser
[hide private]
[frames | no frames]

Source Code for Module biana.BianaParser.taxonomyParser

  1  """ 
  2      BIANA: Biologic Interactions and Network Analysis 
  3      Copyright (C) 2009  Javier Garcia-Garcia, Emre Guney, Baldo Oliva 
  4   
  5      This program is free software: you can redistribute it and/or modify 
  6      it under the terms of the GNU General Public License as published by 
  7      the Free Software Foundation, either version 3 of the License, or 
  8      (at your option) any later version. 
  9   
 10      This program is distributed in the hope that it will be useful, 
 11      but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13      GNU General Public License for more details. 
 14   
 15      You should have received a copy of the GNU General Public License 
 16      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 17   
 18  """ 
 19   
 20  from bianaParser import * 
 21  import sets 
 22   
 23   
class TaxonomyParser(BianaParser):
    """
    Taxonomy Parser Class

    Reads the "names.dmp" and "nodes.dmp" taxonomy dump files, inserts one
    "taxonomyElement" external entity per node and finally inserts an
    ontology that links every element to its parent.
    """

    name = "taxonomy"
    description = "This program fills up tables in database biana related with taxonomy ontology"
    external_entity_definition = "A external entity represents a taxonomy of any type"
    external_entity_relations = ""

    def __init__(self):
        """
        Initialise the parser with the default taxonomy database description
        """

        # Start with the default values

        BianaParser.__init__(self, default_db_description = "NCBI Species information",
                             default_script_name = "taxonomyParser",
                             default_script_description = "This program fills up tables in database piana related to external DB 'taxonomy'")
        self.default_eE_attribute = "taxid"

    def parse_database(self):
        """
        Method that implements the specific operations of taxonomy parser

        Registers the taxonomy-specific attribute types, reads the dump
        files, inserts one external entity per node and then inserts the
        ontology describing the parent relation between them.
        """

        self._register_attribute_types()

        (names_dmp_file, nodes_dmp_file) = self._dump_file_paths()

        ontology = Ontology( source_database = self.database, linkedAttribute="taxid", name="taxonomy", descriptionAttribute="TaxID_name", levelAttribute="TaxID_category" )

        print("Reading names")
        names_comment_dict = self._read_names(names_dmp_file)

        print("Reading nodes")
        specific_identifiers_and_parent = self._insert_nodes(nodes_dmp_file, names_comment_dict)

        print("Done!")

        # Set the ontology hierarchy and insert elements to ontology
        # NOTE(review): a node whose parent tax id does not appear in
        # nodes.dmp raises KeyError here -- confirm whether that can happen.
        for current_tax_ID in specific_identifiers_and_parent:
            (entity_id, parent_tax_id) = specific_identifiers_and_parent[current_tax_ID]
            ontology.add_element( ontologyElementID = entity_id,
                                  isA = [specific_identifiers_and_parent[parent_tax_id][0]] )

        print("Inserting ontology to database")
        self.biana_access.insert_new_external_entity( ontology )

    def _register_attribute_types(self):
        """
        Declare the "TaxID_category" and "TaxID_name" attribute types and make
        them transferable using "taxID" as a key
        """

        self.biana_access.add_valid_external_entity_attribute_type( name = "TaxID_category",
                                                                    data_type = "ENUM(\"class\",\"family\",\"forma\",\"genus\",\"infraclass\",\"infraorder\",\"kingdom\",\"no rank\",\"order\",\"parvorder\",\"phylum\",\"species\",\"species group\",\"species subgroup\",\"subclass\",\"subfamily\",\"subgenus\",\"subkingdom\",\"suborder\",\"subphylum\",\"subspecies\",\"subtribe\",\"superclass\",\"superfamily\",\"superkingdom\",\"superorder\",\"superphylum\",\"tribe\",\"varietas\")",
                                                                    category = "eE descriptive attribute")

        self.biana_access.add_valid_external_entity_attribute_type( name = "TaxID_name",
                                                                    data_type = { "fields": [("value","varchar(255)"),
                                                                                             ("taxid_name_type", "ENUM(\"acronym\",\"anamorph\",\"blast name\",\"common name\",\"equivalent name\",\"genbank acronym\",\"genbank anamorph\",\"genbank common name\",\"genbank synonym\",\"includes\",\"in-part\",\"misnomer\",\"misspelling\",\"scientific name\",\"synonym\",\"teleomorph\",\"authority\",\"unpublished name\")", True)] },
                                                                    category = "eE special attribute")

        # IMPORTANT: As we have added new types and attributes that are not in the default BIANA distribution, we must execute the following command:
        self.biana_access.refresh_database_information()

        # Add the possibility to transfer taxonomy name and taxonomy category using taxID as a key
        self.biana_access._add_transfer_attribute( externalDatabaseID = self.database.get_id(),  # A single taxonomy element can have multiple names
                                                   key_attribute = "taxID",
                                                   transfer_attribute="TaxID_name" )

        self.biana_access._add_transfer_attribute( externalDatabaseID = self.database.get_id(),  # Category is stored in a different attribute, as a single taxonomy can have multiple names
                                                   key_attribute = "taxID",
                                                   transfer_attribute = "TaxID_category" )

    def _dump_file_paths(self):
        """
        Return the tuple (path of names.dmp, path of nodes.dmp)

        When self.input_file is a directory the files are looked up inside it
        (a trailing separator is appended to self.input_file if missing);
        otherwise they are looked up in the directory part of self.input_file.
        """

        if os.path.isdir(self.input_file) and not self.input_file.endswith(os.sep):
            self.input_file += os.sep

        dump_directory = os.path.dirname(self.input_file)

        return (os.path.join(dump_directory, "names.dmp"),
                os.path.join(dump_directory, "nodes.dmp"))

    def _read_names(self, names_dmp_file):
        """
        Read names.dmp and return a dictionary mapping each tax id to the set
        of (name, name type) tuples found for it

        Lines are split on "|"; fields 0, 1 and 3 are used as tax id, name and
        name type. Lines with fewer than four fields are skipped.
        """

        names_comment_dict = {}

        names_fd = open(names_dmp_file,"r")
        try:
            for line in names_fd:

                line_fields = line.split("|")

                # Blank or truncated lines do not carry the fields read below
                if len(line_fields) < 4:
                    continue

                tax_id = line_fields[0].strip()
                # Backslashes are doubled; double quotes are stored untouched.
                # The name is then cut to 254 characters.
                tax_name = line_fields[1].strip().replace("\\","\\\\")[0:254]
                tax_comment = line_fields[3].strip().replace("\\","\\\\")

                if tax_id != "" and tax_name != "":
                    names_comment_dict.setdefault(tax_id, set()).add((tax_name, tax_comment))
        finally:
            # Close the file even if a line could not be processed
            names_fd.close()

        return names_comment_dict

    def _insert_nodes(self, nodes_dmp_file, names_comment_dict):
        """
        Read nodes.dmp, insert one "taxonomyElement" external entity per line
        and return a dictionary mapping each tax id to the tuple
        (external entity id, parent tax id)

        Lines are split on "|"; fields 0, 1 and 2 are used as tax id, parent
        tax id and category. Lines with fewer than three fields are skipped.
        A node without any entry in names_comment_dict is inserted without
        "TaxID_name" attributes.
        """

        specific_identifiers_and_parent = {}

        nodes_fd = open(nodes_dmp_file,"r")
        try:
            for line in nodes_fd:

                line_fields = line.split("|")

                # Blank or truncated lines do not carry the fields read below
                if len(line_fields) < 3:
                    continue

                tax_id = line_fields[0].strip()
                parent_tax_id = line_fields[1].strip()
                tax_category = line_fields[2].strip()

                externalEntity = ExternalEntity( source_database = self.database, type = "taxonomyElement" )

                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "taxID", value = tax_id ) )
                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "TaxID_category", value = tax_category ) )

                for (tax_name, tax_comment) in names_comment_dict.get(tax_id, ()):
                    externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "TaxID_name",
                                                                           value = tax_name,
                                                                           additional_fields = {"taxid_name_type": tax_comment} ) )

                self.biana_access.insert_new_external_entity( externalEntity )

                specific_identifiers_and_parent[tax_id] = (externalEntity.get_id(),parent_tax_id)
        finally:
            # Close the file even if an insertion failed
            nodes_fd.close()

        return specific_identifiers_and_parent
139