1 """
2 BIANA: Biologic Interactions and Network Analysis
3 Copyright (C) 2009 Javier Garcia-Garcia, Emre Guney, Baldo Oliva
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 """
19
20 from bianaParser import *
21 import sets
22
23
class TaxonomyParser(BianaParser):
    """
    Taxonomy Parser Class

    Loads a taxonomy dump (a "nodes.dmp" / "names.dmp" pair) into BIANA: one
    external entity per node, plus an ontology linking each node to its parent.
    """

    # NOTE(review): the "class" and "def" header lines (and all indentation)
    # were absent from the reviewed text. The names TaxonomyParser, __init__
    # and parse_database are reconstructed -- confirm against the repository.

    name = "taxonomy"
    description = "This program fills up tables in database biana related with taxonomy ontology"
    external_entity_definition = "A external entity represents a taxonomy of any type"
    external_entity_relations = ""

    def __init__(self):
        """
        Set the taxonomy-specific defaults on top of the generic BianaParser
        """

        BianaParser.__init__(self, default_db_description = "NCBI Species information",
                             default_script_name = "taxonomyParser",
                             default_script_description = "This program fills up tables in database piana related to external DB 'taxonomy'")
        self.default_eE_attribute = "taxid"

    def parse_database(self):
        """
        Method that implements the specific operations of taxonomy parser

        Steps:
          1. register the "TaxID_category" and "TaxID_name" attribute types
             and declare both as transferred through the "taxID" key
          2. read names.dmp into memory
          3. insert one external entity per line of nodes.dmp
          4. link every node to its parent in an ontology and insert it

        Raises KeyError if a node refers to a parent identifier that does not
        appear in nodes.dmp, and IOError if a dump file cannot be opened.
        """

        self._register_attribute_types()

        (nodes_dmp_file, names_dmp_file) = self._locate_dump_files()

        ontology = Ontology( source_database = self.database, linkedAttribute="taxid", name="taxonomy", descriptionAttribute="TaxID_name", levelAttribute="TaxID_category" )

        # Single-argument print(...) produces the same output with or without
        # the print statement, so the messages are unchanged
        print("Reading names")
        names_comment_dict = self._read_names(names_dmp_file)

        print("Reading nodes")
        specific_identifiers_and_parent = self._insert_nodes(nodes_dmp_file, names_comment_dict)

        print("Done!")

        # Parents can only be resolved once every node has been inserted,
        # because the ontology refers to the identifiers of the inserted entities.
        # NOTE(review): if the dump lists the root as its own parent, the root
        # becomes its own isA here -- confirm that Ontology tolerates this.
        for current_tax_ID in specific_identifiers_and_parent:
            (entity_id, parent_tax_id) = specific_identifiers_and_parent[current_tax_ID]
            ontology.add_element( ontologyElementID = entity_id,
                                  isA = [specific_identifiers_and_parent[parent_tax_id][0]] )

        print("Inserting ontology to database")
        self.biana_access.insert_new_external_entity( ontology )

    def _register_attribute_types(self):
        """
        Declare the taxonomy attribute types and how they are transferred
        """

        self.biana_access.add_valid_external_entity_attribute_type( name = "TaxID_category",
                                                                    data_type = "ENUM(\"class\",\"family\",\"forma\",\"genus\",\"infraclass\",\"infraorder\",\"kingdom\",\"no rank\",\"order\",\"parvorder\",\"phylum\",\"species\",\"species group\",\"species subgroup\",\"subclass\",\"subfamily\",\"subgenus\",\"subkingdom\",\"suborder\",\"subphylum\",\"subspecies\",\"subtribe\",\"superclass\",\"superfamily\",\"superkingdom\",\"superorder\",\"superphylum\",\"tribe\",\"varietas\")",
                                                                    category = "eE descriptive attribute")

        self.biana_access.add_valid_external_entity_attribute_type( name = "TaxID_name",
                                                                    data_type = { "fields": [("value","varchar(255)"),
                                                                                             ("taxid_name_type", "ENUM(\"acronym\",\"anamorph\",\"blast name\",\"common name\",\"equivalent name\",\"genbank acronym\",\"genbank anamorph\",\"genbank common name\",\"genbank synonym\",\"includes\",\"in-part\",\"misnomer\",\"misspelling\",\"scientific name\",\"synonym\",\"teleomorph\",\"authority\",\"unpublished name\")", True)] },
                                                                    category = "eE special attribute")

        # Called between registering the types and using them below
        self.biana_access.refresh_database_information()

        self.biana_access._add_transfer_attribute( externalDatabaseID = self.database.get_id(),
                                                   key_attribute = "taxID",
                                                   transfer_attribute="TaxID_name" )

        self.biana_access._add_transfer_attribute( externalDatabaseID = self.database.get_id(),
                                                   key_attribute = "taxID",
                                                   transfer_attribute = "TaxID_category" )

    def _locate_dump_files(self):
        """
        Return the tuple (nodes_dmp_file, names_dmp_file) for self.input_file
        """

        if os.path.isdir(self.input_file):
            # The trailing separator is stored back on the attribute, as the
            # original code did, and makes dirname() return the directory itself
            if not self.input_file.endswith(os.sep):
                self.input_file += os.sep

        # NOTE(review): with the indentation lost it is unclear whether a
        # non-directory input was supported. Taking the directory part of the
        # path covers both readings instead of leaving the paths unset.
        dump_dir = os.path.dirname(self.input_file)

        return (os.path.join(dump_dir, "nodes.dmp"),
                os.path.join(dump_dir, "names.dmp"))

    def _read_names(self, names_dmp_file):
        """
        Read names.dmp and group its names by taxonomy identifier

        Returns a dictionary: tax_id -> set of (name, name type) tuples
        """

        names_comment_dict = {}

        names_fd = open(names_dmp_file,"r")
        try:
            for line in names_fd:

                line_fields = line.split("|")

                # split() never returns an empty list, so the field count is
                # what tells a blank or truncated line apart from a record
                if len(line_fields) < 4:
                    continue

                # Backslashes are doubled before the name is cut to 254
                # characters (the column is declared as varchar(255)).
                # NOTE(review): the original also called replace('"','\"'),
                # which replaces a quote with itself; it was dropped here with
                # no change in behaviour. Confirm whether double quotes need
                # escaping before they reach biana_access.
                tax_id = line_fields[0].strip()
                tax_name = line_fields[1].strip().replace("\\","\\\\")[0:254]
                tax_comment = line_fields[3].strip().replace("\\","\\\\")

                if tax_id != "" and tax_name != "":
                    names_comment_dict.setdefault(tax_id, set()).add((tax_name, tax_comment))
        finally:
            # Release the handle even when a line cannot be processed
            names_fd.close()

        return names_comment_dict

    def _insert_nodes(self, nodes_dmp_file, names_comment_dict):
        """
        Insert one external entity per node of nodes.dmp

        Returns a dictionary: tax_id -> (inserted entity id, parent tax_id)
        """

        specific_identifiers_and_parent = {}

        nodes_fd = open(nodes_dmp_file,"r")
        try:
            for line in nodes_fd:

                line_fields = line.split("|")

                # Skip blank or truncated lines instead of failing on them
                if len(line_fields) < 3:
                    continue

                tax_id = line_fields[0].strip()
                parent_tax_id = line_fields[1].strip()
                tax_rank = line_fields[2].strip()

                externalEntity = ExternalEntity( source_database = self.database, type = "taxonomyElement" )

                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "taxID", value = tax_id ) )
                externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "TaxID_category", value = tax_rank ) )

                # A node without any entry in names.dmp is inserted without names
                for (tax_name, tax_comment) in names_comment_dict.get(tax_id, ()):
                    externalEntity.add_attribute( ExternalEntityAttribute( attribute_identifier = "TaxID_name",
                                                                           value = tax_name,
                                                                           additional_fields = {"taxid_name_type": tax_comment} ) )

                self.biana_access.insert_new_external_entity( externalEntity )

                # get_id() is read after the insertion, as in the original code
                specific_identifiers_and_parent[tax_id] = (externalEntity.get_id(),parent_tax_id)
        finally:
            nodes_fd.close()

        return specific_identifiers_and_parent
139