1 """
2 BIANA: Biologic Interactions and Network Analysis
3 Copyright (C) 2009 Javier Garcia-Garcia, Emre Guney, Baldo Oliva
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 """
19
20 """
21 File : keggkoParser.py
22 Author : Javier Garcia Garcia
23 Creation : January 2008
Contents : fills up tables in the BIANA database with information from the KEGG KO database
25 Called from :
26
27 =======================================================================================================
28
This file implements a program that fills up tables in the BIANA database with information from the KEGG KO database
30
31 """
32
33 from bianaParser import *
34
35
37 """
38
39 """
40
# Parser metadata constants.
# NOTE(review): presumably read by the parser framework in bianaParser to
# list and describe the available parsers -- confirm against that module.

# Short identifier of this parser.
name = "kegg_kO"
# One-sentence summary of what the parser does.
description = "This file implements a program that fills up tables in database biana with information of kegg KO Database"
# States what a single external entity stands for in this data source.
external_entity_definition = "A external entity represents a KEGG KO"
# Intentionally left empty for this parser.
external_entity_relations = ""
45
55
57 """
58 """
59
# Parse the KEGG KO flat file read from self.input_file_fd.
#
# Every "ENTRY <id> KO" line opens a new record.  A record is stored as an
# ExternalEntityRelation of type "cluster" that carries the KO identifier as
# a unique "keggCode" attribute.  The EC, COG and GO identifiers listed in the
# record's DBLINK field (which may span several continuation lines) are added
# as "cross-reference" attributes.  A finished record is handed to
# self.biana_access.insert_new_external_entity() when the next record starts
# and, for the last record, once the input is exhausted.

self.initialize_input_file_descriptor()

# Continuation lines of a multi-line field start with at least three blanks.
continue_field_regex = re.compile(r"^\s{3,}([^;]+);*$")
# "KEYWORD   value" lines; only trailing ';' characters are tolerated.
field_regex = re.compile(r"^(\w+)\s+([^;]+);*$")
# First line of a KO record; group 1 is the KO identifier.
entry_regex = re.compile(r"^ENTRY\s+(\w+)\s+KO")
# Splitting on the captured "DB:" tag yields
# [text_before_first_tag, tag, values, tag, values, ...].
dblink_split_regex = re.compile(r"(\w+)\:")

# DBLINK databases that are stored as cross-reference attributes.
cross_referenced_databases = ("EC", "COG", "GO")


def _add_dblink_cross_references(entity, dblink_lines):
    # Attach the EC / COG / GO identifiers found in the collected DBLINK
    # lines to entity.
    pieces = dblink_split_regex.split(" ".join(dblink_lines))
    # pieces[0] precedes the first tag; afterwards tags and values alternate.
    for database, identifiers in zip(pieces[1::2], pieces[2::2]):
        if database not in cross_referenced_databases:
            continue
        # split() without argument also discards the newline captured by the
        # field regexes and never produces empty identifiers.
        for identifier in identifiers.split():
            entity.add_attribute(
                ExternalEntityAttribute(
                    attribute_identifier=database,
                    value=identifier,
                    type="cross-reference",
                )
            )


kegg_ko_object = None   # record currently being assembled
current_field = None    # keyword of the field currently being collected
temp_value = []         # text lines collected for current_field

for line in self.input_file_fd:

    entry_match = entry_regex.match(line)
    if entry_match:
        if kegg_ko_object is not None:
            # DBLINK can be the last field of a record; process it here so
            # its links are not lost or attached to the following record.
            if current_field == "DBLINK":
                _add_dblink_cross_references(kegg_ko_object, temp_value)
            self.biana_access.insert_new_external_entity(externalEntity=kegg_ko_object)

        kegg_ko_object = ExternalEntityRelation(source_database=self.database, relation_type="cluster")
        kegg_ko_object.add_attribute(
            ExternalEntityAttribute(
                attribute_identifier="keggCode",
                value=entry_match.group(1),
                type="unique",
            )
        )
        # Start every record from a clean state.
        current_field = None
        temp_value = []
        continue

    new_field = field_regex.match(line)
    if new_field:
        # A new keyword closes the previous field; only DBLINK is used.
        if current_field == "DBLINK" and kegg_ko_object is not None:
            _add_dblink_cross_references(kegg_ko_object, temp_value)
        current_field = new_field.group(1)
        temp_value = [new_field.group(2)]
        continue

    cont_value = continue_field_regex.match(line)
    if cont_value:
        temp_value.append(cont_value.group(1))

# Store the last record of the file, including a still-pending DBLINK field.
if kegg_ko_object is not None:
    if current_field == "DBLINK":
        _add_dblink_cross_references(kegg_ko_object, temp_value)
    self.biana_access.insert_new_external_entity(externalEntity=kegg_ko_object)
120