Package biana :: Package BianaParser :: Module keggKOParser
[hide private]
[frames] | no frames]

Source Code for Module biana.BianaParser.keggKOParser

  1  """ 
  2      BIANA: Biologic Interactions and Network Analysis 
  3      Copyright (C) 2009  Javier Garcia-Garcia, Emre Guney, Baldo Oliva 
  4   
  5      This program is free software: you can redistribute it and/or modify 
  6      it under the terms of the GNU General Public License as published by 
  7      the Free Software Foundation, either version 3 of the License, or 
  8      (at your option) any later version. 
  9   
 10      This program is distributed in the hope that it will be useful, 
 11      but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13      GNU General Public License for more details. 
 14   
 15      You should have received a copy of the GNU General Public License 
 16      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 17   
 18  """ 
 19   
 20  """ 
 21  File        : keggKOParser.py 
 22  Author      : Javier Garcia Garcia 
 23  Creation    : January 2008 
 24  Contents    : fills up tables in database biana with information from kegg ko database 
 25  Called from :  
 26   
 27  ======================================================================================================= 
 28   
 29  This file implements a program that fills the tables of the BIANA database with information from the KEGG KO database 
 30   
 31  """ 
 32   
 33  from bianaParser import * 
 34   
 35   
class KeggKOParser(BianaParser):
    """
    Parser that loads the entries of a KEGG KO flat file into BIANA.

    Every "ENTRY <code> KO" record of the input file is stored as one
    ExternalEntityRelation of relation type "cluster", identified by a
    unique "keggCode" attribute.  The EC, COG and GO identifiers listed in
    the DBLINK field of the record are added to it as cross-reference
    attributes.
    """

    name = "kegg_kO"
    description = "This file implements a program that fills up tables in database biana with information of kegg KO Database"
    external_entity_definition = "A external entity represents a KEGG KO"
    external_entity_relations = ""

    # Databases of the DBLINK field that are stored as cross-references.
    # The database name is used verbatim as the attribute identifier.
    _DBLINK_DATABASES = ("EC", "COG", "GO")

    # Splits "EC: 1.1.1.1 COG: COG1012" into ['', 'EC', ' 1.1.1.1 ', 'COG', ' COG1012'].
    # Because the database name is a capturing group, re.split keeps it in the result.
    _DBLINK_SPLIT_REGEX = re.compile(r"(\w+)\:")

    def __init__(self):
        """
        Set up the parser with the default values of the KEGG KO database.
        """

        # Start with the default values
        BianaParser.__init__(self, default_db_description = "KEGG KO database",
                             default_script_name = "keggKOParser.py",
                             default_script_description = KeggKOParser.description )
        self.default_eE_attribute = "keggCode"
        self.initialize_input_file_descriptor()

    def _add_dblink_cross_references(self, kegg_ko_object, field_name, field_lines):
        """
        Add the EC/COG/GO identifiers of a buffered DBLINK field to an entry.

        "kegg_ko_object" is the entry being built (may be None when no ENTRY
        line has been read yet), "field_name" is the name of the buffered
        field and "field_lines" the list with the text of its lines.
        Nothing is done unless the buffered field is a DBLINK field.
        """

        # NOTE(review): the field name tested here is "DBLINK", exactly as in
        # the original code; confirm it matches the field name used by the
        # KEGG KO release being parsed.
        if field_name != "DBLINK" or kegg_ko_object is None:
            return

        tokens = [ x.strip() for x in self._DBLINK_SPLIT_REGEX.split(" ".join(field_lines)) ]

        # tokens[0] is the text before the first database name; after it,
        # database names and their identifier lists alternate.
        for database, identifiers in zip(tokens[1::2], tokens[2::2]):
            if database not in self._DBLINK_DATABASES:
                continue
            # split() without arguments ignores repeated blanks, so no
            # attribute with an empty value is created
            for identifier in identifiers.split():
                kegg_ko_object.add_attribute( ExternalEntityAttribute( attribute_identifier = database,
                                                                       value = identifier,
                                                                       type = "cross-reference" ) )

    def parse_database(self):
        """
        Read the input file line by line and insert one entry per KO record.

        The values of a field can span several lines, so they are buffered
        and processed once the field is complete: when the next field
        starts, when the next ENTRY line is found or when the file ends.
        """

        self.initialize_input_file_descriptor()

        # General regex
        continue_field_regex = re.compile(r"^\s{3,}([^;]+);*$")
        field_regex = re.compile(r"^(\w+)\s+([^;]+);*$")
        entry_regex = re.compile(r"^ENTRY\s+(\w+)\s+KO")

        kegg_ko_object = None

        temp_value = []   # List used to store the information of those fields that can have more than a single line
        current_field = None

        for line in self.input_file_fd:

            m = entry_regex.match(line)

            if m:
                if kegg_ko_object is not None:
                    # The buffered field belongs to the entry that is being
                    # closed: process it before the entry is inserted
                    self._add_dblink_cross_references(kegg_ko_object, current_field, temp_value)
                    self.biana_access.insert_new_external_entity( externalEntity = kegg_ko_object )

                # Forget the buffered field so it cannot leak into the new entry
                current_field = None
                temp_value = []

                kegg_ko_object = ExternalEntityRelation( source_database = self.database, relation_type="cluster" )
                kegg_ko_object.add_attribute( ExternalEntityAttribute( attribute_identifier = "keggCode", value=m.group(1), type = "unique" ) )
                continue

            new_field = field_regex.match(line)
            if new_field:
                # A new field starts: the previous one is complete
                self._add_dblink_cross_references(kegg_ko_object, current_field, temp_value)
                current_field = new_field.group(1)
                temp_value = [new_field.group(2)]
                continue

            cont_value = continue_field_regex.match(line)
            if cont_value:
                temp_value.append(cont_value.group(1))

        # Insert the last one
        if kegg_ko_object is not None:
            self._add_dblink_cross_references(kegg_ko_object, current_field, temp_value)
            self.biana_access.insert_new_external_entity( externalEntity = kegg_ko_object )
120