Package biana :: Package BianaParser :: Module scopParser
[hide private]
[frames | no frames]

Source Code for Module biana.BianaParser.scopParser

  1  """ 
  2      BIANA: Biologic Interactions and Network Analysis 
  3      Copyright (C) 2009  Javier Garcia-Garcia, Emre Guney, Baldo Oliva 
  4   
  5      This program is free software: you can redistribute it and/or modify 
  6      it under the terms of the GNU General Public License as published by 
  7      the Free Software Foundation, either version 3 of the License, or 
  8      (at your option) any later version. 
  9   
 10      This program is distributed in the hope that it will be useful, 
 11      but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13      GNU General Public License for more details. 
 14   
 15      You should have received a copy of the GNU General Public License 
 16      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 17   
 18  """ 
 19   
 20  from bianaParser import * 
 21  from sets import * 
 22  import os 
 23  from biana.BianaObjects.PDB import PDBFragment 
 24   
class SCOPParser(BianaParser):
    """
    SCOP Parser Class

    Reads the SCOP classification file ("dir.cla.scop.txt_<version>") and the
    SCOP description file ("dir.des.scop.txt_<version>") from the input
    directory, inserts one "SCOPElement" external entity for every class,
    fold, superfamily, family and domain found in the description file, and
    finally inserts an ontology that links every element to its parent.
    """

    name = "scop"
    description = "This program fills up tables in database biana related with SCOP"
    external_entity_definition = "A external entity represents a SCOP entity (fold, class, domain,...)"
    external_entity_relations = ""

    # SCOP level code (as used in both input files) -> value stored in the
    # "SCOP_Category" attribute. Only these levels become external entities.
    _CATEGORIES = {"cl": "class",
                   "cf": "fold",
                   "sf": "superfamily",
                   "fa": "family",
                   "dm": "domain"}

    # Level codes extracted from the last column of a classification line,
    # in the order in which they are unpacked.
    _CLA_LEVELS = ("cl", "cf", "sf", "fa", "dm", "sp")

    # Order in which the child -> parent links are added to the ontology.
    _ONTOLOGY_LEVELS = ("dm", "fa", "sf", "cf")

    def __init__(self):
        """
        Initialise the parser with the SCOP-specific default values.
        """

        # Start with the default values

        BianaParser.__init__(self, default_db_description = "Structural Clasification Of Proteins",
                             default_script_name = "scopParser.py",
                             default_script_description = "This program fills up tables in database biana related to SCOP",
                             additional_compulsory_arguments = [])
        self.default_eE_attribute = "scop"
        #self.is_promiscuous = True

    def parse_database(self):
        """
        Method that implements the specific operations of scop parser

        Registers the "SCOP_Category" attribute type, reads both SCOP input
        files, inserts the external entities and then the ontology.

        Side effect: a trailing path separator is appended to
        self.input_file when it is missing.
        """

        self.biana_access.add_valid_external_entity_attribute_type( name = "SCOP_Category",
                                                                    data_type = "ENUM(\"class\",\"fold\",\"superfamily\",\"family\",\"domain\")",
                                                                    category = "eE attribute" )

        # IMPORTANT: As we have added new types and attributes that are not in the default BIANA distribution, we must execute the following command:
        self.biana_access.refresh_database_information()

        #
        # Reading external DB "scop" and inserting its data into Biana DB
        #

        if not self.input_file.endswith(os.sep):
            self.input_file += os.sep

        version = self.sourcedb_version.replace("\"", "")

        hierarchy_dict, sp_dict, domains_dict = self._read_classification(
            self.input_file + "dir.cla.scop.txt_" + version )

        descriptions_dict = self._read_descriptions(
            self.input_file + "dir.des.scop.txt_" + version )

        scop_entry_to_eE_id = self._insert_entities( descriptions_dict, domains_dict, sp_dict )

        self._insert_ontology( hierarchy_dict, descriptions_dict, scop_entry_to_eE_id )

    def _read_classification(self, path):
        """
        Parse the SCOP classification file located at "path".

        Example of a data line:
        d1uvya_ 1uvy A: a.1.1.1 100068 cl=46456,cf=46457,sf=46458,fa=46459,dm=46460,sp=46461,px=100068

        Lines starting with "#" are ignored. Lines that do not have exactly
        six whitespace-separated fields, or whose last field lacks one of
        the cl/cf/sf/fa/dm/sp codes, are skipped with a "skipping..." message.

        Returns a tuple (hierarchy_dict, sp_dict, domains_dict):
          hierarchy_dict: for each of "cf", "sf", "fa" and "dm", a dictionary
                          mapping an element id to the id of its parent
          sp_dict:        domain id -> set of species ids
          domains_dict:   domain id -> list of (pdb code, range string)
        """

        level_regexes = [ re.compile(level + r"=(\d+)") for level in self._CLA_LEVELS ]

        hierarchy_dict = {"cf":{},"sf":{},"fa":{},"dm":{}}
        sp_dict = {}
        domains_dict = {}

        scop_dir_cla_fd = open(path, 'r')

        # try/finally so that the file is closed even if parsing fails
        try:
            for line in scop_dir_cla_fd:

                if line.startswith("#"):
                    continue

                line_fields = line.strip().split()   # line_fields[0] is complete pdb entry
                                                     # line_fields[1] is pdb code
                                                     # line_fields[2] is pdb chain follow by : XX-YY (optional)
                                                     # line_fields[3] is ???
                                                     # line_fields[4] is ???
                                                     # line_fields[5] is comma-separated codes

                if len(line_fields) != 6:
                    # skip incomplete lines
                    print("skipping...")
                    continue

                matches = [ regex.search(line_fields[5]) for regex in level_regexes ]

                if None in matches:
                    # a level code is missing: the line cannot be placed in the hierarchy
                    print("skipping...")
                    continue

                cl, cf, sf, fa, dm, sp = [ m.group(1) for m in matches ]

                pdb_code = line_fields[1]

                # problem: a domain can be in different chains...
                # The range is therefore stored as found, without splitting chain and residues
                pdb_range = line_fields[2]

                hierarchy_dict["cf"][cf] = cl
                hierarchy_dict["sf"][sf] = cf
                hierarchy_dict["fa"][fa] = sf
                hierarchy_dict["dm"][dm] = fa

                sp_dict.setdefault(dm, set()).add(sp)
                domains_dict.setdefault(dm, []).append((pdb_code, pdb_range))
        finally:
            scop_dir_cla_fd.close()

        return hierarchy_dict, sp_dict, domains_dict

    def _read_descriptions(self, path):
        """
        Parse the SCOP description file located at "path".

        Every data line is split on tabs; field 0 is used as the element id,
        field 1 as the level code and field 4 as the description. Lines
        starting with "#" are ignored. Lines with fewer than five fields or
        with an unknown level code are skipped with a "skipping..." message.

        Returns a dictionary level code -> { element id -> description } with
        the keys "cl", "cf", "sf", "fa", "dm", "sp" and "px".
        """

        descriptions_dict = {"cl":{},"cf":{},"sf":{},"fa":{},"dm":{},"sp":{},"px":{}}

        scop_des_fd = open(path, 'r')

        try:
            for line in scop_des_fd:

                if line.startswith("#"):
                    continue

                line_fields = line.strip().split("\t")

                if len(line_fields) < 5 or line_fields[1] not in descriptions_dict:
                    # blank or malformed line
                    print("skipping...")
                    continue

                descriptions_dict[line_fields[1]][line_fields[0]] = line_fields[4]
        finally:
            scop_des_fd.close()

        return descriptions_dict

    def _insert_entities(self, descriptions_dict, domains_dict, sp_dict):
        """
        Insert one external entity per class, fold, superfamily, family and
        domain listed in descriptions_dict ("sp" and "px" entries are not
        inserted as entities).

        Every entity gets the "SCOP" and "SCOP_Category" attributes. Domain
        entities additionally get one "pdb" attribute per (pdb code, range)
        pair, the "description" attribute and one "taxid" attribute per
        species whose description contains "[TaxId: <number>]"; the id of a
        species without such a tag is printed.

        NOTE(review): the listing this code was recovered from had lost its
        indentation. "description" and "taxid" are attached to domains only
        because that is the only nesting under which the original species
        lookup could not fail for non-domain entries - confirm against the
        upstream file.

        Returns a dictionary SCOP element id -> external entity id.
        """

        tax_regex = re.compile(r"\[TaxId:\s(\d+)\]")

        scop_entry_to_eE_id = {}

        for current_category in descriptions_dict:

            if current_category not in self._CATEGORIES:
                continue

            current_descriptions = descriptions_dict[current_category]

            for current_scop_entry in current_descriptions:

                eE = ExternalEntity( source_database = self.database, type = "SCOPElement" )
                eE.add_attribute( ExternalEntityAttribute( attribute_identifier="SCOP", value = current_scop_entry ) )
                eE.add_attribute( ExternalEntityAttribute( attribute_identifier="SCOP_Category", value = self._CATEGORIES[current_category] ) )

                if current_category == "dm":

                    # A domain described but absent from the classification file has no pdb data
                    for current_pdb in domains_dict.get(current_scop_entry, []):

                        fragments = PDBFragment.fragment_parser( fragment_str = current_pdb[1], separator = "," )

                        # Only takes the chain of the first fragment (all fragments should be on the same chain!).
                        # If different fragments belong to different chains, it is not taken into account
                        if fragments:
                            chain = fragments[0].chain
                        else:
                            chain = None

                        additional_fields = { "pdb_range": current_pdb[1] }

                        if chain is not None:
                            additional_fields["chain"] = chain

                        eE.add_attribute( ExternalEntityAttribute( attribute_identifier = "pdb", value = current_pdb[0],
                                                                   additional_fields = additional_fields ) )

                    eE.add_attribute( ExternalEntityAttribute( attribute_identifier="description", value = current_descriptions[current_scop_entry] ) )

                    for current_sp_id in sp_dict.get(current_scop_entry, ()):

                        # A species without description is reported like one without TaxId
                        m = tax_regex.search( descriptions_dict["sp"].get(current_sp_id, "") )

                        if m:
                            eE.add_attribute( ExternalEntityAttribute( attribute_identifier="taxid", value = m.group(1) ) )
                        else:
                            print(current_sp_id)

                self.biana_access.insert_new_external_entity(eE)

                scop_entry_to_eE_id[current_scop_entry] = eE.get_id()

        return scop_entry_to_eE_id

    def _insert_ontology(self, hierarchy_dict, descriptions_dict, scop_entry_to_eE_id):
        """
        Build the SCOP ontology and insert it in the database.

        Domains, families, superfamilies and folds (in this order) are added
        with their parent as "isA"; classes are added without parent.

        Raises KeyError if the classification file refers to an element that
        was not inserted (i.e. that is missing from the description file).
        """

        # NOTE(review): the attribute type registered in parse_database is "SCOP_Category"
        # while levelAttribute below is "SCOPCategory" - confirm that BIANA treats both as the same attribute.
        ontology = Ontology( source_database = self.database, linkedAttribute="scop", name="scop", descriptionAttribute="description", levelAttribute="SCOPCategory" )

        for current_level in self._ONTOLOGY_LEVELS:
            parents = hierarchy_dict[current_level]
            for current_scop_element in parents:
                ontology.add_element( ontologyElementID = scop_entry_to_eE_id[current_scop_element],
                                      isA = [scop_entry_to_eE_id[parents[current_scop_element]]] )

        for current_cl in descriptions_dict["cl"]:
            ontology.add_element( ontologyElementID = scop_entry_to_eE_id[current_cl] )

        self.biana_access.insert_new_external_entity( ontology )
226