1 """
2 BIANA: Biologic Interactions and Network Analysis
3 Copyright (C) 2009 Javier Garcia-Garcia, Emre Guney, Baldo Oliva
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 """
19
20 from bianaParser import *
21 from sets import *
22 import os
23 from biana.BianaObjects.PDB import PDBFragment
24
26 """
27 SCOP Parser Class
28 """
29
# Descriptive metadata for the SCOP parser: a short identifier, a one-line
# summary, what one external entity stands for, and an (empty) description of
# external-entity relations.
# NOTE(review): the embedded listing numbers jump from 23 to 26 just above, so
# the `class` statement that presumably encloses these assignments is not
# visible here -- confirm against the real file before re-indenting.
30 name = "scop"
31 description = "This program fills up tables in database biana related with SCOP"
32 external_entity_definition = "A external entity represents a SCOP entity (fold, class, domain,...)"
33 external_entity_relations = ""
34
36
37
38
# Hands the SCOP-specific defaults (database description, script name, script
# description, and an empty list of extra compulsory arguments) to the
# BianaParser initializer.
# NOTE(review): these statements use `self`, but the `def` line is missing from
# this listing (the embedded numbers skip 35); presumably this is the body of
# the parser's own __init__ -- confirm.
39 BianaParser.__init__(self, default_db_description = "Structural Clasification Of Proteins",
40 default_script_name = "scopParser.py",
41 default_script_description = "This program fills up tables in database biana related to SCOP",
42 additional_compulsory_arguments = [])
# Attribute name stored on the instance; its consumer is not visible here.
43 self.default_eE_attribute = "scop"
44
45
46
48 """
49 Method that implements the specific operations of scop parser
50 """
51
# Overview: reads two SCOP text files from the directory in self.input_file
# (a classification file and a description file), creates one external entity
# per class/fold/superfamily/family/domain entry, and finally stores an
# ontology linking each entry to its parent level.
# NOTE(review): the `def` line of this method is missing from the listing (the
# embedded numbers skip 47) and all indentation has been lost, so block nesting
# below has to be confirmed against the real file.
# NOTE(review): `re`, `ExternalEntity`, `ExternalEntityAttribute` and `Ontology`
# are not imported by name in the visible imports; presumably they arrive via
# `from bianaParser import *` -- confirm.
#
# Register the "SCOP_Category" attribute type as an ENUM over the five levels.
52 self.biana_access.add_valid_external_entity_attribute_type( name = "SCOP_Category",
53 data_type = "ENUM(\"class\",\"fold\",\"superfamily\",\"family\",\"domain\")",
54 category = "eE attribute" )
55
56
57
# Called right after the registration above; presumably makes the new attribute
# type visible to later inserts -- confirm.
58 self.biana_access.refresh_database_information()
59
60
# Tiny factory returning a fresh empty list; used as the default value in the
# two setdefault() calls further down.
61 def new_list():
62 return []
63
# Two-letter level codes mapped to the values of the SCOP_Category ENUM.
64 categories = {"cl":"class",
65 "cf":"fold",
66 "sf":"superfamily",
67 "fa":"family",
68 "dm":"domain"}
69
# NOTE(review): assigned but never read in the visible code.
70 number_of_lines = 0
71
72
73
74
75
76
# One pattern per "<code>=<digits>" token looked up in the last field of a
# classification line, plus a pattern for the "[TaxId: N]" tag.
# NOTE(review): the patterns are non-raw strings containing backslash escapes;
# r"..." literals would be the safer spelling.
# NOTE(review): px_regex and range_regex are compiled but not used in the
# visible code.
77 cl_regex = re.compile("cl=(\d+)")
78 cf_regex = re.compile("cf=(\d+)")
79 sf_regex = re.compile("sf=(\d+)")
80 fa_regex = re.compile("fa=(\d+)")
81 dm_regex = re.compile("dm=(\d+)")
82 sp_regex = re.compile("sp=(\d+)")
83 px_regex = re.compile("px=(\d+)")
84 range_regex = re.compile("(\w+):(\S*)")
85 tax_regex = re.compile("\[TaxId:\s(\d+)\]")
86
# domain id -> list of (pdb_code, range) tuples
87 domains_dict = {}
88
89
# hierarchy_dict[level][id] -> id of the parent one level up
# descriptions_dict[level code][id] -> description text
# sp_dict: domain id -> Set of species ids
90 hierarchy_dict = {"cf":{},"sf":{},"fa":{},"dm":{}}
91 descriptions_dict = {"cl":{},"cf":{},"sf":{},"fa":{},"dm":{},"sp":{},"px":{}}
92 sp_dict = {}
93
# SCOP id -> id of the external entity created for it
94 scop_entry_to_eE_id = {}
95
# self.input_file is used as a directory prefix: make sure it ends with the
# path separator before file names are appended to it.
96 if not self.input_file.endswith(os.sep):
97 self.input_file += os.sep
98
# Classification file; the name is suffixed with the source database version
# with any double quotes removed.  file() is the Python 2 builtin.
# NOTE(review): the handle is closed only on the normal path below; an
# exception raised while parsing leaves it open.
99 scop_dir_cla_fd = file(self.input_file+"dir.cla.scop.txt_"+self.sourcedb_version.replace("\"",""),'r')
100
101 for line in scop_dir_cla_fd:
102
# Lines starting with '#' are skipped.
103 if line.startswith("#"):
104 continue
105
106 line_fields = line.strip().split()
107
108
109
110
111
112
# Only lines that split into exactly six whitespace-separated fields are used.
113 if len(line_fields) != 6:
114
115 print "skipping..."
116 continue
117
# Field 1 is kept as the PDB code.
118 pdb_code = line_fields[1]
119
120
121
122
123
124
# Field 2 is kept as the residue range string.
# NOTE(review): this name shadows the builtin `range` for the rest of the scope.
125 range = line_fields[2]
126
# Pull each level id out of field 5.
# NOTE(review): search() returns None when a token is absent, in which case
# .group(1) raises AttributeError.
127 cl = cl_regex.search(line_fields[5]).group(1)
128 cf = cf_regex.search(line_fields[5]).group(1)
129 sf = sf_regex.search(line_fields[5]).group(1)
130 fa = fa_regex.search(line_fields[5]).group(1)
131 dm = dm_regex.search(line_fields[5]).group(1)
132 sp = sp_regex.search(line_fields[5]).group(1)
133
# Record each entry's parent: fold->class, superfamily->fold,
# family->superfamily, domain->family.
134 hierarchy_dict["cf"][cf] = cl
135 hierarchy_dict["sf"][sf] = cf
136 hierarchy_dict["fa"][fa] = sf
137 hierarchy_dict["dm"][dm] = fa
138
139
# Accumulate, per domain, its species ids and its (pdb_code, range) pairs.
140 sp_dict.setdefault(dm,Set(new_list())).add(sp)
141 domains_dict.setdefault(dm,new_list()).append((pdb_code,range))
142
143
144 scop_dir_cla_fd.close()
145
146
147
148
149
# Description file, named with the same version suffix as above.
# NOTE(review): same close-on-success-only pattern as the first file.
150 scop_des_fd = file(self.input_file+"dir.des.scop.txt_"+self.sourcedb_version.replace("\"",""),'r')
151
152 for line in scop_des_fd:
153
154 if line.startswith("#"):
155 continue
156
# Tab-separated: field 0 is the id, field 1 the level code, field 4 the text.
# NOTE(review): a level code outside the keys of descriptions_dict raises
# KeyError, and a line with fewer than five fields raises IndexError.
157 line_fields = line.strip().split("\t")
158 descriptions_dict[line_fields[1]][line_fields[0]] = line_fields[4]
159
160 scop_des_fd.close()
161
162
163
# Build one external entity for every described entry, except those filed
# under the "px" and "sp" codes.
# NOTE(review): with indentation lost, the nesting of the statements from the
# `current_category == "dm"` test down to the insert call cannot be read from
# this listing.  domains_dict and sp_dict are only ever filled with domain ids,
# so the two loops that index them presumably sit under that test; whether the
# "description" attribute is added per entry, per domain or per PDB tuple must
# be checked against the real file.
164 for current_category in descriptions_dict:
165 if current_category!="px" and current_category!="sp":
166 for current_scop_entry in descriptions_dict[current_category]:
167 eE = ExternalEntity( source_database = self.database, type = "SCOPElement" )
168 eE.add_attribute( ExternalEntityAttribute(attribute_identifier="SCOP", value = current_scop_entry) )
169 eE.add_attribute(ExternalEntityAttribute( attribute_identifier="SCOP_Category", value = categories[current_category] ))
170 if current_category == "dm":
# NOTE(review): a domain id present in the description file but absent from
# the classification file raises KeyError on this lookup.
171 for current_pdb in domains_dict[current_scop_entry]:
172
# Parse the comma-separated range string; only the chain of the first
# fragment is used.
173 fragments = PDBFragment.fragment_parser( fragment_str = current_pdb[1], separator = "," )
174
175 chain = fragments[0].chain
176
177
# The raw range string is always attached; the chain only when there is one.
178 additional_fields = { "pdb_range": current_pdb[1] }
179
180 if chain is not None:
181 additional_fields["chain"] = chain
182
183 eE.add_attribute(ExternalEntityAttribute(attribute_identifier = "pdb", value=current_pdb[0],
184 additional_fields = additional_fields ))
185 eE.add_attribute(ExternalEntityAttribute(attribute_identifier="description", value = descriptions_dict[current_category][current_scop_entry]))
186
187
188
189
# For every species id recorded for this entry, extract the number from the
# "[TaxId: N]" tag in the species description and attach it as "taxid".
190 for current_sp_id in sp_dict[current_scop_entry]:
191
192 m = tax_regex.search(descriptions_dict["sp"][current_sp_id])
193
194 if m:
195 eE.add_attribute( ExternalEntityAttribute( attribute_identifier="taxid", value = m.group(1) ) )
196
# No TaxId tag in the description: the species id is printed and skipped.
197 else:
198 print current_sp_id
199
200
201
# Store the entity and remember its id for the ontology links below.
202 self.biana_access.insert_new_external_entity(eE)
203
204 scop_entry_to_eE_id[current_scop_entry] = eE.get_id()
205
206
# NOTE(review): levelAttribute is "SCOPCategory" here, while the attribute
# registered and set above is "SCOP_Category" -- confirm which spelling the
# Ontology class expects.
207 ontology = Ontology( source_database = self.database, linkedAttribute="scop", name="scop", descriptionAttribute="description", levelAttribute="SCOPCategory" )
208
209
# Add every domain, family, superfamily and fold with a single is-a link to
# the entity of its parent level; classes are added without a parent.
# NOTE(review): an id seen in the classification file but missing from the
# description file has no entry in scop_entry_to_eE_id and raises KeyError.
210 for current_dm_scop_element in hierarchy_dict["dm"]:
211 ontology.add_element( ontologyElementID = scop_entry_to_eE_id[current_dm_scop_element],
212 isA = [scop_entry_to_eE_id[hierarchy_dict["dm"][current_dm_scop_element]]] )
213 for current_fa_scop_element in hierarchy_dict["fa"]:
214 ontology.add_element( ontologyElementID = scop_entry_to_eE_id[current_fa_scop_element],
215 isA = [scop_entry_to_eE_id[hierarchy_dict["fa"][current_fa_scop_element]]] )
216 for current_sf_scop_element in hierarchy_dict["sf"]:
217 ontology.add_element( ontologyElementID = scop_entry_to_eE_id[current_sf_scop_element],
218 isA = [scop_entry_to_eE_id[hierarchy_dict["sf"][current_sf_scop_element]]] )
219 for current_cf_scop_element in hierarchy_dict["cf"]:
220 ontology.add_element( ontologyElementID = scop_entry_to_eE_id[current_cf_scop_element],
221 isA = [scop_entry_to_eE_id[hierarchy_dict["cf"][current_cf_scop_element]]] )
222 for current_cl in descriptions_dict["cl"]:
223 ontology.add_element( ontologyElementID = scop_entry_to_eE_id[current_cl] )
224
# The ontology is stored through the same insert call used for the entities.
225 self.biana_access.insert_new_external_entity( ontology )
226