1 """
2 BIANA: Biologic Interactions and Network Analysis
3 Copyright (C) 2009 Javier Garcia-Garcia, Emre Guney, Baldo Oliva
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 """
19
20
21
22 import time
23 import sys
24 import md5
25 import traceback
26 import copy
27 import sets
28 from math import ceil
29 import re
30
31
32 import ConnectorDB
33 import database
34 import biana.utilities.int_ascii as int_ascii
35
36
37 import biana.BianaObjects as BianaObjects
38 from BianaDatabaseDefinition import BianaDatabase
39
40
41 debug_web = 0
42
43
44
45
46
47 BIANA_SOURCE_CODE_VERSION = "Mar_16_09"
48
49
51 """
52 Class used as an interface with database biana
53 """
54
55 - def __init__(self, dbname=None, dbhost=None, dbuser=None, dbpassword=None, dbport=None, dbsocket=None, use_buffer=False, lock_tables = False, check_integrity=False ):
56 """
57 "dbname" is the database name to which you want to connect to (required)
58 "dbhost" is the machine with the mysql server that holds the biana database (required)
59 "dbuser" is the mysql user (not required in most systems)
60 "dbpassword" is the mysql password (not required in most systems)
61 "dbport" is the mysql port (not required in most systems)
62
63 The following parameters should not be used by standard users, only for advanced users or developers:
64 "use_buffer" must be set True when populating database due to performance issues. Automatically controlled by parsers
65 "lock_tables" Allow Connector db to lock tables when using them. It is set to True when populating database due to performance issues
66 "check_integrity" determines if integrity of the database must be checked (if there is any parser not finished or if some table definitions have been changed)
67 """
68
69
70 self.db = ConnectorDB.DB(dbname=dbname, dbhost=dbhost, dbuser=dbuser, dbpassword=dbpassword, dbport=dbport, dbsocket=dbsocket, lock_tables=lock_tables)
71 self.dbname = dbname
72 self.dbhost = dbhost
73 self.dbuser=dbuser
74 self.dbpassword=dbpassword
75 self.dbport=dbport
76 self.dbsocket=dbsocket
77 self.lock_tables=lock_tables
78 self.use_buffer = use_buffer
79
80
81 self.validSources = None
82 self.valid_source_database_ids = {}
83 self.available_unification_protocols = None
84
85
86 self.available_ontology_names = None
87 self.ontology_linked_attributes = None
88 self.loaded_ontologies = {}
89
90 self.db_version_modified = None
91
92 self.db_versions_list = None
93
94 self.db.add_autoincrement_columns( table = "externalEntity", attribute = "externalEntityID" )
95 self.db.add_autoincrement_columns( table = "externalEntityRelationParticipant", attribute = "externalEntityRelationParticipantID" )
96 self.db.add_autoincrement_columns( table = "sequenceProtein", attribute = "proteinSequenceID" )
97 self.db.add_autoincrement_columns( table = "sequenceNucleotide", attribute = "nucleotideSequenceID" )
98 self.db.add_autoincrement_columns( table = "externalDatabaseAttributeTransfer", attribute = "keyID" )
99
100 self.db_description = None
101 self.db_optimized_for = None
102
103 self.biana_database = BianaDatabase()
104
105 self.transferred_attributes = {}
106
107
108
109
110
111 self.key_attribute_ids = {}
112
113 if self.dbname is not None:
114
115 if not self.db.check_consistency_with_given_source_version(BIANA_SOURCE_CODE_VERSION):
116
117 raise Exception("Source code - database inconsistency")
118
119 self._load_biana_types_and_attributes()
120
121
122 self.db.check_database(database = self.biana_database, verbose=True)
123
124 if check_integrity:
125 self._check_database_integrity()
126
127 self._load_biana_database_information()
128
129
130 self.temporal_data = {}
131 self.temporal_data["relations_hierarchy_parents"] = {}
132 self.store_relations_hierarchy = False
133
134 return
135
136
137
138
139
141 odict = self.__dict__.copy()
142 del odict['db']
143 return odict
144
146
147 self.__dict__.update(dict)
148 self.db = ConnectorDB.DB(dict["dbname"], dict["dbhost"], dict["dbuser"], dict["dbpassword"], dict["dbport"], dict["dbsocket"], dict["lock_tables"])
149 try:
150 self.db.add_autoincrement_columns( table = "externalEntity", attribute = "externalEntityID" )
151 self.db.add_autoincrement_columns( table = "externalEntityRelationParticipant", attribute = "externalEntityRelationParticipantID" )
152 self.db.add_autoincrement_columns( table = "sequenceProtein", attribute = "proteinSequenceID" )
153 self.db.add_autoincrement_columns( table = "sequenceNucleotide", attribute = "nucleotideSequenceID" )
154 self.db.add_autoincrement_columns( table = "externalEntityEquivalencesTransfer", attribute = "keyID" )
155 except:
156 pass
157 return
158
160 return (self.dbname, self.dbhost, self.dbuser, self.dbpassword)
161
163 return "BIANA Connection. Database connection: %s" %self.db
164
166 return self.db_optimized_for=="running"
167
169 self.db = ConnectorDB.DB(dbname=self.dbname, dbhost=self.dbhost, dbuser=self.dbuser, dbpassword=self.dbpassword, dbport=self.dbport, dbsocket=self.dbsocket, lock_tables=self.lock_tables)
170
171 - def create_database(self, dbname, description="BIANA DATABASE", optimize_for="parsing", ignore_primary_keys=False):
172 """
173 It creates necessary tables into a database server
174
175 "description" is a tag for label the new database
176
177 "optimize_for" can take two distinct values: "parsing" and "running". By default, it creates a database optimized for parsing (as it will be created empty).
178 """
179
180 self.db.insert_db_content( sql_query = self.biana_database.get_sql_query(ignore_primary_keys=ignore_primary_keys).split(";") )
181
182 self.optimize_database_for( mode=optimize_for )
183
184
185 import biana.biana_globals as BIANA_GLOBALS
186
187 [ self.add_valid_external_entity_type(current_type) for current_type in BIANA_GLOBALS.EXTERNAL_ENTITY_TYPES ]
188 [ self.add_valid_external_entity_relation_type(current_type) for current_type in BIANA_GLOBALS.EXTERNAL_ENTITY_RELATION_TYPES ]
189 [ self.add_valid_identifier_reference_types(current_reference_type) for current_reference_type in BIANA_GLOBALS.VALID_IDENTIFIER_REFERENCE_TYPES ]
190 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_GENERAL_ATTRIBUTES ]
191 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE identifier attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_IDENTIFIER_ATTRIBUTES ]
192 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE versionable identifier attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_VERSIONABLE_IDENTIFIER_ATTRIBUTE_TYPES ]
193 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE descriptive searchable attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES ]
194 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE descriptive attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_DESCRIPTIVE_ATTRIBUTE_TYPES ]
195 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE numeric attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_NUMERIC_ATTRIBUTE_TYPES ]
196 [ self.add_valid_external_entity_relation_participant_attribute_type(current_type, data_type) for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TYPES ]
197 [ self.add_valid_external_entity_attribute_type(current_type, data_type_dict, "eE special attribute") for (current_type, data_type_dict) in BIANA_GLOBALS.EXTERNAL_ENTITY_SPECIAL_ATTRIBUTE_TYPES.iteritems()]
198
199
200 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.BIANA_DATABASE_TABLE,
201 column_values = (("description",description),
202 ("optimized_for",optimize_for)),
203 use_buffer = False ) )
204 self.db.close()
205
206 self.db = ConnectorDB.DB(dbname=dbname, dbhost=self.dbhost, dbuser=self.dbuser, dbpassword=self.dbpassword, dbport=self.dbport, dbsocket=self.dbsocket)
207
208 self.dbname = dbname
209
210 self._load_biana_types_and_attributes()
211
212 self.db.check_database(database=self.biana_database, ignore_primary_keys=ignore_primary_keys)
213
214
215 default_protocol = BianaObjects.UnificationProtocol( description = "No unification", BianaDatabaseVersion = "X" )
216
217 self.db_description = description
218 self.db_optimized_for = optimize_for
219
220 self._create_new_unification_protocol_tables(default_protocol)
221
222
228
229
243
244
248
249
251
252
253 if self.biana_database.is_valid_external_entity_relation_type(type):
254 sys.stderr.write("Trying to add an existing External Entity Relation Type: %s. Not added again.\n" %type)
255 return
256
257
258 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE,
259 column_values = [("name",type),
260 ("category","eEr type")],
261 use_buffer = False ) )
262
263 self.biana_database.add_valid_external_entity_relation_type(type)
264
265
269
270
272
273 if self.biana_database.is_valid_identifier_reference_type(current_reference_type):
274 sys.stderr.write("Trying to add an existing Identifier reference type: %s. Not added again.\n" %tcurrent_reference_ype)
275 return
276
277 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE,
278 column_values = [("name",current_reference_type),
279 ("category","identifier reference type")],
280 use_buffer = False ) )
281
282 self.biana_database.add_valid_identifier_reference_type( current_reference_type )
283
284
285
287
288 if self.biana_database.is_valid_external_entity_attribute_type(name):
289 sys.stderr.write("Trying to add an existing External Entity Attribute Type: %s. Not added again\n" %name)
290 return
291
292 additional_fields_tuple_list = []
293
294 if category.lower()=="ee special attribute":
295 data_type_dict = data_type
296 for current_field_tuple in data_type_dict["fields"]:
297 current_data_type = current_field_tuple[1]
298 current_field = current_field_tuple[0]
299 if current_field_tuple[0] == "value":
300 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE,
301 column_values = [("name",name),
302 ("data_type",current_data_type),
303 ("category",category)],
304 use_buffer = False ) )
305 else:
306 current_null = current_field_tuple[2]
307 if current_null is False: current_null=0
308 else: current_null=1
309
310 additional_fields_tuple_list.append((current_field, current_data_type, current_null))
311
312 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.SPECIAL_ATTRIBUTES_TABLE,
313 column_values = [("attribute_name",name),
314 ("field_name",current_field),
315 ("data_type",current_data_type),
316 ("canbenull",current_null)],
317 use_buffer = False ) )
318
319 else:
320 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE,
321 column_values = [("name",name),
322 ("data_type",data_type),
323 ("category",category)],
324 use_buffer = False ) )
325
326 self.biana_database.add_valid_external_entity_attribute_type(name, data_type, category, additional_fields_tuple_list)
327
328
340
341
343 """
344 Method to load current biana database types and attributes
345 """
346
347 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [ self.biana_database.TYPES_AND_ATTRIBUTES_TABLE ],
348 columns = ["name", "data_type","category"] ),
349 answer_mode = "raw" )
350
351 for current_data in data:
352 if current_data[2].lower() == "identifier reference type": self.biana_database.add_valid_identifier_reference_type(current_data[0])
353
354
355 for current_data in data:
356 current_category = current_data[2]
357 current_attribute = current_data[0]
358 data_type = current_data[1]
359 new_table = None
360
361 if current_category.lower() == "ee type": self.biana_database.add_valid_external_entity_type(current_attribute)
362 elif current_category.lower() == "eer type": self.biana_database.add_valid_external_entity_relation_type(current_attribute)
363 elif current_category.lower() == "eerp attribute" or current_category.lower() == "eerp numeric attribute" or current_category.lower() == "eerp descriptive attribute" or current_category.lower() == "eerp descriptive searchable attribute":
364 self.biana_database.add_valid_external_entity_relation_participant_attribute_type( current_attribute, data_type, current_category, [])
365 elif current_category.lower() == "ee identifier attribute" or current_category.lower() == "ee versionable identifier attribute" or current_category.lower() == "ee descriptive attribute" or current_category.lower() == "ee descriptive searchable attribute" or current_category.lower() == "ee numeric attribute": self.biana_database.add_valid_external_entity_attribute_type( current_attribute, data_type, current_category, [] )
366 elif current_category.lower() == "ee attribute": self.biana_database.add_valid_external_entity_attribute_type( current_attribute, data_type, current_category, [] )
367 elif current_category.lower()== "ee special attribute":
368
369 special_data = self.db.select_db_content( self.db._get_select_sql_query( tables = [ self.biana_database.SPECIAL_ATTRIBUTES_TABLE ],
370 columns = ["field_name", "data_type", "canbenull"],
371 fixed_conditions = [("attribute_name","=",current_attribute)] ),
372 answer_mode = "raw" )
373 self.biana_database.add_valid_external_entity_attribute_type( current_attribute, data_type, current_category, special_data )
374 elif current_category.lower()== "identifier reference type": pass
375 else:
376 raise ValueError("%s category not recognized!" %current_category)
377
378 if new_table:
379 self.biana_database.add_table(new_table)
380
381
418
419
446
447
449 """
450 """
451
452 if self.available_ontology_names is None:
453
454 data = self.db.select_db_content( sql_query = self.db._get_select_sql_query ( tables = [ self.biana_database.ONTOLOGY_INFO_TABLE ],
455 columns = ["name","linked_attribute","key_id","level_attribute"] ),
456 answer_mode = "raw" )
457
458 self.available_ontology_names = dict([ (x[0],x[1]) for x in data ])
459 self.ontology_linked_attributes = dict([ (x[1],{"ontology_name":x[0], "key_id":x[2], "level_attribute":x[3]}) for x in data ])
460
461 if name is None:
462 return self.available_ontology_names
463 else:
464 return self.available_ontology_names[name.lower()]
465
466
468
469 if self.ontology_linked_attributes is None:
470 self.get_available_ontology_names()
471
472 if( attribute_identifier.lower() in self.ontology_linked_attributes ):
473 return True
474
475 return False
476
477
479 """
480 Adds a key attribute, used in transfer attributes (key attribute) and ontologies (linked attribute)
481
482 Creates the necessary tables
483 """
484 key_attribute = key_attribute.lower()
485
486 if self.key_attribute_ids.has_key( (external_database_id, key_attribute) ):
487
488 return self.key_attribute_ids[ (external_database_id, key_attribute) ]
489
490 else:
491
492 new_id = self._get_new_key_id()
493
494 value_data_type = self.biana_database.get_attribute_data_type(key_attribute)
495
496 new_table = database.TableDB( table_name = self._get_key_attribute_table_name( key_id = new_id ),
497 table_fields = [ database.FieldDB(field_name = "externalEntityID",
498 data_type = "integer(4) unsigned",
499 null = False ),
500 database.FieldDB(field_name = "value",
501 data_type = value_data_type,
502 null = False ) ],
503 indices = [("value","externalEntityID")] )
504
505 self.db.insert_db_content( sql_query = new_table.create_mysql_query(), answer_mode = None )
506
507 self.key_attribute_ids.setdefault((external_database_id,key_attribute.lower()),new_id)
508
509 return new_id
510
511
512
513
515
516 key_id = self._add_key_attribute( external_database_id = externalDatabaseID,
517 key_attribute = key_attribute )
518
519 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query(table = self.biana_database.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE.get_table_name(),
520 column_values = (("keyID",key_id),
521 ("externalDatabaseID", externalDatabaseID),
522 ("attributeKey", key_attribute),
523 ("transferAttribute", transfer_attribute)),
524 use_buffer = False ),
525 answer_mode = None )
526
527 self.transferred_attributes.setdefault(transfer_attribute.lower(),sets.Set()).add((externalDatabaseID,key_attribute.lower()))
528
529
530
531
532 try:
533 self.transferID_key[transfer_attribute.lower()] = (key_id,key_attribute.lower())
534 except:
535 self.transferID_key = {}
536 self.transferID_key[transfer_attribute.lower()] = (key_id,key_attribute.lower())
537
538
540 return self.transferred_attributes.has_key(attribute_identifier.lower())
541
543 return self.key_attribute_ids.has_key( (external_database_id, attribute_identifier.lower()) )
544
546 return "key_attribute_%s" %key_id
547
549 """
550 Returns the name of the ontology linked to an attribute
551 """
552
553 if self._is_ontology_linked_attribute(attribute_identifier):
554 return self.ontology_linked_attributes[attribute_identifier.lower()]["name"]
555 raise ValueError("Trying to get the name for an unlinked attribute")
556 else:
557 raise ValueError("Trying to get the name for an unlinked attribute")
558
559
560
562 """
563 Performs some modifications in database in order to optimize database access efficiency for parsing or running
564
565 "mode" can take two different values: "running" or "parsing"
566
567 "optimize" parameter will be only used if mode is "running"
568 """
569
570 if mode == "parsing":
571 self.db._disable_indices()
572
573 elif mode == "running":
574 self.db._enable_indices()
575 self.db.insert_db_content( sql_query = self.biana_database.optimize_database( optimize = optimize, analyze = True ) )
576
577
578
579
580
581
582
583
584 self._update_relations_hierarchy()
585
586
587
588
589
590 else:
591 raise ValueError("optimizing database mode can only be \"parsing\" or \"running\"")
592
593
594 self.db_optimized_for = mode
595
596 self.db.insert_db_content( sql_query = self.db._get_update_sql_query(table = self.biana_database.BIANA_DATABASE_TABLE,
597 update_column_values = [("optimized_for", self.db_optimized_for)]) )
598
599
601 """
602 Close the connection with database
603
604 Although it is only necessary to close the BianaDBaccess object when parsing,
605 is highly recommended to use it always as maybe in the future this requirite may change
606 """
607
608
609
610
611
612 if len( self.temporal_data["relations_hierarchy_parents"] )>0:
613 hierarchy_set = sets.Set()
614
615 eE_eERid_dict = self.temporal_data["relations_hierarchy_parents"]
616
617 def get_all_parents(eEid):
618 parents = sets.Set()
619 if( eE_eERid_dict.has_key(eEid) ):
620 parents.update(eE_eERid_dict[eEid])
621 for current_eEid in eE_eERid_dict[eEid]:
622 parents.update(get_all_parents(current_eEid))
623 return parents
624
625 for eEid in self.temporal_data["relations_hierarchy_parents"].keys():
626 for current_parent in get_all_parents(eEid):
627 hierarchy_set.add((current_parent,eEid))
628
629
630 for current_eERid, current_eEid in hierarchy_set:
631 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE,
632 column_values = (("externalEntityRelationParticipantID", self._get_new_external_entity_relation_participant_id()),
633 (self.biana_database.external_entity_relation_id_col, current_eERid),
634 (self.biana_database.externalEntityID_col, current_eEid )),
635 use_buffer = True ))
636
637 if self.db_version_modified:
638 self._update_bianaDB_autoincrement_fields()
639 self._update_bianaDB_version()
640
641 self.db.close()
642
643
644
645
646
647
648
649
650
651
652
653
675
676
677
678
680 """
681 Method used internally to control biana version to be able to control compatibilities between diffent database versions
682
683 It also stores the stable values for the database
684 """
685
686 date = time.localtime()
687 actual_date = "%s-%s-%s" %(date[0],date[1],date[2])
688
689 self.db.insert_db_content( self.db._get_insert_sql_query(table = self.biana_database.DATABASE_VERSION_TABLE.get_table_name(),
690 column_values = (("dbControlID", md5.new(str(time.localtime())+str(time.time())).hexdigest()),
691 ("date", actual_date),
692
693 ("stable_externalEntityID",self._get_last_external_entity_id()),
694
695 ("stable_externalEntityRelationParticipantID",self._get_last_external_entity_relation_participant_id()))),
696 answer_mode = None )
697
698
700 """
701 Checks the integrity of the database
702 """
703
704
705 sys.stderr.write("checking database integrity\n")
706
707 self.db._check_locked_table(self.biana_database.EXTERNAL_DATABASE_TABLE.get_table_name())
708 if len(self.db.select_db_content( sql_query = "SELECT * FROM %s WHERE parsingTime IS NULL" %self.biana_database.EXTERNAL_DATABASE_TABLE, answer_mode="list" )):
709 sys.stderr.write("Database does not have integrity. Fixing it...")
710 self._rollback()
711 return
712
713
715 """
716 Method used to undo all the changes from an stable data insert
717 """
718
719 max_external_entity_id = self._get_last_stable_external_entity_id()
720
721 max_external_entity_relation_participant_id = self._get_last_stable_external_entity_relation_participant_id()
722
723 for current_table in self.biana_database.get_tables():
724 if current_table.has_field("externalEntityID"):
725 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = current_table.get_table_name(),
726 fixed_conditions = [("externalEntityID",">",max_external_entity_id, None)] ),
727 answer_mode = None )
728
729 if current_table.has_field("externalEntityRelationParticipantID"):
730 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = current_table.get_table_name(),
731 fixed_conditions = [("externalEntityRelationParticipantID",">",max_external_entity_relation_participant_id, None)] ),
732 answer_mode = None )
733
734
735
736 max_key_id = self._get_last_stable_key_id()
737 transfer_tables_list = self.db.select_db_content( sql_query = "SHOW TABLES",
738 answer_mode = "list" )
739
740 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE.get_table_name(),
741 fixed_conditions = [("keyID",">",max_key_id, None)] ),
742 answer_mode = None )
743
744 regex = re.compile("key_attribute_(\d+)")
745 for current_table in transfer_tables_list:
746 m = regex.match(current_table)
747 if m:
748 if( int(m.group(1))>int(max_key_id) ):
749 self.db.insert_db_content( sql_query = self.db._get_drop_sql_query( table_list = [current_table] ) )
750
751
752
753 for current_table in [self.biana_database.ONTOLOGY_IS_A_TABLE.get_table_name(), self.biana_database.ONTOLOGY_IS_PART_OF_TABLE.get_table_name() ]:
754 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = current_table,
755 fixed_conditions = [("externalEntityID",">",max_external_entity_id, None)] ),
756 answer_mode = None )
757
758 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE.get_table_name(),
759 fixed_conditions = [("parentExternalEntityID",">",max_external_entity_id, None)] ),
760 answer_mode = None )
761
762 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE.get_table_name(),
763 fixed_conditions = [("childExternalEntityID",">",max_external_entity_id,None)] ),
764 answer_mode = None )
765
766
767
768 max_protein_sequence_id = self._get_last_stable_sequenceProtein_id()
769 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name(),
770 fixed_conditions = [("proteinSequenceID",">",max_protein_sequence_id,None)] ),
771 answer_mode = None )
772
773 max_nucleotide_sequence_id = self._get_last_stable_sequenceNucleotide_id()
774 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"].get_table_name(),
775 fixed_conditions = [("nucleotideSequenceID",">",max_nucleotide_sequence_id,None)] ),
776 answer_mode = None )
777
778
779 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTERNAL_DATABASE_TABLE.get_table_name(),
780 fixed_conditions = [("parsingTime","IS","NULL",None)] ),
781 answer_mode = None )
782
783 return
784
785
787
788 if self.db_versions_list is None:
789
790 self.db_versions_list = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.DATABASE_VERSION_TABLE.get_table_name()],
791 columns = ["dbControlID"]),
792 answer_mode = "list" )
793 return self.db_versions_list
794
795
799
800
801
807
808
810 """
811 """
812
813 self._get_valid_source_dbs()
814
815 return self.valid_source_database_ids.values()
816
821
822
824 """
825 Returns a dictionary with current external databases stored in BIANA database
826
827 Database objects are accessed by its name and version
828 """
829
830 if self.validSources is None:
831
832
833 columns = ["externalDatabaseID","databaseName","databaseVersion","parsedFile","parsedDate","databaseDescription","defaultExternalEntityAttribute","isPromiscuous"]
834
835
836 try:
837 current_sources = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_TABLE.get_table_name()],
838 columns = columns ),
839 answer_mode="raw", remove_duplicates="no" )
840 return_dict = {}
841 for actual_source in current_sources:
842 databaseObject = BianaObjects.ExternalDatabase( databaseName = actual_source[1],
843 databaseVersion = actual_source[2],
844 databaseFile = actual_source[3],
845 databaseDescription = actual_source[5],
846 defaultExternalEntityAttribute = actual_source[6],
847 databaseDate = actual_source[4],
848 externalDatabaseID = actual_source[0],
849 isPromiscuous = actual_source[7])
850
851 self.valid_source_database_ids[ databaseObject.get_id() ] = databaseObject
852
853 available_eE_attributes = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE],
854 columns = ["attributeType"],
855 fixed_conditions = [("externalDatabaseID","=",databaseObject.get_id())] ),
856 answer_mode = "list", remove_duplicates = "no" )
857
858 for actual_attribute in available_eE_attributes:
859 databaseObject.add_valid_external_entity_attribute_type(actual_attribute)
860
861
862 available_eEr_attributes = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE],
863 columns = ["attributeType"],
864 fixed_conditions = [("externalDatabaseID","=",databaseObject.get_id())] ),
865 answer_mode = "list", remove_duplicates = "no" )
866
867 for actual_attribute in available_eEr_attributes:
868 databaseObject.add_valid_external_entity_relation_attribute_type(actual_attribute)
869
870
871 available_eE_types = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE],
872 columns = ["eEType"],
873 fixed_conditions = [("externalDatabaseID","=",databaseObject.get_id())] ),
874 answer_mode = "list", remove_duplicates = "no" )
875
876 for actual_attribute in available_eE_types:
877 databaseObject.add_valid_external_entity_type(actual_attribute)
878
879
880
881 available_eEr_types = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE],
882 columns = ["eErType"],
883 fixed_conditions = [("externalDatabaseID","=",databaseObject.get_id())] ),
884 answer_mode = "list", remove_duplicates = "no" )
885
886 for actual_attribute in available_eEr_types:
887 databaseObject.add_valid_external_entity_relation_type(actual_attribute)
888
889
890
891
892 if return_dict.has_key(actual_source[1]):
893 return_dict[actual_source[1]][actual_source[2]] = databaseObject
894 else:
895 return_dict[actual_source[1]] = {actual_source[2]: databaseObject}
896
897 except:
898 raise
899
900 self.validSources = return_dict
901
902 return self.validSources
903
904
905
906
907
908
910 """
911 Inserts into database the information of a new external database that is being integrated into BIANA database
912 """
913
914
915 if self._get_valid_source_dbs().has_key(externalDatabase.get_name()) and self._get_valid_source_dbs()[externalDatabase.get_name()].has_key(externalDatabase.get_version()):
916 sys.stderr.write("\n\nDatabase Name and Version already existed in the database.\n\n")
917 sys.exit(-1)
918 else:
919
920
921
922 new_database_identifier = self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_TABLE.get_table_name(),
923 column_values = ( ("databaseName", externalDatabase.get_name().strip("\"")),
924 ("databaseVersion", externalDatabase.get_version().strip("\"")),
925 ("parsedFile",externalDatabase.get_parsed_file().strip("\"") ),
926 ("parsedDate", externalDatabase.get_parsing_date().strip("\"") ),
927 ("defaultExternalEntityAttribute", externalDatabase.get_default_eE_attribute().strip("\"") ),
928 ("databaseDescription", externalDatabase.get_description().strip("\"") ),
929 ("isPromiscuous", externalDatabase.get_promiscuity()) ),
930 use_buffer=False ),
931 answer_mode="last_id")
932
933 externalDatabase.set_id(externalDatabaseID = new_database_identifier)
934
935
936 if self._get_valid_source_dbs().has_key(externalDatabase.get_name()):
937 self._get_valid_source_dbs()[externalDatabase.get_name()][externalDatabase.get_version()] = externalDatabase
938 else:
939 self._get_valid_source_dbs()[externalDatabase.get_name()] = {externalDatabase.get_version(): externalDatabase }
940
941 self.valid_source_database_ids[new_database_identifier] = externalDatabase
942
943
944 self.db_version_modified = 1
945
946
948
949 self.db.insert_db_content( self.db._get_update_sql_query( table = self.biana_database.EXTERNAL_DATABASE_TABLE,
950 update_column_values = (("parsingTime",externalDatabase.get_parsing_time()),),
951 fixed_conditions = (("externalDatabaseID","=",externalDatabase.get_id()),) ),
952 answer_mode = None )
953
954
955
956 for current in externalDatabase.get_valid_external_entity_attribute_type():
957 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE,
958 column_values = ( ("externalDatabaseID",externalDatabase.get_id()),
959 ("attributeType",current) )),
960 answer_mode = None )
961
962
963 for current in externalDatabase.get_valid_external_entity_type():
964 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE,
965 column_values = ( ("externalDatabaseID",externalDatabase.get_id()),
966 ("eEType",current) )),
967 answer_mode = None )
968
969 for current in externalDatabase.get_valid_external_entity_relation_attribute_type():
970 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE,
971 column_values = ( ("externalDatabaseID",externalDatabase.get_id()),
972 ("attributeType",current) )),
973 answer_mode = None )
974
975 for current in externalDatabase.get_valid_external_entity_relation_type():
976 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE,
977 column_values = ( ("externalDatabaseID",externalDatabase.get_id()),
978 ("eErType",current) )),
979 answer_mode = None )
980
981
982
983
984
986 """
987 Returns the decimal value assigned to the sourceDB (externalDatabaseID will be converted to lower_case)
988
989 If "sourceDBVersion" is None, it returns a list with the all the identifiers assigned to the sourceDBName
990 """
991
992 try:
993 if sourceDBVersion is not None:
994 return self._get_valid_source_dbs()[sourceDBName.lower()][sourceDBVersion.lower()].get_id()
995 else:
996 return [ self._get_valid_source_dbs()[sourceDBName.lower()][x].get_id() for x in self._get_valid_source_dbs()[sourceDBName.lower()] ]
997 except:
998 sys.stderr.write("Trying to get the identifier for an unexisting external database: %s %s\n" %(sourceDBName, sourceDBVersion))
999 raise "Trying to get the identifier for an unexisting external database: %s %s\n" %(sourceDBName, sourceDBVersion)
1000
1001
1002
1003
1004
1005
1006
1007
1008
1011
1014
1017
1019 return self.db.get_next_autoincrement(table="externalEntityRelationParticipant", attribute="externalEntityRelationParticipantID" )
1020
1022 return self.db._get_current_autoincrement(table="externalEntityRelationParticipant", attribute="externalEntityRelationParticipantID" )
1023
1026
1029
1032
1035
1038
1041
1044
1047
1050
1053
1054
1056 """
1057 Inserts a new external entity to the biana database
1058
1059 If the externalEntity has been previously inserted in the database, it will show a message advertising about this. It won't insert the attributes, as an external entity can be inserted only once!
1060
1061 It is required to use this method once by each externalEntity.
1062
1063 """
1064
1065 if externalEntity.get_id() is not None:
1066 raise ValueError("Trying to insert an inserted external entity...")
1067
1068
1069 self.db_version_modified = 1
1070
1071 if externalEntity.get_id() is None:
1072
1073
1074
1075 new_external_entity_id = self._get_new_external_entity_id()
1076
1077 self.db.insert_db_content(self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name(),
1078 column_values = ( ("externalEntityID", new_external_entity_id),
1079 ("externalDatabaseID",externalEntity.get_source_database().get_id()),
1080 ("type",externalEntity.get_type())),
1081 use_buffer=self.use_buffer ))
1082
1083
1084
1085 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.USER_ENTITY_TABLE,
1086 column_values = [("userEntityID",new_external_entity_id),
1087 ("externalEntityID",new_external_entity_id)],
1088 use_buffer=self.use_buffer ) )
1089
1090 externalEntity.set_id(id_value = new_external_entity_id)
1091
1092
1093 externalEntity_attributes = externalEntity.get_attributes_dict()
1094
1095
1096 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_type( eE_type = externalEntity.get_type())
1097
1098 if externalEntity.get_type()== "ontology":
1099 self.insert_ontology(externalEntity)
1100
1101 for current_attribute_identifier in externalEntity_attributes:
1102 if externalEntity.get_type()== "relation":
1103 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_relation_attribute_type( attribute_identifier = current_attribute_identifier )
1104
1105 if current_attribute_identifier.lower() == "methodID":
1106 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_relation_attribute_type( attribute_identifier = "psimi_name" )
1107 else:
1108 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_attribute_type( attribute_identifier = current_attribute_identifier )
1109
1110 [ self._insert_external_entity_attribute( externalEntityID = new_external_entity_id,
1111 externalEntityAttribute = x ) for x in externalEntity_attributes[current_attribute_identifier] ]
1112
1113
1114
1115 if self._is_key_attribute( attribute_identifier = current_attribute_identifier, external_database_id = externalEntity.get_source_database().get_id() ):
1116 for current_key_attribute_object in externalEntity_attributes[current_attribute_identifier]:
1117 self.db.insert_db_content( self.db._get_insert_sql_query( table = self._get_key_attribute_table_name( key_id = self.key_attribute_ids[(externalEntity.get_source_database().get_id(), current_attribute_identifier.lower())] ),
1118 column_values = [("externalEntityID",new_external_entity_id),
1119 ("value",current_key_attribute_object.value)],
1120 use_buffer = self.use_buffer ),
1121 answer_mode = None )
1122
1123
1124 if externalEntity.get_type()== "relation":
1125
1126 self.db.insert_db_content(self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE.get_table_name(),
1127 column_values = ( ("externalEntityRelationID", new_external_entity_id),
1128 ("type",externalEntity.get_relation_type() ) ),
1129 use_buffer=self.use_buffer ),
1130 answer_mode=None)
1131
1132 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_relation_type( eEr_type = externalEntity.get_relation_type() )
1133
1134 for actual_participant_external_entity_id in externalEntity.get_participant_external_entity_ids_list():
1135
1136 new_id = self._get_new_external_entity_relation_participant_id()
1137
1138
1139 if self.store_relations_hierarchy:
1140 self.temporal_data["relations_hierarchy_parents"].setdefault(actual_participant_external_entity_id, []).append(new_external_entity_id)
1141 else:
1142 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE,
1143 column_values = (("externalEntityRelationParticipantID", new_id),
1144 (self.biana_database.external_entity_relation_id_col, new_external_entity_id),
1145 (self.biana_database.externalEntityID_col, actual_participant_external_entity_id )),
1146 use_buffer = True ) )
1147
1148
1149 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE,
1150 column_values = (("externalEntityRelationParticipantID", new_id),
1151 (self.biana_database.external_entity_relation_id_col, new_external_entity_id),
1152 (self.biana_database.externalEntityID_col, actual_participant_external_entity_id) ),
1153 use_buffer=self.use_buffer ))
1154
1155
1156 participant_attributes = externalEntity.get_participant_attributes( participantExternalEntityID = actual_participant_external_entity_id )
1157
1158 [ self._insert_external_entity_relation_participant_attribute( pParticipantID = new_id,
1159 pExternalEntityRelationParticipantAttribute = current_attribute ) for actual_key in participant_attributes for current_attribute in participant_attributes[actual_key] ]
1160
1161
1162 else:
1163 sys.stderr.write("Trying to insert an external Entity which is already in the database! Any new attribute will be inserted!\n")
1164
1165 return externalEntity.get_id()
1166
1167
1168
1170 """
1171
1172 """
1173
1174 tableObject = self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT[pExternalEntityRelationParticipantAttribute.attribute_identifier.lower()]
1175
1176
1177 column_values = [ ("externalEntityRelationParticipantID",pParticipantID),
1178 ("value",pExternalEntityRelationParticipantAttribute.value) ]
1179
1180 for current_field in pExternalEntityRelationParticipantAttribute.additional_fields:
1181 column_values.append((current_field[0],current_field[1].replace('\\','\\\\').replace('"','\\"')))
1182
1183
1184 self.db.insert_db_content( self.db._get_insert_sql_query( table = tableObject.get_table_name(),
1185 column_values = column_values,
1186 use_buffer=self.use_buffer ),
1187 answer_mode=None )
1188
1189 return
1190
1191
1193 """
1194 Adds a new attribute to the external entity
1195 externalEntityID must exist previously
1196 """
1197
1198 tableObject = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[externalEntityAttribute.attribute_identifier.lower()]
1199
1200 if externalEntityAttribute.attribute_identifier.lower()=="proteinsequence" or externalEntityAttribute.attribute_identifier.lower()=="nucleotidenequence" :
1201 if externalEntityAttribute.value.get_sequence_MD5() not in self.temporal_data.setdefault("sequence",sets.Set()):
1202 self._insert_sequence(externalEntityAttribute.value)
1203 value = externalEntityAttribute.value.get_sequence_MD5()
1204 else:
1205 value = externalEntityAttribute.value
1206
1207 column_values = [ ("externalEntityID",externalEntityID),
1208 ("value",value) ]
1209
1210 for current_field in externalEntityAttribute.additional_fields:
1211 column_values.append((current_field,externalEntityAttribute.get_field(current_field)))
1212
1213 if externalEntityAttribute.type is not None:
1214 column_values.append(("type",externalEntityAttribute.type))
1215
1216 if externalEntityAttribute.version is not None:
1217 column_values.append(("version",externalEntityAttribute.version))
1218
1219 self.db.insert_db_content( self.db._get_insert_sql_query( table = tableObject.get_table_name(),
1220 column_values = column_values,
1221 use_buffer=self.use_buffer ),
1222 answer_mode=None )
1223 return
1224
1225
1227 """
1228 Inserts a sequence object into BIANA database and returns its ID.
1229
1230 """
1231
1232
1233
1234
1235
1236
1237 special_column_values = [ ("sequence", "COMPRESS(\"%s\")" %sequence.get_sequence()) ]
1238
1239 if sequence.sequence_type == "peptide" :
1240 if sequence.get_sequenceID() is not None:
1241 seq_id = sequence.get_sequenceID()
1242 else:
1243 seq_id = self._get_new_sequenceProtein_id()
1244 column_values = [("proteinSequenceID",seq_id)]
1245 table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"]
1246 column_values.extend([("sequenceMW",sequence.get_proteinMW()),("sequenceIP",sequence.get_proteinIP()) ])
1247
1248 elif sequence.sequence_type == "rna" or sequence.sequence_type == "dna":
1249 if sequence.get_sequenceID() is not None:
1250 seq_id = sequence.get_sequenceID()
1251 else:
1252 seq_id = self._get_new_sequenceNucleotide_id()
1253 column_values = [("nucleotideSequenceID", seq_id)]
1254 table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"]
1255
1256 column_values.extend([("sequenceMD5",sequence.get_sequence_MD5()),
1257 ("sequenceLength", sequence.get_length())])
1258
1259 self.db.insert_db_content(self.db._get_insert_sql_query( table = table,
1260 column_values = column_values,
1261 special_column_values = special_column_values,
1262 use_buffer = self.use_buffer),
1263 answer_mode = None)
1264
1265 self.temporal_data.setdefault("sequence",sets.Set()).add(sequence.get_sequence_MD5())
1266
1267 return seq_id
1268
1269
1271 """
1272 Inserts a sequence file to the database
1273 """
1274
1275 num = 0
1276
1277 if type.lower() == "proteinsequence":
1278 seqObj = BianaObjects.ProteinSequence
1279 elif type.lower() == "nucleotidesequence":
1280 seqObj = BianaObjects.NucleotideSequence
1281
1282
1283 if format.lower()=="fasta":
1284 temp_seq = []
1285 for line in input_fd:
1286 if line[0]==">":
1287 if len(temp_seq)>0:
1288 self._insert_sequence( sequence = seqObj(sequence="".join(temp_seq),sequenceID=sequenceID ) )
1289 sequenceID=line[1:].strip()
1290 if( sequenceID == "NULL" or sequenceID == "None" or sequenceID is None ):
1291 sequenceID = self._get_new_sequenceProtein_id()
1292 else:
1293 temp_seq.append(line.strip())
1294 num += 1
1295 if (num%100000)==0 and verbose:
1296 sys.stderr.write("%s sequences inserted\n" %num)
1297
1298
1299 elif format.lower() == "seq":
1300 for line in input_fd:
1301 splitted = line.strip().split("\t")
1302 sequenceID = splitted[0]
1303 if( sequenceID == "NULL" or sequenceID == "None" or sequenceID is None ):
1304 sequenceID = self._get_new_sequenceProtein_id()
1305 self._insert_sequence( sequence = seqObj(sequence=splitted[1],sequenceID=sequenceID ) )
1306 num += 1
1307 if (num%100000)==0 and verbose:
1308 sys.stderr.write("%s sequences inserted\n" %num)
1309
1310 else:
1311 raise ValueError("Format %s not recognized" %(format))
1312
1313
1315 """
1316
1317 """
1318
1319
1320
1321
1322 for current_match in cd_hit_cluster.get_matches():
1323 self.db.insert_db_content(self.db._get_insert_sql_query( table = "sequenceProteinCD_HIT",
1324 column_values = [("representant_proteinSequenceID", cd_hit_cluster.get_representant()),
1325 ("proteinSequenceID",current_match.sequenceID),
1326 ("representant_start_position",current_match.repr_start_pos),
1327 ("representant_end_position",current_match.repr_end_pos),
1328 ("start_position",current_match.start_pos),
1329 ("end_position",current_match.end_pos),
1330 ("identity",current_match.identity)],
1331 use_buffer = self.use_buffer),
1332 answer_mode = None)
1333
1334
1336 """
1337 Inserts information about CD-HIT clusters into database
1338 """
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353 input_file_fd = file(cd_hit_clusters_file, 'r')
1354 num_clusters=0
1355 current_cluster = None
1356
1357 cluster_re = re.compile(">Cluster \d+")
1358 sequence_re = re.compile("\s*\d+\s+\d+aa,\s+\>(\w+)\.+\s+at\s+(\d+)\:(\d+)\:(\d+)\:(\d+)\/(\d+)\%")
1359 representant_re = re.compile("\d+\s+\d+aa,\s+\>(\w+)\.+")
1360
1361 for line in input_file_fd:
1362
1363 if cluster_re.match(line):
1364 num_clusters += 1
1365 if num_clusters % 100000 == 0:
1366 sys.stderr.write("%s clusters inserted\n" %(num_clusters))
1367 if current_cluster is not None:
1368 self._insert_protein_sequence_cd_hit_cluster(current_cluster)
1369 current_cluster = BianaObjects.CDHITCluster()
1370 continue
1371
1372 sequence = sequence_re.match(line)
1373 if sequence:
1374 current_cluster.add_match( BianaObjects.CDHITMatch( sequenceID= sequence.group(1),
1375 start_pos = int(sequence.group(2)),
1376 end_pos = int(sequence.group(3)),
1377 repr_start_pos = int(sequence.group(4)),
1378 repr_end_pos = int(sequence.group(5)),
1379 identity = int(sequence.group(6)) ) )
1380 else:
1381
1382 if line[-2] == "*":
1383 sequence = representant_re.search(line)
1384 current_cluster.set_representant(sequence.group(1))
1385 else:
1386 sys.stderr.write("Error parsing cd-hit results in line:\n%s" %line)
1387
1388 self._insert_protein_sequence_cd_hit_cluster(current_cluster)
1389
1390
1391 self.db._empty_buffer()
1392
1393 return
1394
1395
1396 - def _get_similar_sequences(self, sequenceID, bit_score=None, identity_percent=None, similarity_percent=None, query_coverage_percent=None, match_coverage_percent=None):
1397 """
1398 Gets similar sequences from database
1399
1400 In order to be able to run this method, is mandatory to fill before the table of blast or psi-blast results
1401
1402 All specified conditions are evaluated jointly, with AND
1403
1404 TO CHECK!!! The indices are optimized only for identity and coverage!!!
1405
1406 returns a list of sequenceIDs from similar sequences according to the requeriments.
1407 """
1408
1409 sqlStat = database.SQLSelectStatement()
1410
1411 sqlStat.add_element( tables = [self.biana_database.PROTEIN_BLAST_RESULTS_TABLE] )
1412 sqlStat.add_element( fixed_conditions = [("sequenceID_A","=",sequenceID)] )
1413 sqlStat.add_element( columns = ["sequenceID_B"] )
1414
1415 if( bit_score is not None ):
1416 sqlStat.add_element( fixed_conditions = [("bit_score",">=",bit_score)] )
1417 if( identity_percent is not None ):
1418 sqlStat.add_element( fixed_conditions = [("identities",">=",identity_percent)])
1419 if( similarity_percent is not None ):
1420 sqlStat.add_element( fixed_conditions = [("similarity",">=",similarity_percent)])
1421 if( query_coverage_percent is not None ):
1422 sqlStat.add_element( fixed_conditions = [("coverage_A",">=",query_coverage_percent)])
1423 if( match_coverage_percent is not None ):
1424 sqlStat.add_element( fixed_conditions = [("coverage_B",">=",match_coverage_percent)] )
1425
1426 query = self.db._get_select_sql_query( tables = sqlStat.tables,
1427 columns = sqlStat.columns,
1428 fixed_conditions = sqlStat.fixed_conditions,
1429 join_conditions = sqlStat.join_conditions )
1430
1431 data = self.db.select_db_content( query, answer_mode = "list" )
1432
1433
1434 return data
1435
1436
1438 """
1439 METHOD TO TEST
1440 """
1441
1442 columns = ["sequenceID_A","sequenceID_B","evalue","score","bit_score","start_A","end_A","start_B","end_B",
1443 "Identities","similarity","gaps","program","filter","coverage_A","coverage_B"]
1444
1445 for line in file_fd:
1446 if line.strip().split("\t")[12] != "bl2seq" and line.strip().split("\t")[12] != "blastall":
1447 sys.stderr.write(line)
1448 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.PROTEIN_BLAST_RESULTS_TABLE,
1449 column_values = zip(columns,line.strip().split("\t")),
1450 use_buffer = self.use_buffer ) )
1451
1452
1453
1454
1455
1457 """
1458 Gets multiple sequence object from BIANA database
1459
1460 "sequenceID" can be a single id or a id list
1461
1462 "type" can be "protein" or "nucleotide"
1463 """
1464
1465 results = {}
1466
1467 if type == "proteinsequence":
1468 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"]],
1469 columns = ["proteinSequenceID","UNCOMPRESS(sequence)"],
1470 fixed_conditions = [("proteinSequenceID","IN","(%s)" %(", ".join(map(str,sequenceIdList))),None)] ),
1471 answer_mode = "raw" )
1472
1473 for current_data in data:
1474 results[current_data[0]] = BianaObjects.ProteinSequence(sequence=current_data[1],sequenceID=current_data[0])
1475
1476 elif type == "nucleotidesequence":
1477 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"]],
1478 columns = ["nucleotideSequenceID","sequenceType","UNCOMPRESS(sequence)"],
1479 fixed_conditions = [("nucleotideSequenceID","IN","(%s)" %(", ".join(map(str,sequenceIdList))))] ),
1480 answer_mode = "raw" )
1481 for current_data in data:
1482 if current_data[1] == "dna":
1483 results[current_data[0]] = BianaObjects.ProteinSequence(sequence=current_data[2],sequenceID=current_data[0])
1484 elif current_data[1] == "rna":
1485 results[current_data[0]] = BianaObjects.ProteinSequence(sequence=current_data[2],sequenceID=current_data[0])
1486
1487 else:
1488 raise ValueError("type must be \"proteinsequence\" or \"nucleotidesequence\"")
1489
1490 return results
1491
1492
1494 """
1495 Inserts to the database the complete ontology object
1496
1497 The external entities of the ontology must be inserted previously
1498 """
1499
1500 if not ontology.linked_attribute.lower() in self.biana_database.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT:
1501 raise ValueError("Trying to link this ontology to an invalid attribute type")
1502
1503 key_id = self._add_key_attribute = self._add_key_attribute( external_database_id = ontology.get_source_database().get_id(), key_attribute = ontology.linked_attribute )
1504
1505 column_values = [ ("externalEntityID", ontology.get_id()),
1506 ("linked_attribute", ontology.linked_attribute),
1507 ("key_id", key_id),
1508 ("name", ontology.name),
1509 ("description_attribute", ontology.description_attribute) ]
1510
1511
1512
1513
1514
1515
1516 if ontology.level_attribute is not None:
1517 column_values.append(("level_attribute", ontology.level_attribute))
1518
1519 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.ONTOLOGY_INFO_TABLE,
1520 column_values = column_values,
1521 use_buffer = self.use_buffer ) )
1522
1523 for current_eE_id in ontology.get_all_external_entity_ids():
1524 for current_parent_id in ontology.get_parents_ids(current_eE_id):
1525 if current_parent_id != current_eE_id:
1526 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.ONTOLOGY_IS_A_TABLE,
1527 column_values = [ ("externalEntityID", current_eE_id),
1528 ("is_a",current_parent_id) ],
1529 use_buffer = self.use_buffer ) )
1530
1531 for current_part_parent_id in ontology.get_part_parents_ids(current_eE_id):
1532 if current_part_parent_id != current_eE_id:
1533 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.ONTOLOGY_IS_PART_OF_TABLE,
1534 column_values = [ ("externalEntityID", current_eE_id),
1535 ("is_part_of",current_part_parent_id) ],
1536 use_buffer = self.use_buffer ) )
1537
1538
1539 print "Inserting extended ontology"
1540 for current_eE_id in ontology.get_all_external_entity_ids():
1541 for current_child in ontology.get_descendants( ontologyElementID = current_eE_id ):
1542 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE,
1543 column_values = [ ("parentExternalEntityID",current_eE_id),
1544 ("childExternalEntityID",current_child) ],
1545 use_buffer = self.use_buffer ) )
1546 return
1547
1548 - def get_ontology(self, ontology_name, root_attribute_values = [], load_external_entities=False):
1549 """
1550 Loads ontology object
1551 """
1552
1553
1554 root_attribute_values.sort()
1555 key = ontology_name.lower()+str(root_attribute_values)
1556
1557 if key in self.loaded_ontologies:
1558 return self.loaded_ontologies[key]
1559
1560 is_a_table = self.biana_database.ONTOLOGY_IS_A_TABLE
1561 is_part_of_table = self.biana_database.ONTOLOGY_IS_PART_OF_TABLE
1562 eE_table = self.biana_database.EXTERNAL_ENTITY_TABLE
1563 eE_field = self.biana_database.externalEntityID_col
1564
1565 linked_attr = self.get_available_ontology_names(name=ontology_name)
1566 attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[linked_attr.lower()]
1567
1568 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.ONTOLOGY_INFO_TABLE,
1569 self.biana_database.EXTERNAL_ENTITY_TABLE],
1570 columns = ["externalDatabaseID","linked_attribute","key_id","level_attribute","description_attribute"],
1571 join_conditions = [("%s.externalEntityID" %self.biana_database.ONTOLOGY_INFO_TABLE,
1572 "=",
1573 "%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_TABLE)],
1574 fixed_conditions = [("name","=",ontology_name)]),
1575 answer_mode = "raw" )
1576
1577 if len(data) == 0 or len(data)>1:
1578 raise ValueError("Trying to get an unexsiting ontology or multiple with the same name...")
1579
1580 ontology = BianaObjects.Ontology( source_database = data[0][0], linkedAttribute = data[0][1], name= ontology_name, descriptionAttribute = data[0][4], levelAttribute=data[0][3] )
1581
1582
1583 def add_data(sql_results):
1584
1585 for current_data in sql_results:
1586 if current_data[1] is not None:
1587 is_a = [ int(x) for x in current_data[1].split(",") ]
1588 else:
1589 is_a = []
1590 if current_data[2] is not None:
1591 is_part_of = [ int(x) for x in current_data[2].split(",") ]
1592 else:
1593 is_part_of = []
1594
1595 ontology.add_element( ontologyElementID = current_data[0],
1596 isA = is_a,
1597 isPartOf = is_part_of,
1598 linkedAttributeValue = current_data[3] )
1599
1600
1601 if len(root_attribute_values)==0:
1602
1603 query = "SELECT %s.%s, GROUP_CONCAT(DISTINCT %s), GROUP_CONCAT(DISTINCT %s), value FROM %s LEFT JOIN %s ON %s.%s=%s.%s LEFT JOIN %s ON %s.%s=%s.%s, %s WHERE externalDatabaseID=%s AND %s.%s = %s.%s GROUP BY %s.%s" %(eE_table, eE_field,
1604 "is_a", "is_part_of",
1605 eE_table,
1606 is_a_table,
1607 eE_table,eE_field,
1608 is_a_table,eE_field,
1609 is_part_of_table,
1610 eE_table,eE_field,
1611 is_part_of_table,eE_field,
1612 attr_table,
1613 ontology.get_source_database(),
1614 eE_table, eE_field,
1615 attr_table, eE_field,
1616 eE_table, eE_field)
1617
1618
1619
1620 add_data( self.db.select_db_content( query, answer_mode="raw" ) )
1621
1622 else:
1623
1624
1625 root_list= self.get_list_external_entities_IDs_by_attribute( attribute_identifier = self.get_available_ontology_names(name = ontology_name),
1626 field_values = [("value",x) for x in root_attribute_values],
1627 source_databases = [ontology.get_source_database() ],
1628 expand_ontology_attributes = False )
1629
1630 root_eE_dict = self.get_external_entities_dict(externalEntityIdsList=root_list,
1631 attribute_list=[linked_attr])
1632
1633 add_data( [(x,None,None,root_eE_dict[x].get_attribute(linked_attr).pop().value) for x in root_list ] )
1634
1635 current_level = map(str, root_list)
1636
1637 if( len(current_level) == 0 ):
1638 raise ValueError("Trying to load an ontology with an unexisting root")
1639
1640 def get_recursive_query(list_root_ids):
1641
1642 where_sql = "is_a IN (%s)" %(",".join(list_root_ids))
1643
1644
1645
1646 query = "SELECT %s.%s, GROUP_CONCAT(DISTINCT %s), GROUP_CONCAT(DISTINCT %s), value FROM %s LEFT JOIN %s ON %s.%s=%s.%s LEFT JOIN %s ON %s.%s=%s.%s, %s WHERE externalDatabaseID=%s AND %s.%s=%s.%s AND %s GROUP BY %s.%s" %(eE_table, eE_field,
1647 "is_a", "is_part_of",
1648 eE_table,
1649 is_a_table,
1650 is_a_table,eE_field,
1651 eE_table,eE_field,
1652 is_part_of_table,
1653 is_part_of_table,eE_field,
1654 eE_table,eE_field,
1655 attr_table,
1656 ontology.get_source_database(),
1657 eE_table, eE_field,
1658 attr_table, eE_field,
1659 where_sql,
1660 eE_table,eE_field)
1661
1662 return query
1663
1664
1665 def t(v):
1666 return str(v[0])
1667
1668 while( True ):
1669
1670 if len(current_level)==0:
1671 break
1672
1673 data = self.db.select_db_content( get_recursive_query(current_level), answer_mode="raw" )
1674 add_data( data )
1675 current_level = map(t,data)
1676
1677
1678
1679 if load_external_entities is True:
1680 external_entities_dict = {}
1681
1682
1683 all_list = ontology.get_all_external_entity_ids()
1684
1685 for x in xrange(0,len(all_list)/50):
1686 current_list = all_list[50*x:50*x+50]
1687 external_entities_dict.update( self.get_external_entities_dict( externalEntityIdsList = current_list,
1688 attribute_list = [ontology.description_attribute],
1689 useTransferAttributes = False ) )
1690
1691 ontology._set_external_entities_dict( self.get_external_entities_dict( externalEntityIdsList = ontology.get_all_external_entity_ids(),
1692 attribute_list = [ontology.description_attribute],
1693 useTransferAttributes = False ) )
1694
1695 self.loaded_ontologies[key] = ontology
1696
1697 return ontology
1698
1699
1700
1701
1702
1703
1704
1705
1706
1708 externalDatabaseID_list = []
1709 if source_databases is not None:
1710 for actual_source in source_databases:
1711 if isinstance(actual_source,int) or isinstance(actual_source,long):
1712 externalDatabaseID_list.append(actual_source)
1713 continue
1714 if actual_source[1] is None:
1715 externalDatabaseID_list.extend(self._get_externalDatabaseID_int(sourceDBName=actual_source[0],sourceDBVersion=None))
1716 else:
1717 externalDatabaseID_list.append(self._get_externalDatabaseID_int(sourceDBName=actual_source[0],sourceDBVersion=actual_source[1]))
1718 return externalDatabaseID_list
1719
1720
1721
1722
1723
1724 - def get_external_entities_dict_by_attribute(self, attribute_identifier, field_values, source_databases=None, attribute_restrictions=None, attribute_list=[], relation_attribute_list=[], participant_attribute_list=[] ):
1725 """
1726 """
1727
1728 list_eE = self.get_list_external_entities_IDs_by_attribute( attribute_identifier = attribute_identifier,
1729 field_values = field_values,
1730 source_databases = source_databases,
1731 attribute_restrictions = attribute_restrictions )
1732
1733 return self.get_external_entities_dict( externalEntityIdsList = list_eE,
1734 attribute_list=attribute_list,
1735 relation_attribute_list=relation_attribute_list,
1736 participant_attribute_list=participant_attribute_list)
1737
1738
1739
1740
1741
1743
1744 attribute_identifier = attribute_identifier.lower()
1745
1746 in_str = "(\"%s\")" %",".join(map(str,values_to_expand_list))
1747
1748 if not self._is_ontology_linked_attribute(attribute_identifier):
1749 raise ValueError("Not possible to expand by using %s attribute. Not ontology linked" %attribute_identifier)
1750
1751
1752
1753 attribute_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier]
1754
1755 tables = [ (self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE, "o"),
1756 (attribute_table, "attr1"),
1757 (attribute_table, "attr2") ]
1758
1759 join_conditions = [ ("attr1.value","IN",in_str),
1760 ("o.parentExternalEntityID","=","attr1.externalEntityID"),
1761 ("o.childExternalEntityID","=","attr2.externalEntityID") ]
1762
1763 columns = [("attr2.value","childs")]
1764
1765 query = self.db._get_select_sql_query( tables = tables,
1766 columns = columns,
1767 join_conditions = join_conditions )
1768
1769 query_list = [query]
1770 query_list.extend( [ "SELECT %s" %x for x in values_to_expand_list ] )
1771
1772 return "(%s)" %self.db._get_union_queries( queries = query_list )
1773
1774
1775
1776
1777
1779 """
1780 Gets a list of external entities that have an attribute with "attribute_value" value
1781 "field_value" is the field/S and value/S we want to obtain
1782 If a value is *, it will obtain all having this attribute
1783 """
1784
1785
1786
1787
1788 sqlStat = database.SQLSelectStatement()
1789
1790 attribute_identifier = attribute_identifier.lower()
1791
1792 if attribute_identifier == "proteinsequenceid":
1793
1794 seq_table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"]
1795 table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["proteinsequence"]
1796 sqlStat.add_element( tables = [seq_table, table] )
1797 sqlStat.add_element( columns = ["%s.externalEntityID" %(table) ] )
1798 sqlStat.add_element( join_conditions = [("%s.sequenceMD5" %seq_table,
1799 "=",
1800 "%s.value" %table)] )
1801
1802 else:
1803
1804 table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier.lower()]
1805 sqlStat.add_element( tables = [table] )
1806 sqlStat.add_element( columns = ["%s.externalEntityID" %(table) ] )
1807
1808
1809 if len(source_databases)>0:
1810 externalDatabaseID_list = self._convertListSourceDBVersionToSourceDBIdList(source_databases)
1811 if externalDatabaseID_list != []:
1812 sqlStat.add_element( tables = [self.biana_database.EXTERNAL_ENTITY_TABLE] )
1813 sqlStat.add_element( fixed_conditions = [("externalDatabaseID","IN","(%s)" %(",".join([ str(x) for x in externalDatabaseID_list ])),None)] )
1814
1815 sqlStat.add_element( join_conditions = [("%s.externalEntityID" %table,
1816 "=",
1817 "%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_TABLE)] )
1818
1819 if attribute_restrictions is not None:
1820 num = 1
1821 for current_restriction_attribute, restriction_values in attribute_restrictions:
1822 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_restriction_attribute.lower()]
1823 sqlStat.add_element( tables = [str(current_table)+" AS Q%s" %num] )
1824
1825 values_list = restriction_values.split(",")
1826
1827
1828 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(attribute_identifier, self.biana_database):
1829 sqlStat.add_element( join_conditions = [ ("MATCH (Q%s.value)" %num,
1830 "AGAINST",
1831 "('%s' IN BOOLEAN MODE)" %("\",\"".join(values_list))) ] )
1832 else:
1833
1834 if self._is_ontology_linked_attribute( current_restriction_attribute ):
1835 sqlStat.add_element( tables = [(self._get_expand_ontology_attribute_table_query( attribute_identifier = current_restriction_attribute,
1836 values_to_expand_list = values_list ), "childs")] )
1837 sqlStat.add_element( join_conditions = [("Q%s.value" %num, "=", "childs.childs")] )
1838
1839 else:
1840 sqlStat.add_element( join_conditions = [ ("Q%s.value" %num,
1841 "IN",
1842 "(\"%s\")" %",".join(map(str, values_list))) ] )
1843
1844 sqlStat.add_element( join_conditions = [ ("%s.externalEntityID" %table,
1845 "=",
1846 "Q%s.externalEntityID" %num) ] )
1847 num += 1
1848
1849
1850
1851
1852
1853 values_list = []
1854
1855 for current_field, current_value in field_values:
1856 if current_field.lower() == "value":
1857 values_list.append(str(current_value).strip())
1858 else:
1859 raise ValueError("TO IMPLEMENT")
1860
1861 if BianaObjects.ExternalEntityAttribute.isVersionableIdentifier(attribute_identifier, self.biana_database):
1862 for current_value in values_list:
1863 splitted = current_value.split(".")
1864 if len(splitted)>1:
1865 field_values.append(("value",splitted[0]))
1866 field_values.append(("version",splitted[1]))
1867 raise ValueError("TO IMPLEMENT")
1868 else:
1869 field_values.append(("value",splitted[0]))
1870
1871 if "*" not in values_list:
1872 if attribute_identifier == "proteinsequenceid":
1873 sqlStat.add_element( join_conditions = [("%s.proteinSequenceID" %seq_table,
1874 "IN",
1875 "(\"%s\")" % "\",\"".join(values_list) )] )
1876 elif BianaObjects.ExternalEntityAttribute.isFullTextSearchable(attribute_identifier, self.biana_database):
1877 sqlStat.add_element( join_conditions = [("MATCH (%s.value)" %table,
1878 "AGAINST",
1879 "('%s' IN BOOLEAN MODE)" % " ".join(values_list) )] )
1880 else:
1881 if self._is_ontology_linked_attribute( attribute_identifier ):
1882 sqlStat.add_element( tables = [(self._get_expand_ontology_attribute_table_query( attribute_identifier = attribute_identifier,
1883 values_to_expand_list = values_list ), "childs")] )
1884 sqlStat.add_element( join_conditions = [("%s.value" %table, "=", "childs.childs")] )
1885 else:
1886 sqlStat.add_element( join_conditions = [("%s.value" %table, "IN", "(\"%s\")" %"\",\"".join(map(str, values_list)))] )
1887
1888 return sqlStat
1889
1890
1891
1892
1917
1918
1919
1920
1922 """
1923 returns a list of (field, value) of the transferred attribute
1924 """
1925
1926 attribute_identifier = attribute_identifier.lower()
1927
1928 attribute_values = []
1929
1930 for current_transfer in self.transferred_attributes[attribute_identifier]:
1931
1932
1933 tables = [ self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier],
1934 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_transfer[1]],"key1"),
1935 (self.biana_database.EXTERNAL_ENTITY_TABLE, "e1") ]
1936
1937 columns = ["DISTINCT(key1.value)"]
1938
1939 fixed_conditions = [("e1.externalDatabaseID",
1940 "=",
1941 current_transfer[0])]
1942
1943 join_conditions = [("key1.externalEntityID","=","%s.externalEntityID" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier]))]
1944
1945 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(attribute_identifier, self.biana_database):
1946 join_conditions.append(("MATCH (%s.value)" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier],
1947 "AGAINST",
1948 "('%s' IN BOOLEAN MODE)" % " ".join([ str(actual_restriction[1]) for actual_restriction in field_values ] ) ))
1949 else:
1950 join_conditions.append(("%s.value" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier],
1951 "IN",
1952 "(\"%s\")" % "\",\"".join([ str(actual_restriction[1]) for actual_restriction in field_values ] ) ))
1953
1954
1955 attribute_values_query = self.db._get_select_sql_query( tables = tables,
1956 columns = columns,
1957 fixed_conditions = fixed_conditions,
1958 join_conditions = join_conditions)
1959
1960
1961
1962 attribute_values_list = self.db.select_db_content( attribute_values_query, answer_mode = "list" )
1963
1964
1965 attribute_values.extend([(current_transfer[1],str(x)) for x in attribute_values_list ])
1966
1967 return attribute_values
1968
1969
1970
1971
1973
1974
1975
1976
1977 if unification_protocol_name is None:
1978 raise ValueError("You are trying to transfer attributes without using a unification protocol...")
1979
1980 attribute_identifier = attribute_identifier.lower()
1981
1982 if attribute_identifier not in self.transferred_attributes:
1983 raise ValueError("Not transferable attribute")
1984
1985
1986 user_entities_list = []
1987
1988 for current_transfer in self.transferred_attributes[attribute_identifier]:
1989
1990
1991 tables = [ self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier],
1992 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_transfer[1]],"key1"),
1993 (self.biana_database.EXTERNAL_ENTITY_TABLE, "e1") ]
1994
1995 columns = ["DISTINCT(key1.value)"]
1996
1997 fixed_conditions = [("e1.externalDatabaseID",
1998 "=",
1999 current_transfer[0])]
2000
2001 join_conditions = [("key1.externalEntityID","=","%s.externalEntityID" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier]))]
2002
2003 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(attribute_identifier, self.biana_database):
2004 join_conditions.append(("MATCH (%s.value)" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier],
2005 "AGAINST",
2006 "('%s' IN BOOLEAN MODE)" % " ".join([ str(actual_restriction[1]) for actual_restriction in field_values ] ) ))
2007 else:
2008 join_conditions.append(("%s.value" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier],
2009 "IN",
2010 "(\"%s\")" % "\",\"".join([ str(actual_restriction[1]) for actual_restriction in field_values ] ) ))
2011
2012 key_values_query = self.db._get_select_sql_query( tables = tables,
2013 columns = columns,
2014 fixed_conditions = fixed_conditions,
2015 join_conditions = join_conditions)
2016
2017 key_values = self.db.select_db_content( key_values_query, answer_mode = "list" )
2018
2019 key_values = [("value",x) for x in key_values ]
2020
2021 new_list = self.get_list_user_entities_IDs_by_attribute(unification_protocol_name = unification_protocol_name,
2022 attribute_identifier = current_transfer[1],
2023 field_values = key_values,
2024 attribute_restrictions = attribute_restrictions,
2025 restrict_to_user_entity_ids_list = restrict_to_user_entity_ids_list,
2026 include_type = include_type)
2027
2028 user_entities_list.extend(new_list)
2029
2030 return user_entities_list
2031
2032
2033
2034
2036
2037
2038 sqlStat = self._get_list_external_entities_IDs_by_attribute_SQLstat(attribute_identifier=attribute_identifier,
2039 field_values=field_values,
2040 source_databases=source_databases,
2041 attribute_restrictions=attribute_restrictions)
2042
2043 query = self.db._get_select_sql_query( tables = sqlStat.tables,
2044 columns = sqlStat.columns,
2045 fixed_conditions = sqlStat.fixed_conditions,
2046 join_conditions = sqlStat.join_conditions )
2047
2048 direct_linked_list = self.db.select_db_content( query, answer_mode = "list" )
2049
2050
2051 return direct_linked_list
2052
2053
2054
2055
2057
2058 negative_attribute_restrictions = negative_attribute_restrictions
2059
2060 queries = []
2061
2062 self._load_available_unification_protocols()
2063 unif_table = self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)
2064
2065
2066 num_query = 1
2067 for current_restriction_attribute, current_restriction_values in negative_attribute_restrictions:
2068
2069
2070
2071 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_restriction_attribute.lower()]
2072
2073 tables = [(unif_table,"nu"),
2074 (current_table, "nq")]
2075
2076 columns = ["nu.userEntityID"]
2077
2078 group_conditions = columns
2079
2080 join_conditions = [("nq.externalEntityID","=","nu.externalEntityID")]
2081
2082 values_list = [ x for x in str(current_restriction_values).split(",") ]
2083
2084
2085
2086 if self._is_ontology_linked_attribute( current_restriction_attribute ):
2087 tables.append( ( self._get_expand_ontology_attribute_table_query( attribute_identifier = current_restriction_attribute, values_to_expand_list = values_list), "childs" ) )
2088 join_conditions.append( ("nq.value","=","childs.childs") )
2089
2090 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(current_restriction_attribute, self.biana_database):
2091 join_conditions.append(("MATCH (nq.value)",
2092 "AGAINST",
2093 "('%s' IN BOOLEAN MODE)" %("\",\"".join(map(str,values_list))) ))
2094 else:
2095 join_conditions.append(("nq.value",
2096 "IN",
2097 "(\"%s\")" %("\",\"".join(map(str,values_list))) ))
2098
2099 num_query += 1
2100
2101 queries.append(self.db._get_select_sql_query( tables = tables,
2102 columns = columns,
2103 join_conditions = join_conditions,
2104 group_conditions = group_conditions ))
2105
2106 return self.db._get_union_queries( queries )
2107
2108
2109
2110
2112 """
2113 """
2114
2115 if len(negative_attribute_restrictions)>0:
2116
2117 inner_negative_query = self._get_attribute_restrictions_query( unification_protocol_name = unification_protocol_name,
2118 negative_attribute_restrictions = negative_attribute_restrictions )
2119 tables = [("(%s)" %query,"query_to_restrict_negatively")]
2120
2121 join_conditions = [("query_to_restrict_negatively.%s" %(column_name_to_restrict),"NOT IN","(%s)" %inner_negative_query)]
2122
2123 query = self.db._get_select_sql_query( tables = tables,
2124 columns = ["query_to_restrict_negatively.*"],
2125 join_conditions = join_conditions )
2126
2127
2128 return query
2129
2130
2131
2132
2133 - def _apply_restrictions_to_query( self, query, unification_protocol_name, attribute_restrictions, column_name_to_restrict="userEntityID" ):
2134 """
2135 Applies the restrictions for a given query, where some user entites must be restricted
2136 """
2137
2138 unif_table = self._get_user_entity_table_name(unification_protocol_name = unification_protocol_name)
2139
2140
2141
2142
2143 if attribute_restrictions is not None:
2144
2145 attribute_restrictions = attribute_restrictions
2146
2147 num_nested_query = 1
2148
2149 for current_restriction_attribute, current_restriction_values in attribute_restrictions:
2150
2151 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_restriction_attribute.lower()]
2152 query_name = "subrestrictionquery%s" %num_nested_query
2153
2154 tables = [("(%s)" %query,query_name),
2155 (unif_table,"u"),
2156 (current_table, "q")]
2157
2158 columns = [ "%s.*" %query_name ]
2159
2160 join_conditions = [("%s.%s" %(query_name,column_name_to_restrict),"=","u.userEntityID"),
2161 ("q.externalEntityID","=","u.externalEntityID")]
2162
2163 values_list = [ x for x in str(current_restriction_values).split(",") ]
2164
2165 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(current_restriction_attribute, self.biana_database):
2166 join_conditions.append(("MATCH (q.value)",
2167 "AGAINST",
2168 "('%s' IN BOOLEAN MODE)" %("\",\"".join(map(str,values_list))) ))
2169
2170 if BianaObjects.ExternalEntityAttribute.isNumericAttribute(current_restriction_attribute, self.biana_database) or BianaObjects.ExternalEntityAttribute.isSpecialAttribute(current_restriction_attribute, self.biana_database):
2171 regex = re.compile("([><=]*)([\d\.]+)")
2172 list_of_non_greater_values = []
2173 for current_value in values_list:
2174 m = regex.match(current_value)
2175 if m:
2176 join_conditions.append(("q.value",m.group(1),m.group(2)))
2177 else:
2178 list_of_non_greater_values.append(current_value)
2179
2180 if len(list_of_non_greater_values)>0:
2181 if self._is_ontology_linked_attribute( current_restriction_attribute ):
2182 tables.append( (self._get_expand_ontology_attribute_table_query( attribute_identifier = current_restriction_attribute,
2183 values_to_expand_list = list_of_non_greater_values ), "childs") )
2184 join_conditions.append(("q.value","=","childs.childs"))
2185 else:
2186 join_conditions.append(("q.value",
2187 "IN",
2188 "(\"%s\")" %("\",\"".join(map(list_of_non_greater_values)))))
2189 else:
2190 if self._is_ontology_linked_attribute( current_restriction_attribute ):
2191 tables.append( (self._get_expand_ontology_attribute_table_query( attribute_identifier = current_restriction_attribute,
2192 values_to_expand_list = values_list ), "childs") )
2193 join_conditions.append(("q.value","=","childs.childs"))
2194 else:
2195 join_conditions.append(("q.value",
2196 "IN",
2197 "(\"%s\")" %("\",\"".join(map(str,values_list)))))
2198 num_nested_query += 1
2199
2200 query = self.db._get_select_sql_query( tables = tables,
2201 columns = columns,
2202 join_conditions = join_conditions,
2203 distinct_columns = True )
2204
2205 return query
2206
2207
2208
2210 """
2211 """
2212
2213 num = 1
2214
2215 for attribute_name, values in attribute_restrictions_dict.iteritems():
2216
2217 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_name.lower()]
2218
2219 tables = [ ("(%s)" %query, "query_%s" %num),
2220 (current_table.get_table_name(), "EERA%s" %num) ]
2221
2222 columns = [ "query_%s.*" %num ]
2223
2224 join_conditions = [("query_%s.%s" %(num,column_name_to_restrict),"=","EERA%s.externalEntityID" %num)]
2225
2226 if BianaObjects.ExternalEntityAttribute.isNumericAttribute(attribute_name, self.biana_database) or BianaObjects.ExternalEntityAttribute.isSpecialAttribute(attribute_name, self.biana_database):
2227 regex = re.compile("([><=]*)([\d\.]+)")
2228 for current_value in values:
2229 m = regex.match(current_value)
2230 if m:
2231 join_conditions.append(("EERA%s.value" %num,m.group(1),m.group(2)))
2232 else:
2233 join_conditions.append(("EERA%s.value" %num,"=","\"%s\"" %current_value))
2234 else:
2235 join_conditions.append(("EERA%s.value" %num,"IN","(\"%s\")" %("\",\"".join([ str(x) for x in values]))))
2236
2237 num += 1
2238
2239 query = self.db._get_select_sql_query( tables = tables,
2240 columns = columns,
2241 join_conditions = join_conditions,
2242 distinct_columns = True )
2243
2244 return query
2245
2246
2247
2249 """
2250 Returns a string with a sql query. The sql query returns 2 columns: userEntityID1 and userEntityID2, because they share the combination of attributes in the attribute_list
2251 """
2252
2253
2254
2255 unif_table = self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)
2256
2257 last_query = None
2258
2259 current_nested_query = 1
2260
2261 for (current_attribute, parameter_and_value_list) in attribute_list:
2262
2263 nested_query_tables = [(unif_table,"u1"),
2264 (unif_table,"u2"),
2265 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute.lower()].get_table_name(),"attr1"),
2266 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute.lower()].get_table_name(),"attr2")]
2267
2268 nested_query_columns = [("u1.userEntityID AS userEntityID1"),
2269 ("u2.userEntityID AS userEntityID2")]
2270
2271
2272 nested_query_join_conditions = [("u1.userEntityID","!=","u2.userEntityID"),
2273 ("u1.externalEntityID", "=", "attr1.externalEntityID"),
2274 ("u2.externalEntityID", "=", "attr2.externalEntityID")]
2275
2276 nested_query_fixed_conditions = []
2277
2278
2279 if len(parameter_and_value_list) == 0:
2280
2281 if self._is_ontology_linked_attribute(current_attribute) and ontology_levels.has_key( self.ontology_linked_attributes[current_attribute.lower()]["level_attribute"] ):
2282
2283
2284
2285
2286 nested_query_tables.append( (self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE, "o") )
2287 nested_query_tables.append( (self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE, "o2") )
2288 nested_query_tables.append( (self._get_key_attribute_table_name( key_id = self.ontology_linked_attributes[current_attribute.lower()]["key_id"] ), "k") )
2289 nested_query_tables.append( (self._get_key_attribute_table_name( key_id = self.ontology_linked_attributes[current_attribute.lower()]["key_id"] ), "k2") )
2290 nested_query_tables.append( (self._get_key_attribute_table_name( key_id = self.ontology_linked_attributes[current_attribute.lower()]["level_attribute"] ), "level") )
2291
2292 nested_query_join_conditions.extend[ ("attr1.value", "=", "k.value"),
2293 ("k.externalEntityID", "=", "o.childExternalEntityID"),
2294 ("o.parentExternalEntityID", "=", "level.externalEntityID"),
2295 ("level.value","=",ontology_levels[self.ontology_linked_attributes[current_attribute.lower()]]["level_attribute"]),
2296 ("o.parentExternalEntityID", "=", "o2.parentExternalEntityID"),
2297 ("o2.externalEntityID", "=","k2.externalEntityID"),
2298 ("k2.value", "=", "attr2.value") ]
2299
2300
2301
2302 else:
2303 nested_query_join_conditions.append(("attr1.value", "=", "attr2.value"))
2304
2305
2306 else:
2307
2308 if current_attribute.lower() == "proteinsequence":
2309 nested_query_tables.append( ("%s" %(self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[current_attribute.lower()]["table"].get_table_name()), "d_1") )
2310 nested_query_tables.append( ("%s" %(self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[current_attribute.lower()]["table"].get_table_name()), "d_2") )
2311 nested_query_tables.append( ("%s" %(self.biana_database.PROTEIN_BLAST_RESULTS_TABLE.get_table_name()), "b") )
2312 nested_query_join_conditions.append(("attr1.value", "=", "d_1.sequenceMD5"))
2313 nested_query_join_conditions.append(("attr2.value", "=", "d_2.sequenceMD5"))
2314 nested_query_join_conditions.append(("b.sequenceID_A","=","d_1.proteinSequenceID"))
2315 nested_query_join_conditions.append(("b.sequenceID_B","=","d_2.proteinSequenceID"))
2316 nested_query_fixed_conditions.extend( [ ("b.%s" % parameter, ">=", value) for (parameter, value) in parameter_and_value_list ] )
2317 else:
2318 notSupported = False
2319 for (parameter, value) in parameter_and_value_list:
2320 if str(value.strip()) != "":
2321 notSupported = True
2322 break
2323 if notSupported:
2324 sys.err.write("Warning: Not supported parameterizable attribute (ignored): %s\n" % current_attribute)
2325 continue
2326 nested_query_join_conditions.extend( [ ("a%s.%s" % (current_attribute, parameter), "=","b%s.%s" % (current_attribute, parameter) ) for (parameter, value) in parameter_and_value_list ] )
2327
2328 if current_nested_query == 1:
2329 nested_query_join_conditions.append(("u1.userEntityID","IN","(%s)" %",".join(map(str,user_entity_ids_list))))
2330 if restrict_to_user_entity_ids_list:
2331 nested_query_join_conditions.append(("u2.userEntityID","IN","(%s)" %",".join(map(str,user_entity_ids_list))))
2332 else:
2333 nested_query_join_conditions.append(("u1.userEntityID","=","nested_query%s.userEntityID1" %(current_nested_query-1)))
2334 nested_query_join_conditions.append(("u2.userEntityID","=","nested_query%s.userEntityID2" %(current_nested_query-1)))
2335 nested_query_tables.append(("(%s)" %last_query, "nested_query%s" %(current_nested_query-1)))
2336
2337
2338 last_query = self.db._get_select_sql_query( tables = nested_query_tables,
2339 columns = nested_query_columns,
2340 fixed_conditions = nested_query_fixed_conditions,
2341 join_conditions = nested_query_join_conditions,
2342 distinct_columns = True )
2343
2344 current_nested_query += 1
2345
2346
2347
2348 return self.db._get_select_sql_query( tables = [("(%s)" %last_query, "nested_queries")],
2349 columns = ["userEntityID1","userEntityID2"],
2350 distinct_columns = True )
2351
2352
2353 - def get_list_user_entities_IDs_by_attribute(self, unification_protocol_name, attribute_identifier, field_values, attribute_restrictions=None, negative_attribute_restrictions=None, restrict_to_user_entity_ids_list=[], include_type=False ):
2354 """
2355 Returns a list of user entities that match with the attributes specified of type attribute_identifier
2356
2357 If include_type is set to True, it returns a list of tuples
2358 """
2359
2360 sqlStat = self._get_list_external_entities_IDs_by_attribute_SQLstat( attribute_identifier=attribute_identifier,
2361 field_values=field_values )
2362
2363 self._load_available_unification_protocols()
2364
2365 unif_table = self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)
2366
2367 if attribute_identifier.lower()=="proteinsequenceid":
2368 attribute_identifier="proteinsequence"
2369
2370 attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier.lower()].get_table_name()
2371 sqlStat.add_element( tables = [(unif_table,"u")] )
2372 sqlStat.reset_columns( columns = ["u.userEntityID"] )
2373 sqlStat.add_element( join_conditions = [("u.externalEntityID",
2374 "=",
2375 "%s.externalEntityID" %attr_table )] )
2376 if include_type is False:
2377 sqlStat.add_element( group_conditions = ["u.userEntityID"] )
2378 else:
2379 sqlStat.add_element( tables = [(self.biana_database.EXTERNAL_ENTITY_TABLE,"e")] )
2380 sqlStat.reset_columns( columns = ["u.userEntityID","e.type"] )
2381 sqlStat.add_element( group_conditions = ["u.userEntityID","e.type"] )
2382 sqlStat.add_element( join_conditions = [("u.externalEntityID","=","e.externalEntityID")] )
2383
2384 if len(restrict_to_user_entity_ids_list)>0:
2385 sqlStat.add_element( fixed_conditions = [("u.userEntityID","IN","(%s)" %",".join(map(str,restrict_to_user_entity_ids_list)),None)] )
2386
2387
2388
2389
2390
2391 general_query = self.db._get_select_sql_query( tables = sqlStat.tables,
2392 columns = sqlStat.columns,
2393 fixed_conditions = sqlStat.fixed_conditions,
2394 join_conditions = sqlStat.join_conditions,
2395 group_conditions = sqlStat.group_conditions )
2396
2397 restricted_query = self._apply_restrictions_to_query( query = general_query,
2398 unification_protocol_name = unification_protocol_name,
2399 attribute_restrictions = attribute_restrictions,
2400 column_name_to_restrict="userEntityID" )
2401
2402 negatively_restricted_query = self._apply_negative_restrictions_to_query( query = restricted_query,
2403 unification_protocol_name = unification_protocol_name,
2404 column_name_to_restrict="userEntityID",
2405 negative_attribute_restrictions = negative_attribute_restrictions )
2406
2407
2408 if len(sqlStat.columns)>1:
2409 initial_list = list(self.db.select_db_content( negatively_restricted_query, answer_mode = "raw" ))
2410 else:
2411 initial_list = self.db.select_db_content( negatively_restricted_query, answer_mode = "list" )
2412
2413 return initial_list
2414
2415
2416
2417
2418
2419
2420
2422 """
2423 "attribute_identifier" must be an accepted one
2424
2425 "field_values" must be a dictionary with the keys (accepted ones) and its values
2426
2427 """
2428
2429 temp_field_values = [ (x.lower(),field_values[x]) for x in field_values ]
2430
2431 try:
2432 temp = self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[attribute_identifier.lower()]
2433 tableObject = temp["table"]
2434 if tableObject.has_optimized_fields():
2435 table = tableObject.get_temp_table_name()
2436 else:
2437 table = tableObject.get_table_name()
2438 fields = temp["fields"]
2439 except:
2440 raise "%s is not a valid attribute identifier" %attribute_identifier
2441
2442 column_values = []
2443
2444 for actual_field in temp_field_values:
2445 if actual_field[1] is not None:
2446 fieldObject = fields[actual_field[0]]
2447
2448
2449 if fieldObject.get_optimized_space() is not None:
2450 fieldName = fieldObject.get_optimized_field_name()
2451 else:
2452 fieldName = fieldObject.get_field_name()
2453
2454
2455 if fieldObject.is_compressed() is not None:
2456
2457 column_values.append((fieldName,"COMPRESS(\"%s\")" %str(actual_field[1]).replace('\\','\\\\').replace('"','\\"'),1))
2458 else:
2459
2460 column_values.append((fieldName,str(actual_field[1]).replace('\\','\\\\').replace('"','\\"')))
2461
2462 self.db.insert_db_content(self.db._get_insert_sql_query( table = table,
2463 column_values = column_values),
2464
2465 answer_mode=None)
2466
2467 - def output_sequences(self, outmethod, type="proteinsequence", format="fasta", sequenceIDs=None):
2468 """
2469 outmethod: destination where data in fasta format would be written
2470 type: either "proteinsequence" or "nucleotidesequence" - decides which type of sequence is desired
2471 format: either "fasta" or "seq" corresponding to fasta format or raw sequence format
2472 sequenceIDs: list of sequence ids whose sequence would be outputted - if None will output all sequences in the database
2473 """
2474
2475 database_block_limit=1000000
2476 database_block_offset=0
2477 aa_x_line = 79
2478 enter_to_loop = True
2479
2480 if sequenceIDs is not None:
2481 enter_to_loop = False
2482 if type.lower() == "proteinsequence":
2483 query = "SELECT proteinSequenceID, UNCOMPRESS(sequence) FROM %s WHERE proteinSequenceID IN (%s)" % (self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name(), ",".join(sequenceIDs))
2484 self.db._check_locked_table(table=self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name())
2485 elif type.lower() == "nucleotidesequence":
2486 query = "SELECT nucleotideSequenceID, UNCOMPRESS(sequence) FROM %s WHERE nucleotideSequenceID IN (%s)" % (self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"].get_table_name(), ",".join(sequenceIDs))
2487 self.db._check_locked_table(table=self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"].get_table_name())
2488 else:
2489 raise ValueError("Sequence type not recognized")
2490 temp_sequences = self.db.select_db_content( query, answer_mode = "raw", remove_duplicates="no" )
2491
2492 else:
2493 enter_to_loop = True
2494 if type.lower() == "proteinsequence":
2495 query = "SELECT proteinSequenceID, UNCOMPRESS(sequence) FROM %s LIMIT %s" %(self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name(), database_block_limit)
2496 self.db._check_locked_table(table=self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name())
2497 elif type.lower() == "nucleotidesequence":
2498 query = "SELECT nucleotideSequenceID, UNCOMPRESS(sequence) FROM %s LIMIT %s" %(self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"], database_block_limit)
2499 self.db._check_locked_table(table=self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"].get_table_name())
2500 else:
2501 raise ValueError("Sequence type not recognized")
2502 temp_sequences = self.db.select_db_content( query + " OFFSET %s" % database_block_offset, answer_mode = "raw", remove_duplicates="no" )
2503
2504 while( len(temp_sequences) > 0 ):
2505 database_block_offset += database_block_limit
2506
2507 for actual_sequence in temp_sequences:
2508 if format.lower() == "fasta":
2509
2510 outmethod(">%s\n%s\n" %(actual_sequence[0],"\n".join([ actual_sequence[1][x*aa_x_line:x*aa_x_line+aa_x_line] for x in xrange(int(ceil(len(actual_sequence[1])/float(aa_x_line)))) ])))
2511 elif format.lower() == "seq":
2512 outmethod("%s\t%s\n" %(actual_sequence[0],actual_sequence[1]))
2513 if not enter_to_loop:
2514 break
2515 temp_sequences = self.db.select_db_content( query + " OFFSET %s" % database_block_offset, answer_mode = "raw", remove_duplicates="no" )
2516
2517
2519 """
2520
2521 """
2522
2523 if type=="proteinsequence":
2524 table = "sequenceProtein"
2525 elif type=="nucleotidesequence":
2526 table = "sequenceNucleotide"
2527 else:
2528 raise ValueError("Type %s not recognized to empy sequences table" %(type) )
2529
2530 self.db.insert_db_content( self.db._get_delete_sql_query(table = table ) )
2531
2532 return
2533
2534
2535
2536
2537
2539 """
2540 name is lower cased
2541 """
2542 a = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["taxid"]
2543 b = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["taxid_name"]
2544
2545 self.db._enable_indices( table_list = [a,b] )
2546
2547 fixed_conditions = []
2548 if tax_id_name_type is not None:
2549 fixed_conditions.append(("taxid_name_type","=","scientific name"))
2550
2551 result = dict(self.db.select_db_content( self.db._get_select_sql_query( tables = [a,b],
2552 columns = ["LCASE(%s.value)" %b,"%s.value" %a],
2553 join_conditions = [("%s.externalEntityID" %a, "=", "%s.externalEntityID" %b )],
2554 fixed_conditions = fixed_conditions),
2555 answer_mode = "raw") )
2556 return result
2557
2558
2559
2561
2562 list_taxID = self.db.select_db_content( self.db.SelectAttributeDescription( attribute_identifier = "ncbi_taxonomy_names",
2563 fields = ["taxID"],
2564 conditions = {"name": species_name} ),
2565 answer_mode = "list", remove_duplicates = "yes" )
2566 return list_taxID
2567
2568
2569
2570
2571
2572
2574
2575
2576
2577 return self.db.select_db_content( "SELECT UNCOMPRESS(sequence) FROM sequenceProtein WHERE sequenceProteinID=%s" %sequenceID,
2578 answer_mode = "single" )
2579
2581
2582 return self.db.select_db_content( "SELECT externalEntityTaxID.value FROM sequenceProtein,externalEntityTaxID,externalEntityProteinSequence WHERE proteinSequenceID=%s AND externalEntityProteinSequence.value = sequenceMD5 AND externalEntityTaxID.externalEntityID = externalEntityProteinSequence.externalEntityID" %sequenceID,
2583 answer_mode = "list", remove_duplicates=True )
2584
2585
2586
2587
2588
2589
2591
2592 pdb = PDBObject.get_name()
2593
2594
2595
2596
2597 for actual_chain in PDBObject.get_chain_names():
2598
2599 externalEntity = BianaObjects.ExternalEntity( source_database= source_database, type="structure")
2600
2601 externalEntity.add_attribute(BianaObjects.ExternalEntityAttribute( attribute_identifier = "proteinsequence",
2602 value = BianaObjects.ProteinSequence(sequence = PDBObject.get_sequence( chain_name = actual_chain ))))
2603
2604
2605 residues_list = PDBObject.get_residues( chain_name = actual_chain )
2606 temp_residue_num_list = [ int(x.get_residue_num()) for x in residues_list ]
2607
2608 externalEntity.add_attribute(BianaObjects.ExternalEntityAttribute( attribute_identifier = "pdb",
2609 value = pdb,
2610 type = "unique",
2611 additional_fields = { "chain": actual_chain,
2612 "pdb_range": "%s-%s" %(min(temp_residue_num_list),
2613 max(temp_residue_num_list))}))
2614 if description is not None:
2615 externalEntity.add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier = "description", value = description))
2616
2617 self.insert_new_external_entity( externalEntity = externalEntity )
2618
2619
2620
2621 total_num_atoms = PDBObject.get_chain_num_atoms( chain_name = actual_chain )
2622
2623
2624 residue_num_list = int_ascii.int_list_to_ascii(2,temp_residue_num_list).replace('\\','\\\\').replace('"','\\"')
2625
2626 residue_type_list = '\x00'.join([ x.get_residue_type() for x in residues_list ]).replace('\\','\\\\').replace('"','\\"')
2627
2628
2629
2630 residue_atom_num_list = int_ascii.unsigned_int_list_to_ascii(1,[ x.get_num_atoms() for x in residues_list ]).replace('\\','\\\\').replace('"','\\"')
2631
2632 atom_type_list = "".join([y.get_type() for x in residues_list for y in x.get_atoms() ]).replace('\\','\\\\').replace('"','\\"')
2633
2634 atom_name_list = '\x00'.join([y.get_name() for x in residues_list for y in x.get_atoms() ]).replace('\\','\\\\').replace('"','\\"')
2635
2636 atom_coordinates = []
2637 [ atom_coordinates.extend(y.get_coordinates()) for x in residues_list for y in x.get_atoms() ]
2638
2639 atom_coordinates_list = int_ascii.float_list_to_ascii(3,3,atom_coordinates).replace('\\','\\\\').replace('"','\\"')
2640
2641 resolution = PDBObject.resolution
2642
2643 column_values = [ ("pdb",pdb),
2644 ("chain",actual_chain),
2645 ("externalEntityID", externalEntity.get_id()),
2646 ("total_num_atoms",total_num_atoms) ]
2647
2648 if resolution is not None:
2649 column_values.append(("resolution",str(resolution)))
2650
2651 self.db.insert_db_content( self.db._get_insert_sql_query( table = "pdb",
2652 column_values = column_values,
2653 special_column_values = [("residue_num_list","COMPRESS(\"%s\")" %residue_num_list),
2654 ("residue_type_list","COMPRESS(\"%s\")" %residue_type_list),
2655 ("residue_atom_num_list","COMPRESS(\"%s\")" %residue_atom_num_list),
2656 ("atom_type_list","COMPRESS(\"%s\")" %atom_type_list),
2657 ("atom_name_list","COMPRESS(\"%s\")" %atom_name_list),
2658 ("atom_coordinates","COMPRESS(\"%s\")" %atom_coordinates_list) ],
2659 use_buffer=self.use_buffer),
2660 answer_mode = None)
2661
2662
2663 - def add_hssp_info_to_pdb(self,pdb_code, chain, residue_pdb_number, residue_hssp_entropy, residue_hssp_norm_entropy, residue_hssp_variability, conservation_hssp, solvent_exposure_hssp, dssp):
2664 """
2665 Adds HSSP information to pdb files
2666
2667 If the PDB information does not exist, inserts the data
2668 """
2669
2670 print "inserting %s to chain %s" %(residue_pdb_number,chain)
2671
2672 self.db._check_locked_table(table="pdb")
2673
2674 accessibility = int_ascii.unsigned_int_list_to_ascii(1,solvent_exposure_hssp).replace('\\','\\\\').replace('"','\\"')
2675 variability = int_ascii.unsigned_int_list_to_ascii(1,residue_hssp_variability).replace('\\','\\\\').replace('"','\\"')
2676 entropy = int_ascii.unsigned_float_list_to_ascii(2,3,residue_hssp_entropy).replace('\\','\\\\').replace('"','\\"')
2677 norm_entropy = int_ascii.unsigned_int_list_to_ascii(1,residue_hssp_norm_entropy).replace('\\','\\\\').replace('"','\\"')
2678 conservation = int_ascii.unsigned_float_list_to_ascii(1,2,conservation_hssp).replace('\\','\\\\').replace('"','\\"')
2679 residue_numbers = int_ascii.int_list_to_ascii(2,residue_pdb_number).replace('\\','\\\\').replace('"','\\"')
2680
2681 query = "UPDATE pdb SET hssp_residue_num_correspondences=COMPRESS(\"%s\"),residue_dssp_results=COMPRESS(\"%s\"),residue_hssp_entropy=COMPRESS(\"%s\"),residue_hssp_norm_entropy=COMPRESS(\"%s\"),residue_hssp_variability=COMPRESS(\"%s\"),conservation_hssp=COMPRESS(\"%s\"),solvent_exposure_hssp=COMPRESS(\"%s\") WHERE pdb=\"%s\" AND chain=\"%s\"" %(residue_numbers,"".join(dssp),entropy,norm_entropy,variability,conservation,accessibility,pdb_code,chain)
2682
2683 self.db.insert_db_content( query, answer_mode = None )
2684
2685
2686 - def load_pdb_object(self, pdb_code, fragments=[], merge_fragments = False, request_information=None):
2687 """
2688 "pdb_code" is mandatory
2689
2690 "fragments" is a list of pdb fragments.
2691
2692 "merge_fragments" is used to merge all fragments or chains as a single chain or to mantain each chain as a different chain. If it is used, atom numbers and residue numbers are changed
2693
2694 "request_information" indicates which information has to be loaded. It is a list. It can be:
2695 It can be: "residue_type","atoms_info","hssp_conservation","hssp_entropy","hssp_exposure","hssp_norm_entropy","hssp_variability","dssp"
2696 """
2697
2698 information = ["residue_type","atoms_info"]
2699
2700 if isinstance(request_information,list):
2701 information.extend(request_information)
2702
2703
2704 pdbObject = BianaObjects.PDB( name=pdb_code )
2705
2706
2707
2708 fixed_conditions = [("pdb.pdb","=",pdb_code)]
2709
2710
2711 requested_chains = [ x.get_chain() for x in fragments ]
2712
2713 if len(requested_chains)>0 and None not in requested_chains:
2714 fixed_conditions.append(("pdb.chain","IN","(\"%s\")" %("\",\"".join(requested_chains)),None))
2715
2716 possible_requested_data = {"residue_type": "UNCOMPRESS(residue_type_list)",
2717 "atoms_info": ["UNCOMPRESS(atom_type_list)",
2718 "UNCOMPRESS(atom_name_list)",
2719 "UNCOMPRESS(atom_coordinates)",
2720 "UNCOMPRESS(residue_atom_num_list)"],
2721 "hssp_residue_num_correspondences":"UNCOMPRESS(hssp_residue_num_correspondences)",
2722 "hssp_conservation":"UNCOMPRESS(conservation_hssp)",
2723 "hssp_entropy":"UNCOMPRESS(residue_hssp_entropy)",
2724 "hssp_exposure":"UNCOMPRESS(solvent_exposure_hssp)",
2725 "hssp_norm_entropy":"UNCOMPRESS(residue_hssp_norm_entropy)",
2726 "hssp_variability": "UNCOMPRESS(residue_hssp_variability)",
2727 "dssp": "UNCOMPRESS(residue_dssp_results)"}
2728
2729 columns = ["pdb","chain","resolution","UNCOMPRESS(residue_num_list)"]
2730 pos = len(columns)
2731 requested_data_indexes = {}
2732 append_hssp_eq = None
2733 try:
2734 for actual_information in information:
2735 if actual_information != "atoms_info":
2736 if actual_information != "residue_type":
2737 append_hssp_eq = 1
2738 columns.append(possible_requested_data[actual_information.lower()])
2739 requested_data_indexes[actual_information.lower()] = pos
2740 pos += 1
2741 else:
2742 columns.extend(possible_requested_data["atoms_info"])
2743 requested_data_indexes[actual_information.lower()] = pos
2744 pos += 4
2745 except:
2746 raise ValueError("Trying to get unavailable pdb information")
2747
2748 if append_hssp_eq is not None:
2749 columns.append(possible_requested_data["hssp_residue_num_correspondences"])
2750 requested_data_indexes["hssp_residue_num_correspondences"] = pos
2751
2752 query = self.db._get_select_sql_query( tables= ["pdb"],
2753 columns = columns,
2754 fixed_conditions = fixed_conditions )
2755
2756
2757 data = self.db.select_db_content( query, answer_mode = "raw", remove_duplicates = "no" )
2758
2759 atom_num = 1
2760 residue_num_value = 1
2761
2762 for actual_data in data:
2763 chain = actual_data[1]
2764 if merge_fragments:
2765 new_chain = 'A'
2766 else:
2767 new_chain = chain
2768 residue_num_list = int_ascii.ascii_to_int_list(2,actual_data[3])
2769 residue_type_list = None
2770 residue_type_value = None
2771 atom_type_list = None
2772 atom_name_list = None
2773 atom_coordinates_list = None
2774 residue_atom_num_list = None
2775 hssp_residue_num_correspondence = None
2776 hssp_conservation = None
2777 hssp_entropy = None
2778 hssp_exposure = None
2779 hssp_norm_entropy = None
2780 hssp_variability = None
2781 hssp_conservation_value = None
2782 hssp_entropy_value = None
2783 hssp_exposure_value = None
2784 hssp_norm_entropy_value = None
2785 hssp_variability_value = None
2786 dssp = None
2787 dssp_value = " "
2788
2789 for actual_requested in requested_data_indexes:
2790
2791 if actual_requested == "residue_type":
2792 residue_type_list = actual_data[requested_data_indexes["residue_type"]].split('\x00')
2793
2794 continue
2795 if actual_requested == "atoms_info":
2796
2797 atom_type_list = actual_data[requested_data_indexes["atoms_info"]]
2798 atom_name_list = actual_data[requested_data_indexes["atoms_info"]+1].split('\x00')
2799 atom_coordinates_list = int_ascii.ascii_to_float_list(3,3,actual_data[requested_data_indexes["atoms_info"]+2])
2800 residue_atom_num_list = int_ascii.ascii_to_unsigned_int_list(1,actual_data[requested_data_indexes["atoms_info"]+3])
2801
2802
2803
2804
2805 continue
2806 if actual_requested == "hssp_conservation":
2807 temp = actual_data[requested_data_indexes["hssp_conservation"]]
2808 if temp is not None:
2809 hssp_conservation = int_ascii.ascii_to_unsigned_float_list(1,2,temp)
2810 else:
2811 print "Trying to get the conservation for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1])
2812 continue
2813 if actual_requested == "hssp_entropy":
2814 temp = actual_data[requested_data_indexes["hssp_entropy"]]
2815 if temp is not None:
2816 hssp_entropy = int_ascii.ascii_to_unsigned_float_list(2,3,temp)
2817 else:
2818 print "Trying to get the entropy for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1])
2819 continue
2820 if actual_requested == "hssp_exposure":
2821 temp = actual_data[requested_data_indexes["hssp_exposure"]]
2822 if temp is not None:
2823 hssp_exposure = int_ascii.ascii_to_unsigned_int_list(1,temp)
2824 else:
2825 print "Trying to get the exposure for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1])
2826 continue
2827 if actual_requested == "hssp_norm_entropy":
2828 temp = actual_data[requested_data_indexes["hssp_norm_entropy"]]
2829 if temp is not None:
2830 hssp_norm_entropy = int_ascii.ascii_to_unsigned_int_list(1,temp)
2831 else:
2832 print "Trying to get the normalized entropy for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1])
2833 continue
2834 if actual_requested == "hssp_variability":
2835 temp = actual_data[requested_data_indexes["hssp_variability"]]
2836 if temp is not None:
2837 hssp_variability = int_ascii.ascii_to_unsigned_int_list(1,temp)
2838 else:
2839 print "Trying to get the variability for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1])
2840 continue
2841 if actual_requested == "dssp":
2842 temp = actual_data[requested_data_indexes["dssp"]]
2843 if temp is not None:
2844 dssp = temp
2845 else:
2846 print "Trying to get the dssp for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1])
2847 continue
2848 if actual_requested == "hssp_residue_num_correspondences":
2849 temp = actual_data[requested_data_indexes["hssp_residue_num_correspondences"]]
2850 if temp is not None:
2851 hssp_residue_num_correspondence = int_ascii.ascii_to_int_list(2,temp)
2852 else:
2853 print "Trying to get the hssp equivalences for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1])
2854 continue
2855
2856
2857
2858 if hssp_residue_num_correspondence is not None:
2859 hssp_num = 0
2860 hssp_pdb_equivalences = []
2861
2862
2863 for x in xrange(len(residue_num_list)):
2864 if hssp_num>=len(hssp_residue_num_correspondence):
2865 hssp_pdb_equivalences.append(None)
2866 continue
2867 if residue_num_list[x] == hssp_residue_num_correspondence[hssp_num]:
2868 hssp_pdb_equivalences.append(x)
2869 hssp_num+=1
2870 else:
2871 hssp_pdb_equivalences.append(None)
2872
2873
2874 actual_atom_pos = 0
2875
2876 for x in xrange(len(residue_num_list)):
2877 residue_requested = None
2878
2879
2880 if len(fragments)>0:
2881 if True in [ current_fragment.includes( chain=chain, res_num=residue_num_list[x] ) for current_fragment in fragments ]:
2882 residue_requested = True
2883 else:
2884 residue_requested = True
2885
2886
2887 if residue_requested:
2888
2889 if hssp_residue_num_correspondence is not None and hssp_pdb_equivalences[x] is not None:
2890 if hssp_conservation is not None:
2891 hssp_conservation_value = hssp_conservation[hssp_pdb_equivalences[x]]
2892 if hssp_entropy is not None:
2893 hssp_entropy_value = hssp_entropy[hssp_pdb_equivalences[x]]
2894 if hssp_exposure is not None:
2895 hssp_exposure_value = hssp_exposure[hssp_pdb_equivalences[x]]
2896 if hssp_norm_entropy is not None:
2897 hssp_norm_entropy_value = hssp_norm_entropy[hssp_pdb_equivalences[x]]
2898 if hssp_variability is not None:
2899 hssp_variability_value = hssp_variability[hssp_pdb_equivalences[x]]
2900 if dssp is not None:
2901 dssp_value = dssp[hssp_pdb_equivalences[x]]
2902 if residue_type_list is not None:
2903 residue_type_value = residue_type_list[x]
2904 if merge_fragments:
2905 residue_num_value += 1
2906 else:
2907 residue_num_value = residue_num_list[x]
2908
2909 pdbObject.add_residue( chain_name = new_chain,
2910 residue_num = residue_num_value,
2911 residue_type = residue_type_value,
2912 atoms_initial_list = [],
2913 hssp_conservation = hssp_conservation_value,
2914 hssp_entropy = hssp_entropy_value,
2915 hssp_exposure = hssp_exposure_value,
2916 hssp_norm_entropy = hssp_norm_entropy_value,
2917 hssp_variability = hssp_variability_value,
2918 dssp = dssp_value )
2919
2920 if requested_data_indexes.has_key("atoms_info"):
2921
2922 for y in xrange(residue_atom_num_list[x]):
2923 current_atom = BianaObjects.PDBAtom( atom_num = atom_num,
2924 atom_type = atom_type_list[actual_atom_pos+y],
2925 atom_name = atom_name_list[actual_atom_pos+y],
2926 x = atom_coordinates_list[(actual_atom_pos+y)*3],
2927 y = atom_coordinates_list[(actual_atom_pos+y)*3+1],
2928 z = atom_coordinates_list[(actual_atom_pos+y)*3+2])
2929 atom_num+=1
2930
2931 pdbObject.add_atom( atomObject= current_atom,
2932 chain_name = new_chain,
2933 residue_num = residue_num_value,
2934 residue_type = residue_type_list[x] )
2935
2936 actual_atom_pos += residue_atom_num_list[x]
2937
2938
2939
2940
2941 if pdbObject.get_num_residues() == 0:
2942 raise ValueError("Empty PDB created")
2943 else:
2944 print pdbObject.get_num_residues()
2945
2946 return pdbObject
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958 - def insert_alignment( self, alignmentObject, externalEntityID, insert_aligned="no" ):
2959 """
2960 "insert_aligned" is used to insert exactly the aligned sequence (used in done alignments, as HSSP, as the sequences does not always is exactly the same
2961 In this case, the fragments are not inserted
2962 """
2963
2964 for x in xrange(len(alignmentObject.get_alignment())):
2965
2966 column_values = [("externalEntityID",externalEntityID),
2967
2968
2969 ("position",x)]
2970
2971 if alignmentObject.sequence_ids_list[x] is not None:
2972 column_values.append(("sequenceMD5",str(alignmentObject.sequence_ids_list[x])))
2973 if alignmentObject.cross_ids_list[x] is not None:
2974 column_values.append(("crossID",str(alignmentObject.cross_ids_list[x][1])))
2975 column_values.append(("crossID_identifier_type",alignmentObject.cross_ids_list[x][0]))
2976
2977 special_column_values = []
2978
2979
2980
2981 if insert_aligned == "no":
2982 column_values.append(("alignmentBlocks",alignmentObject.get_sequence_fragments_in_ascii(x)))
2983 else:
2984 special_column_values.append(("alignedSequence","COMPRESS(\"%s\")" %alignmentObject.get_aligned_sequence(x)))
2985
2986 if alignmentObject.sim[x] is not None:
2987 column_values.append(("identity",alignmentObject.sim[x]))
2988 if alignmentObject.wsim[x] is not None:
2989 column_values.append(("weighted_sim",alignmentObject.wsim[x]))
2990 if alignmentObject.lali[x] is not None:
2991 column_values.append(("sequence_covered",alignmentObject.lali[x]))
2992
2993
2994 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.ALIGNMENT_ELEMENT_TABLE,
2995 column_values = column_values,
2996 special_column_values = special_column_values,
2997 use_buffer=self.use_buffer),
2998 answer_mode = None )
2999
3000
3001 - def get_alignment( self, externalEntityID, get_species=False, range=None ):
3002 """
3003 Returns an alignment object identified by the externalEntityID used as parameter
3004 """
3005
3006 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.ALIGNMENT_ELEMENT_TABLE],
3007 columns = ["sequenceMD5","position","alignmentBlocks","UNCOMPRESS(alignedSequence)","crossID","crossID_identifier_type"],
3008 fixed_conditions = [(self.biana_database.externalEntityID_col,"=",externalEntityID)]),
3009 answer_mode = "raw" )
3010
3011 aln = BianaObjects.SequenceAlignment()
3012
3013 aln.set_number_of_sequences( number_of_sequences = len(data) )
3014
3015 seq_id_tax_id_dict = {}
3016
3017 temp_list_seq_ids = [ actual_aligned_seq[0] for actual_aligned_seq in data ]
3018
3019 def start_list(x):
3020 seq_id_tax_id_dict[x]=[]
3021
3022 map(start_list,temp_list_seq_ids)
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042 tax_ids = []
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061 for x_actual_aligned_seq in xrange(len(data)):
3062 actual_aligned_seq = data[x_actual_aligned_seq]
3063 try:
3064 tax_id_list = [ actual_aligned_seq[4].split("_")[1] ]
3065 except:
3066 tax_id_list = []
3067 aln.add_sequence_to_position( sequence_id = actual_aligned_seq[0],
3068
3069
3070 taxID_list = tax_id_list,
3071 sequence_position = actual_aligned_seq[1],
3072 sequence_fragments_ascii = actual_aligned_seq[2],
3073 aligned_sequence = actual_aligned_seq[3],
3074 crossID = (actual_aligned_seq[5],actual_aligned_seq[4]) )
3075
3076
3077
3078 return aln
3079
3080
3082 """
3083
3084 """
3085 final_aln = None
3086
3087 for current_fragment in fragments:
3088 new_aln = self.load_hssp_alignment(pdb_id=pdb_id, chain=current_fragment.get_chain(), fragments=[current_fragment], get_species=get_species)
3089 if final_aln is None:
3090 final_aln = new_aln
3091 else:
3092 final_aln = final_aln.concatenate_by_crossID(new_aln)
3093
3094 return final_aln
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3115 """
3116 Returns an alignment for an structure given in the hssp
3117
3118 For the moment, all the fragments must belong to the same chain
3119 """
3120
3121 pdb_attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["pdb"]
3122
3123 fixed_conditions = [("%s.value" %pdb_attr_table,"=",pdb_id)]
3124 join_conditions = [("%s.externalEntityID" %pdb_attr_table,"=","%s.externalEntityID" %self.biana_database.ALIGNMENT_ELEMENT_TABLE ),
3125 ("%s.pdb" %self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["pdb"],"=","%s.value" %pdb_attr_table)]
3126
3127 if chain is not None:
3128 fixed_conditions.append(("%s.chain" %pdb_attr_table,"=",chain))
3129 join_conditions.append(("%s.chain" %pdb_attr_table,"=","%s.chain" %pdb_attr_table))
3130
3131 query = self.db._get_select_sql_query( tables = [self.biana_database.ALIGNMENT_ELEMENT_TABLE,
3132 self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["pdb"],
3133 pdb_attr_table],
3134 columns = ["%s.externalEntityID" %pdb_attr_table,
3135 "UNCOMPRESS(hssp_residue_num_correspondences)",
3136 "UNCOMPRESS(residue_num_list)"],
3137 fixed_conditions = fixed_conditions,
3138 join_conditions = join_conditions )
3139
3140 data = self.db.select_db_content( query, answer_mode = "raw" )
3141
3142 if len(data)==0:
3143 print "HSSP %s NOT FOUND\n" %pdb_id
3144
3145 return None
3146 else:
3147 alignmentExternalEntityID = data[0][0]
3148 hssp_residue_num_correspondence = int_ascii.ascii_to_int_list(2,data[0][1])
3149 residue_num_list = int_ascii.ascii_to_int_list(2,data[0][2])
3150
3151
3152
3153 align_fragments_to_select = []
3154 current_start = 0
3155 current_end = None
3156
3157 if len(fragments)>0:
3158 for actual_fragment in fragments:
3159 for x in xrange(len(residue_num_list)):
3160 if (actual_fragment.get_start_residue() is None and actual_fragment.get_end_residue() is None) or (actual_fragment.get_start_residue()<=residue_num_list[x] and (actual_fragment.get_end_residue()>=residue_num_list[x] or actual_fragment.get_end_residue() is None)):
3161
3162 current_end = x
3163 else:
3164 if current_end is not None:
3165
3166 align_fragments_to_select.append((current_start,current_end))
3167 current_end = None
3168 current_start = x+1
3169
3170 if current_end is not None:
3171 align_fragments_to_select.append((current_start,current_end))
3172
3173 else:
3174 align_fragments_to_select.append((current_start,len(residue_num_list)-1))
3175
3176 alignment = self.get_alignment(externalEntityID=alignmentExternalEntityID,get_species=get_species)
3177
3178 return alignment.get_subalignment( fragments = align_fragments_to_select )
3179
3180
3181
3182
3183
3184
3185
3187 """
3188 Gets the information about available unification protocols
3189 """
3190
3191 if( self.available_unification_protocols is None ):
3192
3193 self.available_unification_protocols = {}
3194
3195 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.USER_ENTITY_PROTOCOL_TABLE.get_table_name()],
3196 columns = ["description","unificationProtocolID","databaseVersion"]),
3197 answer_mode = "raw" )
3198
3199 for actual_protocol in data:
3200
3201 uP = BianaObjects.UnificationProtocol(description = actual_protocol[0], BianaDatabaseVersion = actual_protocol[1], id = str(actual_protocol[1]))
3202
3203
3204 atoms = self.db.select_db_content( self.db._get_select_sql_query( tables = [(self.biana_database.USER_ENTITY_PROTOCOL_ATOMS_TABLE.get_table_name(),"A"),
3205 (self.biana_database.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE.get_table_name(),"B")],
3206 columns = ["externalDatabaseID_A","externalDatabaseID_B","GROUP_CONCAT(B.cross_referenced_code)"],
3207 join_conditions = [("A.unificationAtomID","=","B.unificationAtomID")],
3208 fixed_conditions = [("A.unificationProtocolID","=",actual_protocol[1])],
3209 group_conditions = ["A.unificationAtomID"] ),
3210 answer_mode = "raw" )
3211 for current_atom in atoms:
3212 uP.add_unification_atom_elements( BianaObjects.UnificationAtomElement( externalDatabaseID_A = current_atom[0],
3213 externalDatabaseID_B = current_atom[1],
3214 externalAttribute = current_atom[2].split(",") ) )
3215
3216 self.available_unification_protocols[actual_protocol[0].lower()] = uP
3217
3218
3219 self.available_unification_protocols["no unification"] = None
3220
3221 return
3222
3223
3225
3226 self._load_available_unification_protocols()
3227
3228 unification_protocol_name = str(unification_protocol_name).lower()
3229
3230 if self.available_unification_protocols[unification_protocol_name] is None:
3231 return self.biana_database.USER_ENTITY_TABLE.get_table_name()
3232
3233 return self.biana_database.USER_ENTITY_TABLE.get_table_name()+self.available_unification_protocols[unification_protocol_name].get_id()
3234
3235
3244
3245
3247 """
3248 """
3249
3250
3251 protocol_id = self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.USER_ENTITY_PROTOCOL_TABLE,
3252 column_values = [("description",protocol.get_description()),
3253 ("databaseVersion","TEST_DB_VERSION")],
3254 use_buffer = False ),
3255 answer_mode = "last_id" )
3256 protocol.id = protocol_id
3257
3258
3259 for actual_unification_atom_element in protocol.get_unification_atom_elements():
3260 atom_id = self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.USER_ENTITY_PROTOCOL_ATOMS_TABLE,
3261 column_values = [ ("unificationProtocolID",protocol_id),
3262 ("externalDatabaseID_A",actual_unification_atom_element.get_externalDatabaseID_A() ),
3263 ("externalDatabaseID_B",actual_unification_atom_element.get_externalDatabaseID_B() ) ],
3264 use_buffer=False ),
3265 answer_mode = "last_id" )
3266
3267 crossed_attributes = actual_unification_atom_element.get_external_attribute_list()
3268
3269 for actual_crossed_attribute in crossed_attributes:
3270 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE,
3271 column_values = [ ("unificationAtomID",atom_id),
3272 ("cross_referenced_code",actual_crossed_attribute)],
3273 use_buffer = False) )
3274
3275
3277 """
3278
3279 "protocol" is the UnificationProtocol object that has to be followed
3280 """
3281
3282
3283
3284
3285 if self.isOptimizedForRunning is False:
3286 self.optimize_database_for( mode="running", optimize=True )
3287
3288 self._create_new_unification_protocol_tables(protocol)
3289
3290 protocol_temp_table = database.TableDB( table_name = "temp_equivalences_protocol_%s" %protocol.id,
3291 table_fields = [ database.FieldDB(field_name = "externalEntity1",
3292 data_type = "integer(4) unsigned"),
3293 database.FieldDB(field_name = "externalEntity2",
3294 data_type = "integer(4) unsigned")] )
3295
3296 self.db.insert_db_content( protocol_temp_table.create_mysql_query(), answer_mode = None )
3297
3298 print "Unifying..."
3299
3300
3301 import tempfile
3302
3303 temp_eq_file = tempfile.NamedTemporaryFile(bufsize=0)
3304
3305
3306 temp_eq_file_name = temp_eq_file.name
3307 temp_eq_file.close()
3308 temp_eq_file = open(temp_eq_file_name,'w')
3309
3310
3311 for actual_unification_atom_element in protocol.get_unification_atom_elements():
3312 query = self._get_equivalent_external_entities(actual_unification_atom_element)
3313 if query is not None:
3314
3315 self.db.insert_db_content( "INSERT INTO %s (%s)" %(protocol_temp_table.get_table_name(),query),
3316 answer_mode = None )
3317
3318
3319
3320
3321
3322
3323
3324
3325 l=1000000
3326 o=0
3327
3328 temp_equivalences = self.db.select_db_content( "SELECT * FROM %s LIMIT %s OFFSET %s" %(protocol_temp_table.get_table_name(),l,o),
3329 answer_mode = "raw", remove_duplicates="no" )
3330
3331 while( len(temp_equivalences) > 0 ):
3332 o += l
3333 [ temp_eq_file.write("%s\t%s\n" %(equivalence[0],equivalence[1])) for equivalence in temp_equivalences ]
3334 temp_equivalences = self.db.select_db_content( "SELECT * FROM %s LIMIT %s OFFSET %s" %(protocol_temp_table.get_table_name(),l,o),
3335 answer_mode = "raw", remove_duplicates="no" )
3336
3337
3338
3339 self.db.insert_db_content( "DELETE FROM %s" %protocol_temp_table.get_table_name() )
3340
3341 temp_all_file = tempfile.NamedTemporaryFile(bufsize=0)
3342 temp_all_file_name = temp_all_file.name
3343 temp_all_file.close()
3344 temp_all_file = open(temp_all_file_name, 'w')
3345
3346
3347
3348 databases = protocol.get_database_ids()
3349 for actual_database in databases:
3350 if not self.get_external_database(database_id = actual_database).get_promiscuity():
3351 query = self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ENTITY_TABLE],
3352 columns = [self.biana_database.externalEntityID_col],
3353 fixed_conditions = [("externalDatabaseID","=",actual_database),
3354 ("type","!=","relation")] )
3355
3356
3357
3358 self.db.insert_db_content( "INSERT INTO %s (externalEntity1) (%s)" %(protocol_temp_table.get_table_name(),query),
3359 answer_mode = None )
3360
3361
3362
3363
3364
3365
3366 l=1000000
3367 o=0
3368 temp_equivalences = self.db.select_db_content( "SELECT externalEntity1 FROM %s LIMIT %s OFFSET %s" %(protocol_temp_table.get_table_name(),l,o),
3369 answer_mode = "raw", remove_duplicates="no" )
3370 while( len(temp_equivalences) > 0 ):
3371 o += l
3372 [ temp_all_file.write("%s\n" %equivalence) for equivalence in temp_equivalences ]
3373 temp_equivalences = self.db.select_db_content( "SELECT externalEntity1 FROM %s LIMIT %s OFFSET %s" %(protocol_temp_table.get_table_name(),l,o),
3374 answer_mode = "raw", remove_duplicates="no" )
3375
3376
3377 self.db.insert_db_content( protocol_temp_table.get_drop_query() )
3378
3379 unification_temp_file = tempfile.NamedTemporaryFile(bufsize=0)
3380 unification_temp_file_name = unification_temp_file.name
3381
3382 unification_temp_file.close()
3383 temp_eq_file.close()
3384 temp_all_file.close()
3385
3386 from biana import __path__ as biana_path
3387 import os
3388
3389 command = "\""+biana_path[0]+os.sep+"BianaDB"+os.sep+"%s\" "+temp_eq_file_name+" "+temp_all_file_name+" "+unification_temp_file_name
3390
3391 if sys.platform.lower().startswith("win"):
3392 command = command %("win_unify.exe")
3393 else:
3394 command = command %("unify")
3395
3396 p = os.system(command)
3397
3398 if p==1:
3399 raise ValueError("ERROR in unification. Unify compiled program error")
3400
3401
3402
3403
3404 temp_eq_file.close()
3405 temp_all_file.close()
3406
3407
3408 print "Creating the protocol table"
3409 protocol_table = copy.deepcopy(self.biana_database.USER_ENTITY_TABLE)
3410 protocol_table.set_table_name(new_name = "%s%s" %(protocol_table.get_table_name(),protocol.id))
3411
3412 self.db.insert_db_content( protocol_table.create_mysql_query(ignore_primary_key=True), answer_mode = None )
3413
3414 self.db._disable_indices( table_list = [protocol_table] )
3415
3416
3417
3418 if False:
3419 self.db._lock_tables( table_list = [protocol_table.get_table_name()] )
3420 self.db.insert_db_content( "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s" %(unification_temp_file.name.replace("\\","\\\\"),protocol_table.get_table_name()) )
3421 else:
3422
3423
3424
3425 unification_file = open(unification_temp_file.name,'r')
3426 for line in unification_file:
3427 fields = line.strip().split("\t")
3428 self.db.insert_db_content( self.db._get_insert_sql_query( table = protocol_table,
3429 column_values = [("userEntityID", fields[0]),
3430 ("externalEntityID", fields[1])],
3431 use_buffer = "yes" ) )
3432 unification_file.close()
3433
3434 import os
3435 os.unlink(unification_temp_file.name)
3436
3437
3438 self.db._empty_buffer()
3439
3440 self.db._enable_indices( table_list = [protocol_table] )
3441
3442 self.db._unlock_tables()
3443
3444
3445 self._unify_promiscuous_external_entities( protocol = protocol )
3446
3447 return
3448
3449
3451 """
3452
3453 """
3454
3455 print "Adding promiscuous external entities"
3456
3457
3458
3459 unification_table = self._get_user_entity_table_name(unification_protocol_name=protocol.description)
3460
3461
3462 for actual_unification_atom_element in protocol.get_unification_atom_elements():
3463 db_id_A = actual_unification_atom_element.get_externalDatabaseID_A()
3464 db_id_B = actual_unification_atom_element.get_externalDatabaseID_B()
3465
3466
3467
3468 if not (self.get_external_database(database_id = db_id_A).get_promiscuity() ^ self.get_external_database(database_id = db_id_B).get_promiscuity()):
3469 continue
3470
3471 if self.get_external_database(database_id = db_id_A).get_promiscuity():
3472 promiscuous_db_id, non_promiscuous_db_id = db_id_A, db_id_B
3473 else:
3474 promiscuous_db_id, non_promiscuous_db_id = db_id_B, db_id_A
3475
3476 attribute_list = actual_unification_atom_element.get_external_attribute_list()
3477
3478
3479 tables = [ (unification_table, "u"),
3480 (self.biana_database.EXTERNAL_ENTITY_TABLE, "eE1"),
3481 (self.biana_database.EXTERNAL_ENTITY_TABLE, "eE2") ]
3482
3483 columns = ["userEntityID", "eE2.externalEntityID"]
3484
3485 join_conditions = [ ("eE1.externalEntityID", "=", "u.externalEntityID") ]
3486 fixed_conditions = [ ("eE2.type","!=","relation") ]
3487
3488 for attribute_index in xrange(len(attribute_list)):
3489 unifying_attribute = attribute_list[attribute_index]
3490 tables.append( (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[unifying_attribute], "a1_%s" % attribute_index) )
3491 tables.append( (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[unifying_attribute], "a2_%s" % attribute_index) )
3492 join_conditions.append( ("a1_%s.value" % attribute_index,"=","a2_%s.value" % attribute_index) )
3493 join_conditions.append( ("eE1.externalEntityID","=","a1_%s.externalEntityID" % attribute_index) )
3494 join_conditions.append( ("eE2.externalEntityID","=","a2_%s.externalEntityID" % attribute_index) )
3495 fixed_conditions.append( ("eE1.externalDatabaseID","=",non_promiscuous_db_id) )
3496 fixed_conditions.append( ("eE2.externalDatabaseID","=",promiscuous_db_id) )
3497
3498 if unifying_attribute.lower()=="pdb":
3499 join_conditions.append( ("a1_%s.chain"%attribute_index,"=","b2_%s.chain"%attribute_index) )
3500
3501 get_equivalences_query = self.db._get_select_sql_query( tables = tables,
3502 columns = columns,
3503 join_conditions = join_conditions,
3504 fixed_conditions = fixed_conditions )
3505 print get_equivalences_query
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521 self.db.insert_db_content( sql_query = self.db._get_nested_insert_sql_query( table = unification_table,
3522 columns = ["userEntityID", "externalEntityID"],
3523 subquery = get_equivalences_query ),
3524 answer_mode = None)
3525
3526
3527
3529 """
3530 Drops from the database all the information of a protocol of unification
3531
3532 "protocol_description" corresponds to the description of the protocol to drop
3533 """
3534
3535 self._load_available_unification_protocols()
3536
3537 unification_protocol_name = unification_protocol_name.lower()
3538
3539 if not self.available_unification_protocols.has_key( unification_protocol_name ):
3540 raise ValueError("ERROR. Trying to drop an unexisting unification protocol")
3541
3542 if unification_protocol_name == "no unification":
3543 raise ValueError("Default unification protocol cannot be deleted")
3544
3545 unificationProtocolObj = self.available_unification_protocols[unification_protocol_name]
3546
3547
3548 atom_ids_query = self.db._get_select_sql_query( tables = [self.biana_database.USER_ENTITY_PROTOCOL_ATOMS_TABLE],
3549 columns = ["unificationAtomID"],
3550 fixed_conditions = [("unificationProtocolID","=", unificationProtocolObj.get_id())] )
3551
3552 self.db.insert_db_content( self.db._get_delete_sql_query(table = self.biana_database.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE,
3553 fixed_conditions = [("unificationAtomID","IN","(%s)" %atom_ids_query, None)]) )
3554
3555 self.db.insert_db_content( self.db._get_delete_sql_query(table = self.biana_database.USER_ENTITY_PROTOCOL_ATOMS_TABLE,
3556 fixed_conditions = [("unificationProtocolID","=", unificationProtocolObj.get_id())]) )
3557
3558
3559 self.db.insert_db_content( self.db._get_delete_sql_query(table = self.biana_database.USER_ENTITY_PROTOCOL_TABLE,
3560 fixed_conditions = [("unificationProtocolID","=", unificationProtocolObj.get_id())]) )
3561
3562
3563 self.db.insert_db_content( self.db._get_drop_sql_query( [self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)] ) )
3564
3565 return
3566
3568 """
3569 Fetchs from the database all the atom information of a protocol of unification
3570
3571 "unification_protocol_name" corresponds to the description of the protocol
3572 """
3573 self._load_available_unification_protocols()
3574
3575 unification_protocol_name = unification_protocol_name.lower()
3576
3577 if not self.available_unification_protocols.has_key( unification_protocol_name ):
3578 raise ValueError("ERROR. Trying to get an unexisting unification protocol")
3579
3580 if unification_protocol_name == "no unification":
3581 raise ValueError("Default unification protocol does not have any atom information")
3582
3583 unificationProtocolObj = self.available_unification_protocols[unification_protocol_name]
3584
3585 unification_protocol_atoms = unificationProtocolObj.get_unification_atom_elements()
3586
3587
3588
3589
3590
3591
3592 return unification_protocol_atoms
3593
3594
3596 """
3597 """
3598 table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute.lower()]["table"].get_table_name()
3599 protocol_table = "%s%s" %(self.biana_database.USER_ENTITY_TABLE,protocol_id)
3600
3601 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [(protocol_table,"u1"),
3602 (protocol_table,"u2"),
3603 (table,"e1"),
3604 (table,"e2")],
3605 columns = ["u1.userEntityID", "u2.userEntityID"],
3606 join_conditions = [("e1.%s" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute.lower()]["fields"]["value"].get_field_name(),
3607 "=",
3608 "e2.%s" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute.lower()]["fields"]["value"].get_field_name()),
3609 ("u1.externalEntityID","<","u2.externalEntityID"),
3610 ("u1.externalEntityID","=","e1.externalEntityID"),
3611 ("u2.externalEntityID","=","e2.externalEntityID")],
3612 fixed_conditions = [("u1.userEntityID","IN","(%s)" %(",".join([ str(x) for x in userEntitiesList])),None),
3613 ("u2.userEntityID","IN","(%s)" %(",".join([ str(x) for x in userEntitiesList])),None)] ),
3614 answer_mode = "raw", remove_duplicates="no" )
3615
3616 import networkx
3617
3618 g = networkx.Graph()
3619 g.add_nodes_from(userEntitiesList)
3620
3621 for current_data in data:
3622 g.add_edge(current_data[0],current_data[1])
3623
3624 return networkx.connected_components(g)
3625
3626
3627
3629 """
3630 returns the query to obtain the list of equivalent
3631 """
3632
3633 actual_atom_element = unification_atom_element
3634 attribute_list = actual_atom_element.get_external_attribute_list()
3635
3636 tables = [(self.biana_database.EXTERNAL_ENTITY_TABLE,"e1"),
3637 (self.biana_database.EXTERNAL_ENTITY_TABLE,"e2")]
3638
3639 fixed_conditions = []
3640 join_conditions = []
3641
3642
3643 db_id_A = actual_atom_element.get_externalDatabaseID_A()
3644 db_id_B = actual_atom_element.get_externalDatabaseID_B()
3645 if (self.get_external_database(database_id = db_id_A).get_promiscuity() or self.get_external_database(database_id = db_id_B).get_promiscuity()):
3646 return None
3647
3648
3649 for attribute_index in xrange(len(attribute_list)):
3650
3651 actual_attribute = attribute_list[attribute_index]
3652
3653 tables.append( (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_attribute.lower()],"a%s" %(attribute_index)) )
3654 tables.append( (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_attribute.lower()],"b%s" %(attribute_index)) )
3655
3656
3657 fixed_conditions.extend( [ ("a%s.%s" %(attribute_index,self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_attribute]["fields"][x[0].lower()]),"=",x[1])
3658 for x in actual_atom_element.get_field_conditions_A() ] )
3659
3660 fixed_conditions.extend( [ ("b%s.%s" %(attribute_index,self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_attribute]["fields"][x[0].lower()]),"=",x[1])
3661 for x in actual_atom_element.get_field_conditions_B() ] )
3662
3663 join_conditions.extend( [ ("a%s.value" %attribute_index,
3664 "=",
3665 "b%s.value" %attribute_index)
3666 for x in actual_atom_element.get_field_cross_references() ] )
3667
3668
3669 if actual_attribute.lower()=="pdb":
3670 join_conditions.extend( [ ("a%s.chain" %attribute_index,
3671 "=",
3672 "b%s.chain" %attribute_index)
3673 for x in actual_atom_element.get_field_cross_references() ] )
3674
3675 join_conditions.append( ("e1.externalEntityID","=","a%s.externalEntityID" %attribute_index) )
3676 join_conditions.append( ("e2.externalEntityID","=","b%s.externalEntityID" %attribute_index) )
3677
3678
3679 join_conditions.append( ("e1.type","=","e2.type") )
3680
3681 fixed_conditions.append( ("e1.externalDatabaseID","=",actual_atom_element.get_externalDatabaseID_A()) )
3682 fixed_conditions.append( ("e2.externalDatabaseID","=",actual_atom_element.get_externalDatabaseID_B()) )
3683
3684 fixed_conditions.append( ("e1.type","!=","relation") )
3685 fixed_conditions.append( ("e2.type","!=","relation") )
3686
3687 if( actual_atom_element.get_externalDatabaseID_A() == actual_atom_element.get_externalDatabaseID_B() ):
3688 join_conditions.append( ("e1.externalEntityID","<","e2.externalEntityID") )
3689
3690
3691 return self.db._get_select_sql_query( tables = tables,
3692 columns = ["e1.externalEntityID","e2.externalEntityID"],
3693 fixed_conditions = fixed_conditions,
3694 join_conditions = join_conditions )
3695
3696
3697 - def get_external_entities_dict(self, externalEntityIdsList, attribute_list=[], relation_attribute_list=[], participant_attribute_list=[], useTransferAttributes=True):
3698 """
3699 Returns a dict of external Entity Objects with the attributes specified in the "attribute_identifier_list"
3700
3701 The key in the dictionary corresponds to the external Entity ID
3702
3703 The external entity can be of any type (included relations)
3704
3705 "attribute_field_restrictions" is to restrict the attributes by additional fields. Sintaxis: [(attribute_identifier, field, value)]
3706 """
3707
3708 if len(externalEntityIdsList)==0:
3709 return {}
3710
3711 eE_id_str_list = ", ".join([ str(x) for x in externalEntityIdsList ])
3712
3713
3714 data = self.db.select_db_content(self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name()],
3715 columns = ["%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name(),
3716 "%s.externalDatabaseID" %self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name(),
3717 "%s.type" %self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name()],
3718 fixed_conditions = [("%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name(),
3719 "IN",
3720 "(%s)" %eE_id_str_list, None)] ),
3721 answer_mode="raw", remove_duplicates="no")
3722 eE_dict = {}
3723
3724 eEr_id_list = []
3725
3726 for current_data in data:
3727
3728 if current_data[2]=="relation":
3729
3730 relation_type = self.db.select_db_content( self.db._get_select_sql_query( tables=[self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE],
3731 columns = ["type"],
3732 fixed_conditions = [("externalEntityRelationID","=",current_data[0])] ),
3733 answer_mode = "single")
3734
3735 relationObj = BianaObjects.ExternalEntityRelation( id = current_data[0],
3736 source_database = current_data[1],
3737 relation_type = relation_type )
3738
3739 eE_dict[current_data[0]] = relationObj
3740
3741
3742 participants = self.db.select_db_content( self.db._get_select_sql_query( tables=[self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE],
3743 columns = ["externalEntityID"],
3744 fixed_conditions = [("externalEntityRelationID","=",current_data[0])] ),
3745 answer_mode = "raw" )
3746
3747 [ relationObj.add_participant( externalEntityID=x[0] ) for x in participants ]
3748
3749 eEr_id_list.append(str(current_data[0]))
3750
3751 else:
3752 eE_dict[current_data[0]] = BianaObjects.ExternalEntity( id = current_data[0],
3753 source_database = current_data[1],
3754 type = current_data[2] )
3755
3756
3757
3758
3759
3760
3761
3762 attributes = sets.Set([ x.lower() for x in attribute_list ])
3763 attributes.update([ x.lower() for x in relation_attribute_list ])
3764
3765
3766 for current_attribute in attributes:
3767
3768 if current_attribute == "proteinsequence":
3769
3770 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute],
3771 self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"]]
3772 columns = ["externalEntityID","UNCOMPRESS(sequence)"]
3773 data = self.db.select_db_content(self.db._get_select_sql_query( tables = tables,
3774 columns=columns,
3775 fixed_conditions = [("externalEntityID",
3776 "IN",
3777 "(%s)" %eE_id_str_list, None)],
3778 join_conditions = [("%s.value" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute],
3779 "=",
3780 "%s.sequenceMD5" %self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"])]),
3781 answer_mode = "raw" )
3782 for current_sequence in data:
3783 eE_dict[current_sequence[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute,
3784 value=BianaObjects.ProteinSequence(sequence=current_sequence[1])))
3785
3786 continue
3787
3788 if current_attribute == "nucleotidesequence":
3789
3790 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute],
3791 self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"]]
3792 columns = ["externalEntityID","sequenceType","UNCOMPRESS(sequence)"]
3793 data = self.db.select_db_content(self.db._get_select_sql_query( tables = tables,
3794 columns=columns,
3795 fixed_conditions = [("externalEntityID",
3796 "IN",
3797 "(%s)" %eE_id_str_list, None)],
3798 join_conditions = [("%s.value" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute],
3799 "=",
3800 "%s.sequenceMD5" %self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"])]),
3801 answer_mode = "raw" )
3802 for current_sequence in data:
3803 if( current_sequence[1]=="dna" ):
3804 eE_dict[current_data[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute,
3805 value=BianaObjects.DNASequence(sequence=current_sequence[2])))
3806 elif( current_sequence[1]=="rna" ):
3807 eE_dict[current_data[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute,
3808 value=BianaObjects.RNASequence(sequence=current_sequence[2])))
3809
3810 continue
3811
3812 try:
3813 table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute]
3814
3815 columns = ["%s.externalEntityID" %table,
3816 "%s.value" %table]
3817
3818
3819 fixed_conditions = [("%s.externalEntityID" %table,
3820 "IN",
3821 "(%s)" %eE_id_str_list, None)]
3822
3823 data = self.db.select_db_content(self.db._get_select_sql_query( tables=[table],
3824 columns=columns,
3825 fixed_conditions = fixed_conditions),
3826 answer_mode="raw", remove_duplicates="no")
3827
3828 for current_data in data:
3829 eE_dict[current_data[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute,
3830 value=current_data[1]))
3831
3832
3833
3834
3835 if self._is_transferred_attribute( current_attribute ) and useTransferAttributes is True:
3836
3837 for current_transfer in self.transferred_attributes[current_attribute.lower()]:
3838
3839 tables = [ (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute],"transferred"),
3840 (self._get_key_attribute_table_name( key_id = self.key_attribute_ids[(current_transfer[0],current_transfer[1])] ),"key_attr"),
3841 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_transfer[1]],"key_attr2") ]
3842
3843 fixed_conditions = [("key_attr2.externalEntityID",
3844 "IN",
3845 "(%s)" %eE_id_str_list, None)]
3846
3847 join_conditions = [("key_attr.value","=","key_attr2.value"),
3848 ("transferred.externalEntityID","=","key_attr.externalEntityID")]
3849
3850 columns = ["key_attr2.externalEntityID","transferred.value","transferred.type"]
3851
3852
3853 data = self.db.select_db_content(self.db._get_select_sql_query( tables=tables,
3854 columns=columns,
3855 fixed_conditions = fixed_conditions,
3856 join_conditions = join_conditions),
3857 answer_mode="raw", remove_duplicates="no")
3858
3859 for current_data in data:
3860 eE_dict[current_data[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute,
3861 value=current_data[1],
3862 type="transferred_%s" %(current_data[2]) ))
3863
3864
3865
3866 except:
3867 traceback.print_exc()
3868 sys.stderr.write("Attribute %s is not found in available attributes\n" %(current_attribute))
3869
3870
3871 for current_attribute in sets.Set([ x.lower() for x in participant_attribute_list]):
3872
3873 try:
3874 for current_eErID in eEr_id_list:
3875 current_eEr_obj = eE_dict[int(current_eErID)]
3876 data = self.db.select_db_content( self.db._get_select_sql_query( tables=[self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE,
3877 self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT[current_attribute]],
3878 columns = ["externalEntityID","value"],
3879 join_conditions = [("%s.externalEntityRelationParticipantID" %self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE,
3880 "=",
3881 "%s.externalEntityRelationParticipantID" %self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT[current_attribute])],
3882 fixed_conditions = [("externalEntityID","IN","(%s)" %",".join([ str(x) for x in current_eEr_obj.get_participant_external_entity_ids_list()]),None)]),
3883 answer_mode = "raw" )
3884
3885 for current_data in data:
3886 current_eEr_obj.add_participant_attribute( externalEntityID = current_data[0],
3887 participantAttribute = BianaObjects.ExternalEntityRelationParticipantAttribute( attribute_identifier = current_attribute,
3888 value = current_data[1] ))
3889
3890
3891 except:
3892 traceback.print_exc()
3893 sys.stderr.write("Attribute %s is not found in available relation participant attributes\n" %(current_attribute))
3894
3895
3896 return eE_dict
3897
3898
3900 """
3901 Returns a dictionary with { userEntityID: list of attributes }
3902
3903 This method is intended to be faster for getting user entity attributes than getting external entity objects and its attribute objects
3904
3905 Numerical values are stored as strings!!!
3906 """
3907
3908
3909
3910
3911 if debug_web == 1:
3912 import os
3913 debug_file_fd = file("/usr/local/apache2/htdocs/webiana/sessions/debug_file_biana.txt", "w")
3914 os.chmod("/usr/local/apache2/htdocs/webiana/sessions/debug_file_biana.txt", 0777)
3915 debug_file_fd.write("attribute_identifier is %s" %(attribute_identifier))
3916 debug_file_fd.write("external entity attribute tables dict has keys %s" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT.keys()))
3917
3918 attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier]
3919 unif_table = self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)
3920
3921 if attribute_identifier.lower()=="proteinsequence":
3922 query = self.db._get_select_sql_query( tables = [attr_table,unif_table,
3923 self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"]],
3924 columns = ["userEntityID","UNCOMPRESS(sequence) AS seq"],
3925 join_conditions = [("userEntityID","IN","(%s)" %",".join(map(str,listUserEntityID))),
3926 ("%s.externalEntityID" %attr_table,"=","%s.externalEntityID" %unif_table),
3927 ("%s.value" %attr_table,"=","%s.sequenceMD5" %self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"])],
3928 group_conditions = ["userEntityID","seq"] )
3929 else:
3930 query = self.db._get_select_sql_query( tables = [attr_table,unif_table],
3931 columns = ["userEntityID","value"],
3932 join_conditions = [("userEntityID","IN","(%s)" %",".join(map(str,listUserEntityID))),
3933 ("%s.externalEntityID" %attr_table,"=","%s.externalEntityID" %unif_table)],
3934 group_conditions = ["userEntityID","value"] )
3935
3936 data = self.db.select_db_content( query, answer_mode = "raw" )
3937
3938 return_dict = {}
3939
3940 for current_data in data:
3941 return_dict.setdefault(current_data[0],[]).append(str(current_data[1]))
3942
3943
3944
3945 if self._is_transferred_attribute( attribute_identifier ):
3946
3947 for current_transfer in self.transferred_attributes[attribute_identifier.lower()]:
3948
3949 tables = [ unif_table,
3950 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier],"transferred"),
3951 (self._get_key_attribute_table_name( key_id = self.key_attribute_ids[(current_transfer[0],current_transfer[1])] ),"key1"),
3952 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_transfer[1]],"key2") ]
3953
3954 columns = ["userEntityID","transferred.value"]
3955
3956 join_conditions = [("userEntityID","IN","(%s)" %",".join(map(str,listUserEntityID))),
3957 ("%s.externalEntityID" %unif_table,"=","key2.externalEntityID"),
3958 ("key1.value","=","key2.value"),
3959 ("transferred.externalEntityID","=","key1.externalEntityID")]
3960
3961 query = self.db._get_select_sql_query( tables = tables,
3962 columns = columns,
3963 join_conditions = join_conditions )
3964
3965 data = self.db.select_db_content( query, answer_mode = "raw" )
3966
3967 for current_data in data:
3968 return_dict.setdefault(current_data[0],[]).append(str(current_data[1]))
3969
3970 return return_dict
3971
3973 """
3974 Gets types associated with given user entity ids from database
3975
3976 Returns a dictionary of user entity id and type
3977 """
3978
3979 self._load_available_unification_protocols()
3980
3981
3982 fixed_conditions = [ ("u.userEntityID","IN","(%s)" %",".join(map(str,user_entity_ids)),None) ]
3983 tables = [ "%s u" % self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name),
3984 "%s e" % self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name() ]
3985 columns = [ "u.userEntityID", "e.type" ]
3986 join_conditions = [ ("u.externalEntityID", "=", "e.externalEntityID") ]
3987 group_conditions = [ "u.userEntityID", "e.type" ]
3988
3989 query = self.db._get_select_sql_query( tables = tables,
3990 columns = columns,
3991 fixed_conditions = fixed_conditions,
3992 join_conditions = join_conditions,
3993 group_conditions = group_conditions)
3994
3995 userEntityId_type_tuples = self.db.select_db_content(query,
3996 answer_mode = "raw" )
3997
3998 uEId_to_type = {}
3999 for uEId, type in userEntityId_type_tuples:
4000 if uEId_to_type.setdefault(uEId, type) != "gene":
4001 if type == "gene":
4002 uEId_to_type[uEId] = type
4003
4004 return uEId_to_type
4005
4006
4008 """
4009 Gets a list of user entities
4010
4011 It returns a set with user entities objects
4012
4013 "attribute_restrictions" must be a list of (attribute_identifier, value)
4014
4015 #! Obselete: Not used by any method
4016 """
4017
4018 external_entity_IDs_list = self.get_list_external_entities_IDs_by_attribute( attribute_identifier = attribute_identifier.lower(),
4019 field_values = field_values,
4020 attribute_restrictions = attribute_restrictions )
4021 self._load_available_unification_protocols()
4022
4023 fixed_conditions = [("u.externalEntityID","IN","(%s)" %",".join(map(str,external_entity_IDs_list)),None)]
4024
4025 if len(restrict_to_user_entity_ids_list)>0:
4026 fixed_conditions.append( ("u.userEntityID","IN","(%s)" %",".join(map(str,restrict_to_user_entity_ids_list)),None) )
4027
4028 if( len(external_entity_IDs_list) > 0 ):
4029
4030 tables = [ "%s u" % self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name),
4031 "%s e" % self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name() ]
4032 columns = [ "u.userEntityID", "e.type" ]
4033 join_conditions = [ ("u.externalEntityID", "=", "e.externalEntityID") ]
4034 group_conditions = [ "u.userEntityID", "e.type" ]
4035
4036 query = self.db._get_select_sql_query( tables = tables,
4037 columns = columns,
4038 fixed_conditions = fixed_conditions,
4039 join_conditions = join_conditions,
4040 group_conditions = group_conditions)
4041
4042 userEntityIDlist = self.db.select_db_content(query,
4043 answer_mode = "raw" )
4044
4045 return userEntityIDlist
4046
4047 return []
4048
4049
4056
4057
4058 - def get_user_entity_relations_by_sharing_attributes(self, unification_protocol_name, userEntityID_list, listAttributes, limit_to_userEntityID_list=False, attribute_restrictions=[], negative_attribute_restrictions=[], ontology_expansion_level=None):
4059 """
4060 Returns a list of relations between user entities, in which the share attributes listed in listAttributes
4061
4062 "listAttributes" is a list of lists of external entity attributes. Each sublist is a "sharing" restriction
4063
4064 "expand_ontology_attributes": Boolean to specify if ontology attributes should be expanded to lower levels
4065
4066 "ontology_expansion_level": Dictionary to specigy the category level to which should be considered equivalent two ontology attributes (for example, family level in scop)
4067 """
4068
4069
4070
4071 if len(userEntityID_list)==0:
4072 return []
4073
4074 equivalent_uE_pair_queries = []
4075
4076 for current_attribute_group in listAttributes:
4077
4078 equivalent_uE_pair_queries.append(self._get_nested_queries_for_getting_equivalent_uE_by_sharing_attributes( unification_protocol_name = unification_protocol_name,
4079 attribute_list = current_attribute_group,
4080 user_entity_ids_list = userEntityID_list,
4081 restrict_to_user_entity_ids_list = limit_to_userEntityID_list))
4082
4083 union_query = self.db._get_union_queries( equivalent_uE_pair_queries )
4084
4085
4086
4087
4088 restricted_query = self._apply_restrictions_to_query( query = union_query,
4089 unification_protocol_name = unification_protocol_name,
4090 attribute_restrictions = attribute_restrictions,
4091 column_name_to_restrict="userEntityID2" )
4092
4093 negatively_restricted_query = self._apply_negative_restrictions_to_query( query = restricted_query,
4094 unification_protocol_name = unification_protocol_name,
4095 column_name_to_restrict="userEntityID2",
4096 negative_attribute_restrictions = negative_attribute_restrictions )
4097
4098
4099
4100 results = self.db.select_db_content( negatively_restricted_query,
4101 answer_mode = "raw" )
4102
4103 if results:
4104 uE1s, uE2s = zip(*results)
4105 uE2_types_dict = self.get_user_entity_type(unification_protocol_name, uE2s)
4106
4107 new_results = []
4108 for current_result in results:
4109 new_results.append((current_result[0],current_result[1],uE2_types_dict[current_result[1]]))
4110 else:
4111 new_results = []
4112
4113 return new_results
4114
4116
4117 tables = [ "%s u" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name) ) ]
4118 columns = ["externalEntityID"]
4119 join_conditions = [("u.userEntityID","IN","(%s)" %",".join(map(str,userEntityID_list)))]
4120
4121 return self.db.select_db_content( self.db._get_select_sql_query( tables = tables,
4122 columns = columns,
4123 join_conditions = join_conditions ),
4124 answer_mode = "list" )
4125
4127
4128 tables = [ "%s u" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name) ) ]
4129 columns = ["userEntityID"]
4130 join_conditions = [("u.externalEntityID","IN","(%s)" %",".join(map(str,externalEntityID_list)))]
4131
4132 return self.db.select_db_content( self.db._get_select_sql_query( tables = tables,
4133 columns = columns,
4134 join_conditions = join_conditions ),
4135 answer_mode = "list" )
4136
4137
4139
4140
4141
4142 if len(externalEntityIDsList)==0:
4143 return {}
4144
4145 default_attribute_identifiers = self.db.select_db_content( self.db._get_select_sql_query( tables = [ self.biana_database.EXTERNAL_DATABASE_TABLE, self.biana_database.EXTERNAL_ENTITY_TABLE ],
4146 columns = [ "%s.externalEntityID" %(self.biana_database.EXTERNAL_ENTITY_TABLE),
4147 "%s.defaultExternalEntityAttribute" %(self.biana_database.EXTERNAL_DATABASE_TABLE) ],
4148
4149 join_conditions = [("%s.externalDatabaseID" %(self.biana_database.EXTERNAL_DATABASE_TABLE),"=","%s.externalDatabaseID" %(self.biana_database.EXTERNAL_ENTITY_TABLE)),
4150 ("%s.externalEntityID" %(self.biana_database.EXTERNAL_ENTITY_TABLE),"IN","(%s)" %(",".join(map(str,externalEntityIDsList))))] ),
4151 answer_mode = "raw" )
4152
4153 attribute_identifiers_dict = {}
4154 for externalEntityID, defaultAttribute in default_attribute_identifiers:
4155 attribute_identifiers_dict.setdefault(defaultAttribute,[]).append(externalEntityID)
4156
4157 return_dict = {}
4158
4159
4160 for current_attribute, list_externalEntityIds in attribute_identifiers_dict.iteritems():
4161 if current_attribute is None:
4162 for current_eEid in list_externalEntityIds:
4163 return_dict[current_eEid] = ""
4164 else:
4165 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute.lower()]]
4166 join_conditions = [("externalEntityID","IN","(%s)" %",".join(map(str,list_externalEntityIds)))]
4167 columns = ["externalEntityID","value"]
4168
4169
4170
4171
4172
4173 raw_data = self.db.select_db_content( self.db._get_select_sql_query( tables = tables,
4174 columns = columns,
4175 join_conditions = join_conditions,
4176 fixed_conditions = [("type","=","unique")],
4177 group_conditions = ["externalEntityID"]),
4178 answer_mode = "raw" )
4179
4180 return_dict.update( dict( [ (x[0],"%s: %s" %(current_attribute,x[1])) for x in raw_data ] ) )
4181
4182 del raw_data
4183
4184
4185
4186
4187
4188
4189
4190 return return_dict
4191
4192
4193
4194
4195 if len(externalEntityIDsList)==0:
4196 return {}
4197
4198
4199 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["defaultid"]]
4200 join_conditions = [("externalEntityID","IN","(%s)" %",".join(map(str,externalEntityIDsList)))]
4201 columns = ["externalEntityID","value"]
4202
4203 return_dict = dict(self.db.select_db_content( self.db._get_select_sql_query( tables = tables,
4204 columns = columns,
4205 join_conditions = join_conditions ),
4206 answer_mode = "raw" ))
4207
4208
4209
4210 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["defaultid"]]
4211 join_conditions = [("externalEntityID","IN","(%s)" %",".join(map(str,externalEntityIDsList)))]
4212 columns = ["externalEntityID","value"]
4213
4214 return_dict.update(dict(self.db.select_db_content( self.db._get_select_sql_query( tables = tables,
4215 columns = columns,
4216 join_conditions = join_conditions ),
4217 answer_mode = "raw" )))
4218
4219 return return_dict
4220
4221
4223 """
4224 Returns a list of tuples (relationID1, relationID2) in where relationID1 is a child of relationID2
4225 """
4226
4227
4228
4229 tables = ["%s p" %self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE]
4230
4231
4232 columns = ["p.externalEntityID","p.externalEntityRelationID"]
4233 join_conditions = [("p.externalEntityID","IN","(%s)" %",".join(map(str,externalEntityRelationIDs))),
4234 ("p.externalEntityRelationID","IN","(%s)" %",".join(map(str,externalEntityRelationIDs)))]
4235
4236 return self.db.select_db_content( self.db._get_select_sql_query( tables = tables,
4237 columns = columns,
4238 join_conditions = join_conditions ),
4239 answer_mode = "raw" )
4240
4241
4243 """
4244 Precalculates and stores in database relations hierachy
4245 """
4246
4247 return "DEPRECATED"
4248
4249 hierarchy_set = sets.Set()
4250
4251
4252 eE_eERid_list = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE],
4253 columns = ["externalEntityID","externalEntityRelationID"]),
4254 answer_mode = "raw" )
4255 eE_eERid_dict = {}
4256
4257 for current_pair in eE_eERid_list:
4258 eE_eERid_dict.setdefault(current_pair[0],[]).append(current_pair[1])
4259
4260
4261 def get_all_parents(eEid):
4262 parents = sets.Set()
4263 if( eE_eERid_dict.has_key(eEid) ):
4264 parents.update(eE_eERid_dict[eEid])
4265 for current_eEid in eE_eERid_dict[eEid]:
4266 parents.update(get_all_parents(current_eEid))
4267 return parents
4268
4269
4270 for eEid in eE_eERid_dict.keys():
4271
4272 for current_parent in get_all_parents(eEid):
4273 hierarchy_set.add((current_parent,eEid))
4274
4275
4276 self.db.set_lock_tables(True)
4277
4278 self.db._disable_indices([self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE])
4279
4280 for current_eERid, current_eEid in hierarchy_set:
4281 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE,
4282 column_values = (("externalEntityRelationParticipantID", self._get_new_external_entity_relation_participant_id()),
4283 (self.biana_database.external_entity_relation_id_col, current_eERid),
4284 (self.biana_database.externalEntityID_col, current_eEid )),
4285 use_buffer = True ))
4286 self.db._empty_buffer()
4287 self.db._enable_indices([self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE])
4288
4289 self.db.set_lock_tables(False)
4290 return
4291
4292 - def NEWget_user_entity_relations(self, unification_protocol_name, userEntityID_list, attribute_restrictions = [], negative_attribute_restrictions = [], listRelationType=[], dictRelationAttributeRestriction={}, use_self_relations=True, limit_to_userEntityID_list=False, use_nested_relations=True):
4293 """
4294 Returns the list of relations where the userEntity is involved
4295 use_nested_relations ==> Include relations greater than a nested level
4296 """
4297
4298 if len(userEntityID_list)==0:
4299 return []
4300
4301 if use_nested_relations:
4302 PARTICIPANT_TABLE = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE.get_table_name()
4303 else:
4304 PARTICIPANT_TABLE = self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE.get_table_name()
4305
4306
4307 tables = [ "%s u1" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)),
4308 "%s p1" %PARTICIPANT_TABLE,
4309 "%s r" %(self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE.get_table_name())]
4310
4311 columns = [ ("u1.userEntityID", "userEntityID"),
4312 ("r.externalEntityRelationID", "externalEntityRelationID"),
4313 ("r.type", "type"),
4314 ("p1.externalEntityID", "externalEntityID")]
4315
4316 join_conditions = [("u1.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list])),
4317 ("p1.externalEntityID","=","u1.externalEntityID"),
4318 ("p1.externalEntityRelationID","=","r.externalEntityRelationID")]
4319
4320 if len(listRelationType) > 0:
4321 join_conditions.append(("r.type","IN","(\"%s\")" % "\",\"".join([ relationType for relationType in listRelationType])))
4322
4323
4324 num = 1
4325 for attribute_name, values in dictRelationAttributeRestriction.iteritems():
4326
4327 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_name.lower()]
4328 tables.append( "%s AS EERA%s" %(current_table.get_table_name(),num) )
4329 if BianaObjects.ExternalEntityAttribute.isNumericAttribute(attribute_name, self.biana_database) or BianaObjects.ExternalEntityAttribute.isSpecialAttribute(attribute_name, self.biana_database):
4330 regex = re.compile("([><=]*)([\d\.]+)")
4331 for current_value in values:
4332 m = regex.match(current_value)
4333 if m:
4334 join_conditions.append(("EERA%s.value" %num,m.group(1),m.group(2)))
4335 else:
4336 join_conditions.append(("EERA%s.value" %num,"=","\"%s\"" %current_value))
4337 else:
4338 join_conditions.append(("EERA%s.value" %num,"IN","(\"%s\")" %("\",\"".join([ str(x) for x in values]))))
4339 join_conditions.append( ("r.externalEntityRelationID","=","EERA%s.externalEntityID" %num ) )
4340 num += 1
4341
4342 query = self.db._get_select_sql_query( tables = tables,
4343 columns = columns,
4344 join_conditions = join_conditions )
4345
4346
4347 tables = [ ("(%s)" %query, "query1"),
4348 "%s p2" %PARTICIPANT_TABLE,
4349 "%s u2" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)),
4350 "%s e" %(self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name()) ]
4351
4352 columns = [ "query1.userEntityID",
4353 ("u2.userEntityID", "userEntityID2"),
4354 "query1.externalEntityRelationID",
4355 "query1.type",
4356 ("e.type", "etype") ]
4357
4358 join_conditions = [ ("query1.externalEntityRelationID","=","p2.externalEntityRelationID"),
4359 ("p2.externalEntityID","=","u2.externalEntityID"),
4360 ("query1.externalEntityID","!=","p2.externalEntityID"),
4361 ("p2.externalEntityID", "=", "e.externalEntityID")]
4362
4363 if limit_to_userEntityID_list is True:
4364 join_conditions.append(("u2.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list ])))
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376 query = self.db._get_select_sql_query( tables = tables,
4377 columns = columns,
4378 join_conditions = join_conditions,
4379 distinct_columns = True )
4380
4381 query = self._apply_restrictions_to_query( query = query,
4382 unification_protocol_name = unification_protocol_name,
4383 attribute_restrictions= attribute_restrictions,
4384 column_name_to_restrict="userEntityID2" )
4385
4386 query = self._apply_negative_restrictions_to_query( query = query,
4387 unification_protocol_name = unification_protocol_name,
4388 negative_attribute_restrictions = negative_attribute_restrictions,
4389 column_name_to_restrict="userEntityID2" )
4390
4391 interacting_uE = list(self.db.select_db_content( query, answer_mode = "raw" ))
4392
4393 return interacting_uE
4394
4395 - def get_user_entity_relations(self, unification_protocol_name, userEntityID_list, attribute_restrictions = [], negative_attribute_restrictions = [], listRelationType=[], dictRelationAttributeRestriction={}, use_self_relations=True, limit_to_userEntityID_list=False, use_nested_relations=True):
4396
4397 return self.OLDget_user_entity_relations(unification_protocol_name, userEntityID_list, attribute_restrictions, negative_attribute_restrictions, listRelationType, dictRelationAttributeRestriction, use_self_relations, limit_to_userEntityID_list, use_nested_relations)
4398
4399
4400
4401 - def OLDget_user_entity_relations(self, unification_protocol_name, userEntityID_list, attribute_restrictions = [], negative_attribute_restrictions = [], listRelationType=[], dictRelationAttributeRestriction={}, use_self_relations=True, limit_to_userEntityID_list=False, use_nested_relations=True):
4402 """
4403 #! Nasty trick (not used anymore): to not to join externalEntityType table in the query in case sth is interacting with itself (see use_self_relations) there is an internal type called "self_type" which should be handled carefully whenever this method is called and type is going to be used
4404 """
4405
4406 if len(userEntityID_list)==0:
4407 return []
4408
4409 if use_nested_relations:
4410 PARTICIPANT_TABLE = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE.get_table_name()
4411 else:
4412 PARTICIPANT_TABLE = self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE.get_table_name()
4413
4414
4415 tables = ["%s u1" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)),
4416 "%s p1" %PARTICIPANT_TABLE,
4417 "%s u2" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)),
4418 "%s p2" %PARTICIPANT_TABLE,
4419 "%s r" %(self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE.get_table_name()),
4420 "%s e" %(self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name())]
4421
4422 columns = [ ("u1.userEntityID","userEntityID1"),
4423 ("u2.userEntityID","userEntityID2"),
4424 ("r.externalEntityRelationID", "externalEntityRelationID"),
4425 ("r.type", "type"),
4426 ("e.type", "etype") ]
4427
4428
4429
4430 fixed_conditions = [("u1.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list]),None)]
4431 join_conditions = [("p1.externalEntityID","=","u1.externalEntityID"),
4432 ("p1.externalEntityRelationID","=","p2.externalEntityRelationID"),
4433 ("p2.externalEntityID","=","u2.externalEntityID"),
4434 ("p1.externalEntityID","!=","p2.externalEntityID"),
4435 ("p1.externalEntityRelationID","=","r.externalEntityRelationID"),
4436 ("p2.externalEntityID", "=", "e.externalEntityID")]
4437
4438 if limit_to_userEntityID_list is True:
4439 join_conditions.append(("u2.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list ]),None))
4440
4441
4442 num = 1
4443 for attribute_name, values in dictRelationAttributeRestriction.iteritems():
4444
4445 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_name.lower()]
4446 tables.append( "%s AS EERA%s" %(current_table.get_table_name(),num) )
4447 if BianaObjects.ExternalEntityAttribute.isNumericAttribute(attribute_name, self.biana_database) or BianaObjects.ExternalEntityAttribute.isSpecialAttribute(attribute_name, self.biana_database):
4448 regex = re.compile("([><=]*)([\d\.]+)")
4449 for current_value in values:
4450 m = regex.match(current_value)
4451 if m:
4452 join_conditions.append(("EERA%s.value" %num,m.group(1),m.group(2)))
4453 else:
4454 join_conditions.append(("EERA%s.value" %num,"=","\"%s\"" %current_value))
4455 else:
4456 join_conditions.append(("EERA%s.value" %num,"IN","(\"%s\")" %("\",\"".join([ str(x) for x in values]))))
4457 join_conditions.append( ("r.externalEntityRelationID","=","EERA%s.externalEntityID" %num ) )
4458 num += 1
4459
4460
4461 if len(listRelationType) > 0:
4462 fixed_conditions.append(("r.type","IN","(\"%s\")" % "\",\"".join([ relationType for relationType in listRelationType]),None ))
4463
4464
4465 query = self.db._get_select_sql_query( tables = tables,
4466 columns = columns,
4467 fixed_conditions = fixed_conditions,
4468 join_conditions = join_conditions,
4469 distinct_columns = True )
4470
4471
4472
4473
4474
4475
4476
4477
4478 query = self._apply_restrictions_to_query( query = query,
4479 unification_protocol_name = unification_protocol_name,
4480 attribute_restrictions= attribute_restrictions,
4481 column_name_to_restrict="userEntityID2" )
4482
4483
4484 query = self._apply_negative_restrictions_to_query( query = query,
4485 unification_protocol_name = unification_protocol_name,
4486 negative_attribute_restrictions = negative_attribute_restrictions,
4487 column_name_to_restrict="userEntityID2" )
4488
4489
4490
4491
4492
4493 interacting_uE = list(self.db.select_db_content( query, answer_mode = "raw" ))
4494
4495 if( use_self_relations is True ):
4496
4497
4498 fixed_conditions = [("u1.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list]),None)]
4499
4500 if len(listRelationType) > 0:
4501 fixed_conditions.append(("r.type","IN","(\"%s\")" % "\",\"".join([ relationType for relationType in listRelationType]),None ))
4502
4503 tables = ["%s u1" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)),
4504 "%s p1" %PARTICIPANT_TABLE,
4505 "%s c" %self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT["cardinality"].get_table_name(),
4506 "%s r" %(self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE.get_table_name()),
4507 "%s e" %(self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name())]
4508
4509 columns = ["u1.userEntityID","r.externalEntityRelationID", ("r.type", "type"), ("e.type", "etype") ]
4510
4511
4512 join_conditions = [("p1.externalEntityID","=","u1.externalEntityID"),
4513 ("c.externalEntityRelationParticipantID","=","p1.externalEntityRelationParticipantID"),
4514 ("c.value",">",1),
4515 ("r.externalEntityRelationID","=","p1.externalEntityRelationID"),
4516 ("p1.externalEntityID", "=", "e.externalEntityID")]
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537 query = self.db._get_select_sql_query( tables = tables,
4538 columns = columns,
4539 fixed_conditions = fixed_conditions,
4540 join_conditions = join_conditions )
4541
4542 query = self._apply_relation_restrictions_to_query( query = query,
4543 attribute_restrictions_dict = dictRelationAttributeRestriction,
4544 column_name_to_restrict="externalEntityRelationID" )
4545
4546
4547 query = self._apply_restrictions_to_query( query = query,
4548 unification_protocol_name = unification_protocol_name,
4549 attribute_restrictions= attribute_restrictions,
4550 column_name_to_restrict="userEntityID" )
4551
4552 query = self._apply_negative_restrictions_to_query( query = query,
4553 unification_protocol_name = unification_protocol_name,
4554 negative_attribute_restrictions = negative_attribute_restrictions,
4555 column_name_to_restrict="userEntityID" )
4556
4557
4558
4559
4560 interacting_uE.extend([ (x,x,y,z,t) for x,y,z,t in self.db.select_db_content( query, answer_mode="raw" )])
4561
4562
4563
4564 return interacting_uE
4565
4566
4568 """
4569 Returns a list of tuples with userEntityID1, userEntityID2
4570 OBSOLETE - use get_user_entity_relations_by_sharing_attributes instead
4571
4572 """
4573
4574 raise Exception("OBSOLETE function call!")
4575
4576 equivalent_uE_set = sets.Set()
4577
4578 for current_expansion_block in attribute_list:
4579 tables = ["%s u1" %(self._get_user_entity_table_name(unification_protocol_name = unification_protocol_name)),
4580 "%s u2" %(self._get_user_entity_table_name(unification_protocol_name = unification_protocol_name))]
4581 columns = ["u1.userEntityID as original_uE",
4582 "u2.userEntityID AS eq_uE"]
4583 group_by = ["u1.userEntityID" , "u2.userEntityID"]
4584 join_conditions = [("u1.userEntityID","IN","(%s)" %",".join(map(str,user_entity_id_list)))]
4585
4586 for (current_expansion_attribute, parameter_and_value_list) in current_expansion_block:
4587 tables.append( "%s %s_1" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_expansion_attribute.lower()],current_expansion_attribute) )
4588 tables.append( "%s %s_2" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_expansion_attribute.lower()],current_expansion_attribute) )
4589 tables.append( "%s u_%s" %(self._get_user_entity_table_name(unification_protocol_name = unification_protocol_name),current_expansion_attribute) )
4590
4591 if current_expansion_attribute.lower() == "proteinsequence":
4592
4593 tables.append( "%s d_1" %(self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[current_expansion_attribute.lower()]["table"].get_table_name()) )
4594 tables.append( "%s d_2" %(self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[current_expansion_attribute.lower()]["table"].get_table_name()) )
4595
4596 join_conditions.extend( [ ("%s_1.externalEntityID" %current_expansion_attribute,"=","u1.externalEntityID"),
4597 ("%s_2.externalEntityID" %current_expansion_attribute,"=","u_%s.externalEntityID" %current_expansion_attribute),
4598 ("%s_1.value" %current_expansion_attribute,"=","d_1.sequenceMD5"),
4599 ("%s_2.value" %current_expansion_attribute,"=","d_2.sequenceMD5"),
4600 ("u_%s.userEntityID" %current_expansion_attribute,"=","u2.userEntityID"),
4601 ("u1.userEntityID","!=","u2.userEntityID") ] )
4602
4603 if len(parameter_and_value_list) > 0:
4604 tables.append( "%s b" %(self.biana_database.PROTEIN_BLAST_RESULTS_TABLE.get_table_name()) )
4605 join_conditions.extend( [ ("b.sequenceID_A","=","d_1.proteinSequenceID"),
4606 ("b.sequenceID_B","=","d_2.proteinSequenceID") ] )
4607 join_conditions.extend( [ ("b.%s" % parameter, ">=", value) for (parameter, value) in parameter_and_value_list ] )
4608 else:
4609 join_conditions.extend( [ ("d_1.value","=","d_2.value") ] )
4610 else:
4611 join_conditions.extend( [ ("%s_1.value" %current_expansion_attribute,"=","%s_2.value" %current_expansion_attribute),
4612 ("%s_1.externalEntityID" %current_expansion_attribute,"=","u1.externalEntityID"),
4613 ("%s_2.externalEntityID" %current_expansion_attribute,"=","u_%s.externalEntityID" %current_expansion_attribute),
4614 ("u_%s.userEntityID" %current_expansion_attribute,"=","u2.userEntityID"),
4615 ("u1.userEntityID","!=","u2.userEntityID") ] )
4616 if len(parameter_and_value_list) > 0:
4617 join_conditions.extend( [ ("%s_2.%s" %(current_expansion_attribute, parameter),"=","\"%s\"" %value) for parameter, value in parameter_and_value_list ] )
4618
4619
4620 query = self.db._get_select_sql_query( tables = tables,
4621 columns = columns,
4622 join_conditions = join_conditions,
4623 group_conditions = group_by )
4624
4625
4626
4627 equivalent_uE_set.update( self.db.select_db_content( sql_query = query,
4628 answer_mode="raw" ) )
4629
4630 return equivalent_uE_set
4631
4632
4633
4634 - def get_expanded_entity_relations(self, unification_protocol_name, userEntityID_list, expansionAttributesList=[], listRelationType=[], use_self_relations=True, limit_to_userEntityID_list=False, expansionLevel=2, attribute_restrictions=[], negative_attribute_restrictions=[]):
4635 """
4636 unification_protocol_name: name of the unification protocol to be used
4637
4638 userEntityID_list: user entity ids to be searched for relations based on the shared attributes
4639
4640 expansionAttributesList: list of lists containing (attribute, list_of_parameters_and_values) where list_of_parameter_and_values contains tuples like (parameter, value). Each tuple in the inner list is an expansion criterion (anded with the other expansion criteria in the inner list whereas all inner lists are ored in the query). List contains all the attributes that must be used together for the expansion.
4641
4642 listRelationType: type of relations between attribute sharing entry and its partners to be inferred
4643
4644 use_self_relations: including self relations or not
4645
4646 limit_to_userEntityID_list: use given user entity ids only in inference # TODO!!! NOT USED NOW.
4647
4648 expansionLevel: number of relations (edges) to look further during inference based on shared attributes
4649
4650 attribute_restrictions: restrictions to be applied on the attributes # TODO!!!! NOT USED NOW.
4651 """
4652
4653
4654 if expansionLevel != 1 and expansionLevel != 2:
4655 raise ValueError("You must specify level to 1 or 2")
4656
4657
4658
4659 u1_u2_set = sets.Set( self.get_user_entity_relations_by_sharing_attributes( unification_protocol_name = unification_protocol_name,
4660 userEntityID_list = userEntityID_list,
4661 listAttributes = expansionAttributesList ) )
4662
4663
4664 u2_list = [ x[1] for x in u1_u2_set ]
4665
4666
4667 u2_to_u1 = dict([ (x[1],(x[0], x[2])) for x in u1_u2_set ])
4668
4669 temporary_predictions_uEr = self.get_user_entity_relations( unification_protocol_name = unification_protocol_name,
4670 userEntityID_list = u2_list,
4671 listRelationType = listRelationType,
4672 use_self_relations = True,
4673 limit_to_userEntityID_list = False )
4674
4675
4676
4677 predicted_uEr = []
4678
4679
4680 u3_externalEntityRelationID = {}
4681 u3_u1_predictions = {}
4682 u3_relation_type = {}
4683 u1_u2 = {}
4684
4685
4686 for (uE2, uE3, eErID, eErType, eE3Type) in temporary_predictions_uEr:
4687
4688 uE1, type = u2_to_u1[uE2]
4689
4690
4691
4692
4693 if use_self_relations is False and uE1==uE3:
4694 pass
4695 else:
4696 predicted_uEr.append( ( uE1,
4697 uE3,
4698 None,
4699
4700
4701
4702
4703
4704 "inferred_" + eErType,
4705 eE3Type) )
4706
4707 u3_externalEntityRelationID.setdefault(uE3,sets.Set()).add(eErID)
4708 u3_relation_type[uE3] = eErType
4709 u3_u1_predictions.setdefault(uE3,[]).append(uE1)
4710 u1_u2.setdefault(uE1,sets.Set()).add(uE2)
4711
4712 del u2_to_u1
4713
4714
4715 if expansionLevel==2 and len(temporary_predictions_uEr)>0:
4716
4717 del temporary_predictions_uEr
4718
4719 u3set = sets.Set(map(str,u3_externalEntityRelationID.keys()))
4720
4721 done = 0
4722
4723 itime = time.time()
4724
4725 for current_uE3 in u3set:
4726 done += 1
4727
4728 temp_u3_u4_sharing_attributes = self.get_user_entity_relations_by_sharing_attributes( unification_protocol_name = unification_protocol_name,
4729 userEntityID_list = [current_uE3],
4730 listAttributes = expansionAttributesList
4731 )
4732
4733 for current_uE3, current_uE4, current_uE4_type in temp_u3_u4_sharing_attributes:
4734 for current_eErID in u3_externalEntityRelationID[current_uE3]:
4735 for current_uE1 in u3_u1_predictions[current_uE3]:
4736 if use_self_relations is False and current_uE1==current_uE4:
4737 pass
4738 else:
4739 predicted_uEr.append((current_uE1,
4740 current_uE4,
4741 None,
4742
4743
4744
4745
4746
4747 "inferred_" + u3_relation_type[current_uE3],
4748 current_uE4_type))
4749
4750 if done%100 == 0:
4751 print "%s done in %s seconds [%s]" %(done, time.time()-itime,len(predicted_uEr))
4752
4753
4754
4755
4756
4757
4758 predicted_uE_ids_set = sets.Set( [int(x[1]) for x in predicted_uEr] )
4759
4760 if limit_to_userEntityID_list is True:
4761 predicted_uE_ids_set = sets.Set(map(int,userEntityID_list)).intersection(predicted_uE_ids_set)
4762
4763 if len(attribute_restrictions)>0:
4764 all_possible = self.db.select_db_content( self._get_attribute_restrictions_query( unification_protocol_name = unification_protocol_name,
4765 negative_attribute_restrictions = attribute_restrictions ),
4766 answer_mode = "list" )
4767 predicted_uE_ids_set = predicted_uE_ids_set.intersection(sets.Set(map(int,all_possible)))
4768
4769 if len(negative_attribute_restrictions)>0:
4770 excluded_list = self.db.select_db_content( self._get_attribute_restrictions_query( unification_protocol_name = unification_protocol_name,
4771 negative_attribute_restrictions = negative_attribute_restrictions ),
4772 answer_mode = "list" )
4773
4774 predicted_uE_ids_set = predicted_uE_ids_set.difference(sets.Set(map(int,excluded_list)))
4775
4776 filtered_predicted_uEr = []
4777 for current_prediction in predicted_uEr:
4778 if int(current_prediction[1]) in predicted_uE_ids_set:
4779 filtered_predicted_uEr.append(current_prediction)
4780
4781
4782 return filtered_predicted_uEr
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798 if __name__=="__main__":
4799
4800 db_access = BianaDBaccess(dbname="biana_v5", dbhost="localhost", dbuser="root", dbsocket="/home/jgarcia/local/mysql/var/mysql.sock")
4801 print db_access
4802 db_access.create_database()
4803 sys.exit()
4804