Package biana :: Package BianaDB :: Module BianaDBaccess'
[hide private]
[frames] | no frames]

Source Code for Module biana.BianaDB.BianaDBaccess'

   1  """ 
   2      BIANA: Biologic Interactions and Network Analysis 
   3      Copyright (C) 2009  Javier Garcia-Garcia, Emre Guney, Baldo Oliva 
   4   
   5      This program is free software: you can redistribute it and/or modify 
   6      it under the terms of the GNU General Public License as published by 
   7      the Free Software Foundation, either version 3 of the License, or 
   8      (at your option) any later version. 
   9   
  10      This program is distributed in the hope that it will be useful, 
  11      but WITHOUT ANY WARRANTY; without even the implied warranty of 
  12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  13      GNU General Public License for more details. 
  14   
  15      You should have received a copy of the GNU General Public License 
  16      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
  17   
  18  """ 
  19   
  20   
  21  # Python library 
  22  import time 
  23  import sys 
  24  import md5 
  25  import traceback 
  26  import copy 
  27  import sets 
  28  from math import ceil 
  29  import re 
  30   
  31  # General Database 
  32  import ConnectorDB 
  33  import database 
  34  import biana.utilities.int_ascii as int_ascii 
  35   
  36  # Biana specific 
  37  import biana.BianaObjects as BianaObjects 
  38  from BianaDatabaseDefinition import BianaDatabase 
  39   
  40  # Enable debugging for web php scripts 
  41  debug_web = 0 #1; #! 
  42   
  43   
  44  # a hard-coded version id assigned to source to prevent a previously created biana database <-> source code inconsistencies 
  45  #BIANA_SOURCE_CODE_VERSION = "Feb_24_09" #"Feb_18_09": method -> method_id 
  46  #BIANA_SOURCE_CODE_VERSION = "Mar_13_09"  #Dynamic attributes 
  47  BIANA_SOURCE_CODE_VERSION = "Mar_16_09"   #Dynamic attributes biana database specific 
  48   
  49   
50 -class BianaDBaccess(object):
51 """ 52 Class used as an interface with database biana 53 """ 54
55 - def __init__(self, dbname=None, dbhost=None, dbuser=None, dbpassword=None, dbport=None, dbsocket=None, use_buffer=False, lock_tables = False, check_integrity=False ):
56 """ 57 "dbname" is the database name to which you want to connect to (required) 58 "dbhost" is the machine with the mysql server that holds the biana database (required) 59 "dbuser" is the mysql user (not required in most systems) 60 "dbpassword" is the mysql password (not required in most systems) 61 "dbport" is the mysql port (not required in most systems) 62 63 The following parameters should not be used by standard users, only for advanced users or developers: 64 "use_buffer" must be set True when populating database due to performance issues. Automatically controlled by parsers 65 "lock_tables" Allow Connector db to lock tables when using them. It is set to True when populating database due to performance issues 66 "check_integrity" determines if integrity of the database must be checked (if there is any parser not finished or if some table definitions have been changed) 67 """ 68 69 # opening connection to database biana using class BianaDB 70 self.db = ConnectorDB.DB(dbname=dbname, dbhost=dbhost, dbuser=dbuser, dbpassword=dbpassword, dbport=dbport, dbsocket=dbsocket, lock_tables=lock_tables) 71 self.dbname = dbname 72 self.dbhost = dbhost 73 self.dbuser=dbuser 74 self.dbpassword=dbpassword 75 self.dbport=dbport 76 self.dbsocket=dbsocket 77 self.lock_tables=lock_tables 78 self.use_buffer = use_buffer 79 80 # Check into BIANA database which database sources are available 81 self.validSources = None 82 self.valid_source_database_ids = {} 83 self.available_unification_protocols = None # Key: Description. Value: ID 84 85 # Ontology related variables 86 self.available_ontology_names = None # Stores the available ontologies 87 self.ontology_linked_attributes = None 88 self.loaded_ontologies = {} 89 90 self.db_version_modified = None # Variable used to control if it is necessary to update the database version identifier 91 92 self.db_versions_list = None 93 94 self.db.add_autoincrement_columns( table = "externalEntity", attribute = "externalEntityID" ) 95 self.db.add_autoincrement_columns( table = "externalEntityRelationParticipant", attribute = "externalEntityRelationParticipantID" ) 96 self.db.add_autoincrement_columns( table = "sequenceProtein", attribute = "proteinSequenceID" ) 97 self.db.add_autoincrement_columns( table = "sequenceNucleotide", attribute = "nucleotideSequenceID" ) 98 self.db.add_autoincrement_columns( table = "externalDatabaseAttributeTransfer", attribute = "keyID" ) 99 100 self.db_description = None 101 self.db_optimized_for = None 102 103 self.biana_database = BianaDatabase() 104 105 self.transferred_attributes = {} # Dictionary to store how transfer attributes between databases. Key: transferred attribute 106 # Value: set of (externalDatabaseID, key_attribute) 107 # This information has to be hardcoded in parsers 108 # For example, pfam code can be tranferred from pfam database using as key attribute the uniprot accession 109 # This means that pfam codes are going to be fetched from pfam database 110 # Another example is taxonomy name. This can be transferred using as key attribute taxid 111 self.key_attribute_ids = {} 112 113 if self.dbname is not None: 114 115 if not self.db.check_consistency_with_given_source_version(BIANA_SOURCE_CODE_VERSION): 116 # sys.stderr.write("Bailing.. The database you are trying to use is inconsistent with the current source code!\n") 117 raise Exception("Source code - database inconsistency") 118 119 self._load_biana_types_and_attributes() 120 121 # Create new tables if necessary, and modify existing ones if necessary 122 self.db.check_database(database = self.biana_database, verbose=True) 123 124 if check_integrity: 125 self._check_database_integrity() 126 127 self._load_biana_database_information() #JAVI: Indented 128 129 # Temporal data is used while parsing for storing temporal data 130 self.temporal_data = {} 131 self.temporal_data["relations_hierarchy_parents"] = {} # stores all the parents for each external entity id 132 self.store_relations_hierarchy = False 133 134 return
135 136 137 # ---- 138 # methods required for using pickle with biana objects 139 # ----
140 - def __getstate__(self):
141 odict = self.__dict__.copy() # copy the dict since we change it 142 del odict['db'] # remove database entry 143 return odict
144
145 - def __setstate__(self, dict):
146 147 self.__dict__.update(dict) # update attributes 148 self.db = ConnectorDB.DB(dict["dbname"], dict["dbhost"], dict["dbuser"], dict["dbpassword"], dict["dbport"], dict["dbsocket"], dict["lock_tables"]) 149 try: 150 self.db.add_autoincrement_columns( table = "externalEntity", attribute = "externalEntityID" ) 151 self.db.add_autoincrement_columns( table = "externalEntityRelationParticipant", attribute = "externalEntityRelationParticipantID" ) 152 self.db.add_autoincrement_columns( table = "sequenceProtein", attribute = "proteinSequenceID" ) 153 self.db.add_autoincrement_columns( table = "sequenceNucleotide", attribute = "nucleotideSequenceID" ) 154 self.db.add_autoincrement_columns( table = "externalEntityEquivalencesTransfer", attribute = "keyID" ) 155 except: 156 pass 157 return
158
159 - def __getnewargs__(self):
160 return (self.dbname, self.dbhost, self.dbuser, self.dbpassword)
161
162 - def __str__(self):
163 return "BIANA Connection. Database connection: %s" %self.db
164
165 - def isOptimizedForRunning(self):
166 return self.db_optimized_for=="running"
167
168 - def reconnect(self):
169 self.db = ConnectorDB.DB(dbname=self.dbname, dbhost=self.dbhost, dbuser=self.dbuser, dbpassword=self.dbpassword, dbport=self.dbport, dbsocket=self.dbsocket, lock_tables=self.lock_tables)
170
171 - def create_database(self, dbname, description="BIANA DATABASE", optimize_for="parsing", ignore_primary_keys=False):
172 """ 173 It creates necessary tables into a database server 174 175 "description" is a tag for label the new database 176 177 "optimize_for" can take two distinct values: "parsing" and "running". By default, it creates a database optimized for parsing (as it will be created empty). 178 """ 179 180 self.db.insert_db_content( sql_query = self.biana_database.get_sql_query(ignore_primary_keys=ignore_primary_keys).split(";") ) 181 182 self.optimize_database_for( mode=optimize_for ) 183 184 # Insert initial valid data types and attributes 185 import biana.biana_globals as BIANA_GLOBALS 186 187 [ self.add_valid_external_entity_type(current_type) for current_type in BIANA_GLOBALS.EXTERNAL_ENTITY_TYPES ] 188 [ self.add_valid_external_entity_relation_type(current_type) for current_type in BIANA_GLOBALS.EXTERNAL_ENTITY_RELATION_TYPES ] 189 [ self.add_valid_identifier_reference_types(current_reference_type) for current_reference_type in BIANA_GLOBALS.VALID_IDENTIFIER_REFERENCE_TYPES ] 190 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_GENERAL_ATTRIBUTES ] 191 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE identifier attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_IDENTIFIER_ATTRIBUTES ] 192 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE versionable identifier attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_VERSIONABLE_IDENTIFIER_ATTRIBUTE_TYPES ] 193 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE descriptive searchable attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES ] 194 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE descriptive attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_DESCRIPTIVE_ATTRIBUTE_TYPES ] 195 [ self.add_valid_external_entity_attribute_type(current_type, data_type, "eE numeric attribute") for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_NUMERIC_ATTRIBUTE_TYPES ] 196 [ self.add_valid_external_entity_relation_participant_attribute_type(current_type, data_type) for (current_type, data_type) in BIANA_GLOBALS.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TYPES ] 197 [ self.add_valid_external_entity_attribute_type(current_type, data_type_dict, "eE special attribute") for (current_type, data_type_dict) in BIANA_GLOBALS.EXTERNAL_ENTITY_SPECIAL_ATTRIBUTE_TYPES.iteritems()] 198 199 200 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.BIANA_DATABASE_TABLE, 201 column_values = (("description",description), 202 ("optimized_for",optimize_for)), 203 use_buffer = False ) ) 204 self.db.close() 205 206 self.db = ConnectorDB.DB(dbname=dbname, dbhost=self.dbhost, dbuser=self.dbuser, dbpassword=self.dbpassword, dbport=self.dbport, dbsocket=self.dbsocket) 207 208 self.dbname = dbname 209 210 self._load_biana_types_and_attributes() 211 212 self.db.check_database(database=self.biana_database, ignore_primary_keys=ignore_primary_keys) 213 214 # Creates a default protocol (used to work without any kind of unification) 215 default_protocol = BianaObjects.UnificationProtocol( description = "No unification", BianaDatabaseVersion = "X" ) 216 217 self.db_description = description 218 self.db_optimized_for = optimize_for 219 220 self._create_new_unification_protocol_tables(default_protocol)
221 222
224 225 self._load_biana_types_and_attributes() 226 self._load_biana_database_information() 227 self.db.check_database(self.biana_database)
228 229
230 - def add_valid_external_entity_type(self, type):
231 232 if self.biana_database.is_valid_external_entity_type(type): 233 sys.stderr.write("Trying to add an existing External Entity Type: %s. Not added again.\n" %type) 234 return 235 236 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE, 237 column_values = [("name",type), 238 ("category","eE type")], 239 240 use_buffer = False ) ) 241 242 self.biana_database.add_valid_external_entity_type(type)
243 244
246 247 return self.biana_database.get_valid_external_entity_types()
248 249 251 252 253 if self.biana_database.is_valid_external_entity_relation_type(type): 254 sys.stderr.write("Trying to add an existing External Entity Relation Type: %s. Not added again.\n" %type) 255 return 256 257 258 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE, 259 column_values = [("name",type), 260 ("category","eEr type")], 261 use_buffer = False ) ) 262 263 self.biana_database.add_valid_external_entity_relation_type(type)
264 265
266 - def get_valid_external_entity_relation_types(self):
267 268 return self.biana_database.get_valid_external_entity_relation_types()
269 270
271 - def add_valid_identifier_reference_types(self, current_reference_type):
272 273 if self.biana_database.is_valid_identifier_reference_type(current_reference_type): 274 sys.stderr.write("Trying to add an existing Identifier reference type: %s. Not added again.\n" %tcurrent_reference_ype) 275 return 276 277 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE, 278 column_values = [("name",current_reference_type), 279 ("category","identifier reference type")], 280 use_buffer = False ) ) 281 282 self.biana_database.add_valid_identifier_reference_type( current_reference_type )
283 284 285
286 - def add_valid_external_entity_attribute_type(self, name, data_type, category):
287 288 if self.biana_database.is_valid_external_entity_attribute_type(name): 289 sys.stderr.write("Trying to add an existing External Entity Attribute Type: %s. Not added again\n" %name) 290 return 291 292 additional_fields_tuple_list = [] 293 294 if category.lower()=="ee special attribute": 295 data_type_dict = data_type 296 for current_field_tuple in data_type_dict["fields"]: 297 current_data_type = current_field_tuple[1] 298 current_field = current_field_tuple[0] 299 if current_field_tuple[0] == "value": 300 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE, 301 column_values = [("name",name), 302 ("data_type",current_data_type), 303 ("category",category)], 304 use_buffer = False ) ) 305 else: 306 current_null = current_field_tuple[2] 307 if current_null is False: current_null=0 308 else: current_null=1 309 310 additional_fields_tuple_list.append((current_field, current_data_type, current_null)) 311 312 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.SPECIAL_ATTRIBUTES_TABLE, 313 column_values = [("attribute_name",name), 314 ("field_name",current_field), 315 ("data_type",current_data_type), 316 ("canbenull",current_null)], 317 use_buffer = False ) ) 318 319 else: 320 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE, 321 column_values = [("name",name), 322 ("data_type",data_type), 323 ("category",category)], 324 use_buffer = False ) ) 325 326 self.biana_database.add_valid_external_entity_attribute_type(name, data_type, category, additional_fields_tuple_list)
327 328
329 - def add_valid_external_entity_relation_participant_attribute_type(self, name, data_type):
330 331 if self.biana_database.is_valid_external_entity_relation_participant_attribute_type(name): 332 sys.stderr.write("Trying to add an existing External Entity Relation Participant Attribute Type: %s. Not added again\n" %name) 333 return 334 335 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query( table = self.biana_database.TYPES_AND_ATTRIBUTES_TABLE, 336 column_values = [("name",name), 337 ("data_type",data_type), 338 ("category","eErP attribute")], 339 use_buffer = False ) )
340 341
342 - def _load_biana_types_and_attributes(self):
343 """ 344 Method to load current biana database types and attributes 345 """ 346 347 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [ self.biana_database.TYPES_AND_ATTRIBUTES_TABLE ], 348 columns = ["name", "data_type","category"] ), 349 answer_mode = "raw" ) 350 351 for current_data in data: 352 if current_data[2].lower() == "identifier reference type": self.biana_database.add_valid_identifier_reference_type(current_data[0]) 353 354 355 for current_data in data: 356 current_category = current_data[2] 357 current_attribute = current_data[0] 358 data_type = current_data[1] 359 new_table = None 360 361 if current_category.lower() == "ee type": self.biana_database.add_valid_external_entity_type(current_attribute) 362 elif current_category.lower() == "eer type": self.biana_database.add_valid_external_entity_relation_type(current_attribute) 363 elif current_category.lower() == "eerp attribute" or current_category.lower() == "eerp numeric attribute" or current_category.lower() == "eerp descriptive attribute" or current_category.lower() == "eerp descriptive searchable attribute": 364 self.biana_database.add_valid_external_entity_relation_participant_attribute_type( current_attribute, data_type, current_category, []) 365 elif current_category.lower() == "ee identifier attribute" or current_category.lower() == "ee versionable identifier attribute" or current_category.lower() == "ee descriptive attribute" or current_category.lower() == "ee descriptive searchable attribute" or current_category.lower() == "ee numeric attribute": self.biana_database.add_valid_external_entity_attribute_type( current_attribute, data_type, current_category, [] ) 366 elif current_category.lower() == "ee attribute": self.biana_database.add_valid_external_entity_attribute_type( current_attribute, data_type, current_category, [] ) 367 elif current_category.lower()== "ee special attribute": 368 # Get additional fields 369 special_data = self.db.select_db_content( self.db._get_select_sql_query( tables = [ self.biana_database.SPECIAL_ATTRIBUTES_TABLE ], 370 columns = ["field_name", "data_type", "canbenull"], 371 fixed_conditions = [("attribute_name","=",current_attribute)] ), 372 answer_mode = "raw" ) 373 self.biana_database.add_valid_external_entity_attribute_type( current_attribute, data_type, current_category, special_data ) 374 elif current_category.lower()== "identifier reference type": pass 375 else: 376 raise ValueError("%s category not recognized!" %current_category) 377 378 if new_table: 379 self.biana_database.add_table(new_table)
380 381
382 - def _transform_attribute_value_data_type_to_biana_database_attribute_data_type( self, attribute_identifier, value ):
383 384 NUMBER_RE = re.compile("[0-9]+") 385 CHAR_RE = re.compile("(char|varchar|text)\((\d+)\)") # (var|)char 386 387 new_value = value 388 389 data_type = self.biana_database.get_attribute_data_type(attribute_identifier) 390 391 # to avoid incorrect string value assignment to a integer field in database 392 if isinstance(value, str): 393 if value.strip() == "": 394 sys.stderr.write("Trying to create an External Entity Attribute with an empty value") 395 new_value = None 396 397 if data_type.lower().startswith("int"): 398 search = NUMBER_RE.search(value) 399 if search: 400 new_value = value[search.start():] 401 else: 402 new_value = None 403 sys.stderr.write("Trying to create an External Entity Attribute %s of an incorrect value: %s\n" % (attribute_identifier, value)) 404 else: 405 search = CHAR_RE.match(value) 406 char_count = 0 407 408 if search: 409 char_count = int(search.group(2)) 410 else: 411 char_count = 200000000 412 413 if char_count > 0 and char_count < len(value): 414 new_value = value[:char_count] 415 sys.stderr.write("Trying to create an External Entity Attribute %s of an overfloating value: %s\n" % (attribute_identifier, value)) 416 417 return new_value
418 419
420 - def _load_biana_database_information(self):
421 """ 422 Method to load biana database information into this class 423 424 It loads information about the database description, its optimization status, special attributes and special attributes 425 426 It is used when initializing a BianaDBaccess object 427 """ 428 429 if self.dbname is not None: 430 431 data = self.db.select_db_content( sql_query = self.db._get_select_sql_query( tables = [ self.biana_database.BIANA_DATABASE_TABLE ], 432 columns = ["description","optimized_for"] ), 433 answer_mode = "raw" ) 434 435 self.db_description = data[0][0] 436 self.db_optimized_for = data[0][1] 437 438 439 data = self.db.select_db_content( sql_query = self.db._get_select_sql_query( tables = [ self.biana_database.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE ], 440 columns = ["keyID","externalDatabaseID", "attributeKey", "transferAttribute"] ), 441 answer_mode = "raw" ) 442 443 for (keyID, externalDatabaseID, key, transfer) in data: 444 self.transferred_attributes.setdefault(transfer.lower(),sets.Set()).add((externalDatabaseID,key.lower(),keyID)) 445 self.key_attribute_ids.setdefault((externalDatabaseID,key.lower()),keyID)
446 447
448 - def get_available_ontology_names(self, name=None):
449 """ 450 """ 451 452 if self.available_ontology_names is None: 453 454 data = self.db.select_db_content( sql_query = self.db._get_select_sql_query ( tables = [ self.biana_database.ONTOLOGY_INFO_TABLE ], 455 columns = ["name","linked_attribute","key_id","level_attribute"] ), 456 answer_mode = "raw" ) 457 458 self.available_ontology_names = dict([ (x[0],x[1]) for x in data ]) 459 self.ontology_linked_attributes = dict([ (x[1],{"ontology_name":x[0], "key_id":x[2], "level_attribute":x[3]}) for x in data ]) 460 461 if name is None: 462 return self.available_ontology_names 463 else: 464 return self.available_ontology_names[name.lower()]
465 466
467 - def _is_ontology_linked_attribute(self, attribute_identifier):
468 469 if self.ontology_linked_attributes is None: 470 self.get_available_ontology_names() 471 472 if( attribute_identifier.lower() in self.ontology_linked_attributes ): 473 return True 474 475 return False
476 477
478 - def _add_key_attribute(self, external_database_id, key_attribute):
479 """ 480 Adds a key attribute, used in transfer attributes (key attribute) and ontologies (linked attribute) 481 482 Creates the necessary tables 483 """ 484 key_attribute = key_attribute.lower() 485 486 if self.key_attribute_ids.has_key( (external_database_id, key_attribute) ): 487 488 return self.key_attribute_ids[ (external_database_id, key_attribute) ] 489 490 else: 491 492 new_id = self._get_new_key_id() 493 494 value_data_type = self.biana_database.get_attribute_data_type(key_attribute) 495 496 new_table = database.TableDB( table_name = self._get_key_attribute_table_name( key_id = new_id ), 497 table_fields = [ database.FieldDB(field_name = "externalEntityID", 498 data_type = "integer(4) unsigned", 499 null = False ), 500 database.FieldDB(field_name = "value", 501 data_type = value_data_type, 502 null = False ) ], 503 indices = [("value","externalEntityID")] ) 504 505 self.db.insert_db_content( sql_query = new_table.create_mysql_query(), answer_mode = None ) 506 507 self.key_attribute_ids.setdefault((external_database_id,key_attribute.lower()),new_id) 508 509 return new_id
510 511 512 513
514 - def _add_transfer_attribute(self, externalDatabaseID, key_attribute, transfer_attribute):
515 516 key_id = self._add_key_attribute( external_database_id = externalDatabaseID, 517 key_attribute = key_attribute ) 518 519 self.db.insert_db_content( sql_query = self.db._get_insert_sql_query(table = self.biana_database.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE.get_table_name(), 520 column_values = (("keyID",key_id), 521 ("externalDatabaseID", externalDatabaseID), 522 ("attributeKey", key_attribute), 523 ("transferAttribute", transfer_attribute)), 524 use_buffer = False ), 525 answer_mode = None ) 526 527 self.transferred_attributes.setdefault(transfer_attribute.lower(),sets.Set()).add((externalDatabaseID,key_attribute.lower())) 528 529 530 531 # Create a new variable for this class. This variable is only created in this method when inserting a new transferred attribute 532 try: 533 self.transferID_key[transfer_attribute.lower()] = (key_id,key_attribute.lower()) 534 except: 535 self.transferID_key = {} 536 self.transferID_key[transfer_attribute.lower()] = (key_id,key_attribute.lower())
537 538
539 - def _is_transferred_attribute(self, attribute_identifier):
540 return self.transferred_attributes.has_key(attribute_identifier.lower())
541
542 - def _is_key_attribute(self, attribute_identifier, external_database_id):
543 return self.key_attribute_ids.has_key( (external_database_id, attribute_identifier.lower()) )
544
545 - def _get_key_attribute_table_name( self, key_id ):
546 return "key_attribute_%s" %key_id
547
548 - def _get_linked_attribute_ontology_name(self, attribute_identifier):
549 """ 550 Returns the name of the ontology linked to an attribute 551 """ 552 553 if self._is_ontology_linked_attribute(attribute_identifier): 554 return self.ontology_linked_attributes[attribute_identifier.lower()]["name"] 555 raise ValueError("Trying to get the name for an unlinked attribute") 556 else: 557 raise ValueError("Trying to get the name for an unlinked attribute")
558 559 560
561 - def optimize_database_for(self, mode, optimize=False):
562 """ 563 Performs some modifications in database in order to optimize database access efficiency for parsing or running 564 565 "mode" can take two different values: "running" or "parsing" 566 567 "optimize" parameter will be only used if mode is "running" 568 """ 569 570 if mode == "parsing": 571 self.db._disable_indices() 572 573 elif mode == "running": 574 self.db._enable_indices() 575 self.db.insert_db_content( sql_query = self.biana_database.optimize_database( optimize = optimize, analyze = True ) ) 576 577 # NOT DONE AS IT IS VERY INEFFICIENT 578 # Update the external entity equivalences for attribute transfer 579 #print "Updating attribute crossing table..." 580 #self._update_external_entity_equivalences_for_attribute_transfer() 581 582 583 # Precalculate relations hierarchy 584 self._update_relations_hierarchy() 585 586 587 588 589 590 else: 591 raise ValueError("optimizing database mode can only be \"parsing\" or \"running\"") 592 593 # Stores the 594 self.db_optimized_for = mode 595 596 self.db.insert_db_content( sql_query = self.db._get_update_sql_query(table = self.biana_database.BIANA_DATABASE_TABLE, 597 update_column_values = [("optimized_for", self.db_optimized_for)]) )
598 599
600 - def close(self):
601 """ 602 Close the connection with database 603 604 Although it is only necessary to close the BianaDBaccess object when parsing, 605 is highly recommended to use it always as maybe in the future this requirite may change 606 """ 607 608 #print "Closing BianaDBaccess" 609 610 # Check if there is temporal data to be processed in temporal buffer 611 # Process relations hierarchy temporal data 612 if len( self.temporal_data["relations_hierarchy_parents"] )>0: 613 hierarchy_set = sets.Set() 614 615 eE_eERid_dict = self.temporal_data["relations_hierarchy_parents"] 616 617 def get_all_parents(eEid): 618 parents = sets.Set() 619 if( eE_eERid_dict.has_key(eEid) ): 620 parents.update(eE_eERid_dict[eEid]) 621 for current_eEid in eE_eERid_dict[eEid]: 622 parents.update(get_all_parents(current_eEid)) 623 return parents
624 625 for eEid in self.temporal_data["relations_hierarchy_parents"].keys(): 626 for current_parent in get_all_parents(eEid): 627 hierarchy_set.add((current_parent,eEid)) 628 629 630 for current_eERid, current_eEid in hierarchy_set: 631 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE, 632 column_values = (("externalEntityRelationParticipantID", self._get_new_external_entity_relation_participant_id()), 633 (self.biana_database.external_entity_relation_id_col, current_eERid), 634 (self.biana_database.externalEntityID_col, current_eEid )), 635 use_buffer = True )) 636 # If database has been modified, add the control id 637 if self.db_version_modified: 638 self._update_bianaDB_autoincrement_fields() 639 self._update_bianaDB_version() 640 641 self.db.close() 642 643 644 645 646 #################################################################################### 647 # METHODS USED TO MANAGE AVAILABLE PROTEIN TYPES AND DATABASES IN BIANA DATABASE # 648 #################################################################################### 649 650 # -------------------------------------------------------------------------- 651 # Methods used to control BIANA database autoincrement fields 652 # -------------------------------------------------------------------------- 653
654 - def _update_bianaDB_autoincrement_fields(self):
655 """ 656 Method used internally to control biana autoincrement fields. 657 Called by _update_bianaDB_version since updation of autoincrement fields is required before updating version stable ids. 658 """ 659 column_values = [] 660 #if self._get_last_external_entity_id() is not None: 661 column_values.append(("last_externalEntityID",self._get_last_external_entity_id())) 662 #if self._get_last_external_entity_relation_participant_id() is not None: 663 column_values.append(("last_externalEntityRelationParticipantID",self._get_last_external_entity_relation_participant_id())) 664 #if self.get_last_sequence_protein_id() is not None: 665 column_values.append(("last_proteinSequenceID", self._get_last_sequenceProtein_id())) 666 #if self.get_last_sequence_nucleotide_id(): 667 column_values.append(("last_nucleotideSequenceID", self._get_last_sequenceNucleotide_id())) 668 #if self.get_last_transfer_id(): 669 column_values.append(("last_keyID", self._get_last_key_id())) 670 671 #if len(column_values) > 0: 672 self.db.insert_db_content( self.db._get_update_sql_query(table = self.biana_database.BIANA_DATABASE_TABLE.get_table_name(), 673 update_column_values = column_values), 674 answer_mode = None )
675 676 # -------------------------------------------------------------------------- 677 # Methods used to control BIANA database version 678 # --------------------------------------------------------------------------
679 - def _update_bianaDB_version(self):
680 """ 681 Method used internally to control biana version to be able to control compatibilities between diffent database versions 682 683 It also stores the stable values for the database 684 """ 685 686 date = time.localtime() 687 actual_date = "%s-%s-%s" %(date[0],date[1],date[2]) 688 689 self.db.insert_db_content( self.db._get_insert_sql_query(table = self.biana_database.DATABASE_VERSION_TABLE.get_table_name(), 690 column_values = (("dbControlID", md5.new(str(time.localtime())+str(time.time())).hexdigest()), 691 ("date", actual_date), 692 #("stable_externalEntityID",self._get_new_external_entity_id()-1), 693 ("stable_externalEntityID",self._get_last_external_entity_id()), 694 #("stable_externalEntityRelationParticipantID",self._get_new_external_entity_relation_participant_id()-1))), 695 ("stable_externalEntityRelationParticipantID",self._get_last_external_entity_relation_participant_id()))), 696 answer_mode = None )
697 698
699 - def _check_database_integrity(self):
700 """ 701 Checks the integrity of the database 702 """ 703 704 705 sys.stderr.write("checking database integrity\n") 706 707 self.db._check_locked_table(self.biana_database.EXTERNAL_DATABASE_TABLE.get_table_name()) 708 if len(self.db.select_db_content( sql_query = "SELECT * FROM %s WHERE parsingTime IS NULL" %self.biana_database.EXTERNAL_DATABASE_TABLE, answer_mode="list" )): 709 sys.stderr.write("Database does not have integrity. Fixing it...") 710 self._rollback() 711 return
712 713
714 - def _rollback(self):
715 """ 716 Method used to undo all the changes from an stable data insert 717 """ 718 719 max_external_entity_id = self._get_last_stable_external_entity_id() 720 721 max_external_entity_relation_participant_id = self._get_last_stable_external_entity_relation_participant_id() 722 723 for current_table in self.biana_database.get_tables(): 724 if current_table.has_field("externalEntityID"): 725 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = current_table.get_table_name(), 726 fixed_conditions = [("externalEntityID",">",max_external_entity_id, None)] ), 727 answer_mode = None ) 728 729 if current_table.has_field("externalEntityRelationParticipantID"): 730 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = current_table.get_table_name(), 731 fixed_conditions = [("externalEntityRelationParticipantID",">",max_external_entity_relation_participant_id, None)] ), 732 answer_mode = None ) 733 734 735 # Delete keyID 736 max_key_id = self._get_last_stable_key_id() 737 transfer_tables_list = self.db.select_db_content( sql_query = "SHOW TABLES", 738 answer_mode = "list" ) 739 740 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE.get_table_name(), 741 fixed_conditions = [("keyID",">",max_key_id, None)] ), 742 answer_mode = None ) 743 744 regex = re.compile("key_attribute_(\d+)") 745 for current_table in transfer_tables_list: 746 m = regex.match(current_table) 747 if m: 748 if( int(m.group(1))>int(max_key_id) ): 749 self.db.insert_db_content( sql_query = self.db._get_drop_sql_query( table_list = [current_table] ) ) 750 751 # Other tables to delete information 752 # Ontology specific information 753 for current_table in [self.biana_database.ONTOLOGY_IS_A_TABLE.get_table_name(), self.biana_database.ONTOLOGY_IS_PART_OF_TABLE.get_table_name() ]: 754 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = current_table, 755 fixed_conditions = [("externalEntityID",">",max_external_entity_id, None)] ), 756 answer_mode = None ) 757 758 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE.get_table_name(), 759 fixed_conditions = [("parentExternalEntityID",">",max_external_entity_id, None)] ), 760 answer_mode = None ) 761 762 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE.get_table_name(), 763 fixed_conditions = [("childExternalEntityID",">",max_external_entity_id,None)] ), 764 answer_mode = None ) 765 766 767 # Remove sequences 768 max_protein_sequence_id = self._get_last_stable_sequenceProtein_id() 769 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name(), 770 fixed_conditions = [("proteinSequenceID",">",max_protein_sequence_id,None)] ), 771 answer_mode = None ) 772 773 max_nucleotide_sequence_id = self._get_last_stable_sequenceNucleotide_id() 774 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"].get_table_name(), 775 fixed_conditions = [("nucleotideSequenceID",">",max_nucleotide_sequence_id,None)] ), 776 answer_mode = None ) 777 778 # DELETE ALL external Databases without parsing time, as it means the parsing has not been finished... 779 self.db.insert_db_content( sql_query = self.db._get_delete_sql_query( table = self.biana_database.EXTERNAL_DATABASE_TABLE.get_table_name(), 780 fixed_conditions = [("parsingTime","IS","NULL",None)] ), 781 answer_mode = None ) 782 783 return
784 785
786 - def _get_db_versions_list(self):
787 788 if self.db_versions_list is None: 789 790 self.db_versions_list = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.DATABASE_VERSION_TABLE.get_table_name()], 791 columns = ["dbControlID"]), 792 answer_mode = "list" ) 793 return self.db_versions_list
794 795
796 - def _get_table_names(self):
797 798 return self.db.select_db_content( self.db.GetTableNames(), answer_mode="list" )
799 800 801
802 - def get_external_database(self, database_id):
803 804 self._get_valid_source_dbs() 805 806 return self.valid_source_database_ids[database_id]
807 808
809 - def get_external_database_list(self):
810 """ 811 """ 812 813 self._get_valid_source_dbs() 814 815 return self.valid_source_database_ids.values()
816
817 - def _get_valid_source_databases_by_id(self):
818 819 self._get_valid_source_dbs() 820 return self.valid_source_database_ids
821 822
823 - def _get_valid_source_dbs(self):
824 """ 825 Returns a dictionary with current external databases stored in BIANA database 826 827 Database objects are accessed by its name and version 828 """ 829 830 if self.validSources is None: 831 832 833 columns = ["externalDatabaseID","databaseName","databaseVersion","parsedFile","parsedDate","databaseDescription","defaultExternalEntityAttribute","isPromiscuous"] 834 #columns.extend( list(available_attributes) ) 835 836 try: 837 current_sources = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_TABLE.get_table_name()], 838 columns = columns ), 839 answer_mode="raw", remove_duplicates="no" ) 840 return_dict = {} 841 for actual_source in current_sources: 842 databaseObject = BianaObjects.ExternalDatabase( databaseName = actual_source[1], 843 databaseVersion = actual_source[2], 844 databaseFile = actual_source[3], 845 databaseDescription = actual_source[5], 846 defaultExternalEntityAttribute = actual_source[6], 847 databaseDate = actual_source[4], 848 externalDatabaseID = actual_source[0], 849 isPromiscuous = actual_source[7]) 850 851 self.valid_source_database_ids[ databaseObject.get_id() ] = databaseObject 852 853 available_eE_attributes = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE], 854 columns = ["attributeType"], 855 fixed_conditions = [("externalDatabaseID","=",databaseObject.get_id())] ), 856 answer_mode = "list", remove_duplicates = "no" ) 857 858 for actual_attribute in available_eE_attributes: 859 databaseObject.add_valid_external_entity_attribute_type(actual_attribute) 860 861 862 available_eEr_attributes = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE], 863 columns = ["attributeType"], 864 fixed_conditions = [("externalDatabaseID","=",databaseObject.get_id())] ), 865 answer_mode = "list", remove_duplicates = "no" ) 866 867 for actual_attribute in available_eEr_attributes: 868 databaseObject.add_valid_external_entity_relation_attribute_type(actual_attribute) 869 870 871 available_eE_types = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE], 872 columns = ["eEType"], 873 fixed_conditions = [("externalDatabaseID","=",databaseObject.get_id())] ), 874 answer_mode = "list", remove_duplicates = "no" ) 875 876 for actual_attribute in available_eE_types: 877 databaseObject.add_valid_external_entity_type(actual_attribute) 878 879 880 881 available_eEr_types = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE], 882 columns = ["eErType"], 883 fixed_conditions = [("externalDatabaseID","=",databaseObject.get_id())] ), 884 answer_mode = "list", remove_duplicates = "no" ) 885 886 for actual_attribute in available_eEr_types: 887 databaseObject.add_valid_external_entity_relation_type(actual_attribute) 888 889 890 891 892 if return_dict.has_key(actual_source[1]): 893 return_dict[actual_source[1]][actual_source[2]] = databaseObject 894 else: 895 return_dict[actual_source[1]] = {actual_source[2]: databaseObject} 896 897 except: 898 raise 899 900 self.validSources = return_dict 901 902 return self.validSources
903 904 905 #################################################################################### 906 # DATABASE MODIFICATION METHODS (INSERT EXTERNAL INFORMATION TO THE DATABASE # 907 #################################################################################### 908
909 - def insert_new_external_database( self, externalDatabase ):
910 """ 911 Inserts into database the information of a new external database that is being integrated into BIANA database 912 """ 913 # First: check if this databaseName is already in the database 914 915 if self._get_valid_source_dbs().has_key(externalDatabase.get_name()) and self._get_valid_source_dbs()[externalDatabase.get_name()].has_key(externalDatabase.get_version()): 916 sys.stderr.write("\n\nDatabase Name and Version already existed in the database.\n\n") 917 sys.exit(-1) 918 else: 919 #content_type_list = externalDatabase.get_content_types() 920 #for parent, child_list in self.biana_database.EXTERNAL_DATA_TYPE_HIERARCHY_DICTIONARY.iteritems(): 921 922 new_database_identifier = self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_TABLE.get_table_name(), 923 column_values = ( ("databaseName", externalDatabase.get_name().strip("\"")), 924 ("databaseVersion", externalDatabase.get_version().strip("\"")), 925 ("parsedFile",externalDatabase.get_parsed_file().strip("\"") ), 926 ("parsedDate", externalDatabase.get_parsing_date().strip("\"") ), 927 ("defaultExternalEntityAttribute", externalDatabase.get_default_eE_attribute().strip("\"") ), 928 ("databaseDescription", externalDatabase.get_description().strip("\"") ), 929 ("isPromiscuous", externalDatabase.get_promiscuity()) ), 930 use_buffer=False ), 931 answer_mode="last_id") 932 933 externalDatabase.set_id(externalDatabaseID = new_database_identifier) 934 935 # Add this external database in the current external databases on memory 936 if self._get_valid_source_dbs().has_key(externalDatabase.get_name()): 937 self._get_valid_source_dbs()[externalDatabase.get_name()][externalDatabase.get_version()] = externalDatabase 938 else: 939 self._get_valid_source_dbs()[externalDatabase.get_name()] = {externalDatabase.get_version(): externalDatabase } 940 941 self.valid_source_database_ids[new_database_identifier] = externalDatabase 942 943 # Mark the database as modified 944 self.db_version_modified = 1
945 946
947 - def update_external_database_external_entity_attributes( self, externalDatabase ):
948 949 self.db.insert_db_content( self.db._get_update_sql_query( table = self.biana_database.EXTERNAL_DATABASE_TABLE, 950 update_column_values = (("parsingTime",externalDatabase.get_parsing_time()),), 951 fixed_conditions = (("externalDatabaseID","=",externalDatabase.get_id()),) ), 952 answer_mode = None ) 953 954 955 956 for current in externalDatabase.get_valid_external_entity_attribute_type(): 957 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE, 958 column_values = ( ("externalDatabaseID",externalDatabase.get_id()), 959 ("attributeType",current) )), 960 answer_mode = None ) 961 962 963 for current in externalDatabase.get_valid_external_entity_type(): 964 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE, 965 column_values = ( ("externalDatabaseID",externalDatabase.get_id()), 966 ("eEType",current) )), 967 answer_mode = None ) 968 969 for current in externalDatabase.get_valid_external_entity_relation_attribute_type(): 970 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE, 971 column_values = ( ("externalDatabaseID",externalDatabase.get_id()), 972 ("attributeType",current) )), 973 answer_mode = None ) 974 975 for current in externalDatabase.get_valid_external_entity_relation_type(): 976 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE, 977 column_values = ( ("externalDatabaseID",externalDatabase.get_id()), 978 ("eErType",current) )), 979 answer_mode = None )
980 981 # add this database to the default unification protocol... 982 # TODO 983 984
985 - def _get_externalDatabaseID_int(self, sourceDBName, sourceDBVersion):
986 """ 987 Returns the decimal value assigned to the sourceDB (externalDatabaseID will be converted to lower_case) 988 989 If "sourceDBVersion" is None, it returns a list with the all the identifiers assigned to the sourceDBName 990 """ 991 992 try: 993 if sourceDBVersion is not None: 994 return self._get_valid_source_dbs()[sourceDBName.lower()][sourceDBVersion.lower()].get_id() 995 else: 996 return [ self._get_valid_source_dbs()[sourceDBName.lower()][x].get_id() for x in self._get_valid_source_dbs()[sourceDBName.lower()] ] 997 except: 998 sys.stderr.write("Trying to get the identifier for an unexisting external database: %s %s\n" %(sourceDBName, sourceDBVersion)) 999 raise "Trying to get the identifier for an unexisting external database: %s %s\n" %(sourceDBName, sourceDBVersion)
1000 1001 1002 #################################################################################### 1003 # EXTERNAL ENTITIES INSERTION METHODS # 1004 #################################################################################### 1005 1006 1007 # AUTOINCREMENT VALUES 1008
1009 - def _get_new_external_entity_id(self):
1010 return self.db.get_next_autoincrement(table="externalEntity", attribute="externalEntityID" )
1011
1012 - def _get_last_external_entity_id(self):
1013 return self.db._get_current_autoincrement(table="externalEntity", attribute="externalEntityID" )
1014
1015 - def _get_last_stable_external_entity_id(self):
1016 return self.db._get_last_stable_autoincrement(table="externalEntity", attribute="externalEntityID" )
1017
1018 - def _get_new_external_entity_relation_participant_id(self):
1019 return self.db.get_next_autoincrement(table="externalEntityRelationParticipant", attribute="externalEntityRelationParticipantID" )
1020
1021 - def _get_last_external_entity_relation_participant_id(self):
1022 return self.db._get_current_autoincrement(table="externalEntityRelationParticipant", attribute="externalEntityRelationParticipantID" )
1023
1024 - def _get_last_stable_external_entity_relation_participant_id(self):
1025 return self.db._get_last_stable_autoincrement(table="externalEntityRelationParticipant", attribute="externalEntityRelationParticipantID" )
1026
1027 - def _get_new_sequenceProtein_id(self):
1028 return self.db.get_next_autoincrement(table="sequenceProtein",attribute="proteinSequenceID")
1029
1030 - def _get_last_sequenceProtein_id(self):
1031 return self.db._get_current_autoincrement(table="sequenceProtein",attribute="proteinSequenceID")
1032
1033 - def _get_last_stable_sequenceProtein_id(self):
1034 return self.db._get_last_stable_autoincrement(table="sequenceProtein",attribute="proteinSequenceID")
1035
1036 - def _get_new_sequenceNucleotide_id(self):
1037 return self.db.get_next_autoincrement(table="sequenceNucleotide", attribute="nucleotideSequenceID")
1038
1039 - def _get_last_sequenceNucleotide_id(self):
1040 return self.db._get_current_autoincrement(table="sequenceNucleotide", attribute="nucleotideSequenceID")
1041
1042 - def _get_last_stable_sequenceNucleotide_id(self):
1043 return self.db._get_last_stable_autoincrement(table="sequenceNucleotide", attribute="nucleotideSequenceID")
1044
1045 - def _get_new_key_id(self):
1046 return self.db.get_next_autoincrement(table="externalDatabaseAttributeTransfer", attribute="keyID")
1047
1048 - def _get_last_key_id(self):
1049 return self.db._get_current_autoincrement(table="externalDatabaseAttributeTransfer", attribute="keyID")
1050
1051 - def _get_last_stable_key_id(self):
1052 return self.db._get_last_stable_autoincrement(table="externalDatabaseAttributeTransfer", attribute="keyID")
1053 1054
1055 - def insert_new_external_entity( self, externalEntity):
1056 """ 1057 Inserts a new external entity to the biana database 1058 1059 If the externalEntity has been previously inserted in the database, it will show a message advertising about this. It won't insert the attributes, as an external entity can be inserted only once! 1060 1061 It is required to use this method once by each externalEntity. 1062 1063 """ 1064 1065 if externalEntity.get_id() is not None: 1066 raise ValueError("Trying to insert an inserted external entity...") 1067 1068 # Mark the database as modified 1069 self.db_version_modified = 1 1070 1071 if externalEntity.get_id() is None: 1072 1073 # Insert the externalEntity and get the new externalEntity ID 1074 1075 new_external_entity_id = self._get_new_external_entity_id() 1076 1077 self.db.insert_db_content(self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name(), 1078 column_values = ( ("externalEntityID", new_external_entity_id), 1079 ("externalDatabaseID",externalEntity.get_source_database().get_id()), 1080 ("type",externalEntity.get_type())), 1081 use_buffer=self.use_buffer )) 1082 1083 # Insert this externalEntity in the default unification protocol 1084 1085 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.USER_ENTITY_TABLE, 1086 column_values = [("userEntityID",new_external_entity_id), 1087 ("externalEntityID",new_external_entity_id)], 1088 use_buffer=self.use_buffer ) ) 1089 1090 externalEntity.set_id(id_value = new_external_entity_id) 1091 1092 1093 externalEntity_attributes = externalEntity.get_attributes_dict() 1094 1095 # Updates the information that this database is adding to the biana database 1096 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_type( eE_type = externalEntity.get_type()) 1097 1098 if externalEntity.get_type()== "ontology": 1099 self.insert_ontology(externalEntity) 1100 1101 for current_attribute_identifier in externalEntity_attributes: 1102 if externalEntity.get_type()== "relation": 1103 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_relation_attribute_type( attribute_identifier = current_attribute_identifier ) 1104 # Dirty trick to make "psimi_name" to appear in the graphical interface 1105 if current_attribute_identifier.lower() == "methodID": 1106 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_relation_attribute_type( attribute_identifier = "psimi_name" ) 1107 else: 1108 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_attribute_type( attribute_identifier = current_attribute_identifier ) 1109 1110 [ self._insert_external_entity_attribute( externalEntityID = new_external_entity_id, 1111 externalEntityAttribute = x ) for x in externalEntity_attributes[current_attribute_identifier] ] 1112 1113 1114 ### IF THIS ATTRIBUTE IS A TRANSFER ATTRIBUTE, SAVE THE KEY IN THE CORRESPONDING TABLE. THE PARSER HAS TO DEFINE IT PREVIOUS TO ADD ANY THING IN THE DATABASE 1115 if self._is_key_attribute( attribute_identifier = current_attribute_identifier, external_database_id = externalEntity.get_source_database().get_id() ): 1116 for current_key_attribute_object in externalEntity_attributes[current_attribute_identifier]: 1117 self.db.insert_db_content( self.db._get_insert_sql_query( table = self._get_key_attribute_table_name( key_id = self.key_attribute_ids[(externalEntity.get_source_database().get_id(), current_attribute_identifier.lower())] ), 1118 column_values = [("externalEntityID",new_external_entity_id), 1119 ("value",current_key_attribute_object.value)], 1120 use_buffer = self.use_buffer ), 1121 answer_mode = None ) 1122 1123 # Specific for relations 1124 if externalEntity.get_type()== "relation": 1125 1126 self.db.insert_db_content(self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE.get_table_name(), 1127 column_values = ( ("externalEntityRelationID", new_external_entity_id), 1128 ("type",externalEntity.get_relation_type() ) ), 1129 use_buffer=self.use_buffer ), 1130 answer_mode=None) 1131 1132 self.valid_source_database_ids[externalEntity.get_source_database().get_id()].add_valid_external_entity_relation_type( eEr_type = externalEntity.get_relation_type() ) 1133 1134 for actual_participant_external_entity_id in externalEntity.get_participant_external_entity_ids_list(): 1135 1136 new_id = self._get_new_external_entity_relation_participant_id() 1137 1138 # Save temporarly this information for storing the extended relations hierarchy 1139 if self.store_relations_hierarchy: 1140 self.temporal_data["relations_hierarchy_parents"].setdefault(actual_participant_external_entity_id, []).append(new_external_entity_id) 1141 else: 1142 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE, 1143 column_values = (("externalEntityRelationParticipantID", new_id), 1144 (self.biana_database.external_entity_relation_id_col, new_external_entity_id), 1145 (self.biana_database.externalEntityID_col, actual_participant_external_entity_id )), 1146 use_buffer = True ) ) 1147 1148 1149 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE, 1150 column_values = (("externalEntityRelationParticipantID", new_id), 1151 (self.biana_database.external_entity_relation_id_col, new_external_entity_id), 1152 (self.biana_database.externalEntityID_col, actual_participant_external_entity_id) ), 1153 use_buffer=self.use_buffer )) 1154 1155 1156 participant_attributes = externalEntity.get_participant_attributes( participantExternalEntityID = actual_participant_external_entity_id ) 1157 1158 [ self._insert_external_entity_relation_participant_attribute( pParticipantID = new_id, 1159 pExternalEntityRelationParticipantAttribute = current_attribute ) for actual_key in participant_attributes for current_attribute in participant_attributes[actual_key] ] 1160 1161 1162 else: 1163 sys.stderr.write("Trying to insert an external Entity which is already in the database! Any new attribute will be inserted!\n") 1164 1165 return externalEntity.get_id()
1166 1167 1168
1169 - def _insert_external_entity_relation_participant_attribute(self, pParticipantID, pExternalEntityRelationParticipantAttribute ):
1170 """ 1171 1172 """ 1173 1174 tableObject = self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT[pExternalEntityRelationParticipantAttribute.attribute_identifier.lower()] 1175 1176 1177 column_values = [ ("externalEntityRelationParticipantID",pParticipantID), 1178 ("value",pExternalEntityRelationParticipantAttribute.value) ] 1179 1180 for current_field in pExternalEntityRelationParticipantAttribute.additional_fields: 1181 column_values.append((current_field[0],current_field[1].replace('\\','\\\\').replace('"','\\"'))) 1182 1183 1184 self.db.insert_db_content( self.db._get_insert_sql_query( table = tableObject.get_table_name(), 1185 column_values = column_values, 1186 use_buffer=self.use_buffer ), 1187 answer_mode=None ) 1188 1189 return
1190 1191
1192 - def _insert_external_entity_attribute(self, externalEntityID, externalEntityAttribute):
1193 """ 1194 Adds a new attribute to the external entity 1195 externalEntityID must exist previously 1196 """ 1197 1198 tableObject = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[externalEntityAttribute.attribute_identifier.lower()] 1199 1200 if externalEntityAttribute.attribute_identifier.lower()=="proteinsequence" or externalEntityAttribute.attribute_identifier.lower()=="nucleotidenequence" : 1201 if externalEntityAttribute.value.get_sequence_MD5() not in self.temporal_data.setdefault("sequence",sets.Set()): 1202 self._insert_sequence(externalEntityAttribute.value) 1203 value = externalEntityAttribute.value.get_sequence_MD5() 1204 else: 1205 value = externalEntityAttribute.value 1206 1207 column_values = [ ("externalEntityID",externalEntityID), 1208 ("value",value) ] 1209 1210 for current_field in externalEntityAttribute.additional_fields: 1211 column_values.append((current_field,externalEntityAttribute.get_field(current_field))) 1212 1213 if externalEntityAttribute.type is not None: 1214 column_values.append(("type",externalEntityAttribute.type)) 1215 1216 if externalEntityAttribute.version is not None: 1217 column_values.append(("version",externalEntityAttribute.version)) 1218 1219 self.db.insert_db_content( self.db._get_insert_sql_query( table = tableObject.get_table_name(), 1220 column_values = column_values, 1221 use_buffer=self.use_buffer ), 1222 answer_mode=None ) 1223 return
1224 1225
1226 - def _insert_sequence(self, sequence):
1227 """ 1228 Inserts a sequence object into BIANA database and returns its ID. 1229 1230 """ 1231 1232 #if sequence.sequenceID is not None: 1233 # column_values = [("sequenceID", sequence.get_sequenceID())] 1234 #else: 1235 # column_values = [("sequenceID",self._get_new_sequenceProtein_id())] 1236 1237 special_column_values = [ ("sequence", "COMPRESS(\"%s\")" %sequence.get_sequence()) ] 1238 1239 if sequence.sequence_type == "peptide" : 1240 if sequence.get_sequenceID() is not None: 1241 seq_id = sequence.get_sequenceID() 1242 else: 1243 seq_id = self._get_new_sequenceProtein_id() 1244 column_values = [("proteinSequenceID",seq_id)] 1245 table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"] 1246 column_values.extend([("sequenceMW",sequence.get_proteinMW()),("sequenceIP",sequence.get_proteinIP()) ]) 1247 1248 elif sequence.sequence_type == "rna" or sequence.sequence_type == "dna": 1249 if sequence.get_sequenceID() is not None: 1250 seq_id = sequence.get_sequenceID() 1251 else: 1252 seq_id = self._get_new_sequenceNucleotide_id() 1253 column_values = [("nucleotideSequenceID", seq_id)] 1254 table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"] 1255 1256 column_values.extend([("sequenceMD5",sequence.get_sequence_MD5()), 1257 ("sequenceLength", sequence.get_length())]) 1258 1259 self.db.insert_db_content(self.db._get_insert_sql_query( table = table, 1260 column_values = column_values, 1261 special_column_values = special_column_values, 1262 use_buffer = self.use_buffer), 1263 answer_mode = None) 1264 1265 self.temporal_data.setdefault("sequence",sets.Set()).add(sequence.get_sequence_MD5()) 1266 1267 return seq_id
1268 1269
1270 - def _insert_sequence_file(self, input_fd, type, format="fasta", verbose=True):
1271 """ 1272 Inserts a sequence file to the database 1273 """ 1274 1275 num = 0 1276 1277 if type.lower() == "proteinsequence": 1278 seqObj = BianaObjects.ProteinSequence 1279 elif type.lower() == "nucleotidesequence": 1280 seqObj = BianaObjects.NucleotideSequence 1281 1282 1283 if format.lower()=="fasta": 1284 temp_seq = [] 1285 for line in input_fd: 1286 if line[0]==">": 1287 if len(temp_seq)>0: 1288 self._insert_sequence( sequence = seqObj(sequence="".join(temp_seq),sequenceID=sequenceID ) ) 1289 sequenceID=line[1:].strip() 1290 if( sequenceID == "NULL" or sequenceID == "None" or sequenceID is None ): 1291 sequenceID = self._get_new_sequenceProtein_id() 1292 else: 1293 temp_seq.append(line.strip()) 1294 num += 1 1295 if (num%100000)==0 and verbose: 1296 sys.stderr.write("%s sequences inserted\n" %num) 1297 1298 1299 elif format.lower() == "seq": 1300 for line in input_fd: 1301 splitted = line.strip().split("\t") 1302 sequenceID = splitted[0] 1303 if( sequenceID == "NULL" or sequenceID == "None" or sequenceID is None ): 1304 sequenceID = self._get_new_sequenceProtein_id() 1305 self._insert_sequence( sequence = seqObj(sequence=splitted[1],sequenceID=sequenceID ) ) 1306 num += 1 1307 if (num%100000)==0 and verbose: 1308 sys.stderr.write("%s sequences inserted\n" %num) 1309 1310 else: 1311 raise ValueError("Format %s not recognized" %(format))
1312 1313
1314 - def _insert_protein_sequence_cd_hit_cluster(self, cd_hit_cluster):
1315 """ 1316 1317 """ 1318 1319 # Problem: clusters with only a sequence are not added. It is necessary to add them? 1320 # It is not necessary, as they are only composed by the representative one... 1321 # But then... How to be sure that this sequenceID has been used? For the moment, it is not necessary 1322 for current_match in cd_hit_cluster.get_matches(): 1323 self.db.insert_db_content(self.db._get_insert_sql_query( table = "sequenceProteinCD_HIT", 1324 column_values = [("representant_proteinSequenceID", cd_hit_cluster.get_representant()), 1325 ("proteinSequenceID",current_match.sequenceID), 1326 ("representant_start_position",current_match.repr_start_pos), 1327 ("representant_end_position",current_match.repr_end_pos), 1328 ("start_position",current_match.start_pos), 1329 ("end_position",current_match.end_pos), 1330 ("identity",current_match.identity)], 1331 use_buffer = self.use_buffer), 1332 answer_mode = None)
1333 1334
1335 - def insert_cd_hit_clusters_to_biana_database(self, cd_hit_clusters_file):
1336 """ 1337 Inserts information about CD-HIT clusters into database 1338 """ 1339 ## EXAMPLE OF Clusters output file 1340 ## >Cluster 0 1341 ## 0 36805aa, >9662380a987baf844... * 1342 ## >Cluster 1 1343 ## 0 35213aa, >0623d1531507bb004... * 1344 ## 1 90aa, >07b1a5a6aab138163... at 1:90:11834:11923/100% 1345 ## 2 247aa, >5e00433d0ae984091... at 1:247:12464:12710/100% 1346 ## 3 153aa, >d845d402ebfa7c203... at 1:153:11430:11582/100% 1347 ## 4 100aa, >ea8147fd16954563f... at 1:100:11483:11582/100% 1348 ## 5 183aa, >fab09a87d7f461e75... at 1:183:10973:11155/100% 1349 ## >Cluster 2 1350 ## 0 391aa, >1cacc168fca71118c... at 1:391:10887:11277/99% 1351 ## 1 34942aa, >d861c67fa2f88e4cd... * 1352 1353 input_file_fd = file(cd_hit_clusters_file, 'r') 1354 num_clusters=0 1355 current_cluster = None 1356 1357 cluster_re = re.compile(">Cluster \d+") 1358 sequence_re = re.compile("\s*\d+\s+\d+aa,\s+\>(\w+)\.+\s+at\s+(\d+)\:(\d+)\:(\d+)\:(\d+)\/(\d+)\%") 1359 representant_re = re.compile("\d+\s+\d+aa,\s+\>(\w+)\.+") 1360 1361 for line in input_file_fd: 1362 1363 if cluster_re.match(line): 1364 num_clusters += 1 1365 if num_clusters % 100000 == 0: 1366 sys.stderr.write("%s clusters inserted\n" %(num_clusters)) 1367 if current_cluster is not None: 1368 self._insert_protein_sequence_cd_hit_cluster(current_cluster) 1369 current_cluster = BianaObjects.CDHITCluster() 1370 continue 1371 1372 sequence = sequence_re.match(line) 1373 if sequence: 1374 current_cluster.add_match( BianaObjects.CDHITMatch( sequenceID= sequence.group(1), 1375 start_pos = int(sequence.group(2)), 1376 end_pos = int(sequence.group(3)), 1377 repr_start_pos = int(sequence.group(4)), 1378 repr_end_pos = int(sequence.group(5)), 1379 identity = int(sequence.group(6)) ) ) 1380 else: 1381 # representant 1382 if line[-2] == "*": 1383 sequence = representant_re.search(line) 1384 current_cluster.set_representant(sequence.group(1)) 1385 else: 1386 sys.stderr.write("Error parsing cd-hit results in line:\n%s" %line) 1387 1388 self._insert_protein_sequence_cd_hit_cluster(current_cluster) 1389 1390 # Make sure to empty the buffer... 1391 self.db._empty_buffer() 1392 1393 return
1394 1395
1396 - def _get_similar_sequences(self, sequenceID, bit_score=None, identity_percent=None, similarity_percent=None, query_coverage_percent=None, match_coverage_percent=None):
1397 """ 1398 Gets similar sequences from database 1399 1400 In order to be able to run this method, is mandatory to fill before the table of blast or psi-blast results 1401 1402 All specified conditions are evaluated jointly, with AND 1403 1404 TO CHECK!!! The indices are optimized only for identity and coverage!!! 1405 1406 returns a list of sequenceIDs from similar sequences according to the requeriments. 1407 """ 1408 1409 sqlStat = database.SQLSelectStatement() 1410 1411 sqlStat.add_element( tables = [self.biana_database.PROTEIN_BLAST_RESULTS_TABLE] ) 1412 sqlStat.add_element( fixed_conditions = [("sequenceID_A","=",sequenceID)] ) 1413 sqlStat.add_element( columns = ["sequenceID_B"] ) 1414 1415 if( bit_score is not None ): 1416 sqlStat.add_element( fixed_conditions = [("bit_score",">=",bit_score)] ) 1417 if( identity_percent is not None ): 1418 sqlStat.add_element( fixed_conditions = [("identities",">=",identity_percent)]) 1419 if( similarity_percent is not None ): 1420 sqlStat.add_element( fixed_conditions = [("similarity",">=",similarity_percent)]) 1421 if( query_coverage_percent is not None ): 1422 sqlStat.add_element( fixed_conditions = [("coverage_A",">=",query_coverage_percent)]) 1423 if( match_coverage_percent is not None ): 1424 sqlStat.add_element( fixed_conditions = [("coverage_B",">=",match_coverage_percent)] ) 1425 1426 query = self.db._get_select_sql_query( tables = sqlStat.tables, 1427 columns = sqlStat.columns, 1428 fixed_conditions = sqlStat.fixed_conditions, 1429 join_conditions = sqlStat.join_conditions ) 1430 1431 data = self.db.select_db_content( query, answer_mode = "list" ) 1432 1433 #print data 1434 return data
1435 1436
1437 - def _insert_blast_results_file(self, file_fd):
1438 """ 1439 METHOD TO TEST 1440 """ 1441 1442 columns = ["sequenceID_A","sequenceID_B","evalue","score","bit_score","start_A","end_A","start_B","end_B", 1443 "Identities","similarity","gaps","program","filter","coverage_A","coverage_B"] 1444 1445 for line in file_fd: 1446 if line.strip().split("\t")[12] != "bl2seq" and line.strip().split("\t")[12] != "blastall": 1447 sys.stderr.write(line) 1448 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.PROTEIN_BLAST_RESULTS_TABLE, 1449 column_values = zip(columns,line.strip().split("\t")), 1450 use_buffer = self.use_buffer ) )
1451 1452 #self.db.insert_db_content("LOAD DATA LOCAL INFILE '%s' INTO TABLE %s (sequenceID_A,sequenceID_B,evalue,score,bit_score,start_A,end_A,start_B,end_B,identities,similarity,gaps,program,filter,coverage_A,coverage_B)" %(file_path, self.biana_database.PROTEIN_BLAST_RESULTS_TABLE), 1453 # answer_mode = None ) 1454 1455
1456 - def _load_sequences(self, sequenceIdList, type="proteinsequence"):
1457 """ 1458 Gets multiple sequence object from BIANA database 1459 1460 "sequenceID" can be a single id or a id list 1461 1462 "type" can be "protein" or "nucleotide" 1463 """ 1464 1465 results = {} 1466 1467 if type == "proteinsequence": 1468 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"]], 1469 columns = ["proteinSequenceID","UNCOMPRESS(sequence)"], 1470 fixed_conditions = [("proteinSequenceID","IN","(%s)" %(", ".join(map(str,sequenceIdList))),None)] ), 1471 answer_mode = "raw" ) 1472 1473 for current_data in data: 1474 results[current_data[0]] = BianaObjects.ProteinSequence(sequence=current_data[1],sequenceID=current_data[0]) 1475 1476 elif type == "nucleotidesequence": 1477 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"]], 1478 columns = ["nucleotideSequenceID","sequenceType","UNCOMPRESS(sequence)"], 1479 fixed_conditions = [("nucleotideSequenceID","IN","(%s)" %(", ".join(map(str,sequenceIdList))))] ), 1480 answer_mode = "raw" ) 1481 for current_data in data: 1482 if current_data[1] == "dna": 1483 results[current_data[0]] = BianaObjects.ProteinSequence(sequence=current_data[2],sequenceID=current_data[0]) 1484 elif current_data[1] == "rna": 1485 results[current_data[0]] = BianaObjects.ProteinSequence(sequence=current_data[2],sequenceID=current_data[0]) 1486 1487 else: 1488 raise ValueError("type must be \"proteinsequence\" or \"nucleotidesequence\"") 1489 1490 return results
1491 1492
1493 - def insert_ontology(self, ontology):
1494 """ 1495 Inserts to the database the complete ontology object 1496 1497 The external entities of the ontology must be inserted previously 1498 """ 1499 1500 if not ontology.linked_attribute.lower() in self.biana_database.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT: 1501 raise ValueError("Trying to link this ontology to an invalid attribute type") 1502 1503 key_id = self._add_key_attribute = self._add_key_attribute( external_database_id = ontology.get_source_database().get_id(), key_attribute = ontology.linked_attribute ) 1504 1505 column_values = [ ("externalEntityID", ontology.get_id()), 1506 ("linked_attribute", ontology.linked_attribute), 1507 ("key_id", key_id), 1508 ("name", ontology.name), 1509 ("description_attribute", ontology.description_attribute) ] 1510 1511 # In order to simplify and improve the performance of the expansion queries, we need to duplicate data in a table, as in _add_transfer_attribute. 1512 # It is equivalent to the idea of the key attribute in "transfer attributes" 1513 # In order to check 1514 1515 1516 if ontology.level_attribute is not None: 1517 column_values.append(("level_attribute", ontology.level_attribute)) 1518 1519 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.ONTOLOGY_INFO_TABLE, 1520 column_values = column_values, 1521 use_buffer = self.use_buffer ) ) 1522 1523 for current_eE_id in ontology.get_all_external_entity_ids(): 1524 for current_parent_id in ontology.get_parents_ids(current_eE_id): 1525 if current_parent_id != current_eE_id: 1526 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.ONTOLOGY_IS_A_TABLE, 1527 column_values = [ ("externalEntityID", current_eE_id), 1528 ("is_a",current_parent_id) ], 1529 use_buffer = self.use_buffer ) ) 1530 1531 for current_part_parent_id in ontology.get_part_parents_ids(current_eE_id): 1532 if current_part_parent_id != current_eE_id: 1533 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.ONTOLOGY_IS_PART_OF_TABLE, 1534 column_values = [ ("externalEntityID", current_eE_id), 1535 ("is_part_of",current_part_parent_id) ], 1536 use_buffer = self.use_buffer ) ) 1537 1538 # Insert extended ontology 1539 print "Inserting extended ontology" 1540 for current_eE_id in ontology.get_all_external_entity_ids(): 1541 for current_child in ontology.get_descendants( ontologyElementID = current_eE_id ): 1542 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE, 1543 column_values = [ ("parentExternalEntityID",current_eE_id), 1544 ("childExternalEntityID",current_child) ], 1545 use_buffer = self.use_buffer ) ) 1546 return
1547
1548 - def get_ontology(self, ontology_name, root_attribute_values = [], load_external_entities=False):
1549 """ 1550 Loads ontology object 1551 """ 1552 1553 #print "Getting ontology %s with roots %s" %(ontologyName, root_attribute_values) 1554 root_attribute_values.sort() 1555 key = ontology_name.lower()+str(root_attribute_values) 1556 1557 if key in self.loaded_ontologies: 1558 return self.loaded_ontologies[key] 1559 1560 is_a_table = self.biana_database.ONTOLOGY_IS_A_TABLE 1561 is_part_of_table = self.biana_database.ONTOLOGY_IS_PART_OF_TABLE 1562 eE_table = self.biana_database.EXTERNAL_ENTITY_TABLE 1563 eE_field = self.biana_database.externalEntityID_col 1564 1565 linked_attr = self.get_available_ontology_names(name=ontology_name) 1566 attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[linked_attr.lower()] 1567 1568 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.ONTOLOGY_INFO_TABLE, 1569 self.biana_database.EXTERNAL_ENTITY_TABLE], 1570 columns = ["externalDatabaseID","linked_attribute","key_id","level_attribute","description_attribute"], 1571 join_conditions = [("%s.externalEntityID" %self.biana_database.ONTOLOGY_INFO_TABLE, 1572 "=", 1573 "%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_TABLE)], 1574 fixed_conditions = [("name","=",ontology_name)]), 1575 answer_mode = "raw" ) 1576 1577 if len(data) == 0 or len(data)>1: 1578 raise ValueError("Trying to get an unexsiting ontology or multiple with the same name...") 1579 1580 ontology = BianaObjects.Ontology( source_database = data[0][0], linkedAttribute = data[0][1], name= ontology_name, descriptionAttribute = data[0][4], levelAttribute=data[0][3] ) 1581 1582 1583 def add_data(sql_results): 1584 1585 for current_data in sql_results: 1586 if current_data[1] is not None: 1587 is_a = [ int(x) for x in current_data[1].split(",") ] 1588 else: 1589 is_a = [] 1590 if current_data[2] is not None: 1591 is_part_of = [ int(x) for x in current_data[2].split(",") ] 1592 else: 1593 is_part_of = [] 1594 1595 ontology.add_element( ontologyElementID = current_data[0], 1596 isA = is_a, 1597 isPartOf = is_part_of, 1598 linkedAttributeValue = current_data[3] )
1599 1600 # For loading all ontology 1601 if len(root_attribute_values)==0: 1602 1603 query = "SELECT %s.%s, GROUP_CONCAT(DISTINCT %s), GROUP_CONCAT(DISTINCT %s), value FROM %s LEFT JOIN %s ON %s.%s=%s.%s LEFT JOIN %s ON %s.%s=%s.%s, %s WHERE externalDatabaseID=%s AND %s.%s = %s.%s GROUP BY %s.%s" %(eE_table, eE_field, 1604 "is_a", "is_part_of", 1605 eE_table, 1606 is_a_table, 1607 eE_table,eE_field, 1608 is_a_table,eE_field, 1609 is_part_of_table, 1610 eE_table,eE_field, 1611 is_part_of_table,eE_field, 1612 attr_table, 1613 ontology.get_source_database(), 1614 eE_table, eE_field, 1615 attr_table, eE_field, 1616 eE_table, eE_field) 1617 1618 #print query 1619 1620 add_data( self.db.select_db_content( query, answer_mode="raw" ) ) 1621 1622 else: 1623 1624 # SELECT ROOT FIRST 1625 root_list= self.get_list_external_entities_IDs_by_attribute( attribute_identifier = self.get_available_ontology_names(name = ontology_name), 1626 field_values = [("value",x) for x in root_attribute_values], 1627 source_databases = [ontology.get_source_database() ], 1628 expand_ontology_attributes = False ) 1629 1630 root_eE_dict = self.get_external_entities_dict(externalEntityIdsList=root_list, 1631 attribute_list=[linked_attr]) 1632 1633 add_data( [(x,None,None,root_eE_dict[x].get_attribute(linked_attr).pop().value) for x in root_list ] ) 1634 1635 current_level = map(str, root_list) 1636 1637 if( len(current_level) == 0 ): 1638 raise ValueError("Trying to load an ontology with an unexisting root") 1639 1640 def get_recursive_query(list_root_ids): 1641 1642 where_sql = "is_a IN (%s)" %(",".join(list_root_ids)) 1643 1644 # is part of should be included? TO DECIDE... 1645 1646 query = "SELECT %s.%s, GROUP_CONCAT(DISTINCT %s), GROUP_CONCAT(DISTINCT %s), value FROM %s LEFT JOIN %s ON %s.%s=%s.%s LEFT JOIN %s ON %s.%s=%s.%s, %s WHERE externalDatabaseID=%s AND %s.%s=%s.%s AND %s GROUP BY %s.%s" %(eE_table, eE_field, 1647 "is_a", "is_part_of", 1648 eE_table, 1649 is_a_table, 1650 is_a_table,eE_field, 1651 eE_table,eE_field, 1652 is_part_of_table, 1653 is_part_of_table,eE_field, 1654 eE_table,eE_field, 1655 attr_table, 1656 ontology.get_source_database(), 1657 eE_table, eE_field, 1658 attr_table, eE_field, 1659 where_sql, 1660 eE_table,eE_field) 1661 1662 return query 1663 1664 1665 def t(v): 1666 return str(v[0]) 1667 1668 while( True ): 1669 1670 if len(current_level)==0: 1671 break 1672 1673 data = self.db.select_db_content( get_recursive_query(current_level), answer_mode="raw" ) 1674 add_data( data ) 1675 current_level = map(t,data) 1676 1677 1678 1679 if load_external_entities is True: 1680 external_entities_dict = {} 1681 1682 #avoid bigg packets... 1683 all_list = ontology.get_all_external_entity_ids() 1684 1685 for x in xrange(0,len(all_list)/50): 1686 current_list = all_list[50*x:50*x+50] 1687 external_entities_dict.update( self.get_external_entities_dict( externalEntityIdsList = current_list, 1688 attribute_list = [ontology.description_attribute], 1689 useTransferAttributes = False ) ) 1690 1691 ontology._set_external_entities_dict( self.get_external_entities_dict( externalEntityIdsList = ontology.get_all_external_entity_ids(), 1692 attribute_list = [ontology.description_attribute], 1693 useTransferAttributes = False ) ) 1694 1695 self.loaded_ontologies[key] = ontology 1696 1697 return ontology 1698 1699 1700 1701 #################################################################################### 1702 # EXTERNAL ENTITIES SELECTION METHODS # 1703 #################################################################################### 1704 1705 1706
1707 - def _convertListSourceDBVersionToSourceDBIdList(self, source_databases):
1708 externalDatabaseID_list = [] 1709 if source_databases is not None: 1710 for actual_source in source_databases: 1711 if isinstance(actual_source,int) or isinstance(actual_source,long): 1712 externalDatabaseID_list.append(actual_source) 1713 continue 1714 if actual_source[1] is None: 1715 externalDatabaseID_list.extend(self._get_externalDatabaseID_int(sourceDBName=actual_source[0],sourceDBVersion=None)) 1716 else: 1717 externalDatabaseID_list.append(self._get_externalDatabaseID_int(sourceDBName=actual_source[0],sourceDBVersion=actual_source[1])) 1718 return externalDatabaseID_list
1719 1720 1721 1722 1723
1724 - def get_external_entities_dict_by_attribute(self, attribute_identifier, field_values, source_databases=None, attribute_restrictions=None, attribute_list=[], relation_attribute_list=[], participant_attribute_list=[] ):
1725 """ 1726 """ 1727 1728 list_eE = self.get_list_external_entities_IDs_by_attribute( attribute_identifier = attribute_identifier, 1729 field_values = field_values, 1730 source_databases = source_databases, 1731 attribute_restrictions = attribute_restrictions ) 1732 1733 return self.get_external_entities_dict( externalEntityIdsList = list_eE, 1734 attribute_list=attribute_list, 1735 relation_attribute_list=relation_attribute_list, 1736 participant_attribute_list=participant_attribute_list)
1737 1738 1739 1740 1741
1742 - def _get_expand_ontology_attribute_table_query(self, attribute_identifier, values_to_expand_list):
1743 1744 attribute_identifier = attribute_identifier.lower() 1745 1746 in_str = "(\"%s\")" %",".join(map(str,values_to_expand_list)) 1747 1748 if not self._is_ontology_linked_attribute(attribute_identifier): 1749 raise ValueError("Not possible to expand by using %s attribute. Not ontology linked" %attribute_identifier) 1750 1751 1752 1753 attribute_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier] 1754 1755 tables = [ (self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE, "o"), 1756 (attribute_table, "attr1"), 1757 (attribute_table, "attr2") ] 1758 1759 join_conditions = [ ("attr1.value","IN",in_str), 1760 ("o.parentExternalEntityID","=","attr1.externalEntityID"), 1761 ("o.childExternalEntityID","=","attr2.externalEntityID") ] 1762 1763 columns = [("attr2.value","childs")] 1764 1765 query = self.db._get_select_sql_query( tables = tables, 1766 columns = columns, 1767 join_conditions = join_conditions ) 1768 1769 query_list = [query] 1770 query_list.extend( [ "SELECT %s" %x for x in values_to_expand_list ] ) 1771 1772 return "(%s)" %self.db._get_union_queries( queries = query_list )
1773 1774 1775 1776 1777
1778 - def _get_list_external_entities_IDs_by_attribute_SQLstat(self, attribute_identifier, field_values, source_databases=[], attribute_restrictions=None, expand_ontology_attributes=True ):
1779 """ 1780 Gets a list of external entities that have an attribute with "attribute_value" value 1781 "field_value" is the field/S and value/S we want to obtain 1782 If a value is *, it will obtain all having this attribute 1783 """ 1784 1785 # TODO!!! IF FIELD VALUES HAVE NOT ONLY VALUES... IT WILL CRASH 1786 1787 1788 sqlStat = database.SQLSelectStatement() 1789 1790 attribute_identifier = attribute_identifier.lower() 1791 1792 if attribute_identifier == "proteinsequenceid": 1793 1794 seq_table = self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"] 1795 table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["proteinsequence"] 1796 sqlStat.add_element( tables = [seq_table, table] ) 1797 sqlStat.add_element( columns = ["%s.externalEntityID" %(table) ] ) 1798 sqlStat.add_element( join_conditions = [("%s.sequenceMD5" %seq_table, 1799 "=", 1800 "%s.value" %table)] ) 1801 1802 else: 1803 1804 table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier.lower()] 1805 sqlStat.add_element( tables = [table] ) 1806 sqlStat.add_element( columns = ["%s.externalEntityID" %(table) ] ) 1807 1808 1809 if len(source_databases)>0: 1810 externalDatabaseID_list = self._convertListSourceDBVersionToSourceDBIdList(source_databases) 1811 if externalDatabaseID_list != []: 1812 sqlStat.add_element( tables = [self.biana_database.EXTERNAL_ENTITY_TABLE] ) 1813 sqlStat.add_element( fixed_conditions = [("externalDatabaseID","IN","(%s)" %(",".join([ str(x) for x in externalDatabaseID_list ])),None)] ) 1814 1815 sqlStat.add_element( join_conditions = [("%s.externalEntityID" %table, 1816 "=", 1817 "%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_TABLE)] ) 1818 1819 if attribute_restrictions is not None: 1820 num = 1 1821 for current_restriction_attribute, restriction_values in attribute_restrictions: 1822 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_restriction_attribute.lower()] 1823 sqlStat.add_element( tables = [str(current_table)+" AS Q%s" %num] ) 1824 1825 values_list = restriction_values.split(",") ### WHY???? 1826 1827 1828 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(attribute_identifier, self.biana_database): 1829 sqlStat.add_element( join_conditions = [ ("MATCH (Q%s.value)" %num, 1830 "AGAINST", 1831 "('%s' IN BOOLEAN MODE)" %("\",\"".join(values_list))) ] ) 1832 else: 1833 1834 if self._is_ontology_linked_attribute( current_restriction_attribute ): 1835 sqlStat.add_element( tables = [(self._get_expand_ontology_attribute_table_query( attribute_identifier = current_restriction_attribute, 1836 values_to_expand_list = values_list ), "childs")] ) 1837 sqlStat.add_element( join_conditions = [("Q%s.value" %num, "=", "childs.childs")] ) 1838 1839 else: 1840 sqlStat.add_element( join_conditions = [ ("Q%s.value" %num, 1841 "IN", 1842 "(\"%s\")" %",".join(map(str, values_list))) ] ) 1843 1844 sqlStat.add_element( join_conditions = [ ("%s.externalEntityID" %table, 1845 "=", 1846 "Q%s.externalEntityID" %num) ] ) 1847 num += 1 1848 1849 1850 1851 # Detect the fields 1852 1853 values_list = [] 1854 1855 for current_field, current_value in field_values: 1856 if current_field.lower() == "value": 1857 values_list.append(str(current_value).strip()) 1858 else: 1859 raise ValueError("TO IMPLEMENT") # by union??? 1860 1861 if BianaObjects.ExternalEntityAttribute.isVersionableIdentifier(attribute_identifier, self.biana_database): 1862 for current_value in values_list: 1863 splitted = current_value.split(".") 1864 if len(splitted)>1: 1865 field_values.append(("value",splitted[0])) 1866 field_values.append(("version",splitted[1])) 1867 raise ValueError("TO IMPLEMENT") 1868 else: 1869 field_values.append(("value",splitted[0])) 1870 1871 if "*" not in values_list: 1872 if attribute_identifier == "proteinsequenceid": 1873 sqlStat.add_element( join_conditions = [("%s.proteinSequenceID" %seq_table, 1874 "IN", 1875 "(\"%s\")" % "\",\"".join(values_list) )] ) 1876 elif BianaObjects.ExternalEntityAttribute.isFullTextSearchable(attribute_identifier, self.biana_database): 1877 sqlStat.add_element( join_conditions = [("MATCH (%s.value)" %table, 1878 "AGAINST", 1879 "('%s' IN BOOLEAN MODE)" % " ".join(values_list) )] ) 1880 else: 1881 if self._is_ontology_linked_attribute( attribute_identifier ): 1882 sqlStat.add_element( tables = [(self._get_expand_ontology_attribute_table_query( attribute_identifier = attribute_identifier, 1883 values_to_expand_list = values_list ), "childs")] ) 1884 sqlStat.add_element( join_conditions = [("%s.value" %table, "=", "childs.childs")] ) 1885 else: 1886 sqlStat.add_element( join_conditions = [("%s.value" %table, "IN", "(\"%s\")" %"\",\"".join(map(str, values_list)))] ) 1887 1888 return sqlStat
1889 1890 1891 1892
1893 - def transform_expanded_attribute_restrictions(self, attribute_restriction_list):
1894 """ 1895 1896 """ 1897 1898 #restrictions = sets.Set(attribute_restriction_list) 1899 restrictions = sets.Set() 1900 1901 attribute_values_to_expand = {} 1902 1903 for current_attribute, current_value in attribute_restriction_list: 1904 if current_attribute.lower() not in self.transferred_attributes: 1905 restrictions.add((current_attribute, current_value)) 1906 continue 1907 else: 1908 attribute_values_to_expand.setdefault(current_attribute.lower(), []).append(current_value) 1909 1910 for current_attribute, values_list in attribute_values_to_expand.iteritems(): 1911 new_attribute_restrictions = self._get_attribute_value_list_for_transfer_attributes( attribute_identifier = current_attribute, 1912 field_values = [("value",x) for x in values_list] ) 1913 1914 map(restrictions.add, new_attribute_restrictions) 1915 1916 return list(restrictions)
1917 1918 1919 1920
1921 - def _get_attribute_value_list_for_transfer_attributes(self, attribute_identifier, field_values):
1922 """ 1923 returns a list of (field, value) of the transferred attribute 1924 """ 1925 1926 attribute_identifier = attribute_identifier.lower() 1927 1928 attribute_values = [] 1929 1930 for current_transfer in self.transferred_attributes[attribute_identifier]: 1931 1932 # First, get the key attribute values 1933 tables = [ self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier], #Attribute transferred 1934 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_transfer[1]],"key1"), #Attribute used as a key 1935 (self.biana_database.EXTERNAL_ENTITY_TABLE, "e1") ] 1936 1937 columns = ["DISTINCT(key1.value)"] 1938 1939 fixed_conditions = [("e1.externalDatabaseID", 1940 "=", 1941 current_transfer[0])] 1942 1943 join_conditions = [("key1.externalEntityID","=","%s.externalEntityID" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier]))] 1944 1945 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(attribute_identifier, self.biana_database): 1946 join_conditions.append(("MATCH (%s.value)" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier], 1947 "AGAINST", 1948 "('%s' IN BOOLEAN MODE)" % " ".join([ str(actual_restriction[1]) for actual_restriction in field_values ] ) )) 1949 else: 1950 join_conditions.append(("%s.value" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier], 1951 "IN", 1952 "(\"%s\")" % "\",\"".join([ str(actual_restriction[1]) for actual_restriction in field_values ] ) )) 1953 #"(\"%s\")" % "\",\"".join([ str(actual_restriction) for actual_restriction in field_values ] ) )) 1954 1955 attribute_values_query = self.db._get_select_sql_query( tables = tables, 1956 columns = columns, 1957 fixed_conditions = fixed_conditions, 1958 join_conditions = join_conditions) 1959 1960 #print attribute_values_query 1961 1962 attribute_values_list = self.db.select_db_content( attribute_values_query, answer_mode = "list" ) 1963 1964 #attribute_values.append((current_transfer[1],",".join(map(str,attribute_values_list)))) 1965 attribute_values.extend([(current_transfer[1],str(x)) for x in attribute_values_list ]) 1966 1967 return attribute_values
1968 1969 1970 1971
1972 - def _get_list_user_entities_IDs_by_attribute_transfer_query(self, attribute_identifier, field_values, unification_protocol_name, source_databases=[], attribute_restrictions=None, include_type = False, restrict_to_user_entity_ids_list=[] ):
1973 1974 1975 # MAYBE TO CHANGE... IT WOULD BE BETTER DOING IT WITH THE NEW METHOD 1976 1977 if unification_protocol_name is None: 1978 raise ValueError("You are trying to transfer attributes without using a unification protocol...") 1979 1980 attribute_identifier = attribute_identifier.lower() 1981 1982 if attribute_identifier not in self.transferred_attributes: 1983 raise ValueError("Not transferable attribute") 1984 1985 1986 user_entities_list = [] 1987 1988 for current_transfer in self.transferred_attributes[attribute_identifier]: 1989 1990 # First, get the key attribute values 1991 tables = [ self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier], #Attribute transferred 1992 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_transfer[1]],"key1"), #Attribute used as a key 1993 (self.biana_database.EXTERNAL_ENTITY_TABLE, "e1") ] 1994 1995 columns = ["DISTINCT(key1.value)"] 1996 1997 fixed_conditions = [("e1.externalDatabaseID", 1998 "=", 1999 current_transfer[0])] 2000 2001 join_conditions = [("key1.externalEntityID","=","%s.externalEntityID" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier]))] 2002 2003 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(attribute_identifier, self.biana_database): 2004 join_conditions.append(("MATCH (%s.value)" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier], 2005 "AGAINST", 2006 "('%s' IN BOOLEAN MODE)" % " ".join([ str(actual_restriction[1]) for actual_restriction in field_values ] ) )) 2007 else: 2008 join_conditions.append(("%s.value" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier], 2009 "IN", 2010 "(\"%s\")" % "\",\"".join([ str(actual_restriction[1]) for actual_restriction in field_values ] ) )) 2011 2012 key_values_query = self.db._get_select_sql_query( tables = tables, 2013 columns = columns, 2014 fixed_conditions = fixed_conditions, 2015 join_conditions = join_conditions) 2016 2017 key_values = self.db.select_db_content( key_values_query, answer_mode = "list" ) 2018 2019 key_values = [("value",x) for x in key_values ] 2020 2021 new_list = self.get_list_user_entities_IDs_by_attribute(unification_protocol_name = unification_protocol_name, 2022 attribute_identifier = current_transfer[1], 2023 field_values = key_values, 2024 attribute_restrictions = attribute_restrictions, 2025 restrict_to_user_entity_ids_list = restrict_to_user_entity_ids_list, 2026 include_type = include_type) 2027 2028 user_entities_list.extend(new_list) 2029 2030 return user_entities_list
2031 2032 2033 2034
2035 - def get_list_external_entities_IDs_by_attribute(self, attribute_identifier, field_values, source_databases=[], attribute_restrictions=None ):
2036 2037 # Get those external entities directly linked to those attributes 2038 sqlStat = self._get_list_external_entities_IDs_by_attribute_SQLstat(attribute_identifier=attribute_identifier, 2039 field_values=field_values, 2040 source_databases=source_databases, 2041 attribute_restrictions=attribute_restrictions) 2042 2043 query = self.db._get_select_sql_query( tables = sqlStat.tables, 2044 columns = sqlStat.columns, 2045 fixed_conditions = sqlStat.fixed_conditions, 2046 join_conditions = sqlStat.join_conditions ) 2047 2048 direct_linked_list = self.db.select_db_content( query, answer_mode = "list" ) 2049 2050 2051 return direct_linked_list
2052 2053 2054 2055
2056 - def _get_attribute_restrictions_query(self, unification_protocol_name, negative_attribute_restrictions ):
2057 2058 negative_attribute_restrictions = negative_attribute_restrictions 2059 2060 queries = [] 2061 2062 self._load_available_unification_protocols() 2063 unif_table = self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name) 2064 2065 2066 num_query = 1 2067 for current_restriction_attribute, current_restriction_values in negative_attribute_restrictions: 2068 2069 #for current_restriction in restrictions_set: 2070 #print "APPLYING NEGATIVE RESTRICTION ",current_restriction 2071 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_restriction_attribute.lower()] 2072 2073 tables = [(unif_table,"nu"), 2074 (current_table, "nq")] 2075 2076 columns = ["nu.userEntityID"] 2077 2078 group_conditions = columns 2079 2080 join_conditions = [("nq.externalEntityID","=","nu.externalEntityID")] 2081 2082 values_list = [ x for x in str(current_restriction_values).split(",") ] 2083 2084 #expand_ontology_attributes: 2085 #values_list.extend( self.expand_ontology_field_values( values_list = values_list, attribute_identifier = current_restriction[0] ) ) 2086 if self._is_ontology_linked_attribute( current_restriction_attribute ): 2087 tables.append( ( self._get_expand_ontology_attribute_table_query( attribute_identifier = current_restriction_attribute, values_to_expand_list = values_list), "childs" ) ) 2088 join_conditions.append( ("nq.value","=","childs.childs") ) 2089 2090 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(current_restriction_attribute, self.biana_database): 2091 join_conditions.append(("MATCH (nq.value)", 2092 "AGAINST", 2093 "('%s' IN BOOLEAN MODE)" %("\",\"".join(map(str,values_list))) )) 2094 else: 2095 join_conditions.append(("nq.value", 2096 "IN", 2097 "(\"%s\")" %("\",\"".join(map(str,values_list))) )) 2098 2099 num_query += 1 2100 2101 queries.append(self.db._get_select_sql_query( tables = tables, 2102 columns = columns, 2103 join_conditions = join_conditions, 2104 group_conditions = group_conditions )) 2105 2106 return self.db._get_union_queries( queries )
2107 2108 2109 2110
2111 - def _apply_negative_restrictions_to_query( self, query, unification_protocol_name, negative_attribute_restrictions, column_name_to_restrict="userEntityID" ):
2112 """ 2113 """ 2114 2115 if len(negative_attribute_restrictions)>0: 2116 2117 inner_negative_query = self._get_attribute_restrictions_query( unification_protocol_name = unification_protocol_name, 2118 negative_attribute_restrictions = negative_attribute_restrictions ) 2119 tables = [("(%s)" %query,"query_to_restrict_negatively")] 2120 2121 join_conditions = [("query_to_restrict_negatively.%s" %(column_name_to_restrict),"NOT IN","(%s)" %inner_negative_query)] 2122 2123 query = self.db._get_select_sql_query( tables = tables, 2124 columns = ["query_to_restrict_negatively.*"], 2125 join_conditions = join_conditions ) 2126 2127 2128 return query
2129 2130 2131 2132
2133 - def _apply_restrictions_to_query( self, query, unification_protocol_name, attribute_restrictions, column_name_to_restrict="userEntityID" ):
2134 """ 2135 Applies the restrictions for a given query, where some user entites must be restricted 2136 """ 2137 2138 unif_table = self._get_user_entity_table_name(unification_protocol_name = unification_protocol_name) 2139 2140 # Restrictions CANNOT be applied at external entity level. They must be applied at USER ENTITY LEVEL. 2141 # In order to do this, MySQL queries are very slow doing it in a direct way, so, it is faster to do it in nested way 2142 2143 if attribute_restrictions is not None: 2144 2145 attribute_restrictions = attribute_restrictions 2146 2147 num_nested_query = 1 2148 2149 for current_restriction_attribute, current_restriction_values in attribute_restrictions: 2150 2151 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_restriction_attribute.lower()] 2152 query_name = "subrestrictionquery%s" %num_nested_query 2153 2154 tables = [("(%s)" %query,query_name), 2155 (unif_table,"u"), 2156 (current_table, "q")] 2157 2158 columns = [ "%s.*" %query_name ] 2159 2160 join_conditions = [("%s.%s" %(query_name,column_name_to_restrict),"=","u.userEntityID"), 2161 ("q.externalEntityID","=","u.externalEntityID")] 2162 2163 values_list = [ x for x in str(current_restriction_values).split(",") ] 2164 2165 if BianaObjects.ExternalEntityAttribute.isFullTextSearchable(current_restriction_attribute, self.biana_database): 2166 join_conditions.append(("MATCH (q.value)", 2167 "AGAINST", 2168 "('%s' IN BOOLEAN MODE)" %("\",\"".join(map(str,values_list))) )) 2169 2170 if BianaObjects.ExternalEntityAttribute.isNumericAttribute(current_restriction_attribute, self.biana_database) or BianaObjects.ExternalEntityAttribute.isSpecialAttribute(current_restriction_attribute, self.biana_database): 2171 regex = re.compile("([><=]*)([\d\.]+)") 2172 list_of_non_greater_values = [] 2173 for current_value in values_list: 2174 m = regex.match(current_value) 2175 if m: 2176 join_conditions.append(("q.value",m.group(1),m.group(2))) 2177 else: 2178 list_of_non_greater_values.append(current_value) 2179 2180 if len(list_of_non_greater_values)>0: 2181 if self._is_ontology_linked_attribute( current_restriction_attribute ): 2182 tables.append( (self._get_expand_ontology_attribute_table_query( attribute_identifier = current_restriction_attribute, 2183 values_to_expand_list = list_of_non_greater_values ), "childs") ) 2184 join_conditions.append(("q.value","=","childs.childs")) 2185 else: 2186 join_conditions.append(("q.value", 2187 "IN", 2188 "(\"%s\")" %("\",\"".join(map(list_of_non_greater_values))))) 2189 else: 2190 if self._is_ontology_linked_attribute( current_restriction_attribute ): 2191 tables.append( (self._get_expand_ontology_attribute_table_query( attribute_identifier = current_restriction_attribute, 2192 values_to_expand_list = values_list ), "childs") ) 2193 join_conditions.append(("q.value","=","childs.childs")) 2194 else: 2195 join_conditions.append(("q.value", 2196 "IN", 2197 "(\"%s\")" %("\",\"".join(map(str,values_list))))) 2198 num_nested_query += 1 2199 2200 query = self.db._get_select_sql_query( tables = tables, 2201 columns = columns, 2202 join_conditions = join_conditions, 2203 distinct_columns = True ) 2204 2205 return query
2206 2207 2208
2209 - def _apply_relation_restrictions_to_query( self, query, attribute_restrictions_dict, column_name_to_restrict="externalEntityRelationID"):
2210 """ 2211 """ 2212 2213 num = 1 2214 2215 for attribute_name, values in attribute_restrictions_dict.iteritems(): 2216 2217 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_name.lower()] 2218 2219 tables = [ ("(%s)" %query, "query_%s" %num), 2220 (current_table.get_table_name(), "EERA%s" %num) ] 2221 2222 columns = [ "query_%s.*" %num ] 2223 2224 join_conditions = [("query_%s.%s" %(num,column_name_to_restrict),"=","EERA%s.externalEntityID" %num)] 2225 2226 if BianaObjects.ExternalEntityAttribute.isNumericAttribute(attribute_name, self.biana_database) or BianaObjects.ExternalEntityAttribute.isSpecialAttribute(attribute_name, self.biana_database): 2227 regex = re.compile("([><=]*)([\d\.]+)") 2228 for current_value in values: 2229 m = regex.match(current_value) 2230 if m: 2231 join_conditions.append(("EERA%s.value" %num,m.group(1),m.group(2))) 2232 else: 2233 join_conditions.append(("EERA%s.value" %num,"=","\"%s\"" %current_value)) 2234 else: 2235 join_conditions.append(("EERA%s.value" %num,"IN","(\"%s\")" %("\",\"".join([ str(x) for x in values])))) 2236 2237 num += 1 2238 2239 query = self.db._get_select_sql_query( tables = tables, 2240 columns = columns, 2241 join_conditions = join_conditions, 2242 distinct_columns = True ) 2243 2244 return query
2245 2246 2247
2248 - def _get_nested_queries_for_getting_equivalent_uE_by_sharing_attributes( self, unification_protocol_name, attribute_list, user_entity_ids_list, restrict_to_user_entity_ids_list=True, ontology_levels={} ):
2249 """ 2250 Returns a string with a sql query. The sql query returns 2 columns: userEntityID1 and userEntityID2, because they share the combination of attributes in the attribute_list 2251 """ 2252 2253 2254 # option 1 working, to check if it is efficient... 2255 unif_table = self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name) 2256 2257 last_query = None 2258 2259 current_nested_query = 1 2260 2261 for (current_attribute, parameter_and_value_list) in attribute_list: 2262 2263 nested_query_tables = [(unif_table,"u1"), 2264 (unif_table,"u2"), 2265 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute.lower()].get_table_name(),"attr1"), 2266 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute.lower()].get_table_name(),"attr2")] 2267 2268 nested_query_columns = [("u1.userEntityID AS userEntityID1"), 2269 ("u2.userEntityID AS userEntityID2")] 2270 2271 2272 nested_query_join_conditions = [("u1.userEntityID","!=","u2.userEntityID"), 2273 ("u1.externalEntityID", "=", "attr1.externalEntityID"), 2274 ("u2.externalEntityID", "=", "attr2.externalEntityID")] 2275 2276 nested_query_fixed_conditions = [] 2277 2278 # Non-parameterizable attribute 2279 if len(parameter_and_value_list) == 0: 2280 # Use ontologies to determine if they are the same or not 2281 if self._is_ontology_linked_attribute(current_attribute) and ontology_levels.has_key( self.ontology_linked_attributes[current_attribute.lower()]["level_attribute"] ): 2282 # Is ontology linked 2283 2284 # SI SOLUCIONO ESTO, SOLUCIONO LAS EXPANSIONES!!!! 2285 2286 nested_query_tables.append( (self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE, "o") ) 2287 nested_query_tables.append( (self.biana_database.EXTENDED_ONTOLOGY_HIERARCHY_TABLE, "o2") ) 2288 nested_query_tables.append( (self._get_key_attribute_table_name( key_id = self.ontology_linked_attributes[current_attribute.lower()]["key_id"] ), "k") ) 2289 nested_query_tables.append( (self._get_key_attribute_table_name( key_id = self.ontology_linked_attributes[current_attribute.lower()]["key_id"] ), "k2") ) 2290 nested_query_tables.append( (self._get_key_attribute_table_name( key_id = self.ontology_linked_attributes[current_attribute.lower()]["level_attribute"] ), "level") ) 2291 2292 nested_query_join_conditions.extend[ ("attr1.value", "=", "k.value"), 2293 ("k.externalEntityID", "=", "o.childExternalEntityID"), 2294 ("o.parentExternalEntityID", "=", "level.externalEntityID"), 2295 ("level.value","=",ontology_levels[self.ontology_linked_attributes[current_attribute.lower()]]["level_attribute"]), 2296 ("o.parentExternalEntityID", "=", "o2.parentExternalEntityID"), 2297 ("o2.externalEntityID", "=","k2.externalEntityID"), 2298 ("k2.value", "=", "attr2.value") ] 2299 2300 # TO CHECK TOMORROW... JAVI 2301 2302 else: 2303 nested_query_join_conditions.append(("attr1.value", "=", "attr2.value")) 2304 2305 # Parameterizable attribute - for instance; sequenceProtein 2306 else: 2307 # To make it generic need to change/add user_friendly_names in self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES 2308 if current_attribute.lower() == "proteinsequence": 2309 nested_query_tables.append( ("%s" %(self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[current_attribute.lower()]["table"].get_table_name()), "d_1") ) 2310 nested_query_tables.append( ("%s" %(self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[current_attribute.lower()]["table"].get_table_name()), "d_2") ) 2311 nested_query_tables.append( ("%s" %(self.biana_database.PROTEIN_BLAST_RESULTS_TABLE.get_table_name()), "b") ) 2312 nested_query_join_conditions.append(("attr1.value", "=", "d_1.sequenceMD5")) 2313 nested_query_join_conditions.append(("attr2.value", "=", "d_2.sequenceMD5")) 2314 nested_query_join_conditions.append(("b.sequenceID_A","=","d_1.proteinSequenceID")) 2315 nested_query_join_conditions.append(("b.sequenceID_B","=","d_2.proteinSequenceID")) 2316 nested_query_fixed_conditions.extend( [ ("b.%s" % parameter, ">=", value) for (parameter, value) in parameter_and_value_list ] ) 2317 else: 2318 notSupported = False 2319 for (parameter, value) in parameter_and_value_list: 2320 if str(value.strip()) != "": 2321 notSupported = True 2322 break 2323 if notSupported: 2324 sys.err.write("Warning: Not supported parameterizable attribute (ignored): %s\n" % current_attribute) 2325 continue 2326 nested_query_join_conditions.extend( [ ("a%s.%s" % (current_attribute, parameter), "=","b%s.%s" % (current_attribute, parameter) ) for (parameter, value) in parameter_and_value_list ] ) 2327 2328 if current_nested_query == 1: 2329 nested_query_join_conditions.append(("u1.userEntityID","IN","(%s)" %",".join(map(str,user_entity_ids_list)))) 2330 if restrict_to_user_entity_ids_list: 2331 nested_query_join_conditions.append(("u2.userEntityID","IN","(%s)" %",".join(map(str,user_entity_ids_list)))) 2332 else: 2333 nested_query_join_conditions.append(("u1.userEntityID","=","nested_query%s.userEntityID1" %(current_nested_query-1))) 2334 nested_query_join_conditions.append(("u2.userEntityID","=","nested_query%s.userEntityID2" %(current_nested_query-1))) 2335 nested_query_tables.append(("(%s)" %last_query, "nested_query%s" %(current_nested_query-1))) 2336 2337 2338 last_query = self.db._get_select_sql_query( tables = nested_query_tables, 2339 columns = nested_query_columns, 2340 fixed_conditions = nested_query_fixed_conditions, 2341 join_conditions = nested_query_join_conditions, 2342 distinct_columns = True ) 2343 2344 current_nested_query += 1 2345 2346 2347 #print last_query 2348 return self.db._get_select_sql_query( tables = [("(%s)" %last_query, "nested_queries")], 2349 columns = ["userEntityID1","userEntityID2"], 2350 distinct_columns = True )
2351 2352
2353 - def get_list_user_entities_IDs_by_attribute(self, unification_protocol_name, attribute_identifier, field_values, attribute_restrictions=None, negative_attribute_restrictions=None, restrict_to_user_entity_ids_list=[], include_type=False ):
2354 """ 2355 Returns a list of user entities that match with the attributes specified of type attribute_identifier 2356 2357 If include_type is set to True, it returns a list of tuples 2358 """ 2359 2360 sqlStat = self._get_list_external_entities_IDs_by_attribute_SQLstat( attribute_identifier=attribute_identifier, 2361 field_values=field_values ) 2362 2363 self._load_available_unification_protocols() 2364 2365 unif_table = self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name) 2366 2367 if attribute_identifier.lower()=="proteinsequenceid": 2368 attribute_identifier="proteinsequence" 2369 2370 attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier.lower()].get_table_name() 2371 sqlStat.add_element( tables = [(unif_table,"u")] ) 2372 sqlStat.reset_columns( columns = ["u.userEntityID"] ) 2373 sqlStat.add_element( join_conditions = [("u.externalEntityID", 2374 "=", 2375 "%s.externalEntityID" %attr_table )] ) 2376 if include_type is False: 2377 sqlStat.add_element( group_conditions = ["u.userEntityID"] ) 2378 else: 2379 sqlStat.add_element( tables = [(self.biana_database.EXTERNAL_ENTITY_TABLE,"e")] ) 2380 sqlStat.reset_columns( columns = ["u.userEntityID","e.type"] ) 2381 sqlStat.add_element( group_conditions = ["u.userEntityID","e.type"] ) 2382 sqlStat.add_element( join_conditions = [("u.externalEntityID","=","e.externalEntityID")] ) 2383 2384 if len(restrict_to_user_entity_ids_list)>0: 2385 sqlStat.add_element( fixed_conditions = [("u.userEntityID","IN","(%s)" %",".join(map(str,restrict_to_user_entity_ids_list)),None)] ) 2386 2387 2388 # Restrictions CANNOT be applied at external entity level. They must be applied at USER ENTITY LEVEL. 2389 # In order to do this, MySQL queries are very slow doing it in a direct way, so, it is faster to do it in nesting way 2390 2391 general_query = self.db._get_select_sql_query( tables = sqlStat.tables, 2392 columns = sqlStat.columns, 2393 fixed_conditions = sqlStat.fixed_conditions, 2394 join_conditions = sqlStat.join_conditions, 2395 group_conditions = sqlStat.group_conditions ) 2396 2397 restricted_query = self._apply_restrictions_to_query( query = general_query, 2398 unification_protocol_name = unification_protocol_name, 2399 attribute_restrictions = attribute_restrictions, 2400 column_name_to_restrict="userEntityID" ) 2401 2402 negatively_restricted_query = self._apply_negative_restrictions_to_query( query = restricted_query, 2403 unification_protocol_name = unification_protocol_name, 2404 column_name_to_restrict="userEntityID", 2405 negative_attribute_restrictions = negative_attribute_restrictions ) 2406 2407 2408 if len(sqlStat.columns)>1: 2409 initial_list = list(self.db.select_db_content( negatively_restricted_query, answer_mode = "raw" )) 2410 else: 2411 initial_list = self.db.select_db_content( negatively_restricted_query, answer_mode = "list" ) 2412 2413 return initial_list
2414 2415 2416 2417 #################################################################################### 2418 # SPECIFIC METHODS FOR ATTRIBUTE DESCRIPTION DATABASES # 2419 #################################################################################### 2420
2421 - def insert_new_attribute_description(self, attribute_identifier, field_values):
2422 """ 2423 "attribute_identifier" must be an accepted one 2424 2425 "field_values" must be a dictionary with the keys (accepted ones) and its values 2426 2427 """ 2428 2429 temp_field_values = [ (x.lower(),field_values[x]) for x in field_values ] 2430 2431 try: 2432 temp = self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[attribute_identifier.lower()] 2433 tableObject = temp["table"] 2434 if tableObject.has_optimized_fields(): 2435 table = tableObject.get_temp_table_name() 2436 else: 2437 table = tableObject.get_table_name() 2438 fields = temp["fields"] 2439 except: 2440 raise "%s is not a valid attribute identifier" %attribute_identifier 2441 2442 column_values = [] 2443 2444 for actual_field in temp_field_values: 2445 if actual_field[1] is not None: 2446 fieldObject = fields[actual_field[0]] 2447 2448 #check the field name (optimized or not) 2449 if fieldObject.get_optimized_space() is not None: 2450 fieldName = fieldObject.get_optimized_field_name() 2451 else: 2452 fieldName = fieldObject.get_field_name() 2453 2454 #check if the data is compressed... 2455 if fieldObject.is_compressed() is not None: 2456 #column_values.append((fieldName,"COMPRESS(\"%s\")" %str(actual_field[1]).replace('"',''),1)) 2457 column_values.append((fieldName,"COMPRESS(\"%s\")" %str(actual_field[1]).replace('\\','\\\\').replace('"','\\"'),1)) 2458 else: 2459 #column_values.append((fieldName,str(actual_field[1]).replace('"',''))) 2460 column_values.append((fieldName,str(actual_field[1]).replace('\\','\\\\').replace('"','\\"'))) 2461 2462 self.db.insert_db_content(self.db._get_insert_sql_query( table = table, 2463 column_values = column_values), 2464 #column_values = zip(columns,values) ), 2465 answer_mode=None)
2466
2467 - def output_sequences(self, outmethod, type="proteinsequence", format="fasta", sequenceIDs=None):
2468 """ 2469 outmethod: destination where data in fasta format would be written 2470 type: either "proteinsequence" or "nucleotidesequence" - decides which type of sequence is desired 2471 format: either "fasta" or "seq" corresponding to fasta format or raw sequence format 2472 sequenceIDs: list of sequence ids whose sequence would be outputted - if None will output all sequences in the database 2473 """ 2474 # Set block size not to limit mem usage 2475 database_block_limit=1000000 2476 database_block_offset=0 2477 aa_x_line = 79 #80 # amino acid/nucleotide per line 2478 enter_to_loop = True 2479 2480 if sequenceIDs is not None: 2481 enter_to_loop = False 2482 if type.lower() == "proteinsequence": 2483 query = "SELECT proteinSequenceID, UNCOMPRESS(sequence) FROM %s WHERE proteinSequenceID IN (%s)" % (self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name(), ",".join(sequenceIDs)) 2484 self.db._check_locked_table(table=self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name()) 2485 elif type.lower() == "nucleotidesequence": 2486 query = "SELECT nucleotideSequenceID, UNCOMPRESS(sequence) FROM %s WHERE nucleotideSequenceID IN (%s)" % (self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"].get_table_name(), ",".join(sequenceIDs)) 2487 self.db._check_locked_table(table=self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"].get_table_name()) 2488 else: 2489 raise ValueError("Sequence type not recognized") 2490 temp_sequences = self.db.select_db_content( query, answer_mode = "raw", remove_duplicates="no" ) 2491 # Gets all the sequences in db 2492 else: 2493 enter_to_loop = True 2494 if type.lower() == "proteinsequence": 2495 query = "SELECT proteinSequenceID, UNCOMPRESS(sequence) FROM %s LIMIT %s" %(self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name(), database_block_limit) 2496 self.db._check_locked_table(table=self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"].get_table_name()) 2497 elif type.lower() == "nucleotidesequence": 2498 query = "SELECT nucleotideSequenceID, UNCOMPRESS(sequence) FROM %s LIMIT %s" %(self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"], database_block_limit) 2499 self.db._check_locked_table(table=self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"].get_table_name()) 2500 else: 2501 raise ValueError("Sequence type not recognized") 2502 temp_sequences = self.db.select_db_content( query + " OFFSET %s" % database_block_offset, answer_mode = "raw", remove_duplicates="no" ) 2503 2504 while( len(temp_sequences) > 0 ): 2505 database_block_offset += database_block_limit 2506 #print "%s sequences done!" %(database_block_offset) 2507 for actual_sequence in temp_sequences: 2508 if format.lower() == "fasta": 2509 #outmethod(">%s\n%s\n" %(actual_sequence[0],"\n".join([actual_sequence[1][x*aa_x_line:x*aa_x_line+aa_x_line] for x in xrange((len(actual_sequence[1])/aa_x_line)+1) ]))) 2510 outmethod(">%s\n%s\n" %(actual_sequence[0],"\n".join([ actual_sequence[1][x*aa_x_line:x*aa_x_line+aa_x_line] for x in xrange(int(ceil(len(actual_sequence[1])/float(aa_x_line)))) ]))) 2511 elif format.lower() == "seq": 2512 outmethod("%s\t%s\n" %(actual_sequence[0],actual_sequence[1])) 2513 if not enter_to_loop: 2514 break 2515 temp_sequences = self.db.select_db_content( query + " OFFSET %s" % database_block_offset, answer_mode = "raw", remove_duplicates="no" )
2516 2517
2518 - def _empty_sequences_table(self, type):
2519 """ 2520 2521 """ 2522 2523 if type=="proteinsequence": 2524 table = "sequenceProtein" 2525 elif type=="nucleotidesequence": 2526 table = "sequenceNucleotide" 2527 else: 2528 raise ValueError("Type %s not recognized to empy sequences table" %(type) ) 2529 2530 self.db.insert_db_content( self.db._get_delete_sql_query(table = table ) ) 2531 2532 return
2533 2534 #################################################################################### 2535 # SOME SPECIFIC METHODS # 2536 #################################################################################### 2537
2538 - def get_taxonomy_names_taxID_dict(self, tax_id_name_type=None):
2539 """ 2540 name is lower cased 2541 """ 2542 a = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["taxid"] 2543 b = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["taxid_name"] 2544 2545 self.db._enable_indices( table_list = [a,b] ) 2546 2547 fixed_conditions = [] 2548 if tax_id_name_type is not None: 2549 fixed_conditions.append(("taxid_name_type","=","scientific name")) 2550 2551 result = dict(self.db.select_db_content( self.db._get_select_sql_query( tables = [a,b], 2552 columns = ["LCASE(%s.value)" %b,"%s.value" %a], 2553 join_conditions = [("%s.externalEntityID" %a, "=", "%s.externalEntityID" %b )], 2554 fixed_conditions = fixed_conditions), 2555 answer_mode = "raw") ) 2556 return result
2557 2558 2559
2560 - def get_taxonomies_from_species_name(self, species_name):
2561 2562 list_taxID = self.db.select_db_content( self.db.SelectAttributeDescription( attribute_identifier = "ncbi_taxonomy_names", 2563 fields = ["taxID"], 2564 conditions = {"name": species_name} ), 2565 answer_mode = "list", remove_duplicates = "yes" ) 2566 return list_taxID
2567 2568 2569 #################################################################################### 2570 # SEQUENCE RELATED METHODS # 2571 #################################################################################### 2572
2573 - def get_sequence_from_sequenceID( self, sequenceID ):
2574 2575 # TO CHECK! not only sequence protein... 2576 2577 return self.db.select_db_content( "SELECT UNCOMPRESS(sequence) FROM sequenceProtein WHERE sequenceProteinID=%s" %sequenceID, 2578 answer_mode = "single" )
2579
2580 - def get_sequence_taxonomies( self, sequenceID ):
2581 2582 return self.db.select_db_content( "SELECT externalEntityTaxID.value FROM sequenceProtein,externalEntityTaxID,externalEntityProteinSequence WHERE proteinSequenceID=%s AND externalEntityProteinSequence.value = sequenceMD5 AND externalEntityTaxID.externalEntityID = externalEntityProteinSequence.externalEntityID" %sequenceID, 2583 answer_mode = "list", remove_duplicates=True )
2584 2585 2586 #################################################################################### 2587 # PDB RELATED METHODS # 2588 #################################################################################### 2589
2590 - def insert_pdb_object( self, PDBObject, source_database, description = None ):
2591 2592 pdb = PDBObject.get_name() 2593 2594 # Each chain is stored separatedly 2595 # To each one, a new external Entity Object is created... (I don't know if doing this in this way... 2596 2597 for actual_chain in PDBObject.get_chain_names(): 2598 2599 externalEntity = BianaObjects.ExternalEntity( source_database= source_database, type="structure") 2600 2601 externalEntity.add_attribute(BianaObjects.ExternalEntityAttribute( attribute_identifier = "proteinsequence", 2602 value = BianaObjects.ProteinSequence(sequence = PDBObject.get_sequence( chain_name = actual_chain )))) 2603 2604 2605 residues_list = PDBObject.get_residues( chain_name = actual_chain ) 2606 temp_residue_num_list = [ int(x.get_residue_num()) for x in residues_list ] 2607 2608 externalEntity.add_attribute(BianaObjects.ExternalEntityAttribute( attribute_identifier = "pdb", 2609 value = pdb, 2610 type = "unique", 2611 additional_fields = { "chain": actual_chain, 2612 "pdb_range": "%s-%s" %(min(temp_residue_num_list), 2613 max(temp_residue_num_list))})) 2614 if description is not None: 2615 externalEntity.add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier = "description", value = description)) 2616 2617 self.insert_new_external_entity( externalEntity = externalEntity ) 2618 2619 #print "Inserting to database..." 2620 2621 total_num_atoms = PDBObject.get_chain_num_atoms( chain_name = actual_chain ) 2622 2623 #residue_num_list = unsigned_int_list_to_ascii(2,[ x.get_residue_num() for x in residues_list ]).replace('\\','\\\\').replace('"','\\"') 2624 residue_num_list = int_ascii.int_list_to_ascii(2,temp_residue_num_list).replace('\\','\\\\').replace('"','\\"') 2625 2626 residue_type_list = '\x00'.join([ x.get_residue_type() for x in residues_list ]).replace('\\','\\\\').replace('"','\\"') 2627 2628 #print [ x for x in residues_list ] 2629 #print [ x.get_num_atoms() for x in residues_list ] 2630 residue_atom_num_list = int_ascii.unsigned_int_list_to_ascii(1,[ x.get_num_atoms() for x in residues_list ]).replace('\\','\\\\').replace('"','\\"') 2631 2632 atom_type_list = "".join([y.get_type() for x in residues_list for y in x.get_atoms() ]).replace('\\','\\\\').replace('"','\\"') 2633 2634 atom_name_list = '\x00'.join([y.get_name() for x in residues_list for y in x.get_atoms() ]).replace('\\','\\\\').replace('"','\\"') 2635 2636 atom_coordinates = [] 2637 [ atom_coordinates.extend(y.get_coordinates()) for x in residues_list for y in x.get_atoms() ] 2638 2639 atom_coordinates_list = int_ascii.float_list_to_ascii(3,3,atom_coordinates).replace('\\','\\\\').replace('"','\\"') 2640 2641 resolution = PDBObject.resolution 2642 2643 column_values = [ ("pdb",pdb), 2644 ("chain",actual_chain), 2645 ("externalEntityID", externalEntity.get_id()), 2646 ("total_num_atoms",total_num_atoms) ] 2647 2648 if resolution is not None: 2649 column_values.append(("resolution",str(resolution))) 2650 2651 self.db.insert_db_content( self.db._get_insert_sql_query( table = "pdb", 2652 column_values = column_values, 2653 special_column_values = [("residue_num_list","COMPRESS(\"%s\")" %residue_num_list), 2654 ("residue_type_list","COMPRESS(\"%s\")" %residue_type_list), 2655 ("residue_atom_num_list","COMPRESS(\"%s\")" %residue_atom_num_list), 2656 ("atom_type_list","COMPRESS(\"%s\")" %atom_type_list), 2657 ("atom_name_list","COMPRESS(\"%s\")" %atom_name_list), 2658 ("atom_coordinates","COMPRESS(\"%s\")" %atom_coordinates_list) ], 2659 use_buffer=self.use_buffer), 2660 answer_mode = None)
2661 2662
2663 - def add_hssp_info_to_pdb(self,pdb_code, chain, residue_pdb_number, residue_hssp_entropy, residue_hssp_norm_entropy, residue_hssp_variability, conservation_hssp, solvent_exposure_hssp, dssp):
2664 """ 2665 Adds HSSP information to pdb files 2666 2667 If the PDB information does not exist, inserts the data 2668 """ 2669 2670 print "inserting %s to chain %s" %(residue_pdb_number,chain) 2671 # It is necessary to lock table pdb, as it is not inserted in the usual and automathic way 2672 self.db._check_locked_table(table="pdb") 2673 2674 accessibility = int_ascii.unsigned_int_list_to_ascii(1,solvent_exposure_hssp).replace('\\','\\\\').replace('"','\\"') 2675 variability = int_ascii.unsigned_int_list_to_ascii(1,residue_hssp_variability).replace('\\','\\\\').replace('"','\\"') 2676 entropy = int_ascii.unsigned_float_list_to_ascii(2,3,residue_hssp_entropy).replace('\\','\\\\').replace('"','\\"') 2677 norm_entropy = int_ascii.unsigned_int_list_to_ascii(1,residue_hssp_norm_entropy).replace('\\','\\\\').replace('"','\\"') 2678 conservation = int_ascii.unsigned_float_list_to_ascii(1,2,conservation_hssp).replace('\\','\\\\').replace('"','\\"') 2679 residue_numbers = int_ascii.int_list_to_ascii(2,residue_pdb_number).replace('\\','\\\\').replace('"','\\"') 2680 2681 query = "UPDATE pdb SET hssp_residue_num_correspondences=COMPRESS(\"%s\"),residue_dssp_results=COMPRESS(\"%s\"),residue_hssp_entropy=COMPRESS(\"%s\"),residue_hssp_norm_entropy=COMPRESS(\"%s\"),residue_hssp_variability=COMPRESS(\"%s\"),conservation_hssp=COMPRESS(\"%s\"),solvent_exposure_hssp=COMPRESS(\"%s\") WHERE pdb=\"%s\" AND chain=\"%s\"" %(residue_numbers,"".join(dssp),entropy,norm_entropy,variability,conservation,accessibility,pdb_code,chain) 2682 #print query 2683 self.db.insert_db_content( query, answer_mode = None )
2684 2685
2686 - def load_pdb_object(self, pdb_code, fragments=[], merge_fragments = False, request_information=None):
2687 """ 2688 "pdb_code" is mandatory 2689 2690 "fragments" is a list of pdb fragments. 2691 2692 "merge_fragments" is used to merge all fragments or chains as a single chain or to mantain each chain as a different chain. If it is used, atom numbers and residue numbers are changed 2693 2694 "request_information" indicates which information has to be loaded. It is a list. It can be: 2695 It can be: "residue_type","atoms_info","hssp_conservation","hssp_entropy","hssp_exposure","hssp_norm_entropy","hssp_variability","dssp" 2696 """ 2697 2698 information = ["residue_type","atoms_info"] 2699 2700 if isinstance(request_information,list): 2701 information.extend(request_information) 2702 2703 2704 pdbObject = BianaObjects.PDB( name=pdb_code ) 2705 2706 2707 #specify the pdb code 2708 fixed_conditions = [("pdb.pdb","=",pdb_code)] 2709 2710 # specify the chains to obtain 2711 requested_chains = [ x.get_chain() for x in fragments ] 2712 2713 if len(requested_chains)>0 and None not in requested_chains: 2714 fixed_conditions.append(("pdb.chain","IN","(\"%s\")" %("\",\"".join(requested_chains)),None)) 2715 2716 possible_requested_data = {"residue_type": "UNCOMPRESS(residue_type_list)", 2717 "atoms_info": ["UNCOMPRESS(atom_type_list)", 2718 "UNCOMPRESS(atom_name_list)", 2719 "UNCOMPRESS(atom_coordinates)", 2720 "UNCOMPRESS(residue_atom_num_list)"], 2721 "hssp_residue_num_correspondences":"UNCOMPRESS(hssp_residue_num_correspondences)", 2722 "hssp_conservation":"UNCOMPRESS(conservation_hssp)", 2723 "hssp_entropy":"UNCOMPRESS(residue_hssp_entropy)", 2724 "hssp_exposure":"UNCOMPRESS(solvent_exposure_hssp)", 2725 "hssp_norm_entropy":"UNCOMPRESS(residue_hssp_norm_entropy)", 2726 "hssp_variability": "UNCOMPRESS(residue_hssp_variability)", 2727 "dssp": "UNCOMPRESS(residue_dssp_results)"} 2728 2729 columns = ["pdb","chain","resolution","UNCOMPRESS(residue_num_list)"] 2730 pos = len(columns) 2731 requested_data_indexes = {} 2732 append_hssp_eq = None 2733 try: 2734 for actual_information in information: 2735 if actual_information != "atoms_info": 2736 if actual_information != "residue_type": 2737 append_hssp_eq = 1 2738 columns.append(possible_requested_data[actual_information.lower()]) 2739 requested_data_indexes[actual_information.lower()] = pos 2740 pos += 1 2741 else: 2742 columns.extend(possible_requested_data["atoms_info"]) 2743 requested_data_indexes[actual_information.lower()] = pos 2744 pos += 4 2745 except: 2746 raise ValueError("Trying to get unavailable pdb information") 2747 2748 if append_hssp_eq is not None: 2749 columns.append(possible_requested_data["hssp_residue_num_correspondences"]) 2750 requested_data_indexes["hssp_residue_num_correspondences"] = pos 2751 2752 query = self.db._get_select_sql_query( tables= ["pdb"], 2753 columns = columns, 2754 fixed_conditions = fixed_conditions ) 2755 2756 2757 data = self.db.select_db_content( query, answer_mode = "raw", remove_duplicates = "no" ) 2758 2759 atom_num = 1 2760 residue_num_value = 1 2761 2762 for actual_data in data: 2763 chain = actual_data[1] 2764 if merge_fragments: 2765 new_chain = 'A' 2766 else: 2767 new_chain = chain 2768 residue_num_list = int_ascii.ascii_to_int_list(2,actual_data[3]) 2769 residue_type_list = None 2770 residue_type_value = None 2771 atom_type_list = None 2772 atom_name_list = None 2773 atom_coordinates_list = None 2774 residue_atom_num_list = None 2775 hssp_residue_num_correspondence = None 2776 hssp_conservation = None 2777 hssp_entropy = None 2778 hssp_exposure = None 2779 hssp_norm_entropy = None 2780 hssp_variability = None 2781 hssp_conservation_value = None 2782 hssp_entropy_value = None 2783 hssp_exposure_value = None 2784 hssp_norm_entropy_value = None 2785 hssp_variability_value = None 2786 dssp = None 2787 dssp_value = " " 2788 2789 for actual_requested in requested_data_indexes: 2790 2791 if actual_requested == "residue_type": 2792 residue_type_list = actual_data[requested_data_indexes["residue_type"]].split('\x00') 2793 #print residue_type_list 2794 continue 2795 if actual_requested == "atoms_info": 2796 #print "Getting atoms info" 2797 atom_type_list = actual_data[requested_data_indexes["atoms_info"]] 2798 atom_name_list = actual_data[requested_data_indexes["atoms_info"]+1].split('\x00') 2799 atom_coordinates_list = int_ascii.ascii_to_float_list(3,3,actual_data[requested_data_indexes["atoms_info"]+2]) 2800 residue_atom_num_list = int_ascii.ascii_to_unsigned_int_list(1,actual_data[requested_data_indexes["atoms_info"]+3]) 2801 #print atom_type_list 2802 #print atom_name_list 2803 #print atom_coordinates_list 2804 #print residue_atom_num_list 2805 continue 2806 if actual_requested == "hssp_conservation": 2807 temp = actual_data[requested_data_indexes["hssp_conservation"]] 2808 if temp is not None: 2809 hssp_conservation = int_ascii.ascii_to_unsigned_float_list(1,2,temp) 2810 else: 2811 print "Trying to get the conservation for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1]) 2812 continue 2813 if actual_requested == "hssp_entropy": 2814 temp = actual_data[requested_data_indexes["hssp_entropy"]] 2815 if temp is not None: 2816 hssp_entropy = int_ascii.ascii_to_unsigned_float_list(2,3,temp) 2817 else: 2818 print "Trying to get the entropy for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1]) 2819 continue 2820 if actual_requested == "hssp_exposure": 2821 temp = actual_data[requested_data_indexes["hssp_exposure"]] 2822 if temp is not None: 2823 hssp_exposure = int_ascii.ascii_to_unsigned_int_list(1,temp) 2824 else: 2825 print "Trying to get the exposure for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1]) 2826 continue 2827 if actual_requested == "hssp_norm_entropy": 2828 temp = actual_data[requested_data_indexes["hssp_norm_entropy"]] 2829 if temp is not None: 2830 hssp_norm_entropy = int_ascii.ascii_to_unsigned_int_list(1,temp) 2831 else: 2832 print "Trying to get the normalized entropy for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1]) 2833 continue 2834 if actual_requested == "hssp_variability": 2835 temp = actual_data[requested_data_indexes["hssp_variability"]] 2836 if temp is not None: 2837 hssp_variability = int_ascii.ascii_to_unsigned_int_list(1,temp) 2838 else: 2839 print "Trying to get the variability for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1]) 2840 continue 2841 if actual_requested == "dssp": 2842 temp = actual_data[requested_data_indexes["dssp"]] 2843 if temp is not None: 2844 dssp = temp 2845 else: 2846 print "Trying to get the dssp for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1]) 2847 continue 2848 if actual_requested == "hssp_residue_num_correspondences": 2849 temp = actual_data[requested_data_indexes["hssp_residue_num_correspondences"]] 2850 if temp is not None: 2851 hssp_residue_num_correspondence = int_ascii.ascii_to_int_list(2,temp) 2852 else: 2853 print "Trying to get the hssp equivalences for pdb %s and chain %s that has no data" %(actual_data[0],actual_data[1]) 2854 continue 2855 2856 2857 # Get the equivalences between hssp and pdb 2858 if hssp_residue_num_correspondence is not None: 2859 hssp_num = 0 2860 hssp_pdb_equivalences = [] 2861 #print hssp_residue_num_correspondence 2862 #print residue_num_list 2863 for x in xrange(len(residue_num_list)): 2864 if hssp_num>=len(hssp_residue_num_correspondence): 2865 hssp_pdb_equivalences.append(None) 2866 continue 2867 if residue_num_list[x] == hssp_residue_num_correspondence[hssp_num]: 2868 hssp_pdb_equivalences.append(x) 2869 hssp_num+=1 2870 else: 2871 hssp_pdb_equivalences.append(None) 2872 2873 # Create all the atoms and add them to the pdbObject 2874 actual_atom_pos = 0 2875 2876 for x in xrange(len(residue_num_list)): 2877 residue_requested = None 2878 #print residue_num_list[x] 2879 2880 if len(fragments)>0: 2881 if True in [ current_fragment.includes( chain=chain, res_num=residue_num_list[x] ) for current_fragment in fragments ]: 2882 residue_requested = True 2883 else: 2884 residue_requested = True 2885 2886 #TODO: Put the equivalences with the hssp numeration... 2887 if residue_requested: 2888 2889 if hssp_residue_num_correspondence is not None and hssp_pdb_equivalences[x] is not None: 2890 if hssp_conservation is not None: 2891 hssp_conservation_value = hssp_conservation[hssp_pdb_equivalences[x]] 2892 if hssp_entropy is not None: 2893 hssp_entropy_value = hssp_entropy[hssp_pdb_equivalences[x]] 2894 if hssp_exposure is not None: 2895 hssp_exposure_value = hssp_exposure[hssp_pdb_equivalences[x]] 2896 if hssp_norm_entropy is not None: 2897 hssp_norm_entropy_value = hssp_norm_entropy[hssp_pdb_equivalences[x]] 2898 if hssp_variability is not None: 2899 hssp_variability_value = hssp_variability[hssp_pdb_equivalences[x]] 2900 if dssp is not None: 2901 dssp_value = dssp[hssp_pdb_equivalences[x]] 2902 if residue_type_list is not None: 2903 residue_type_value = residue_type_list[x] 2904 if merge_fragments: 2905 residue_num_value += 1 2906 else: 2907 residue_num_value = residue_num_list[x] 2908 2909 pdbObject.add_residue( chain_name = new_chain, 2910 residue_num = residue_num_value, 2911 residue_type = residue_type_value, 2912 atoms_initial_list = [], 2913 hssp_conservation = hssp_conservation_value, 2914 hssp_entropy = hssp_entropy_value, 2915 hssp_exposure = hssp_exposure_value, 2916 hssp_norm_entropy = hssp_norm_entropy_value, 2917 hssp_variability = hssp_variability_value, 2918 dssp = dssp_value ) 2919 2920 if requested_data_indexes.has_key("atoms_info"): 2921 #print residue_atom_num_list[x] 2922 for y in xrange(residue_atom_num_list[x]): 2923 current_atom = BianaObjects.PDBAtom( atom_num = atom_num, 2924 atom_type = atom_type_list[actual_atom_pos+y], 2925 atom_name = atom_name_list[actual_atom_pos+y], 2926 x = atom_coordinates_list[(actual_atom_pos+y)*3], 2927 y = atom_coordinates_list[(actual_atom_pos+y)*3+1], 2928 z = atom_coordinates_list[(actual_atom_pos+y)*3+2]) 2929 atom_num+=1 2930 #actual_atom_pos +=1 2931 pdbObject.add_atom( atomObject= current_atom, 2932 chain_name = new_chain, 2933 residue_num = residue_num_value, 2934 residue_type = residue_type_list[x] ) 2935 2936 actual_atom_pos += residue_atom_num_list[x] 2937 2938 #print pdbObject.get_in_pdb_format() 2939 #print pdbObject.get_entropy("A") 2940 2941 if pdbObject.get_num_residues() == 0: 2942 raise ValueError("Empty PDB created") 2943 else: 2944 print pdbObject.get_num_residues() 2945 2946 return pdbObject
2947 2948 2949 #residue_num_list = int_ascii.ascii_to_int_list(2,actual_data[2]) 2950 #print residue_num_list 2951 2952 2953 2954 #################################################################################### 2955 # ALIGNMENTS RELATED METHODS # 2956 #################################################################################### 2957
2958 - def insert_alignment( self, alignmentObject, externalEntityID, insert_aligned="no" ):
2959 """ 2960 "insert_aligned" is used to insert exactly the aligned sequence (used in done alignments, as HSSP, as the sequences does not always is exactly the same 2961 In this case, the fragments are not inserted 2962 """ 2963 2964 for x in xrange(len(alignmentObject.get_alignment())): 2965 2966 column_values = [("externalEntityID",externalEntityID), 2967 #("sequenceMD5",str(alignmentObject.sequence_ids_list[x]).replace('\\','\\\\').replace('"','\\"')), 2968 #("sequenceMD5",str(alignmentObject.sequence_ids_list[x])), 2969 ("position",x)] 2970 2971 if alignmentObject.sequence_ids_list[x] is not None: 2972 column_values.append(("sequenceMD5",str(alignmentObject.sequence_ids_list[x]))) 2973 if alignmentObject.cross_ids_list[x] is not None: 2974 column_values.append(("crossID",str(alignmentObject.cross_ids_list[x][1]))) 2975 column_values.append(("crossID_identifier_type",alignmentObject.cross_ids_list[x][0])) 2976 2977 special_column_values = [] 2978 2979 #print "Inserting eE %s with sequenceID %s to position %s" %(externalEntityID,alignmentObject.sequence_ids_list[x],x) 2980 2981 if insert_aligned == "no": 2982 column_values.append(("alignmentBlocks",alignmentObject.get_sequence_fragments_in_ascii(x))) 2983 else: 2984 special_column_values.append(("alignedSequence","COMPRESS(\"%s\")" %alignmentObject.get_aligned_sequence(x))) 2985 2986 if alignmentObject.sim[x] is not None: 2987 column_values.append(("identity",alignmentObject.sim[x])) 2988 if alignmentObject.wsim[x] is not None: 2989 column_values.append(("weighted_sim",alignmentObject.wsim[x])) 2990 if alignmentObject.lali[x] is not None: 2991 column_values.append(("sequence_covered",alignmentObject.lali[x])) 2992 2993 2994 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.ALIGNMENT_ELEMENT_TABLE, 2995 column_values = column_values, 2996 special_column_values = special_column_values, 2997 use_buffer=self.use_buffer), 2998 answer_mode = None )
2999 3000
3001 - def get_alignment( self, externalEntityID, get_species=False, range=None ):
3002 """ 3003 Returns an alignment object identified by the externalEntityID used as parameter 3004 """ 3005 3006 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.ALIGNMENT_ELEMENT_TABLE], 3007 columns = ["sequenceMD5","position","alignmentBlocks","UNCOMPRESS(alignedSequence)","crossID","crossID_identifier_type"], 3008 fixed_conditions = [(self.biana_database.externalEntityID_col,"=",externalEntityID)]), 3009 answer_mode = "raw" ) 3010 3011 aln = BianaObjects.SequenceAlignment() 3012 3013 aln.set_number_of_sequences( number_of_sequences = len(data) ) 3014 3015 seq_id_tax_id_dict = {} 3016 3017 temp_list_seq_ids = [ actual_aligned_seq[0] for actual_aligned_seq in data ] 3018 3019 def start_list(x): 3020 seq_id_tax_id_dict[x]=[]
3021 3022 map(start_list,temp_list_seq_ids) 3023 3024 #[ seq_id_tax_id_dict[x] = [] for x in temp_list_seq_ids ] 3025 3026 3027 # FOR DOING IT USING SEQUENCE ID... NOT USED NOW. 3028 #if get_species == "yes": 3029 # sequenceID_taxID_correspondences = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTES_DICT["taxid"]["table"], 3030 # self.biana_database.EXTERNAL_ENTITY_ATTRIBUTES_DICT["sequence"]["table"]], 3031 # columns = ["sequenceMD5","taxID"], 3032 # fixed_conditions = [("sequenceMD5","IN","(%s)" %(",".join([str(x) for x in temp_list_seq_ids])),None)], 3033 # join_conditions = [("%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTES_DICT["taxid"]["table"], 3034 # "=", 3035 # "%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTES_DICT["sequence"]["table"])]), 3036 # 3037 # answer_mode = "raw" ) 3038 3039 # for actual_seq_taxID in sequenceID_taxID_correspondences: 3040 # seq_id_tax_id_dict[actual_seq_taxID[0]].append(actual_seq_taxID[1]) 3041 3042 tax_ids = [] 3043 3044 # COMMENTED AS HSSP GIVES SPECIE... BUT IN THE FUTURE IT MUST BE UNCOMMENTED 3045 #if get_species == True: 3046 # taxid_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["taxid"] 3047 # for actual_aligned_seq in data: 3048 # attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_aligned_seq[5].lower()] 3049 # query = self.db._get_select_sql_query( tables = [taxid_table,attr_table], 3050 # columns = ["DISTINCT(%s.value)" %taxid_table], 3051 # fixed_conditions = [("%s.value" %attr_table,"=",actual_aligned_seq[4])], 3052 # join_conditions = [("%s.externalEntityID" %attr_table, 3053 # "=", 3054 # "%s.externalEntityID" %taxid_table)] ) 3055 # print query 3056 # tax_ids.append( self.db.select_db_content( sql_query = query, answer_mode = "list" ) ) 3057 #else: 3058 # tax_ids = [[] for x in xrange(len(data))] 3059 3060 3061 for x_actual_aligned_seq in xrange(len(data)): 3062 actual_aligned_seq = data[x_actual_aligned_seq] 3063 try: 3064 tax_id_list = [ actual_aligned_seq[4].split("_")[1] ] 3065 except: 3066 tax_id_list = [] 3067 aln.add_sequence_to_position( sequence_id = actual_aligned_seq[0], 3068 #taxID_list = seq_id_tax_id_dict[actual_aligned_seq[0]], 3069 #taxID_list = tax_ids[x_actual_aligned_seq], # TO UNCOMMENT IF UNCOMMENTED ABOVE 3070 taxID_list = tax_id_list, 3071 sequence_position = actual_aligned_seq[1], 3072 sequence_fragments_ascii = actual_aligned_seq[2], 3073 aligned_sequence = actual_aligned_seq[3], 3074 crossID = (actual_aligned_seq[5],actual_aligned_seq[4]) ) 3075 3076 #aln.print_alignment() 3077 3078 return aln 3079 3080
3081 - def load_hssp_multiple_chains(self, pdb_id, fragments=[], get_species=False):
3082 """ 3083 3084 """ 3085 final_aln = None 3086 3087 for current_fragment in fragments: 3088 new_aln = self.load_hssp_alignment(pdb_id=pdb_id, chain=current_fragment.get_chain(), fragments=[current_fragment], get_species=get_species) 3089 if final_aln is None: 3090 final_aln = new_aln 3091 else: 3092 final_aln = final_aln.concatenate_by_crossID(new_aln) 3093 3094 return final_aln
3095 3096 #by_chains = {} 3097 #for current_fragment in fragments: 3098 # by_chains.setdefault(current_fragment.get_chain(),[]).append(current_fragment) 3099 3100 #print "Requested %s chains!" %(len(by_chains)) 3101 3102 #alns = {} 3103 3104 #for current_chain in by_chains: 3105 # self.load_hssp_alignment(pdb_id=pdb_id, chain=current_chain, fragments=by_chains[current_chain], get_species=get_species) 3106 3107 #if len(by_chains)>2: 3108 # raise ValueError("for the moment, only prepared to run with a maximum of two chains") 3109 3110 3111 3112 3113
3114 - def load_hssp_alignment(self, pdb_id, chain, fragments=[], get_species=False):
3115 """ 3116 Returns an alignment for an structure given in the hssp 3117 3118 For the moment, all the fragments must belong to the same chain 3119 """ 3120 3121 pdb_attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["pdb"] 3122 3123 fixed_conditions = [("%s.value" %pdb_attr_table,"=",pdb_id)] 3124 join_conditions = [("%s.externalEntityID" %pdb_attr_table,"=","%s.externalEntityID" %self.biana_database.ALIGNMENT_ELEMENT_TABLE ), 3125 ("%s.pdb" %self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["pdb"],"=","%s.value" %pdb_attr_table)] 3126 3127 if chain is not None: 3128 fixed_conditions.append(("%s.chain" %pdb_attr_table,"=",chain)) 3129 join_conditions.append(("%s.chain" %pdb_attr_table,"=","%s.chain" %pdb_attr_table)) 3130 3131 query = self.db._get_select_sql_query( tables = [self.biana_database.ALIGNMENT_ELEMENT_TABLE, 3132 self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["pdb"], 3133 pdb_attr_table], 3134 columns = ["%s.externalEntityID" %pdb_attr_table, 3135 "UNCOMPRESS(hssp_residue_num_correspondences)", 3136 "UNCOMPRESS(residue_num_list)"], 3137 fixed_conditions = fixed_conditions, 3138 join_conditions = join_conditions ) 3139 3140 data = self.db.select_db_content( query, answer_mode = "raw" ) 3141 3142 if len(data)==0: 3143 print "HSSP %s NOT FOUND\n" %pdb_id 3144 # Search for an homologous chain?? 3145 return None 3146 else: 3147 alignmentExternalEntityID = data[0][0] 3148 hssp_residue_num_correspondence = int_ascii.ascii_to_int_list(2,data[0][1]) 3149 residue_num_list = int_ascii.ascii_to_int_list(2,data[0][2]) 3150 3151 #print residue_num_list 3152 3153 align_fragments_to_select = [] 3154 current_start = 0 3155 current_end = None 3156 3157 if len(fragments)>0: 3158 for actual_fragment in fragments: 3159 for x in xrange(len(residue_num_list)): 3160 if (actual_fragment.get_start_residue() is None and actual_fragment.get_end_residue() is None) or (actual_fragment.get_start_residue()<=residue_num_list[x] and (actual_fragment.get_end_residue()>=residue_num_list[x] or actual_fragment.get_end_residue() is None)): 3161 #print "I take position %s (which corresponds to %s)" %(x,residue_num_list[x]) 3162 current_end = x 3163 else: 3164 if current_end is not None: 3165 #print "Adding (%s,%s)" %(current_start, current_end) 3166 align_fragments_to_select.append((current_start,current_end)) 3167 current_end = None 3168 current_start = x+1 3169 3170 if current_end is not None: 3171 align_fragments_to_select.append((current_start,current_end)) 3172 3173 else: 3174 align_fragments_to_select.append((current_start,len(residue_num_list)-1)) 3175 3176 alignment = self.get_alignment(externalEntityID=alignmentExternalEntityID,get_species=get_species) 3177 3178 return alignment.get_subalignment( fragments = align_fragments_to_select )
3179 3180 3181 #################################################################################### 3182 # USER ENTITY RELATED METHODS # 3183 #################################################################################### 3184 3185
3186 - def _load_available_unification_protocols(self):
3187 """ 3188 Gets the information about available unification protocols 3189 """ 3190 3191 if( self.available_unification_protocols is None ): 3192 3193 self.available_unification_protocols = {} 3194 3195 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.USER_ENTITY_PROTOCOL_TABLE.get_table_name()], 3196 columns = ["description","unificationProtocolID","databaseVersion"]), 3197 answer_mode = "raw" ) 3198 3199 for actual_protocol in data: 3200 3201 uP = BianaObjects.UnificationProtocol(description = actual_protocol[0], BianaDatabaseVersion = actual_protocol[1], id = str(actual_protocol[1])) 3202 3203 # Search all the atoms for this unification protocol 3204 atoms = self.db.select_db_content( self.db._get_select_sql_query( tables = [(self.biana_database.USER_ENTITY_PROTOCOL_ATOMS_TABLE.get_table_name(),"A"), 3205 (self.biana_database.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE.get_table_name(),"B")], 3206 columns = ["externalDatabaseID_A","externalDatabaseID_B","GROUP_CONCAT(B.cross_referenced_code)"], 3207 join_conditions = [("A.unificationAtomID","=","B.unificationAtomID")], 3208 fixed_conditions = [("A.unificationProtocolID","=",actual_protocol[1])], 3209 group_conditions = ["A.unificationAtomID"] ), 3210 answer_mode = "raw" ) 3211 for current_atom in atoms: 3212 uP.add_unification_atom_elements( BianaObjects.UnificationAtomElement( externalDatabaseID_A = current_atom[0], 3213 externalDatabaseID_B = current_atom[1], 3214 externalAttribute = current_atom[2].split(",") ) ) 3215 3216 self.available_unification_protocols[actual_protocol[0].lower()] = uP 3217 3218 3219 self.available_unification_protocols["no unification"] = None 3220 3221 return
3222 3223
3224 - def _get_user_entity_table_name(self, unification_protocol_name):
3225 3226 self._load_available_unification_protocols() 3227 3228 unification_protocol_name = str(unification_protocol_name).lower() 3229 3230 if self.available_unification_protocols[unification_protocol_name] is None: 3231 return self.biana_database.USER_ENTITY_TABLE.get_table_name() 3232 3233 return self.biana_database.USER_ENTITY_TABLE.get_table_name()+self.available_unification_protocols[unification_protocol_name].get_id()
3234 3235
3236 - def get_available_unification_protocols_list(self):
3237 3238 if( self.dbname is None ): 3239 return [] 3240 3241 self._load_available_unification_protocols() 3242 3243 return list(self.available_unification_protocols.keys())
3244 3245
3246 - def _create_new_unification_protocol_tables(self, protocol):
3247 """ 3248 """ 3249 3250 # Inserting the basic information of the protocol to the database 3251 protocol_id = self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.USER_ENTITY_PROTOCOL_TABLE, 3252 column_values = [("description",protocol.get_description()), 3253 ("databaseVersion","TEST_DB_VERSION")], 3254 use_buffer = False ), 3255 answer_mode = "last_id" ) 3256 protocol.id = protocol_id 3257 3258 # Save the unification protocol... 3259 for actual_unification_atom_element in protocol.get_unification_atom_elements(): 3260 atom_id = self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.USER_ENTITY_PROTOCOL_ATOMS_TABLE, 3261 column_values = [ ("unificationProtocolID",protocol_id), 3262 ("externalDatabaseID_A",actual_unification_atom_element.get_externalDatabaseID_A() ), 3263 ("externalDatabaseID_B",actual_unification_atom_element.get_externalDatabaseID_B() ) ], 3264 use_buffer=False ), 3265 answer_mode = "last_id" ) 3266 3267 crossed_attributes = actual_unification_atom_element.get_external_attribute_list() 3268 3269 for actual_crossed_attribute in crossed_attributes: 3270 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE, 3271 column_values = [ ("unificationAtomID",atom_id), 3272 ("cross_referenced_code",actual_crossed_attribute)], 3273 use_buffer = False) )
3274 3275
3276 - def create_new_user_entities(self, protocol):
3277 """ 3278 3279 "protocol" is the UnificationProtocol object that has to be followed 3280 """ 3281 3282 # WE COULD CHECK IF A PREVIOUS UNIFICATION WITH THE SAME PARAMETERS HAS BEEN PREVIOUSLY DONE... 3283 3284 3285 if self.isOptimizedForRunning is False: 3286 self.optimize_database_for( mode="running", optimize=True ) 3287 3288 self._create_new_unification_protocol_tables(protocol) 3289 3290 protocol_temp_table = database.TableDB( table_name = "temp_equivalences_protocol_%s" %protocol.id, 3291 table_fields = [ database.FieldDB(field_name = "externalEntity1", 3292 data_type = "integer(4) unsigned"), 3293 database.FieldDB(field_name = "externalEntity2", 3294 data_type = "integer(4) unsigned")] ) 3295 3296 self.db.insert_db_content( protocol_temp_table.create_mysql_query(), answer_mode = None ) 3297 3298 print "Unifying..." 3299 3300 #import C_functions 3301 import tempfile 3302 3303 temp_eq_file = tempfile.NamedTemporaryFile(bufsize=0) 3304 3305 #Done like this because if not it does not work on WINDOWS os... 3306 temp_eq_file_name = temp_eq_file.name 3307 temp_eq_file.close() 3308 temp_eq_file = open(temp_eq_file_name,'w') 3309 3310 # Get the list of queries to obtain the equivalences 3311 for actual_unification_atom_element in protocol.get_unification_atom_elements(): 3312 query = self._get_equivalent_external_entities(actual_unification_atom_element) 3313 if query is not None: 3314 #print query 3315 self.db.insert_db_content( "INSERT INTO %s (%s)" %(protocol_temp_table.get_table_name(),query), 3316 answer_mode = None ) 3317 3318 #data = self.db.select_db_content( query, answer_mode = "raw" ) 3319 #[ temp_eq_file.write("%s\t%s\n" %(equivalence[0],equivalence[1])) for equivalence in data ] 3320 3321 3322 3323 # Get the information from the temporal table 3324 3325 l=1000000 3326 o=0 3327 3328 temp_equivalences = self.db.select_db_content( "SELECT * FROM %s LIMIT %s OFFSET %s" %(protocol_temp_table.get_table_name(),l,o), 3329 answer_mode = "raw", remove_duplicates="no" ) 3330 3331 while( len(temp_equivalences) > 0 ): 3332 o += l 3333 [ temp_eq_file.write("%s\t%s\n" %(equivalence[0],equivalence[1])) for equivalence in temp_equivalences ] 3334 temp_equivalences = self.db.select_db_content( "SELECT * FROM %s LIMIT %s OFFSET %s" %(protocol_temp_table.get_table_name(),l,o), 3335 answer_mode = "raw", remove_duplicates="no" ) 3336 3337 3338 # Delete the content of the temporal table 3339 self.db.insert_db_content( "DELETE FROM %s" %protocol_temp_table.get_table_name() ) 3340 3341 temp_all_file = tempfile.NamedTemporaryFile(bufsize=0) 3342 temp_all_file_name = temp_all_file.name 3343 temp_all_file.close() 3344 temp_all_file = open(temp_all_file_name, 'w') 3345 3346 # Get the list of queries to obtain all the external entities of desired databases 3347 # FIRST, IT IS NEEDED TO OBTAIN A LIST OF ALL DATABASES THAT ARE GOING TO BE USED 3348 databases = protocol.get_database_ids() 3349 for actual_database in databases: 3350 if not self.get_external_database(database_id = actual_database).get_promiscuity(): 3351 query = self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ENTITY_TABLE], 3352 columns = [self.biana_database.externalEntityID_col], 3353 fixed_conditions = [("externalDatabaseID","=",actual_database), 3354 ("type","!=","relation")] ) #JAVI RECENTLY ADDED. MAY DECREASE UNIFYING EFICIENCY... 3355 3356 #print query 3357 3358 self.db.insert_db_content( "INSERT INTO %s (externalEntity1) (%s)" %(protocol_temp_table.get_table_name(),query), 3359 answer_mode = None ) 3360 3361 #data = self.db.select_db_content( query, answer_mode = "raw" ) 3362 #[ temp_all_file.write("%s\n" %x) for x in data ] 3363 3364 3365 # Get the information from the temporal table 3366 l=1000000 3367 o=0 3368 temp_equivalences = self.db.select_db_content( "SELECT externalEntity1 FROM %s LIMIT %s OFFSET %s" %(protocol_temp_table.get_table_name(),l,o), 3369 answer_mode = "raw", remove_duplicates="no" ) 3370 while( len(temp_equivalences) > 0 ): 3371 o += l 3372 [ temp_all_file.write("%s\n" %equivalence) for equivalence in temp_equivalences ] 3373 temp_equivalences = self.db.select_db_content( "SELECT externalEntity1 FROM %s LIMIT %s OFFSET %s" %(protocol_temp_table.get_table_name(),l,o), 3374 answer_mode = "raw", remove_duplicates="no" ) 3375 3376 # Delete the temporal table 3377 self.db.insert_db_content( protocol_temp_table.get_drop_query() ) 3378 3379 unification_temp_file = tempfile.NamedTemporaryFile(bufsize=0) 3380 unification_temp_file_name = unification_temp_file.name 3381 3382 unification_temp_file.close() 3383 temp_eq_file.close() 3384 temp_all_file.close() 3385 3386 from biana import __path__ as biana_path 3387 import os 3388 3389 command = "\""+biana_path[0]+os.sep+"BianaDB"+os.sep+"%s\" "+temp_eq_file_name+" "+temp_all_file_name+" "+unification_temp_file_name 3390 3391 if sys.platform.lower().startswith("win"): 3392 command = command %("win_unify.exe") 3393 else: 3394 command = command %("unify") 3395 3396 p = os.system(command) 3397 3398 if p==1: 3399 raise ValueError("ERROR in unification. Unify compiled program error") 3400 #C_functions.unify( temp_equivalences_file = temp_eq_file.name, 3401 # temp_list_file = temp_all_file.name, 3402 # unification_temp_file = unification_temp_file.name ) 3403 3404 temp_eq_file.close() 3405 temp_all_file.close() 3406 3407 3408 print "Creating the protocol table" 3409 protocol_table = copy.deepcopy(self.biana_database.USER_ENTITY_TABLE) # It is necessary to do a copy because we are going to change its name... 3410 protocol_table.set_table_name(new_name = "%s%s" %(protocol_table.get_table_name(),protocol.id)) 3411 3412 self.db.insert_db_content( protocol_table.create_mysql_query(ignore_primary_key=True), answer_mode = None ) 3413 3414 self.db._disable_indices( table_list = [protocol_table] ) 3415 3416 3417 3418 if False: 3419 self.db._lock_tables( table_list = [protocol_table.get_table_name()] ) 3420 self.db.insert_db_content( "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s" %(unification_temp_file.name.replace("\\","\\\\"),protocol_table.get_table_name()) ) 3421 else: 3422 # Done like this because not all mysql versions accept LOAD DATA LOCAL INFILE. How is it possible????????? 3423 3424 # Read and insert the unification 3425 unification_file = open(unification_temp_file.name,'r') 3426 for line in unification_file: 3427 fields = line.strip().split("\t") 3428 self.db.insert_db_content( self.db._get_insert_sql_query( table = protocol_table, 3429 column_values = [("userEntityID", fields[0]), 3430 ("externalEntityID", fields[1])], 3431 use_buffer = "yes" ) ) 3432 unification_file.close() 3433 3434 import os 3435 os.unlink(unification_temp_file.name) 3436 3437 # Make sure to empty the buffer... 3438 self.db._empty_buffer() 3439 3440 self.db._enable_indices( table_list = [protocol_table] ) 3441 3442 self.db._unlock_tables() 3443 3444 # Here, we should incorporate into the unification the promiscuous external entities (external entities than can belong to different user entities, as scop or pfam domains, etc... 3445 self._unify_promiscuous_external_entities( protocol = protocol ) 3446 3447 return
3448 3449
3450 - def _unify_promiscuous_external_entities( self, protocol ):
3451 """ 3452 3453 """ 3454 3455 print "Adding promiscuous external entities" 3456 3457 #self.db.set_lock_tables( False ) 3458 3459 unification_table = self._get_user_entity_table_name(unification_protocol_name=protocol.description) 3460 3461 # Get all promiscuous external entities 3462 for actual_unification_atom_element in protocol.get_unification_atom_elements(): 3463 db_id_A = actual_unification_atom_element.get_externalDatabaseID_A() 3464 db_id_B = actual_unification_atom_element.get_externalDatabaseID_B() 3465 3466 # If only one of the external database is promiscuous 3467 #xor = lambda x,y: x!=y and (x or y) # using built-in xor: x^y 3468 if not (self.get_external_database(database_id = db_id_A).get_promiscuity() ^ self.get_external_database(database_id = db_id_B).get_promiscuity()): 3469 continue 3470 3471 if self.get_external_database(database_id = db_id_A).get_promiscuity(): 3472 promiscuous_db_id, non_promiscuous_db_id = db_id_A, db_id_B 3473 else: 3474 promiscuous_db_id, non_promiscuous_db_id = db_id_B, db_id_A 3475 3476 attribute_list = actual_unification_atom_element.get_external_attribute_list() 3477 3478 # Get the equivalences of the user entities for this promiscuous external entities 3479 tables = [ (unification_table, "u"), 3480 (self.biana_database.EXTERNAL_ENTITY_TABLE, "eE1"), 3481 (self.biana_database.EXTERNAL_ENTITY_TABLE, "eE2") ] 3482 3483 columns = ["userEntityID", "eE2.externalEntityID"] 3484 3485 join_conditions = [ ("eE1.externalEntityID", "=", "u.externalEntityID") ] 3486 fixed_conditions = [ ("eE2.type","!=","relation") ] 3487 3488 for attribute_index in xrange(len(attribute_list)): 3489 unifying_attribute = attribute_list[attribute_index] 3490 tables.append( (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[unifying_attribute], "a1_%s" % attribute_index) ) 3491 tables.append( (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[unifying_attribute], "a2_%s" % attribute_index) ) 3492 join_conditions.append( ("a1_%s.value" % attribute_index,"=","a2_%s.value" % attribute_index) ) 3493 join_conditions.append( ("eE1.externalEntityID","=","a1_%s.externalEntityID" % attribute_index) ) 3494 join_conditions.append( ("eE2.externalEntityID","=","a2_%s.externalEntityID" % attribute_index) ) 3495 fixed_conditions.append( ("eE1.externalDatabaseID","=",non_promiscuous_db_id) ) 3496 fixed_conditions.append( ("eE2.externalDatabaseID","=",promiscuous_db_id) ) 3497 3498 if unifying_attribute.lower()=="pdb": 3499 join_conditions.append( ("a1_%s.chain"%attribute_index,"=","b2_%s.chain"%attribute_index) ) 3500 3501 get_equivalences_query = self.db._get_select_sql_query( tables = tables, 3502 columns = columns, 3503 join_conditions = join_conditions, 3504 fixed_conditions = fixed_conditions ) 3505 print get_equivalences_query 3506 3507 # Insert them in the user entities table 3508 3509 # Trying to insert directly, without being in a temporary table. 3510 # Probably this is slow... If it is: 3511 # Insert all information in a temporary table 3512 # Disable indices from unification table 3513 # Put all information of temporary table into unification table 3514 # Enable indices from unification table 3515 # Delete temporary table 3516 3517 #self.db.insert_db_content( sql_query = "INSERT INTO %s (userEntityID, externalEntityID) (%s)" %(unification_table, 3518 # get_equivalences_query ), 3519 # answer_mode = None ) 3520 3521 self.db.insert_db_content( sql_query = self.db._get_nested_insert_sql_query( table = unification_table, 3522 columns = ["userEntityID", "externalEntityID"], 3523 subquery = get_equivalences_query ), 3524 answer_mode = None)
3525 # Finished 3526 3527
3528 - def drop_unification_protocol( self, unification_protocol_name ):
3529 """ 3530 Drops from the database all the information of a protocol of unification 3531 3532 "protocol_description" corresponds to the description of the protocol to drop 3533 """ 3534 3535 self._load_available_unification_protocols() 3536 3537 unification_protocol_name = unification_protocol_name.lower() 3538 3539 if not self.available_unification_protocols.has_key( unification_protocol_name ): 3540 raise ValueError("ERROR. Trying to drop an unexisting unification protocol") 3541 3542 if unification_protocol_name == "no unification": 3543 raise ValueError("Default unification protocol cannot be deleted") 3544 3545 unificationProtocolObj = self.available_unification_protocols[unification_protocol_name] 3546 3547 # Delete information about unification protocol atoms 3548 atom_ids_query = self.db._get_select_sql_query( tables = [self.biana_database.USER_ENTITY_PROTOCOL_ATOMS_TABLE], 3549 columns = ["unificationAtomID"], 3550 fixed_conditions = [("unificationProtocolID","=", unificationProtocolObj.get_id())] ) 3551 3552 self.db.insert_db_content( self.db._get_delete_sql_query(table = self.biana_database.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE, 3553 fixed_conditions = [("unificationAtomID","IN","(%s)" %atom_ids_query, None)]) ) 3554 3555 self.db.insert_db_content( self.db._get_delete_sql_query(table = self.biana_database.USER_ENTITY_PROTOCOL_ATOMS_TABLE, 3556 fixed_conditions = [("unificationProtocolID","=", unificationProtocolObj.get_id())]) ) 3557 3558 # Delete information in unification protocols table 3559 self.db.insert_db_content( self.db._get_delete_sql_query(table = self.biana_database.USER_ENTITY_PROTOCOL_TABLE, 3560 fixed_conditions = [("unificationProtocolID","=", unificationProtocolObj.get_id())]) ) 3561 3562 # Drop unification table 3563 self.db.insert_db_content( self.db._get_drop_sql_query( [self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)] ) ) 3564 3565 return
3566
3567 - def get_unification_protocol_atoms( self, unification_protocol_name ):
3568 """ 3569 Fetchs from the database all the atom information of a protocol of unification 3570 3571 "unification_protocol_name" corresponds to the description of the protocol 3572 """ 3573 self._load_available_unification_protocols() 3574 3575 unification_protocol_name = unification_protocol_name.lower() 3576 3577 if not self.available_unification_protocols.has_key( unification_protocol_name ): 3578 raise ValueError("ERROR. Trying to get an unexisting unification protocol") 3579 3580 if unification_protocol_name == "no unification": 3581 raise ValueError("Default unification protocol does not have any atom information") 3582 3583 unificationProtocolObj = self.available_unification_protocols[unification_protocol_name] 3584 3585 unification_protocol_atoms = unificationProtocolObj.get_unification_atom_elements() 3586 3587 #for atom in atoms: 3588 # atom.get_externalDatabaseID_A() 3589 # atom.get_externalDatabaseID_B() 3590 # atom.get_external_attribute_list() 3591 3592 return unification_protocol_atoms
3593 3594
3595 - def get_equivalent_user_entities_from_list(self, userEntitiesList, attribute, protocol_id):
3596 """ 3597 """ 3598 table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute.lower()]["table"].get_table_name() 3599 protocol_table = "%s%s" %(self.biana_database.USER_ENTITY_TABLE,protocol_id) 3600 3601 data = self.db.select_db_content( self.db._get_select_sql_query( tables = [(protocol_table,"u1"), 3602 (protocol_table,"u2"), 3603 (table,"e1"), 3604 (table,"e2")], 3605 columns = ["u1.userEntityID", "u2.userEntityID"], 3606 join_conditions = [("e1.%s" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute.lower()]["fields"]["value"].get_field_name(), 3607 "=", 3608 "e2.%s" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute.lower()]["fields"]["value"].get_field_name()), 3609 ("u1.externalEntityID","<","u2.externalEntityID"), 3610 ("u1.externalEntityID","=","e1.externalEntityID"), 3611 ("u2.externalEntityID","=","e2.externalEntityID")], 3612 fixed_conditions = [("u1.userEntityID","IN","(%s)" %(",".join([ str(x) for x in userEntitiesList])),None), 3613 ("u2.userEntityID","IN","(%s)" %(",".join([ str(x) for x in userEntitiesList])),None)] ), 3614 answer_mode = "raw", remove_duplicates="no" ) 3615 3616 import networkx 3617 3618 g = networkx.Graph() 3619 g.add_nodes_from(userEntitiesList) 3620 3621 for current_data in data: 3622 g.add_edge(current_data[0],current_data[1]) 3623 3624 return networkx.connected_components(g)
3625 3626 3627
3628 - def _get_equivalent_external_entities(self, unification_atom_element):
3629 """ 3630 returns the query to obtain the list of equivalent 3631 """ 3632 3633 actual_atom_element = unification_atom_element 3634 attribute_list = actual_atom_element.get_external_attribute_list() 3635 3636 tables = [(self.biana_database.EXTERNAL_ENTITY_TABLE,"e1"), 3637 (self.biana_database.EXTERNAL_ENTITY_TABLE,"e2")] 3638 3639 fixed_conditions = [] 3640 join_conditions = [] 3641 3642 # Skip (delay processing till all user entities created) external databases providing promiscuous data 3643 db_id_A = actual_atom_element.get_externalDatabaseID_A() 3644 db_id_B = actual_atom_element.get_externalDatabaseID_B() 3645 if (self.get_external_database(database_id = db_id_A).get_promiscuity() or self.get_external_database(database_id = db_id_B).get_promiscuity()): 3646 return None 3647 3648 # If only none of the external database is promiscuous 3649 for attribute_index in xrange(len(attribute_list)): 3650 3651 actual_attribute = attribute_list[attribute_index] 3652 3653 tables.append( (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_attribute.lower()],"a%s" %(attribute_index)) ) 3654 tables.append( (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_attribute.lower()],"b%s" %(attribute_index)) ) 3655 3656 # To check... 3657 fixed_conditions.extend( [ ("a%s.%s" %(attribute_index,self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_attribute]["fields"][x[0].lower()]),"=",x[1]) 3658 for x in actual_atom_element.get_field_conditions_A() ] ) 3659 3660 fixed_conditions.extend( [ ("b%s.%s" %(attribute_index,self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[actual_attribute]["fields"][x[0].lower()]),"=",x[1]) 3661 for x in actual_atom_element.get_field_conditions_B() ] ) 3662 3663 join_conditions.extend( [ ("a%s.value" %attribute_index, 3664 "=", 3665 "b%s.value" %attribute_index) 3666 for x in actual_atom_element.get_field_cross_references() ] ) 3667 3668 # Special treatment for PDB: in PDB, pdb code and chain must be shared. 3669 if actual_attribute.lower()=="pdb": 3670 join_conditions.extend( [ ("a%s.chain" %attribute_index, 3671 "=", 3672 "b%s.chain" %attribute_index) 3673 for x in actual_atom_element.get_field_cross_references() ] ) 3674 3675 join_conditions.append( ("e1.externalEntityID","=","a%s.externalEntityID" %attribute_index) ) 3676 join_conditions.append( ("e2.externalEntityID","=","b%s.externalEntityID" %attribute_index) ) 3677 3678 # Consider equal only external entities have the same type 3679 join_conditions.append( ("e1.type","=","e2.type") ) 3680 3681 fixed_conditions.append( ("e1.externalDatabaseID","=",actual_atom_element.get_externalDatabaseID_A()) ) 3682 fixed_conditions.append( ("e2.externalDatabaseID","=",actual_atom_element.get_externalDatabaseID_B()) ) 3683 3684 fixed_conditions.append( ("e1.type","!=","relation") ) #JAVI RECENTLY ADDED. MAY DECREASE UNIFYING EFICIENCY... ADDED TO AVOID ADDING relations to unification. It can also be filtered in a posterior step 3685 fixed_conditions.append( ("e2.type","!=","relation") ) 3686 3687 if( actual_atom_element.get_externalDatabaseID_A() == actual_atom_element.get_externalDatabaseID_B() ): 3688 join_conditions.append( ("e1.externalEntityID","<","e2.externalEntityID") ) # JAVI QUESTION: Does this affect the unification? Is it correct??? 3689 3690 3691 return self.db._get_select_sql_query( tables = tables, 3692 columns = ["e1.externalEntityID","e2.externalEntityID"], 3693 fixed_conditions = fixed_conditions, 3694 join_conditions = join_conditions )
3695 3696
3697 - def get_external_entities_dict(self, externalEntityIdsList, attribute_list=[], relation_attribute_list=[], participant_attribute_list=[], useTransferAttributes=True):
3698 """ 3699 Returns a dict of external Entity Objects with the attributes specified in the "attribute_identifier_list" 3700 3701 The key in the dictionary corresponds to the external Entity ID 3702 3703 The external entity can be of any type (included relations) 3704 3705 "attribute_field_restrictions" is to restrict the attributes by additional fields. Sintaxis: [(attribute_identifier, field, value)] 3706 """ 3707 3708 if len(externalEntityIdsList)==0: 3709 return {} 3710 3711 eE_id_str_list = ", ".join([ str(x) for x in externalEntityIdsList ]) 3712 3713 # Get the basic information for the external entities 3714 data = self.db.select_db_content(self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name()], 3715 columns = ["%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name(), 3716 "%s.externalDatabaseID" %self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name(), 3717 "%s.type" %self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name()], 3718 fixed_conditions = [("%s.externalEntityID" %self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name(), 3719 "IN", 3720 "(%s)" %eE_id_str_list, None)] ), 3721 answer_mode="raw", remove_duplicates="no") 3722 eE_dict = {} 3723 3724 eEr_id_list = [] 3725 3726 for current_data in data: 3727 3728 if current_data[2]=="relation": 3729 # Get the relation type 3730 relation_type = self.db.select_db_content( self.db._get_select_sql_query( tables=[self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE], 3731 columns = ["type"], 3732 fixed_conditions = [("externalEntityRelationID","=",current_data[0])] ), 3733 answer_mode = "single") 3734 3735 relationObj = BianaObjects.ExternalEntityRelation( id = current_data[0], 3736 source_database = current_data[1], 3737 relation_type = relation_type ) 3738 3739 eE_dict[current_data[0]] = relationObj 3740 3741 # Add the rest of participants of this relation 3742 participants = self.db.select_db_content( self.db._get_select_sql_query( tables=[self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE], 3743 columns = ["externalEntityID"], 3744 fixed_conditions = [("externalEntityRelationID","=",current_data[0])] ), 3745 answer_mode = "raw" ) 3746 3747 [ relationObj.add_participant( externalEntityID=x[0] ) for x in participants ] 3748 3749 eEr_id_list.append(str(current_data[0])) 3750 3751 else: 3752 eE_dict[current_data[0]] = BianaObjects.ExternalEntity( id = current_data[0], 3753 source_database = current_data[1], 3754 type = current_data[2] ) 3755 3756 3757 3758 # Prepare restrictions 3759 #attribute_field_restrictions_dict = dict([(x[0].lower(),(x[1],x[2])) for x in attribute_field_restrictions ]) 3760 3761 # Now, we can merge simple eE with eEr, as they are treated in the same way 3762 attributes = sets.Set([ x.lower() for x in attribute_list ]) 3763 attributes.update([ x.lower() for x in relation_attribute_list ]) 3764 3765 #for current_attribute in sets.Set([ x.lower() for x in attribute_list ]): 3766 for current_attribute in attributes: 3767 3768 if current_attribute == "proteinsequence": 3769 # Get the sequence and sequence ID 3770 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute], 3771 self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"]] 3772 columns = ["externalEntityID","UNCOMPRESS(sequence)"] 3773 data = self.db.select_db_content(self.db._get_select_sql_query( tables = tables, 3774 columns=columns, 3775 fixed_conditions = [("externalEntityID", 3776 "IN", 3777 "(%s)" %eE_id_str_list, None)], 3778 join_conditions = [("%s.value" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute], 3779 "=", 3780 "%s.sequenceMD5" %self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"])]), 3781 answer_mode = "raw" ) 3782 for current_sequence in data: 3783 eE_dict[current_sequence[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute, 3784 value=BianaObjects.ProteinSequence(sequence=current_sequence[1]))) 3785 3786 continue 3787 3788 if current_attribute == "nucleotidesequence": 3789 # Get the sequence and sequence ID 3790 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute], 3791 self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"]] 3792 columns = ["externalEntityID","sequenceType","UNCOMPRESS(sequence)"] 3793 data = self.db.select_db_content(self.db._get_select_sql_query( tables = tables, 3794 columns=columns, 3795 fixed_conditions = [("externalEntityID", 3796 "IN", 3797 "(%s)" %eE_id_str_list, None)], 3798 join_conditions = [("%s.value" %self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute], 3799 "=", 3800 "%s.sequenceMD5" %self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["nucleotideSequence"])]), 3801 answer_mode = "raw" ) 3802 for current_sequence in data: 3803 if( current_sequence[1]=="dna" ): 3804 eE_dict[current_data[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute, 3805 value=BianaObjects.DNASequence(sequence=current_sequence[2]))) 3806 elif( current_sequence[1]=="rna" ): 3807 eE_dict[current_data[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute, 3808 value=BianaObjects.RNASequence(sequence=current_sequence[2]))) 3809 3810 continue 3811 3812 try: 3813 table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute] 3814 3815 columns = ["%s.externalEntityID" %table, 3816 "%s.value" %table] 3817 #"%s.type" %table] 3818 3819 fixed_conditions = [("%s.externalEntityID" %table, 3820 "IN", 3821 "(%s)" %eE_id_str_list, None)] 3822 3823 data = self.db.select_db_content(self.db._get_select_sql_query( tables=[table], 3824 columns=columns, 3825 fixed_conditions = fixed_conditions), 3826 answer_mode="raw", remove_duplicates="no") 3827 3828 for current_data in data: 3829 eE_dict[current_data[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute, 3830 value=current_data[1])) 3831 3832 3833 ### ADD TRANSFERRED ATTRIBUTES ### 3834 3835 if self._is_transferred_attribute( current_attribute ) and useTransferAttributes is True: 3836 3837 for current_transfer in self.transferred_attributes[current_attribute.lower()]: 3838 3839 tables = [ (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute],"transferred"), 3840 (self._get_key_attribute_table_name( key_id = self.key_attribute_ids[(current_transfer[0],current_transfer[1])] ),"key_attr"), 3841 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_transfer[1]],"key_attr2") ] 3842 3843 fixed_conditions = [("key_attr2.externalEntityID", 3844 "IN", 3845 "(%s)" %eE_id_str_list, None)] 3846 3847 join_conditions = [("key_attr.value","=","key_attr2.value"), 3848 ("transferred.externalEntityID","=","key_attr.externalEntityID")] 3849 3850 columns = ["key_attr2.externalEntityID","transferred.value","transferred.type"] 3851 3852 3853 data = self.db.select_db_content(self.db._get_select_sql_query( tables=tables, 3854 columns=columns, 3855 fixed_conditions = fixed_conditions, 3856 join_conditions = join_conditions), 3857 answer_mode="raw", remove_duplicates="no") 3858 3859 for current_data in data: 3860 eE_dict[current_data[0]].add_attribute(BianaObjects.ExternalEntityAttribute(attribute_identifier=current_attribute, 3861 value=current_data[1], 3862 type="transferred_%s" %(current_data[2]) )) 3863 3864 3865 3866 except: 3867 traceback.print_exc() 3868 sys.stderr.write("Attribute %s is not found in available attributes\n" %(current_attribute)) 3869 3870 3871 for current_attribute in sets.Set([ x.lower() for x in participant_attribute_list]): 3872 3873 try: 3874 for current_eErID in eEr_id_list: 3875 current_eEr_obj = eE_dict[int(current_eErID)] 3876 data = self.db.select_db_content( self.db._get_select_sql_query( tables=[self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE, 3877 self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT[current_attribute]], 3878 columns = ["externalEntityID","value"], 3879 join_conditions = [("%s.externalEntityRelationParticipantID" %self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE, 3880 "=", 3881 "%s.externalEntityRelationParticipantID" %self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT[current_attribute])], 3882 fixed_conditions = [("externalEntityID","IN","(%s)" %",".join([ str(x) for x in current_eEr_obj.get_participant_external_entity_ids_list()]),None)]), 3883 answer_mode = "raw" ) 3884 3885 for current_data in data: 3886 current_eEr_obj.add_participant_attribute( externalEntityID = current_data[0], 3887 participantAttribute = BianaObjects.ExternalEntityRelationParticipantAttribute( attribute_identifier = current_attribute, 3888 value = current_data[1] )) 3889 3890 3891 except: 3892 traceback.print_exc() 3893 sys.stderr.write("Attribute %s is not found in available relation participant attributes\n" %(current_attribute)) 3894 3895 3896 return eE_dict
3897 3898
3899 - def get_user_entity_attributes(self, unification_protocol_name, listUserEntityID, attribute_identifier):
3900 """ 3901 Returns a dictionary with { userEntityID: list of attributes } 3902 3903 This method is intended to be faster for getting user entity attributes than getting external entity objects and its attribute objects 3904 3905 Numerical values are stored as strings!!! 3906 """ 3907 3908 #TO CHECK... IS IT NECESSARY??? 3909 3910 3911 if debug_web == 1: 3912 import os 3913 debug_file_fd = file("/usr/local/apache2/htdocs/webiana/sessions/debug_file_biana.txt", "w") 3914 os.chmod("/usr/local/apache2/htdocs/webiana/sessions/debug_file_biana.txt", 0777) 3915 debug_file_fd.write("attribute_identifier is %s" %(attribute_identifier)) 3916 debug_file_fd.write("external entity attribute tables dict has keys %s" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT.keys())) 3917 3918 attr_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier] 3919 unif_table = self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name) 3920 3921 if attribute_identifier.lower()=="proteinsequence": 3922 query = self.db._get_select_sql_query( tables = [attr_table,unif_table, 3923 self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"]], 3924 columns = ["userEntityID","UNCOMPRESS(sequence) AS seq"], 3925 join_conditions = [("userEntityID","IN","(%s)" %",".join(map(str,listUserEntityID))), 3926 ("%s.externalEntityID" %attr_table,"=","%s.externalEntityID" %unif_table), 3927 ("%s.value" %attr_table,"=","%s.sequenceMD5" %self.biana_database.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES["proteinSequence"])], 3928 group_conditions = ["userEntityID","seq"] ) 3929 else: 3930 query = self.db._get_select_sql_query( tables = [attr_table,unif_table], 3931 columns = ["userEntityID","value"], 3932 join_conditions = [("userEntityID","IN","(%s)" %",".join(map(str,listUserEntityID))), 3933 ("%s.externalEntityID" %attr_table,"=","%s.externalEntityID" %unif_table)], 3934 group_conditions = ["userEntityID","value"] ) 3935 3936 data = self.db.select_db_content( query, answer_mode = "raw" ) 3937 3938 return_dict = {} 3939 3940 for current_data in data: 3941 return_dict.setdefault(current_data[0],[]).append(str(current_data[1])) 3942 3943 3944 ### ADD TRANSFERRED ATTRIBUTES ### 3945 if self._is_transferred_attribute( attribute_identifier ): 3946 3947 for current_transfer in self.transferred_attributes[attribute_identifier.lower()]: 3948 3949 tables = [ unif_table, 3950 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_identifier],"transferred"), 3951 (self._get_key_attribute_table_name( key_id = self.key_attribute_ids[(current_transfer[0],current_transfer[1])] ),"key1"), 3952 (self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_transfer[1]],"key2") ] 3953 3954 columns = ["userEntityID","transferred.value"] 3955 3956 join_conditions = [("userEntityID","IN","(%s)" %",".join(map(str,listUserEntityID))), 3957 ("%s.externalEntityID" %unif_table,"=","key2.externalEntityID"), 3958 ("key1.value","=","key2.value"), 3959 ("transferred.externalEntityID","=","key1.externalEntityID")] 3960 3961 query = self.db._get_select_sql_query( tables = tables, 3962 columns = columns, 3963 join_conditions = join_conditions ) 3964 3965 data = self.db.select_db_content( query, answer_mode = "raw" ) 3966 3967 for current_data in data: 3968 return_dict.setdefault(current_data[0],[]).append(str(current_data[1])) 3969 3970 return return_dict
3971
3972 - def get_user_entity_type(self, unification_protocol_name, user_entity_ids):
3973 """ 3974 Gets types associated with given user entity ids from database 3975 3976 Returns a dictionary of user entity id and type 3977 """ 3978 3979 self._load_available_unification_protocols() 3980 3981 #fixed_conditions = [("u.externalEntityID","IN","(%s)" %",".join(map(str,external_entity_IDs_list)),None)] 3982 fixed_conditions = [ ("u.userEntityID","IN","(%s)" %",".join(map(str,user_entity_ids)),None) ] 3983 tables = [ "%s u" % self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name), 3984 "%s e" % self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name() ] 3985 columns = [ "u.userEntityID", "e.type" ] 3986 join_conditions = [ ("u.externalEntityID", "=", "e.externalEntityID") ] 3987 group_conditions = [ "u.userEntityID", "e.type" ] 3988 3989 query = self.db._get_select_sql_query( tables = tables, 3990 columns = columns, 3991 fixed_conditions = fixed_conditions, 3992 join_conditions = join_conditions, 3993 group_conditions = group_conditions) 3994 3995 userEntityId_type_tuples = self.db.select_db_content(query, 3996 answer_mode = "raw" ) 3997 # gene has type precedence over protein 3998 uEId_to_type = {} 3999 for uEId, type in userEntityId_type_tuples: 4000 if uEId_to_type.setdefault(uEId, type) != "gene": 4001 if type == "gene": 4002 uEId_to_type[uEId] = type 4003 4004 return uEId_to_type
4005 4006
4007 - def get_user_entity_id_and_type_tuple_list(self, unification_protocol_name, attribute_identifier, field_values, attribute_restrictions = [], restrict_to_user_entity_ids_list = []):
4008 """ 4009 Gets a list of user entities 4010 4011 It returns a set with user entities objects 4012 4013 "attribute_restrictions" must be a list of (attribute_identifier, value) 4014 4015 #! Obselete: Not used by any method 4016 """ 4017 4018 external_entity_IDs_list = self.get_list_external_entities_IDs_by_attribute( attribute_identifier = attribute_identifier.lower(), 4019 field_values = field_values, 4020 attribute_restrictions = attribute_restrictions ) 4021 self._load_available_unification_protocols() 4022 4023 fixed_conditions = [("u.externalEntityID","IN","(%s)" %",".join(map(str,external_entity_IDs_list)),None)] 4024 4025 if len(restrict_to_user_entity_ids_list)>0: 4026 fixed_conditions.append( ("u.userEntityID","IN","(%s)" %",".join(map(str,restrict_to_user_entity_ids_list)),None) ) 4027 4028 if( len(external_entity_IDs_list) > 0 ): 4029 4030 tables = [ "%s u" % self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name), 4031 "%s e" % self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name() ] 4032 columns = [ "u.userEntityID", "e.type" ] 4033 join_conditions = [ ("u.externalEntityID", "=", "e.externalEntityID") ] 4034 group_conditions = [ "u.userEntityID", "e.type" ] 4035 4036 query = self.db._get_select_sql_query( tables = tables, 4037 columns = columns, 4038 fixed_conditions = fixed_conditions, 4039 join_conditions = join_conditions, 4040 group_conditions = group_conditions) 4041 4042 userEntityIDlist = self.db.select_db_content(query, 4043 answer_mode = "raw" ) 4044 4045 return userEntityIDlist 4046 4047 return []
4048 4049
4050 - def _get_list_eE_for_uE(self, unification_protocol_name, userEntityID):
4051 4052 return self.db.select_db_content( self.db._get_select_sql_query( tables = [self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)], 4053 columns = ["externalEntityID"], 4054 fixed_conditions = [("userEntityID","=",userEntityID)] ), 4055 answer_mode = "list" )
4056 4057
4058 - def get_user_entity_relations_by_sharing_attributes(self, unification_protocol_name, userEntityID_list, listAttributes, limit_to_userEntityID_list=False, attribute_restrictions=[], negative_attribute_restrictions=[], ontology_expansion_level=None):
4059 """ 4060 Returns a list of relations between user entities, in which the share attributes listed in listAttributes 4061 4062 "listAttributes" is a list of lists of external entity attributes. Each sublist is a "sharing" restriction 4063 4064 "expand_ontology_attributes": Boolean to specify if ontology attributes should be expanded to lower levels 4065 4066 "ontology_expansion_level": Dictionary to specigy the category level to which should be considered equivalent two ontology attributes (for example, family level in scop) 4067 """ 4068 4069 # TODO: APPLY "ontology_expansion_level" attribute 4070 4071 if len(userEntityID_list)==0: 4072 return [] 4073 4074 equivalent_uE_pair_queries = [] 4075 4076 for current_attribute_group in listAttributes: 4077 4078 equivalent_uE_pair_queries.append(self._get_nested_queries_for_getting_equivalent_uE_by_sharing_attributes( unification_protocol_name = unification_protocol_name, 4079 attribute_list = current_attribute_group, 4080 user_entity_ids_list = userEntityID_list, 4081 restrict_to_user_entity_ids_list = limit_to_userEntityID_list)) 4082 4083 union_query = self.db._get_union_queries( equivalent_uE_pair_queries ) 4084 4085 4086 # Apply restrictions 4087 4088 restricted_query = self._apply_restrictions_to_query( query = union_query, 4089 unification_protocol_name = unification_protocol_name, 4090 attribute_restrictions = attribute_restrictions, 4091 column_name_to_restrict="userEntityID2" ) 4092 4093 negatively_restricted_query = self._apply_negative_restrictions_to_query( query = restricted_query, 4094 unification_protocol_name = unification_protocol_name, 4095 column_name_to_restrict="userEntityID2", 4096 negative_attribute_restrictions = negative_attribute_restrictions ) 4097 4098 4099 4100 results = self.db.select_db_content( negatively_restricted_query, 4101 answer_mode = "raw" ) 4102 4103 if results: 4104 uE1s, uE2s = zip(*results) 4105 uE2_types_dict = self.get_user_entity_type(unification_protocol_name, uE2s) 4106 #new_results = [ (x[0], x[1][0], x[1][1]) for x in zip(uE1s, tuples) ] 4107 new_results = [] 4108 for current_result in results: 4109 new_results.append((current_result[0],current_result[1],uE2_types_dict[current_result[1]])) 4110 else: 4111 new_results = [] 4112 4113 return new_results
4114
4115 - def _get_list_external_entities_for_user_entities(self, unification_protocol_name, userEntityID_list):
4116 4117 tables = [ "%s u" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name) ) ] 4118 columns = ["externalEntityID"] 4119 join_conditions = [("u.userEntityID","IN","(%s)" %",".join(map(str,userEntityID_list)))] 4120 4121 return self.db.select_db_content( self.db._get_select_sql_query( tables = tables, 4122 columns = columns, 4123 join_conditions = join_conditions ), 4124 answer_mode = "list" )
4125
4126 - def _get_list_user_entities_for_external_entities(self, unification_protocol_name, externalEntityID_list):
4127 4128 tables = [ "%s u" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name) ) ] 4129 columns = ["userEntityID"] 4130 join_conditions = [("u.externalEntityID","IN","(%s)" %",".join(map(str,externalEntityID_list)))] 4131 4132 return self.db.select_db_content( self.db._get_select_sql_query( tables = tables, 4133 columns = columns, 4134 join_conditions = join_conditions ), 4135 answer_mode = "list" )
4136 4137
4138 - def get_default_external_entity_ids(self, externalEntityIDsList):
4139 4140 # MONDAY: METHOD TO CHANGE !!!!!!!! 4141 4142 if len(externalEntityIDsList)==0: 4143 return {} 4144 4145 default_attribute_identifiers = self.db.select_db_content( self.db._get_select_sql_query( tables = [ self.biana_database.EXTERNAL_DATABASE_TABLE, self.biana_database.EXTERNAL_ENTITY_TABLE ], 4146 columns = [ "%s.externalEntityID" %(self.biana_database.EXTERNAL_ENTITY_TABLE), 4147 "%s.defaultExternalEntityAttribute" %(self.biana_database.EXTERNAL_DATABASE_TABLE) ], 4148 4149 join_conditions = [("%s.externalDatabaseID" %(self.biana_database.EXTERNAL_DATABASE_TABLE),"=","%s.externalDatabaseID" %(self.biana_database.EXTERNAL_ENTITY_TABLE)), 4150 ("%s.externalEntityID" %(self.biana_database.EXTERNAL_ENTITY_TABLE),"IN","(%s)" %(",".join(map(str,externalEntityIDsList))))] ), 4151 answer_mode = "raw" ) 4152 4153 attribute_identifiers_dict = {} 4154 for externalEntityID, defaultAttribute in default_attribute_identifiers: 4155 attribute_identifiers_dict.setdefault(defaultAttribute,[]).append(externalEntityID) 4156 4157 return_dict = {} 4158 4159 # Get the id for external entities 4160 for current_attribute, list_externalEntityIds in attribute_identifiers_dict.iteritems(): 4161 if current_attribute is None: 4162 for current_eEid in list_externalEntityIds: 4163 return_dict[current_eEid] = "" # What should I do here? 4164 else: 4165 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute.lower()]] 4166 join_conditions = [("externalEntityID","IN","(%s)" %",".join(map(str,list_externalEntityIds)))] 4167 columns = ["externalEntityID","value"] 4168 4169 #print self.db._get_select_sql_query( tables = tables, 4170 # columns = columns, 4171 # join_conditions = join_conditions ) 4172 4173 raw_data = self.db.select_db_content( self.db._get_select_sql_query( tables = tables, 4174 columns = columns, 4175 join_conditions = join_conditions, 4176 fixed_conditions = [("type","=","unique")], 4177 group_conditions = ["externalEntityID"]), 4178 answer_mode = "raw" ) 4179 4180 return_dict.update( dict( [ (x[0],"%s: %s" %(current_attribute,x[1])) for x in raw_data ] ) ) 4181 4182 del raw_data 4183 4184 #return_dict.update(dict(self.db.select_db_content( self.db._get_select_sql_query( tables = tables, 4185 # columns = columns, 4186 # join_conditions = join_conditions, 4187 # group_conditions = ["externalEntityID"]), 4188 # answer_mode = "raw" ))) 4189 4190 return return_dict 4191 4192 4193 # OLD CODE: 4194 4195 if len(externalEntityIDsList)==0: 4196 return {} 4197 4198 # Get the id for external entities 4199 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["defaultid"]] 4200 join_conditions = [("externalEntityID","IN","(%s)" %",".join(map(str,externalEntityIDsList)))] 4201 columns = ["externalEntityID","value"] 4202 4203 return_dict = dict(self.db.select_db_content( self.db._get_select_sql_query( tables = tables, 4204 columns = columns, 4205 join_conditions = join_conditions ), 4206 answer_mode = "raw" )) 4207 4208 4209 # Get the id for external entity relations 4210 tables = [self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT["defaultid"]] 4211 join_conditions = [("externalEntityID","IN","(%s)" %",".join(map(str,externalEntityIDsList)))] 4212 columns = ["externalEntityID","value"] 4213 4214 return_dict.update(dict(self.db.select_db_content( self.db._get_select_sql_query( tables = tables, 4215 columns = columns, 4216 join_conditions = join_conditions ), 4217 answer_mode = "raw" ))) 4218 4219 return return_dict
4220 4221
4222 - def get_relations_hierarchy(self, externalEntityRelationIDs):
4223 """ 4224 Returns a list of tuples (relationID1, relationID2) in where relationID1 is a child of relationID2 4225 """ 4226 4227 #print "Getting relations hierarchy with ",externalEntityRelationIDs 4228 4229 tables = ["%s p" %self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE] 4230 #tables = ["originalExternalEntityRelationParticipant p"] 4231 4232 columns = ["p.externalEntityID","p.externalEntityRelationID"] 4233 join_conditions = [("p.externalEntityID","IN","(%s)" %",".join(map(str,externalEntityRelationIDs))), 4234 ("p.externalEntityRelationID","IN","(%s)" %",".join(map(str,externalEntityRelationIDs)))] 4235 4236 return self.db.select_db_content( self.db._get_select_sql_query( tables = tables, 4237 columns = columns, 4238 join_conditions = join_conditions ), 4239 answer_mode = "raw" )
4240 4241
4242 - def _update_relations_hierarchy(self):
4243 """ 4244 Precalculates and stores in database relations hierachy 4245 """ 4246 4247 return "DEPRECATED" 4248 4249 hierarchy_set = sets.Set() # Set that contains tuples (externalEntityRelationID, externalEntityID), where externalEntityID is included in externalEntityRelationID. externalEntityID can also be a relation 4250 4251 # Get all the information about relations and participants 4252 eE_eERid_list = self.db.select_db_content( self.db._get_select_sql_query( tables = [self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE], 4253 columns = ["externalEntityID","externalEntityRelationID"]), 4254 answer_mode = "raw" ) 4255 eE_eERid_dict = {} 4256 4257 for current_pair in eE_eERid_list: 4258 eE_eERid_dict.setdefault(current_pair[0],[]).append(current_pair[1]) 4259 4260 4261 def get_all_parents(eEid): 4262 parents = sets.Set() 4263 if( eE_eERid_dict.has_key(eEid) ): 4264 parents.update(eE_eERid_dict[eEid]) 4265 for current_eEid in eE_eERid_dict[eEid]: 4266 parents.update(get_all_parents(current_eEid)) 4267 return parents
4268 4269 4270 for eEid in eE_eERid_dict.keys(): 4271 4272 for current_parent in get_all_parents(eEid): 4273 hierarchy_set.add((current_parent,eEid)) 4274 4275 4276 self.db.set_lock_tables(True) 4277 4278 self.db._disable_indices([self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE]) 4279 4280 for current_eERid, current_eEid in hierarchy_set: 4281 self.db.insert_db_content( self.db._get_insert_sql_query( table = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE, 4282 column_values = (("externalEntityRelationParticipantID", self._get_new_external_entity_relation_participant_id()), 4283 (self.biana_database.external_entity_relation_id_col, current_eERid), 4284 (self.biana_database.externalEntityID_col, current_eEid )), 4285 use_buffer = True )) 4286 self.db._empty_buffer() 4287 self.db._enable_indices([self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE]) 4288 4289 self.db.set_lock_tables(False) 4290 return 4291
4292 - def NEWget_user_entity_relations(self, unification_protocol_name, userEntityID_list, attribute_restrictions = [], negative_attribute_restrictions = [], listRelationType=[], dictRelationAttributeRestriction={}, use_self_relations=True, limit_to_userEntityID_list=False, use_nested_relations=True):
4293 """ 4294 Returns the list of relations where the userEntity is involved 4295 use_nested_relations ==> Include relations greater than a nested level 4296 """ 4297 4298 if len(userEntityID_list)==0: 4299 return [] 4300 4301 if use_nested_relations: 4302 PARTICIPANT_TABLE = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE.get_table_name() 4303 else: 4304 PARTICIPANT_TABLE = self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE.get_table_name() 4305 4306 # First, get the relations 4307 tables = [ "%s u1" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)), 4308 "%s p1" %PARTICIPANT_TABLE, 4309 "%s r" %(self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE.get_table_name())] 4310 4311 columns = [ ("u1.userEntityID", "userEntityID"), 4312 ("r.externalEntityRelationID", "externalEntityRelationID"), 4313 ("r.type", "type"), 4314 ("p1.externalEntityID", "externalEntityID")] 4315 4316 join_conditions = [("u1.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list])), 4317 ("p1.externalEntityID","=","u1.externalEntityID"), 4318 ("p1.externalEntityRelationID","=","r.externalEntityRelationID")] 4319 4320 if len(listRelationType) > 0: 4321 join_conditions.append(("r.type","IN","(\"%s\")" % "\",\"".join([ relationType for relationType in listRelationType]))) 4322 4323 # Apply relation attribute restrictions 4324 num = 1 4325 for attribute_name, values in dictRelationAttributeRestriction.iteritems(): 4326 4327 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_name.lower()] 4328 tables.append( "%s AS EERA%s" %(current_table.get_table_name(),num) ) 4329 if BianaObjects.ExternalEntityAttribute.isNumericAttribute(attribute_name, self.biana_database) or BianaObjects.ExternalEntityAttribute.isSpecialAttribute(attribute_name, self.biana_database): 4330 regex = re.compile("([><=]*)([\d\.]+)") 4331 for current_value in values: 4332 m = regex.match(current_value) 4333 if m: 4334 join_conditions.append(("EERA%s.value" %num,m.group(1),m.group(2))) 4335 else: 4336 join_conditions.append(("EERA%s.value" %num,"=","\"%s\"" %current_value)) 4337 else: 4338 join_conditions.append(("EERA%s.value" %num,"IN","(\"%s\")" %("\",\"".join([ str(x) for x in values])))) 4339 join_conditions.append( ("r.externalEntityRelationID","=","EERA%s.externalEntityID" %num ) ) 4340 num += 1 4341 4342 query = self.db._get_select_sql_query( tables = tables, 4343 columns = columns, 4344 join_conditions = join_conditions ) 4345 4346 # Then, get the u2 participants 4347 tables = [ ("(%s)" %query, "query1"), 4348 "%s p2" %PARTICIPANT_TABLE, 4349 "%s u2" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)), 4350 "%s e" %(self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name()) ] 4351 4352 columns = [ "query1.userEntityID", 4353 ("u2.userEntityID", "userEntityID2"), 4354 "query1.externalEntityRelationID", 4355 "query1.type", 4356 ("e.type", "etype") ] 4357 4358 join_conditions = [ ("query1.externalEntityRelationID","=","p2.externalEntityRelationID"), 4359 ("p2.externalEntityID","=","u2.externalEntityID"), 4360 ("query1.externalEntityID","!=","p2.externalEntityID"), 4361 ("p2.externalEntityID", "=", "e.externalEntityID")] 4362 4363 if limit_to_userEntityID_list is True: 4364 join_conditions.append(("u2.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list ]))) 4365 4366 4367 #query = self.db._get_select_sql_query( tables = tables, 4368 # columns = columns, 4369 # join_conditions = join_conditions ) 4370 4371 4372 #if limit_to_userEntityID_list is True: 4373 # join_conditions.append(("u2.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list ]),None)) 4374 4375 # Get the interacting user entities 4376 query = self.db._get_select_sql_query( tables = tables, 4377 columns = columns, 4378 join_conditions = join_conditions, 4379 distinct_columns = True ) 4380 4381 query = self._apply_restrictions_to_query( query = query, 4382 unification_protocol_name = unification_protocol_name, 4383 attribute_restrictions= attribute_restrictions, 4384 column_name_to_restrict="userEntityID2" ) 4385 4386 query = self._apply_negative_restrictions_to_query( query = query, 4387 unification_protocol_name = unification_protocol_name, 4388 negative_attribute_restrictions = negative_attribute_restrictions, 4389 column_name_to_restrict="userEntityID2" ) 4390 4391 interacting_uE = list(self.db.select_db_content( query, answer_mode = "raw" )) 4392 4393 return interacting_uE
4394
4395 - def get_user_entity_relations(self, unification_protocol_name, userEntityID_list, attribute_restrictions = [], negative_attribute_restrictions = [], listRelationType=[], dictRelationAttributeRestriction={}, use_self_relations=True, limit_to_userEntityID_list=False, use_nested_relations=True):
4396 #return self.NEWget_user_entity_relations(unification_protocol_name, userEntityID_list, attribute_restrictions, negative_attribute_restrictions, listRelationType, dictRelationAttributeRestriction, use_self_relations, limit_to_userEntityID_list, use_nested_relations) 4397 return self.OLDget_user_entity_relations(unification_protocol_name, userEntityID_list, attribute_restrictions, negative_attribute_restrictions, listRelationType, dictRelationAttributeRestriction, use_self_relations, limit_to_userEntityID_list, use_nested_relations)
4398 4399 4400 #old code
4401 - def OLDget_user_entity_relations(self, unification_protocol_name, userEntityID_list, attribute_restrictions = [], negative_attribute_restrictions = [], listRelationType=[], dictRelationAttributeRestriction={}, use_self_relations=True, limit_to_userEntityID_list=False, use_nested_relations=True):
4402 """ 4403 #! Nasty trick (not used anymore): to not to join externalEntityType table in the query in case sth is interacting with itself (see use_self_relations) there is an internal type called "self_type" which should be handled carefully whenever this method is called and type is going to be used 4404 """ 4405 4406 if len(userEntityID_list)==0: 4407 return [] 4408 4409 if use_nested_relations: 4410 PARTICIPANT_TABLE = self.biana_database.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE.get_table_name() 4411 else: 4412 PARTICIPANT_TABLE = self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE.get_table_name() 4413 4414 4415 tables = ["%s u1" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)), 4416 "%s p1" %PARTICIPANT_TABLE, 4417 "%s u2" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)), 4418 "%s p2" %PARTICIPANT_TABLE, 4419 "%s r" %(self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE.get_table_name()), 4420 "%s e" %(self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name())] 4421 4422 columns = [ ("u1.userEntityID","userEntityID1"), 4423 ("u2.userEntityID","userEntityID2"), 4424 ("r.externalEntityRelationID", "externalEntityRelationID"), 4425 ("r.type", "type"), 4426 ("e.type", "etype") ] 4427 4428 #group_conditions = ["u1.userEntityID","u2.userEntityID","r.externalEntityRelationID, r.type, e.type"] 4429 4430 fixed_conditions = [("u1.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list]),None)] 4431 join_conditions = [("p1.externalEntityID","=","u1.externalEntityID"), 4432 ("p1.externalEntityRelationID","=","p2.externalEntityRelationID"), 4433 ("p2.externalEntityID","=","u2.externalEntityID"), 4434 ("p1.externalEntityID","!=","p2.externalEntityID"), 4435 ("p1.externalEntityRelationID","=","r.externalEntityRelationID"), 4436 ("p2.externalEntityID", "=", "e.externalEntityID")] 4437 4438 if limit_to_userEntityID_list is True: 4439 join_conditions.append(("u2.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list ]),None)) 4440 4441 # Apply relation attribute restrictions 4442 num = 1 4443 for attribute_name, values in dictRelationAttributeRestriction.iteritems(): 4444 4445 current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_name.lower()] 4446 tables.append( "%s AS EERA%s" %(current_table.get_table_name(),num) ) 4447 if BianaObjects.ExternalEntityAttribute.isNumericAttribute(attribute_name, self.biana_database) or BianaObjects.ExternalEntityAttribute.isSpecialAttribute(attribute_name, self.biana_database): 4448 regex = re.compile("([><=]*)([\d\.]+)") 4449 for current_value in values: 4450 m = regex.match(current_value) 4451 if m: 4452 join_conditions.append(("EERA%s.value" %num,m.group(1),m.group(2))) 4453 else: 4454 join_conditions.append(("EERA%s.value" %num,"=","\"%s\"" %current_value)) 4455 else: 4456 join_conditions.append(("EERA%s.value" %num,"IN","(\"%s\")" %("\",\"".join([ str(x) for x in values])))) 4457 join_conditions.append( ("r.externalEntityRelationID","=","EERA%s.externalEntityID" %num ) ) 4458 num += 1 4459 4460 4461 if len(listRelationType) > 0: 4462 fixed_conditions.append(("r.type","IN","(\"%s\")" % "\",\"".join([ relationType for relationType in listRelationType]),None )) 4463 4464 # Get the interacting user entities 4465 query = self.db._get_select_sql_query( tables = tables, 4466 columns = columns, 4467 fixed_conditions = fixed_conditions, 4468 join_conditions = join_conditions, 4469 distinct_columns = True ) 4470 # group_conditions = group_conditions ) 4471 4472 # Removed because too slow.... we use the previous strategy 4473 #query = self._apply_relation_restrictions_to_query( query = query, 4474 # attribute_restrictions_dict = dictRelationAttributeRestriction, 4475 # column_name_to_restrict="externalEntityRelationID" ) 4476 4477 4478 query = self._apply_restrictions_to_query( query = query, 4479 unification_protocol_name = unification_protocol_name, 4480 attribute_restrictions= attribute_restrictions, 4481 column_name_to_restrict="userEntityID2" ) 4482 #column_name_to_restrict="userEntityID" ) 4483 4484 query = self._apply_negative_restrictions_to_query( query = query, 4485 unification_protocol_name = unification_protocol_name, 4486 negative_attribute_restrictions = negative_attribute_restrictions, 4487 column_name_to_restrict="userEntityID2" ) 4488 #column_name_to_restrict="userEntityID" ) 4489 4490 4491 #print query 4492 4493 interacting_uE = list(self.db.select_db_content( query, answer_mode = "raw" )) 4494 4495 if( use_self_relations is True ): 4496 4497 # Get self interactions 4498 fixed_conditions = [("u1.userEntityID","IN","(%s)" %",".join([ str(x) for x in userEntityID_list]),None)] 4499 4500 if len(listRelationType) > 0: 4501 fixed_conditions.append(("r.type","IN","(\"%s\")" % "\",\"".join([ relationType for relationType in listRelationType]),None )) 4502 4503 tables = ["%s u1" %(self._get_user_entity_table_name(unification_protocol_name=unification_protocol_name)), 4504 "%s p1" %PARTICIPANT_TABLE, 4505 "%s c" %self.biana_database.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT["cardinality"].get_table_name(), 4506 "%s r" %(self.biana_database.EXTERNAL_ENTITY_RELATION_TABLE.get_table_name()), 4507 "%s e" %(self.biana_database.EXTERNAL_ENTITY_TABLE.get_table_name())] 4508 4509 columns = ["u1.userEntityID","r.externalEntityRelationID", ("r.type", "type"), ("e.type", "etype") ] 4510 4511 4512 join_conditions = [("p1.externalEntityID","=","u1.externalEntityID"), 4513 ("c.externalEntityRelationParticipantID","=","p1.externalEntityRelationParticipantID"), 4514 ("c.value",">",1), 4515 ("r.externalEntityRelationID","=","p1.externalEntityRelationID"), 4516 ("p1.externalEntityID", "=", "e.externalEntityID")] 4517 4518 # Apply relation attribute restrictions 4519 #num = 1 4520 #for attribute_name, values in dictRelationAttributeRestriction.iteritems(): 4521 # current_table = self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[attribute_name.lower()] 4522 # tables.append( "%s AS EERA%s" %(current_table.get_table_name(),num) ) 4523 # if BianaObjects.ExternalEntityAttribute.isNumericAttribute(attribute_name, self.biana_database): 4524 # regex = re.compile("([><=]*)([\d\.]+)") 4525 # for current_value in values: 4526 # m = regex.match(current_value) 4527 # if m: 4528 # join_conditions.append(("EERA%s.value" %num,m.group(1),m.group(2))) 4529 # else: 4530 # join_conditions.append(("EERA%s.value" %num,"=",current_value)) 4531 # else: 4532 # join_conditions.append(("EERA%s.value" %num,"IN","(\"%s\")" %("\",\"".join([ str(x) for x in values])))) 4533 # join_conditions.append( ("r.externalEntityRelationID","=","EERA%s.externalEntityID" %num ) ) 4534 # num += 1 4535 4536 4537 query = self.db._get_select_sql_query( tables = tables, 4538 columns = columns, 4539 fixed_conditions = fixed_conditions, 4540 join_conditions = join_conditions ) 4541 4542 query = self._apply_relation_restrictions_to_query( query = query, 4543 attribute_restrictions_dict = dictRelationAttributeRestriction, 4544 column_name_to_restrict="externalEntityRelationID" ) 4545 4546 4547 query = self._apply_restrictions_to_query( query = query, 4548 unification_protocol_name = unification_protocol_name, 4549 attribute_restrictions= attribute_restrictions, 4550 column_name_to_restrict="userEntityID" ) 4551 4552 query = self._apply_negative_restrictions_to_query( query = query, 4553 unification_protocol_name = unification_protocol_name, 4554 negative_attribute_restrictions = negative_attribute_restrictions, 4555 column_name_to_restrict="userEntityID" ) 4556 4557 #print query 4558 4559 #interacting_uE.extend([ (x,x,y,z,"self_type") for x,y,z in self.db.select_db_content( query, answer_mode="raw" )]) 4560 interacting_uE.extend([ (x,x,y,z,t) for x,y,z,t in self.db.select_db_content( query, answer_mode="raw" )]) 4561 4562 #print len(interacting_uE) 4563 4564 return interacting_uE
4565 4566
4567 - def _get_equivalent_user_entities_by_sharing_attributes( self, unification_protocol_name, user_entity_id_list, attribute_list ):
4568 """ 4569 Returns a list of tuples with userEntityID1, userEntityID2 4570 OBSOLETE - use get_user_entity_relations_by_sharing_attributes instead 4571 4572 """ 4573 4574 raise Exception("OBSOLETE function call!") 4575 4576 equivalent_uE_set = sets.Set() 4577 4578 for current_expansion_block in attribute_list: 4579 tables = ["%s u1" %(self._get_user_entity_table_name(unification_protocol_name = unification_protocol_name)), 4580 "%s u2" %(self._get_user_entity_table_name(unification_protocol_name = unification_protocol_name))] 4581 columns = ["u1.userEntityID as original_uE", 4582 "u2.userEntityID AS eq_uE"] 4583 group_by = ["u1.userEntityID" , "u2.userEntityID"] 4584 join_conditions = [("u1.userEntityID","IN","(%s)" %",".join(map(str,user_entity_id_list)))] 4585 4586 for (current_expansion_attribute, parameter_and_value_list) in current_expansion_block: 4587 tables.append( "%s %s_1" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_expansion_attribute.lower()],current_expansion_attribute) ) 4588 tables.append( "%s %s_2" %(self.biana_database.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_expansion_attribute.lower()],current_expansion_attribute) ) 4589 tables.append( "%s u_%s" %(self._get_user_entity_table_name(unification_protocol_name = unification_protocol_name),current_expansion_attribute) ) 4590 4591 if current_expansion_attribute.lower() == "proteinsequence": 4592 4593 tables.append( "%s d_1" %(self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[current_expansion_attribute.lower()]["table"].get_table_name()) ) 4594 tables.append( "%s d_2" %(self.biana_database.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[current_expansion_attribute.lower()]["table"].get_table_name()) ) 4595 4596 join_conditions.extend( [ ("%s_1.externalEntityID" %current_expansion_attribute,"=","u1.externalEntityID"), 4597 ("%s_2.externalEntityID" %current_expansion_attribute,"=","u_%s.externalEntityID" %current_expansion_attribute), 4598 ("%s_1.value" %current_expansion_attribute,"=","d_1.sequenceMD5"), 4599 ("%s_2.value" %current_expansion_attribute,"=","d_2.sequenceMD5"), 4600 ("u_%s.userEntityID" %current_expansion_attribute,"=","u2.userEntityID"), 4601 ("u1.userEntityID","!=","u2.userEntityID") ] ) 4602 4603 if len(parameter_and_value_list) > 0: 4604 tables.append( "%s b" %(self.biana_database.PROTEIN_BLAST_RESULTS_TABLE.get_table_name()) ) 4605 join_conditions.extend( [ ("b.sequenceID_A","=","d_1.proteinSequenceID"), 4606 ("b.sequenceID_B","=","d_2.proteinSequenceID") ] ) 4607 join_conditions.extend( [ ("b.%s" % parameter, ">=", value) for (parameter, value) in parameter_and_value_list ] ) 4608 else: 4609 join_conditions.extend( [ ("d_1.value","=","d_2.value") ] ) 4610 else: 4611 join_conditions.extend( [ ("%s_1.value" %current_expansion_attribute,"=","%s_2.value" %current_expansion_attribute), 4612 ("%s_1.externalEntityID" %current_expansion_attribute,"=","u1.externalEntityID"), 4613 ("%s_2.externalEntityID" %current_expansion_attribute,"=","u_%s.externalEntityID" %current_expansion_attribute), 4614 ("u_%s.userEntityID" %current_expansion_attribute,"=","u2.userEntityID"), 4615 ("u1.userEntityID","!=","u2.userEntityID") ] ) 4616 if len(parameter_and_value_list) > 0: 4617 join_conditions.extend( [ ("%s_2.%s" %(current_expansion_attribute, parameter),"=","\"%s\"" %value) for parameter, value in parameter_and_value_list ] ) 4618 4619 4620 query = self.db._get_select_sql_query( tables = tables, 4621 columns = columns, 4622 join_conditions = join_conditions, 4623 group_conditions = group_by ) 4624 4625 #print query 4626 4627 equivalent_uE_set.update( self.db.select_db_content( sql_query = query, 4628 answer_mode="raw" ) ) 4629 4630 return equivalent_uE_set
4631 4632 4633 # TO CHECK NEGATIVE RESTRICTIONS
4634 - def get_expanded_entity_relations(self, unification_protocol_name, userEntityID_list, expansionAttributesList=[], listRelationType=[], use_self_relations=True, limit_to_userEntityID_list=False, expansionLevel=2, attribute_restrictions=[], negative_attribute_restrictions=[]):
4635 """ 4636 unification_protocol_name: name of the unification protocol to be used 4637 4638 userEntityID_list: user entity ids to be searched for relations based on the shared attributes 4639 4640 expansionAttributesList: list of lists containing (attribute, list_of_parameters_and_values) where list_of_parameter_and_values contains tuples like (parameter, value). Each tuple in the inner list is an expansion criterion (anded with the other expansion criteria in the inner list whereas all inner lists are ored in the query). List contains all the attributes that must be used together for the expansion. 4641 4642 listRelationType: type of relations between attribute sharing entry and its partners to be inferred 4643 4644 use_self_relations: including self relations or not 4645 4646 limit_to_userEntityID_list: use given user entity ids only in inference # TODO!!! NOT USED NOW. 4647 4648 expansionLevel: number of relations (edges) to look further during inference based on shared attributes 4649 4650 attribute_restrictions: restrictions to be applied on the attributes # TODO!!!! NOT USED NOW. 4651 """ 4652 4653 4654 if expansionLevel != 1 and expansionLevel != 2: 4655 raise ValueError("You must specify level to 1 or 2") 4656 4657 4658 # First, it is necessary to obtain u2 user entities (those that share attributes with u1) WITHOUT USING RESTRICTIONS 4659 u1_u2_set = sets.Set( self.get_user_entity_relations_by_sharing_attributes( unification_protocol_name = unification_protocol_name, 4660 userEntityID_list = userEntityID_list, 4661 listAttributes = expansionAttributesList ) ) 4662 4663 # Then, it is necessary to obtain u3 user entities (those having relations with u2). u3 ARE PREDICTIONS! 4664 u2_list = [ x[1] for x in u1_u2_set ] 4665 4666 4667 u2_to_u1 = dict([ (x[1],(x[0], x[2])) for x in u1_u2_set ]) 4668 4669 temporary_predictions_uEr = self.get_user_entity_relations( unification_protocol_name = unification_protocol_name, 4670 userEntityID_list = u2_list, 4671 listRelationType = listRelationType, 4672 use_self_relations = True, 4673 limit_to_userEntityID_list = False ) 4674 4675 4676 #(80171L, 8118L, 1182323L, 'interaction', 'protein') 4677 predicted_uEr = [] 4678 4679 # for level 2 predictions 4680 u3_externalEntityRelationID = {} 4681 u3_u1_predictions = {} # Dictionary with u3ids as keys and a list as u1ids values //used in next step 4682 u3_relation_type = {} # contains type of relation_type between u2-u3 4683 u1_u2 = {} 4684 4685 #print temporary_predictions_uEr 4686 for (uE2, uE3, eErID, eErType, eE3Type) in temporary_predictions_uEr: 4687 4688 uE1, type = u2_to_u1[uE2] 4689 4690 #if eE3Type == "self_type": 4691 # eE3Type = type 4692 4693 if use_self_relations is False and uE1==uE3: 4694 pass 4695 else: 4696 predicted_uEr.append( ( uE1, 4697 uE3, 4698 None, 4699 #BianaObjects.UserEntityExpandedRelation( userEntity1Source = uE2, 4700 # userEntity2Source = uE3, 4701 # attribute_identifier = None, #Not stored 4702 # attribute_values = None, #Not stored 4703 # externalEntityRelationID = eErID), 4704 "inferred_" + eErType, 4705 eE3Type) ) 4706 4707 u3_externalEntityRelationID.setdefault(uE3,sets.Set()).add(eErID) 4708 u3_relation_type[uE3] = eErType 4709 u3_u1_predictions.setdefault(uE3,[]).append(uE1) 4710 u1_u2.setdefault(uE1,sets.Set()).add(uE2) 4711 4712 del u2_to_u1 4713 4714 4715 if expansionLevel==2 and len(temporary_predictions_uEr)>0: 4716 4717 del temporary_predictions_uEr 4718 4719 u3set = sets.Set(map(str,u3_externalEntityRelationID.keys())) 4720 4721 done = 0 4722 4723 itime = time.time() 4724 4725 for current_uE3 in u3set: 4726 done += 1 4727 4728 temp_u3_u4_sharing_attributes = self.get_user_entity_relations_by_sharing_attributes( unification_protocol_name = unification_protocol_name, 4729 userEntityID_list = [current_uE3], 4730 listAttributes = expansionAttributesList 4731 ) 4732 4733 for current_uE3, current_uE4, current_uE4_type in temp_u3_u4_sharing_attributes: 4734 for current_eErID in u3_externalEntityRelationID[current_uE3]: # TODO: Problem, we don't store the original relation.... 4735 for current_uE1 in u3_u1_predictions[current_uE3]: 4736 if use_self_relations is False and current_uE1==current_uE4: 4737 pass 4738 else: 4739 predicted_uEr.append((current_uE1, 4740 current_uE4, 4741 None, 4742 #BianaObjects.UserEntityExpandedRelation( userEntity1Source = u1_u2[current_uE1], 4743 # userEntity2Source = current_uE3, 4744 # attribute_identifier = None, 4745 # attribute_values = None, 4746 # externalEntityRelationID = current_eErID ), 4747 "inferred_" + u3_relation_type[current_uE3], 4748 current_uE4_type)) 4749 4750 if done%100 == 0: 4751 print "%s done in %s seconds [%s]" %(done, time.time()-itime,len(predicted_uEr)) 4752 4753 4754 4755 # Filter the predicted uEr with the restrictions, negative_restrictions and limit_to_user_entity_ids 4756 4757 #print "Filternig the predictions" 4758 predicted_uE_ids_set = sets.Set( [int(x[1]) for x in predicted_uEr] ) 4759 4760 if limit_to_userEntityID_list is True: 4761 predicted_uE_ids_set = sets.Set(map(int,userEntityID_list)).intersection(predicted_uE_ids_set) 4762 4763 if len(attribute_restrictions)>0: 4764 all_possible = self.db.select_db_content( self._get_attribute_restrictions_query( unification_protocol_name = unification_protocol_name, 4765 negative_attribute_restrictions = attribute_restrictions ), 4766 answer_mode = "list" ) 4767 predicted_uE_ids_set = predicted_uE_ids_set.intersection(sets.Set(map(int,all_possible))) 4768 4769 if len(negative_attribute_restrictions)>0: 4770 excluded_list = self.db.select_db_content( self._get_attribute_restrictions_query( unification_protocol_name = unification_protocol_name, 4771 negative_attribute_restrictions = negative_attribute_restrictions ), 4772 answer_mode = "list" ) 4773 4774 predicted_uE_ids_set = predicted_uE_ids_set.difference(sets.Set(map(int,excluded_list))) 4775 4776 filtered_predicted_uEr = [] 4777 for current_prediction in predicted_uEr: 4778 if int(current_prediction[1]) in predicted_uE_ids_set: 4779 filtered_predicted_uEr.append(current_prediction) 4780 4781 #print filtered_predicted_uEr 4782 return filtered_predicted_uEr
4783 4784 4785 #################################################################################### 4786 # EXTERNAL ENTITIES DELETION METHODS # 4787 #################################################################################### 4788 4789 # At this moment, BIANA does not allow to delete external entities 4790 4791 4792 #################################################################################### 4793 # TESTING # 4794 #################################################################################### 4795 4796 4797 4798 if __name__=="__main__": 4799 4800 db_access = BianaDBaccess(dbname="biana_v5", dbhost="localhost", dbuser="root", dbsocket="/home/jgarcia/local/mysql/var/mysql.sock") 4801 print db_access 4802 db_access.create_database() 4803 sys.exit() 4804