Package biana :: Package BianaDB :: Module BianaDatabaseDefinition
[hide private]
[frames] | no frames]

Source Code for Module biana.BianaDB.BianaDatabaseDefinition

  1  from database import Database,TableDB,FieldDB 
  2  import biana.BianaObjects 
  3  import sets 
  4   
  5   
6 -class BianaDatabase(Database):
7 8 externalEntityID_col = "externalEntityID" 9 external_entity_relation_id_col = "externalEntityRelationID" 10 externalEntityID_col_type = "integer(4) unsigned" 11 externalDatabaseID_col = "externalDatabaseID" 12 external_entity_relation_participant_id_col = "externalEntityRelationParticipantID" 13 externalDatabaseID_col_type = "integer(2) unsigned" 14
15 - def __init__(self):
16 17 Database.__init__(self) 18 19 20 # DEFINE ALL TABLES = None 21 22 self.DATABASE_VERSION_TABLE = None 23 self.BIANA_DATABASE_TABLE = None 24 self.EXTERNAL_ENTITY_TABLE = None 25 self.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE = None 26 self.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE = None 27 self.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE = None 28 self.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE = None 29 self.EXTERNAL_DATABASE_TABLE = None 30 self.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE = None 31 32 self.EXTERNAL_ENTITY_RELATION_TABLE = None 33 self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE = None 34 self.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE = None 35 36 self.USER_ENTITY_TABLE = None 37 self.USER_ENTITY_PROTOCOL_TABLE = None 38 self.USER_ENTITY_PROTOCOL_ATOMS_TABLE = None 39 self.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE = None 40 41 self.ONTOLOGY_IS_A_TABLE = None 42 self.EXTENDED_ONTOLOGY_HIERARCHY_TABLE = None 43 self.ONTOLOGY_IS_PART_OF_TABLE = None 44 self.ONTOLOGY_INFO_TABLE = None 45 46 self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES = None 47 self.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT = None 48 self.CD_HIT_CLUSTERING_TABLE = None 49 self.PROTEIN_BLAST_RESULTS_TABLE = None 50 51 self.TRANSFER_TABLES_LIST = [] 52 53 # DICTIONARIES 54 self.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT = {} 55 self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT = {} 56 57 # FIELDS 58 self.externalEntityIdentifierType_field = None 59 60 self.externalEntityID_field = FieldDB(field_name = self.externalEntityID_col, 61 data_type = "integer(4) unsigned", 62 null = False) 63 64 self.external_entity_relation_participant_id_field = FieldDB( field_name = self.external_entity_relation_participant_id_col, 65 data_type = "integer(4) unsigned", 66 null = False) 67 self.externalDatabaseID_field = FieldDB(field_name = "externalDatabaseID", 68 data_type = "integer(2) unsigned", 69 null = False) 70 71 72 # DATABASE SPECIFICIC DATA TYPES 73 self.VALID_EXTERNAL_ENTITY_TYPES_DICT = {} 74 self.VALID_EXTERNAL_ENTITY_RELATION_TYPES_DICT = {} 75 76 self.VALID_IDENTIFIER_REFERENCE_TYPES_SET = sets.Set() 77 78 self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT = {} 79 self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_DATA_TYPES = {} 80 self.VALID_EXTERNAL_ENTITY_GENERAL_ATTRIBUTE_TYPES_SET = sets.Set() 81 self.VALID_EXTERNAL_ENTITY_IDENTIFIER_ATTRIBUTE_TYPES_SET = sets.Set() # Attributes of the type identifier that can be searched by value or by external entity 82 self.VALID_EXTERNAL_ENTITY_VERSIONABLE_IDENTIFIER_ATTRIBUTE_TYPES_SET = sets.Set() # Attributes of the type identifier with associated version that can be searched by value or by external entity and version 83 self.VALID_EXTERNAL_ENTITY_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES_SET = sets.Set() # Descriptive attributes that can be searched by keywords 84 self.VALID_EXTERNAL_ENTITY_DESCRIPTIVE_ATTRIBUTE_TYPES_SET = sets.Set() # Attributes searchable only by external entity 85 self.VALID_EXTERNAL_ENTITY_NUMERIC_ATTRIBUTE_TYPES_SET = sets.Set() # Numeric attributes searchable by numeric value (equal, greater or lower than,...) 86 self.VALID_EXTERNAL_ENTITY_SPECIAL_ATTRIBUTE_TYPES_SET = sets.Set() # Special attributes 87 88 self.VALID_EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TYPES_DICT = {} 89 self.VALID_EXTERNAL_ENTITY_RELATION_PARTICIPANT_NUMERIC_ATTRIBUTE_TYPES_SET = sets.Set() 90 self.VALID_EXTERNAL_ENTITY_RELATION_PARTICIPANT_DESCRIPTIVE_ATTRIBUTE_TYPES_SET = sets.Set() 91 self.VALID_EXTERNAL_ENTITY_RELATION_PARTICIPANT_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES_SET = sets.Set() 92 93 self._create_initial_tables()
94 95 96
97 - def add_valid_external_entity_relation_participant_attribute_type(self, current_attribute, data_type, category, additional_fields_tuple_list):
98 99 new_table = None 100 101 category = category.lower() 102 103 if category == "eerp attribute": 104 new_table = TableDB( table_name = "externalEntityRelationParticipant"+current_attribute, 105 table_fields = [self.external_entity_relation_participant_id_field, 106 FieldDB( field_name = "value", 107 data_type = data_type, 108 null = False )], 109 indices = [self.external_entity_relation_participant_id_col] ) 110 elif category == "eerp numeric attribute": 111 pass 112 elif category == "eerp descriptive attribute": 113 pass 114 elif category == "eerp descriptive searchable attribute": 115 pass 116 else: 117 raise ValueError("Invalid category when adding valid relation participant attribute type: %s" %category) 118 119 self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT[current_attribute.lower()] = new_table
120 121
122 - def add_valid_external_entity_attribute_type(self, current_attribute, data_type, category, additional_fields_tuple_list=[]):
123 124 category = category.lower() 125 126 new_table = None 127 128 if category == "ee attribute": 129 self.VALID_EXTERNAL_ENTITY_GENERAL_ATTRIBUTE_TYPES_SET.add(current_attribute.lower()) 130 additional_fields_set = sets.Set() 131 new_table = TableDB( table_name = "externalEntity"+current_attribute, 132 table_fields = [ FieldDB( field_name = "value", 133 data_type = data_type, 134 null = False ), 135 self.externalEntityID_field ], 136 primary_key = (self.externalEntityID_col, "value"), # changed in order to optimize unification and expansions 137 indices = [("value")] ) 138 139 140 elif category == "ee identifier attribute": 141 self.VALID_EXTERNAL_ENTITY_IDENTIFIER_ATTRIBUTE_TYPES_SET.add(current_attribute.lower()) 142 additional_fields_set = sets.Set(["type"]) 143 new_table = TableDB( table_name = "externalEntity"+current_attribute, 144 table_fields = [ FieldDB( field_name = "value", 145 data_type = data_type, 146 null = False ), 147 self.externalEntityID_field, 148 self.externalEntityIdentifierType_field ], 149 primary_key = (self.externalEntityID_col, "value"), # changed in order to optimize unification and expansions 150 indices = [("value")] ) 151 152 elif category == "ee versionable identifier attribute": 153 self.VALID_EXTERNAL_ENTITY_VERSIONABLE_IDENTIFIER_ATTRIBUTE_TYPES_SET.add(current_attribute.lower()) 154 additional_fields_set = sets.Set(["type","version"]) 155 new_table = TableDB( table_name = "externalEntity"+current_attribute, 156 table_fields = [ FieldDB( field_name = "value", 157 data_type = data_type, 158 null = False ), 159 FieldDB( field_name = "version", 160 data_type = "integer(1) unsigned", 161 null = False, 162 default_value = 0 ), 163 self.externalEntityID_field, 164 self.externalEntityIdentifierType_field ], 165 primary_key = (self.externalEntityID_col,"value","version"), 166 indices = [ ("value",self.externalEntityID_col), 167 ("value","version")] ) 168 169 170 elif category == "ee descriptive attribute": 171 self.VALID_EXTERNAL_ENTITY_DESCRIPTIVE_ATTRIBUTE_TYPES_SET.add(current_attribute.lower()) 172 additional_fields_set = sets.Set() 173 new_table = TableDB( table_name = "externalEntity"+current_attribute, 174 table_fields = [ FieldDB( field_name = "value", 175 data_type = data_type, 176 null = False ), 177 self.externalEntityID_field, 178 self.externalEntityIdentifierType_field ], 179 indices = [ (self.externalEntityID_col) ] ) 180 181 182 elif category == "ee descriptive searchable attribute": 183 self.VALID_EXTERNAL_ENTITY_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES_SET.add(current_attribute.lower()) 184 additional_fields_set = sets.Set() 185 new_table = TableDB( table_name = "externalEntity"+current_attribute, 186 table_fields = [ FieldDB( field_name = "value", 187 data_type = data_type, 188 null = False ), 189 self.externalEntityID_field, 190 self.externalEntityIdentifierType_field ], 191 indices = [ (self.externalEntityID_col) ], 192 fulltext_indices = [ ("value") ] ) 193 194 elif category == "ee numeric attribute": 195 self.VALID_EXTERNAL_ENTITY_NUMERIC_ATTRIBUTE_TYPES_SET.add(current_attribute.lower()) 196 additional_fields_set = sets.Set() 197 new_table = TableDB( table_name = "externalEntity"+current_attribute, 198 table_fields = [ FieldDB( field_name = "value", 199 data_type = data_type, 200 null = False ), 201 self.externalEntityID_field], 202 indices = [ (self.externalEntityID_col,), ("value",) ] ) 203 204 elif category == "ee special attribute": 205 self.VALID_EXTERNAL_ENTITY_SPECIAL_ATTRIBUTE_TYPES_SET.add(current_attribute.lower()) 206 additional_fields_set = sets.Set() 207 fields = [ FieldDB( field_name = "value", 208 data_type = data_type, 209 null = False ), 210 self.externalEntityID_field, 211 self.externalEntityIdentifierType_field ] 212 213 for current_field_name, current_data_type, current_null in additional_fields_tuple_list: 214 if current_null == 0: current_null = False 215 else: current_null = True 216 fields.append( FieldDB( field_name = current_field_name, 217 data_type = current_data_type, 218 null = current_null ) ) 219 additional_fields_set.add(current_field_name) 220 221 new_table = TableDB( table_name = "externalEntity"+current_attribute, 222 table_fields = fields, 223 indices = [ (self.externalEntityID_col,), ("value",) ] ) 224 225 else: 226 raise ValueError("Invalid category when adding valid attribute type: %s" %category) 227 228 self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT[current_attribute.lower()] = {"name": current_attribute, "fields": additional_fields_set } 229 self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_DATA_TYPES[current_attribute.lower()] = data_type 230 231 if new_table: 232 self.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute.lower()] = new_table
233
234 - def is_valid_external_entity_relation_participant_attribute_type(self, type): return type.lower() in self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT
235 - def is_valid_external_entity_attribute_type(self, type): return type.lower() in self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT
236 - def add_valid_external_entity_type(self, type): self.VALID_EXTERNAL_ENTITY_TYPES_DICT[type.lower()] = type
237 - def get_valid_external_entity_types(self): return self.VALID_EXTERNAL_ENTITY_TYPES_DICT.keys()
238 - def is_valid_external_entity_type(self, type): return type.lower() in self.VALID_EXTERNAL_ENTITY_TYPES_DICT
239 - def add_valid_external_entity_relation_type(self, type): self.VALID_EXTERNAL_ENTITY_RELATION_TYPES_DICT[type.lower()] = type
240 - def get_valid_external_entity_relation_types(self): return self.VALID_EXTERNAL_ENTITY_RELATION_TYPES_DICT.keys()
241 - def is_valid_external_entity_relation_type(self, type): return type.lower() in self.VALID_EXTERNAL_ENTITY_RELATION_TYPES_DICT
242 - def is_valid_identifier_reference_type(self, reference_type): return reference_type.lower() in self.VALID_IDENTIFIER_REFERENCE_TYPES_SET
243
244 - def get_attribute_data_type(self, attribute_identifier): return self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_DATA_TYPES[attribute_identifier.lower()]
245
246 - def add_valid_identifier_reference_type(self, reference_type):
247 248 self.VALID_IDENTIFIER_REFERENCE_TYPES_SET.add(reference_type.lower()) 249 valid_identifier_reference_types_list = list(self.VALID_IDENTIFIER_REFERENCE_TYPES_SET) 250 valid_identifier_reference_types_list.sort() 251 self.externalEntityIdentifierType_field = FieldDB( field_name = "type", 252 data_type = "ENUM(\"%s\")" %"\",\"".join(valid_identifier_reference_types_list), 253 user_friendly_name = "type" )
254 255 256
257 - def get_sql_query( self, ignore_primary_keys = False ):
258 259 self._load_database_tables() 260 return Database.get_sql_query(self, ignore_primary_keys = ignore_primary_keys )
261 262
263 - def get_tables(self):
264 self._load_database_tables() 265 return Database.get_tables(self)
266 267 268 269
270 - def _load_database_tables(self):
271 """ 272 Returns a database object with all the content 273 """ 274 275 self.remove_tables() 276 self._create_initial_tables() 277 self.create_specific_database_tables() 278 279 self.add_table(self.DATABASE_VERSION_TABLE) 280 self.add_table(self.BIANA_DATABASE_TABLE) 281 self.add_table(self.TYPES_AND_ATTRIBUTES_TABLE) 282 self.add_table(self.SPECIAL_ATTRIBUTES_TABLE) 283 self.add_table(self.EXTERNAL_ENTITY_TABLE) 284 self.add_table(self.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE) 285 self.add_table(self.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE) 286 self.add_table(self.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE) 287 self.add_table(self.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE) 288 self.add_table(self.EXTERNAL_DATABASE_TABLE) 289 self.add_table(self.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE) 290 291 self.add_table(self.EXTERNAL_ENTITY_RELATION_TABLE) 292 self.add_table(self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE) 293 self.add_table(self.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE) 294 295 self.add_table(self.USER_ENTITY_TABLE) 296 self.add_table(self.USER_ENTITY_PROTOCOL_TABLE) 297 self.add_table(self.USER_ENTITY_PROTOCOL_ATOMS_TABLE) 298 self.add_table(self.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE) 299 300 self.add_table(self.ONTOLOGY_IS_A_TABLE) 301 self.add_table(self.EXTENDED_ONTOLOGY_HIERARCHY_TABLE) 302 self.add_table(self.ONTOLOGY_IS_PART_OF_TABLE) 303 self.add_table(self.ONTOLOGY_INFO_TABLE) 304 305 for current_table in self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES.values(): 306 self.add_table(current_table) 307 308 self.add_table(self.CD_HIT_CLUSTERING_TABLE) 309 self.add_table(self.PROTEIN_BLAST_RESULTS_TABLE) 310 311 312 for current_table in self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT.values(): 313 self.add_table(current_table) 314 315 for current_table in self.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT.values(): 316 self.add_table(current_table)
317 318
319 - def _create_initial_tables(self):
320 """ 321 Creates initial tables, not dependant on specific biana databases 322 """ 323 324 # First of all, create initial tables that are equal in ALL BIANA Databases 325 self.BIANA_DATABASE_TABLE = TableDB( table_name = "BianaDatabase", 326 table_fields = [ FieldDB( field_name="description", 327 data_type = "varchar(255)" ), 328 FieldDB( field_name="last_externalEntityID", 329 data_type = "integer(4) unsigned default 0", 330 null = False), 331 FieldDB( field_name="last_externalEntityRelationParticipantID", 332 data_type = "integer(4) unsigned default 0", 333 null = False), 334 FieldDB( field_name="last_proteinSequenceID", 335 data_type = "integer(4) unsigned default 0", 336 null = False), 337 FieldDB( field_name="last_nucleotideSequenceID", 338 data_type = "integer(4) unsigned default 0", 339 null = False), 340 FieldDB( field_name="last_keyID", 341 data_type = "integer(1) unsigned default 0", 342 null = False), 343 FieldDB( field_name="source_code_version", 344 data_type = "varchar(20)"), 345 FieldDB( field_name="optimized_for", 346 data_type = "ENUM(\"parsing\",\"running\")" ) ] ) 347 348 self.TYPES_AND_ATTRIBUTES_TABLE = TableDB( table_name = "BianaDatabaseTypesAndAttributes", 349 table_fields = [ FieldDB( field_name="name", 350 data_type = "varchar(255)", 351 null = False ), 352 FieldDB( field_name="data_type", 353 data_type = "text" ), # not varchar because it only allows 255 characters 354 FieldDB( field_name="category", 355 data_type = "ENUM(\"ee type\",\"eer type\",\"ee attribute\",\"ee identifier attribute\",\"ee versionable identifier attribute\",\"ee descriptive attribute\",\"ee descriptive searchable attribute\",\"ee numeric attribute\",\"ee special attribute\",\"eerp attribute\",\"eerp descriptive attribute\",\"eerp descriptive searchable attribute\",\"identifier reference type\")", 356 null = False ) ] ) 357 358 # Table for special attributes: those having more than a field 359 self.SPECIAL_ATTRIBUTES_TABLE = TableDB( table_name = "BianaDatabaseSpecialAttributes", 360 table_fields = [ FieldDB( field_name="attribute_name", 361 data_type = "varchar(255)", 362 null = False ), 363 FieldDB( field_name="field_name", 364 data_type = "varchar(255)" ), # not varchar because it only allows 255 characters 365 FieldDB( field_name="data_type", 366 data_type = "text", 367 null = False ), 368 FieldDB( field_name="canbenull", 369 data_type = "tinyint(1)", 370 null = False ) ] ) 371 372 self.DATABASE_VERSION_TABLE = TableDB( table_name="database_version", 373 table_fields = [ FieldDB(field_name = "versionID", 374 data_type = "smallint unsigned auto_increment", 375 null = False), 376 FieldDB(field_name = "dbControlID", 377 data_type = "varchar(40)", 378 null = False), 379 FieldDB(field_name = "date", 380 data_type = "char(10)", 381 null = False), 382 FieldDB( field_name = "stable_externalEntityID", 383 data_type = "integer(4) unsigned"), 384 FieldDB( field_name = "stable_externalEntityRelationParticipantID", 385 data_type = "integer(4) unsigned") ], 386 primary_key = ("dbControlID"), 387 indices = [("versionID")] )
388 389 390 391
393 """ 394 Adds to the database all the specific tables for this database 395 """ 396 397 # GENERAL FIELDS 398 399 eE_types_list = self.VALID_EXTERNAL_ENTITY_TYPES_DICT.keys() 400 eE_types_list.sort() 401 self.enum_eE_types_col = "ENUM(\"%s\")" %"\",\"".join(eE_types_list) 402 403 eEr_types_list = self.VALID_EXTERNAL_ENTITY_RELATION_TYPES_DICT.keys() 404 eEr_types_list.sort() 405 self.enum_eEr_types_col = "ENUM(\"%s\")" %"\",\"".join(eEr_types_list) 406 407 eE_attr_list = self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT.keys() 408 eE_attr_list.sort() 409 self.enum_eEattr_col = "ENUM(\"%s\")" %"\",\"".join(eE_attr_list) 410 411 412 self.EXTERNAL_ENTITY_TABLE = TableDB(table_name = "externalEntity", 413 table_fields = [ FieldDB(field_name = self.externalEntityID_col, 414 data_type = "integer(4) unsigned", 415 null = False), 416 FieldDB(field_name = self.externalDatabaseID_col, 417 data_type = self.externalDatabaseID_col_type, 418 null = False), 419 FieldDB(field_name = "type", 420 data_type = self.enum_eE_types_col, 421 null = False)], 422 primary_key = (self.externalDatabaseID_col,self.externalEntityID_col), #! if the order is reversed, the eEId index may not be needed 423 indices = [ (self.externalEntityID_col) ] ) 424 425 self.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE = TableDB( table_name = "externalDatabaseExternalEntityRelationType", 426 table_fields = [ self.externalDatabaseID_field, 427 FieldDB( field_name = "eErType", 428 data_type = self.enum_eEr_types_col, 429 null = False) ], 430 primary_key = (self.externalDatabaseID_col,"eErType") ) 431 432 self.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE = TableDB( table_name = "externalDatabaseExternalEntityType", 433 table_fields = [ self.externalDatabaseID_field, 434 FieldDB( field_name = "eEType", 435 data_type = self.enum_eE_types_col, 436 null = False) ], 437 primary_key = (self.externalDatabaseID_col,"eEType") ) 438 439 self.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE = TableDB( table_name = "externalDatabaseExternalEntityRelationAttribute", 440 table_fields = [ self.externalDatabaseID_field, 441 FieldDB( field_name = "attributeType", 442 data_type = self.enum_eEattr_col, 443 null = False) ], 444 primary_key = (self.externalDatabaseID_col,"attributeType") ) 445 446 self.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE = TableDB( table_name = "externalDatabaseExternalEntityAttribute", 447 table_fields = [ self.externalDatabaseID_field, 448 FieldDB( field_name = "attributeType", 449 data_type = self.enum_eEattr_col, 450 null = False) ], 451 primary_key = (self.externalDatabaseID_col,"attributeType") ) 452 453 self.EXTERNAL_DATABASE_TABLE = TableDB( table_name = "externalDatabase", 454 table_fields = [ FieldDB( field_name = self.externalDatabaseID_col, 455 data_type = "smallint unsigned auto_increment", 456 null = False), 457 FieldDB( field_name = "databaseName", 458 data_type = "varchar(255)", 459 null = False), 460 FieldDB( field_name = "databaseVersion", 461 data_type = "varchar(255)", 462 null = False), 463 FieldDB( field_name = "parsedFile", 464 data_type = "varchar(255)"), 465 FieldDB( field_name = "parsedDate", 466 data_type = "char(10)", 467 null = False), 468 FieldDB( field_name = "databaseDescription", 469 data_type = "varchar(255)", 470 null = False), 471 FieldDB( field_name = "parsingTime", 472 data_type = "integer(3) unsigned"), 473 FieldDB( field_name = "defaultExternalEntityAttribute", 474 data_type = self.enum_eEattr_col ), 475 FieldDB( field_name = "isPromiscuous", 476 data_type = "bool", 477 default_value = 0, 478 null = False) ], # + type_fields, 479 primary_key = ("databaseName","databaseVersion"), 480 indices = [self.externalDatabaseID_col]) 481 482 self.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE = TableDB( table_name = "externalDatabaseAttributeTransfer", 483 table_fields = [ FieldDB( field_name = "keyID", 484 data_type = "integer(1) unsigned", 485 null = False ), 486 FieldDB( field_name = "externalDatabaseID", 487 data_type = "integer(2) unsigned", 488 null = False ), 489 FieldDB( field_name = "attributeKey", 490 data_type = self.enum_eEattr_col, 491 null = False ), 492 FieldDB( field_name = "transferAttribute", 493 data_type = self.enum_eEattr_col, 494 null = False ) ] ) 495 496 self.ALIGNMENT_ELEMENT_TABLE = TableDB( table_name = "alignmentElement", 497 table_fields = [ self.externalEntityID_field, # Referring to the external Entity Relation ID of the alignment 498 FieldDB( field_name = "sequenceMD5", 499 data_type = "binary(16)"), # Previously it was mandatory, but as not always is mandatory to have previously the sequence, now it is not mandatory 500 FieldDB( field_name = "position", 501 data_type = "integer(3) unsigned", 502 null = False ), 503 FieldDB( field_name = "crossID", # Used to cross the alignment element with another external entity (as pex an uniprot, where then it is possible to obtain complete sequence, taxonomy,...) 504 data_type = "varchar(255)" ), 505 FieldDB( field_name = "crossID_identifier_type", 506 data_type = self.enum_eEattr_col ), 507 FieldDB( field_name = "alignmentBlocks", 508 data_type = "varchar(255)"), 509 FieldDB( field_name = "alignedSequence", # Compressed aligned sequence (used for alignments already done) 510 data_type = "text"), 511 FieldDB( field_name = "identity", 512 data_type = "integer(1) unsigned"), 513 FieldDB( field_name = "weighted_sim", 514 data_type = "integer(1) unsigned"), 515 FieldDB( field_name = "sequence_covered", 516 data_type = "integer(1) unsigned")], 517 primary_key = (self.externalEntityID_col,"sequenceMD5","position"), 518 indices = [("sequenceMD5")] ) 519 520 521 self.EXTERNAL_ENTITY_RELATION_TABLE = TableDB( table_name = "externalEntityRelation", 522 table_fields = [ FieldDB( field_name = self.external_entity_relation_id_col, 523 data_type = self.externalEntityID_col_type, 524 null = False ), 525 FieldDB(field_name = "type", 526 data_type = self.enum_eEr_types_col, 527 null = False)], 528 primary_key = (self.external_entity_relation_id_col)) 529 530 531 self.external_entity_relation_id_field = FieldDB( field_name = self.external_entity_relation_id_col, 532 data_type = "integer(4) unsigned", 533 null = False, 534 foreign_key = (self.EXTERNAL_ENTITY_RELATION_TABLE, 535 self.EXTERNAL_ENTITY_RELATION_TABLE.get_field(field_name="externalEntityRelationID" ))) 536 537 538 self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE = TableDB( table_name = "externalEntityRelationParticipant", 539 table_fields = [ FieldDB( field_name = self.external_entity_relation_participant_id_col, 540 data_type = "integer(4) unsigned", 541 null = False ), 542 self.external_entity_relation_id_field, 543 self.externalEntityID_field], 544 primary_key = self.external_entity_relation_participant_id_col, 545 indices = [(self.external_entity_relation_id_col, self.externalEntityID_col), 546 (self.externalEntityID_col, self.external_entity_relation_id_col)]) 547 548 self.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE = TableDB( table_name = "extendedExternalEntityRelationParticipant", 549 table_fields = [ FieldDB( field_name = self.external_entity_relation_participant_id_col, 550 data_type = "integer(4) unsigned", 551 null = False ), 552 self.external_entity_relation_id_field, 553 self.externalEntityID_field], 554 primary_key = self.external_entity_relation_participant_id_col, 555 indices = [(self.external_entity_relation_id_col, self.externalEntityID_col), 556 (self.externalEntityID_col,self.external_entity_relation_id_col)]) 557 558 559 560 561 ################################################## 562 ### USER ENTITIES RELATED ### 563 ################################################## 564 565 # A table to store the information about how the unification has been done 566 # A table to store the unified entities 567 568 self.USER_ENTITY_TABLE = TableDB( table_name = "userEntityUnification_protocol_", 569 table_fields = [ FieldDB( field_name = "userEntityID", 570 data_type = "integer(4) unsigned", 571 null = False ), 572 FieldDB( field_name = "externalEntityID", 573 data_type = "integer(4) unsigned", 574 null = False) ], 575 primary_key = ("userEntityID","externalEntityID"), 576 indices = [("externalEntityID","userEntityID")]) 577 578 self.USER_ENTITY_PROTOCOL_TABLE = TableDB( table_name = "userEntityUnificationProtocol", 579 table_fields = [ FieldDB( field_name = "unificationProtocolID", 580 data_type = "smallint unsigned auto_increment", 581 null = False), 582 FieldDB( field_name = "description", 583 data_type = "varchar(255)", 584 null = False), 585 FieldDB( field_name = "databaseVersion", 586 data_type = "varchar(40)", 587 null = False )], 588 # deleted foreign key for the moment... (because it failed when inserting default protocol) 589 #foreign_key = (DATABASE_VERSION_TABLE, 590 # DATABASE_VERSION_TABLE.get_field(field_name = "dbControlID")))], 591 primary_key = "unificationProtocolID" ) 592 593 594 self.USER_ENTITY_PROTOCOL_ATOMS_TABLE = TableDB( table_name = "userEntityUnificationProtocolAtoms", 595 table_fields = [ FieldDB( field_name = "unificationAtomID", 596 data_type = "integer(2) unsigned auto_increment", 597 null = False), 598 FieldDB( field_name = "unificationProtocolID", 599 data_type = "smallint unsigned", 600 null = False, 601 foreign_key = (self.USER_ENTITY_PROTOCOL_TABLE, 602 self.USER_ENTITY_PROTOCOL_TABLE.get_field(field_name = "unificationProtocolID") )), 603 FieldDB( field_name = "externalDatabaseID_A", 604 data_type = "integer(2) unsigned", 605 null = False, 606 foreign_key = (self.EXTERNAL_DATABASE_TABLE, 607 self.EXTERNAL_DATABASE_TABLE.get_field(field_name = "externalDatabaseID") )), 608 FieldDB( field_name = "externalDatabaseID_B", 609 data_type = "integer(2) unsigned", 610 null = False, 611 foreign_key = (self.EXTERNAL_DATABASE_TABLE, 612 self.EXTERNAL_DATABASE_TABLE.get_field(field_name = "externalDatabaseID") )) ], 613 indices = ["unificationProtocolID"], 614 primary_key = "unificationAtomID" ) 615 616 self.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE = TableDB( table_name = "userEntityUnificationProtocolAtomAttribute", 617 table_fields = [ FieldDB( field_name = "unificationAtomID", 618 data_type = "integer(2) unsigned", 619 null = False, 620 foreign_key = (self.USER_ENTITY_PROTOCOL_ATOMS_TABLE, 621 self.USER_ENTITY_PROTOCOL_ATOMS_TABLE.get_field( field_name = "unificationAtomID" ) )), 622 FieldDB( field_name = "cross_referenced_code", 623 data_type = self.enum_eEattr_col, 624 null = False ) ], 625 primary_key = ("unificationAtomID","cross_referenced_code") ) 626 627 628 ################################################## 629 ### ONTOLOGIES RELATED ### 630 ################################################## 631 632 633 self.ONTOLOGY_IS_A_TABLE = TableDB( table_name = "ExternalEntityOntology_isA", 634 table_fields = [ self.externalEntityID_field, 635 FieldDB( field_name = "is_a", 636 data_type = self.externalEntityID_col_type, 637 null = False )], 638 indices = [(self.externalEntityID_col,),("is_a",)] ) 639 640 self.EXTENDED_ONTOLOGY_HIERARCHY_TABLE = TableDB( table_name = "ExtendedOntologyIsA", 641 table_fields = [ FieldDB( field_name = "parentExternalEntityID", 642 data_type = self.externalEntityID_col_type, 643 null = False ), 644 FieldDB( field_name = "childExternalEntityID", 645 data_type = self.externalEntityID_col_type, 646 null = False ) ], 647 indices = [("parentExternalEntityID","childExternalEntityID")] ) 648 649 self.ONTOLOGY_IS_PART_OF_TABLE = TableDB( table_name = "ExternalEntityOntology_isPartOf", 650 table_fields = [ self.externalEntityID_field, 651 FieldDB( field_name = "is_part_of", 652 data_type = self.externalEntityID_col_type, 653 null = False )], 654 indices = [(self.externalEntityID_col,),("is_part_of",)] ) 655 656 self.ONTOLOGY_INFO_TABLE = TableDB( table_name = "ExternalEntityOntology", 657 table_fields = [ self.externalEntityID_field, 658 FieldDB( field_name = "name", 659 data_type = "varchar(255)", 660 null = False ), 661 FieldDB( field_name = "linked_attribute", # Attribute used in the external database as the main key 662 data_type = "varchar(255)", 663 null = False ), 664 FieldDB( field_name = "key_id", 665 data_type = "integer(1) unsigned", 666 null = False ), 667 FieldDB( field_name = "level_attribute", 668 data_type = "varchar(255)", 669 null = True ), 670 FieldDB( field_name = "description_attribute", # Attribute used to show the tree in a user friendly way 671 data_type = "varchar(255)", 672 null = False )], 673 primary_key = ["name"] ) 674 675 # TO CHECK: Al crear sense primary_keys, deixa de ser primary... 676 677 678 679 ##################################################### 680 ### EXTERNAL ATTRIBUTES DESCRIPTION DATABASES ### 681 ##################################################### 682 683 self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES = { "pdb": TableDB( table_name = "pdb", 684 table_fields = [ FieldDB( field_name = "pdb", 685 data_type = "char(4)", 686 null = False ), 687 FieldDB( field_name = "chain", 688 data_type = "char(1)"), 689 FieldDB( field_name = "resolution", 690 data_type = "float"), 691 self.externalEntityID_field, 692 FieldDB( field_name = "total_num_atoms", # Total number of atoms 693 data_type = "integer(3) unsigned", 694 null = False ), 695 FieldDB( field_name = "residue_num_list", # Number of the residue (integer list) 696 data_type = "blob"), 697 FieldDB( field_name = "residue_type_list", # List of residue type 698 data_type = "text"), # List of the types of the atoms (string list) 699 FieldDB( field_name = "residue_atom_num_list", #Number of atoms/residue (integer list) 700 data_type = "blob"), 701 FieldDB( field_name = "atom_type_list", # List of atom type (1character/atom) 702 data_type = "text"), 703 FieldDB( field_name = "atom_name_list", # List of the atom names (string list) 704 data_type = "text"), 705 FieldDB( field_name = "atom_coordinates", # List of coordinates for all atoms (float list: x,y,z) 706 data_type = "mediumblob"), 707 FieldDB( field_name = "hssp_residue_num_correspondences", # List of residue correspondences between hssp and pdb 708 data_type = "blob"), 709 FieldDB( field_name = "residue_dssp_results", # Results of the dssp program 710 data_type = "text"), 711 FieldDB( field_name = "residue_hssp_entropy", # Results of the entropy found in hssp 712 data_type = "blob"), 713 FieldDB( field_name = "residue_hssp_norm_entropy", # Normalized entropy found in hssp 714 data_type = "blob"), 715 FieldDB( field_name = "residue_hssp_variability", # Results of the variability found in hssp 716 data_type = "blob"), 717 FieldDB( field_name = "conservation_hssp", # Conservation found in the hssp 718 data_type = "blob"), 719 FieldDB( field_name = "solvent_exposure_hssp", # Solvent exposure found in hssp 720 data_type = "blob")], 721 primary_key = ("pdb","chain") ), 722 723 "proteinSequence": TableDB( table_name = "sequenceProtein", 724 table_fields = [ FieldDB( field_name = "proteinSequenceID", 725 #data_type = "integer unsigned auto_increment"), 726 data_type = "integer(4) unsigned"), 727 #null = False), 728 FieldDB( field_name = "sequenceMD5", 729 #data_type = "char(40) collate latin1_general_cs", 730 data_type = "binary(16)", 731 user_friendly_name = "sequenceMD5", 732 optimize_space = 4, 733 null = False), 734 FieldDB( field_name = "sequence", 735 user_friendly_name = "sequence", 736 data_type = "mediumtext", 737 compress = 1, 738 null = False), 739 FieldDB( field_name = "sequenceLength", 740 data_type = "integer(2) unsigned", 741 user_friendly_name = "length", 742 null = False), 743 FieldDB( field_name = "sequenceMW", 744 data_type = "float", 745 user_friendly_name = "mw", 746 null = False, 747 default_value = 0), 748 FieldDB( field_name = "sequenceIP", 749 data_type = "float", 750 user_friendly_name = "ip", 751 null = False, 752 default_value = 0 ) ], 753 primary_key = "sequenceMD5", 754 indices = [("proteinSequenceID")]), 755 756 "nucleotideSequence": TableDB( table_name = "sequenceNucleotide", 757 table_fields = [ FieldDB( field_name = "nucleotideSequenceID", 758 data_type = "integer(4) unsigned"), 759 #ata_type = "integer(4) unsigned auto_increment"), 760 #null = False), 761 FieldDB( field_name = "sequenceMD5", 762 #data_type = "char(40) collate latin1_general_cs", 763 data_type = "binary(16)", 764 user_friendly_name = "sequenceMD5", 765 optimize_space = 4, 766 null = False), 767 FieldDB( field_name = "sequenceLength", 768 data_type = "integer(2) unsigned", 769 user_friendly_name = "length", 770 null = False), 771 FieldDB( field_name = "sequence", 772 data_type = "mediumtext", 773 compress = 1, 774 user_friendly_name = "sequence", 775 null = False)], 776 primary_key = "sequenceMD5", 777 indices = [("nucleotideSequenceID")]) } 778 779 self.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT = {} 780 781 for actual_attribute in self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES.keys(): 782 783 temp_fields = self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES[actual_attribute].get_fields() 784 785 self.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[actual_attribute.lower()] = {"table": self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES[actual_attribute], 786 "fields": dict([ (x.get_user_friendly_name(),x) for x in temp_fields ]) } 787 788 ########################################## 789 ### SEQUENCE SIMILARITY RELATED TABLES ### 790 ########################################## 791 792 # Table to store CD-HIT clusters file 793 self.CD_HIT_CLUSTERING_TABLE = TableDB( table_name = "sequenceProteinCD_HIT", 794 table_fields = [ FieldDB( field_name = "representant_proteinSequenceID", 795 data_type = "integer(4) unsigned", 796 null = False ), 797 FieldDB( field_name = "proteinSequenceID", 798 data_type = "integer(4) unsigned", 799 null = False ), 800 FieldDB( field_name = "representant_start_position", 801 data_type = "integer(2) unsigned", 802 null = False ), 803 FieldDB( field_name = "representant_end_position", 804 data_type = "integer(2) unsigned", 805 null = False ), 806 FieldDB( field_name = "start_position", 807 data_type = "integer(2) unsigned", 808 null = False ), 809 FieldDB( field_name = "end_position", 810 data_type = "integer(2) unsigned", 811 null = False ), 812 FieldDB( field_name = "identity", 813 data_type = "integer(1) unsigned", 814 null = False )], 815 indices = [("proteinSequenceID"),("representant_proteinSequenceID")] ) 816 817 818 819 self.PROTEIN_BLAST_RESULTS_TABLE = TableDB( table_name = "sequenceProteinBlastResults", 820 table_fields = [ FieldDB( field_name = "sequenceID_A", 821 data_type = "integer(4) unsigned", 822 null = False ), 823 FieldDB( field_name = "sequenceID_B", 824 data_type = "integer(4) unsigned", 825 null = False ), 826 FieldDB( field_name = "evalue", 827 data_type = "float", 828 null = False ), 829 FieldDB( field_name = "score", 830 data_type = "smallint(2) unsigned", 831 null = False ), 832 FieldDB( field_name = "bit_score", 833 data_type = "float unsigned", #float(4) 834 null = False ), 835 FieldDB( field_name = "start_A", 836 data_type = "smallint(2) unsigned", 837 null = False ), 838 FieldDB( field_name = "end_A", 839 data_type = "smallint(2) unsigned", 840 null = False ), 841 FieldDB( field_name = "coverage_A", 842 data_type = "tinyint(1) unsigned"), 843 FieldDB( field_name = "coverage_B", 844 data_type = "tinyint(1) unsigned"), 845 FieldDB( field_name = "start_B", 846 data_type = "smallint(2) unsigned", 847 null = False ), 848 FieldDB( field_name = "end_B", 849 data_type = "smallint(2) unsigned", 850 null = False ), 851 FieldDB( field_name = "identities", 852 data_type = "tinyint(1) unsigned", 853 null = False ), 854 FieldDB( field_name = "similarity", 855 data_type = "tinyint(1) unsigned", 856 null = False ), 857 FieldDB( field_name = "gaps", 858 data_type = "tinyint(1) unsigned", 859 null = False ), 860 FieldDB( field_name = "program", 861 data_type = "ENUM(\"bl2seq\",\"blastall\")"), 862 FieldDB( field_name = "filter", 863 data_type = "ENUM(\"T\",\"F\")")], 864 865 866 indices = [("sequenceID_A","identities","coverage_A"), 867 ("sequenceID_B","identities","coverage_B")]) # TO CHECK THE INDICES!!!
868 869 870 ################ 871 ## PROCEDURES ## 872 ################ 873 874 ### NOT USED AS IT IS VERY SLOW ### 875 876 877 REMOVE_SEQUENCE_DUPLICATES_PROCEDURE =""" 878 DELIMITER // 879 CREATE PROCEDURE remove_duplicates() 880 881 DETERMINISTIC 882 MODIFIES SQL DATA 883 884 BEGIN 885 DECLARE sequenceMD5 BINARY(16); 886 DECLARE previous_sMD5 BINARY(16) DEFAULT 0; 887 DECLARE sequenceID int(10) unsigned; 888 DECLARE sequence mediumtext; 889 DECLARE sequenceLength int(2) unsigned; 890 DECLARE sequenceMW float; 891 DECLARE sequenceIP float; 892 893 DECLARE done BOOLEAN DEFAULT FALSE; 894 DECLARE cur1 CURSOR FOR SELECT * FROM sequenceProtein ORDER BY sequenceMD5; 895 DECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET done = TRUE; 896 897 OPEN cur1; 898 899 cursor_loop: LOOP 900 FETCH cur1 INTO sequenceID,sequenceMD5,sequence,sequenceLength,sequenceMW,sequenceIP; 901 IF done THEN LEAVE cursor_loop; END IF; 902 IF sequenceMD5!=previous_sMD5 THEN 903 SET previous_sMD5 = sequenceMD5; 904 INSERT INTO test_nonr_seq VALUES (sequenceID,sequenceMD5,sequence,sequenceLength,sequenceMW,sequenceIP); 905 END IF; 906 END LOOP cursor_loop; 907 908 DEALLOCATE PREPARE stmt_query; 909 CLOSE cur1; 910 911 END 912 // 913 delimiter ; 914 """ 915