1 from database import Database,TableDB,FieldDB
2 import biana.BianaObjects
3 import sets
4
5
7
8 externalEntityID_col = "externalEntityID"
9 external_entity_relation_id_col = "externalEntityRelationID"
10 externalEntityID_col_type = "integer(4) unsigned"
11 externalDatabaseID_col = "externalDatabaseID"
12 external_entity_relation_participant_id_col = "externalEntityRelationParticipantID"
13 externalDatabaseID_col_type = "integer(2) unsigned"
14
16
17 Database.__init__(self)
18
19
20
21
22 self.DATABASE_VERSION_TABLE = None
23 self.BIANA_DATABASE_TABLE = None
24 self.EXTERNAL_ENTITY_TABLE = None
25 self.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE = None
26 self.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE = None
27 self.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE = None
28 self.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE = None
29 self.EXTERNAL_DATABASE_TABLE = None
30 self.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE = None
31
32 self.EXTERNAL_ENTITY_RELATION_TABLE = None
33 self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE = None
34 self.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE = None
35
36 self.USER_ENTITY_TABLE = None
37 self.USER_ENTITY_PROTOCOL_TABLE = None
38 self.USER_ENTITY_PROTOCOL_ATOMS_TABLE = None
39 self.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE = None
40
41 self.ONTOLOGY_IS_A_TABLE = None
42 self.EXTENDED_ONTOLOGY_HIERARCHY_TABLE = None
43 self.ONTOLOGY_IS_PART_OF_TABLE = None
44 self.ONTOLOGY_INFO_TABLE = None
45
46 self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES = None
47 self.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT = None
48 self.CD_HIT_CLUSTERING_TABLE = None
49 self.PROTEIN_BLAST_RESULTS_TABLE = None
50
51 self.TRANSFER_TABLES_LIST = []
52
53
54 self.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT = {}
55 self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT = {}
56
57
58 self.externalEntityIdentifierType_field = None
59
60 self.externalEntityID_field = FieldDB(field_name = self.externalEntityID_col,
61 data_type = "integer(4) unsigned",
62 null = False)
63
64 self.external_entity_relation_participant_id_field = FieldDB( field_name = self.external_entity_relation_participant_id_col,
65 data_type = "integer(4) unsigned",
66 null = False)
67 self.externalDatabaseID_field = FieldDB(field_name = "externalDatabaseID",
68 data_type = "integer(2) unsigned",
69 null = False)
70
71
72
73 self.VALID_EXTERNAL_ENTITY_TYPES_DICT = {}
74 self.VALID_EXTERNAL_ENTITY_RELATION_TYPES_DICT = {}
75
76 self.VALID_IDENTIFIER_REFERENCE_TYPES_SET = sets.Set()
77
78 self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT = {}
79 self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_DATA_TYPES = {}
80 self.VALID_EXTERNAL_ENTITY_GENERAL_ATTRIBUTE_TYPES_SET = sets.Set()
81 self.VALID_EXTERNAL_ENTITY_IDENTIFIER_ATTRIBUTE_TYPES_SET = sets.Set()
82 self.VALID_EXTERNAL_ENTITY_VERSIONABLE_IDENTIFIER_ATTRIBUTE_TYPES_SET = sets.Set()
83 self.VALID_EXTERNAL_ENTITY_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES_SET = sets.Set()
84 self.VALID_EXTERNAL_ENTITY_DESCRIPTIVE_ATTRIBUTE_TYPES_SET = sets.Set()
85 self.VALID_EXTERNAL_ENTITY_NUMERIC_ATTRIBUTE_TYPES_SET = sets.Set()
86 self.VALID_EXTERNAL_ENTITY_SPECIAL_ATTRIBUTE_TYPES_SET = sets.Set()
87
88 self.VALID_EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TYPES_DICT = {}
89 self.VALID_EXTERNAL_ENTITY_RELATION_PARTICIPANT_NUMERIC_ATTRIBUTE_TYPES_SET = sets.Set()
90 self.VALID_EXTERNAL_ENTITY_RELATION_PARTICIPANT_DESCRIPTIVE_ATTRIBUTE_TYPES_SET = sets.Set()
91 self.VALID_EXTERNAL_ENTITY_RELATION_PARTICIPANT_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES_SET = sets.Set()
92
93 self._create_initial_tables()
94
95
96
98
99 new_table = None
100
101 category = category.lower()
102
103 if category == "eerp attribute":
104 new_table = TableDB( table_name = "externalEntityRelationParticipant"+current_attribute,
105 table_fields = [self.external_entity_relation_participant_id_field,
106 FieldDB( field_name = "value",
107 data_type = data_type,
108 null = False )],
109 indices = [self.external_entity_relation_participant_id_col] )
110 elif category == "eerp numeric attribute":
111 pass
112 elif category == "eerp descriptive attribute":
113 pass
114 elif category == "eerp descriptive searchable attribute":
115 pass
116 else:
117 raise ValueError("Invalid category when adding valid relation participant attribute type: %s" %category)
118
119 self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT[current_attribute.lower()] = new_table
120
121
123
124 category = category.lower()
125
126 new_table = None
127
128 if category == "ee attribute":
129 self.VALID_EXTERNAL_ENTITY_GENERAL_ATTRIBUTE_TYPES_SET.add(current_attribute.lower())
130 additional_fields_set = sets.Set()
131 new_table = TableDB( table_name = "externalEntity"+current_attribute,
132 table_fields = [ FieldDB( field_name = "value",
133 data_type = data_type,
134 null = False ),
135 self.externalEntityID_field ],
136 primary_key = (self.externalEntityID_col, "value"),
137 indices = [("value")] )
138
139
140 elif category == "ee identifier attribute":
141 self.VALID_EXTERNAL_ENTITY_IDENTIFIER_ATTRIBUTE_TYPES_SET.add(current_attribute.lower())
142 additional_fields_set = sets.Set(["type"])
143 new_table = TableDB( table_name = "externalEntity"+current_attribute,
144 table_fields = [ FieldDB( field_name = "value",
145 data_type = data_type,
146 null = False ),
147 self.externalEntityID_field,
148 self.externalEntityIdentifierType_field ],
149 primary_key = (self.externalEntityID_col, "value"),
150 indices = [("value")] )
151
152 elif category == "ee versionable identifier attribute":
153 self.VALID_EXTERNAL_ENTITY_VERSIONABLE_IDENTIFIER_ATTRIBUTE_TYPES_SET.add(current_attribute.lower())
154 additional_fields_set = sets.Set(["type","version"])
155 new_table = TableDB( table_name = "externalEntity"+current_attribute,
156 table_fields = [ FieldDB( field_name = "value",
157 data_type = data_type,
158 null = False ),
159 FieldDB( field_name = "version",
160 data_type = "integer(1) unsigned",
161 null = False,
162 default_value = 0 ),
163 self.externalEntityID_field,
164 self.externalEntityIdentifierType_field ],
165 primary_key = (self.externalEntityID_col,"value","version"),
166 indices = [ ("value",self.externalEntityID_col),
167 ("value","version")] )
168
169
170 elif category == "ee descriptive attribute":
171 self.VALID_EXTERNAL_ENTITY_DESCRIPTIVE_ATTRIBUTE_TYPES_SET.add(current_attribute.lower())
172 additional_fields_set = sets.Set()
173 new_table = TableDB( table_name = "externalEntity"+current_attribute,
174 table_fields = [ FieldDB( field_name = "value",
175 data_type = data_type,
176 null = False ),
177 self.externalEntityID_field,
178 self.externalEntityIdentifierType_field ],
179 indices = [ (self.externalEntityID_col) ] )
180
181
182 elif category == "ee descriptive searchable attribute":
183 self.VALID_EXTERNAL_ENTITY_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES_SET.add(current_attribute.lower())
184 additional_fields_set = sets.Set()
185 new_table = TableDB( table_name = "externalEntity"+current_attribute,
186 table_fields = [ FieldDB( field_name = "value",
187 data_type = data_type,
188 null = False ),
189 self.externalEntityID_field,
190 self.externalEntityIdentifierType_field ],
191 indices = [ (self.externalEntityID_col) ],
192 fulltext_indices = [ ("value") ] )
193
194 elif category == "ee numeric attribute":
195 self.VALID_EXTERNAL_ENTITY_NUMERIC_ATTRIBUTE_TYPES_SET.add(current_attribute.lower())
196 additional_fields_set = sets.Set()
197 new_table = TableDB( table_name = "externalEntity"+current_attribute,
198 table_fields = [ FieldDB( field_name = "value",
199 data_type = data_type,
200 null = False ),
201 self.externalEntityID_field],
202 indices = [ (self.externalEntityID_col,), ("value",) ] )
203
204 elif category == "ee special attribute":
205 self.VALID_EXTERNAL_ENTITY_SPECIAL_ATTRIBUTE_TYPES_SET.add(current_attribute.lower())
206 additional_fields_set = sets.Set()
207 fields = [ FieldDB( field_name = "value",
208 data_type = data_type,
209 null = False ),
210 self.externalEntityID_field,
211 self.externalEntityIdentifierType_field ]
212
213 for current_field_name, current_data_type, current_null in additional_fields_tuple_list:
214 if current_null == 0: current_null = False
215 else: current_null = True
216 fields.append( FieldDB( field_name = current_field_name,
217 data_type = current_data_type,
218 null = current_null ) )
219 additional_fields_set.add(current_field_name)
220
221 new_table = TableDB( table_name = "externalEntity"+current_attribute,
222 table_fields = fields,
223 indices = [ (self.externalEntityID_col,), ("value",) ] )
224
225 else:
226 raise ValueError("Invalid category when adding valid attribute type: %s" %category)
227
228 self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT[current_attribute.lower()] = {"name": current_attribute, "fields": additional_fields_set }
229 self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_DATA_TYPES[current_attribute.lower()] = data_type
230
231 if new_table:
232 self.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT[current_attribute.lower()] = new_table
233
243
244 - def get_attribute_data_type(self, attribute_identifier): return self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_DATA_TYPES[attribute_identifier.lower()]
245
247
248 self.VALID_IDENTIFIER_REFERENCE_TYPES_SET.add(reference_type.lower())
249 valid_identifier_reference_types_list = list(self.VALID_IDENTIFIER_REFERENCE_TYPES_SET)
250 valid_identifier_reference_types_list.sort()
251 self.externalEntityIdentifierType_field = FieldDB( field_name = "type",
252 data_type = "ENUM(\"%s\")" %"\",\"".join(valid_identifier_reference_types_list),
253 user_friendly_name = "type" )
254
255
256
261
262
266
267
268
269
271 """
272 Returns a database object with all the content
273 """
274
275 self.remove_tables()
276 self._create_initial_tables()
277 self.create_specific_database_tables()
278
279 self.add_table(self.DATABASE_VERSION_TABLE)
280 self.add_table(self.BIANA_DATABASE_TABLE)
281 self.add_table(self.TYPES_AND_ATTRIBUTES_TABLE)
282 self.add_table(self.SPECIAL_ATTRIBUTES_TABLE)
283 self.add_table(self.EXTERNAL_ENTITY_TABLE)
284 self.add_table(self.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE)
285 self.add_table(self.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE)
286 self.add_table(self.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE)
287 self.add_table(self.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE)
288 self.add_table(self.EXTERNAL_DATABASE_TABLE)
289 self.add_table(self.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE)
290
291 self.add_table(self.EXTERNAL_ENTITY_RELATION_TABLE)
292 self.add_table(self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE)
293 self.add_table(self.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE)
294
295 self.add_table(self.USER_ENTITY_TABLE)
296 self.add_table(self.USER_ENTITY_PROTOCOL_TABLE)
297 self.add_table(self.USER_ENTITY_PROTOCOL_ATOMS_TABLE)
298 self.add_table(self.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE)
299
300 self.add_table(self.ONTOLOGY_IS_A_TABLE)
301 self.add_table(self.EXTENDED_ONTOLOGY_HIERARCHY_TABLE)
302 self.add_table(self.ONTOLOGY_IS_PART_OF_TABLE)
303 self.add_table(self.ONTOLOGY_INFO_TABLE)
304
305 for current_table in self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES.values():
306 self.add_table(current_table)
307
308 self.add_table(self.CD_HIT_CLUSTERING_TABLE)
309 self.add_table(self.PROTEIN_BLAST_RESULTS_TABLE)
310
311
312 for current_table in self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_ATTRIBUTE_TABLES_DICT.values():
313 self.add_table(current_table)
314
315 for current_table in self.EXTERNAL_ENTITY_ATTRIBUTE_TABLES_DICT.values():
316 self.add_table(current_table)
317
318
320 """
321 Creates initial tables, not dependant on specific biana databases
322 """
323
324
325 self.BIANA_DATABASE_TABLE = TableDB( table_name = "BianaDatabase",
326 table_fields = [ FieldDB( field_name="description",
327 data_type = "varchar(255)" ),
328 FieldDB( field_name="last_externalEntityID",
329 data_type = "integer(4) unsigned default 0",
330 null = False),
331 FieldDB( field_name="last_externalEntityRelationParticipantID",
332 data_type = "integer(4) unsigned default 0",
333 null = False),
334 FieldDB( field_name="last_proteinSequenceID",
335 data_type = "integer(4) unsigned default 0",
336 null = False),
337 FieldDB( field_name="last_nucleotideSequenceID",
338 data_type = "integer(4) unsigned default 0",
339 null = False),
340 FieldDB( field_name="last_keyID",
341 data_type = "integer(1) unsigned default 0",
342 null = False),
343 FieldDB( field_name="source_code_version",
344 data_type = "varchar(20)"),
345 FieldDB( field_name="optimized_for",
346 data_type = "ENUM(\"parsing\",\"running\")" ) ] )
347
348 self.TYPES_AND_ATTRIBUTES_TABLE = TableDB( table_name = "BianaDatabaseTypesAndAttributes",
349 table_fields = [ FieldDB( field_name="name",
350 data_type = "varchar(255)",
351 null = False ),
352 FieldDB( field_name="data_type",
353 data_type = "text" ),
354 FieldDB( field_name="category",
355 data_type = "ENUM(\"ee type\",\"eer type\",\"ee attribute\",\"ee identifier attribute\",\"ee versionable identifier attribute\",\"ee descriptive attribute\",\"ee descriptive searchable attribute\",\"ee numeric attribute\",\"ee special attribute\",\"eerp attribute\",\"eerp descriptive attribute\",\"eerp descriptive searchable attribute\",\"identifier reference type\")",
356 null = False ) ] )
357
358
359 self.SPECIAL_ATTRIBUTES_TABLE = TableDB( table_name = "BianaDatabaseSpecialAttributes",
360 table_fields = [ FieldDB( field_name="attribute_name",
361 data_type = "varchar(255)",
362 null = False ),
363 FieldDB( field_name="field_name",
364 data_type = "varchar(255)" ),
365 FieldDB( field_name="data_type",
366 data_type = "text",
367 null = False ),
368 FieldDB( field_name="canbenull",
369 data_type = "tinyint(1)",
370 null = False ) ] )
371
372 self.DATABASE_VERSION_TABLE = TableDB( table_name="database_version",
373 table_fields = [ FieldDB(field_name = "versionID",
374 data_type = "smallint unsigned auto_increment",
375 null = False),
376 FieldDB(field_name = "dbControlID",
377 data_type = "varchar(40)",
378 null = False),
379 FieldDB(field_name = "date",
380 data_type = "char(10)",
381 null = False),
382 FieldDB( field_name = "stable_externalEntityID",
383 data_type = "integer(4) unsigned"),
384 FieldDB( field_name = "stable_externalEntityRelationParticipantID",
385 data_type = "integer(4) unsigned") ],
386 primary_key = ("dbControlID"),
387 indices = [("versionID")] )
388
389
390
391
393 """
394 Adds to the database all the specific tables for this database
395 """
396
397
398
399 eE_types_list = self.VALID_EXTERNAL_ENTITY_TYPES_DICT.keys()
400 eE_types_list.sort()
401 self.enum_eE_types_col = "ENUM(\"%s\")" %"\",\"".join(eE_types_list)
402
403 eEr_types_list = self.VALID_EXTERNAL_ENTITY_RELATION_TYPES_DICT.keys()
404 eEr_types_list.sort()
405 self.enum_eEr_types_col = "ENUM(\"%s\")" %"\",\"".join(eEr_types_list)
406
407 eE_attr_list = self.VALID_EXTERNAL_ENTITY_ATTRIBUTE_TYPES_DICT.keys()
408 eE_attr_list.sort()
409 self.enum_eEattr_col = "ENUM(\"%s\")" %"\",\"".join(eE_attr_list)
410
411
412 self.EXTERNAL_ENTITY_TABLE = TableDB(table_name = "externalEntity",
413 table_fields = [ FieldDB(field_name = self.externalEntityID_col,
414 data_type = "integer(4) unsigned",
415 null = False),
416 FieldDB(field_name = self.externalDatabaseID_col,
417 data_type = self.externalDatabaseID_col_type,
418 null = False),
419 FieldDB(field_name = "type",
420 data_type = self.enum_eE_types_col,
421 null = False)],
422 primary_key = (self.externalDatabaseID_col,self.externalEntityID_col),
423 indices = [ (self.externalEntityID_col) ] )
424
425 self.EXTERNAL_DATABASE_AVAILABLE_eEr_TYPES_TABLE = TableDB( table_name = "externalDatabaseExternalEntityRelationType",
426 table_fields = [ self.externalDatabaseID_field,
427 FieldDB( field_name = "eErType",
428 data_type = self.enum_eEr_types_col,
429 null = False) ],
430 primary_key = (self.externalDatabaseID_col,"eErType") )
431
432 self.EXTERNAL_DATABASE_AVAILABLE_eE_TYPES_TABLE = TableDB( table_name = "externalDatabaseExternalEntityType",
433 table_fields = [ self.externalDatabaseID_field,
434 FieldDB( field_name = "eEType",
435 data_type = self.enum_eE_types_col,
436 null = False) ],
437 primary_key = (self.externalDatabaseID_col,"eEType") )
438
439 self.EXTERNAL_DATABASE_AVAILABLE_eEr_ATTRIBUTE_TABLE = TableDB( table_name = "externalDatabaseExternalEntityRelationAttribute",
440 table_fields = [ self.externalDatabaseID_field,
441 FieldDB( field_name = "attributeType",
442 data_type = self.enum_eEattr_col,
443 null = False) ],
444 primary_key = (self.externalDatabaseID_col,"attributeType") )
445
446 self.EXTERNAL_DATABASE_AVAILABLE_eE_ATTRIBUTE_TABLE = TableDB( table_name = "externalDatabaseExternalEntityAttribute",
447 table_fields = [ self.externalDatabaseID_field,
448 FieldDB( field_name = "attributeType",
449 data_type = self.enum_eEattr_col,
450 null = False) ],
451 primary_key = (self.externalDatabaseID_col,"attributeType") )
452
453 self.EXTERNAL_DATABASE_TABLE = TableDB( table_name = "externalDatabase",
454 table_fields = [ FieldDB( field_name = self.externalDatabaseID_col,
455 data_type = "smallint unsigned auto_increment",
456 null = False),
457 FieldDB( field_name = "databaseName",
458 data_type = "varchar(255)",
459 null = False),
460 FieldDB( field_name = "databaseVersion",
461 data_type = "varchar(255)",
462 null = False),
463 FieldDB( field_name = "parsedFile",
464 data_type = "varchar(255)"),
465 FieldDB( field_name = "parsedDate",
466 data_type = "char(10)",
467 null = False),
468 FieldDB( field_name = "databaseDescription",
469 data_type = "varchar(255)",
470 null = False),
471 FieldDB( field_name = "parsingTime",
472 data_type = "integer(3) unsigned"),
473 FieldDB( field_name = "defaultExternalEntityAttribute",
474 data_type = self.enum_eEattr_col ),
475 FieldDB( field_name = "isPromiscuous",
476 data_type = "bool",
477 default_value = 0,
478 null = False) ],
479 primary_key = ("databaseName","databaseVersion"),
480 indices = [self.externalDatabaseID_col])
481
482 self.EXTERNAL_DATABASE_ATTRIBUTE_TRANSFER_TABLE = TableDB( table_name = "externalDatabaseAttributeTransfer",
483 table_fields = [ FieldDB( field_name = "keyID",
484 data_type = "integer(1) unsigned",
485 null = False ),
486 FieldDB( field_name = "externalDatabaseID",
487 data_type = "integer(2) unsigned",
488 null = False ),
489 FieldDB( field_name = "attributeKey",
490 data_type = self.enum_eEattr_col,
491 null = False ),
492 FieldDB( field_name = "transferAttribute",
493 data_type = self.enum_eEattr_col,
494 null = False ) ] )
495
496 self.ALIGNMENT_ELEMENT_TABLE = TableDB( table_name = "alignmentElement",
497 table_fields = [ self.externalEntityID_field,
498 FieldDB( field_name = "sequenceMD5",
499 data_type = "binary(16)"),
500 FieldDB( field_name = "position",
501 data_type = "integer(3) unsigned",
502 null = False ),
503 FieldDB( field_name = "crossID",
504 data_type = "varchar(255)" ),
505 FieldDB( field_name = "crossID_identifier_type",
506 data_type = self.enum_eEattr_col ),
507 FieldDB( field_name = "alignmentBlocks",
508 data_type = "varchar(255)"),
509 FieldDB( field_name = "alignedSequence",
510 data_type = "text"),
511 FieldDB( field_name = "identity",
512 data_type = "integer(1) unsigned"),
513 FieldDB( field_name = "weighted_sim",
514 data_type = "integer(1) unsigned"),
515 FieldDB( field_name = "sequence_covered",
516 data_type = "integer(1) unsigned")],
517 primary_key = (self.externalEntityID_col,"sequenceMD5","position"),
518 indices = [("sequenceMD5")] )
519
520
521 self.EXTERNAL_ENTITY_RELATION_TABLE = TableDB( table_name = "externalEntityRelation",
522 table_fields = [ FieldDB( field_name = self.external_entity_relation_id_col,
523 data_type = self.externalEntityID_col_type,
524 null = False ),
525 FieldDB(field_name = "type",
526 data_type = self.enum_eEr_types_col,
527 null = False)],
528 primary_key = (self.external_entity_relation_id_col))
529
530
531 self.external_entity_relation_id_field = FieldDB( field_name = self.external_entity_relation_id_col,
532 data_type = "integer(4) unsigned",
533 null = False,
534 foreign_key = (self.EXTERNAL_ENTITY_RELATION_TABLE,
535 self.EXTERNAL_ENTITY_RELATION_TABLE.get_field(field_name="externalEntityRelationID" )))
536
537
538 self.EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE = TableDB( table_name = "externalEntityRelationParticipant",
539 table_fields = [ FieldDB( field_name = self.external_entity_relation_participant_id_col,
540 data_type = "integer(4) unsigned",
541 null = False ),
542 self.external_entity_relation_id_field,
543 self.externalEntityID_field],
544 primary_key = self.external_entity_relation_participant_id_col,
545 indices = [(self.external_entity_relation_id_col, self.externalEntityID_col),
546 (self.externalEntityID_col, self.external_entity_relation_id_col)])
547
548 self.EXTENDED_EXTERNAL_ENTITY_RELATION_PARTICIPANT_TABLE = TableDB( table_name = "extendedExternalEntityRelationParticipant",
549 table_fields = [ FieldDB( field_name = self.external_entity_relation_participant_id_col,
550 data_type = "integer(4) unsigned",
551 null = False ),
552 self.external_entity_relation_id_field,
553 self.externalEntityID_field],
554 primary_key = self.external_entity_relation_participant_id_col,
555 indices = [(self.external_entity_relation_id_col, self.externalEntityID_col),
556 (self.externalEntityID_col,self.external_entity_relation_id_col)])
557
558
559
560
561
562
563
564
565
566
567
568 self.USER_ENTITY_TABLE = TableDB( table_name = "userEntityUnification_protocol_",
569 table_fields = [ FieldDB( field_name = "userEntityID",
570 data_type = "integer(4) unsigned",
571 null = False ),
572 FieldDB( field_name = "externalEntityID",
573 data_type = "integer(4) unsigned",
574 null = False) ],
575 primary_key = ("userEntityID","externalEntityID"),
576 indices = [("externalEntityID","userEntityID")])
577
578 self.USER_ENTITY_PROTOCOL_TABLE = TableDB( table_name = "userEntityUnificationProtocol",
579 table_fields = [ FieldDB( field_name = "unificationProtocolID",
580 data_type = "smallint unsigned auto_increment",
581 null = False),
582 FieldDB( field_name = "description",
583 data_type = "varchar(255)",
584 null = False),
585 FieldDB( field_name = "databaseVersion",
586 data_type = "varchar(40)",
587 null = False )],
588
589
590
591 primary_key = "unificationProtocolID" )
592
593
594 self.USER_ENTITY_PROTOCOL_ATOMS_TABLE = TableDB( table_name = "userEntityUnificationProtocolAtoms",
595 table_fields = [ FieldDB( field_name = "unificationAtomID",
596 data_type = "integer(2) unsigned auto_increment",
597 null = False),
598 FieldDB( field_name = "unificationProtocolID",
599 data_type = "smallint unsigned",
600 null = False,
601 foreign_key = (self.USER_ENTITY_PROTOCOL_TABLE,
602 self.USER_ENTITY_PROTOCOL_TABLE.get_field(field_name = "unificationProtocolID") )),
603 FieldDB( field_name = "externalDatabaseID_A",
604 data_type = "integer(2) unsigned",
605 null = False,
606 foreign_key = (self.EXTERNAL_DATABASE_TABLE,
607 self.EXTERNAL_DATABASE_TABLE.get_field(field_name = "externalDatabaseID") )),
608 FieldDB( field_name = "externalDatabaseID_B",
609 data_type = "integer(2) unsigned",
610 null = False,
611 foreign_key = (self.EXTERNAL_DATABASE_TABLE,
612 self.EXTERNAL_DATABASE_TABLE.get_field(field_name = "externalDatabaseID") )) ],
613 indices = ["unificationProtocolID"],
614 primary_key = "unificationAtomID" )
615
616 self.USER_ENTITY_PROTOCOL_ATOM_ATTRIBUTES_TABLE = TableDB( table_name = "userEntityUnificationProtocolAtomAttribute",
617 table_fields = [ FieldDB( field_name = "unificationAtomID",
618 data_type = "integer(2) unsigned",
619 null = False,
620 foreign_key = (self.USER_ENTITY_PROTOCOL_ATOMS_TABLE,
621 self.USER_ENTITY_PROTOCOL_ATOMS_TABLE.get_field( field_name = "unificationAtomID" ) )),
622 FieldDB( field_name = "cross_referenced_code",
623 data_type = self.enum_eEattr_col,
624 null = False ) ],
625 primary_key = ("unificationAtomID","cross_referenced_code") )
626
627
628
629
630
631
632
633 self.ONTOLOGY_IS_A_TABLE = TableDB( table_name = "ExternalEntityOntology_isA",
634 table_fields = [ self.externalEntityID_field,
635 FieldDB( field_name = "is_a",
636 data_type = self.externalEntityID_col_type,
637 null = False )],
638 indices = [(self.externalEntityID_col,),("is_a",)] )
639
640 self.EXTENDED_ONTOLOGY_HIERARCHY_TABLE = TableDB( table_name = "ExtendedOntologyIsA",
641 table_fields = [ FieldDB( field_name = "parentExternalEntityID",
642 data_type = self.externalEntityID_col_type,
643 null = False ),
644 FieldDB( field_name = "childExternalEntityID",
645 data_type = self.externalEntityID_col_type,
646 null = False ) ],
647 indices = [("parentExternalEntityID","childExternalEntityID")] )
648
649 self.ONTOLOGY_IS_PART_OF_TABLE = TableDB( table_name = "ExternalEntityOntology_isPartOf",
650 table_fields = [ self.externalEntityID_field,
651 FieldDB( field_name = "is_part_of",
652 data_type = self.externalEntityID_col_type,
653 null = False )],
654 indices = [(self.externalEntityID_col,),("is_part_of",)] )
655
656 self.ONTOLOGY_INFO_TABLE = TableDB( table_name = "ExternalEntityOntology",
657 table_fields = [ self.externalEntityID_field,
658 FieldDB( field_name = "name",
659 data_type = "varchar(255)",
660 null = False ),
661 FieldDB( field_name = "linked_attribute",
662 data_type = "varchar(255)",
663 null = False ),
664 FieldDB( field_name = "key_id",
665 data_type = "integer(1) unsigned",
666 null = False ),
667 FieldDB( field_name = "level_attribute",
668 data_type = "varchar(255)",
669 null = True ),
670 FieldDB( field_name = "description_attribute",
671 data_type = "varchar(255)",
672 null = False )],
673 primary_key = ["name"] )
674
675
676
677
678
679
680
681
682
683 self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES = { "pdb": TableDB( table_name = "pdb",
684 table_fields = [ FieldDB( field_name = "pdb",
685 data_type = "char(4)",
686 null = False ),
687 FieldDB( field_name = "chain",
688 data_type = "char(1)"),
689 FieldDB( field_name = "resolution",
690 data_type = "float"),
691 self.externalEntityID_field,
692 FieldDB( field_name = "total_num_atoms",
693 data_type = "integer(3) unsigned",
694 null = False ),
695 FieldDB( field_name = "residue_num_list",
696 data_type = "blob"),
697 FieldDB( field_name = "residue_type_list",
698 data_type = "text"),
699 FieldDB( field_name = "residue_atom_num_list",
700 data_type = "blob"),
701 FieldDB( field_name = "atom_type_list",
702 data_type = "text"),
703 FieldDB( field_name = "atom_name_list",
704 data_type = "text"),
705 FieldDB( field_name = "atom_coordinates",
706 data_type = "mediumblob"),
707 FieldDB( field_name = "hssp_residue_num_correspondences",
708 data_type = "blob"),
709 FieldDB( field_name = "residue_dssp_results",
710 data_type = "text"),
711 FieldDB( field_name = "residue_hssp_entropy",
712 data_type = "blob"),
713 FieldDB( field_name = "residue_hssp_norm_entropy",
714 data_type = "blob"),
715 FieldDB( field_name = "residue_hssp_variability",
716 data_type = "blob"),
717 FieldDB( field_name = "conservation_hssp",
718 data_type = "blob"),
719 FieldDB( field_name = "solvent_exposure_hssp",
720 data_type = "blob")],
721 primary_key = ("pdb","chain") ),
722
723 "proteinSequence": TableDB( table_name = "sequenceProtein",
724 table_fields = [ FieldDB( field_name = "proteinSequenceID",
725
726 data_type = "integer(4) unsigned"),
727
728 FieldDB( field_name = "sequenceMD5",
729
730 data_type = "binary(16)",
731 user_friendly_name = "sequenceMD5",
732 optimize_space = 4,
733 null = False),
734 FieldDB( field_name = "sequence",
735 user_friendly_name = "sequence",
736 data_type = "mediumtext",
737 compress = 1,
738 null = False),
739 FieldDB( field_name = "sequenceLength",
740 data_type = "integer(2) unsigned",
741 user_friendly_name = "length",
742 null = False),
743 FieldDB( field_name = "sequenceMW",
744 data_type = "float",
745 user_friendly_name = "mw",
746 null = False,
747 default_value = 0),
748 FieldDB( field_name = "sequenceIP",
749 data_type = "float",
750 user_friendly_name = "ip",
751 null = False,
752 default_value = 0 ) ],
753 primary_key = "sequenceMD5",
754 indices = [("proteinSequenceID")]),
755
756 "nucleotideSequence": TableDB( table_name = "sequenceNucleotide",
757 table_fields = [ FieldDB( field_name = "nucleotideSequenceID",
758 data_type = "integer(4) unsigned"),
759
760
761 FieldDB( field_name = "sequenceMD5",
762
763 data_type = "binary(16)",
764 user_friendly_name = "sequenceMD5",
765 optimize_space = 4,
766 null = False),
767 FieldDB( field_name = "sequenceLength",
768 data_type = "integer(2) unsigned",
769 user_friendly_name = "length",
770 null = False),
771 FieldDB( field_name = "sequence",
772 data_type = "mediumtext",
773 compress = 1,
774 user_friendly_name = "sequence",
775 null = False)],
776 primary_key = "sequenceMD5",
777 indices = [("nucleotideSequenceID")]) }
778
779 self.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT = {}
780
781 for actual_attribute in self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES.keys():
782
783 temp_fields = self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES[actual_attribute].get_fields()
784
785 self.EXTERNAL_ATTRIBUTE_DESCRIPTIONS_DICT[actual_attribute.lower()] = {"table": self.EXTERNAL_ATTRIBUTES_DESCRIPTION_TABLES[actual_attribute],
786 "fields": dict([ (x.get_user_friendly_name(),x) for x in temp_fields ]) }
787
788
789
790
791
792
793 self.CD_HIT_CLUSTERING_TABLE = TableDB( table_name = "sequenceProteinCD_HIT",
794 table_fields = [ FieldDB( field_name = "representant_proteinSequenceID",
795 data_type = "integer(4) unsigned",
796 null = False ),
797 FieldDB( field_name = "proteinSequenceID",
798 data_type = "integer(4) unsigned",
799 null = False ),
800 FieldDB( field_name = "representant_start_position",
801 data_type = "integer(2) unsigned",
802 null = False ),
803 FieldDB( field_name = "representant_end_position",
804 data_type = "integer(2) unsigned",
805 null = False ),
806 FieldDB( field_name = "start_position",
807 data_type = "integer(2) unsigned",
808 null = False ),
809 FieldDB( field_name = "end_position",
810 data_type = "integer(2) unsigned",
811 null = False ),
812 FieldDB( field_name = "identity",
813 data_type = "integer(1) unsigned",
814 null = False )],
815 indices = [("proteinSequenceID"),("representant_proteinSequenceID")] )
816
817
818
819 self.PROTEIN_BLAST_RESULTS_TABLE = TableDB( table_name = "sequenceProteinBlastResults",
820 table_fields = [ FieldDB( field_name = "sequenceID_A",
821 data_type = "integer(4) unsigned",
822 null = False ),
823 FieldDB( field_name = "sequenceID_B",
824 data_type = "integer(4) unsigned",
825 null = False ),
826 FieldDB( field_name = "evalue",
827 data_type = "float",
828 null = False ),
829 FieldDB( field_name = "score",
830 data_type = "smallint(2) unsigned",
831 null = False ),
832 FieldDB( field_name = "bit_score",
833 data_type = "float unsigned",
834 null = False ),
835 FieldDB( field_name = "start_A",
836 data_type = "smallint(2) unsigned",
837 null = False ),
838 FieldDB( field_name = "end_A",
839 data_type = "smallint(2) unsigned",
840 null = False ),
841 FieldDB( field_name = "coverage_A",
842 data_type = "tinyint(1) unsigned"),
843 FieldDB( field_name = "coverage_B",
844 data_type = "tinyint(1) unsigned"),
845 FieldDB( field_name = "start_B",
846 data_type = "smallint(2) unsigned",
847 null = False ),
848 FieldDB( field_name = "end_B",
849 data_type = "smallint(2) unsigned",
850 null = False ),
851 FieldDB( field_name = "identities",
852 data_type = "tinyint(1) unsigned",
853 null = False ),
854 FieldDB( field_name = "similarity",
855 data_type = "tinyint(1) unsigned",
856 null = False ),
857 FieldDB( field_name = "gaps",
858 data_type = "tinyint(1) unsigned",
859 null = False ),
860 FieldDB( field_name = "program",
861 data_type = "ENUM(\"bl2seq\",\"blastall\")"),
862 FieldDB( field_name = "filter",
863 data_type = "ENUM(\"T\",\"F\")")],
864
865
866 indices = [("sequenceID_A","identities","coverage_A"),
867 ("sequenceID_B","identities","coverage_B")])
868
869
870
871
872
873
874
875
876
877 REMOVE_SEQUENCE_DUPLICATES_PROCEDURE ="""
878 DELIMITER //
879 CREATE PROCEDURE remove_duplicates()
880
881 DETERMINISTIC
882 MODIFIES SQL DATA
883
884 BEGIN
885 DECLARE sequenceMD5 BINARY(16);
886 DECLARE previous_sMD5 BINARY(16) DEFAULT 0;
887 DECLARE sequenceID int(10) unsigned;
888 DECLARE sequence mediumtext;
889 DECLARE sequenceLength int(2) unsigned;
890 DECLARE sequenceMW float;
891 DECLARE sequenceIP float;
892
893 DECLARE done BOOLEAN DEFAULT FALSE;
894 DECLARE cur1 CURSOR FOR SELECT * FROM sequenceProtein ORDER BY sequenceMD5;
895 DECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET done = TRUE;
896
897 OPEN cur1;
898
899 cursor_loop: LOOP
900 FETCH cur1 INTO sequenceID,sequenceMD5,sequence,sequenceLength,sequenceMW,sequenceIP;
901 IF done THEN LEAVE cursor_loop; END IF;
902 IF sequenceMD5!=previous_sMD5 THEN
903 SET previous_sMD5 = sequenceMD5;
904 INSERT INTO test_nonr_seq VALUES (sequenceID,sequenceMD5,sequence,sequenceLength,sequenceMW,sequenceIP);
905 END IF;
906 END LOOP cursor_loop;
907
908 DEALLOCATE PREPARE stmt_query;
909 CLOSE cur1;
910
911 END
912 //
913 delimiter ;
914 """
915