Package biana :: Package BianaDB :: Module database
[hide private]
[frames] | no frames]

Source Code for Module biana.BianaDB.database

  1  from sets import * 
  2   
  3   
  4  OPTIMIZE_SPACE = None 
  5   
  6   
  7  # FILE TO IMPLEMENT THE CLASSES RELATED WITH A DATABASE 
8 -class SQLSelectStatement(object):
9
10 - def __init__(self):
11 12 self.tables = Set() 13 self.columns = [] 14 self.fixed_conditions = Set() 15 self.join_conditions = Set() 16 self.group_conditions = Set()
17 18 19
20 - def add_element(self, columns=[], tables=[], fixed_conditions = [], join_conditions = [], group_conditions = [] ):
21 22 [ self.tables.add(x) for x in tables ] 23 [ self.columns.append(x) for x in columns ] 24 [ self.fixed_conditions.add(x) for x in fixed_conditions ] 25 [ self.join_conditions.add(x) for x in join_conditions ] 26 [ self.group_conditions.add(x) for x in group_conditions ]
27
28 - def reset_columns(self, columns = []):
29 30 self.columns = columns
31
32 - def merge(self,sqlSelectStat):
33 34 self.tables.update(sqlSelectStat.tables) 35 self.columns.extend(sqlSelectStat.columns) 36 self.fixed_conditions.update(sqlSelectStat.fixed_conditions) 37 self.join_conditions.update(sqlSelectStat.join_conditions) 38 self.group_conditions.update(sqlSelectStat.group_conditions)
39 40
41 -class Database(object):
42 """ 43 Class to represent a relational database 44 """ 45
46 - def __init__(self, tables=None):
47 """ 48 "tables": a list of table objects that are contained in the database 49 """ 50 51 if tables is None: 52 tables = [] 53 54 self.tables = tables 55 self.created_unique_tables = {} # stores the unique tables created ( the unique_table_name as a key and the optimization number as value ) 56 self.added_unique_table_names = Set() # Stores the unique tables that have been previously inserted in the tables list 57 self.all_tables = Set() # stores the name of all tables, unique and temporal tables included
58
59 - def remove_tables(self):
60 self.tables = [] 61 self.created_unique_tables = {} # stores the unique tables created ( the unique_table_name as a key and the optimization number as value ) 62 self.added_unique_table_names = Set() # Stores the unique tables that have been previously inserted in the tables list 63 self.all_tables = Set()
64
65 - def get_sql_query( self, ignore_primary_keys = False ):
66 """ 67 """ 68 sql_query_string = "\n".join( [x.create_mysql_query(self.created_unique_tables, ignore_primary_key=ignore_primary_keys ) for x in self.tables] ) 69 self.created_unique_tables = {} 70 return sql_query_string
71 72
73 - def add_table(self, table):
74 75 if table.get_table_name() in self.all_tables: 76 raise ValueError("Trying to insert the same table twice") 77 78 self.tables.append(table) 79 self.all_tables.add(table.get_table_name()) 80 81 for actual_field in table.get_fields(): 82 if actual_field.get_optimized_space(): 83 if actual_field.get_optimized_table().get_table_name() not in self.added_unique_table_names: 84 self.all_tables.add(actual_field.get_optimized_table()) 85 self.added_unique_table_names.add(actual_field.get_optimized_table().get_table_name()) 86 self.all_tables.add(table.get_temp_table())
87 88
89 - def get_tables(self):
90 """ 91 Get basic tables 92 """ 93 #return list(self.all_tables) 94 return self.tables
95
96 - def get_all_tables(self):
97 98 return list(self.all_tables)
99
100 - def get_drop_sql_query(self):
101 102 return "\n".join([x.get_drop_query() for x in self.tables])
103 104
105 - def optimize_database(self, optimize=False, analyze=False):
106 """ 107 Returns the queries to optimize database 108 """ 109 110 queries = Set() 111 112 if analyze: 113 for actual_table in self.tables: 114 queries.update(actual_table.get_analyze_query()) 115 116 117 if optimize: 118 for actual_table in self.tables: 119 queries.update(actual_table.get_optimize_queries()) 120 121 return list(queries)
122 123
124 -class FieldDB(object):
125 """ 126 Class to represent a field in a database 127 """ 128
129 - def __init__(self, field_name, data_type, default_value=None, null=True, foreign_key=None, user_friendly_name=None, optimize_space = None, compress = None ):
130 """ 131 "field_name": field name in the database 132 "data_type": data type of this field 133 "default_value" 134 "null": By default, it is accecpted a null value. If not, it is necessary to specify it 135 "foreign_key": it is a tuples with the table and the Field of its correspondent key 136 (TableObject, FieldObject) 137 They should be of the same data_type!!! 138 "user_friendly_name": This is used to refer to this field in a user friendly manner (for different fields in different tables that have the same meaning...) 139 "optimize_space": This is used to apply the space optimization for this field. It consists on saving the unique values of this field in a separated table, and an integer cross-reference is saved in the original table. It can be None, or the number of bytes reserved to index it 140 """ 141 142 self.field_name = field_name 143 self.data_type = data_type 144 self.default_value = default_value 145 self.null = null 146 self.foreign_key = foreign_key 147 148 if OPTIMIZE_SPACE: 149 self.optimize_space = optimize_space 150 else: 151 self.optimize_space = None 152 153 self.optimized_table = None 154 155 self.compress = compress 156 157 if self.optimize_space is not None: 158 self.optimized_table = self.get_optimized_table() 159 160 if( user_friendly_name is None ): 161 self.user_friendly_name = None 162 else: 163 self.user_friendly_name = user_friendly_name.lower() 164 165 self.sqlSelect = None
166
167 - def is_compressed(self):
168 return self.compress
169
170 - def get_optimized_field_name(self):
171 return "original_id_%s" %(self.field_name)
172
173 - def get_optimized_space(self):
174 """ 175 Returns the number of bytes for the optimized form of the field, if it has. Otherwise, return None 176 """ 177 return self.optimize_space
178
179 - def get_user_friendly_name(self):
180 181 if self.user_friendly_name is None: 182 return self.field_name 183 184 return self.user_friendly_name
185
186 - def get_field_name(self):
187 """ 188 189 """ 190 return self.field_name
191
192 - def get_data_type(self):
193 """ 194 195 """ 196 return self.data_type
197
198 - def get_optimized_data_type(self):
199 return "integer(%s) unsigned" %(self.optimize_space)
200
201 - def get_mysql_query(self):
202 203 if self.optimize_space is None: 204 data_type = self.get_data_type() 205 else: 206 data_type = self.get_optimized_data_type() 207 208 query = "%s %s " %(self.get_field_name(), 209 data_type) 210 211 if self.default_value is not None: 212 query += " DEFAULT \"%s\"" %(self.default_value) 213 214 if self.null is False: 215 query += " NOT NULL" 216 217 return query
218 219 # It can be done more elegant...
220 - def get_not_optimized_query(self):
221 222 if self.optimize_space is None: 223 return self.get_mysql_query() 224 225 query = "%s %s " %(self.get_optimized_field_name(), 226 self.get_data_type()) 227 228 if self.default_value is not None: 229 query += " DEFAULT \"%s\"" %(self.default_value) 230 231 if self.null is False: 232 query += " NOT NULL" 233 234 return query
235 236
237 - def set_foreign_key(self, foreign_key):
238 """ 239 """ 240 241 self.foreign_key = foreign_key
242 243
244 - def get_optimized_table(self):
245 """ 246 Get the optimized table 247 """ 248 249 if self.optimize_space is None: 250 raise ValueError("Trying to get an optimized table in a not allowed field") 251 252 if self.optimized_table is None: 253 254 self.optimized_table = TableDB( table_name = "unique_%s" %(self.field_name), 255 table_fields = [ FieldDB( field_name = self.get_field_name(), 256 data_type = "%s auto_increment" %(self.get_optimized_data_type()), 257 null = True), 258 FieldDB( field_name = self.get_optimized_field_name(), 259 data_type = self.get_data_type(), 260 null = True) ], 261 primary_key = (self.get_optimized_field_name()), 262 indices = [(self.get_field_name())] ) 263 264 return self.optimized_table
265 266 ## def get_optimized_temp_table(self): 267 ## """ 268 ## Get the temp table to be able to generate quickly the optimized data 269 ## """ 270 271 ## if self.optimize_space is None: 272 ## raise ValueError("Trying to get an optimized temp table in a not allowed field") 273 274 ## if self.optimized_temp_table is not None: 275 ## return self.optimized_temp_table 276 ## else: 277 278 ## new_table = TableDB( table_name = "temp_%s" %(self.field_name), 279 ## table_fields = [ FieldDB( field_name = self.get_field_name(), 280 ## data_type = "%s auto_increment" %(self.get_optimized_data_type()), 281 ## null = 1), 282 ## FieldDB( field_name = self.get_optimized_field_name(), 283 ## data_type = self.get_data_type(), 284 ## null = 1) ], 285 ## primary_key = (self.get_optimized_field_name()), 286 ## indices = [(self.get_field_name())] ) 287 288 ## return new_table 289 290 291
293 294 if self.foreign_key is not None: 295 return "FOREIGN KEY (%s) REFERENCES %s(%s)" %(self.field_name, 296 self.foreign_key[0].get_table_name(), 297 self.foreign_key[1].get_field_name())
298 299 300
301 -class TableDB(object):
302 """ 303 This object will implement a database table 304 """ 305
306 - def __init__(self, table_name, table_fields, primary_key=(), indices=[], fulltext_indices = []):
307 """ 308 Intializes the object 309 310 "table_name" will be the table name in the database 311 "table_fields" will be a list of fieldDB Objects 312 "primary_key" A tuple or list of the name of the fields that form the primary_key 313 "indices": a list of tuples with the name of the fields that form the indices 314 """ 315 316 self.table_name = table_name 317 self.table_fields = table_fields 318 self.primary_key = primary_key 319 self.indices = indices 320 self.fulltext_indices = fulltext_indices 321 322 self.temp_table_name = None 323 self.has_optimized_field = self._has_optimized_fields() 324 self.temp_table = None 325 326 327 # Stores the information of fields by field_name 328 self.fields_by_name = {} 329 for actual_field in self.table_fields: 330 self.fields_by_name[actual_field.get_field_name()] = actual_field 331 332 self.sqlSelectStats = {}
333
334 - def __str__(self):
335 return self.table_name
336
337 - def set_primary_key(self, primary_key):
338 self.primary_key = primary_key
339
340 - def has_optimized_fields(self):
341 return self.has_optimized_field
342
343 - def _has_optimized_fields(self):
344 345 for x in self.table_fields: 346 if x.get_optimized_space() is not None: 347 self.temp_table_name = "temp_"+self.get_table_name() 348 return 1 349 350 return None
351
352 - def get_temp_table_name(self):
353 return self.temp_table_name
354
355 - def add_field(self, new_field):
356 self.table_fields.append(new_field)
357
358 - def has_field(self, field_name):
359 360 if self.fields_by_name.has_key(field_name): 361 return 1 362 else: 363 return None
364 365
366 - def get_fields(self):
367 368 return self.table_fields
369 370
371 - def get_table_name(self):
372 373 return self.table_name
374
375 - def set_table_name(self, new_name):
376 377 self.table_name = new_name
378 379
380 - def get_field(self, field_name):
381 382 if self.fields_by_name.has_key(field_name): 383 return self.fields_by_name[field_name] 384 else: 385 return None
386
387 - def add_indice(self, indice):
388 389 self.indices.append(indice)
390 391
392 - def has_indice(self, indice):
393 """ 394 Checks if the table contains an indice with those fields 395 """ 396 if( self.indices.contains(indice) ): 397 return 1 398 else: 399 return None
400 401
402 - def get_optimize_queries(self):
403 404 query_list = [ "OPTIMIZE TABLE %s" %self.table_name ] 405 406 for x in self.table_fields: 407 if x.get_optimized_space() is not None: 408 query_list.append( "OPTIMIZE TABLE %s" %x.get_optimized_table().get_table_name() ) 409 410 return query_list
411
412 - def get_analyze_query(self):
413 414 query_list = [ "ANALYZE TABLE %s" %self.table_name ] 415 416 for x in self.table_fields: 417 if x.get_optimized_space() is not None: 418 query_list.append( "ANALYZE TABLE %s" %x.get_optimized_table().get_table_name() ) 419 420 return query_list
421
422 - def get_SQLSearchStatement(self, field, value_list):
423 424 sqlStat = SQLSelectStatement() 425 sqlStat.add_element(tables = [self.table_name]) 426 427 field = self.get_field(field) 428 if field is None: 429 raise ValueError("Trying to get a field (%s) that is not found in thid table" %field) 430 if field.get_optimized_space() is not None: 431 sqlStat.add_element(tables=[field.get_optimized_table().get_table_name()]) 432 sqlStat.add_element(join_conditions = [("%s.%s" %(self.table_name,field.get_field_name()), 433 "=", 434 "%s.%s" %(field.get_optimized_table().get_table_name(),field.get_field_name()) ) ]) 435 #sqlStat.add_element(fixed_conditions = [(field.get_optimized_field_name(),"=",value)]) 436 sqlStat.add_element(fixed_conditions = [(field.get_optimized_field_name(),"IN","(\"%s\")" %("\",\"".join([ str(x) for x in value_list ])),None)]) 437 else: 438 sqlStat.add_element(fixed_conditions = [(field.get_field_name(),"IN","(\"%s\")" %("\",\"".join([ str(x) for x in value_list ])),None)]) 439 #sqlStat.add_element(fixed_conditions = [(field.get_field_name(),"=",value)]) 440 441 return sqlStat
442
443 - def get_SQLSelectStatement(self, field_list):
444 445 if not self.sqlSelectStats.has_key(tuple(field_list)): 446 447 sqlStat = SQLSelectStatement() 448 sqlStat.add_element(tables=[self.table_name]) 449 450 for actual_field in field_list: 451 field = self.get_field(actual_field) 452 if field is None: 453 raise ValueError("Trying to get a field (%s) that is not found in this table" %(actual_field)) 454 if field.get_optimized_space() is not None: 455 sqlStat.add_element(tables=[field.get_optimized_table().get_table_name()]) 456 sqlStat.add_element(join_conditions = [("%s.%s" %(self.table_name,field.get_field_name()), 457 "=", 458 "%s.%s" %(field.get_optimized_table().get_table_name(),field.get_field_name()) ) ]) 459 sqlStat.add_element(columns = ["%s.%s" %(field.get_optimized_table().get_table_name(),field.get_optimized_field_name())]) 460 else: 461 sqlStat.add_element(columns = ["%s.%s" %(self.get_table_name(),field.get_field_name())]) 462 463 self.sqlSelectStats[tuple(field_list)] = sqlStat 464 465 return self.sqlSelectStats[tuple(field_list)]
466 467
468 - def create_mysql_query(self, created_unique_tables={}, ignore_primary_key=False):
469 470 sql_fields = [ x.get_mysql_query() for x in self.table_fields ] 471 472 473 if ignore_primary_key is False: 474 # In this case, the primary key is saved as a normal index 475 primary_key_text = "PRIMARY KEY" 476 else: 477 primary_key_text = "INDEX" 478 479 480 if isinstance(self.primary_key,list) or isinstance(self.primary_key,tuple): 481 if len(self.primary_key)>0: 482 primary_key_sql = "%s (%s)" %(primary_key_text, ",".join(list(self.primary_key))) 483 else: 484 primary_key_sql = None 485 else: 486 primary_key_sql = "%s (%s)" %(primary_key_text, self.primary_key) 487 488 if primary_key_sql is not None: 489 sql_fields.append(primary_key_sql) 490 491 indices_list = [] 492 for actual_indice in self.indices: 493 if( isinstance(actual_indice,tuple) ): 494 indices_list.append([ x for x in actual_indice ]) 495 else: 496 indices_list.append([actual_indice]) 497 498 indices_sql = ["INDEX (%s)" %(",".join(actual_indice)) for actual_indice in indices_list] 499 500 sql_fields.extend(indices_sql) 501 502 fulltext_list = [] 503 for current_index in self.fulltext_indices: 504 if( isinstance(current_index,tuple) ): 505 fulltext_list.append([ x for x in current_index ]) 506 else: 507 fulltext_list.append([current_index]) 508 509 sql_fields.extend(["FULLTEXT (%s)" %",".join(current_fulltext_index) for current_fulltext_index in fulltext_list ]) 510 511 512 # ADD FOREIGN KEY RESTRICTIONS 513 for actual_field in self.table_fields: 514 foreign_key_query = actual_field.get_foreign_key_mysql_query() 515 if foreign_key_query is not None: 516 sql_fields.append(actual_field.get_foreign_key_mysql_query()) 517 518 engine = "MyISAM" 519 520 query = ["CREATE TABLE IF NOT EXISTS %s ( \n\t%s\n ) ENGINE %s;" %(self.table_name, 521 ",\n\t".join(sql_fields), 522 engine)] 523 524 # CREATE AUXILIAR TABLES 525 526 if self.has_optimized_field is not None: 527 #indices = [] 528 for actual_field in self.table_fields: 529 if actual_field.get_optimized_space() is not None: 530 opt_table = actual_field.get_optimized_table() 531 if not created_unique_tables.has_key(opt_table.get_table_name()): 532 query.append(actual_field.get_optimized_table().create_mysql_query(created_unique_tables)) 533 created_unique_tables[opt_table.get_table_name()] = 1 534 #indices.append(actual_field.get_optimized_field_name()) 535 536 sql_fields = [ x.get_not_optimized_query() for x in self.table_fields ] 537 538 #indices_sql = ["INDEX (%s)" %actual_indice for actual_indice in indices] 539 #sql_fields.extend(indices_sql) 540 541 542 query.append("CREATE TABLE %s ( \n\t%s\n ) ENGINE MyISAM;" %(self.get_temp_table_name(), 543 ",\n\t".join(sql_fields)) ) 544 545 546 return "\n".join(query)
547 548
549 - def get_temp_table(self):
550 """ 551 """ 552 553 if self.has_optimized_field is None: 554 return None 555 556 if self.temp_table is None: 557 self.temp_table = TableDB( table_name = self.get_temp_table_name(), 558 table_fields = self.get_fields() ) 559 560 return self.temp_table
561 562
563 - def get_drop_query(self):
564 565 return "DROP TABLE %s;" %self.table_name
566 567
568 - def empty_temporal_table_sql_query(self, _skip_first=None):
569 570 if self.has_optimized_field is None: 571 return None 572 573 574 # Steps: 575 576 queries = [] 577 578 # 1. Put all distinct values into its unique tables 579 if _skip_first is None: 580 for actual_field in self.table_fields: 581 if actual_field.get_optimized_space() is not None: 582 queries.append("ALTER TABLE %s DISABLE KEYS" %(actual_field.get_optimized_table()) ) 583 queries.append("INSERT IGNORE INTO %s (%s) SELECT DISTINCT %s FROM %s" %(actual_field.get_optimized_table(), 584 actual_field.get_optimized_field_name(), 585 actual_field.get_optimized_field_name(), 586 self.get_temp_table_name() ) ) 587 queries.append("ALTER TABLE %s ENABLE KEYS" %(actual_field.get_optimized_table()) ) 588 589 590 # 2. Transfer all the information from the temp table to the final table 591 unique_tables = [] 592 values_in_temp_table = [] 593 where_conditions = [] 594 for actual_field in self.table_fields: 595 if actual_field.get_optimized_space() is not None: 596 unique_tables.append( actual_field.get_optimized_table().get_table_name() ) 597 values_in_temp_table.append( "%s.%s" %(actual_field.get_optimized_table(), 598 actual_field.get_field_name())) 599 where_conditions.append( "%s.%s = %s.%s" %(actual_field.get_optimized_table(),actual_field.get_optimized_field_name(), 600 self.get_temp_table_name(),actual_field.get_optimized_field_name())) 601 else: 602 values_in_temp_table.append( actual_field.get_field_name() ) 603 604 queries.append("ALTER TABLE %s DISABLE KEYS" %(self.get_table_name())) 605 queries.append( "INSERT IGNORE INTO %s (%s) SELECT %s FROM %s, %s WHERE %s" %(self.get_table_name(), 606 ",".join( [x.get_field_name() for x in self.table_fields] ), 607 ",".join(values_in_temp_table), 608 self.get_temp_table_name(), 609 ",".join(unique_tables), 610 " AND ".join(where_conditions) ) ) 611 queries.append("ALTER TABLE %s ENABLE KEYS" %(self.get_table_name())) 612 613 # 3. Empty the temp table 614 queries.append( "DELETE FROM %s" %self.get_temp_table_name() ) 615 616 #print queries 617 return queries
618