"""
File        : filter_postgresql_lines.py
Author      : Ramon Aragues
Creation    : 26.10.2005
Contents    : does not print out to stdout those lines of input file that refer to postgresql commands
Called from : 

=======================================================================================================

This script is needed to create a STRING database on a mysql server, since there are postgresql commands that cannot be
interpreted by MySQL: it does not print out to stdout those lines that refer to postgresql commands

This script will be used in the following way:

$> python2.3 filter_postgresql_lines.py --input-file=dump_file_of_string_postgresql | mysql --database=stringDB

"""

# filter_postgresql_lines.py: does not print out to stdout those lines of input file that refer to postgresql commands
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues

import sys
import getopt
import copy

import re
import readline

# Verbosity flag; the --verbose command line option sets it to 1 as well.
verbose = 1

# Tables whose COPY data is kept when --limit-tables is given.
# Only the keys matter: the dictionary is used for membership tests.
admitted_tables = dict.fromkeys(["proteins_orthgroups",
                                 "precomputed_protein_links",
                                 "precomputed_orthgroup_links",
                                 "identifiers_proteins"])

# ----------------------
# Function usage()
# ----------------------
def usage():
    """
    Prints the command line help of this script to stdout.

    The whole message is emitted through a single parenthesised print of one
    string, which behaves the same under the Python 2 print statement and the
    Python 3 print function.
    """
    separator = "--------------------------------------------------------------------------------------------------------------"

    help_lines = [
        separator,
        " This script transforms a postgresql dump into a mysql dump\n",
        # --limit-tables is shown in brackets because it is optional (it defaults to off)
        "Usage: python filter_postgresql_lines.py --input-file=input_file [--limit-tables] [--help] [--verbose]",
        "\nwhere:",
        "     input_file      : the name of the input file that contains postgresql commands",
        "     --limit-tables  : only inserts information into tables that appear in hard-coded dictionary admitted_tables",
        "     --help          : prints this message and exits",
        "     --verbose       : prints process info to stdout",
        separator,
    ]

    print("\n".join(help_lines))
        

# ---------------------------
# Function parseArguments()                                               
# --------------------------- 

def parseArguments():
    """
    Reads the command line (sys.argv) and stores the result in the module
    level variables input_file, limit_tables and verbose.

    Prints the usage message and exits with status 2 when:
       - the command line cannot be parsed by getopt
       - --help is given
       - no input file has been set once all options are processed
    """
    global input_file
    global limit_tables
    global verbose

    accepted_long_options = ["verbose", "help", "limit-tables", "input-file="]

    try:
        parsed_options, remaining_args = getopt.getopt(sys.argv[1:], "", accepted_long_options)
    except getopt.GetoptError:
        # unknown option or missing value: show the help and stop
        usage()
        sys.exit(2)

    for flag, flag_value in parsed_options:

        if flag == "--help":
            # show the help and stop
            usage()
            sys.exit(2)

        if flag == "--input-file":
            input_file = flag_value
        elif flag == "--limit-tables":
            limit_tables = 1
        elif flag == "--verbose":
            verbose = 1

    # END OF for flag, flag_value in parsed_options:

    if input_file is not None:
        return

    # an input file is mandatory
    sys.stderr.write("You didn't set an input file name\n")
    usage()
    sys.exit(2)
        
        
# --------
# --------
#  Main()               
# --------                               
# --------


# Lines starting with these postgresql commands are dropped from the output.
#   - CREATE INDEX would work under MySQL 5.0 but not on previous versions
#        --> if you have MySQL 5 then you need to translate from: CREATE INDEX si_crc64_checksums ON crc64_checksums USING btree (checksum);
#                                                             to: CREATE INDEX id_index USING BTREE ON crc64_checksums (checksum);
_IGNORED_PREFIXES = ("SET ", "REVOKE", "GRANT", "CREATE INDEX", "--", "COMMENT ON", "ALTER")

# (postgresql text, mysql text) pairs used outside COPY blocks.
# They are tried in this order and ONLY THE FIRST pair whose postgresql text
# appears in the line is applied, so the more specific patterns come first.
_DDL_REWRITES = (
    # mysql does not accept a character varying so long
    ("character varying(33300)", "BLOB"),
    # mysql doesn't have bytea column type, and doesn't like quotes around a column name
    ('"sequence" bytea', "sequence BLOB"),
    # mysql doesn't have bytea column type
    ("bytea", "BLOB"),
    # unquote the comment column and give the varying type an explicit length
    ('"comment" character varying,', "comment character varying(100),"),
    # unquote the comment column and store long comments as BLOB
    ('"comment" character varying(1000)', "comment BLOB"),
    # unquote the comment column name
    ('"comment"', "comment"),
    # a varying type without length gets an explicit length of 100
    ("varying,", "varying(100),"),
    ("varying\n", "varying(100)\n"),
)


def _is_ignored_statement(line):
    """Returns True if line starts with one of the prefixes in _IGNORED_PREFIXES."""
    for prefix in _IGNORED_PREFIXES:
        if line.startswith(prefix):
            return True
    return False


def _rewrite_ddl_line(line):
    """Returns line after applying the first matching pair of _DDL_REWRITES (line unchanged if none matches)."""
    for postgresql_text, mysql_text in _DDL_REWRITES:
        if line.find(postgresql_text) != -1:
            return line.replace(postgresql_text, mysql_text)
    return line


def _copy_header_to_insert(line):
    """
    Transforms a postgresql COPY header into the beginning of a mysql INSERT statement.

    a postgresql copy line looks like this: COPY species (species_id, long_name) FROM stdin;
    """
    line = line.replace("COPY", "INSERT INTO")
    line = line.replace("FROM stdin;", "VALUES")
    return line.replace('"sequence"', 'sequence')


def _copy_row_to_insert(insert_prefix, line):
    """
    Builds a complete "insert into" mysql command from one TAB separated data line.

    insert_prefix is the text returned by _copy_header_to_insert() for the COPY
    block the line belongs to. Double quotes inside the data are replaced by
    spaces, every field is stripped and then wrapped in double quotes.
    """
    # NOTE(review): backslash sequences of the COPY text format (for instance the
    # NULL marker) are passed through inside the quoted value - confirm this is intended
    line = line.strip("\n")
    line = line.replace('"', " ")
    quoted_fields = ['"%s"' % field.strip() for field in line.split("\t")]
    return "%s (%s);\n" % (insert_prefix, ", ".join(quoted_fields))


# --------
# --------
#  Main()
# --------
# --------

input_file = None
limit_tables = 0

# parsing arguments from the command line
parseArguments()

copy_statement = None      # INSERT statement that must precede the values lines; None when we are not inside a COPY block
current_table_name = None  # table of the COPY block being parsed (only tracked when --limit-tables is set)

input_file_fd = open(input_file, "r")
try:
    # for each line, transform the postgresql line to a mysql line
    for line in input_file_fd:

        if copy_statement is None:
            # -------------------------------------------------------------------------------------
            # not inside a copy: remove other postgresql commands and take care of incompatibilities
            # -------------------------------------------------------------------------------------
            if _is_ignored_statement(line):
                continue

            if line.startswith("COPY "):
                if limit_tables:
                    # if tables are limited, get the table name from the line
                    current_table_name = line.split()[1]

                # keep the insert statement for the next lines and do not print this line
                # (the values start in the next line)
                copy_statement = _copy_header_to_insert(line)
                continue

            line = _rewrite_ddl_line(line)

        else:
            # -------------------------------------------------------------------------------------
            # inside a postgresql copy statement: transform data lines to mysql values lines
            # -------------------------------------------------------------------------------------
            if line.startswith("\\."):
                # \. states that the COPY command has finished... leave the copy and do not print the line
                copy_statement = None
                current_table_name = None
                continue

            if limit_tables and current_table_name not in admitted_tables:
                # --limit-tables is active and the insertion is not on one of the admitted tables: do nothing
                continue

            line = _copy_row_to_insert(copy_statement, line)

        # write the modified line
        sys.stdout.write(line)

    # END OF for line in input_file_fd:

    # one flush at the end instead of one per line: same output, far fewer system calls
    sys.stdout.flush()
finally:
    # make sure the dump file is closed even if the processing fails
    input_file_fd.close()

