"""
File        : gi_vs_tax2piana.py
Author      : Daniel Jaeggi and Ramon Aragues
Creation    : 8.2003
Contents    : script that fills up tables in database piana related to  external DB "gi_vs_tax"
Called from :  build_database.py

=======================================================================================================

This file parses a gi_vs_tax text file and inserts information into PianaDb


Command line option '--help' describes usage of this program

"""

# gi_vs_tax2piana.py: script that fills up tables in database piana related to  external DB "gi_vs_tax"
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues

import sys
import getopt

import re
import readline

import MySQLdb

from PianaDBaccess import *

verbose = 1
verbose_detailed = 0


# ----------------------
# Function usage()
# ----------------------
def usage():
    """
    Print the command-line help of this script to standard output.

    The text is written with sys.stdout.write() instead of the print
    statement, so the function is valid under both Python 2 and Python 3
    while producing the same layout as before.

    The input file is documented as required: the script has no default
    for it and cannot run without one.
    """
    help_lines = [
        "--------------------------------------------------------------------------------------------------------------",
        "This program fills up tables in database piana related to external DB 'gi_vs_tax' \n",
        "Usage: python gi_vs_tax2piana.py --input-file=input_file",
        "                 --piana-dbname=piana_dbname --piana-dbhost=piana_dbhost --piana-dbuser=piana_dbuser --piana-dbpass=piana_dbpass",
        "                                 [--help] [--verbose]",
        "\nwhere:",
        "     piana_dbname : name of database piana to be used (required)",
        "     piana_dbhost : name of host where database piana to be used is placed (required)",
        "     piana_dbuser : username accessing the database (not required in most systems)",
        "     piana_dbpass : password of username accessing the database (not required in most systems)",
        "     input_file   : the name of the taxonomy input file (required)",
        "     --help       : prints this message and exits",
        "     --verbose    : prints process info to stdout",
        "--------------------------------------------------------------------------------------------------------------",
    ]

    # One trailing newline per entry, exactly as the print statement did
    sys.stdout.write("\n".join(help_lines) + "\n")
        

   
# ---------------------------
# Function parseArguments()                                               
# --------------------------- 

def parseArguments():
    """
    Parse sys.argv and store the recognised option values in module globals.

    Globals written: input_file, piana_dbname, piana_dbhost, piana_dbuser,
    piana_dbpass and verbose.

    Exit behaviour:
      - unknown or malformed option: prints the usage text, exits with 2
      - --help / -h: prints the usage text, exits with 0 (asking for help
        is not an error)
      - no input file given: prints an error and the usage text, exits
        with 2, so the script fails before doing any other work
    """

    global piana_dbname
    global piana_dbhost
    global piana_dbuser
    global piana_dbpass

    global input_file

    global verbose

    try:
        # NOTE: the short option "c:" is accepted but has no handler below;
        # it is kept so that existing invocations passing -c keep working.
        opts, args = getopt.getopt(sys.argv[1:], "vht:c:n:o:u:w:", ["verbose", "help", "input-file=",
                                                                   "piana-dbname=", "piana-dbhost=",
                                                                   "piana-dbuser=", "piana-dbpass="])
    except getopt.GetoptError:
        # print help information and exit:
        usage()
        sys.exit(2)

    for option, value in opts:

        if option in ("-t", "--input-file"):
            input_file = value

        elif option in ("-n", "--piana-dbname"):
            piana_dbname = value

        elif option in ("-o", "--piana-dbhost"):
            piana_dbhost = value

        elif option in ("-u", "--piana-dbuser"):
            piana_dbuser = value

        elif option in ("-w", "--piana-dbpass"):
            piana_dbpass = value

        elif option in ("-v", "--verbose"):
            verbose = 1

        elif option in ("-h", "--help"):
            # print help information and exit successfully
            usage()
            sys.exit(0)

    # The input file has no default: without it the later open() call
    # would fail with an unhelpful TypeError, so stop here instead.
    if not input_file:
        sys.stderr.write("Error: no input file given (--input-file is required)\n")
        usage()
        sys.exit(2)
        
# --------
# --------
#  Main()               
# --------                               
# --------

# Connection parameters of the piana database; parseArguments() overwrites
# them with the values given on the command line.
piana_dbname = None
piana_dbuser = None
piana_dbhost = None
piana_dbpass = None

# Path of the gi_vs_tax file to read; set by parseArguments()
input_file = None


# Value passed as proteinSpeciesSource_value for every species inserted below
sourceDB = "ncbi"
# parsing arguments from the command line
parseArguments()

# Initialising connection to piana
piana_access = PianaDBaccess(dbname=piana_dbname, dbhost=piana_dbhost, dbuser=piana_dbuser, dbpassword= piana_dbpass)

input_file_fd = open(input_file,"r")

#
# Reading external DB "gi_vs_tax" and inserting its data into piana DB
#

if verbose:
    sys.stderr.write( "Reading data from gi_vs_tax file...\n")

# Counters of input lines whose gi code was / was not matched to at least
# one proteinPiana
number_gis_found= 0
number_gis_not_found= 0

try:
    for line in input_file_fd:

        # Each useful line holds two whitespace-separated columns:
        #   column 0 is a gi code
        #   column 1 is a tax_id
        line_fields = line.split()

        if len(line_fields) != 2:
            # lines with any other number of columns are ignored
            continue

        # split() already removed all surrounding whitespace from the tokens
        gi_code, tax_id = line_fields

        if verbose_detailed:
            sys.stderr.write( "tax_id is <%s> -- gi is <%s> \n" %(tax_id, gi_code))

        # NOTE(review): tax_id_value=0 presumably means "do not restrict the
        # lookup by species" -- confirm against PianaDBaccess.
        list_proteinPiana = piana_access.get_list_protein_piana(proteinCode_value=gi_code,
                                                                proteinCodeType_value= PianaGlobals.giID_col,
                                                                tax_id_value=0)

        for proteinPiana in list_proteinPiana:

            if verbose:
                sys.stderr.write("Inserting tax_id <%s> for proteinPiana <%s>\n" %(tax_id, proteinPiana))

            piana_access.insert_protein_species(tax_id =  tax_id,
                                                proteinPiana_value = proteinPiana,
                                                proteinSpeciesSource_value = sourceDB)
        # END OF for proteinPiana in list_proteinPiana:

        # a gi counts as found when the lookup returned at least one proteinPiana
        if list_proteinPiana:
            number_gis_found += 1
        else:
            number_gis_not_found += 1

    # END OF for line in input_file_fd:
finally:
    # release the file handle even if a database call raised
    input_file_fd.close()


if verbose:
    sys.stderr.write("All done! %s gis found and %s gis not found\n" %(number_gis_found,
                                                                       number_gis_not_found))
