"""
File        : taxonomy2piana.py
Author      : Daniel Jaeggi and Ramon Aragues
Creation    : 8.2003
Contents    : script that fills up tables in database piana related to  external DB "taxonomy"
Called from : 

=======================================================================================================

This file parses a taxonomy text file and inserts information into PianaDb

Command line option '--help' describes usage of this program

For more details on how to use it, read piana/README.populate_piana_db

"""

# taxonomy2piana.py: script that fills up tables in database piana related to  external DB "taxonomy"
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues


import sys
import getopt

import re
import readline

import MySQLdb

from PianaDBaccess import *

verbose = 0

# ---------------------------------------------------------------
# Set here the default values for command line arguments
# ---------------------------------------------------------------
#
# These will be the values taken by the program when called directly from build_database.py

# Location of file containing external DB "taxonomy"
taxonomy_file = "../../../data/externalDBs/taxonomyDB/names.dmp"

# ----------------------
# Function usage()
# ----------------------
def usage():
    """Write the command line help text of this script to standard output.

    The text describes every accepted option and shows the current value of
    the module-level ``taxonomy_file`` setting as the default input file.
    """
    help_lines = [
        "--------------------------------------------------------------------------------------------------------------",
        "This program fills up tables in database piana related to external DB 'taxonomy' \n",
        "Usage: python taxonomy2piana.py --taxonomy-file=taxonomy_file",
        "                          --piana-dbname=piana_dbname --piana-dbhost=piana_dbhost --piana-dbuser=piana_dbuser --piana-dbpass=piana_dbpass",
        "                                 [--help] [--verbose]",
        "\nwhere:",
        "     piana_dbname : name of database piana to be used (required)",
        "     piana_dbhost : name of host where database piana to be used is placed (required)",
        "     piana_dbuser : username accessing the database (not required in most systems)",
        "     piana_dbpass : password of username accessing the database (not required in most systems)",
        "     taxonomy_file  : the name of the taxonomy input file (default is '%s')" %(taxonomy_file),
        "     --help         : prints this message and exits",
        "     --verbose      : prints process info to stderr",
        "--------------------------------------------------------------------------------------------------------------",
    ]

    # Every entry is terminated by exactly one newline, so a single write of
    # the joined text emits the same bytes as printing the entries one by one.
    sys.stdout.write("\n".join(help_lines) + "\n")
        

   
# ---------------------------
# Function parseArguments()                                               
# --------------------------- 

def parseArguments():
    """Parse the command line and store the option values in module globals.

    Recognised long options and the global each one sets:
        --taxonomy-file=X  -> taxonomy_file
        --piana-dbname=X   -> piana_dbname
        --piana-dbhost=X   -> piana_dbhost
        --piana-dbuser=X   -> piana_dbuser
        --piana-dbpass=X   -> piana_dbpass
        --verbose          -> verbose (set to 1)
        --help             -> prints the usage text and exits

    Globals whose option is absent from the command line are left untouched.

    Exits the process with status 2 (via sys.exit) when an unknown option or
    an option with a missing value is given, after writing the getopt error
    message to stderr and printing the usage text.  --help also exits with
    status 2; that status is kept unchanged for backward compatibility.
    """

    global piana_dbname
    global piana_dbhost
    global piana_dbuser
    global piana_dbpass

    global taxonomy_file

    global verbose

    try:
        opts, _positional = getopt.getopt(sys.argv[1:], "", ["verbose","help","taxonomy-file=",
                                                             "piana-dbname=", "piana-dbhost=", "piana-dbuser=", "piana-dbpass=" ])
    except getopt.GetoptError as bad_opt:
        # The exception must be bound to a name: the handler reports it to the
        # user before printing the help information and exiting.
        sys.stderr.write( "%s\n" %(bad_opt) )
        usage()
        sys.exit(2)

    for option, value in opts:

        if option == "--taxonomy-file":
            taxonomy_file = value

        elif option == "--piana-dbname":
            piana_dbname = value

        elif option == "--piana-dbhost":
            piana_dbhost = value

        elif option == "--piana-dbuser":
            piana_dbuser = value

        elif option == "--piana-dbpass":
            piana_dbpass = value

        elif option == "--verbose":
            verbose = 1

        elif option == "--help":
            # print help information and exit
            usage()
            sys.exit(2)
        
# --------
# --------
#  Main()               
# --------                               
# --------

# Connection settings for the piana database; parseArguments() overwrites
# them with the values given on the command line.
piana_dbname = None
piana_dbuser = None
piana_dbhost = None
piana_dbpass = None

# Value passed as source_db for every species inserted by this script
source_db = "ncbi"

# parsing arguments from the command line
parseArguments()

# Initialising connection to piana
piana_access = PianaDBaccess(dbname=piana_dbname, dbhost=piana_dbhost, dbuser=piana_dbuser, dbpassword= piana_dbpass)


def _clean_taxonomy_field(raw_text):
    # Returns raw_text without surrounding whitespace and without any double
    # quote, slash or backslash character.
    return raw_text.strip().replace('"','').replace("/","").replace("\\","")


taxonomy_fd = open(taxonomy_file,"r")

#
# Reading external DB "taxonomy" and inserting its data into piana DB
#

number_species_inserted = 0
number_species_not_inserted = 0

try:
    if verbose:
        sys.stderr.write( "Reading data from database dump file...\n")

    for line in taxonomy_fd:

        # Blank lines carry no record at all: skip them without counting
        if not line.strip():
            continue

        # Fields of a record are separated by "|"
        line_fields = line.split("|")

        # line_fields is a list with (tax_id, tax_name, ?, tax_comment, ...)
        # NOTE(review): in NCBI's names.dmp the fourth column is presumably the
        # "name class" of the name -- confirm against the taxdump readme.
        if len(line_fields) < 4:
            # truncated record: a needed column is missing
            number_species_not_inserted += 1
            continue

        raw_tax_id = line_fields[0].strip()
        tax_name = _clean_taxonomy_field(line_fields[1])
        tax_comment = _clean_taxonomy_field(line_fields[3])

        if verbose:
            sys.stderr.write( "tax_id is <%s> -- tax_name is <%s> -- tax comment is <%s>\n" %(raw_tax_id, tax_name, tax_comment))

        # The identifier is validated while it is still text: an empty or
        # non-numeric value makes the record "not inserted" instead of
        # aborting the whole run with a ValueError.
        try:
            tax_id = int(raw_tax_id)
        except ValueError:
            number_species_not_inserted += 1
            continue

        if tax_name == "":
            number_species_not_inserted += 1
            continue

        piana_access.insert_species(tax_id= tax_id, tax_name= tax_name, tax_comment=tax_comment, source_db= source_db)
        number_species_inserted += 1

    # END OF for line in taxonomy_fd:
finally:
    # Release the input file even when parsing or insertion fails
    taxonomy_fd.close()


if verbose:
    sys.stderr.write("All done! %s species inserted and %s species not inserted\n" %(number_species_inserted,number_species_not_inserted ))
