"""
File        : get_gos_from_protein_list.py
Author      : Ramon Aragues
Creation    : 23.08.2005
Contents    : program that returns the go term at a given level for proteins in the input file 
Called from : command line

=======================================================================================================
+

"""
import sys
import getopt

import re
import readline
import MySQLdb

from PianaGraph import *
from PianaGraphBuilder import *
from PianaDBaccess import *

from PianaDBaccess import *
import PianaGlobals

import GOApi

import utilities

# verbosity flag: parseArguments() sets it to 1 when --verbose is given on the command line,
# and the main script then writes process info to stderr
verbose = 0
# NOTE(review): not referenced anywhere in the visible part of this file
verbose2 = 0

# NOTE(review): not referenced anywhere in the visible part of this file -- presumably a leftover
# from a related go distance program; confirm before removing
INFINITE_DISTANCE = 100


# ----------------------
# Function usage()
# ----------------------
def usage():
   """
   Writes the command line help of this program to stdout.

   The text is written with a single sys.stdout.write call: the output is the same
   that one print statement per line would produce, one help line per output line.
   """
   help_lines = (
      "--------------------------------------------------------------------------------------------------------------",
      "This program returns the go term at a given level for proteins in the input file \n",
      "Usage: python get_gos_from_protein_list.py  --piana-dbname=piana_dbname --piana-dbhost=piana_dbhost --piana-dbuser=piana_dbuser --piana-dbpass=piana_dbpass",
      "              --go-dbname=go_dbname --go-dbhost=go_dbhost --go-dbuser=go_dbuser --go-dbpass=go_dbpass ",
      # the option is --input-file (two dashes): getopt does not accept a third one
      "              --input-file=input_file",
      "              --input-proteins-type=input_proteins_type  --level=level --one-term-per-line [--help] [--verbose]",
      "\nwhere:",
      "     piana_dbname : name of database piana to be used (required)",
      "     piana_dbhost : name of host where database piana to be used is placed (required)",
      "     piana_dbuser : username accessing the database (not required in most systems)",
      "     piana_dbpass : password of username accessing the database (not required in most systems)",
      "     go_dbname   : name of the go mysql database (required)",
      "     go_dbhost   : name of the machine with go mysql server (required)",
      "     go_dbuser   : name of the mysql go username (not required in most systems)",
      "     go_dbpass   : password of the mysql go username (not required in most systems)",
      "     input_file  : file with the proteins to process (one protein code per line)",
      "     input_proteins_type    : type of the proteins inside the input_file",
      "     level       :  this is the go level for the terms that will be returned",
      "     --one-term-per-line   : prints only one term per line (instead of printing the protein followed by all its terms)",
      "     --help       : prints this message and exits",
      "     --verbose    : prints process info to stdout",
      "--------------------------------------------------------------------------------------------------------------",
   )

   sys.stdout.write("\n".join(help_lines) + "\n")

# ---------------------------
# Function parseArguments()
# ---------------------------

def parseArguments():
   """
   Parses the command line (sys.argv[1:]) and stores the option values in module-level globals.

   Globals that can be set: go_dbname, go_dbhost, go_dbuser, go_dbpass, piana_dbname, piana_dbhost,
   piana_dbuser, piana_dbpass, input_file, input_proteins_type, level_to_reach (converted to int),
   one_term_per_line (set to 1) and verbose (set to 1). A global is only assigned when its option
   appears on the command line: the defaults are expected to be defined at module level before
   this function is called.

   The program exits with status 2, after printing the usage message, when:
      - an option is not recognized
      - --help is given
      - --level is not an integer
      - the go database name or host is missing
      - the input file is missing
   """
   global go_dbname
   global go_dbhost
   global go_dbuser
   global go_dbpass

   global piana_dbname
   global piana_dbhost
   global piana_dbuser
   global piana_dbpass

   global input_file
   global input_proteins_type
   global level_to_reach

   global one_term_per_line

   global verbose

   # "insert-protein-go", "insert-go-info" and "insert-go-distance" are still accepted so that
   # command lines that pass them keep working, but no branch below acts on them
   long_options = ["verbose", "help", "one-term-per-line", "go-dbname=", "go-dbuser=", "go-dbhost=", "go-dbpass=",
                   "piana-dbname=", "piana-dbhost=", "piana-dbuser=", "piana-dbpass=",
                   "insert-protein-go", "insert-go-info", "insert-go-distance",
                   "input-file=", "input-proteins-type=", "level="]

   try:
      opts, args = getopt.getopt(sys.argv[1:], "", long_options)
   except getopt.GetoptError:
      # sys.exc_info() retrieves the exception with a syntax that is valid in every python version
      bad_opt = sys.exc_info()[1]
      # print help information and exit:
      sys.stderr.write("%s\n" % bad_opt)
      usage()
      sys.exit(2)

   for option, value in opts:

      if option == "--go-dbhost":
         go_dbhost = value

      elif option == "--go-dbname":
         go_dbname = value

      elif option == "--go-dbuser":
         go_dbuser = value

      elif option == "--go-dbpass":
         go_dbpass = value

      elif option == "--piana-dbname":
         piana_dbname = value

      elif option == "--piana-dbhost":
         piana_dbhost = value

      elif option == "--piana-dbuser":
         piana_dbuser = value

      elif option == "--piana-dbpass":
         piana_dbpass = value

      elif option == "--input-file":
         input_file = value

      elif option == "--input-proteins-type":
         input_proteins_type = value

      elif option == "--one-term-per-line":
         one_term_per_line = 1

      elif option == "--level":
         try:
            level_to_reach = int(value)
         except ValueError:
            # report a bad level as a usage error instead of ending with a traceback
            sys.stderr.write("--level must be an integer (received '%s')\n" % value)
            usage()
            sys.exit(2)

      elif option == "--verbose":
         verbose = 1

      elif option == "--help":
         # print help information and exit
         usage()
         sys.exit(2)

   # check arguments
   if go_dbname is None or go_dbhost is None:
      sys.stdout.write("trying to establish a connection to go database without giving a host or database name\n")
      usage()
      sys.exit(2)

   if input_file is None:
      # without this check the program fails later with a TypeError when opening the input file
      sys.stdout.write("no input file was given: use --input-file to set the file with the proteins to process\n")
      usage()
      sys.exit(2)

# --------
# --------
#  Main()
# --------
# --------

# Default values for the command line options. parseArguments() overwrites them through
# its "global" declarations, so they have to be defined at module level before it is called.
go_dbname = None
go_dbuser = None
go_dbhost = None
go_dbpass = None

piana_dbname = None
piana_dbuser = None
piana_dbhost = None
piana_dbpass = None

input_file = None
input_proteins_type = None

level_to_reach = 0     # go level of the terms that will be returned: stays 0 if --level is not given on the command line
one_term_per_line = 0  # this determines whether to print only one term per line or a protein followed by all terms

# parsing arguments from the command line
parseArguments()

if verbose:
   sys.stderr.write("Opening connection to piana\n")

piana_access = PianaDBaccess(dbname=piana_dbname, dbhost=piana_dbhost, dbuser=piana_dbuser, dbpassword=piana_dbpass)

# opening connection to MySQL GO database and create a cursor to work with the database
if verbose:
   sys.stderr.write("Opening connection to go\n")

# which credentials are passed to MySQLdb depends on which ones were given on the command line
if go_dbuser is None and go_dbpass is None:
   godb = MySQLdb.connect(db=go_dbname, host=go_dbhost)

elif go_dbpass is None and go_dbuser is not None:
   godb = MySQLdb.connect(user=go_dbuser, db=go_dbname, host=go_dbhost)

else:
   godb = MySQLdb.connect(user=go_dbuser, db=go_dbname, host=go_dbhost, passwd=go_dbpass)

gocursor = godb.cursor()

# the go term types that are reported, in this order, for each protein
term_types = ["molecular_function", "biological_process", "cellular_component"]

input_fd = open(input_file, "r")

try:
   for line in input_fd:

      # read protein to be processed
      protein_code = line.strip()

      if not protein_code:
         # blank lines do not contain a protein: skip them
         continue

      sys.stdout.write("===============================================\n===============================================\n")

      # get proteinPianas for this protein
      list_proteinPiana = piana_access.get_list_protein_piana(proteinCode_value=protein_code,
                                                              proteinCodeType_value=utilities.get_code_column(input_proteins_type),
                                                              tax_id_value=0, source_db_info="no")

      for term_type in term_types:

         # dictionary used as a set: keys are the go term ids at the desired level, values are not used
         parent_gos_for_this_protein = {}

         # get go terms for these proteins
         for proteinPiana in list_proteinPiana:

            list_go_terms = piana_access.get_protein_go_term_id(proteinPiana_value=proteinPiana,
                                                                term_type_value=term_type)

            sys.stdout.write("----------------------\ngo term ids for protein %s (pp=%s) are %s\n" %(protein_code, proteinPiana, list_go_terms))

            for parent_go in GOApi.get_parents_at_level_N(list_go_terms=list_go_terms,
                                                          level_desired=level_to_reach,
                                                          dic_gos_at_level={},
                                                          gocursor=gocursor,
                                                          piana_access=piana_access):

               parent_gos_for_this_protein[parent_go] = None

         # END OF for proteinPiana in list_proteinPiana:

         # the summary covers all the proteinPianas of the protein, so the whole list is printed here:
         # the loop variable is not defined when the list is empty, and otherwise only holds the last element
         sys.stdout.write("\nlevel %s go term ids for protein %s (pp=%s) are %s\n------------------------\n" %(level_to_reach, protein_code,
                                                                                                            list_proteinPiana,
                                                                                                            parent_gos_for_this_protein.keys()))

         # names of the parent gos at the desired level: needed by both output formats
         parent_go_names = [piana_access.get_protein_go_name(go_term_id_value=parent_go_id)
                            for parent_go_id in parent_gos_for_this_protein]

         if one_term_per_line:
            if not parent_go_names:
               sys.stdout.write("%s\t%s\tNOTHING\n" %(term_type, protein_code))
            else:
               for parent_go_name in parent_go_names:
                  sys.stdout.write("%s\t%s\t%s\n" %(term_type, protein_code, parent_go_name))

         # END OF if one_term_per_line
         else:
            # print parent_gos at the desired level for this protein
            sys.stdout.write("%s\t%s" %(term_type, protein_code))
            if not parent_go_names:
               sys.stdout.write("\tNOTHING\n")
            else:
               for parent_go_name in parent_go_names:
                  sys.stdout.write("\t%s" %parent_go_name)
               sys.stdout.write("\n")
         # END OF else: (if one_term_per_line:)

      # END OF for term_type in term_types:

   # END OF for line in input_fd:

finally:
   # the input file is closed even if a database access fails
   input_fd.close()
