"""
File        : parsePSI.py
Author      : Ramon Aragues
Creation    : 19.10.2005
Contents    : PSI parser: uses PSIContentHandler to parse PSI xml files

=======================================================================================================

Implements a PSI parser: it reads the XML file and either: - inserts information into psi mysql db (as described in create_psi_db.sql)
                                                              or
                                                           - prints out information to stdout



Attention! counter_ids inserted as protein identifiers for BIND and MIPS do not mean anything! They are just used
           in the psi database to index the interactions, but one counter id is only used for one interaction,
           and one protein code can have multiple counter ids (one for each interaction where that protein appears)

"""

# parsePSI.py: PSI parser: uses PSIContentHandler to parse PSI xml files
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues

import MySQLdb
import getopt
import sys

from PSIContentHandler import *

# verbosity flag: parseArguments() sets it to 1 when --verbose is given, and it is
# forwarded to PSIContentHandler through parse_psi_file()
verbose = 0

# ----------------------
# Function usage()
# ----------------------
def usage():
    """
    Writes the command-line help of parsePSI.py to stdout.

    Takes no arguments and returns None. The text is emitted with a single
    sys.stdout.write() call, which parses on both Python 2 and Python 3.
    """
    help_lines = [
        "\nUsage: parsePSI.py --psi-dbname=psi_dbname --psi-dbhost=psi_dbhost --psi-dbuser=psi_dbuser --psi-dbpass=psi_dbpass",
        "                     --input-file=input_file  --mode=mode --psi-db=psi_db [--help] [--verbose]",
        "  Where:",
        "        input_file     : name of the file that contains PSI info on protein interactions (psi xml file)",
        "        psi_dbname     : name of the mysql database where information from xml will be inserted (required)",
        "        psi_dbhost     : name of the machine with psi mysql server (required)",
        "        psi_dbuser     : name of the mysql psi username (not required in most systems)",
        # this line used to repeat the psi_dbuser description word for word
        "        psi_dbpass     : password of the mysql psi user (not required in most systems)",
        "        mode           : output mode desired (psiDB or text)",
        "                          - mode 'psiDB' inserts content into mysql psi database",
        "                          - mode 'text' prints output to stdout",
        "        psi_db         : the psi database you are parsing (unfortunately, the standards are not that respected...)",
        "                          - hprd: when parsing the Human Protein Reference Database",
        "                          - mips: when parsing the MIPS Mammalian Protein-Protein Database ",
        # 'bind' is accepted by the argument validation, so it is listed here as well
        "                          - bind: when parsing the BIND database",
        "                          - for other databases, choose the one that you think is closer to them",
        "                            (you'll probably have to play with the PSIContentHandler to adapt it to your DBs...)",
        "        --help         : prints this message",
        "        --verbose      : prints process info",
    ]
    sys.stdout.write("\n".join(help_lines) + "\n")
    

# ---------------------------
# Function parseArguments()                                               
# --------------------------- 

def parseArguments():
    """
    Parses the command line (sys.argv[1:]) and stores the result in module-level globals.

    Globals written:
      input_file, output_mode, psi_db, psi_dbname, psi_dbhost, psi_dbuser, psi_dbpass
          -> value given with the matching option; None if the option was never set
      verbose
          -> set to 1 when --verbose is given

    Prints usage() and terminates the process with exit status 2 when:
      - getopt rejects the command line (unknown option, missing value)
      - --help is given
      - no input file was set
      - the mode is neither 'psiDB' nor 'text'
      - the mode is 'psiDB' and the psi database type is not hprd, mips or bind
    """
    global verbose

    module_vars = globals()

    # long option -> name of the module-level global that receives its value
    value_options = {"--input-file": "input_file",
                     "--mode": "output_mode",
                     "--psi-db": "psi_db",
                     "--psi-dbhost": "psi_dbhost",
                     "--psi-dbname": "psi_dbname",
                     "--psi-dbuser": "psi_dbuser",
                     "--psi-dbpass": "psi_dbpass"}

    # The script entry point presets these globals to None, but a caller importing
    # this module does not: make sure they exist so that the validation below
    # reports a usage error instead of dying with a NameError.
    for global_name in value_options.values():
        module_vars.setdefault(global_name, None)
    module_vars.setdefault("verbose", 0)

    try:
        opts, args = getopt.getopt(sys.argv[1:], "", ["verbose", "help", "input-file=", "mode=", "psi-db=",
                                                      "psi-dbname=", "psi-dbuser=", "psi-dbhost=", "psi-dbpass="])
    except getopt.GetoptError as bad_opt:
        # report the problem on its own line, print help information and exit
        sys.stderr.write(str(bad_opt) + "\n")
        usage()
        sys.exit(2)

    for option, value in opts:

        if option in value_options:
            module_vars[value_options[option]] = value

        elif option == "--verbose":
            verbose = 1

        elif option == "--help":
            # print help information and exit (exit status kept at 2, as before)
            usage()
            sys.exit(2)
    # END OF for option,value in opts:

    if input_file is None:
        print("--------------------------------")
        print("parsePSI.py arguments error: You didn't set a valid input file")
        print("--------------------------------")
        usage()
        sys.exit(2)

    if output_mode not in ("psiDB", "text"):
        print("--------------------------------")
        print("parsePSI.py arguments error: You didnt set a valid output mode (psiDB or text)")
        print("--------------------------------")
        usage()
        sys.exit(2)

    # the type of psi database is only checked when the output goes to the database
    if output_mode != "text":
        if psi_db not in ("hprd", "mips", "bind"):
            print("--------------------------------")
            print("parsePSI.py arguments error: You didn't set a valid type of psi database")
            print("--------------------------------")
            usage()
            sys.exit(2)
        

        
# ---------------------------
# Function parse_psi_file()
# ---------------------------


def parse_psi_file(input_file, output_mode, verbose, psi_db, psi_dbhost, psi_dbname, psi_dbuser, psi_dbpass):
    """
    Parses the PSI xml file "input_file" with a SAX parser driven by a PSIContentHandler.

    "input_file" is the path of the xml file to read.

    "output_mode", "verbose", "psi_db", "psi_dbhost", "psi_dbname", "psi_dbuser" and "psi_dbpass"
    are handed over unchanged to the PSIContentHandler constructor.

    Returns None when the parser reaches the end of the file without raising.
    If the parser raises SAXException, the exception is swallowed and handler.mobile is returned.

    NOTE(review): returning a handler attribute only on the error path looks like a leftover;
    what "mobile" holds is defined in PSIContentHandler and cannot be told from here - confirm.
    """
    handler = PSIContentHandler(output_mode=output_mode, verbose=verbose, psi_db=psi_db,
                                psi_dbhost=psi_dbhost, psi_dbname=psi_dbname,
                                psi_dbuser=psi_dbuser, psi_dbpass=psi_dbpass)
    parser = make_parser()
    parser.setContentHandler(handler)

    # The file used to be opened inline and never closed. The with block releases it on
    # every exit path, and binary mode hands the parser the raw bytes of the document.
    with open(input_file, "rb") as psi_file:
        try:
            parser.parse(psi_file)
        except SAXException:
            return handler.mobile
    return None

# --------
# --------
#  Main()               
# --------                               
# --------

if __name__ == '__main__':

    # Script entry point: read the command line, then parse the PSI xml file.

    # Defaults for every command-line setting: parseArguments() overwrites
    # the ones that were given and validates the result.
    psi_dbname = None
    psi_dbuser = None
    psi_dbhost = None
    psi_dbpass = None

    psi_db = None

    input_file = None
    output_mode = None

    # parsing arguments from the command line
    parseArguments()

    if verbose:
        # The message used to claim "BIND data ... inserting it into database" whatever
        # the database type and output mode were: report what is actually going to happen.
        if output_mode == "psiDB":
            sys.stderr.write("Parsing %s data from %s and inserting it into database %s (server %s)\n" % (psi_db,
                                                                                                         input_file,
                                                                                                         psi_dbname,
                                                                                                         psi_dbhost))
        else:
            sys.stderr.write("Parsing PSI data from %s and printing it to stdout\n" % (input_file,))

    # calling function that will parse the input_file with mode output_mode
    # (the returned value is not used any further by this script)
    psi_parsed = parse_psi_file(input_file=input_file, output_mode=output_mode, verbose=verbose, psi_db=psi_db,
                                psi_dbhost=psi_dbhost, psi_dbname=psi_dbname, psi_dbuser=psi_dbuser,
                                psi_dbpass=psi_dbpass)

    
