"""
 File       : piana.py
 Author     : R. Aragues
 Creation   : 12.1.2004
 Contents   : command line interface to PIANA (batch or interactive execution available)
 Called from: command line

=======================================================================================================
This program is the command line interface to PIANA (batch or interactive execution available)


To learn more about what you can do with piana.py, type piana.py --help and read conf_files/general_template.piana_conf

You've got some examples on how to use piana.py on piana/README.piana_examples

You should also read piana/README.piana_tutorial

If you encounter problems running PIANA, please read piana/README.piana_requirements and README.piana_installation

"""

# piana.py: command line interface to PIANA tool
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues

import sys
import getopt
import time
import re
import readline
import cPickle
from sets import *

from ndict import seqdict # implements ordered dictionaries
                          # package distributed within PIANA but
                          # developed by someone else
                          # read piana/code/utilities/seqdict/seqdict_webpage.txt
                          # for more info on this package

from PianaApi import *


# VERBOSE VARIABLES: flags (0 = off, 1 = on) controlling how much information
# about the execution process is printed out

# flags that are switched off by default
verbose = 0
verbose_graph_builder = 0
verbose_protein = 0
verbose_parse_conf = 0
verbose_build = 0
verbose_patches = 0
verbose_prots_cir = 0

# flags that are switched on by default
verbose_command_sequence = 1
verbose_specBuild = 1
verbose_prots_cir_shallow = 1


# -------------------------------------
# GLOBAL VARIABLES USED BY THIS PROGRAM
# -------------------------------------

# valid_execution_commands is an ordered dictionary (seqdict) with the execution commands that can be
# given to piana. Each command is mapped to a two-element list: [description, None]
# (the second element is always None here; presumably a placeholder for the arguments of the command)
#
#   The commands are declared only once, in the table inside _build_valid_execution_commands():
#   the order of that table is the order of the commands in the ordered dictionary

def _build_valid_execution_commands():
    """
    Returns the seqdict of valid execution commands, built from a single ordered table
    of (command name, command description) pairs
    """
    ordered_commands = (
        ("reset-network",                  "start a new network  (doesn't change parameters, use option 'modif' for that))"),
        ("save-network",                   "save current network in a disc file. You'll be able to load it in the future"),
        ("load-network",                   "loads a network in memory from a disc file (overwrites current network)\n"),
        ("add-protein",                    "add a protein (and its interactions) to current network"),
        ("add-proteins-file",              "add a list of proteins (and their interactions) from an text file to current network"),
        ("add-interactions-file",          "add a list of interactions from an text file to current network"),
        ("species-network",                "build a network for all proteins in a given species"),
        ("database-method-network",        "build a network for all protein interactions in a given database and/or a given method\n"),
        ("print-table",                    "print table with all interactions in the network"),
        ("print-table-db-intersection",    "print table using only interactions that appear at the same time in several databases\n"),
        ("print-network",                  "print network in DOT format"),
        ("print-network-db-intersection",  "print network  in DOT format using only interactions that appear at the same time in several databases\n"),
        ("print-all-prots-info",           "prints information about all proteins in network"),
        ("print-root-prots-info",          "prints information about root proteins in network"),
        ("print-connect-prots-info",       "prints information about proteins that connect root_nodes"),
        ("protein-code-2-protein-code",    "transforms codes from input-file to output_proteins_code (doesn't create a network)"),
        ("protein-code-2-all-info",        "gets information of codes from input-file (doesn't create a network)\n"),
        ("print-parameters",               "print parameters used to create current network"),
        ("expand-interactions",            "predicts interactions of proteins in the network using expansion"),
        ("find-shortest-route",            "prints the route between two given proteins (ie. intermediate proteins)\n"),
        ("find-distance-group",            "prints proteins that are at distance N of a query protein\n"),
        ("match-proteins-to-spots",        "identifies spots in a 2D gel by matching MW and/or IP to proteins in the network\n"),
        ("cluster-by-go-terms",            "clusters the protein interaction network using go terms\n"),
        ("train-cirs",                     "produces output for training/evaluating CIRs (!!!NOT WORKING!!!)"),
        ("find-protein-patches",           "divides proteins into patches (!!!NOT WORKING!!!)"),
        ("exit",                           "exit"),
    )

    # seqdict takes first the keys (in the desired order) and then the dictionary itself
    command_names = [command for command, description in ordered_commands]
    command_table = dict([(command, [description, None]) for command, description in ordered_commands])

    return seqdict(command_names, command_table)


valid_execution_commands = _build_valid_execution_commands()

"""
OPTIONS CURRENTLY DE-ACTIVATED:

     "modify-parameters",
     "filter",
     "print-protein-patches",
     "print-shared-patches",
     "print-patches-interactions",
     "print-patches-network",


    "modify-parameters":              [ "changes current parameters (depth, code types, ...) (!!!NOT WORKING!!!)\n", None],
    "filter":                         [ "filters proteins from the network (!!!NOT WORKING!!!)", None],
    "print-protein-patches":          [ "prints for each protein, which patches does it have at a certain level (!!!NOT WORKING!!!)\n", None],
    "print-shared-patches":           [ "prints for each patch, which proteins do have it at a certain level (!!!NOT WORKING!!!)", None],
    "print-patches-interactions":     [ "prints the table patch-patch interactions network at a certain level (!!!NOT WORKING!!!)", None],
    "print-patches-network":          [ "prints the .dot patch-patch interactions network at a certain level (!!!NOT WORKING!!!)", None],
"""
# .................................
# Names of the arguments that can be given to commands in configuration files
# (piana configuration file and benchmark configuration file)
# .................................

# arguments used in piana configuration file
arg_protein_name = "protein-name"
arg_protein_a_name = "protein-a-name"
arg_protein_b_name = "protein-b-name"
arg_protein_type = "protein-type"
arg_file_name = "file-name"
arg_species_name = "species-name"
arg_tax_id = "tax-id"
arg_output_target = "output-target"
arg_print_mode = "print-mode"
arg_format_mode = "format-mode"
arg_database_name = "database-name"
arg_method_name = "method-name"
arg_list_dbs = "list-dbs"
arg_output_mode = "output-mode"
arg_clustering_steps = "clustering-steps"
arg_score_threshold = "score-threshold"
arg_function_name = "function-name"
arg_w_patches = "w-patches"
arg_w_prots = "w-prots"
arg_w_belong = "w-belong"
arg_clustering_level = "clustering-level"
arg_ranked_parameters_file = "ranked-parameters-file"
arg_expansion_type = "expansion-type"
arg_expansion_threshold = "expansion-threshold"
arg_expansion_nodes = "expansion-nodes"
arg_exp_output_mode = "exp-output-mode"
arg_spots_file_name = "spots-file-name"
arg_list_mw_error = "list-mw-error"
arg_list_ip_error = "list-ip-error"
arg_save_mode = "save-mode"
arg_disc_name = "disc-name"
arg_remove_redundant = "remove-redundant"
arg_distance = "distance"
arg_info = "info"
arg_term_type = "term-type"
arg_sim_mode = "sim-mode"
arg_level_threshold = "level-threshold"
arg_distance_threshold = "distance-threshold"
arg_rep_term = "rep-term"
arg_print_id = "print-id"
arg_similarity_mode = "similarity-mode"
arg_minimum_score = "min-score"
arg_cir_method = "cir-method"

# arguments used in benchmark configuration file
arg_list_function_names_to_test = "list-function-names-to-test"
arg_list_w_patches_to_test = "list-w-patches-to-test"
arg_list_w_prots_to_test = "list-w-prots-to-test"
arg_list_w_belong_to_test = "list-w-belong-to-test"
arg_gold_std = "gold-std"
arg_patch_mode = "patch-mode"
arg_comp_file_name = "comp-file-name"
arg_comp_file_dir = "comp-file-dir"
arg_comparison_mode = "comparison-mode"


# .................................
# Sentences shown to the user when asking for input
# .................................

# proteins, input files and network building
string_get_protein_type = "Write protein code type (unientry, uniacc, geneName, gi, pdb.chain, sequence or md5): "
string_get_file_name = "Write input file name with one protein per line: "
string_get_int_file_name = "Write input file name with one interaction per line: "
string_get_depth = "Write depth of network: "
string_get_hub_threshold = "Write maximum number of interactions per protein (0 for ignoring thresholds): "
string_get_protein = "Write protein name: "
string_get_net_species_name = "Write species name: (eg. blank (if using tax id) or human, yeast, fruit fly, ...) "
string_get_net_tax_id = "Write taxonomy id: (eg. blank (if using species name) or 9606, 4932, 7227, ...) "
string_get_species_name = "Write species name: (eg. all, human, yeast, saccharomyces cerevisiae, ...) "

# piana database connection
string_get_piana_dbname = "Write the name of piana database (eg. pianaDB_limited): "
string_get_piana_dbhost = "Write the name of piana host (eg. localhost): "
string_get_piana_dbuser = "Write the name of piana user: "
string_get_piana_dbpass = "Write the name of piana password: "

# results and output
string_get_results_prefix = "Write the prefix all result files will have: "
string_get_results_dir = "Write the directory where results will be saved (must end with '/'): "
string_get_results_directory = "Write directory where result files will be placed: "
string_get_output_name = "Write file name where output will be saved (screen prints to stdout): "
string_get_print_mode = "Write which proteins are allowed to appear in output (all, all_root or only_root): "
string_get_format_mode = "Write format desired for output files (txt or html): "
string_get_output_mode = "Write output mode: compact (one line info) or extended (all information): "

# interaction databases and methods
string_get_int_dbname = "Write an interaction database name  (dip, string, ori, posas, expansion, pibase or mips) to be included: (end to exit) "
string_get_dbname = "Choose an interaction database name  (dip, string, ori, posas, expansion, pibase or mips): "
string_get_methodname = "Choose an interaction method name  (y2h, pibase_dbalig75c0, ): "

# expansion of interactions
string_get_expansion_type = "Choose expansion type you want to apply (ec, cog, scop or interpro): "
string_get_expansion_threshold = "Set the expansion threshold (max number of nodes sharing expansion type (0 means no threshold): "
string_get_nodes_to_expand = "Write the nodes you want to predict interactions for: (all or root): "
string_get_exp_output_mode = "Write if expansion predictions should be added to network or printed to stdout (add or print): "

# patches clustering
string_get_clustering_steps = "Set how many clustering steps you want to perform: "
string_get_score_threshold = "Set the score threshold for considering two patches identical: "
string_get_function_name = "Set scoring function name that you want to use (quotient, lineal, ...): "
string_get_w_patches = "Set the weight for component 'number of shared interacting patches' "
string_get_w_prots = "Set the weight for component 'number of shared interacting proteins' "
string_get_w_belong = "Set the weight for component 'belonging to same protein' "
string_get_clustering_level = "Set which clustering level to be printed:  "

# spots, saving and loading
string_get_spots_file = "Write file name with spots ids and their MW and IP: "
string_get_save_file = "Write file name where network will be saved: "
string_get_load_file = "Write file name containing the network you want to load:  "
string_get_save_mode = "Set how patches will be saved (memory or disc): "
string_get_disc_name = "Write file prefix where temporary patchgroup graphs will be saved: "
string_get_remove_redundant = "Set if redundant codes should be removed from output (yes or no): "

# distance groups
string_get_distance = "Set distance from protein query that will be used to retrieve distance-group proteins: "
string_get_info = "Set which info will be printed next to the distance group proteins (blank, all, scop or cath): "

# clustering by GO terms
string_get_term_type = "Set the GO term type you want to use for clustering (molecular_function, biological_process or cellular_component): "
string_get_sim_mode = "Set the mode used for calculating differences between clusters (min, max, random, average): "
string_get_go_score_threshold = "Set the score threshold for considering two clusters similar (eg. 0.2): "
string_get_level_threshold = "Set the lowest level of the go term in the cluster allowed for continuing clustering (eg. 2): "
string_get_distance_threshold = "Set the maximum distance allowed between two proteins in order to be clustered (eg. 3): "
string_get_rep_term = "Set which of the GO terms in the cluster will be used for printing, max level or min level (min or max): "
string_get_print_id = "Sets if clusters will be labeled in the output with and id or just the name (yes or no): "
# sentence used to ask the user for a protein name (same wording as string_get_protein)
string_get_protein_name      = "Write protein name: "


# ----
# Default values for piana parameters: used for those parameters that were not set through
# the command line and for those parameters that cannot be set through the command line
# in interactive mode
# ----
default_output_mode = "compact"

# molecular error bounds for which the matching will be performed
default_molecular_error_bounds = [0.0, 0.0025, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3]

# isoelectric error bounds for which the matching will be performed
# (same values as the molecular ones, but kept in an independent list)
default_isoelectric_error_bounds = list(default_molecular_error_bounds)


def usage():
    """
    Prints to stdout the help message of piana.py

    The message is made of a fixed description of the command line options, followed by
    the protein types listed in PianaGlobals.valid_protein_types (with their descriptions
    from PianaGlobals.protein_types_description) and by the interaction databases listed
    in PianaGlobals.interaction_databases
    """
    emit = sys.stdout.write

    # fixed text: banner, synopsis and description of each command line value
    options_help = (
        "---------------------------------------------------\n",
        "  PIANA: Protein Interactions and Network Analysis \n",
        "                             ----------------------\n\n",
        " There is an interactive mode, where commands to be \n",
        " executed are requested to the user,  and a batch   \n",
        " mode where the sequence of commands is determined  \n",
        " in a piana configuration file.                     \n\n",
        " Please, read piana/README.piana_tutorial for more \n",
        " information on how to use this program         \n\n",
        "Usage: python piana.py --exec-mode=exec-mode-value   \n",
        "       --configuration-file=configuration-file-value \n",
        "       --input-file=input-file-value  \n",
        "       --input-proteins-type=input-proteins-type-value\n",
        "       --input-proteins-species=input-proteins-species-value  \n",
        "       --depth=depth-value                     \n",
        "       --hub-threshold=hub-threshold-value\n",
        "       --output-proteins-type=output-proteins-type-value  \n",
        "       --piana-dbname=piana-dbname-value       \n",
        "       --piana-dbhost=piana-dbhost-value       \n",
        "       --piana-dbuser=piana-dbuser-value       \n",
        "       --piana-dbpass=piana-dbpass-value       \n",
        "       --results-prefix=results-prefix-value   \n",
        "       --results-dir=results-dir-value         \n",
        "       --spots-file-name=spots-file-name-value \n",
        "       --benchmark-conf-file=benchmark-conf-file-value \n",
        "       --gold-std=gold-std-value               \n",
        "       --patch-mode=patch-mode-value           \n",
        "       --help --verbose                        \n",
        "\nwhere:\n",
        "   --help               : prints this message and exits                 \n",
        "   --verbose            : prints process info to stdout                 \n",
        "   exec-mode-value      : interactive or batch (default: interactive)   \n",
        "                          batch mode requires a piana_configuration_file\n",
        "   configuration-file-value : file that sets piana parameters (both exec\n",
        "                              modes) and execution commands (only batch)\n",
        "                              -> conf_files/general_template.piana_conf \n",
        "                                 describes how to write this conf file  \n",
        "   input-file-value     : file name containing one protein per line     \n",
        "   depth-value          : depth of the network to be built              \n",
        "   hub-threshold-value  : max number of interactions allowed per protein\n",
        "                           - if num_ints_of_protein >= hub_threshold    \n",
        "                              --> no interactions added for the protein \n",
        "                           - if hub_threshold== 0, no thresholds applied\n",
        "   piana-dbname-value   : name of the PIANA database to be used         \n",
        "   piana-dbhost-value   : host where the PIANA database is placed       \n",
        "   piana-dbuser-value   : username accessing the database               \n",
        "                          (not required in most systems)                \n",
        "   piana-dbpass-value   : password of username accessing the database   \n",
        "                          (not required in most systems)                \n",
        "   results-prefix-value : prefix of results files, indicating a         \n",
        "                          representative name of your experiment        \n",
        "   results-dir-value    : directory where results will be saved         \n",
        "                          (must end with '/')                           \n",
        "\n spots-file-name-value: input file containing one spot per line       \n",
        "                          with its isoelect point and molecular weight  \n",
        "\n benchmark-conf-file-value: benchmark configuration file (don't use)  \n",
        "   gold-std-value       : file name holding the golden standard for this\n",
        "                          training (not working)                        \n",
        "   patch-mode-value     : sets the mode for finding protein patches     \n",
        "                          (don't use)                                   \n",
        "     - train: creating comparison files used by ScoringFunctionBenchmark\n",
        "              (needs a gold std and benchmark conf file)                \n",
        "     - eval: finding patches and evaluating how good the results are    \n",
        "              (needs a gold std and benchmark conf file)                \n",
        "     - exec: finding patches and printing decomposition for the proteins\n\n",
        "   input-proteins-species-value:species of proteins in input_file_name  \n",
        "                              (fixes species for codes with many species\n",
        "   input-proteins-type-value : type of  protein database identifier in  \n",
        "                               input_file_name                          \n",
        "   output-proteins-type-value: type of protein database identifier that \n",
        "                               will be used for output                  \n",
        "\n  --> attention! geneNames are not reliable at all: I strongly suggest\n",
        "                that you find their equivalent codes in Uniprot entries,\n",
        "\n              uniprot accession numbers or NCBI gi                    \n",
        "\n  --> valid values for input-proteins-type-value and                  \n",
        "        output-proteins-type-value are:                                 \n",
    )
    for text in options_help:
        emit(text)

    # one line per protein code type known to PIANA
    for protein_type in PianaGlobals.valid_protein_types:
        type_description = PianaGlobals.protein_types_description[protein_type]
        emit( "    - %s: %s \n" %(protein_type, type_description) )

    # fixed text: introduction to the list of interaction databases
    databases_help = (
        "\n\nValid values for interaction databases are:                         \n",
        "    (required in some configuration file parameters and arguments)      \n",
        "\n  --> attention! Read sect. 'Setting which interaction databases to use'\n",
        "                   on README.piana_tutorial to better understand how to  \n",
        "                   choose interaction databases                          \n",
        "\n  --> attention! Not all these databases are necesarily available on   \n",
        "                   your system: it depends on which interaction databases\n",
        "                   you have parsed and inserted into your PIANA database \n",
    )
    for text in databases_help:
        emit(text)

    # one line per interaction database known to PIANA
    for database in PianaGlobals.interaction_databases:
        emit( "     - %s \n" %database )

    emit( "-------------------------------------------------------------------------\n" )
        

def set_parameter_value(parameter_name= None, value=None):
    """
    Assigns "value" to the global variable associated to the parameter "parameter_name"

    It is used by both parseArguments() and parse_configuration_file(): parameter assignment
    has been centralized here so it can be used by the two ways we have of setting parameters
    (command line and configuration file)

    "parameter_name" is the name of the parameter without leading hyphens (eg. "piana-dbname")

    "value" is the string read for the parameter. The string "blank" means "no value given":
       - parameters that can be set on the command line are left untouched by "blank"
         (except input-proteins-species and results-dir, which then take their default value)
       - parameters that can only be set in the configuration file take their default value

    Command line has preference over configuration file: a parameter that can be set on the
    command line is only assigned if it has no value yet (ie. its global variable is None).
    Therefore, the global variables of those parameters must exist (initialised to None)
    before calling this function.

    depth and hub-threshold are converted with int(): a non-numeric value raises ValueError

    Parameter names that are not known are silently ignored.

    Calls usage() and exits the program (status 2) for parameter "help" and when the
    execution mode is neither "interactive" nor "batch"
    """
    global spots_file_name
    global input_file_name

    global input_proteins_type
    global input_proteins_species

    global output_proteins_type
    global output_proteins_species
    global list_alternative_types
    global list_keywords

    global file_over_expressed
    global file_infra_expressed
    global expression_protein_type

    global depth
    global hub_threshold
    global use_self_ints
    global list_source_dbs
    global inverse_dbs
    global ignore_unreliable
    global list_source_methods
    global inverse_methods

    global results_prefix
    global results_dir
    global exec_mode
    global configuration_file

    global piana_dbname
    global piana_dbhost
    global piana_dbuser
    global piana_dbpass

    global benchmark_conf_file
    global gold_std
    global patch_mode

    global verbose
    global verbose_command_sequence

    # "blank" is the marker meaning that no value was given for the parameter
    has_value = (value != "blank")

    # ------------------------------------------------------------------------
    # parameters that can be set on the command line: only assigned when they
    # do not have a value yet, so that the command line has preference over
    # the configuration file
    # ------------------------------------------------------------------------
    if parameter_name == "input-file":
        if input_file_name is None and has_value:
            input_file_name = value

    elif parameter_name == "piana-dbname":
        if piana_dbname is None and has_value:
            piana_dbname = value

    elif parameter_name == "piana-dbhost":
        if piana_dbhost is None and has_value:
            piana_dbhost = value

    elif parameter_name == "piana-dbuser":
        if piana_dbuser is None and has_value:
            piana_dbuser = value

    elif parameter_name == "piana-dbpass":
        if piana_dbpass is None and has_value:
            piana_dbpass = value

    elif parameter_name == "benchmark-conf-file":
        if benchmark_conf_file is None and has_value:
            benchmark_conf_file = value

    elif parameter_name == "gold-std":
        if gold_std is None and has_value:
            gold_std = value

    elif parameter_name == "patch-mode":
        if patch_mode is None and has_value:
            patch_mode = value

    elif parameter_name == "input-proteins-type":
        if input_proteins_type is None and has_value:
            input_proteins_type = value

    elif parameter_name == "input-proteins-species":
        # the default is only applied when no species was set before and no value is given now:
        # a species that was already set (eg. on the command line) must never be replaced
        if input_proteins_species is None:
            if has_value:
                input_proteins_species = value
            else:
                input_proteins_species = "all" # ---> default value for input_proteins_species

    elif parameter_name == "output-proteins-type":
        if output_proteins_type is None and has_value:
            output_proteins_type = value

    elif parameter_name == "depth":
        if depth is None and has_value:
            depth = int(value)

    elif parameter_name == "hub-threshold":
        # if the threshold was not set on the command line, then retrieve it from configuration file
        if hub_threshold is None and has_value:
            hub_threshold = int(value)

    elif parameter_name == "spots-file-name":
        if spots_file_name is None and has_value:
            spots_file_name = value

    elif parameter_name == "results-prefix":
        if results_prefix is None and has_value:
            results_prefix = value

    elif parameter_name == "results-dir":
        if results_dir is None:
            if has_value:
                results_dir = value
            else:
                results_dir = "./" # ---> default value for results_dir

    elif parameter_name == "exec-mode":
        if exec_mode is None and has_value:
            exec_mode = value

        # the check is done on the resulting mode: it also fails when no mode has been set at all
        if exec_mode != "interactive" and exec_mode != "batch":
            sys.stderr.write("\nincorrect execution mode set: %s\n" %exec_mode)
            usage()
            sys.exit(2)

    elif parameter_name == "configuration-file":
        # this parameter will only be set through the command line... no need to check if it is None
        configuration_file = value

    # ------------------------------------------------------------------------
    # parameters that can only be set through a configuration file: no need to
    # check if they are None. When no value is given, the default is assigned
    # ------------------------------------------------------------------------
    elif parameter_name == "output-proteins-species":
        if has_value:
            output_proteins_species = value
        else:
            output_proteins_species = "all" # ---> default value for output_proteins_species

    elif parameter_name == "use-self-ints":
        if has_value:
            use_self_ints = value
        else:
            use_self_ints = "yes"

    elif parameter_name == "list-source-dbs":
        # either the string "all" or a list with the names that were given separated by ':'
        if has_value and value != "all":
            list_source_dbs = value.split(":")
        else:
            list_source_dbs = "all"

    elif parameter_name == "inverse-dbs":
        if has_value:
            inverse_dbs = value
        else:
            inverse_dbs = "no"

    elif parameter_name == "ignore-unreliable":
        if has_value:
            ignore_unreliable = value
        else:
            ignore_unreliable = "no"

    elif parameter_name == "list-source-methods":
        # either the string "all" or a list with the names that were given separated by ':'
        if has_value and value != "all":
            list_source_methods = value.split(":")
        else:
            list_source_methods = "all"

    elif parameter_name == "inverse-methods":
        if has_value:
            inverse_methods = value
        else:
            inverse_methods = "no"

    elif parameter_name == "list-alternative-types":
        if has_value:
            list_alternative_types = value.split(":")
        else:
            list_alternative_types = []

    elif parameter_name == "list-keywords":
        if has_value:
            list_keywords = value.split(":")
        else:
            list_keywords = []

    elif parameter_name == "file-over-expressed":
        if has_value:
            file_over_expressed = value
        else:
            file_over_expressed = None

    elif parameter_name == "file-infra-expressed":
        if has_value:
            file_infra_expressed = value
        else:
            file_infra_expressed = None

    elif parameter_name == "expression-protein-type":
        if has_value:
            expression_protein_type = value
        else:
            expression_protein_type = None

    # ------------------------------------------------------------------------
    # flags: their value is not used
    # ------------------------------------------------------------------------
    elif parameter_name == "verbose":
        verbose = 1
        verbose_command_sequence = 1

    elif parameter_name == "help":
        # print help information and exit
        usage()
        sys.exit(2)
        
def parseArguments():
    """
    Processes the long options found on the command line.

    Each option accepted by getopt is handed over to set_parameter_value(), using the
    option name without its leading dashes as parameter name and the option argument
    as value. When getopt rejects the command line, the reason is written to stderr
    and the program exits with status 2.

    piana.py --help for command line options
    """
    # long options understood by piana.py (for getopt, a trailing "=" means "takes a value")
    accepted_long_options = ["verbose", "help",
                             "piana-dbname=", "piana-dbhost=", "piana-dbuser=", "piana-dbpass=",
                             "depth=", "hub-threshold=",
                             "input-file=", "input-proteins-type=", "input-proteins-species=",
                             "output-proteins-type=", "spots-file-name=",
                             "exec-mode=", "configuration-file=",
                             "results-prefix=", "results-dir=",
                             "benchmark-conf-file=", "gold-std=", "patch-mode="]

    try:
        # no short options are defined (empty string); positional arguments are not used
        parsed_options, positional_arguments = getopt.getopt(sys.argv[1:], "", accepted_long_options)
    except getopt.GetoptError:
        # the exception object is taken from sys.exc_info(), which behaves the same
        # on every Python version
        failure = sys.exc_info()[1]
        sys.stderr.write("\n\n--\ncommand line arguments are not correct: %s\n--\n\n" % (failure,))
        sys.exit(2)

    for option_name, option_value in parsed_options:
        # getopt returns "--name": remove the dashes to obtain the parameter name
        set_parameter_value(parameter_name=option_name.strip("-"), value=option_value)
    # END OF for option_name, option_value in parsed_options:


def parse_configuration_file(input_configuration_file= None):
    """
    parses a piana configuration file

    configuration_file is required in batch_mode. In interactive_mode, it can be used to set parameters values,
    but execution commands will be ignored.

    parameters set by configuration file do not overwrite those that were read from the command line
    (ie. command line has preference over configuration file)

    To create your own configuration file, follow instructions on file general_template.piana_conf

    How the file is read:

      - lines whose first character is "#" and blank lines are ignored
      - until a line starting with ">>>" is found, each line is a parameter line "name=value":
        the value is passed to set_parameter_value(), unless it is "blank" (then the line is skipped).
        Only the first "=" separates name and value, so values may contain "=" themselves.
      - after the ">>>" line, each line is an execution command "command;argument=value;argument=value...":
        the command must be a key of valid_execution_commands. Commands are stored in the global
        execution_sequence as { position: { command: { argument: value, ... } } }
      - once the whole file has been read, execution_sequence is reversed (commands are then taken with pop())

    Raises ValueError when a parameter line has no "=", when a command is unknown or when a command
    is not followed by ";". Errors raised while opening or reading the file are not caught here.
    """

    global execution_sequence

    counter = 0   # position that the next execution command will take in execution_sequence

    parsing_section = "parameters" # variable used to indicate if we are parsing the parameters section of the configuration file
                                   # or the execution commands section. The transition is marked in the file with a line starting
                                   # with ">>>"

    configuration_file_fd = open(input_configuration_file, "r")

    try:
        for conf_line in configuration_file_fd:

            if conf_line[0] == "#" or conf_line.strip() == "":
                # comments and empty lines are skipped
                continue

            if verbose_parse_conf:
                sys.stderr.write("parsing configuration line %s\n" %(conf_line) )

            if parsing_section == "parameters":

                if conf_line[0:3] == ">>>":
                    # transition between parameters and execution commands: the marker line itself carries no data
                    parsing_section = "execution"
                    continue

                # split only on the first "=", so that a value containing "=" is not truncated
                conf_line_fields = conf_line.split("=", 1)

                if len(conf_line_fields) < 2:
                    raise ValueError("Parameter line <%s> of configuration file <%s> does not respect the format name=value\n" %(
                        conf_line.strip(),
                        input_configuration_file))

                parameter_name = conf_line_fields[0].strip()
                parameter_value = conf_line_fields[1].strip()

                if parameter_value != "blank":
                    # this function checks what the option is, and sets the corresponding parameter to "value"
                    set_parameter_value(parameter_name= parameter_name, value= parameter_value)

            # END OF if parsing_section == "parameters":

            elif parsing_section == "execution":
                # if it is an execution line, obtain the command (first element before the ;) and set the arguments accordingly
                #  (execution_sequence follows the structure described in Main section of program)

                execution_line_fields = conf_line.strip().split(";")  # execution_line_fields[0] is the command name
                                                                      # execution_line_fields[1:] are the arguments "name_argument=value_argument"
                command_name = execution_line_fields[0]

                if not command_name in valid_execution_commands.keys():
                    raise ValueError("Unknown execution command <%s> set in configuration file <%s>\n" %(command_name,
                                                                                                         input_configuration_file))

                if len(execution_line_fields) < 2:
                    # the ";" after the command name is mandatory, even for commands without arguments
                    raise ValueError("Execution command <%s> of configuration file <%s> is not followed by ';'\n" %(command_name,
                                                                                                                 input_configuration_file))

                if verbose_parse_conf:
                    sys.stderr.write("parsing execution command fields %s\n" %(execution_line_fields) )

                command_arguments = {}

                if execution_line_fields[1].strip() != "":
                    # if there are arguments (second element of split(";") is not null), retrieve them...
                    for execution_argument in execution_line_fields[1:]:
                        # for each argument, get name and value (which are separated by the first "=")
                        execution_argument_fields = execution_argument.split("=", 1)

                        if len(execution_argument_fields) > 1:
                            # fields without "=" (eg. the empty field after a trailing ";") are ignored
                            command_arguments[execution_argument_fields[0]] = execution_argument_fields[1]
                    # END OF for execution_argument in execution_line_fields[1:]:
                # END OF if execution_line_fields[1].strip() != "":

                execution_sequence[counter] = {command_name: command_arguments}

                counter += 1
            # END OF elif parsing_section == "execution":

        # END OF for conf_line in configuration_file_fd:
    finally:
        # the file is closed even if one of the lines was not correct
        configuration_file_fd.close()

    # reversing the ordered dictionary of execution commands, so we can use pop() to get one by one the dictionary elements
    execution_sequence.reverse()


def parse_benchmark_configuration_file(file_name= None):
    """
    parses a benchmark configuration file

    Each line of the file that is neither empty nor a comment (first character "#") must
    have the format "name=value". Name and value (both stripped of surrounding whitespace)
    are stored as strings in the global dictionary benchmark_arguments. Only the first "="
    of the line separates name from value, therefore values may contain "=".

    "file_name" is the path to the benchmark configuration file

    A line without "=" raises IndexError. Errors raised while opening or reading the file
    are not caught here.
    """

    global benchmark_arguments

    benchmark_file_fd = open(file_name, "r")

    try:
        for benchmark_conf_line in benchmark_file_fd:

            if benchmark_conf_line[0] == "#" or benchmark_conf_line.strip() == "":
                # comments and empty lines are skipped
                continue

            if verbose_parse_conf:
                sys.stderr.write("parsing benchmark configuration line %s\n" %(benchmark_conf_line) )

            # split only on the first "=", so that a value containing "=" is not truncated
            benchmark_conf_line_fields = benchmark_conf_line.split("=", 1)
            benchmark_parameter_name = benchmark_conf_line_fields[0].strip()
            benchmark_parameter_value = benchmark_conf_line_fields[1].strip()

            benchmark_arguments[benchmark_parameter_name] = benchmark_parameter_value

        # END OF for benchmark_conf_line in benchmark_file_fd:
    finally:
        # the file is closed even if one of the lines was not correct
        benchmark_file_fd.close()



def parse_ranked_parameters_file(file_name= None):
    """
    parses a ranked parameters configuration file

    Each line of the file that is neither empty nor a comment (first character "#") is a
    whitespace-separated list of fields:

         [0] --> w_patches=0
         [1] --> w_prots=1
         [2] --> w_belong=0
         [3] --> score_links=huge
         [.] --> statistics on how good those parameters performed in the training (ignored here)

    For each line, the list [w_patches, w_prots, w_belong, field 3] is appended to the global
    list_ranked_parameters: the three weights are converted to int, field 3 is kept as it
    appears in the file.

    "file_name" is the path to the ranked parameters file

    Lines that do not respect the format raise IndexError (missing field or missing "=") or
    ValueError (weight that is not an integer). Errors raised while opening or reading the
    file are not caught here.
    """

    global list_ranked_parameters

    if verbose_parse_conf:
        sys.stderr.write("parsing ranked parameters file %s\n" %(file_name) )

    ranked_parameters_file_fd = open(file_name, "r")

    try:
        for ranked_parameters_line in ranked_parameters_file_fd:

            if ranked_parameters_line[0] == "#" or ranked_parameters_line.strip() == "":
                # comments and empty lines are skipped
                continue

            ranked_parameters_line_fields = ranked_parameters_line.split()

            line_w_patches = int(ranked_parameters_line_fields[0].split("=")[1])
            line_w_prots = int(ranked_parameters_line_fields[1].split("=")[1])
            # w_belong is the third field of the line (index 2), not the w_prots field
            line_w_belong = int(ranked_parameters_line_fields[2].split("=")[1])

            list_ranked_parameters.append([line_w_patches, line_w_prots, line_w_belong, ranked_parameters_line_fields[3] ])

        # END OF for ranked_parameters_line in ranked_parameters_file_fd:
    finally:
        # the file is closed even if one of the lines was not correct
        ranked_parameters_file_fd.close()


def get_reliable_databases():
    """
    Builds the list of reliable interaction databases.

    A database of PianaGlobals.interaction_databases is considered reliable when
    the last two characters of its name are not "_c". The order in which the
    databases appear in PianaGlobals.interaction_databases is preserved.
    """
    return [db_name for db_name in PianaGlobals.interaction_databases if db_name[-2:] != "_c"]


    
#  ------------------------------------------------------------------------------------------------
#  ------------------------------------------------------------------------------------------------
#                                                    Main               
#  ------------------------------------------------------------------------------------------------
#  ------------------------------------------------------------------------------------------------

# --
# General variables that will be instantiated with values from the command line or from the configuration file
# --
# To avoid problems with old configuration files, new parameters have default values assigned
# However, to avoid using non-desired values, it is recommended that the configuration file explicitly sets values for all parameters

# piana database parameters
piana_dbname = piana_dbhost = piana_dbuser = piana_dbpass = None

# input file and description of the proteins it contains
input_file_name = input_proteins_type = input_proteins_species = None

# description of the proteins in the output
output_proteins_type = output_proteins_species = None
list_alternative_types = []   # each list gets its own object: they must not share one
list_keywords = []

# expression files
file_infra_expressed = file_over_expressed = expression_protein_type = None

spots_file_name = None

# parameters used when building the network
depth = None
hub_threshold = None
use_self_ints = None

# selection of interaction source databases and methods
list_source_dbs = inverse_dbs = ignore_unreliable = None
list_source_methods = inverse_methods = None

results_prefix = None        # prefix that all results produced by piana will contain in their name
results_dir = None           # directory where results will be saved
exec_mode = None             # type of execution that will be performed: interactive or batch
configuration_file = None    # file that sets values for piana parameters and execution commands

# benchmark_conf_file is used for benchmarking only. Normally, the arguments of
# find-protein-patches can only be set through the configuration file (or in
# interactive mode). Benchmarking different scoring functions with different
# weights would then require one configuration file per case, which is not very
# convenient: benchmark_conf_file can be used instead to pass the lists of scoring
# function names and weights of each component that will be tested.
# It is not made publicly accessible, because normal users define their scoring
# function and weights in their configuration files and do not need to tune the
# formulas.
# If there is a benchmark file, its parameters override those in the configuration file
benchmark_conf_file = None

gold_std = None
patch_mode = None


# --
# Global variables of the program
# --

# object of type PatchDecomposition describing the patch decomposition of this piana_graph
current_decomposition = None

# execution_sequence is an ordered dictionary describing how the batch execution of
# piana will be done (it is filled by the configuration file parser).
#
# Its keys are counters and not execution commands: if the execution command were the
# key, each command could only appear once in the sequence, and the user must be able
# to execute the same command many times.
#
# structure of execution_sequence is: (if real dictionaries were ordered...)
#
#      { 0:{execution_command1: {argument11:value11, argument12:value12, ...}},
#        1:{execution_command2: {argument21:value21, argument22:value22, ...}},
#        2:{...........................................................
#      }
#
execution_sequence = seqdict()

# parameters parsed from a benchmark configuration file
benchmark_arguments = dict()

# benchmarking of protein binding sites: file, directory and comparison mode
comp_file_name = comp_file_dir = comparison_mode = None

# parameters parsed from a ranked parameters file
list_ranked_parameters = list()



# parsing arguments from the command line
parseArguments()


if configuration_file is not None:
    # parsing configuration file with instructions on how to run piana
    #   (remember that command line options have preference over parameter values in configuration file)

    try:
        parse_configuration_file(configuration_file)

    except Exception:
        # any problem found while reading the configuration file ends the program:
        # report what happened together with the most common causes
        inst = sys.exc_info()[1]
        sys.stderr.write("\n******************\nError reported: %s\n" % (inst,))
        sys.stderr.write("******************\n")
        sys.stderr.write("Your configuration file has errors: check that you respect the format shown in general_template.piana_conf\n")
        sys.stderr.write(" --> read the error reported above\n\n")
        sys.stderr.write("Some typical errors:  \n")
        sys.stderr.write("    - you forgot the ';' after the command name or you wrote a ';' after the last argument of the command\n")
        sys.stderr.write("    - you are using an old configuration file that does not respect the new format\n")
        sys.stderr.write("    - you misspelled the name of the parameters\n")
        sys.stderr.write("    - you are using an invalid command\n")
        sys.stderr.write("    - your configuration file does not exist\n")
        sys.stderr.write("    - your input files do not exist\n\n")
        # exit with a non-zero status (same value as the other error exits of this program),
        # so that calling scripts can detect that piana did not run
        sys.exit(2)

else:
    # no configuration file given...

    if exec_mode is None:
        sys.stderr.write("\nWarning! exec-mode is None --> setting exec-mode to default (interactive)\n\n")
        exec_mode = "interactive"

    if exec_mode == "batch":
        # if exec mode is batch, configuration_file is mandatory
        sys.stderr.write( "\nbatch mode requires a configuration_file \n")
        sys.exit(2)

    elif exec_mode == "interactive":
        # set here defaults for variables that cannot be set through command line or interactive mode

        list_source_dbs="all"
        use_self_ints = "yes"
        inverse_dbs="no"
        ignore_unreliable="no"
        list_source_methods="all"
        inverse_methods="no"

        list_alternative_types=["uniacc", "unientry", "gi", "geneName", "md5", "proteinPiana"]
        list_keywords=[]

        file_infra_expressed = None
        file_over_expressed = None
        expression_protein_type= None

        output_proteins_species="all"

# END OF if configuration_file is not None:


# in interactive mode, the user is asked for the db name and the db host
# when they have not been set yet
if exec_mode == "interactive":
    if piana_dbname is None:
        piana_dbname = raw_input(string_get_piana_dbname)

    if piana_dbhost is None:
        piana_dbhost = raw_input(string_get_piana_dbhost)

# results are saved in the current directory unless another directory was set
if results_dir is None:
    results_dir = "./"


# Processing list_source_dbs to include/exclude unreliable interactions
#   --> piana has interactions that were "inferred" after completing codes (extending
#       cross-references from the existing references), as well as interactions that are
#       not very reliable (eg. they were annotated using ambiguous gene names).
#       These interactions are labeled with the name of the database + "_c"
#
#   When the user asks for all databases but has set ignore-unreliable to yes, "all" is
#   replaced by the explicit list of databases whose name does not end with _c

if ignore_unreliable == "yes" and list_source_dbs == "all":
    list_source_dbs = get_reliable_databases()
# END OF if ignore_unreliable == "yes" and list_source_dbs == "all":
    



# ---------------------------------
# open an interface to the piana api, using the database parameters obtained above
piana_api = PianaApi(piana_dbname=piana_dbname,
                     piana_dbhost=piana_dbhost,
                     piana_dbuser=piana_dbuser,
                     piana_dbpass=piana_dbpass)
# ---------------------------------


# species names are converted to tax ids: input_tax_id and output_tax_id are
# only defined when the corresponding species name has been set

if input_proteins_species:
    input_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=input_proteins_species)

if output_proteins_species:
    output_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=output_proteins_species)




# TO DO!!!!!!!!!!
# in some cases (eg. protein_code2protein_code) we do not want to build the network for input_file
# how do I make the difference between those cases and the others??????

if input_file_name is not None:
    # ----------------------------------------------------------------------------------------------------------
    # this is a very specific case for piana.py... normally, building a network is just a matter of adding files,
    # proteins and interaction files. However, when the user gives an input_file_name through the command line
    # we must build a network directly for that input_file_name. Why? Because in many cases, the user just wants
    # to work with one proteins file, and instead of adding the file through the interactive menu (or through the
    # configuration file) we let him speed up things by setting it directly through this command line parameter.
    # Briefly: when the parameter input_file_name is not None, build the network for it making appropriate calls...
    # ----------------------------------------------------------------------------------------------------------

    if exec_mode == "batch":
        # in batch mode nothing can be asked to the user: all parameters must have been set in advance
        if input_proteins_type is None or input_proteins_species is None or depth is None or hub_threshold is None:
            sys.stderr.write("input-proteins-type(%s), input-proteins-species(%s), hub_threshold (%s) and depth (%s) cannot be None\n" %(
                input_proteins_type,
                input_proteins_species,
                hub_threshold,
                depth))
            raise ValueError("In batch mode, when giving an input-file, all parameters stated above must be passed as well\n")

        this_file_protein_type = input_proteins_type
        this_file_tax_id = input_tax_id
        this_file_depth = depth
        this_file_hub_threshold = hub_threshold
    else:
        # exec_mode is interactive: parameters that were not set are asked to the user
        if input_proteins_type is None:
            this_file_protein_type = raw_input("Input file protein type: " + string_get_protein_type)
        else:
            this_file_protein_type = input_proteins_type

        if input_proteins_species is None:
            this_file_protein_species = raw_input("Input file protein species: "+string_get_species_name)
        else:
            this_file_protein_species = input_proteins_species

        # the tax id must be obtained from this_file_protein_species: input_proteins_species is None
        # when the species name has just been typed by the user
        this_file_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_file_protein_species)

        if depth is None:
            this_file_depth = int(raw_input(string_get_depth))
        else:
            this_file_depth = depth

        if hub_threshold is None:
            this_file_hub_threshold = int(raw_input(string_get_hub_threshold))
        else:
            this_file_hub_threshold = hub_threshold


    if verbose_command_sequence:
        sys.stderr.write("Adding proteins in input file %s of type %s with threshold %s\n" %(input_file_name, this_file_protein_type,
                                                                                             this_file_hub_threshold ))

    file_object = open(input_file_name, "r")
    try:
        piana_api.add_file_proteins_to_piana_graph(file_object= file_object,
                                                   protein_type_name= this_file_protein_type,
                                                   tax_id_value= this_file_tax_id,
                                                   depth = this_file_depth,
                                                   hub_threshold= this_file_hub_threshold,
                                                   use_self_ints=use_self_ints,
                                                   list_source_dbs= list_source_dbs,
                                                   inverse_dbs= inverse_dbs,
                                                   list_source_methods= list_source_methods,
                                                   inverse_methods= inverse_methods )
    finally:
        # the input file is closed even if the proteins could not be added to the network
        file_object.close()
# END OF if input_file_name is not None:


"""
the piana_object has been initialized: get PIANA command (interactively or from configuration file) to be executed and proceed


1. Get command from menu (in interactive mode) or from configuration file (in batch mode)
2. Execute command

"""
option = None
while option!= "exit":


    # 1. Get command to be executed

    if exec_mode == "interactive":
        # If interactive mode, print list of available options
        sys.stdout.write( "=============================================================\n")
        sys.stdout.write( "PIANA is under GNU GPL\n")
        sys.stdout.write( "piana v. 1.0, Copyright (C) 2005 Ramon Aragues\n")
        sys.stdout.write( "piana comes with ABSOLUTELY NO WARRANTY; for details\n")
        sys.stdout.write( "read the license.  This is free software, and you are welcome\n")
        sys.stdout.write( "to redistribute it under certain conditions; read the license \n")
        sys.stdout.write( "for details.\n")

        sys.stdout.write( "==============================================================\n")
        sys.stdout.write( "PIANA options are:\n\n")
        
        for execution_command in valid_execution_commands.keys():
            sys.stdout.write("%s --> %s\n" %(execution_command.ljust(30), valid_execution_commands[execution_command][0] ))
            
        sys.stdout.write( "==============================================================\n")
        option = raw_input("\nChoose an option: ")

        sys.stdout.write("option chosen was: %s\n" %option)
    # END OF if exec_mode == "interactive":

    
    elif exec_mode == "batch":
        # if batch mode, get the execution command and arguments from the execution_sequence ordered dictionary
        current_command = execution_sequence.pop()
        
        option = (current_command.values()[0]).keys()[0]  # option is a string containing the command (must be in valid_execution_commands)
        arguments = current_command.values()[0][option]   # arguments is a dictionary with {argument_name=argument_value, ...} for this command
    # END OF elif exec_mode == "batch":



    # 2. Execute the command
    
    """
    In either mode, batch or interactive, option is the command that piana must execute

    In interactive mode, the parameters for the can be already set
    (through command line or configuration file) or obtained from the
    user interactively. They are obtained interactively in case their
    value is None.

    In batch mode, input parameters must be set in advance (through command line or configuration file). 
      --> read arguments from configuration file (which has been previously parsed)
          ( arguments is a dictionary with {argument_name=argument_value, ...} )


    The code is identical for all commands:

    1. get arguments values from user (either interactively or from the configuration file)
    2. check and prepare (if necessary) arguments values
    3. call a PianaApi method 
    """

    # -------------------------------------------------------------------------------------------------
    # option == reset-network
    # start a new network
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: reset-network;
    #
    if option == "reset-network":

        if verbose:
            sys.stderr.write("Reseting network\n")
            
        piana_api.reset_piana_graph()
        
        
           
    # END OF elif option == "reset-network":

    # -------------------------------------------------------------------------------------------------
    # option == save-network
    # saves the current network into a disc file
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: save-network;file-name=file_name
    #
    elif option == "save-network":

        if exec_mode == "interactive":
            save_file_name = raw_input(string_get_save_file)  

        elif exec_mode == "batch":
            try:
                if arguments[arg_file_name] != "blank":    save_file_name = arguments[arg_file_name]
                else:
                    sys.stderr.write("Trying to save the network without providing a file name!\n")
                    sys.stderr.write("You must set the destination file name in the arguments of your command save-network\n")
                    sys.exit()
            except:
                sys.stderr.write("configuration file error: make sure your command save-network respects the format described in general_template.piana_conf\n")
                sys.exit()
                
        # END OF elif exec_mode == "batch": (if exec_mode == "interactive":)

        
        if verbose_command_sequence:
            sys.stderr.write("Saving network\n")
            
        # save the current network
        file_object = file(results_dir + save_file_name, "wb")

        cPickle.dump(piana_api, file_object , 2) 
        file_object.close()

    # END OF elif option == "save-network":

    # -------------------------------------------------------------------------------------------------
    # option == load-network
    # loads network from a file, overwriting current network in memory
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: load-network;file-name=file_name
    #
    elif option == "load-network":

        if exec_mode == "interactive":
            load_file_name = raw_input(string_get_load_file)  

        elif exec_mode == "batch":
            try:
                if arguments[arg_file_name] != "blank":  load_file_name = arguments[arg_file_name]
                else:  
                    sys.stderr.write("Trying to load a network without providing a file name!\n")
                    sys.stderr.write("You must set the destination file name in the arguments of your command save-network\n")
                    sys.exit()  
            except:
                sys.stderr.write("configuration file error: make sure your command load-network respects the format described in general_template.piana_conf\n")
                sys.exit()            
            
        # END OF elif exec_mode == "batch": (if exec_mode == "interactive":)


        if verbose_command_sequence:
            sys.stderr.write("Loading network\n")
            
        file_object = file(results_dir + load_file_name, "rb")
        piana_api = cPickle.load(file_object)
        file_object.close()
   
    # END OF elif option == "load-network":

    # -------------------------------------------------------------------------------------------------
    # option == add-protein
    # add a protein to the network (and its interaction partners at depth X)
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: add-protein;protein-name=protein_name;protein-type=this_protein_code_type
    #
    elif option == "add-protein":

        if exec_mode == "interactive":

            # protein name cannot be set through the command line:always ask for it
            protein_name = raw_input("Protein to add to network: " + string_get_protein)  

            if input_proteins_type is None:  this_protein_type = raw_input("Input protein type: " + string_get_protein_type)
            else:                            this_protein_type = input_proteins_type
                
            if input_proteins_species is None: this_protein_species = raw_input("Input protein species: " + string_get_species_name)
            else:                              this_protein_species = input_proteins_species
            
            
            if depth is None:          depth = int(raw_input(string_get_depth))
                
            if hub_threshold is None:  hub_threshold = int(raw_input(string_get_hub_threshold))

        elif exec_mode == "batch":
            
            protein_name = arguments[arg_protein_name]
            
            if arguments[arg_protein_type] != "blank":   this_protein_type = arguments[arg_protein_type]
            else:                                        this_protein_type = input_proteins_type
                
            if arguments[arg_species_name] != "blank":   this_protein_species = arguments[arg_species_name]
            else:                                        this_protein_species = input_proteins_species

        # END OF elif exec_mode == "batch": (if exec_mode == "interactive":)


        # check required parameters
        if protein_name is None or this_protein_type is None or this_protein_species is None or depth is None or hub_threshold is None:
            raise ValueError("To add a protein, protein name (%s), protein type (%s), protein species (%s) depth(%s) and hub_threshold (%s) are required\n" %(
                protein_name,
                this_protein_type,
                this_protein_species,
                depth,
                hub_threshold))
        
        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_protein_species)

        if verbose_command_sequence:
            sys.stderr.write("Adding protein %s of type name %s with tax %s\n" %(protein_name, this_protein_type, this_tax_id))
          
        piana_api.add_protein_to_piana_graph(protein_code=protein_name,
                                             protein_type_name = this_protein_type,
                                             tax_id_value= this_tax_id,
                                             depth=depth,
                                             hub_threshold=hub_threshold,
                                             use_self_ints = use_self_ints,
                                             list_source_dbs=list_source_dbs,
                                             inverse_dbs= inverse_dbs,
                                             list_source_methods=list_source_methods,
                                             inverse_methods= inverse_methods)

    # END OF elif option == "add-protein":

    # -------------------------------------------------------------------------------------------------
    # option == add-proteins-file
    # add a list of proteins from a text file to the current network
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: add-proteins-file;file-name=complete_path_to_file;protein-type=this_file_proteins_code_type
    #
    elif option == "add-proteins-file":

        if exec_mode == "interactive":
            file_name = raw_input( string_get_file_name)
            
            if input_proteins_type is None:  this_file_protein_type = raw_input("File protein type: " + string_get_protein_type)
            else:                            this_file_protein_type= input_proteins_type
                
            if input_proteins_species is None: this_file_protein_species = raw_input("Input protein species: "+string_get_species_name)
            else:                              this_file_protein_species = input_proteins_species
                
            if depth is None:   depth = int(raw_input(string_get_depth))
                
            if hub_threshold is None:    hub_threshold = int(raw_input(string_get_hub_threshold))

        elif exec_mode == "batch":
                
            if arguments[arg_file_name] != "blank":         file_name = arguments[arg_file_name]
            else:                raise ValueError("Trying to add a proteins file without setting its name in configuration file\n")
            
            if arguments[arg_protein_type] != "blank":      this_file_protein_type = arguments[arg_protein_type]
            else:                                           this_file_protein_type = input_proteins_type

            if arguments[arg_species_name] != "blank":      this_file_protein_species = arguments[arg_species_name]
            else:                                           this_file_protein_species = input_proteins_species
        # END OF elif exec_mode == "batch":

        # check required parameters
        if file_name is None or this_file_protein_type is None or this_file_protein_species is None:
            raise ValueError("To add proteins from a file, file name (%s), protein type (%s) and protein species (%s) are required\n" %(
                file_name,
                this_file_protein_type,
                this_file_proteins_species))

        this_file_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_file_protein_species)
        
        if verbose_command_sequence:
            sys.stderr.write("Adding proteins in file %s of type %s with threshold %s\n" %(file_name, this_file_protein_type, hub_threshold))

        file_object = file(file_name, "r")
        piana_api.add_file_proteins_to_piana_graph(file_object= file_object,
                                                   protein_type_name= this_file_protein_type,
                                                   tax_id_value= this_file_tax_id,
                                                   depth=depth,
                                                   hub_threshold=hub_threshold,
                                                   use_self_ints = use_self_ints,
                                                   list_source_dbs=list_source_dbs,
                                                   inverse_dbs= inverse_dbs,
                                                   list_source_methods=list_source_methods,
                                                   inverse_methods= inverse_methods)
        file_object.close()
    # END OF elif option == "add-proteins-file":

    
    # -------------------------------------------------------------------------------------------------
    # option == add-interactions-file
    # add a list of interactions from a text file to the current network
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: add-interactions-file;file-name=complete_path_to_file;protein-type=this_file_interactions_code_type
    #
    elif option == "add-interactions-file":

        if exec_mode == "interactive":
            file_name = raw_input(string_get_int_file_name)
            
            if input_proteins_type is None:
                this_file_protein_type = raw_input("Proteins type used in interactions: " + string_get_protein_type)
            else:
                this_file_protein_type= input_proteins_type

        elif exec_mode == "batch":

            if arguments[arg_file_name] != "blank":
                file_name = arguments[arg_file_name]
            else:
                raise ValueError("Trying to add a interactions file without setting its name in configuration file\n")

            if arguments[arg_protein_type] != "blank":
                this_file_protein_type = arguments[arg_protein_type]
            else:
                this_file_protein_type = input_proteins_type

        # END OF elif exec_mode == "batch":


        if file_name is None or this_file_protein_type is None:
            raise ValueError("To add interactions, file name (%s) and a protein type (%s) are required\n" %(file_name,
                                                                                                            this_file_protein_type ))
            
        if verbose_command_sequence:
            sys.stderr.write("Adding interactions in file %s with proteins of type %s\n" %(file_name, this_file_protein_type))


        file_object = file(file_name, "r")
        
        piana_api.add_file_interactions_to_piana_graph(file_object= file_object, protein_type_name= this_file_protein_type)
       
    # END OF elif option == "add-interactions-file":

    # -------------------------------------------------------------------------------------------------
    # option == species-network
    # builds a protein-protein network for all proteins in a given species
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: species-network;species-name=species_name;tax-id=tax_id
    #
    elif option == "species-network":

        # TO DO!!! let the user choose the taxonomy directly, instead of going through species name
        taxonomy_value = None
        
        if exec_mode == "interactive":
            temp_species_name = raw_input( "Build network of species: " + string_get_net_species_name)
            temp_tax_id = raw_input( "Build network of taxonomy: " + string_get_net_tax_id)

            if temp_species_name == "blank":      species_name = None
            else:                                 species_name =temp_species_name

            if temp_tax_id == "blank":            tax_id= None
            else:                                 tax_id= temp_tax_id
                
            if hub_threshold is None:  hub_threshold = int(raw_input(string_get_hub_threshold))
                
        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)

            if arguments[arg_species_name] != "blank":        species_name = arguments[arg_species_name]
            else:                                             species_name = None
                
            if arguments[arg_tax_id] != "blank":              tax_id = int(arguments[arg_tax_id])
            else:                                             tax_id = None
        # END OF elif exec_mode == "batch":
        
        if species_name is None and tax_id is None or hub_threshold is None:
            raise ValueError("Trying to build species network without setting a species (species name(%s) or taxonomy id(%s)) or hub threshold (%s)\n" %(
                species_name, tax_id, hub_threshold))
            
        if verbose_command_sequence:
            sys.stderr.write("Building the network for (species %s -- tax_id %s) \n" %(species_name, tax_id))


        piana_api.create_species_piana_graph(species_name= species_name,
                                             tax_id= tax_id,
                                             hub_threshold= hub_threshold,
                                             use_self_ints = use_self_ints,
                                             list_source_dbs= list_source_dbs,
                                             inverse_dbs= inverse_dbs,
                                             list_source_methods= list_source_methods,
                                             inverse_methods= inverse_methods)
        
        
       
    # END OF elif option == "species-network":

    # -------------------------------------------------------------------------------------------------
    # option == database-method-network
    # builds a protein-protein network for all interactions in a given database and/or a given method
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: database-method-network;database-name=database_name;method-name=method_name
    #
    elif option == "database-method-network":
        
        if exec_mode == "interactive":
            database_name = raw_input( "Database to be used: " + string_get_dbname)
            method_name = raw_input( "Method to be used: " + string_get_methodname)
            species_name = raw_input( "Species to be used: " + string_get_species_name)
                
            if hub_threshold is None:  hub_threshold = int(raw_input(string_get_hub_threshold))
            
        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)

            if arguments[arg_database_name] != "blank":
                database_name = arguments[arg_database_name]
            else:
                raise ValueError("Trying to build network without setting the database name in configuration file\n")
            
            if arguments[arg_method_name] != "blank":
                method_name = arguments[arg_method_name]
            else:
                raise ValueError("Trying to build  network without setting the method name in configuration file\n")

            if arguments[arg_species_name] != "blank":
                species_name = arguments[arg_species_name]
            else:
                raise ValueError("Trying to build species network without setting the species name in configuration file\n")

        # END OF elif exec_mode == "batch":

        if species_name is None:
            raise ValueError("Trying to build database-method network without setting a species name (%s)\n" %(species_name))
        
        if database_name != "all":
            list_source_dbs = [database_name]
            inverse_dbs= "no"
        else:
            # in case database_name is set to all and ignore_unreliable is yes, then limit the source databases
            # to those that are reliable. 
            if ignore_unreliable == "yes":
                list_source_dbs = get_reliable_databases()
            
            
        if method_name != "all":
            list_source_methods = [method_name]
            inverse_methods= "no"

        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=species_name)
        
        
        if verbose_command_sequence:
            sys.stderr.write("Building the network for species %s source db %s and method %s\n" %(species_name,
                                                                                                  database_name,
                                                                                                  method_name))
        # TO DO!!! Why am I not using hub_threshold in this method?
        piana_api.create_database_method_piana_graph(tax_id_value= this_tax_id,
                                                     use_self_ints = use_self_ints,
                                                     list_source_dbs= list_source_dbs,
                                                     inverse_dbs= inverse_dbs,
                                                     list_source_methods = list_source_methods,
                                                     inverse_methods= inverse_methods)
        
    # END OF elif option == "database-method-network":

    
    # -------------------------------------------------------------------------------------------------
    # option == print-table or print-network
    #
    # print network, in several output flavors:
    #
    #   print-table: standard table with protein interactions
    #   print-network: standard DOT file
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: print-table;output-target=output_target;protein-type=protein_code_type;print-mode=print_mode;format-mode=format_mode
    # configuration file syntax: print-network;output-target=output_target;protein-type=protein_code_type;print-mode=print_mode;format-mode=format_mode

    elif option == "print-table" or option == "print-network":

        if exec_mode == "interactive":
            output_name = raw_input(string_get_output_name)
            print_mode = raw_input(string_get_print_mode)
            if option == "print-table":
                format_mode = raw_input(string_get_format_mode)
            else:
                format_mode = "dot"
            
            if output_proteins_type is None:            this_print_proteins_type_name = raw_input("Output: " + string_get_protein_type)
            else:                                       this_print_proteins_type_name = output_proteins_type
                
            if output_proteins_species is None: this_protein_species = raw_input("Output protein species: "+ string_get_species_name)
            else:                               this_protein_species = output_proteins_species


        # END OF if exec_mode == "interactive":
        
        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)

            if arguments[arg_protein_type] != "blank":
                this_print_proteins_type_name = arguments[arg_protein_type]
            else:
                if output_proteins_type is not None:
                    this_print_proteins_type_name = output_proteins_type
                else:
                    raise ValueError("output proteins type cannot be None")

            if arguments[arg_print_mode] != "blank":             print_mode = arguments[arg_print_mode]
            else:                                                print_mode = "all"
            
            if arguments[arg_format_mode] != "blank":            format_mode = arguments[arg_format_mode]
            else:                                                format_mode = "txt"
                

            if arguments[arg_output_target] != "blank":          output_name = arguments[arg_output_target]
            else:
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")
                
                output_name = results_dir + results_prefix + "." + print_mode + "." + option + "." + format_mode

            this_protein_species = output_proteins_species
        # END OF elif exec_mode == "batch":


        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_protein_species)
        
        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name , "w")


        if option == "print-table":


            if verbose_command_sequence:
                sys.stderr.write("Printing out table to file %s with output type %s\n" %(output_name, this_print_proteins_type_name))

                
            # Output to stdout the interactions of the network 
            piana_api.print_interactions( protein_type_name= this_print_proteins_type_name,
                                          output_file_object= output_target,
                                          output_format= "table",
                                          print_mode = print_mode,
                                          format_mode = format_mode,
                                          list_alternative_type_names= list_alternative_types,
                                          tax_id_value= this_tax_id,
                                          list_keywords= list_keywords,
                                          file_over_expressed= file_over_expressed,
                                          file_infra_expressed= file_infra_expressed,
                                          expression_protein_type= expression_protein_type)
            
        elif option == "print-network":

            if verbose_command_sequence:
                sys.stderr.write("Printing out DOT network to file %s\n" %(output_name))
            
            # Output to stdout the network file in DOT format
            piana_api.print_interactions( protein_type_name= this_print_proteins_type_name,
                                          output_file_object= output_target,
                                          output_format= "network",
                                          print_mode = print_mode,
                                          format_mode = format_mode,
                                          list_alternative_type_names= list_alternative_types,
                                          tax_id_value= this_tax_id,
                                          list_keywords= list_keywords,
                                          file_over_expressed= file_over_expressed,
                                          file_infra_expressed= file_infra_expressed,
                                          expression_protein_type= expression_protein_type)
            
        # END OF elif option == "print-network":
 
        if output_name != "screen":
            output_target.close()

    # END OF elif option == "print-table" or option == "print-network":


    # -------------------------------------------------------------------------------------------------
    # option == print-table-db-intersection or print-network-db-intersection
    #
    # prints interactions that are in several databases in different flavors:
    #
    #    - print-table-db-intersection: prints table with interactions that appear in several databases
    #    - print-network-db-intersection: prints network with interactions that appear in several databases 
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: print-table-db-intersection;output-target=output_target;protein-type=protein_code_type;print-mode=print_mode;list-dbs=dbname1:dbname2:...;format-mode=format_mode
    # configuration file syntax: print-network-db-intersection;output-target=output_target;protein-type=protein_code_type;print-mode=print_mode;list-dbs=dbname1:dbname2:...;format-mode=format_mode
    #
    elif option == "print-table-db-intersection" or option == "print-network-db-intersection":


        db_list =[]

        if exec_mode == "interactive":
            output_name = raw_input(string_get_output_name)
            print_mode = raw_input(string_get_print_mode)
                
            if output_proteins_species is None: this_protein_species = raw_input("Output protein species: "+ string_get_species_name)
            else:                               this_protein_species = output_proteins_species
            
            if option == "print-table":
                format_mode = raw_input(string_get_format_mode)
            else:
                format_mode = "dot"
            
            if output_proteins_type is None:
                # if no output proteins code type was given, ask user to give one
                this_print_proteins_type_name = raw_input("Output: " + string_get_protein_type)
            else:
                this_print_proteins_type_name= output_proteins_type

            # Ask the user to provide the databases where an interaction must appear in order to be printed
            ext_db_name = None

            while (ext_db_name !="end"):
                ext_db_name = raw_input(string_get_int_dbname)
                if ext_db_name != "end":
                    db_list.append(ext_db_name)
            # END OF while (db_name !="end"):

        # END OF if exec_mode == "interactive":
        
        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)

            if arguments[arg_list_dbs] != "blank":
                colon_separated_dbs = arguments[arg_list_dbs]
                db_list = colon_separated_dbs.strip().split(":")
            else:
                raise ValueError("Trying to %s without providing any database in the configuration file" %option)


            if arguments[arg_print_mode] != "blank":            print_mode = arguments[arg_print_mode]
            else:                                               print_mode = "all"
            
            if arguments[arg_format_mode] != "blank":            format_mode = arguments[arg_format_mode]
            else:                                                format_mode = "txt"
            
            if arguments[arg_output_target] != "blank":
                output_name = arguments[arg_output_target]
            else:
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")

                output_name = results_dir + results_prefix + "." + print_mode + "." + option +  "." + format_mode 
                for dbname in db_list:
                    output_name = output_name + "_" + dbname
            
            if arguments[arg_protein_type] != "blank":
                this_print_proteins_type_name = arguments[arg_protein_type]
            else:
                if output_proteins_type is not None:
                    this_print_proteins_type_name = output_proteins_type
                else:
                    raise ValueError("output proteins type cannot be None")

            this_protein_species = output_proteins_species
        # END OF elif exec_mode == "batch":
        
        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_protein_species)
        
        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name,"w")


        if option == "print-table-db-intersection":
            
            if verbose_command_sequence:
                sys.stderr.write("Printing out intersection table to file %s with output type %s\n" %(output_name, this_print_proteins_type_name))
                
            piana_api.print_interactions( protein_type_name= this_print_proteins_type_name,
                                          output_file_object= output_target,
                                          output_format= "table",
                                          intersection_dbs = db_list,
                                          print_mode = print_mode,
                                          format_mode = format_mode,
                                          list_alternative_type_names= list_alternative_types,
                                          tax_id_value= this_tax_id,
                                          list_keywords= list_keywords,
                                          file_over_expressed= file_over_expressed,
                                          file_infra_expressed= file_infra_expressed,
                                          expression_protein_type= expression_protein_type)
            
            
        elif option == "print-network-db-intersection":
            
            if verbose_command_sequence:
                sys.stderr.write("Printing out intersection network to file %s with output type %s\n" %(output_name, this_print_proteins_type_name))
                
            piana_api.print_interactions( protein_type_name= this_print_proteins_type_name,
                                          output_file_object= output_target,
                                          output_format= "network",
                                          intersection_dbs = db_list,
                                          print_mode = print_mode,
                                          format_mode = format_mode,
                                          list_alternative_type_names= list_alternative_types,
                                          tax_id_value= this_tax_id,
                                          list_keywords= list_keywords,
                                          file_infra_expressed= file_infra_expressed,
                                          expression_protein_type= expression_protein_type)
        # END OF elif option == "print-network-db-intersection":
            
        if output_name != "screen":
            output_target.close()

    # END OF elif option == "print-table-db-intersection" or option == "print-network-db-intersection":


    
    # -------------------------------------------------------------------------------------------------
    # option == print-all-prots-info or print-root-prots-info or print-connect-prots-info 
    #
    #  prints information about proteins in the network in different flavors:
    #
    #    - print-all-prots-info: about all proteins in the network
    #    - print-root-prots-info: about root proteins
    #    - print-connect-prots-info: about proteins that connect root nodes of the network
    # 
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: print-all-prots-info;output-target=output_target;protein-type=protein_code_type;output-mode=output_mode;format-mode=format_mode
    # configuration file syntax: print-root-prots-info;output-target=output_target;protein-type=protein_code_type;output-mode=output_mode;format-mode=format_mode
    # configuration file syntax: print-connect-prots-info;output-target=output_target;protein-type=protein_code_type;output-mode=output_mode;format-mode=format_mode

    elif option == "print-all-prots-info" or option == "print-root-prots-info" or option == "print-connect-prots-info":

        if exec_mode == "interactive":
            output_name = raw_input(string_get_output_name)
            
            if output_proteins_type is None:
                this_print_proteins_type_name = raw_input("Output: " + string_get_protein_type)
            else:
                this_print_proteins_type_name = output_proteins_type

                
            if output_proteins_species is None: this_protein_species = raw_input("Output protein species: "+ string_get_species_name)
            else:                               this_protein_species = output_proteins_species
            
            output_mode = raw_input(string_get_output_mode)
            format_mode = raw_input(string_get_format_mode)

        # END OF if exec_mode == "interactive":
        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)

                
            if arguments[arg_output_mode] != "blank":            output_mode = arguments[arg_output_mode]
            else:                                                output_mode = default_output_mode
            
            if arguments[arg_format_mode] != "blank":            format_mode = arguments[arg_format_mode]
            else:                                                format_mode = "txt"
            
            if arguments[arg_output_target] != "blank":
                output_name = arguments[arg_output_target]
            else:
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")

                output_name = results_dir + results_prefix + "." + output_mode + "." + option + "." + format_mode 
            
            if arguments[arg_protein_type] != "blank":
                this_print_proteins_type_name = arguments[arg_protein_type]
            else:
                if output_proteins_type is not None:
                    this_print_proteins_type_name = output_proteins_type
                else:
                    raise ValueError("output proteins type cannot be None")

            this_protein_species = output_proteins_species
        # END OF elif exec_mode == "batch":
        
        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_protein_species)
        
        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name,"w")


        if verbose_command_sequence:
            sys.stderr.write("Printing out info for proteins to file %s\n" %(output_name) )


        if option == "print-all-prots-info" :
            piana_api.print_all_proteins_information( protein_type_name=this_print_proteins_type_name,
                                                      output_file_object= output_target,
                                                      output_mode= output_mode,
                                                      format_mode = format_mode,
                                                      list_alternative_type_names=list_alternative_types,
                                                      list_keywords= list_keywords,
                                                      tax_id_value= this_tax_id,
                                                      file_over_expressed= file_over_expressed,
                                                      file_infra_expressed= file_infra_expressed,
                                                      expression_protein_type= expression_protein_type)
        elif option == "print-root-prots-info":
            piana_api.print_root_proteins_information( protein_type_name=this_print_proteins_type_name,
                                                       output_file_object= output_target,
                                                       output_mode= output_mode,
                                                       format_mode = format_mode,
                                                       list_alternative_type_names=list_alternative_types,
                                                       list_keywords= list_keywords,
                                                       tax_id_value= this_tax_id,
                                                       file_over_expressed= file_over_expressed,
                                                       file_infra_expressed= file_infra_expressed,
                                                       expression_protein_type= expression_protein_type)
            
        elif option == "print-connect-prots-info":
            piana_api.print_connecting_proteins_information( protein_type_name=this_print_proteins_type_name,
                                                             output_file_object= output_target,
                                                             output_mode= output_mode,
                                                             format_mode = format_mode,
                                                             list_alternative_type_names=list_alternative_types,
                                                             list_keywords= list_keywords,
                                                             tax_id_value= this_tax_id,
                                                             file_over_expressed= file_over_expressed,
                                                             file_infra_expressed= file_infra_expressed,
                                                             expression_protein_type= expression_protein_type)
            
        # END OF elif option ==  "print-connect-prots-info":
  
        if output_name != "screen":
            output_target.close()
        
                   
    # END OF elif option == "print-all-prots-info" or option == "print-root-prots-info" or
    #             option == "print-connect-prots-info":

    

    # -------------------------------------------------------------------------------------------------
    # option == protein-code-2-protein-code
    # transforms codes from input-file (which are of type input-proteins-type) to output-proteins-type
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: protein-code-2-protein-code;output-target=blank
    #
    elif option == "protein-code-2-protein-code":

        if exec_mode == "interactive":

            output_name = raw_input(string_get_output_name)

            if input_file_name is None:
                input_file_name = raw_input(string_get_file_name)
           
            if input_proteins_type is None:
                input_proteins_type = raw_input("Input code: " + string_get_protein_type)

            if output_proteins_type is None:
                output_proteins_type = raw_input("Output code: " + string_get_protein_type)
                
            if output_proteins_species is None: this_protein_species = raw_input("Output protein species: "+ string_get_species_name)
            else:                               this_protein_species = output_proteins_species
                
            format_mode = raw_input(string_get_format_mode)
               

        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)
            #  ( arguments is a dictionary with {argument_name=argument_value, ...} )

            if input_file_name is None or input_proteins_type is None or output_proteins_type is None or results_prefix is None:
                raise ValueError("input_file_name (%s), input_proteins_type (%s) and output_proteins_type (%s) and results_prefix (%s) cannot be None\n" %(
                    input_file_name,
                    input_proteins_type,
                    output_proteins_type,
                    results_prefix))
            
            if arguments[arg_format_mode] != "blank":            format_mode = arguments[arg_format_mode]
            else:                                                format_mode = "txt"
   
            if arguments[arg_output_target] != "blank":
                output_name = arguments[arg_output_target]
            else:
                output_name = results_dir + results_prefix + "." + option + "."  + input_proteins_type + "2" + output_proteins_type + "." + format_mode

            this_protein_species = output_proteins_species
        # END OF elif exec_mode == "batch":
           
        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_protein_species)
             
        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name,"w")

        input_file_object = file(input_file_name, "r")
            
        """
        At this point, all required parameters have a value assigned to them. Proceed...
        """
            
        if verbose_command_sequence:
            sys.stderr.write("Printing out code equivalences of proteins in %s to file %s\n" %(input_file_name, output_name))


        piana_api.protein_code_2_protein_code(input_file_object= input_file_object,
                                              input_proteins_type= input_proteins_type,
                                              output_file_object= output_target,
                                              format_mode = format_mode,
                                              output_proteins_type= output_proteins_type,
                                              list_alternative_types= list_alternative_types,
                                              tax_id_value = this_tax_id)

        if output_name != "screen":
            output_target.close()
        
    # END OF elif option == "protein-code-2-protein-code":

    # -------------------------------------------------------------------------------------------------
    # option == protein-code-2-all-info
    # prints information for the proteins in input-file (which are of type input-proteins-type), using output-proteins-type codes
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax:
    #
    elif option == "protein-code-2-all-info":

        if exec_mode == "interactive":

            output_name = raw_input(string_get_output_name)

            if input_file_name is None:
                input_file_name = raw_input(string_get_file_name)
           
            if input_proteins_type is None:
                input_proteins_type = raw_input("Input code: " + string_get_protein_type)

            if output_proteins_type is None:
                this_print_proteins_type_name = raw_input("Output: " + string_get_protein_type)
            else:
                this_print_proteins_type_name = output_proteins_type

                
            format_mode = raw_input(string_get_format_mode)
         
            output_mode = raw_input(string_get_output_mode)

            
        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)
            #  ( arguments is a dictionary with {argument_name=argument_value, ...} )

            if input_file_name is None or input_proteins_type is None  or results_prefix is None:
                raise ValueError("input_file_name (%s), input_proteins_type (%s) and results_prefix (%s) cannot be None\n" %(
                    input_file_name,
                    input_proteins_type,
                    results_prefix))
             
            if arguments[arg_protein_type] != "blank":
                this_print_proteins_type_name = arguments[arg_protein_type]
            else:
                if output_proteins_type is not None:
                    this_print_proteins_type_name = output_proteins_type
                else:
                    raise ValueError("output proteins type cannot be None")
                
            if arguments[arg_output_mode] != "blank":            output_mode = arguments[arg_output_mode]
            else:                                                output_mode = default_output_mode
            
            if arguments[arg_format_mode] != "blank":            format_mode = arguments[arg_format_mode]
            else:                                                format_mode = "txt"
            
            if arguments[arg_output_target] != "blank":  output_name = arguments[arg_output_target]
            else:                                        output_name = results_dir + results_prefix  + "." + option + "." + format_mode

        # END OF elif exec_mode == "batch":

        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name,"w")

        input_file_object = file(input_file_name, "r")

        """
        At this point, all required parameters have a value assigned to them. Proceed...
        """
        if verbose_command_sequence:
            sys.stderr.write("Printing out info for proteins to file %s\n" %(output_name) )
            
        piana_api.print_file_proteins_information(input_file_object= input_file_object,
                                                  input_proteins_type=input_proteins_type,
                                                  output_file_object= output_target,
                                                  output_proteins_type=this_print_proteins_type_name,
                                                  output_mode = output_mode,
                                                  format_mode = format_mode,
                                                  list_keywords= list_keywords,
                                                  list_alternative_type_names= list_alternative_types,
                                                  file_over_expressed= file_over_expressed,
                                                  file_infra_expressed= file_infra_expressed,
                                                  expression_protein_type= expression_protein_type)

        if output_name != "screen":
            output_target.close()

    # END OF elif option == "protein-code-2-all-info":


    # -------------------------------------------------------------------------------------------------
    # option == expand-interactions 
    # apply expansion to network
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: expand-interactions;expansion-type=expansion_type;expansion-nodes=expansion_nodes;expansion-threshold=expansion_threshold
    #
    elif option == "expand-interactions":

        if exec_mode=="interactive":

            expansion_type = raw_input(string_get_expansion_type )
            expansion_nodes = raw_input(string_get_nodes_to_expand)
            exp_output_mode = raw_input(string_get_exp_output_mode)
            expansion_threshold = int(raw_input(string_get_expansion_threshold))
            
            if exp_output_mode == "print":
                # name and type only needed when printing information
                output_name = raw_input(string_get_output_name)

                if output_proteins_type is None:
                    this_print_proteins_type_name = raw_input("Expansion output:" + string_get_protein_type)
                else:
                    this_print_proteins_type_name = output_proteins_type
                
            if output_proteins_species is None: this_protein_species = raw_input("Output protein species: "+ string_get_species_name)
            else:                               this_protein_species = output_proteins_species

        # END OF if exec_mode=="interactive":
        
        elif exec_mode=="batch":
            
            if arguments[arg_expansion_type] != "blank":
                expansion_type = arguments[arg_expansion_type]
            else:
                raise ValueError("Trying to expand interactions without giving a expansion-type in configuration file")

            if arguments[arg_expansion_nodes] != "blank":
                expansion_nodes = arguments[arg_expansion_nodes]
            else:
                raise ValueError("Trying to expand interactions without setting expansion nodes in configuration file")
            
            if arguments[arg_expansion_threshold] != "blank":
                expansion_threshold = int(arguments[arg_expansion_threshold])
            else:
                raise ValueError("Trying to expand interactions without setting expansion expansion_threshold in configuration file")
  
            if arguments[arg_exp_output_mode] != "blank":
                exp_output_mode = arguments[arg_exp_output_mode]
            else:
                raise ValueError("Trying to expand interactions without setting output mode in configuration file")
                       
            if arguments[arg_protein_type] != "blank":
                this_print_proteins_type_name = arguments[arg_protein_type]
            else:
                if output_proteins_type is not None:
                    this_print_proteins_type_name = output_proteins_type
                else:
                    raise ValueError("output proteins type cannot be None")

            if arguments[arg_output_target] != "blank":
                output_name = arguments[arg_output_target]
            else:
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")

                output_name = results_dir + results_prefix + "." + option + "." + expansion_type + "_thres" + str(expansion_threshold) + "." + expansion_nodes

            this_protein_species = output_proteins_species
        # END OF elif exec_mode=="batch":

        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_protein_species)
        
        # TO DO!!! Introduce interaction confidence based on: - how many proteins share that certain characteristic
        #                                                     - how many interactions coming from a single protein


        if exp_output_mode == "print":
            # if output mode of expansion is print, open the file where results will be printed
            if output_name == "screen":
                output_target = sys.stdout
            else:
                output_target = file(output_name,"w")
        else:
            output_target = None

        """
        At this point, we have all the parameters required. Proceed...
        """

        piana_api.expand_piana_graph_interactions(expansion_type=expansion_type, expansion_mode=expansion_nodes,
                                                  expansion_threshold=expansion_threshold, hub_threshold=hub_threshold,
                                                  exp_output_mode=exp_output_mode,  output_file_object=output_target,
                                                  proteins_type_name=this_print_proteins_type_name,
                                                  output_tax_id= this_tax_id,
                                                  list_alternative_type_names=list_alternative_types,
                                                  use_self_ints = use_self_ints,
                                                  list_source_dbs=list_source_dbs,
                                                  inverse_dbs= inverse_dbs,
                                                  list_source_methods=list_source_methods,
                                                  inverse_methods= inverse_methods)

  
        if exp_output_mode == "print" and output_name != "screen":
            output_target.close()


    # END OF elif option == "expand-interactions":

      
    # -------------------------------------------------------------------------------------------------
    # option == match-proteins-to-spots
    # identifies spots in a 2D gel by matching MW and/or IP to proteins in the network
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax:
    #  match-proteins-to-spots;output-target=output_target;protein-type=protein_code_type;list-mw-error=mw_%er1:..:mw_%erN;list-ip-error=ip_%er1:..:ip_%erN
    #
    elif option == "match-proteins-to-spots":

        if exec_mode == "interactive":
            output_name = raw_input(string_get_output_name)
            
            if output_proteins_type is None:
                # if no output proteins code type was given, ask user to give one
                this_print_proteins_type_name = raw_input("Output: " + string_get_protein_type)
            else:
                this_print_proteins_type_name = output_proteins_type
                
            if spots_file_name is None:
                spots_file_name = raw_input(string_get_spots_file )

            format_mode = raw_input(string_get_format_mode)
   
            # In interactive mode we do not let the user choose the error bounds
            molecular_error_bounds   = default_molecular_error_bounds  
            #       - set here the isoelectric error bounds for which you want to perform the matching
            isoelectric_error_bounds = default_isoelectric_error_bounds

        # END OF if exec_mode == "interactive":

        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)

            if arguments[arg_format_mode] != "blank":            format_mode = arguments[arg_format_mode]
            else:                                                format_mode = "txt"
            
            if arguments[arg_output_target] != "blank":
                output_name = arguments[arg_output_target]
            else:
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")

                output_name = results_dir + results_prefix  + "." + option + "." + format_mode
            
            if arguments[arg_protein_type] != "blank":
                this_print_proteins_type_name = arguments[arg_protein_type]
            else:
                if output_proteins_type is not None:   this_print_proteins_type_name = output_proteins_type
                else:
                    raise ValueError("output proteins type cannot be None")
                
            if spots_file_name is None:
                if arguments[arg_spots_file_name] != "blank":   spots_file_name = arguments[arg_spots_file_name]
                else:
                    raise ValueError("Trying to match proteins to spots without giving a spots file in command line or configuration file")

            # get error bounds, either from default or from configuration file
            if  arguments[arg_list_mw_error] == "blank":   molecular_error_bounds   = default_molecular_error_bounds
            else:
                tmp_molecular_error_bounds = arguments[arg_list_mw_error].strip().split(":")
                molecular_error_bounds = []
                for mw_error in tmp_molecular_error_bounds:
                    molecular_error_bounds.append(float(mw_error))

            if arguments[arg_list_ip_error] == "blank":   isoelectric_error_bounds   = default_isoelectric_error_bounds
            else:
                tmp_isoelectric_error_bounds = arguments[arg_list_ip_error].strip().split(":")
                isoelectric_error_bounds = []
                for ip_error in tmp_isoelectric_error_bounds:
                    isoelectric_error_bounds.append(float(ip_error))

        # END OF elif exec_mode == "batch":
        
        if output_name == "screen":            output_target = sys.stdout
        else:                                  output_target = file(output_name,"w")

        spots_file_object = file(spots_file_name, "r")
        
        """
        At this point, all parameters are set... Proceed..
        """

        if verbose_command_sequence:
            sys.stderr.write("\nPrinting spot matches to output file %s\n" %(output_name))

        piana_api.print_spot_protein_correspondence(spots_file_object= spots_file_object,
                                                    molecular_error_bounds=molecular_error_bounds, isoelectric_error_bounds=isoelectric_error_bounds,
                                                    output_file_object=output_target,
                                                    format_mode = format_mode,
                                                    output_proteins_type=this_print_proteins_type_name, list_alternative_types=list_alternative_types)


        spots_file_object.close()
        
        if output_name != "screen":
            output_target.close()
    # END OF elif option == "match-proteins-to-spots"

    # -------------------------------------------------------------------------------------------------
    # option == find-shortest-route 
    # find the route between two proteins (ie. intermediate proteins)
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: 
    # find-shortest-route;protein-a-name=protein_a_name;protein-b-name=protein_b_name;protein-type=blank;output-target=blank;format-mode=format_mode

    elif option == "find-shortest-route":

        if exec_mode=="interactive":

            protein_a_name = raw_input("protein A: " + string_get_protein)
            protein_b_name = raw_input("protein B: " + string_get_protein)

            if input_proteins_type is None:
                this_protein_type = raw_input("Input protein type: " + string_get_protein_type)
            else:
                this_protein_type = input_proteins_type
                
            if output_proteins_species is None: this_protein_species = raw_input("Output protein species: "+ string_get_species_name)
            else:                               this_protein_species = output_proteins_species

        # END OF if exec_mode=="interactive":
        
        elif exec_mode=="batch":
            
            protein_a_name = arguments[arg_protein_a_name]
            protein_b_name = arguments[arg_protein_b_name]
            
            if arguments[arg_protein_type] != "blank":           this_protein_type = arguments[arg_protein_type]
            else:                                                this_protein_type = input_proteins_type
                
            if arguments[arg_output_target] != "blank":
                output_name = arguments[arg_output_target]
            else:
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")

                output_name = results_dir + results_prefix + "." + option + "." + protein_a_name + "_2_" + protein_b_name + "." + format_mode

            this_protein_species = output_proteins_species
        # END OF elif exec_mode=="batch":

        
        if protein_a_name is None or protein_b_name is None or this_protein_type is None or output_proteins_type is None or this_protein_species is None:
            raise ValueError("In find-shortest-route, protein a name (%s) , protein b name (%s) , protein type (%s) protein species (%s) and output type (%s) are required\n" %(
                protein_a_name,
                protein_a_name,
                this_protein_type,
                this_protein_species,
                output_proteins_type))

        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_protein_species)

        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name,"w")

        piana_api.print_shortest_route(protein_a_name=protein_a_name, protein_b_name=protein_b_name,
                                       input_proteins_type=this_protein_type, output_proteins_type=output_proteins_type,
                                       list_alternative_type_names=list_alternative_types,
                                       output_file_object=output_target, format_mode=format_mode, tax_id_value=this_tax_id)

    # -------------------------------------------------------------------------------------------------
    # option == find-distance-group 
    # find proteins that are at a certain distance from a query protein
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax:
    #      find-distance-group;protein-name=protein_name;protein-type=blank;distance=distance;output-target=blank;info=blank;format-mode=format_mode
    #
    elif option == "find-distance-group":

        if exec_mode=="interactive":

            query_protein = raw_input(string_get_protein)
            distance = int( raw_input(string_get_distance) )
            info = raw_input(string_get_info)

            if input_proteins_type is None:
                this_protein_type = raw_input("Input protein type: " + string_get_protein_type)
            else:
                this_protein_type = input_proteins_type
                
            if output_proteins_species is None: this_protein_species = raw_input("Output protein species: "+ string_get_species_name)
            else:                               this_protein_species = output_proteins_species

        # END OF if exec_mode=="interactive":
        
        elif exec_mode=="batch":
            
            query_protein = arguments[arg_protein_name]

            if arguments[arg_format_mode] != "blank":            format_mode = arguments[arg_format_mode]
            else:                                                format_mode = "txt"
            
            if arguments[arg_protein_type] != "blank":           this_protein_type = arguments[arg_protein_type]
            else:                                                this_protein_type = input_proteins_type
                
            if arguments[arg_distance] != "blank":
                if arguments[arg_distance] != "all":     distance = int(arguments[arg_distance])
                else:                                    distance = "all"
            else:
                raise ValueError("distance required for find-distance-group\n")
                
            if arguments[arg_info] != "blank":
                info = arguments[arg_info]
            else:
                raise ValueError("You must set argument info for 'command distance' in your configuration file")

            if arguments[arg_output_target] != "blank":
                output_name = arguments[arg_output_target]
            else:
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")

                output_name = results_dir + results_prefix + "." + option + "." + str(distance) + "." + format_mode

            this_protein_species = output_proteins_species
        # END OF elif exec_mode=="batch":

        
        if query_protein is None or this_protein_type is None or output_proteins_type is None or this_protein_species is None:
            raise ValueError("In find-distance-group, protein name (%s) , protein type (%s) protein species (%s) and output type (%s) are required\n" %(
                protein_name,
                this_protein_type,
                this_protein_species,
                output_proteins_type))

        this_tax_id = piana_api.get_one_tax_id_from_species_name(species_name=this_protein_species)

        # chapucilla para poder debuggear usando proteinPianas...
        if this_protein_type == "proteinPiana":
            query_protein = int(query_protein)
        
        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name,"w")

        """
        At this point we have all the required parameters... Proceed...
        """
            
        if verbose_command_sequence:
            sys.stderr.write("\nPrinting proteins at distance %s from %s to output file %s\n" %(distance, query_protein ,output_name))

        try:
            piana_api.print_proteins_at_distance_x(query_protein=query_protein, distance=distance, input_protein_type=this_protein_type,
                                                   output_protein_type=output_proteins_type, list_alternative_type_names=list_alternative_types,
                                                   output_file_object= output_target, format_mode=format_mode, info= info,
                                                   tax_id_value=this_tax_id)
        except:
            sys.stderr.write("We cannot find proteins at distance %s from protein %s because this protein is not in the network\n" %(distance,
                                                                                                                                     query_protein ))
            

        
        if output_name != "screen":
            output_target.close()

    # END OF elif option == "find-distance-group":
    

    
    # -------------------------------------------------------------------------------------------------
    # option == find-protein-patches
    # divides proteins into patches
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: find-protein-patches;clustering-steps=number_clustering_steps
    #
    elif option == "find-protein-patches":

        
        # TO DO!!! This hard-coded assignment should be done somehow else... 

        # TO DO!!! Introduce command load-patchgroup-graph that creates a new PatchDecomposition object with info only for
        #          that patchgroup graph (so we can recover decompositions from other executions)
        #          -> not sure I want to do this... maybe all I need if the patchgroup graph itself...



        # some arguments are placed in a list of one element, just to be consistent with benchmarking mode
        #    in benchmarking mode, we have multiple arguments to run, which are placed in list. In non benchmarking
        #    mode there is just one case to run, but we anyway place it in a list to ease things
        if exec_mode == "interactive":
            
            number_steps = int( raw_input(string_get_clustering_steps) )
            score_threshold = int( raw_input(string_get_score_threshold) )

            save_mode = raw_input(string_get_save_mode)

            # TO DO!!! read interactively ranked_parameters_file
            # TO DO!!! read interactively patch_mode
            # TO DO!!! make find-protein-patches interactive.... in general, eveything has to be done!!!

            if save_mode == "disc":
                disc_name = raw_input(string_get_disc_name)
            
        elif exec_mode == "batch":
            
            if arguments[arg_clustering_steps] != "blank":
                number_steps = int(arguments[arg_clustering_steps])
            else:
                raise ValueError("Trying to find protein patches without giving number of clustering steps in configuration file")
            
            if arguments[arg_score_threshold] != "blank":
                score_threshold = int(arguments[arg_score_threshold])
            else:
                raise ValueError("Trying to find protein patches without giving a score threshold in configuration file")

            if arguments[arg_ranked_parameters_file] != "blank":
                ranked_parameters_file = arguments[arg_ranked_parameters_file] 
            else:
                if patch_mode != "train":
                    raise ValueError("Trying to find protein patches without giving a ranked parameters file name in configuration file")

           
            if arguments[arg_save_mode] != "blank":
                save_mode = arguments[arg_save_mode]
            else:
                raise ValueError("You need to set a save-mode when doing protein decomposition\n")
            
            if arguments[arg_disc_name] != "blank":
                disc_name = arguments[arg_disc_name]
            else:
                if save_mode == "disc":
                    # blank disc_name only produces error for save mode disc
                    raise ValueError("You need to set a save-mode when doing protein decomposition\n")
                
        # END OF elif exec_mode == "batch":

        
        graphs_file_name = "%s%s%s" %(results_dir, disc_name, results_prefix)  # directory/prefix of files that will hold patchgroup graphs

        #
        # If a benchmark configuration file was given, override the benchmarking parameters with its values.
        # Only the arguments that are set in the benchmark file are overridden: when an argument is "blank"
        # there, the value already held by the corresponding variable is kept.
        if benchmark_conf_file is not None:


            parse_benchmark_configuration_file(file_name= benchmark_conf_file)  # fills dictionary benchmark_arguments with keys and values
                
            if benchmark_arguments[arg_gold_std] != "blank":
                if gold_std is None:
                    # gold_std only to be taken from benchmark configuration file if nothing was given through piana command line
                    gold_std = benchmark_arguments[arg_gold_std]
                
            if benchmark_arguments[arg_patch_mode] != "blank":
                if patch_mode is None:
                    # patch_mode only to be taken from benchmark configuration file if nothing was given through piana command line
                    patch_mode = benchmark_arguments[arg_patch_mode]
                
            # the three comparison arguments below always override any previous value when set in the benchmark file
            if benchmark_arguments[arg_comp_file_name] != "blank":
                comp_file_name = benchmark_arguments[arg_comp_file_name]
                
            if benchmark_arguments[arg_comp_file_dir] != "blank":
                comp_file_dir = benchmark_arguments[arg_comp_file_dir]
                
            if benchmark_arguments[arg_comparison_mode] != "blank":
                comparison_mode = benchmark_arguments[arg_comparison_mode]


            # benchmarking needs both a gold standard and a patch mode, from the command line or the benchmark file
            if gold_std is None:
                raise ValueError("Benchmarking without giving a gold std file")
            
            if patch_mode is None:
                raise ValueError("Benchmarking without giving a patch_mode")

            if patch_mode == "train":
                # When training, the parameter combinations to evaluate are hard-coded here. When not training,
                # the list is read from the ranked parameters file instead (done after this block).

                # Not very nice trick, but since it is only for the training, I allow myself to be a bit dirty:
                # write here the combinations of weights that you really want to test...
                # Each tuple is (w_patches, w_prots, w_belong, stop_condition).

                list_ranked_parameters = [ (0,1,0, None),
                                           (1,2,1, None),
                                           (2,1,1, None),
                                           (1,1,1, None) ]
            # END OF if patch_mode == "train":

            # NOTE(review): comp_file is only assigned for comparison modes "standard", "training" and "root";
            # unless it is initialised before this block, any other mode reaches file(comp_file, "w") below
            # with comp_file unset -- confirm.
            if comparison_mode == "standard" or comparison_mode=="training":
                # NOTE(review): similar_proteins_dic is not assigned in this branch although it is later passed
                # to decompose(); presumably it is initialised earlier in the file -- confirm.
                root_protein = None
                comp_file = comp_file_dir + comp_file_name

            elif comparison_mode == "root":
                # one comparison file per root protein; only the first root node is used
                root_protein = piana_api.piana_graph.get_root_node_ids()[0]
                comp_file = comp_file_dir + str(root_protein) + "." + comp_file_name
                # TO CHECK!!! what about if root_protein is not a proteinPiana code? Conversion needed!!!

                similar_proteins_dic = piana_api.piana_access.get_similar_proteins_dic(proteinPiana_value= root_protein) # used in benchmarking to make sure
                                                                                                                         # that we do not get TP from the comparison
                                                                                                                         # of proteins that are actually the same

            # END OF elif comparison_mode == "root":

            # truncate (or create) the comparison file so that results from a previous run are discarded
            f = file(comp_file, "w")
            f.close()


        # END OF if benchmark_conf_file is not None:
        
        if patch_mode != "train":
            # Not a training run: the weights and stop conditions come from the "ranked parameters" file.

            if patch_mode == "exec":
                # Mode "exec" must not use any of the benchmarking variables; they are reset here because
                # mode "eval" does set them when parsing the benchmark configuration file.
                comp_file = root_protein = similar_proteins_dic = gold_std = comparison_mode = None

            # parse_ranked_parameters_file fills list_ranked_parameters with the parameters to be used,
            # ordered from best to worst. Each entry follows the format:
            #      [0] --> w_patches
            #      [1] --> w_prots
            #      [2] --> w_belong
            #      [3] --> type_of_stop_condition=value
            list_ranked_parameters = []
            parse_ranked_parameters_file(file_name = ranked_parameters_file)

            # Modes "eval" and "exec" print a result, so an output proteins type is required.
            if output_proteins_type is None:
                raise ValueError("output-proteins-type cannot be none when trying to decompose a protein into patches")
            
        # END OF else:if patch_mode != "train":

        # ATTENTION: the root protein logic only works if the root proteins are proteinPianas. If
        # the proteins in the input file to piana are not proteinPianas, the conversion must be made somewhere (probably in piana.py?),
        # but then root_protein would be a list and not just one protein.
        # Furthermore, if the network is built from more than 1 protein, this whole concept has to be rethought.
        
        stop_condition_met = 0
        # Decompose the proteins in the network once per parameter set, in the order given by
        # list_ranked_parameters (when training, these are the parameter sets being evaluated).
        # The loop ends at the first parameter set for which decompose() reports the stop condition as met.
        for parameters in list_ranked_parameters:

            w_patches = parameters[0]
            w_prots   = parameters[1]
            w_belong  = parameters[2]
            # parameters[3] is type_of_stop_condition=value

            if verbose_patches:

                sys.stderr.write("\n----------------------NEW PARAMETERS for protein (pp=%s) (threshold %s) ----------------\n" %(
                    root_protein, hub_threshold))
                sys.stderr.write("w_patches is %s and w_prots is %s and w_belong is %s and stop_cond is %s\n" %(
                     w_patches, w_prots, w_belong, parameters[3] ))
                sys.stderr.write("----------------------------------------------------------------------------------------\n")

            if patch_mode == "train":
                # In training mode, a marker file named <root_protein>.<hub_threshold>.no_info_available tells us
                # that no information is available for this protein. PatchDecomposition creates it when the
                # interaction network of a protein does not fulfill requirement 3 (known binding sites for at
                # least one protein at distance 2 from root protein). If no info was found with one set of
                # weights, none will be found with any other, so the iteration is skipped.
                #
                # NOTE(review): the directory is hard-coded to the original author's home directory.
                try:
                    no_info_file_fd = file("/home/raragues/phd/piana/code/execs/temp/" + str(root_protein) + "." + str(hub_threshold) + ".no_info_available",
                                           "r")
                except IOError:
                    # the marker file does not exist (or cannot be read): carry on with the decomposition
                    pass
                else:
                    # only the existence of the file matters: release the descriptor immediately
                    no_info_file_fd.close()
                    if verbose_patches:
                        sys.stderr.write("Skipping these iteration because we know the network will contain no information\n")
                    continue
            # END OF if patch_mode == "train":

            user_scoring_function = ScoringFunction.get_scoring_function(scoring_function_name="quotient",
                                                                         w_int_patchgroups_shared = w_patches,
                                                                         w_int_proteinPianas_shared = w_prots,
                                                                         w_belong_to_same_protein = w_belong)

            # one graph file name per combination of weights
            iteration_graph_file_name = graphs_file_name + "_" + str(w_patches) + "_" + str(w_prots) + "_" + str(w_belong)

            # TO DO!!! In mode "eval" and "exec" I am not introducing any restrictions on which formulas have to be used
            #          This has to be corrected so that the user can choose how accurate he needs the results to be
            #          (in exchange of not getting results for many proteins)
            #          See comment on section 4 of evaluation/Benchmark/README.evaluting_results

            current_decomposition = PatchDecomposition(piana_graph = piana_api.piana_graph,
                                                       clustering_mode = None,
                                                       scoring_function=  user_scoring_function,
                                                       clustering_steps = number_steps,
                                                       score_threshold= score_threshold,
                                                       save_mode = save_mode,
                                                       disc_name = iteration_graph_file_name,
                                                       gold_std = gold_std,
                                                       comparison_results_file_name= comp_file,
                                                       stop_condition = parameters[3])

            stop_condition_met = current_decomposition.decompose(comparison_mode= comparison_mode,
                                                                 root_protein= root_protein,
                                                                 similar_proteins_dic= similar_proteins_dic,
                                                                 hub_threshold=hub_threshold,
                                                                 patch_mode= patch_mode)

            if stop_condition_met:
                # current_decomposition now holds the decomposition that met the stop condition
                break


        # END OF  parameters in list_ranked_parameters:

        if patch_mode != "train":
            # All result files of this run share the same directory and "<root_protein>.<hub_threshold>" stem;
            # only the suffix changes.
            # NOTE(review): the directory is hard-coded to the original author's home directory.
            results_path_stem = "/home/raragues/phd/piana/code/execs/temp_results/" + str(root_protein) + "." + str(hub_threshold)

            if stop_condition_met:
                # 1. clustering results: decomposition, proteins in clusters, patchgroup interactions and network
                results_fd = file(results_path_stem + ".clustering_results", "w")
                try:
                    sys.stderr.write("STOP CONDITION MET!!!!!\n")

                    current_decomposition.print_protein_decomposition_from_level_number(level_number=-1,
                                                                                        output_target= results_fd,
                                                                                        protein_type_name= "proteinPiana",
                                                                                        alternative_type_names= []   )

                    current_decomposition.print_proteins_in_clusters_from_level_number(level_number=-1,
                                                                                       output_target = results_fd,
                                                                                       protein_type_name= "proteinPiana",
                                                                                       alternative_type_names= []    )

                    current_decomposition.print_patchgroups_interactions_from_level_number(level_number=-1,
                                                                                           output_target= results_fd)

                    current_decomposition.print_patchgroups_network_from_level_number(level_number=-1,
                                                                                      output_target= results_fd)
                finally:
                    # close the file even if one of the print methods fails
                    results_fd.close()

                # 2. table of CIRs assigned, using the output protein types chosen by the user
                cir_assigned_fd = file(results_path_stem + ".table_cir_assigned", "w")
                try:
                    current_decomposition.print_table_cir_assigned_from_level_number( level_number=-1,
                                                                                      output_target = cir_assigned_fd,
                                                                                      protein_type_name= output_proteins_type,
                                                                                      alternative_type_names= list_alternative_types ,
                                                                                      cir_prefix = str(root_protein),
                                                                                      root_protein= root_protein,
                                                                                      gold_std = gold_std,
                                                                                      similar_proteins_dic= similar_proteins_dic )
                finally:
                    cir_assigned_fd.close()

                # 3. table of CIR interactions
                cir_ints_fd = file(results_path_stem + ".table_cir_ints", "w")
                try:
                    current_decomposition.print_table_cir_int_from_level_number( level_number=-1,
                                                                                 output_target = cir_ints_fd,
                                                                                 cir_prefix = str(root_protein),
                                                                                 root_protein= root_protein ,
                                                                                 gold_std = gold_std       )
                finally:
                    cir_ints_fd.close()

            else:
                # no parameter set met the stop condition: leave an empty marker file behind
                results_fd = file(results_path_stem + ".nothing_found", "w")
                results_fd.close()

            # END OF else: (if stop_condition_met:)
        # END OF if patch_mode != "train":
       
    # END OF elif option == "find-protein-patches":
 
    # -------------------------------------------------------------------------------------------------
    # option == print-protein-patches or print-shared-patches or print-patches-interactions or print-patches-network
    #
    # prints patches information in different flavors:
    #
    #   - print-protein-patches: for each protein, which patches it has
    #   - print-shared-patches:  for each patch, which proteins have it
    #   - print-patches-interactions: the patch-patch interaction network
    #   - print-patches-network: the patch groups network (output of print_patchgroups_network_from_level_number)
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: print-protein-patches;output-target=output_target;protein-type=protein_code_type;clustering-level=clustering_level_to_print
    # configuration file syntax: print-shared-patches;output-target=output_target;protein-type=protein_code_type;clustering-level=clustering_level_to_print
    # configuration file syntax: print-patches-interactions;output-target=output_target;clustering-level=clustering_level_to_print
    # configuration file syntax: print-patches-network;output-target=output_target;clustering-level=clustering_level_to_print
    #                    
    #
    elif option == "print-protein-patches" or option == "print-shared-patches" or option=="print-patches-interactions" or option=="print-patches-network":

        if exec_mode == "interactive":
            output_name = raw_input(string_get_output_name)
            
            if output_proteins_type is None:
                # if no output proteins code type was given, ask user to give one
                this_print_proteins_type_name = raw_input("Output: " + string_get_protein_type)
            else:
                this_print_proteins_type_name = output_proteins_type
                
            level_number = int(raw_input(string_get_clustering_level))

            
        # END OF if exec_mode == "interactive":

        elif exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)

            if not (option=="print-patches-interactions" or option=="print-patches-network"):
                # print-patches-interactions/network  do not take a protein type as input (no need, only printing patches)
                if arguments[arg_protein_type] != "blank":
                    this_print_proteins_type_name = arguments[arg_protein_type]
                else:
                    this_print_proteins_type_name = output_proteins_type
            # END OF if not (option=="print-patches-interactions" or option=="print-patches-network"):
                
            if arguments[arg_clustering_level] != "blank":
                level_number = int(arguments[arg_clustering_level])
            else:
                raise ValueError("Trying to %s without giving the clustering level in configuration file" %option)

            if arguments[arg_output_target] != "blank":
                output_name = arguments[arg_output_target]
            else:
                if level_number == -1:
                    ending = "last"
                else:
                    ending = str(level_number)
                
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")

                output_name = results_dir + results_prefix + "." + option + "." + ending

        # END OF elif exec_mode == "batch":
        
        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name,"w")


        # TO DO!!! DO I have to apply species restriction to patches? Add argument , tax_id_value= output_tax_id?

        if verbose_command_sequence:
            sys.stderr.write("\nPrinting patches option %s for clustering level %s to output file %s\n" %(option, level_number, output_name))

        if option == "print-protein-patches":
            current_decomposition.print_protein_decomposition_from_level_number(level_number=level_number,
                                                                                output_file_object= output_target,
                                                                                protein_type_name= this_print_proteins_type_name,
                                                                                alternative_type_names= list_alternative_types   )
        elif option == "print-shared-patches":
            current_decomposition.print_proteins_in_clusters_from_level_number(level_number=level_number,
                                                                               output_file_object= output_target,
                                                                               protein_type_name= this_print_proteins_type_name,
                                                                               alternative_type_names= list_alternative_types    )
        elif option == "print-patches-interactions": 
            
            current_decomposition.print_patchgroups_interactions_from_level_number(level_number=level_number,
                                                                                   output_file_object= output_target)
        elif option == "print-patches-network": 
            
            current_decomposition.print_patchgroups_network_from_level_number(level_number=level_number,
                                                                              output_file_object= output_target)
        # END OF elif option == "print-patches-network":

        if output_name != "screen":
            output_target.close()


    # END OF elif option == "print-protein-patches" or option == "print-shared-patches" or option=="print-patches-interactions":



   
    # -------------------------------------------------------------------------------------------------
    # option ==  train-cirs
    # prints out all CIRs found for all levels of the clustering, with results at each level
    # this command is used to print out the results that will be later used to find out which
    # are the best parameters for finding CIRs and perform the evaluation
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax:
    #
    elif option == "train-cirs":

        # train-cirs is only available in batch mode; its three arguments are all mandatory.
        if exec_mode == "batch":
            # if batch mode, read arguments from configuration file (which has been previously parsed)

            if arguments[arg_similarity_mode] != "blank":
                similarity_mode = arguments[arg_similarity_mode]
            else:
                raise ValueError("Trying to %s without setting the similarity_mode in configuration file" %option)

            # each error names the argument that is actually missing
            # (the messages below used to be copies of the similarity_mode one)
            if arguments[arg_minimum_score] != "blank":
                minimum_score = int(arguments[arg_minimum_score])
            else:
                raise ValueError("Trying to %s without setting the minimum_score in configuration file" %option)

            if arguments[arg_cir_method] != "blank":
                cir_method = arguments[arg_cir_method]
            else:
                raise ValueError("Trying to %s without setting the cir_method in configuration file" %option)
        else:
            raise ValueError("This option not valid in interactive mode\n")

        # 0. Classes to create to do a new clustering:
        #
        #  0.1 --> subclasses of Graph, GraphNodeAttribute and GraphEdgeAttribute for the type
        #        of Graph you want to cluster
        #
        #           --> this might be just Graph if you are trying to cluster your
        #               initial graph, or can be a new Graph class if you are clustering
        #               a network that is being generated from your original graph
        #
        #               -> eg. in CIR clustering, the original graph is a PianaGraph (a PPI network)
        #                  but the PianaGraph is not the one we want to cluster. The graph
        #                  we want to cluster is a IRGraph, which is generated from the
        #                  PianaGraph. Therefore, for CIR clustering, we had to create
        #                  IRGraph, IRGraphNodeAttribute and IRGraphEdgeAttribute, with
        #                  methods to generate the IRGraph and specifying the characteristics
        #                  of IRs
        #
        #  0.2 --> subclass of GraphCluster that particularizes the type of clusters that are
        #        being created
        #           --> new __init__ to define new arguments to this graph
        #           --> new do_action() is needed
        #           --> eg. for CIRs, this is a CirGraphCluster, where each element inside the
        #               cluster is a IRGraphNodeAttribute from the IRGraph
        #
        #  0.3 --> subclass of GraphClusterNodeAttribute that particularizes the type of node attributes in the
        #        GraphNodes of the subclass of GraphCluster
        #           --> this is usually an empty class, just here for compatibility
        #
        #  0.4 --> subclass of ClusteringSimilarityFunction that particularizes how to calculate the
        #        similarity between two clusters
        #
        #  0.5 --> subclass of ClusteringStopCondition that particularizes how to know if the clustering
        #        must stop or not 

        # 1. create IR interaction network from the protein interaction network
        #    --> there will be one IR per interaction in a protein
        #    --> the IR network will consist in pairs of IR interacting between them and isolated from the other pairs
        #         --> this represents that every interaction has its own IR to perform it
        #    --> by clustering the IR interaction network, we hope to find groups of IRs (ie. CIRs) that have a similar
        #        interaction behaviour

        # 2. create the cluster_generator (just in charge of creating an empty CirGraphCluster)
        #    --> this is just a generic class that is called by Clustering.cluster_graph() for performing the
        #        clustering. cluster_graph() needs to know which kind of GraphCluster it is dealing with, and
        #        this 'generator' is in charge of returning the empty objects that will be used to create the
        #        clusters
        #

        # 3. create the similarity function object

        # 4. create the stop condition object

        # 5. cluster the IRGraph with the given similarity function and stop condition, asking it to print results at all clustering levels

        if verbose_command_sequence:
            sys.stderr.write("\nCreating results files for CIR evaluation --")

        # Work out the root protein of the network. With exactly one root node, that node is the root
        # protein and its G2 partners are collected as the keys of dic_root_proteins_g2 (values unused);
        # with any other number of root nodes, the root protein is the literal "multiple" and the
        # dictionary stays empty.
        root_proteins = piana_api.piana_graph.get_root_node_ids()
        dic_root_proteins_g2 = {}

        if len(root_proteins) != 1:
            root_protein = "multiple"
        else:
            root_protein = root_proteins[0]

            # to speed up things afterwards, get now the list of proteins in G2
            #   -> we will only create multiple IRs for the root proteins and those in G2
            root_protein_g2 = piana_api.piana_access.get_all_g2_partners(
                proteinPiana_value= root_protein,
                use_self_ints = use_self_ints,
                list_source_dbs=list_source_dbs,
                inverse_dbs=inverse_dbs,
                list_source_methods=list_source_methods,
                inverse_methods=inverse_methods,
                threshold = hub_threshold)

            for one_root_g2 in root_protein_g2:
                dic_root_proteins_g2[one_root_g2] = None



        if cir_method == "irs":
            # CIR method "irs": build an IRGraph (Interacting Regions graph) from the PianaGraph and cluster it,
            # printing results at all clustering levels.

            # initialize the Interacting Regions Graph (which is the Graph that will then be clustered)
            ir_graph = IRGraph(graphID="ir", piana_access_object = piana_api.piana_access,
                               dic_root_proteins_g2=dic_root_proteins_g2)

            # By setting the root protein, we oblige the IR graph to create multiple IRs only for proteins that
            # are either the root protein or share at least one interactor with the root protein. Other proteins
            # will be represented by a single IR.
            #
            # If the root_protein is 'multiple', this doesn't apply.
            #
            # This is a trick we are applying to gain speed: creating multiple IRs for each protein slows down
            # the clustering, and since we are only interested in getting CIRs for the root protein, and the only
            # proteins that can be in the same CIR are those in G2, we limit the IRs to be those that can fall
            # in the root protein CIRs.
            ir_graph.initialize_IRGraph_from_PianaGraph(piana_graph= piana_api.piana_graph,
                                                        root_protein=root_protein)


            if verbose_command_sequence:
                sys.stderr.write(" IR Graph created --")

            # uncomment this for visualizing the IR network (alternative means that the IR id and the protein ID will be shown on network box)
            #ir_file_fd = file("ir_network.dot", "w")
            #ir_file_alternative_fd = file("ir_network.alternative.dot", "w")

            #ir_graph.output_dot_file(output_target=ir_file_fd, use_alternative_id="no")
            #ir_graph.output_dot_file(output_target=ir_file_alternative_fd, use_alternative_id="yes")

            # initialize the clustering class and the generator
            cluster_manager = Clustering()
            cluster_generator = CirClusteredGraphGenerator() # from file code/Clustering/ClusteredGraphGenerator.py

            # create the similarity function and the stop condition
            # (the stop condition is built on top of the similarity function and the user's minimum score)
            similarity_function = CirClusteringSimilarityFunction(piana_access= piana_api.piana_access,
                                                                  path_length_threshold= -1,
                                                                  similarity_mode= similarity_mode)

            stop_condition = CirClusteringStopCondition(cir_similarity_function= similarity_function,
                                                        minimum_score= minimum_score)


            if verbose_command_sequence:
                sys.stderr.write(" starting clustering! \n")

            # do the clustering...
            # clustering_print_mode "all" asks for results to be printed at all clustering levels;
            # output files use results_dir and the prefix <results_prefix>.<similarity_mode>
            final_cir_clustered_graph = cluster_manager.cluster_graph(graph_to_cluster= ir_graph,
                                                                      clustered_graph_generator= cluster_generator,
                                                                      similarity_function= similarity_function,
                                                                      stop_condition= stop_condition,
                                                                      clustering_print_mode= "all",
                                                                      output_dir= results_dir,
                                                                      output_prefix= results_prefix + "." + similarity_mode,
                                                                      root_protein = root_protein,
                                                                      original_graph= piana_api.piana_graph,
                                                                      call_do_action = 0)
        # END OF if cir_method == "irs":
        elif cir_method == "prots":

            piana_file_fd = file(results_dir + "/" + results_prefix + "." + "ppi_network.dot", "w")
            piana_api.piana_graph.output_dot_file(output_target=piana_file_fd, use_alternative_id="no")

            # --------------------------------------------------------------------------------------------------------
            # 1. Create the Interacting Protein Groups Graph (which is just a GraphCluster where clusters have proteins)
            # --------------------------------------------------------------------------------------------------------
            
            if verbose_prots_cir_shallow:
                sys.stderr.write("initializing IPG graph -- ")
            ipg_graph = GraphCluster(graph_id="G")
            ipg_graph.initialize_from_graph(graph= piana_api.piana_graph)
            
            file_alternative_fd = file(results_dir + "/" + results_prefix + "." + "ipg_network.initial.dot", "w")
            ipg_graph.output_dot_file(output_target=file_alternative_fd, use_alternative_id="yes")


            proteins_same_cluster = {}  # used to know which nodes coexist in the same cluster
                                        # so they are not used to create a new cluster
                                        #   -> only applies to original nodes (those that only contain 1 protein)
                                        # follows structure:
                                        #        { proteinPiana_1: {proteinPiana_sharing_cluster:None, proteinPiana_sharing_cluster, ..},
                                        #          proteinPiana_2: {proteinPiana_sharing_cluster:None, proteinPiana_sharing_cluster, ..},
                                        #          .......................
                                        #        }


            previous_similarities = {}  # used to keep those similarities that have already been calculated
                                        # follows structure:
                                        #       { key : similarity score,
                                        #         key : similarity score,
                                        #         .....................
                                        #       }
                                        # where key is a string lower_node_id.higher_node_id
            clustering_level = 0

            if verbose_command_sequence:
                sys.stderr.write(" -- starting clustering! \n")

            while(1):
                # keep clustering until the stop condition breaks out of the loop
                if verbose_prots_cir:
                    print " ------------------------------------------------------------------------------"
                    print "               NEW CLUSTERING LEVEL "
                    print " ------------------------------------------------------------------------------"
                    
                if verbose_prots_cir_shallow:
                    sys.stderr.write("lev_%s." %(clustering_level))
                # -----------------------------------------------------
                # 2. obtain the similarity matrix at this clustering point
                # -----------------------------------------------------
                if verbose_prots_cir_shallow:
                    sys.stderr.write("s.")
                    
                node_list= ipg_graph.get_node_object_list()
                
                number_nodes = len(node_list)


                
                similarity_matrix= numarray.zeros( ( number_nodes, number_nodes), numarray.Float32) 
                max_value=0
                index_to_node_id= {}
                positions_to_cluster= []
                
                for i in range(number_nodes):
                    node_1 = node_list[i]
                    node_attribute_1= node_1.get_node_attribute_object()
                    protein_nodes_1 = node_attribute_1.get_list_elements()
                    protein_ids_1 = Set([])
                    for one_protein_node in protein_nodes_1:
                        protein_ids_1.add(one_protein_node.get_node_id())
                    if node_1.is_original():
                        protein_1= protein_ids_1.copy().pop() # there is only one id... it is an original node

                    for j in range(i+1, number_nodes):
                        node_2 = node_list[j]
                        
                        node_attribute_2= node_2.get_node_attribute_object()
                        protein_nodes_2 = node_attribute_2.get_list_elements()
                        protein_ids_2 = Set([])
                        for one_protein_node in protein_nodes_2:
                            protein_ids_2.add(one_protein_node.get_node_id())
                        if node_2.is_original():
                            protein_2 = protein_ids_2.copy().pop()  # there is only one id... it is an original node



                        if verbose_prots_cir:
                            print "calculating similarity for node %s (proteins %s) and node %s (proteins %s)" %(node_1.get_node_id(),
                                                                                                                 protein_ids_1,
                                                                                                                 node_2.get_node_id(),
                                                                                                                 protein_ids_2,
                                                                                                                 )

                        if node_1.is_original() and node_2.is_original():
                            
                            if verbose_prots_cir:
                                print "  -> both nodes are original..."
                                print "  -> proteins same cluster are: %s" %(proteins_same_cluster)
                            # if both nodes are original, check if they already appear together in a cluster
                            #  -> done to avoid creating clusters that are subclusters of already existing clusters
                            if proteins_same_cluster.has_key(protein_1):
                                if proteins_same_cluster[protein_1].has_key(protein_2):
                                    # these two nodes already coexist in a cluster: do not process them again (ie. leave similarity to 0)
                                    continue
                        # END OF if node_1.is_original() and node_2.is_original():
                        
                        elif node_1.is_original() or node_2.is_original():
                            if verbose_prots_cir:
                                print "  -> one node is original..."
                            # if only one of the two is original, check if the cluster already has the protein of the original node
                            if protein_ids_1.intersection(protein_ids_2):
                                # a cluster that already has the protein of the original node should not be compared (ie. leave similarity to 0)
                                continue

                        node_id_1 = node_1.get_node_id()
                        node_id_2 = node_2.get_node_id()
                
                        if node_id_1 < node_id_2:
                            key = "%s.%s" %(node_id_1, node_id_2)
                        else:
                            key = "%s.%s" %(node_id_2, node_id_1)
                            
                        if previous_similarities.has_key(key):
                            # if this similarity was already calculated, use it
                            similarity_value = previous_similarities[key]
                        else:
                            # if it is a similarity between new nodes, calculate it
                            similarity_value = 0 # by default, similarity value is 0

                            if root_protein in protein_ids_1 or root_protein in protein_ids_2:
                                # only clusters that contain a root protein are used to form a new cluster
                                if similarity_mode == "min_per":
                                    # "min_per": number of interaction partners shared by both nodes, expressed
                                    # as a percentage of the partners of the node that has fewer of them
                                    set_partners_node_1 = Set(node_1.get_neighbour_ids())
                                    set_partners_node_2 = Set(node_2.get_neighbour_ids())

                                    num_partners_node_1 = float(len(set_partners_node_1))
                                    num_partners_node_2 = float(len(set_partners_node_2))
                                    num_common_partners = len(set_partners_node_1.intersection(set_partners_node_2))

                                    fewest_partners = min(num_partners_node_1, num_partners_node_2)
                                    if fewest_partners > 0:
                                        similarity_value = 100*num_common_partners / fewest_partners
                                    else:
                                        # a node without partners cannot share any: similarity stays 0
                                        # (explicit check instead of a catch-all except around the division)
                                        similarity_value = 0

                                # END OF if similarity_mode == "min_per":
                            # END OF if root_protein in protein_ids_1 or root_protein in protein_ids_2:
                            previous_similarities[key] = similarity_value
                        # END OF else: (if previous_similarities.has_key(key):)

                        if max_value < similarity_value:
                            max_value = similarity_value
                            positions_to_cluster = [(i,j)]
                        elif max_value == similarity_value:
                            positions_to_cluster.append( (i,j) )

                        if similarity_value:
                            similarity_matrix[i][j] = similarity_value

                        if verbose_prots_cir:
                            print "  --> similarity is %s" %(similarity_matrix[i][j])

                    # END OF for j in range(i+1, number_nodes):
                # END OF for i in range(number_nodes):

                if verbose_prots_cir_shallow:
                    sys.stderr.write("m=%.0f." %(max_value))
                # -----------------------------------------------------
                # 3. printing the network and clusters for this level
                # -----------------------------------------------------
                clustering_level_file_fd = file(results_dir + "/" + results_prefix + "." + similarity_mode + ".level_%s.dot" %clustering_level, "w")
                ipg_graph.output_dot_file(output_target=clustering_level_file_fd, use_alternative_id="yes")
                clustering_level_file_fd.close()


                results_level_file_fd = file(results_dir + "/" + results_prefix + "." + similarity_mode + ".level_%s.results" %clustering_level, "w")
                results_level_file_fd.write("description\tprotein=%s\tlabel=%s\tsimilarity=%s\n" %(root_protein, clustering_level, max_value))
                ipg_graph.print_proteins_same_cluster(output_target=results_level_file_fd, root_protein=root_protein)
                ipg_graph.print_proteins_interactions(output_target=results_level_file_fd, root_protein=root_protein)
                results_level_file_fd.close()


                clustering_level += 1


                # CHECK STOP CONDITION
                if max_value < minimum_score:
                    # break the while if similarity score lower than threshold
                    break
                

                # -------------------------------------------------------------
                # 4. use cluster with max similarity scores to create new clusters
                # -------------------------------------------------------------
                
                similarity_matrix_size = len(similarity_matrix)

                if verbose_prots_cir:
                    print "positions to cluster are: %s" %(positions_to_cluster)
                
                if verbose_prots_cir_shallow:
                    sys.stderr.write("len=%s." %(similarity_matrix_size))
                    
                used_nodes = {}   # node_ids of nodes that have already been used to generate a new cluster
                
                new_clusters = [] # list of new clusters that have been generated
                
                clustered_nodes_dic = {}  # dic that keeps as keys the current node ids and as values the
                                          # cluster object for next level graph
                                          #   -> used to add several clusters into the same one of the new level
                                          #      All clusters with the max_value that are transitive between
                                          #      them (ie. all of them have max_value in between) are clustered
                                          #      into the same cluster in just one clustering step


                correspondences_new_clusters = {}   # dic that keeps the node ids that were used to generate a new cluster
                                                    #   follows structure:
                                                    #       { new_cluster_id : [node id 1, node id 2, ...]
                                                    #         new_cluster_id : [node id 3, node id 4, ...]
                                                    #         ...............
                                                    #       }
                                                 
                for pair_i_j in positions_to_cluster:
                    # for each position to cluster, create a new cluster from the clusters that are 'similar'
                    #   -> unless restrictions apply

                    i = pair_i_j[0]
                    j = pair_i_j[1]
                    
                    i_node_id = node_list[i].get_node_id()
                    j_node_id = node_list[j].get_node_id()

                    protein_nodes_i= node_list[i].get_node_attribute_object().get_list_elements()
                    protein_ids_i = []
                    for one_protein_node in protein_nodes_i:
                        protein_ids_i.append(one_protein_node.get_node_id())
                        
                    protein_nodes_j = node_list[j].get_node_attribute_object().get_list_elements()
                    protein_ids_j = []
                    for one_protein_node in protein_nodes_j:
                        protein_ids_j.append(one_protein_node.get_node_id())

                                  
                    if used_nodes.has_key(j_node_id) and used_nodes.has_key(i_node_id):
                        # skip nodes when both have already been used to create a cluster
                        continue

                    elif used_nodes.has_key(j_node_id) or used_nodes.has_key(i_node_id):
                        # one of the two nodes was already used to create a new cluster: place the other one on that cluster
                        if used_nodes.has_key(j_node_id):
                            used_nodes[i_node_id] = None
                            to_add_node_id =  i_node_id
                            to_add_node = node_list[i]
                            to_add_index = i
                            to_add_proteins = protein_ids_i
                            already_added_index = j
                        else:
                            # ie. used node is 'i'
                            used_nodes[j_node_id] = None
                            to_add_node_id = j_node_id
                            to_add_node = node_list[j]
                            to_add_index = j
                            to_add_proteins = protein_ids_j
                            already_added_index = i
                        # END OF else: (if used_nodes.has_key(j_node_id):)

                        # get the cluster where already_added_index was placed...
                        previous_cluster= clustered_nodes_dic[already_added_index]
                        clustered_nodes_dic[to_add_index] = previous_cluster  # update the dic with cluster for this index_to_add

                        # add to previous_cluster the elements of to_add_node_id
                        to_add_node_list_elements = to_add_node.get_node_attribute_object().get_list_elements()
                        previous_cluster.get_node_attribute_object().add_element_list(to_add_node_list_elements)

                        # update correspondences and proteins_same_cluster

                        # -> get proteins that appear in the previous cluster
                        previous_elements = previous_cluster.get_node_attribute_object().get_list_elements()

                        protein_ids_previous = []
                        for one_protein_node in previous_elements:
                            protein_ids_previous.append(one_protein_node.get_node_id())

                        for one_previuos_protein in protein_ids_previous:
                            for one_to_add_protein in to_add_proteins:
                                if proteins_same_cluster.has_key(one_previuos_protein): proteins_same_cluster[one_previuos_protein][one_to_add_protein] = None
                                else:                                            proteins_same_cluster[one_previuos_protein] = {one_to_add_protein: None}
                                
                                if proteins_same_cluster.has_key(one_to_add_protein): proteins_same_cluster[one_to_add_protein][one_previuos_protein] = None
                                else:                                            proteins_same_cluster[one_to_add_protein] = {one_previuos_protein: None}
                            # END OF for one_to_add_protein in protein_ids_j:
                        # END OF for one_previuos_protein in protein_ids_i:

                        correspondences_new_clusters[previous_cluster.get_node_id()].append(to_add_node_id)
                        continue
                    
                    else:
                        # none of the nodes has been previosly used to create a new cluster
                        # create a new cluster and add it to ipg_graph

                        # used_nodes keeps track of which nodes have already been used in this iteration of the clustering
                        used_nodes[i_node_id]= None
                        used_nodes[j_node_id]= None

                        #print "checking if node id %s (orig=%s) and node id %s (orig=%s) have to update proteins_same_cluster" %(i_node_id,
                        #                                                                                                         node_list[i].is_original(),
                        #                                                                                                         j_node_id,
                        #                                                                                                         node_list[j].is_original())
                        
                        # proteins_same_cluster keeps track of which original nodes already co-exist in the same cluster
                        if node_list[i].is_original() or node_list[i].is_original():
                            #  -> if any of the two is original (or both): create entry between each pair of proteins in each cluster
                            #        -> not worrying about duplications because we are using a dic...
                            for one_i_protein in protein_ids_i:
                                for one_j_protein in protein_ids_j:
                                    #print "updating proteins_same_cluster for %s and %s" %(protein_ids_i, protein_ids_j)
                                    if proteins_same_cluster.has_key(one_i_protein): proteins_same_cluster[one_i_protein][one_j_protein] = None
                                    else:                                        proteins_same_cluster[one_i_protein] = {one_j_protein: None}

                                    if proteins_same_cluster.has_key(one_j_protein): proteins_same_cluster[one_j_protein][one_i_protein] = None
                                    else:                                        proteins_same_cluster[one_j_protein] = {one_i_protein: None}
                        # END OF if not (node_list[i].is_original() or node_list[i].is_original()):
                        
                        clustered_node= ipg_graph.create_grouped_node(node_id1= node_list[i].get_node_id(),
                                                                      node_id2= node_list[j].get_node_id(),
                                                                      old_graph=ipg_graph)

                        clustered_nodes_dic[i] = clustered_node 
                        clustered_nodes_dic[j] = clustered_node
                        new_clusters.append(clustered_node)
                        correspondences_new_clusters[clustered_node.get_node_id()] = [i_node_id, j_node_id]
                # END OF for pair_i_j in positions_to_cluster:


                if verbose_prots_cir:
                    print "New clusters have been created!"
                    for clustered_node in new_clusters:
                        print "------"
                        for element in clustered_node.get_node_attribute_object().get_list_elements():
                            print "clustered node %s contains protein %s" %(clustered_node.get_node_id(),
                                                                            element.get_node_id())
                    # END OF for clustered_node in new_clusters :


                # ---------------------------------------------
                # 5. Update the IPG graph with the new clusters
                # ---------------------------------------------

                # Now, add clusters in new_clusters to the ipg_graph. Add as well edges between this node and the intersection of nodes
                #  that were interacting with nodes that have been used to create this new cluster

                if verbose_prots_cir_shallow:
                    sys.stderr.write("a.")

                
                # for each new_cluster, find interactions that must be associated to it and add them to graph (add as well the new_cluster)
                for one_new_cluster in new_clusters:

                    ipg_graph.add_node(one_new_cluster)

                    new_cluster_id = one_new_cluster.get_node_id()

                    # ids of the nodes that were fused to generate this one_new_cluster
                    source_node_ids = correspondences_new_clusters[new_cluster_id]

                    # find the intersection of the sets of partners of all nodes that have been used to generate this one_new_cluster

                    # - start with all partners for first node...
                    #   (the Set is built here from the returned ids, so it can be narrowed in place
                    #    without an extra copy)
                    first_node = ipg_graph.get_node(identifier=source_node_ids[0] , get_mode="error")
                    neighbours_intersection = Set(first_node.get_neighbour_ids())

                    # - ... and keep only those partners that are shared with every other fused node
                    for correspondent_node_id in source_node_ids[1:]:
                        another_node = ipg_graph.get_node(identifier=correspondent_node_id , get_mode="error")
                        neighbours_intersection.intersection_update(Set(another_node.get_neighbour_ids()))
                    # END OF for correspondent_node_id in source_node_ids[1:]:

                    for one_neighbour_id in neighbours_intersection:
                        # add an edge between the new cluster and each partner shared by all the fused nodes

                        new_edge_attribute = GraphEdgeAttribute() # empty attribute: nothing to do with it at the moment
                        new_edge= GraphEdge(node1_id=one_neighbour_id, node2_id= new_cluster_id,
                                            attribute_object= new_edge_attribute, graph=ipg_graph)
                        ipg_graph.add_edge(new_edge)
                    # END OF for one_neighbour_id in neighbours_intersection:
                # END OF for one_new_cluster in new_clusters:
            

                if verbose_prots_cir:
                    print "REMOVING CLUSTERS THAT WERE USED TO CREATE A NEW CLUSTER (when they are not original)"
                    print "number of new clusters created: %s" %(len(new_clusters))
                
                # --------------------------------------------------------------------
                # 6. remove non-original nodes that have been fused into a new cluster
                # --------------------------------------------------------------------
                #   if a non-original cluster A has been used to create a new cluster B (by fusing it with another cluster), A
                #   must be removed from the graph

                if verbose_prots_cir_shallow:
                    sys.stderr.write("rm.")
                    
                for one_new_cluster in new_clusters:
                    # find which previous nodes were fused into this new cluster and remove those that are not original
                    for one_associated_node_id in correspondences_new_clusters[one_new_cluster.get_node_id()]:
                        one_associated_node = ipg_graph.get_node(identifier=one_associated_node_id, get_mode="error")
                        if not one_associated_node.is_original():
                            # the node object was just retrieved: reuse it instead of looking it up a second time
                            ipg_graph.rm_node(node_object= one_associated_node)
                    # END OF for one_associated_node_id in correspondences_new_clusters[one_new_cluster.get_node_id()]:
                # END OF for one_new_cluster in new_clusters:

                if verbose_prots_cir_shallow:
                    sys.stderr.write("x--")
                       
            # END OF while(1)

            
            #after_file_alternative_fd = file("ipg_network.clustering_finished.dot", "w")
            #ipg_graph.output_dot_file(output_target=after_file_alternative_fd, use_alternative_id="yes")

            
        # END OF elif cir_method == "prots":
    # -------------------------------------------------------------------------------------------------
    # option ==  cluster-by-go-terms
    # clusters the ppi network using go terms
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax:cluster-by-go-terms;output-target=blank;term-type=blank;score-threshold=blank;level-threshold=blank;distance-threshold=blank;rep-term=blank;print-id=blank
    #
    elif option == "cluster-by-go-terms":

        if exec_mode=="interactive":

            output_name = raw_input(string_get_output_name )
            term_type = raw_input(string_get_term_type)
            go_score_threshold = float(raw_input(string_get_go_score_threshold))
            sim_mode = raw_input(string_get_sim_mode)
            level_threshold = int(raw_input(string_get_level_threshold))
            distance_threshold = int(raw_input(string_get_distance_threshold))
            rep_term = raw_input(string_get_rep_term)
            print_id = raw_input(string_get_print_id)

        # END OF if exec_mode=="interactive":
        
        elif exec_mode=="batch":

            if arguments[arg_term_type] != "blank":        term_type = arguments[arg_term_type]
            else:                                          raise ValueError("Trying to cluster by go terms without giving a term type")
            
            if arguments[arg_score_threshold] != "blank":  go_score_threshold = float(arguments[arg_score_threshold])
            else:                                          raise ValueError("Trying to cluster by go terms without giving a score threshold")
            
            if arguments[arg_sim_mode] != "blank":         sim_mode = arguments[arg_sim_mode]
            else:                                          raise ValueError("Trying to cluster by go terms without giving a mode for similarity function")
            
            if arguments[arg_level_threshold] != "blank":  level_threshold = int(arguments[arg_level_threshold])
            else:                                          raise ValueError("Trying to cluster by go terms without giving a level threshold")
            
            if arguments[arg_distance_threshold] != "blank":  distance_threshold = int(arguments[arg_distance_threshold])
            else:                                             raise ValueError("Trying to cluster by go terms without giving a distance threshold")
  
  
            if arguments[arg_rep_term] != "blank":         rep_term = arguments[arg_rep_term]
            else:                                          raise ValueError("Trying to cluster by go terms without giving a rep term ")
            
            if arguments[arg_print_id] != "blank":         print_id = arguments[arg_print_id]
            else:                                          raise ValueError("Trying to cluster by go terms without giving a print id")

            if arguments[arg_output_target] != "blank":    output_name = arguments[arg_output_target]
            else:
                if results_prefix is None:
                    raise ValueError("Using default output name without giving a results prefix")

                output_name = results_dir + results_prefix + "." + str(go_score_threshold) + "." + term_type + "." + str(level_threshold) + "." + sim_mode +"." + str(distance_threshold) + "." + option
        # END OF elif exec_mode=="batch":

        
        if output_name == "screen":
            output_target = sys.stdout
        else:
            output_target = file(output_name,"w")

        
        """
        At this point, all parameters are set... Proceed..
        """

        if verbose:
            sys.stderr.write("\nClustering PPI network to a GO clustered network in output file %s\n" %(output_name))

        piana_api.create_go_clustered_network(output_target=output_target,
                                              term_type=term_type, score_threshold=go_score_threshold,
                                              sim_mode=sim_mode, level_threshold=level_threshold,
                                              distance_threshold=distance_threshold, rep_term=rep_term, print_id=print_id)

        if output_name != "screen":
            output_target.close()
    
    # -------------------------------------------------------------------------------------------------
    # option == print-parameters
    # print parameters used to create current network
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax:  no configuration command provided for printing parameters
    #
    elif option == "print-parameters":

        sys.stdout.write("-----------------------------------------------------------\n")
        sys.stdout.write("Network depth set to: %s\n" %depth)
        sys.stdout.write("Hub threshold set to: %s\n" %hub_threshold)
        sys.stdout.write("Input proteins type set to: %s\n" %input_proteins_type)
        sys.stdout.write("Output proteins type set to: %s\n" %output_proteins_type)
        sys.stdout.write("-----------------------------------------------------------\n")
        
    # END OF elif option == "print-parameters":

    # -------------------------------------------------------------------------------------------------
    # option == modify-parameters  !!NOT WORKING!!!
    #  modifies current parameters 
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: under work... doesn't currently work in batch mode
    #
    elif option == "modify-parameters":
        
        if exec_mode == "interactive":
            sys.stdout.write("Write here the new parameter values (leave empty if you want to leave it unchanged)\n")
            
            temp_input_proteins_type = raw_input("Type for input: " + string_get_protein_type)
            if temp_input_proteins_type != "":
                input_proteins_type   = temp_input_proteins_type
                
            temp_output_proteins_type = raw_input("Type for output: " + string_get_protein_type)
            if temp_output_proteins_type != "":
                output_proteins_type =temp_output_proteins_type
               
            temp_depth = int(raw_input(string_get_depth))
            if temp_depth != "":
               depth =temp_depth
               
            temp_hub_threshold = int(raw_input(string_get_hub_threshold))
            if temp_hub_threshold != "":
               hub_threshold =temp_hub_threshold
               
            # TO DO!!! Right now, I don't let the user change pianaDB during execution (it has no effect on piana_access)
            temp_piana_dbname = raw_input(string_get_piana_dbname)
            if temp_piana_dbname != "":
              piana_dbname  =temp_piana_dbname
              
            temp_piana_dbhost = raw_input(string_get_piana_dbhost)
            if temp_piana_dbhost != "":
               piana_dbhost =temp_piana_dbhost
               
            temp_piana_dbuser = raw_input(string_get_piana_dbuser)
            if temp_piana_dbuser != "":
               piana_dbuser =temp_piana_dbuser
               
            temp_piana_dbpass = raw_input(string_get_piana_dbpass)
            if temp_piana_dbpass != "":
               piana_dbpass =temp_piana_dbpass

            temp_results_prefix = raw_input(string_get_results_prefix)
            if temp_results_prefix != "":
               results_prefix =temp_results_prefix

            temp_results_directory = raw_input(string_get_results_directory)
            if temp_results_directory != "":
               results_directory =temp_results_directory
            
        elif exec_mode == "batch":
            # TO DO!!!! modify parameters by reading arguments
            pass
        
                    
    # END OF elif option == "modify-parameters":

    # -------------------------------------------------------------------------------------------------
    # option == filter  !!NOT WORKING!!!
    # apply filters to network
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: under work... doesn't currently work in batch mode
    #
    elif option == "filter":

        # This command is a stub: it tells the user that filtering is not
        # available and jumps straight to the next iteration of the enclosing
        # loop.
        sys.stdout.write("Function still not implemented\n")
        continue
        # NOTE(review): everything below is unreachable because of the
        # unconditional 'continue' above. It reads as a draft of the intended
        # behaviour: collect the names of the source databases whose include_*
        # flag is set, build a FilterInteractionSourceDB from that list and
        # apply it to piana_graph. Confirm before re-enabling.
        list_dbs = []

        if include_dip:
            list_dbs.append("dip")

        if include_string:
            list_dbs.append("string")

        if include_posas:
            list_dbs.append("posas")

        if include_ori:
            list_dbs.append("ori")

        if include_bind:
            list_dbs.append("bind")

        # single filter built from the selected database names
        db_filter = FilterInteractionSourceDB(list_dbs=list_dbs, belong_mode="filter_belonging")

        # piana_graph.filter() is given a list of filters; only one is used here
        filters = []
        filters.append(db_filter)

        if verbose:
            sys.stderr.write( "Applying filters for db list: %s \n" %list_dbs)

        piana_graph.filter(filters)
        
    # END OF elif option == "filter":

   
 
    # -------------------------------------------------------------------------------------------------
    # option == exit
    # exits the program
    # -------------------------------------------------------------------------------------------------
    # configuration file syntax: exit;
    #
    elif option == "exit":
        # Only announces the exit on stderr; nothing in this branch stops the
        # loop itself.
        # NOTE(review): presumably the enclosing while condition (option != exit)
        # ends the loop afterwards - confirm against the loop header.
        sys.stderr.write("Exiting....\n")
    
    # END OF elif option == "exit"

    # -------------------------------------------------------------------------------------------------
    # option unknown
    # -------------------------------------------------------------------------------------------------
    else:

        if exec_mode == "interactive":
            sys.stderr.write("Option <%s> unknown: choose one from the list\n" %option)
            sys.stderr.write("--------------------------------------------\n")

        elif exec_mode == "batch":
            raise ValueError("Trying to execute unknown command %s\n" %option)
        
    # END OF else: (if option == xxx)
        
        
    
# END OF while(option != exit)

# Normal termination of the script with a success status.
# A literal 0 is used instead of os.EX_OK: the value is the same where
# os.EX_OK exists, but that constant is only available on Unix and would
# additionally require the os module to be in scope at this point.
sys.exit(0)

