"""
 File       : PianaApi.py
 Author     : R. Aragues
 Creation   : 17.03.2005
 Contents   : class that acts as an interface to all piana commands
 Called from: piana.py and web interface to PIANA

=======================================================================================================

If you simply want to use PIANA as a library for protein-protein interaction networks, this class
provides you with methods that execute a number of useful commands.

For example, you would do something like this:

# 1. get from user strings describing the database, the host, mysql user and mysql password

piana_dbname= "pianaDB_limited"
piana_dbhost= "localhost"
piana_dbuser= None
piana_dbpass= None

# 2. create the piana api object
piana_api = PianaApi(piana_dbname=piana_dbname, piana_dbhost= piana_dbhost, piana_dbuser=piana_dbuser, piana_dbpass= piana_dbpass)

# 3. create a network from a list of proteins

#  3.1 get from user the file name, type of code used, depth desired, tax id, hub threshold, source dbs to use and methods to use
file_name = "dummy_file.txt"
this_file_protein_type = "geneName"  # any type in PianaGlobals.valid_protein_types
this_file_depth = 2
this_file_hub_threshold = 0
list_source_dbs= ["dip", "string"]
inverse_dbs= "no"
list_source_methods= ["y2h", "coip"]
inverse_methods= "no"

# 3.2 make the call to piana_api to add proteins in a file to the network
#    --> this will search for interactions in the database for those proteins
#        and create a network

piana_api.add_file_proteins_to_piana_graph(file_object= file(file_name,"r"),
                                           protein_type_name= this_file_protein_type,
                                           tax_id_value= this_file_tax_id,
                                           depth = this_file_depth,
                                           hub_threshold= this_file_hub_threshold,
                                           list_source_dbs= list_source_dbs,
                                           inverse_dbs= inverse_dbs,
                                           list_source_methods= list_source_methods,
                                           inverse_methods= inverse_methods )

# You could now continue calling commands from piana_api.*


"""

# PianaApi.py: class that acts as an interface to all piana commands
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues


import sys

from PianaGraph import *
from PianaGraphBuilder import *
from PianaGraphFilters import *
from PianaGraphExpansions import *
from PianaDBaccess import *
import PianaGlobals

from Clustering import *
from ClusteredGraphGenerator import *

from GoGraph import * # graph where each node is a GO term
from GoClusteringSimilarityFunction import *
from GoClusteringStopCondition import *

from IRGraph import * # graph where each node is an Interacting Region

from CirClusteringSimilarityFunction import *
from CirClusteringStopCondition import *


# Extra modules that are only imported when piana runs in "developer" or "advanced_user" mode.
# NOTE(review): piana_configuration_parameters is not imported by name in this file; presumably
#               it is made available by one of the star imports above -- confirm.
if piana_configuration_parameters.piana_mode == "developer" or piana_configuration_parameters.piana_mode == "advanced_user":
    from PatchDecomposition import *
    from ScoringFunction import *
    from ScoringFunctionBenchmark import *



# module-level flag, set to 0 here
# NOTE(review): presumably controls debugging output; its use is not visible in this section -- confirm.
verbose = 0


class PianaApi(object):
    """
    class that acts as an interface to all piana commands
    """
    
    def __init__(self, piana_dbname=None, piana_dbhost=None, piana_dbuser=None, piana_dbpass=None ):
        """
        Creates the connection to the piana database and an empty piana graph that uses that connection

        "piana_dbname", "piana_dbhost", "piana_dbuser" and "piana_dbpass" are handed over as they are to PianaDBaccess
        (piana_dbuser and piana_dbpass can be None)
        """

        # connection to piana: the same object is kept in self.piana_access and given to the graph created below
        db_access = PianaDBaccess(dbname=piana_dbname,
                                  dbhost=piana_dbhost,
                                  dbuser=piana_dbuser,
                                  dbpassword=piana_dbpass)
        self.piana_access = db_access

        # used for unifying protein names when printing information of protein lists
        self.key_list_to_use = 1

        # names used by the user, kept in order to (in case of using the same protein code type) use those
        # names for the output. Contents of this dictionary will be None
        self.user_root_proteins = {}

        # types of protein codes used by the user. To be used in conjunction with user_root_proteins.
        self.user_code_types = {}

        # the network starts as an empty PianaGraph object
        self.piana_graph = PianaGraph("Piana Graph", piana_access_object=db_access)



    #
    # these dictionaries are used to check the arguments received by PianaApi methods
    #
    
    # Each table below is a dictionary used as a set of accepted values: only the keys matter
    # (every value is None), and membership is tested with has_key() in _check_arguments.

    # accepted values for the "info" argument
    info_values = dict.fromkeys(("all", "scop", "cath", "no"))

    # tables taken as they are from PianaGlobals
    valid_protein_types = PianaGlobals.valid_protein_types
    valid_source_dbs = PianaGlobals.source_databases
    valid_methods = PianaGlobals.method_names

    # accepted values for the output related arguments
    valid_output_formats = dict.fromkeys(("table", "network"))
    valid_output_modes = dict.fromkeys(("compact", "extended"))
    valid_print_modes = dict.fromkeys(("all", "all_root", "only_root", "connecting"))
    valid_format_modes = dict.fromkeys(("html", "txt", "dot"))

    # accepted values for the expansion related arguments
    valid_expansion_types = PianaGlobals.expansion_types
    valid_expansion_modes = dict.fromkeys(("all", "root"))
    valid_exp_output_modes = dict.fromkeys(("add", "print"))

    # accepted values for the "term_type" argument
    valid_term_types = dict.fromkeys(("molecular_function",
                                      "biological_process",
                                      "cellular_component"))

    # accepted values for the "sim_mode" argument
    valid_sim_modes = dict.fromkeys(("min", "max", "random", "average"))

    # generic two-value tables
    yes_no_values = dict.fromkeys(("yes", "no"))
    max_min_values = dict.fromkeys(("max", "min"))
    
    def _check_arguments(self,
                         method_name=None,
                         protein_code="def_val",
                         another_protein_code= "def_val",
                         protein_list= "def_val",
                         file_object= "def_val",
                         another_file_object= "def_val",
                         protein_type_name=  "def_val",
                         another_protein_type_name= "def_val",
                         list_alternative_type_names = "def_val",
                         protein_species_name= "def_val",
                         network_species_name= "def_val",
                         tax_id= "def_val",
                         depth="def_val",
                         distance="def_val",
                         info="def_val",
                         hub_threshold="def_val",
                         list_source_dbs="def_val",
                         inverse_dbs="def_val",
                         source_db="def_val",
                         list_source_methods="def_val",
                         inverse_methods="def_val",
                         method="def_val",
                         confidence="def_val",
                         expansion_type="def_val",
                         expansion_mode="def_val",
                         expansion_threshold="def_val",
                         exp_output_mode="def_val",
                         output_format="def_val",
                         output_mode="def_val",
                         print_mode="def_val",
                         format_mode="def_val",
                         term_type="def_val",
                         score_threshold="def_val",
                         sim_mode="def_val",
                         level_threshold="def_val",
                         distance_threshold="def_val",
                         rep_term="def_val",
                         print_id="def_val",
                         molecular_error_bounds="def_val",
                         isoelectric_error_bounds="def_val",
                         list_keywords="def_val", 
                         intersection_dbs="def_val",
                         file_over_expressed= "def_val",
                         file_infra_expressed= "def_val",
                         expression_protein_type= "def_val"):
        
        """
        checks arguments for PianaApi methods: arguments that are not to be checked must be left to their default value 'def_val' to make sure they
        are not checked...
        """


        if method_name is None:
            raise ValueError("check_arguments requires a method name")
        
        if protein_code != "def_val":
            if protein_code is None:
                # i cannot think of other invalid values for protein code...
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s does not accept %s as a protein code\
\n------------------------------------------------------------------------------------------\n" %(method_name, protein_code))
            
        if another_protein_code != "def_val":
            if another_protein_code is None:
                # i cannot think of other invalid values for another_protein code...
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s does not accept %s as a another_protein code\
\n------------------------------------------------------------------------------------------\n" %(method_name, another_protein_code))
            
        if protein_list != "def_val":
            if protein_list is None or not isinstance(protein_list, list):
                # i cannot think of other invalid values for protein list...
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s does not accept %s as a protein list\
\n------------------------------------------------------------------------------------------\n" %(method_name, protein_list))
            
        if file_object != "def_val":
            if exp_output_mode == "add":
                pass # in this mode, no file is required
            else:
                if not isinstance(file_object, file):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a file object (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, file_object))
        
        if another_file_object != "def_val":
            if exp_output_mode == "add":
                pass # in this mode, no file is required
            else:
                if not isinstance(another_file_object, file):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a file object (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, another_file_object))
            
        if another_file_object != "def_val":
            if not isinstance(another_file_object, file):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a file object (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, another_file_object))
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid protein type name (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, expression_protein_type))
            
        if protein_type_name != "def_val":
            if not PianaApi.valid_protein_types.has_key(protein_type_name):
                valid_string = "\n       --> valid protein types are: "
                for tmp_protein_type in PianaApi.valid_protein_types.keys():
                    valid_string += "%s " %tmp_protein_type
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid protein type name (your value was: %s)\n\
%s\
\n------------------------------------------------------------------------------------------\n" %(method_name, protein_type_name, valid_string))
        
        if another_protein_type_name != "def_val":
            if not PianaApi.valid_protein_types.has_key(another_protein_type_name):
                valid_string = "\n       --> valid protein types are: "
                for tmp_protein_type in PianaApi.valid_protein_types.keys():
                    valid_string += "%s " %tmp_protein_type
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid protein type name (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, another_protein_type_name, valid_string))

        if list_alternative_type_names != "def_val":
            if not isinstance(list_alternative_type_names, list):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a list for alternative types (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, list_alternative_type_names))

            for type_name in list_alternative_type_names:
                if not PianaApi.valid_protein_types.has_key(type_name):
                    valid_string = "\n       --> valid protein types are: "
                    for tmp_protein_type in PianaApi.valid_protein_types.keys():
                        valid_string += "%s " %tmp_protein_type
                    valid_string += "\n"
                
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires valid protein type names for alternative type names (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, type_name , valid_string))
                # END OF if not PianaApi.valid_protein_types.has_key(type_name):
            # END OF for type_name in list_alternative_type_names:
                
        if protein_species_name != "def_val":
            if protein_species_name is None:
                # i cannot think of other invalid values for protein species...
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s does not accept %s as a protein species\
\n------------------------------------------------------------------------------------------\n" %(method_name, protein_species_name))
                 
        if network_species_name != "def_val":
            # this can be None only if tax_id is not None: it is the argument for create_species_piana_graph
            if tax_id != "def_val":
                if network_species_name is None and tax_id is None:
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s does not accept None for both protein species and tax_id\
\n------------------------------------------------------------------------------------------\n" %(method_name))
                    
            
        if depth != "def_val":
            if not isinstance(depth, int) or depth < 0:
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid depth value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, depth))
            
        if distance != "def_val":
            if not isinstance(distance, int) or distance < 1:
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid distance value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, distance))

        if info != "def_val":
            if not PianaApi.info_values.has_key(info):
                valid_string ="Invalid info value set. Valid info values are: "
                for info_val in PianaApi.info_values:
                    valid_string += " %s" %(info_val)
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid info value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, info, valid_string))
            
        if hub_threshold != "def_val":
            if not isinstance(hub_threshold, int) or hub_threshold < 0:
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid hub_threshold value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, hub_threshold))
            
        if list_source_dbs != "def_val":
            if list_source_dbs != "all":
                if not isinstance(list_source_dbs, list):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a list for source dbs (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, list_source_dbs))

                for tmp_source_db in list_source_dbs:
                    if not PianaApi.valid_source_dbs.has_key(tmp_source_db):
                        raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s found an invalid source db %s\
\n------------------------------------------------------------------------------------------\n" %(method_name, tmp_source_db ))
            
        if list_source_methods != "def_val":
            if list_source_methods != "all":
                if not isinstance(list_source_methods, list):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a list for source methods (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, list_source_methods))

                for tmp_method in list_source_methods:
                    if not PianaApi.valid_methods.has_key(tmp_method):
                        raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s found an invalid source method %s\
\n------------------------------------------------------------------------------------------\n" %(method_name, tmp_method))
            # END OF if list_source_methods != "all":

        if inverse_dbs != "def_val":
            if not PianaApi.yes_no_values.has_key(inverse_dbs):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid inverse_dbs value (yes or no) (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, inverse_dbs ))
            
        if inverse_methods != "def_val":
            if not PianaApi.yes_no_values.has_key(inverse_methods):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid inverse_methods value (yes or no) (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, inverse_methods ))

        if source_db != "def_val":
            if source_db is not None:
                if not PianaApi.valid_source_dbs.has_key(source_db):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s found an invalid source db %s\
\n------------------------------------------------------------------------------------------\n" %(method_name, source_db ))
            
        if method != "def_val":
            if method is not None:
                if not PianaApi.valid_methods.has_key(method):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s found an invalid method %s\
\n------------------------------------------------------------------------------------------\n" %(method_name, method ))
                
        if confidence != "def_val":
            if not isinstance(confidence, int):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid confidence value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, confidence))
            
        if expansion_type != "def_val":  
            if not PianaApi.valid_expansion_types.has_key(expansion_type):

                valid_string = "     --> valid expansion types are: "
                for tmp_exp_type in PianaApi.valid_expansion_types:
                    valid_string += " %s" %tmp_exp_type
                valid_string += "\n"

                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid expansion_type value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, expansion_type, valid_string))
            
        if expansion_mode != "def_val":  
            if not PianaApi.valid_expansion_modes.has_key(expansion_mode):

                valid_string = "     --> valid expansion modes are: "
                for tmp_exp_mode in PianaApi.valid_expansion_modes:
                    valid_string += " %s" %tmp_exp_mode
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid expansion_mode value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, expansion_mode, valid_string))
            
        if expansion_threshold != "def_val":
            if not isinstance(expansion_threshold, int) or expansion_threshold < 0:
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid expansion_threshold value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, expansion_threshold))
            
        if exp_output_mode != "def_val":  
            if not PianaApi.valid_exp_output_modes.has_key(exp_output_mode):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid exp_output_mode value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, exp_output_mode))
            
        if output_format != "def_val":  
            if not PianaApi.valid_output_formats.has_key(output_format):

                valid_string = "     --> valid output formats are: "
                for tmp_out_format in PianaApi.valid_output_formats:
                    valid_string += " %s" %tmp_out_format
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid output_format value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, output_format, valid_string))
            
        if output_mode != "def_val":  
            if not PianaApi.valid_output_modes.has_key(output_mode):

                valid_string = "     --> valid output modes are: "
                for tmp_out_mode in PianaApi.valid_output_modes:
                    valid_string += " %s" %tmp_out_mode
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid output_mode value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, output_mode, valid_string))
            
        if print_mode != "def_val":  
            if not PianaApi.valid_print_modes.has_key(print_mode):

                valid_string = "     --> valid print modes are: "
                for tmp_print_mode in PianaApi.valid_print_modes:
                    valid_string += " %s" %tmp_print_mode
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid print_mode value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, print_mode, valid_string))
            
        if format_mode != "def_val":  
            if not PianaApi.valid_format_modes.has_key(format_mode):

                valid_string = "     --> valid format modes are: "
                for tmp_format_mode in PianaApi.valid_format_modes:
                    valid_string += " %s" %tmp_format_mode
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid format_mode value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, format_mode, valid_string))
            
        if term_type != "def_val":  
            if not PianaApi.valid_term_types.has_key(term_type):

                valid_string = "     --> valid term types are: "
                for tmp_term_type in PianaApi.valid_term_types:
                    valid_string += " %s" %tmp_term_type
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid term_type value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, term_type, valid_string))
            
        if score_threshold != "def_val":
            if not isinstance(score_threshold, float) or score_threshold <= 0:
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid score_threshold value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, score_threshold))
            
        if sim_mode != "def_val":  
            if not PianaApi.valid_sim_modes.has_key(sim_mode):

                valid_string = "     --> valid sim modes are: "
                for tmp_sim_mode in PianaApi.valid_sim_modes:
                    valid_string += " %s" %tmp_sim_mode
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid sim_mode value (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, sim_mode, valid_string))
            
        if level_threshold != "def_val":
            if not isinstance(level_threshold, int) or level_threshold < 0:
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid level_threshold value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, level_threshold))
            
        if distance_threshold != "def_val":
            if not isinstance(distance_threshold, int) or distance_threshold < 0:
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid distance_threshold value (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, distance_threshold))
            
        if rep_term != "def_val":  
            if not PianaApi.max_min_values.has_key(rep_term):

                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid rep_term value (min or max) (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, rep_term))
            
        if print_id != "def_val":  
            if not PianaApi.yes_no_values.has_key(print_id):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a valid print_id value (yes or no) (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, print_id))

        if molecular_error_bounds != "def_val":
            if not isinstance(molecular_error_bounds, list):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a list for molecular weight error bounds (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, molecular_error_bounds))

            for error_bound in molecular_error_bounds:
                if not isinstance(error_bound, float):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s found an invalid list of molecular weight error bounds %s\
\n------------------------------------------------------------------------------------------\n" %(method_name, error_bound))

        if isoelectric_error_bounds != "def_val":
            if not isinstance(isoelectric_error_bounds, list):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a list for isoelectric point error bounds (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, isoelectric_error_bounds))

            for error_bound in isoelectric_error_bounds:
                if not isinstance(error_bound, float):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s found an invalid list of isoelectric point error bounds %s\
\n------------------------------------------------------------------------------------------\n" %(method_name, error_bound))

        if list_keywords != "def_val":
            if not isinstance(list_keywords, list):
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a list of keywords(your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, list_keywords))
            
        if intersection_dbs != "def_val":
            if intersection_dbs is not None:
                if not isinstance(intersection_dbs, list):
                    raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires a list for intersection dbs (your value was: %s)\
\n------------------------------------------------------------------------------------------\n" %(method_name, intersection_dbs))

                for tmp_source_db in intersection_dbs:
                    if not PianaApi.valid_source_dbs.has_key(tmp_source_db):
                        raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s found an invalid intersection db %s\
\n------------------------------------------------------------------------------------------\n" %(method_name, tmp_source_db ))
            # END OF  if intersection_dbs is not None:

        if (file_over_expressed != "def_val" and file_over_expressed is not None) or (
            file_infra_expressed != "def_val" and file_infra_expressed is not None):
            if not PianaApi.valid_protein_types.has_key(expression_protein_type):
                valid_string = "\n       --> valid protein types are: "
                for tmp_protein_type in PianaApi.valid_protein_types.keys():
                    valid_string += "%s " %tmp_protein_type
                valid_string += "\n"
                
                raise ValueError("\
\n------------------------------------------------------------------------------------------\n \
Attention! Check your parameters and command arguments! Your configuration file contains incorrect values...\n \n \
PianaApi method %s requires valid protein type names for expression protein type (your value was: %s)\n \
%s \
\n------------------------------------------------------------------------------------------\n" %(method_name, expression_protein_type , valid_string))

        if file_over_expressed != "def_val":
            # nothing to check...
            pass

        if file_infra_expressed != "def_val":
            # nothing to check...
            pass
                
    # ------------------------------------
    #
    #  Reset, Load, Save ... Piana Piana_graph
    # 
    # ------------------------------------

    def reset_piana_graph(self):
        """
        Starts a new piana_graph.

        Rebinds self.piana_graph to a newly constructed PianaGraph named "Piana Graph" that uses
        self.piana_access as its piana access object. Nothing else is modified by this method
        (in particular, self.user_root_proteins is left untouched).
        """
        self.piana_graph= PianaGraph("Piana Graph", piana_access_object= self.piana_access)

    def load_piana_graph(self, file_object):
        """
        Loads a previously saved piana_graph into the current piana_graph
        (ie. replaces the current piana_graph by the one unpickled from file_object).

        "file_object" must have been opened using binary read (ie. file_object= file(file_name, "rb") )

        The argument is validated through self._check_arguments before anything is read.

        NOTE(review): the graph is read with cPickle.load. Unpickling data can execute arbitrary code,
                      so this method should only be given files that come from a trusted source
                      (eg. files written by save_piana_graph).
        """
        self._check_arguments(method_name= "load_piana_graph",
                              file_object= file_object)
            
        # whatever object the pickle stream describes becomes the current graph (no type check is done here)
        self.piana_graph = cPickle.load(file_object)

    def save_piana_graph(self, file_object):
        """
        Saves the current piana_graph (self.piana_graph) in file file_object, using cPickle.

        "file_object" must have been created using binary write (ie. file_object = file(file_name, "wb") )

        The argument is validated through self._check_arguments before anything is written.
        This method does not close file_object.
        """
        self._check_arguments(method_name= "save_piana_graph",
                              file_object= file_object)
        
        cPickle.dump(self.piana_graph, file_object , 2)  # 2 is the pickle protocol to be applied


    

    # ------------------------------------
    #
    #  Miscellaneous
    # 
    # ------------------------------------
    def get_one_tax_id_from_species_name(self, species_name=None):
        """
        returns the tax id associated to "species_name"

        If there are more than one tax id associated, raises an error (to avoid ambiguities)

        If there is no tax id associated to it, raises an error (to avoid using a name that doesn't exist)
        """

        if species_name == "all":
            # species_name all is codified in tax id with a 0
            return 0
        
        list_tax_ids = self.piana_access.get_taxonomies_from_species_name(species_name_value= species_name)

        num_tax_ids = len(list_tax_ids)

        if num_tax_ids == 1:
            return list_tax_ids[0]
        elif  num_tax_ids == 0:
            raise ValueError("No tax id associated to species name <%s>\n" %(species_name))
        else:
            raise ValueError("Ambiguous species name: <%s> is associated to tax ids %s \n" %(species_name, list_tax_ids))
            

    # ------------------------------------
    #
    #  Create, Add, ... to Piana Piana_graph
    # 
    # ------------------------------------
        
    def add_protein_to_piana_graph(self, protein_code, protein_type_name, tax_id_value,
                                   depth, hub_threshold, list_source_dbs="all",  inverse_dbs="no", list_source_methods="all", inverse_methods="no" ):
        """
        Adds one protein and its interactions to current piana_graph.
        
        
        "protein_code" is a string with the protein code (must be of type protein_type_name)

        "protein_type_name" is the type of code used for protein protein_code

        "tax_id_value" sets the species of the protein that is being added (can be used for eliminating ambiguities between codes across species)
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy

        "depth" fixes the depth at which interactions will be added (eg. depth 2 will add parters of partners of protein protein_code)

        "hub_threshold" sets the maximum number of interactions a protein can have for it to be added to the piana_graph
           -> 0 is equivalent to not applying any hub threshold

        "list_source_dbs" sets the interaction databases that will be used to get interactions when building the piana_graph
           -> can be a list of dbs (eg ["dip", "string"]) or "all" (all source dbs used)

        "inverse_dbs" can be:
                       no (databases in list_source_dbs will be used to build the network)
                       yes (all databases except those in list_source_dbs will be used to build the network)

        "list_source_methods" sets the methods that will be used to get interactions when building the piana_graph
           -> can be a list of methods (eg ["y2h", "tap"]) or "all" (all methods used)

        "inverse_methods" can be:
                       no (methods in list_source_methods will be used to build the network)
                       yes (all methods except those in list_source_methods will be used to build the network)
        """

        self._check_arguments(method_name= "add_protein_to_piana_graph",
                              protein_code=protein_code,
                              protein_type_name=protein_type_name,
                              tax_id=tax_id_value,
                              depth=depth,
                              hub_threshold=hub_threshold,
                              list_source_dbs=list_source_dbs,
                              inverse_dbs=inverse_dbs,
                              list_source_methods= list_source_methods,
                              inverse_methods=inverse_methods)
        
        # builds a piana_graph for the protein passed as argument and then joins the current piana_graph (self.piana_graph) to the
        # piana_graph built from the new protein

        self.user_root_proteins[protein_code.upper()] = None # keeping the list of root proteins updated
        
        temp_piana_graph = PianaGraph("Temp Graph", piana_access_object= self.piana_access)
        
        try:
            temp_piana_builder = PianaGraphBuilder(piana_access_object=self.piana_access,
                                                   depth=depth,
                                                   hub_threshold= hub_threshold,
                                                   list_protein_codes= [protein_code],
                                                   code_type_name= protein_type_name,
                                                   tax_id_value=tax_id_value,
                                                   list_source_dbs= list_source_dbs,
                                                   inverse_dbs= inverse_dbs,
                                                   list_source_methods= list_source_methods,
                                                   inverse_methods= inverse_methods)
        
        
            temp_piana_graph.build_graph(temp_piana_builder)
        
            self.piana_graph.join_graphs(temp_piana_graph)
        
        except Exception, inst:
            # no information available for these proteins
            sys.stderr.write("---------------------------------------------------------------------------------------------------------------\n")
            sys.stderr.write("No proteins were added! Database error, codes were unknown or you've set the wrong type for your proteins.\n")
            sys.stderr.write(" Error reported: %s -- %s)\n" %(inst, sys.exc_info()[0]))
            sys.stderr.write("---------------------------------------------------------------------------------------------------------------\n")

        
    def add_file_proteins_to_piana_graph(self, file_object, protein_type_name, tax_id_value,
                                         depth, hub_threshold, list_source_dbs="all", inverse_dbs="no", list_source_methods="all", inverse_methods="no"):
        """
        Adds proteins and their interactions to current piana_graph. Proteins to add are in file "file_object"
        
        "file_object" is a file object pointing to a file that contains one protein per line

        "protein_type_name" is the type of code used for protein protein_code

        "tax_id_value" sets the species of the proteins that are being added (can be used for eliminating ambiguities between codes across species)
           --> valid species names are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy

        "depth" fixes the depth at which interactions will be added (eg. depth 2 will add parters of partners of protein protein_code)

        "hub_threshold" sets the maximum number of interactions a protein can have for it to be added to the piana_graph
           -> 0 is equivalent to not applying any hub threshold

        "list_source_dbs" sets the interaction databases that will be used to get interactions when building the piana_graph
           -> can be a list of dbs (eg ["dip", "string"]) or "all" (all source dbs used)

        "inverse_dbs" can be:
                       no (databases in list_source_dbs will be used to build the network)
                       yes (all databases except those in list_source_dbs will be used to build the network)

        "list_source_methods" sets the methods that will be used to get interactions when building the piana_graph
           -> can be a list of methods (eg ["y2h", "tap"]) or "all" (all methods used)

        "inverse_methods" can be:
                       no (methods in list_source_methods will be used to build the network)
                       yes (all methods except those in list_source_methods will be used to build the network)
        """

        self._check_arguments(method_name= "add_file_proteins_to_piana_graph",
                              file_object=file_object,
                              protein_type_name=protein_type_name,
                              tax_id=tax_id_value,
                              depth=depth,
                              hub_threshold=hub_threshold,
                              list_source_dbs=list_source_dbs,
                              inverse_dbs=inverse_dbs,
                              list_source_methods= list_source_methods,
                              inverse_methods=inverse_methods)
        
        # builds a piana_graph for the proteins of the file and then joins the current piana_graph (self.piana_graph) to the
        # piana_graph built from these new proteins
        
        list_proteins = utilities.return_proteins_from_file(file_object= file_object,
                                                            proteins_type=protein_type_name)


        for protein_name in list_proteins:
            self.user_root_proteins[protein_name.upper()] = None # keeping the list of root proteins updated
            
        temp_piana_graph = PianaGraph("Temp Graph", piana_access_object= self.piana_access)

        try:
            temp_piana_builder = PianaGraphBuilder(piana_access_object=self.piana_access,
                                                   depth=depth,
                                                   hub_threshold= hub_threshold,
                                                   list_protein_codes= list_proteins,
                                                   code_type_name= protein_type_name,
                                                   tax_id_value=tax_id_value,
                                                   list_source_dbs= list_source_dbs,
                                                   inverse_dbs= inverse_dbs,
                                                   list_source_methods= list_source_methods,
                                                   inverse_methods= inverse_methods)
            
            temp_piana_graph.build_graph(temp_piana_builder)
            
            self.piana_graph.join_graphs(temp_piana_graph)

        except Exception, inst:
            # no information available for these proteins
            sys.stderr.write("---------------------------------------------------------------------------------------------------------------\n")
            sys.stderr.write("No proteins were added! Codes were unknown or you've set the wrong type for your proteins. error:%s -- %s)\n" %(
                inst, sys.exc_info()[0]))
            sys.stderr.write("---------------------------------------------------------------------------------------------------------------\n")


    def add_interaction_to_piana_graph(self, protein_a, protein_b, protein_type_name, source_db= None, method= None, confidence=1):
        """
        Adds one interaction between protein_a and protein_b to current piana_graph
         (adds only this interaction. This command doesn't search for interactions in the database for these proteins.)
         (this is not inserting the interaction into the piana database: it is just adding it to the current network)

        The arguments are validated through self._check_arguments and then handed over, unchanged,
        to self.piana_graph.add_interaction.

        "protein_a" and "protein_b" are the codes of the two interacting proteins

        "protein_type_name" is the type of code used for protein codes protein_a and protein_b (it has to be the same for both)

        "source_db" is the database from which you have obtained the interaction
            --> set it to None if this is not relevant for your analysis
            --> set it to 'user' if you want to label it as 'added by user' (will appear in yellow)
            
        "method" is the method you have used to detect the interaction
            --> set it to None if this is not relevant for your analysis
            
        "confidence" is not currently being used... set it to 1
        
        """
        
        self._check_arguments(method_name= "add_interaction_to_piana_graph",
                              protein_code=protein_a,
                              another_protein_code=protein_b,
                              protein_type_name=protein_type_name,
                              source_db=source_db,
                              method=method,
                              confidence=confidence)
        
        self.piana_graph.add_interaction(protein_a=protein_a, protein_b=protein_b, source_db=source_db, method=method,
                                         confidence=confidence, protein_type_name= protein_type_name)


    def add_file_interactions_to_piana_graph(self, file_object, protein_type_name):
        """
        Adds interactions from a file to current piana_graph
         (adds only those interactions described in the file. This command doesn't search for interactions in the database for these proteins.)
         (this is not inserting the interactions into the piana database: it is just adding it to the current network)

        "file_object" is a file object describing one interaction per line

            --> The interactions file follows the format (set unknown values to None):

            protein_a<TAB>protein_b<TAB>source_database<TAB>detection_method<TAB>confidence_score

            for example, a line could be:

            HOG1    MOT1   None    y2h   None


            This format is described in detail in file piana/code/dbParsers/piana_text_intParser/README.piana_interaction_data_format


        "protein_type_name" is the type of code used for protein protein_code
        
        """

        self._check_arguments(method_name= "add_file_interactions_to_piana_graph",
                              file_object=file_object,
                              protein_type_name=protein_type_name)
        
        for line in file_object:

            line_fields = line.split()

            protein_a  = line_fields[0]
            protein_b  = line_fields[1]

            # to allow the file having just the two proteins in a line, check for line length
            # depending on the line length we suppose that the user has given a different number of parameters
            
            if len(line_fields) > 2:
                if line_fields[2] == "None":
                    source_db = None
                else:
                    source_db  = line_fields[2]
            else:
                source_db = None

            if len(line_fields) > 3:
                if line_fields[3] == "None":
                    method     = None
                else:
                    method     = line_fields[3]
            else:
                method = None

            if len(line_fields) > 4:
                if line_fields[4] == "None":
                    confidence = None
                else:
                    confidence = float(line_fields[4])
            else:
                confidence = None

            self.piana_graph.add_interaction(protein_a=protein_a, protein_b=protein_b, source_db=source_db, method=method,
                                        confidence=confidence, protein_type_name= protein_type_name)

        # END OF for line in file_fd:


    def create_species_piana_graph(self,  species_name=None, tax_id=None, hub_threshold=0,
                                   list_source_dbs="all", inverse_dbs="no", list_source_methods="all", inverse_methods="no"):
        """
        Replaces current piana_graph with the protein-protein interaction piana_graph for a given species (using all proteins)

        The user can fix the species using a tax id or a species name (don't use both at the same time):
            "taxonomy_id" is the tax id for which the piana_graph will be built (set it to None if not using it)
               -> has to be a valid tax id (eg. 9606; ...)
        
            "species_name" is the species for which the piana_graph will be built (set it to None if not using it)
               -> has to be the name of a species (e.g "human", "yeast", ...)

          --> in case both species_name_value and taxonomy_value are set to something different from None, taxonomy_value is used

        "hub_threshold" sets the maximum number of interactions a protein can have for it to be added to the piana_graph
           -> 0 is equivalent to not applying any hub threshold

        "list_source_dbs" sets the interaction databases that will be used to get interactions when building the piana_graph
           -> can be a list of dbs (eg ["dip", "string"]) or "all" (all source dbs used)

        "inverse_dbs" can be:
                       no (databases in list_source_dbs will be used to build the network)
                       yes (all databases except those in list_source_dbs will be used to build the network)

        "list_source_methods" sets the methods that will be used to get interactions when building the piana_graph
           -> can be a list of methods (eg ["y2h", "tap"]) or "all" (all methods used)

        "inverse_methods" can be:
                       no (methods in list_source_methods will be used to build the network)
                       yes (all methods except those in list_source_methods will be used to build the network)


        Note: no depth argument is needed, since we are adding all interactions for all proteins of a given species
    
        """
        
        self._check_arguments(method_name= "create_species_piana_graph",
                              network_species_name=species_name,
                              tax_id=tax_id,
                              hub_threshold=hub_threshold,
                              list_source_dbs=list_source_dbs,
                              inverse_dbs=inverse_dbs,
                              list_source_methods= list_source_methods,
                              inverse_methods=inverse_methods)
        
        proteins_with_given_species = self.piana_access.get_proteins_sharing_species(species_name_value= species_name,
                                                                                     taxonomy_value= tax_id)


        if proteins_with_given_species:
            # Create piana_graph for all proteins in proteins_with_given_species
            #             In the call to the builder we set tax_id_value to 0 (ie. ignore species) because the list of proteinPiana is 
            #             already only of the species we wanted... and leaving it to 0 will speed up things in the sql queries
            piana_builder = PianaGraphBuilder(piana_access_object=self.piana_access,
                                              depth=1,
                                              hub_threshold= hub_threshold,
                                              list_protein_codes= proteins_with_given_species,
                                              code_type_name= "proteinPiana",
                                              tax_id_value=0,
                                              list_source_dbs= list_source_dbs,
                                              inverse_dbs= inverse_dbs,
                                              list_source_methods= list_source_methods,
                                              inverse_methods= inverse_methods)

            self.piana_graph.build_graph(piana_builder)
        # END OF if proteins_with_given_species:

        else:
            sys.stderr.write("\nNo proteins found for species name %s or tax id %s !!!!\n" %(species_name, taxonomy_id))


    def create_database_method_piana_graph(self, tax_id_value=0, list_source_dbs="all", inverse_dbs="no", list_source_methods="all", inverse_methods="no"):
        """
        Populates piana_graph with the interactions found for a given list of databases and methods
        (ie. adds all interactions that appear in databases list_source_dbs that are of a method that appear in list_source_methods)

        Note: the nodes and edges are added to the current self.piana_graph: this method does not
              reset the piana_graph by itself

        "tax_id_value" is the species for which the piana_graph will be built
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy
           --> when it is not 0, an interaction is only added if both proteins have tax_id_value among their taxonomy ids

        "list_source_dbs" sets the interaction databases that will be used to get interactions when building the piana_graph
           -> can be a list of dbs (eg ["dip", "string"]) or "all" (all source dbs used)

        "inverse_dbs" can be:
                       no (databases in list_source_dbs will be used to build the network)
                       yes (all databases except those in list_source_dbs will be used to build the network)

        "list_source_methods" sets the methods that will be used to get interactions when building the piana_graph
           -> can be a list of methods (eg ["y2h", "tap"]) or "all" (all methods used)

        "inverse_methods" can be:
                       no (methods in list_source_methods will be used to build the network)
                       yes (all methods except those in list_source_methods will be used to build the network)

        Note: no depth argument is needed, since we are adding all interactions that respect a certain criteria

        Returns nothing: the result is the modified self.piana_graph
        """

        self._check_arguments(method_name= "create_database_method_piana_graph",
                              tax_id=tax_id_value,
                              list_source_dbs=list_source_dbs,
                              inverse_dbs=inverse_dbs,
                              list_source_methods= list_source_methods,
                              inverse_methods=inverse_methods)

        # TO DO!!! Add argument hub_threshold to this method

        # this call returns the list of triplets (proteinPiana1, proteinPiana2, interactionPiana) that respect
        # the databases and methods restrictions set by the user
        all_database_method_interactions = self.piana_access.get_all_protein_protein_interactions(list_source_dbs= list_source_dbs,
                                                                                                  inverse_dbs= inverse_dbs,
                                                                                                  list_source_methods= list_source_methods,
                                                                                                  inverse_methods= inverse_methods)

        # the same protein usually appears in many interactions: remember for each proteinPiana whether it is
        # of the species requested, so the taxonomy ids of a protein are only retrieved once
        #   -> keys are proteinPianas, contents are True (protein is of species tax_id_value) or False
        protein_is_of_species = {}

        for interaction in all_database_method_interactions:

            # interaction[0] is proteinPianaA
            # interaction[1] is proteinPianaB
            # interaction[2] is interactionPiana

            if tax_id_value != 0:
                both_proteins_of_species = True

                for proteinPiana in (interaction[0], interaction[1]):
                    if proteinPiana not in protein_is_of_species:
                        protein_tax_ids = self.piana_access.get_protein_taxonomy_ids(proteinPiana_value = proteinPiana)
                        protein_is_of_species[proteinPiana] = tax_id_value in protein_tax_ids

                    if not protein_is_of_species[proteinPiana]:
                        # no need to check the partner: one protein of another species is enough to skip
                        both_proteins_of_species = False
                        break
                # END OF for proteinPiana in (interaction[0], interaction[1]):

                if not both_proteins_of_species:
                    # skip interaction if any of the two proteins is not of the species selected by user
                    continue
            # END OF  if tax_id_value != 0:

            new_node_a_attribute = PianaGraphNodeAttribute(proteinPiana_value=interaction[0],
                                                           mem_mode = "onDemand",
                                                           piana_access = self.piana_access)
            new_node_b_attribute = PianaGraphNodeAttribute(proteinPiana_value=interaction[1],
                                                           mem_mode = "onDemand",
                                                           piana_access = self.piana_access)

            new_node_a = self.piana_graph.get_node(identifier=interaction[0], attribute=new_node_a_attribute)
            new_node_b = self.piana_graph.get_node(identifier=interaction[1], attribute=new_node_b_attribute)

            self.piana_graph.add_node(new_node_a)
            self.piana_graph.add_node(new_node_b)

            new_edge_attribute = PianaGraphEdgeAttribute(interactionPiana_value= interaction[2],
                                                         mem_mode = "onDemand",
                                                         piana_access = self.piana_access)

            # create_edge returns a pair: position 0 is the edge, position 1 is 1 when the edge is new
            new_edge = self.piana_graph.create_edge(node_id1= interaction[0],
                                                    node_id2= interaction[1],
                                                    attribute_object= new_edge_attribute)

            if new_edge[1] == 1:
                # edge is new
                self.piana_graph.add_edge(new_edge[0])
        # END OF for interaction in all_database_method_interactions:


    # -------------------------------------
    #
    #  Apply modifications/extensions to piana_graph
    # 
    # -------------------------------------
    
    def expand_piana_graph_interactions(self, expansion_type, expansion_mode, expansion_threshold, hub_threshold, exp_output_mode,
                                        output_file_object, proteins_type_name, list_alternative_type_names,
                                        output_tax_id, list_source_dbs="all", inverse_dbs="no", list_source_methods="all", inverse_methods="no"):
        """
        Propagates interactions between the nodes of the current piana_graph that share the characteristic "expansion_type"

        The arguments are validated through self._check_arguments, an expansion object is created for the
        expansion type requested, and then the expansion itself is delegated to self.piana_graph.expand

        "expansion_type" is the characteristic that two proteins must share for their interactions to be interpropagated
            -> valid expansion-type values are those defined in PianaGlobals.expansion_types
            -> this method knows how to create expansions for: ec, cog, interpro and scop (ie. scop family)
            -> any other value raises a ValueError

        "expansion_mode" sets which nodes will receive the propagated interactions
             -> all : all proteins in piana_graph are expanded (partners of root proteins as well)
             -> root: only root proteins are expanded (use it to get predictions for your input proteins)
             -> root proteins are the source proteins used to build the piana_graph

        "expansion_threshold" avoids propagating interactions when too many nodes share the expansion type
             -> valid values are: 0 (no thresholds applied) and positive integers

        "hub_threshold" sets the maximum number of interactions a protein can have for it to be added to the piana_graph
           -> 0 is equivalent to not applying any hub threshold

        "list_source_dbs" sets the interaction databases that will be used to get interactions
           -> can be a list of dbs (eg ["dip", "string"]) or "all" (all source dbs used)

        "inverse_dbs" can be:
                       no (databases in list_source_dbs will be used)
                       yes (all databases except those in list_source_dbs will be used)

        "list_source_methods" sets the methods that will be used to get interactions
           -> can be a list of methods (eg ["y2h", "tap"]) or "all" (all methods used)

        "inverse_methods" can be:
                       no (methods in list_source_methods will be used)
                       yes (all methods except those in list_source_methods will be used)

        "exp_output_mode" sets what is done with the interactions found by the expansion
           -> add  : predictions are added to the piana_graph
           -> print: predictions are printed to output_file_object
           -> example of a "double expansion" (predictions for root nodes based on a previous cog expansion):
              first expand with expansion_mode=all and exp_output_mode=add,
              then expand again with expansion_mode=root and exp_output_mode=print

           - the following arguments are only relevant when exp_output_mode is print:

              -> "output_file_object" is the file object where the interactions will be printed

              -> "output_tax_id" restricts the species of proteins in the interactions that will be printed
                   --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy

              -> "proteins_type_name" is the type of code used for printing proteins identifiers
                   -> valid protein-type values are those defined in PianaGlobals.valid_protein_types

              -> "list_alternative_type_names" is handed to the graph as alternative code types
                   (presumably used when no proteins_type_name code is found: confirm in PianaGraph.expand)

              -> one interaction is printed per line, following this format:

                 protein1<TAB>protein2<TAB>expansion_type<TAB>source_interactionPiana<TAB>source_proteinPiana

        """

        self._check_arguments(method_name= "expand_piana_graph_interactions",
                              expansion_type=expansion_type,
                              expansion_mode=expansion_mode,
                              expansion_threshold=expansion_threshold,
                              hub_threshold=hub_threshold,
                              exp_output_mode=exp_output_mode,
                              file_object= output_file_object,
                              protein_type_name=proteins_type_name,
                              list_alternative_type_names=list_alternative_type_names,
                              tax_id=output_tax_id,
                              list_source_dbs=list_source_dbs,
                              inverse_dbs=inverse_dbs,
                              list_source_methods= list_source_methods,
                              inverse_methods=inverse_methods)

        # TO DO!!! Replace this if elif by a dictionary (eg. inside PianaGlobals.expansion_types) that has
        #          the expansion class as content for each expansion type

        # 1. choose the class that implements the expansion type requested
        if expansion_type == "ec":
            expansion_class = ExpansionSameEC

        elif expansion_type == "cog":
            expansion_class = ExpansionSameCog

        elif expansion_type == "interpro":
            expansion_class = ExpansionSameInterpro

        elif expansion_type == "scop":
            expansion_class = ExpansionSameScop

        else:
            unknown_type_message = "Trying to apply expansion of unknown type %s: if you have created a new Expansion in PianaGraphExpansions, you must as well add the expansion key to PianaGlobals.expansion_types\n" %(expansion_type)
            raise ValueError(unknown_type_message)
        # END OF  if expansion_type == ....

        # 2. all expansion classes are created with the same arguments
        expansion_object = expansion_class(piana_access_object = self.piana_access,
                                           list_source_dbs= list_source_dbs,
                                           inverse_dbs= inverse_dbs,
                                           list_source_methods= list_source_methods,
                                           inverse_methods= inverse_methods)

        if verbose:
            # NOTE(review): the value reported as "class name" is output_tax_id -- confirm this is intended
            progress_values = (expansion_mode, expansion_type, exp_output_mode, output_tax_id)
            sys.stderr.write("\nApplying expansion to %s nodes using expansion type %s with output mode %s and class name %s\n" %progress_values)

        # 3. let the graph apply the expansion
        # NOTE(review): output_tax_id is passed under the keyword class_name -- confirm against PianaGraph.expand
        self.piana_graph.expand(expansion=expansion_object,
                                expansion_mode=expansion_mode,
                                exp_output_mode= exp_output_mode,
                                expansion_threshold= expansion_threshold,
                                hub_threshold= hub_threshold,
                                output_target= output_file_object,
                                code_type=proteins_type_name,
                                alternative_code_types= list_alternative_type_names,
                                class_name= output_tax_id)


    # -------------------------------------
    #
    #  Print Table, DOT, protein info... 
    # 
    # -------------------------------------

        
    def print_interactions(self, protein_type_name, output_file_object, output_format, print_mode, format_mode, 
                           list_alternative_type_names, tax_id_value, list_keywords= None, intersection_dbs=None,
                           file_over_expressed= None, file_infra_expressed= None , expression_protein_type= None):
        """
        Prints interactions from current piana_graph in the format chosen by the user.

        The arguments are validated through self._check_arguments and the printing itself is
        delegated to self.piana_graph.output_interactions (always with filter_mode "all" and
        with self.user_root_proteins as user protein names)

        "protein_type_name"  is the type of code that should be used for printing proteins identifiers
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types
             
        "output_file_object" is the file object (sys.stdout to print to screen) where interactions will be printed

        "output_format" is the format that will be followed for the output
           -'table': prints interactions in table format
           -'network': prints interactions in a format that can be visualized as a network

        "format_mode" sets the type of format that will be used for output

           -> valid formats for output_format 'table' are:
               - 'txt' will print flat text
               - 'html' will print html 
               
           -> valid formats for output_format 'network' are:
               - 'dot': uses DOT format as defined in www.graphviz.org

           --> format dot will produce an output that can be then given to visualization programs
               for example, neato from GraphViz, would work by:
                $> cat output_in_dot_format | neato -Tgif -o network.gif
               
           --> format_mode 'txt' for table will print a table in the format indicated in the description of command print-table-*
               in the template for piana configuration files: piana/code/execs/conf_files/general_template.piana_conf


        "print_mode" sets which proteins will be printed
           -> "all" will print all interactions in the piana_graph
           -> "all_root" will print all interactions in the piana_graph where at least one partner is a root protein
           -> "only_root" will print only interactions between root proteins
           -> "connecting" will print only interactions between root proteins and those proteins that connect more than one root protein
        
        "list_alternative_type_names" can be used to set a list of alternative types in case no protein_type_name code is found
         --> user must provide a list of valid easy-to-remember type names
             list_alternative_type_names can for example look like this: ["gi", "uniacc", "md5"]
               -> The list of values that this list can contain can be obtained by doing python piana.py or
                  looking to variable valid_protein_types in PianaGlobals.py

             I suggest always placing md5 at the end of alternative types, so you never get a None in the output


        "tax_id_value" determines which species must be the proteins that will be printed
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy

        "list_keywords" sets a list of keywords that will be used to highlight important proteins in the network
           -> currently, it highlights in red the proteins in the DOT file that contain at least one keyword in the function, description or name
           -> currently, it is not used when printing the interactions in a table
           -> If you are interested in highlighting proteins related to cancer, list_keywords could be: ['cancer', 'onco', 'carcinoma', 'tumor']
           -> None (the default) is equivalent to an empty list (no keywords)
           
        "intersection_dbs" sets intersection mode, which only prints out interactions that appear in all dbs of the list being passed
            -> it can be None (no intersection mode applied) or a list of database names
            -> valid database names are those in PianaGlobals.interaction_databases
            
        "file_over_expressed" and "file_infra_expressed" are names of files that contain proteins that are either over expressed or infra expressed

        "expression_protein_type" is the type of protein code used in files file_over_expressed and file_infra_expressed
        """

        # a new list is created on each call: a list written as default value in the signature would be
        # a single object shared by all the calls that rely on the default
        if list_keywords is None:
            list_keywords = []

        self._check_arguments(method_name= "print_interactions",
                              file_object=output_file_object,
                              protein_type_name=protein_type_name,                            
                              list_alternative_type_names=list_alternative_type_names,
                              tax_id=tax_id_value,
                              list_keywords=list_keywords,
                              intersection_dbs=intersection_dbs,
                              file_over_expressed=file_over_expressed,
                              file_infra_expressed=file_infra_expressed,
                              expression_protein_type=expression_protein_type)
        
        
        self.piana_graph.output_interactions( protein_type_name= protein_type_name,
                                              output_target= output_file_object,
                                              output_format= output_format,
                                              filter_mode = "all",
                                              print_mode = print_mode,
                                              format_mode = format_mode,
                                              intersection_dbs = intersection_dbs,
                                              list_alternative_type_names= list_alternative_type_names,
                                              tax_id_value= tax_id_value,
                                              list_keywords= list_keywords,
                                              file_over_expressed= file_over_expressed,
                                              file_infra_expressed= file_infra_expressed,
                                              expression_protein_type= expression_protein_type,
                                              user_protein_names = self.user_root_proteins)


       
    def print_all_proteins_information(self,  protein_type_name, output_file_object, output_mode= "compact", format_mode="txt",
                                       list_alternative_type_names= None, tax_id_value= 0, list_keywords= None,
                                       file_over_expressed= None, file_infra_expressed= None, expression_protein_type= None):
        """
        Prints information about all the proteins in the current piana_graph

        The arguments are validated through self._check_arguments and the printing itself is
        delegated to self.piana_graph.output_all_proteins_information (always with filter_mode "all"
        and with self.user_root_proteins as user protein names)
        
        "protein_type_name" is the type of code that should be used for printing proteins identifiers
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types
        
        "output_file_object" is the file object (sys.stdout to print to screen) where the information will be printed
        
        "output_mode" can be:
           - 'compact': all relevant information in one line
           - 'extended': all information in text paragraphs
            
        "format_mode" sets the type of format that will be used for output
               - 'txt' will print flat text
               - 'html' will print html 

        "list_alternative_type_names" can be used to set a list of alternative types in case no protein_type_name code is found
           --> user must provide a list of valid easy-to-remember type names
           list_alternative_type_names can for example look like this: ["gi", "uniacc", "md5"]
           I suggest always placing md5 at the end of alternative types, so you never get a None in the output
           --> None (the default) is equivalent to an empty list (no alternative types)
           
        "tax_id_value" determines which species must be the proteins that will be printed
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy
           

        "list_keywords" sets a list of keywords that will be used to highlight important proteins in the network
           -> currently, it doesn't do anything
           -> None (the default) is equivalent to an empty list (no keywords)
           
        "file_over_expressed" and "file_infra_expressed" are names of files that contain proteins that are either over expressed or infra expressed

        "expression_protein_type" is the type of protein code used in files file_over_expressed and file_infra_expressed

        """

        # new lists are created on each call: a list written as default value in the signature would be
        # a single object shared by all the calls that rely on the default
        if list_alternative_type_names is None:
            list_alternative_type_names = []

        if list_keywords is None:
            list_keywords = []

        self._check_arguments(method_name= "print_all_proteins_information",
                              protein_type_name=protein_type_name,
                              file_object=output_file_object,
                              output_mode=output_mode,
                              format_mode=format_mode,
                              list_alternative_type_names=list_alternative_type_names,
                              tax_id=tax_id_value,
                              list_keywords=list_keywords,
                              file_over_expressed=file_over_expressed,
                              file_infra_expressed=file_infra_expressed,
                              expression_protein_type=expression_protein_type)

        self.piana_graph.output_all_proteins_information( protein_type_name=protein_type_name,
                                                          output_target= output_file_object,
                                                          output_mode= output_mode,
                                                          format_mode = format_mode,
                                                          filter_mode= "all",
                                                          list_alternative_type_names=list_alternative_type_names,
                                                          list_keywords= list_keywords,
                                                          tax_id_value= tax_id_value,
                                                          user_protein_names = self.user_root_proteins,
                                                          file_over_expressed= file_over_expressed,
                                                          file_infra_expressed= file_infra_expressed,
                                                          expression_protein_type= expression_protein_type)
        
        
        
    def print_root_proteins_information(self,  protein_type_name, output_file_object, output_mode= "compact", format_mode="txt",
                                        list_alternative_type_names= None, tax_id_value= 0, list_keywords= None,
                                        file_over_expressed= None, file_infra_expressed= None, expression_protein_type= None):
        """
        Prints information about root proteins in the current piana_graph
         -> a root protein is a protein that was given by the user as input (ie. protein of interest)

        The arguments are validated through self._check_arguments and the printing itself is
        delegated to self.piana_graph.output_root_proteins_information (always with filter_mode "all"
        and with self.user_root_proteins as user protein names)
        
        "protein_type_name" is the type of code that should be used for printing proteins identifiers
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types
        
        "output_file_object" is the file object (sys.stdout to print to screen) where the information will be printed
        
        "output_mode" can be:
           - 'compact': all relevant information in one line
           - 'extended': all information in text paragraphs
            
        "format_mode" sets the type of format that will be used for output
               - 'txt' will print flat text
               - 'html' will print html 
            
        "list_alternative_type_names" can be used to set a list of alternative types in case no protein_type_name code is found
           --> user must provide a list of valid easy-to-remember type names
           list_alternative_type_names can for example look like this: ["gi", "uniacc", "md5"]
           I suggest always placing md5 at the end of alternative types, so you never get a None in the output
           --> None (the default) is equivalent to an empty list (no alternative types)
           
        "tax_id_value" determines which species must be the proteins that will be printed
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy

        "list_keywords" sets a list of keywords that will be used to highlight important proteins in the network
           -> currently, it doesn't do anything
           -> None (the default) is equivalent to an empty list (no keywords)
           
        "file_over_expressed" and "file_infra_expressed" are names of files that contain proteins that are either over expressed or infra expressed

        "expression_protein_type" is the type of protein code used in files file_over_expressed and file_infra_expressed
            
        """

        # new lists are created on each call: a list written as default value in the signature would be
        # a single object shared by all the calls that rely on the default
        if list_alternative_type_names is None:
            list_alternative_type_names = []

        if list_keywords is None:
            list_keywords = []

        self._check_arguments(method_name= "print_root_proteins_information",
                              protein_type_name=protein_type_name,
                              file_object=output_file_object,
                              output_mode=output_mode,
                              format_mode=format_mode,
                              list_alternative_type_names=list_alternative_type_names,
                              tax_id=tax_id_value,
                              list_keywords=list_keywords,
                              file_over_expressed=file_over_expressed,
                              file_infra_expressed=file_infra_expressed,
                              expression_protein_type=expression_protein_type)
        
        self.piana_graph.output_root_proteins_information( protein_type_name=protein_type_name,
                                                           output_target= output_file_object,
                                                           output_mode= output_mode,
                                                           format_mode = format_mode,
                                                           filter_mode= "all",
                                                           list_alternative_type_names=list_alternative_type_names,
                                                           list_keywords= list_keywords,
                                                           tax_id_value=tax_id_value,
                                                           user_protein_names = self.user_root_proteins,
                                                           file_over_expressed= file_over_expressed,
                                                           file_infra_expressed= file_infra_expressed,
                                                           expression_protein_type= expression_protein_type )
        
        
        
    def print_connecting_proteins_information(self,  protein_type_name, output_file_object, output_mode= "compact", format_mode="txt",
                                              list_alternative_type_names= None, tax_id_value= 0, list_keywords= None,
                                              file_over_expressed= None, file_infra_expressed= None, expression_protein_type= None):
        """
        Prints information about linkers in the current piana_graph
         -> a linker is a protein that connects two or more root proteins

        The arguments are validated through self._check_arguments and the printing itself is
        delegated to self.piana_graph.output_connecting_proteins_information (always with filter_mode "all"
        and with self.user_root_proteins as user protein names)
        
        "protein_type_name" is the type of code that should be used for printing proteins identifiers
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types
        
        "output_file_object" is the file object (sys.stdout to print to screen) where the information will be printed
        
        "output_mode" can be:
           - 'compact': all relevant information in one line
           - 'extended': all information in text paragraphs
            
        "format_mode" sets the type of format that will be used for output
               - 'txt' will print flat text
               - 'html' will print html 
            
        "list_alternative_type_names" can be used to set a list of alternative types in case no protein_type_name code is found
           --> user must provide a list of valid easy-to-remember type names
           list_alternative_type_names can for example look like this: ["gi", "uniacc", "md5"]
           I suggest always placing md5 at the end of alternative types, so you never get a None in the output
           --> None (the default) is equivalent to an empty list (no alternative types)
               
        "tax_id_value" determines which species must be the proteins that will be printed
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy

        "list_keywords" sets a list of keywords that will be used to highlight important proteins in the network
           -> currently, it doesn't do anything
           -> None (the default) is equivalent to an empty list (no keywords)
           
        "file_over_expressed" and "file_infra_expressed" are names of files that contain proteins that are either over expressed or infra expressed

        "expression_protein_type" is the type of protein code used in files file_over_expressed and file_infra_expressed
        
        """

        # new lists are created on each call: a list written as default value in the signature would be
        # a single object shared by all the calls that rely on the default
        if list_alternative_type_names is None:
            list_alternative_type_names = []

        if list_keywords is None:
            list_keywords = []

        self._check_arguments(method_name= "print_connecting_proteins_information",
                              protein_type_name=protein_type_name,
                              file_object=output_file_object,
                              output_mode=output_mode,
                              format_mode=format_mode,
                              list_alternative_type_names=list_alternative_type_names,
                              tax_id=tax_id_value,
                              list_keywords=list_keywords,
                              file_over_expressed=file_over_expressed,
                              file_infra_expressed=file_infra_expressed,
                              expression_protein_type=expression_protein_type)
        
        self.piana_graph.output_connecting_proteins_information(  protein_type_name= protein_type_name,
                                                                  output_target= output_file_object,
                                                                  output_mode= output_mode,
                                                                  format_mode = format_mode,
                                                                  filter_mode= "all",
                                                                  list_alternative_type_names= list_alternative_type_names,
                                                                  list_keywords= list_keywords,
                                                                  tax_id_value=tax_id_value,
                                                                  user_protein_names = self.user_root_proteins,
                                                                  file_over_expressed= file_over_expressed,
                                                                  file_infra_expressed= file_infra_expressed,
                                                                  expression_protein_type= expression_protein_type)
        
        
        


        
    def print_list_proteins_information(self, protein_list, input_proteins_type, output_file_object, output_proteins_type,
                                        list_alternative_type_names= [], output_mode="compact", format_mode="txt", list_keywords= [],
                                        tax_id_value=0,
                                        file_over_expressed= None, file_infra_expressed= None, expression_protein_type= None ):
        """
        Prints information for all proteins in list "protein_list" (does not take into account the network, only these proteins)
         --> only works in compact mode to prevent the creation of enormous text files
        
        "protein_list" is the list of proteins for which you want to retrieve the information
        
        "input_proteins_type" is the type of code of proteins in the protein list
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types

        "output_file_object" is the file object where the protein information will be written

        "output_proteins_type" is the type of code that will be used to identify proteins in the output file
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types

        "tax_id_value" determines which species must be the proteins that will be printed
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy
           
        "list_alternative_type_names" can be used to set a list of alternative types in case no protein_type_name code is found
           --> user must provide pairs a list of valid easy-to-remember type names
           list_alternative_types can for example look like this: ["gi", "uniacc", "md5"]
           I suggest always placing md5 at the end of alternative types, so you never get a None in the output
           
        "output_mode" can be:
           - 'extended' (multiple lines per protein to be shown directly to the screen)
           - 'compact' (one line per protein to be shown directly on the screen)
            
        "format_mode" sets the type of format that will be used for output
               - 'txt' will print flat text
               - 'html' will print html 

        "list_keywords" sets a list of keywords that will be used to highlight important proteins in the network
           -> currently, it doesn't do anything
           
        "file_over_expressed" and "file_infra_expressed" are names of files that contain proteins that are either over expressed or infra expressed

        "expression_protein_type" is the type of protein code used in files file_over_expressed and file_infra_expressed
           
        """

        self._check_arguments(method_name= "print_list_proteins_information",
                              protein_list= protein_list,
                              protein_type_name=input_proteins_type,
                              file_object=output_file_object,
                              another_protein_type_name=output_proteins_type,
                              list_alternative_type_names=list_alternative_type_names,
                              output_mode=output_mode,
                              format_mode=format_mode,
                              list_keywords=list_keywords,
                              tax_id=tax_id_value,
                              file_over_expressed=file_over_expressed,
                              file_infra_expressed=file_infra_expressed,
                              expression_protein_type=expression_protein_type)
        
        self.piana_graph.output_list_proteins_information(  list_proteins = protein_list,
                                                            input_proteins_type = input_proteins_type,
                                                            output_proteins_type_name= output_proteins_type,
                                                            output_target= output_file_object,
                                                            output_mode= output_mode,
                                                            format_mode = format_mode,
                                                            filter_mode= "all",
                                                            list_alternative_type_names= list_alternative_type_names,
                                                            list_keywords= list_keywords,
                                                            tax_id_value= tax_id_value,
                                                            key_list= self.key_list_to_use,
                                                            file_over_expressed= file_over_expressed,
                                                            file_infra_expressed= file_infra_expressed,
                                                            expression_protein_type= expression_protein_type)
        self.key_list_to_use += 1


    def print_file_proteins_information(self, input_file_object, input_proteins_type, output_file_object, output_proteins_type,
                                        output_mode, format_mode="txt", list_keywords= None, list_alternative_type_names= None, tax_id_value=0,
                                        file_over_expressed= None, file_infra_expressed= None, expression_protein_type= None):
        """
        Prints information for all proteins in file "input_file_object"
         --> the protein codes are read with utilities.return_proteins_from_file and then handed to
             print_list_proteins_information, which does the actual printing
         --> NOTE(review): earlier documentation stated that this only works in compact mode (to prevent the creation of
             enormous text files); nothing in this method enforces it

        "input_file_object" is the file object with the protein codes for which you want to obtain the information

        "input_proteins_type" is the type of code used for the proteins in input_file_object
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types

        "output_file_object" is the file object where the protein information will be written

        "output_proteins_type" is the type of code that will be used to identify proteins in the output file
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types

        "output_mode" can be:
           - 'extended' (multiple lines per protein to be shown directly to the screen)
           - 'compact' (one line per protein to be shown directly on the screen)

        "format_mode" sets the type of format that will be used for output
               - 'txt' will print flat text (default)
               - 'html' will print html

        "list_keywords" sets a list of keywords that will be used to highlight important proteins in the network
           -> currently, it doesn't do anything
           -> None (the default) is treated as an empty list

        "list_alternative_type_names" can be used to set a list of alternative types in case no output_proteins_type code is found
           -> for example: ["gi", "uniacc", "md5"]
           -> None (the default) is treated as an empty list

        "tax_id_value" sets the species of the protein that will be printed (can be used for eliminating ambiguities between codes across species)
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy

        "file_over_expressed" and "file_infra_expressed" are names of files that contain proteins that are either over expressed or infra expressed

        "expression_protein_type" is the type of protein code used in files file_over_expressed and file_infra_expressed
        """

        # A literal [] in the signature would be one single list object shared by every call that relies on the
        # default: build a fresh list per call instead.
        if list_keywords is None:
            list_keywords = []

        if list_alternative_type_names is None:
            list_alternative_type_names = []

        # validate here as well (print_list_proteins_information validates again): the input file must be
        # checked before it is read
        self._check_arguments(method_name= "print_file_proteins_information",
                              another_file_object=input_file_object ,
                              protein_type_name=input_proteins_type,
                              file_object=output_file_object,
                              another_protein_type_name=output_proteins_type,
                              list_alternative_type_names=list_alternative_type_names,
                              output_mode=output_mode,
                              format_mode=format_mode,
                              list_keywords=list_keywords,
                              tax_id=tax_id_value,
                              file_over_expressed=file_over_expressed,
                              file_infra_expressed=file_infra_expressed,
                              expression_protein_type=expression_protein_type)

        list_input_protein_codes = utilities.return_proteins_from_file(file_object=input_file_object, proteins_type= input_proteins_type)

        self.print_list_proteins_information(protein_list= list_input_protein_codes,
                                             input_proteins_type= input_proteins_type,
                                             output_file_object= output_file_object,
                                             output_proteins_type= output_proteins_type,
                                             list_alternative_type_names= list_alternative_type_names,
                                             output_mode= output_mode,
                                             format_mode= format_mode,
                                             list_keywords= list_keywords,
                                             tax_id_value= tax_id_value,
                                             file_over_expressed= file_over_expressed,
                                             file_infra_expressed= file_infra_expressed,
                                             expression_protein_type= expression_protein_type)
        
    # -------------------------------------
    #
    #  Print specific information from piana_graph
    # 
    # -------------------------------------

    def print_proteins_at_distance_x(self, query_protein, distance, input_protein_type, output_protein_type, list_alternative_type_names,
                                     output_file_object, format_mode, info, tax_id_value = 0):
        """
        Prints to "output_file_object" proteins from the network that are at distance "distance" from the protein "query protein"

        "query_protein" is the query protein: proteins returned will be at distance "distance" from this protein

        "distance" is the distance at which the proteins will be searched

        "input_protein_type" is the type of code used for protein query_protein
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types

        "output_protein_type" is the type of code that will be used to print proteins
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types
        
        "list_alternative_types" can be used to set a list of alternative types in case no protein_type_name code is found
         --> user must provide pairs a list of valid easy-to-remember type names
             list_alternative_types can for example look like this: ["gi", "uniacc", "md5"]
             I suggest always placing md5 at the end of alternative types, so you never get a None in the output
             
        "output_file_object" is the file object (sys.stdout to print to screen) where proteins at distance X will be printed
        
        "format_mode" sets the type of format that will be used for output
               - 'txt' will print flat text
               - 'html' will print html 

        "info" sets whether domain information associated to the proteins will be printed or not
           - 'all' will show all domain information
           - 'scop' will show SCOP domain information
           - 'cath' will show CATH domain information
           - 'no' shows no domain information
        
        
        """

        
        self._check_arguments(method_name= "print_proteins_at_distance_x",
                              protein_code=query_protein,
                              distance=distance,
                              protein_type_name=input_protein_type,
                              another_protein_type_name=output_protein_type,
                              list_alternative_type_names=list_alternative_type_names,
                              file_object=output_file_object,
                              format_mode=format_mode,
                              info=info)
        
        # create the distance groups dic with keys
        distance_groups_dic = self.piana_graph.get_distance_groups( protein_ext_code= query_protein,
                                                                    input_protein_type_name= input_protein_type,
                                                                    output_protein_type_name= output_protein_type,
                                                                    list_alternative_type_names= list_alternative_type_names )


        # TO DO!!! This is not optimized at all! if the user asks for two distance groups, he will have to repeat all the process twice, which doesn't
        # make sense... all distance groups are calculated at once in get_distances()
        #  -> one possibility would be to modify get_distances so it only gets distances at the threshold fixed by the user... in a big piana_graph this
        #     would make a lot of sense.

        # TO DO!!! This is not taking into account the user_protein_names: user_protein_names = self.user_root_proteins
        #          It has to be taken into account, otherwise the protein names used might not be the ones that the user gave as root proteins

        # TO DO!!! This is a bit weird... I have to tell print_distance_group that its input type is the output type of get_distance_group...
        #          It would make more sense to first get proteins as proteinPiana and then output them in the code chosen by user
        if distance != "all":
            self.piana_graph.print_distance_group(distance_group= distance_groups_dic[distance],
                                                  input_protein_type_name= output_protein_type ,
                                                  format_mode= format_mode,
                                                  output_target= output_file_object,
                                                  info= info,
                                                  tax_id_value = tax_id_value)
        else:
            output_file_object = file(results_dir + output_name + ".1","w")
            self.piana_graph.print_distance_group(distance_group= distance_groups_dic[1],
                                                  input_protein_type_name= output_protein_type,
                                                  format_mode= format_mode,
                                                  output_target= output_file_object,
                                                  info= info,
                                                  tax_id_value = tax_id_value)
            
            output_file_object = file(results_dir + output_name + ".2","w")
            self.piana_graph.print_distance_group(distance_group= distance_groups_dic[2],
                                                  input_protein_type_name= output_protein_type,
                                                  format_mode= format_mode,
                                                  output_target= output_file_object,
                                                  info= info,
                                                  tax_id_value = tax_id_value)
            
            output_file_object = file(results_dir + output_name + ".3","w")
            self.piana_graph.print_distance_group(distance_group= distance_groups_dic[3],
                                                  input_protein_type_name= output_protein_type,
                                                  format_mode= format_mode,
                                                  output_target= output_file_object,
                                                  info= info,
                                                  tax_id_value = tax_id_value)




    def create_go_clustered_network(self, output_target=None, term_type=None, score_threshold=None, sim_mode=None, level_threshold=None,
                                    distance_threshold=None, rep_term=None, print_id=None):
        """
        Builds a GO (Gene Ontology) term network from the current piana graph, clusters it with the
        parameters given, and prints the result through the similarity function to "output_target"

        Steps performed (progress messages go to stderr when the module-level verbose flag is set):
           1. arguments are validated with _check_arguments
           2. a GoGraph is created and initialized from self.piana_graph
           3. a GoClusteringSimilarityFunction and a GoClusteringStopCondition are built from the parameters
           4. Clustering.cluster_graph is run on the GO graph
           5. the similarity function prints the go graph dot file to "output_target"

        "output_target" is the file object where the clustered network in DOT format will be printed

        "term_type" sets the kind of GO terms that will be used for the clustering
           -> term-type can be "molecular_function", "biological_process" or "cellular_component"

        "score_threshold" is the lowest score obtained by the similarity function allowed for continuing the clustering
           -> can be any real number from 0 to 100 (0 will group all proteins, 100 will not group any proteins). To obtain a
              relevant clustered network use score thresholds between 0.1 and 1

        "sim_mode" sets how to calculate distances between two clusters
           - "random" takes a random element from each cluster and evaluates similarity between them
           - "min" takes the minimal distance between elements of each cluster
           - "max" takes the maximal distance between elements of each cluster
           - "average" takes the average distance between all elements of each cluster

        "level_threshold" is the lowest level of the go term in the cluster allowed for continuing the clustering
           -> GO is a hierarchy organized from an initial root level (ie. 0) that increasingly makes more specific the terms.
              Therefore, the higher the level used the less clustering will be performed. To obtain a relevant clustered network
              use level thresholds between 1 and 3. It all depends on how general you want to be in the interpretation of the network.

        "distance_threshold" is the maximum distance allowed between two proteins in order to be clustered
           -> can be any integer between 1 and ...

        "rep_term" sets which of the GO terms of the cluster will be used for printing output
           -> can be min (term of minimal depth in the hierarchy) or max (maximal depth)

        "print_id" sets which id will be used for identifying the clusters in the printed output
           -> can be "no" (default id: go term name) or "yes" (a more complex id)
        """
        # TO DO!!! It should be possible to create a go graph from a protein graph, and to keep working later with a network
        #          clustered before... the clustered graph would have to be kept somewhere in the piana_api object
        # TO DO!!! let the user do more things (print several levels, print composition, etc)
        # TO DO!!! the cluster composition and the cluster interactions are not printed anywhere: a prefix should be
        #          passed instead of a single target

        def report_progress(message):
            # the verbose flag is looked up on every call, exactly as each inline check did
            if verbose:
                sys.stderr.write(message)

        self._check_arguments(method_name= "create_go_clustered_network",
                              file_object=output_target,
                              term_type=term_type,
                              score_threshold=score_threshold,
                              sim_mode=sim_mode,
                              level_threshold=level_threshold,
                              distance_threshold=distance_threshold,
                              rep_term=rep_term,
                              print_id=print_id)

        base_go_graph = GoGraph(piana_access_object = self.piana_access, term_type_value = term_type)

        report_progress("Initializing GoGraph from a pianaGraph object \n")
        # the graph that gets clustered is whatever the initializer returns
        go_network = base_go_graph.initialize_GoGraph_from_pianaGraph(pianaGraph = self.piana_graph)

        report_progress("Creating Clustering object\n")
        clustering_engine = Clustering()

        report_progress("Creating GraphCluster generator\n")
        graph_generator = ClusteredGraphGenerator()

        report_progress("Creating sim object\n")
        similarity = GoClusteringSimilarityFunction(piana_access= self.piana_access,
                                                    term_type= term_type,
                                                    mode=sim_mode,
                                                    path_length_threshold=distance_threshold)

        report_progress("Creating stop object\n")
        stop_rule = GoClusteringStopCondition(highest_depth=level_threshold,
                                              go_similarity_function= similarity,
                                              coeficient= score_threshold)

        report_progress("Performing clustering\n")
        # the returned clustered graph is only held in a local: it is not printed or stored by this method
        clustering_result = clustering_engine.cluster_graph(graph_to_cluster=go_network,
                                                            clustered_graph_generator= graph_generator,
                                                            similarity_function=similarity,
                                                            stop_condition=stop_rule,
                                                            clustering_print_mode=None,
                                                            output_prefix=None,
                                                            original_graph=None)

        report_progress("Printing  clustering results\n")
        # the similarity function knows to which graph it belongs, so the output is requested from it
        similarity.print_go_graph_dot_file(output_target= output_target,
                                           representative_term=rep_term,
                                           use_alternative_id=print_id)

 
    # -------------------------------------
    #
    #  Other PIANA utilities
    # 
    # -------------------------------------


    def print_spot_protein_correspondence(self, spots_file_object, molecular_error_bounds, isoelectric_error_bounds,
                                          output_file_object, output_proteins_type, list_alternative_types, format_mode):
        """
        Finds correspondences between proteins in the piana_graph and spots in a 2D electrophoresis gel
          --> It does it by comparing molecular weights and isoeletric points of the spots with the mw and ip of protein sequences
          --> prints to "output_file_object" (a file object) the spots matches for each error allowed, using protein codes indicated by "protein_type_name"
          --> (one spot can be assigned to several proteinPianas and viceversa (this is just a matching by mw and ip...)
               
        "spots_file_object": a file object with spots from a Gel, and their Molecular Weight and Isoelectric Point

               text file with spots must follows format (one spot per line):
        
                    spot_id<TAB>Molecular Weight<TAB>Isoelectric Point

               Attention!!! - Numbers must be in american style: 234234.45 and not 234234,45
                            - No headers or footers allowed


        "mw_error_bounds" and "ip_error_bounds" are lists of error bounds (they must have the same number of elements)

            the error bounds describe the percentage of error admitted when matching a spot mw or ip to the theoretical mw or ip of a protein
            for example:

                    mw_error_bounds   = [0.0, 0.0025, 0.005, 0.01]
                    ip_error_bounds   = [0.0, 0.0025, 0.005, 0.01]

        
        "output_proteins_type" is the easy-to-remember type name that will be used for printing the proteins that match the 2D gel
          -> Valid protein_type_name are those listed in PianaGlobals.valid_protein_types

        "list_alternative_types" can be used to set a list of alternative types in case no protein_type_name code is found
          -> the user must provide a list of valid easy-to-remember type names
            list_alternative_types can for example look like this: ["gi", "uniacc", "md5"]
             
            I suggest always placing md5 at the end of alternative types, so you never get a None in the output
            
        "format_mode" sets the type of format that will be used for output
               - 'txt' will print flat text
               - 'html' will print html 
        """
        self._check_arguments(method_name= "print_spot_protein_correspondence",
                              another_file_object=spots_file_object,
                              molecular_error_bounds= molecular_error_bounds,
                              isoelectric_error_bounds= isoelectric_error_bounds,
                              file_object=output_file_object,
                              another_protein_type_name=output_proteins_type,
                              list_alternative_type_names=list_alternative_types,
                              format_mode=format_mode)
        
        spots_matches = self.piana_graph.match_spots_to_proteins(spots_file_object= spots_file_object,
                                                                 mw_error_bounds= molecular_error_bounds,
                                                                 ip_error_bounds= isoelectric_error_bounds,
                                                                 match_mode="all")

        self.piana_graph.print_spots_matches(spots_matches= spots_matches,
                                             output_target= output_file_object,
                                             format_mode= format_mode,
                                             protein_type_name= output_proteins_type,
                                             list_alternative_type_names=list_alternative_types,
                                             mw_error_bound= molecular_error_bounds,
                                             ip_error_bound= isoelectric_error_bounds,
                                              user_protein_names = self.user_root_proteins)

   
    def protein_code_2_protein_code(self, input_file_object, input_proteins_type, output_file_object, output_proteins_type,
                                    list_alternative_types, format_mode , tax_id_value = 0):
        """
        Translates protein codes of a certain type into another type

        For each input code, the proteinPianas associated to it are looked up and all their codes of type
        "output_proteins_type" are written on one row:
           - 'txt'  : input_code<TAB>code1<TAB>code2<TAB>...  (separators taken from PianaGlobals.tab_separators and
                      PianaGlobals.line_separators)
           - 'html' : one <tr> per input code inside a <table border=1>, one <td> per code

        "input_file_object" is the file object with the protein codes to translate (one protein code per line)

        "input_proteins_type" is the type of code used to identify proteins in input_file_object
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types

        "output_file_object" is the file object where the translated codes will be written

        "output_proteins_type" is the target type of code
             -> valid protein-type values are those defined in PianaGlobals.valid_protein_types

        "list_alternative_types" is a list of alternative type names; it is passed as alternative_type_names when the
           external codes of each proteinPiana are retrieved
             -> for example: ["gi", "uniacc", "md5"]

        "format_mode" sets the type of format that will be used for output
               - 'txt' will print flat text
               - 'html' will print html
               (nothing is written for any other value)

        "tax_id_value" sets which will be the species for the proteins translated
           --> valid tax ids are 0 (do not take into account the species) and those taxonomy ids provided by ncbi taxonomy

        """

        self._check_arguments(method_name= "protein_code_2_protein_code",
                              another_file_object=input_file_object ,
                              tax_id = tax_id_value,
                              protein_type_name=input_proteins_type,
                              file_object=output_file_object,
                              another_protein_type_name=output_proteins_type,
                              list_alternative_type_names=list_alternative_types,
                              format_mode=format_mode)

        # No name unification needed in this method, since the user is giving his list of input codes and the program is
        # returning all other codes associated to those names: no need to choose one in particular.

        list_input_protein_codes = utilities.return_proteins_from_file(file_object=input_file_object , proteins_type= input_proteins_type)

        writing_html = (format_mode == "html")
        writing_txt = (format_mode == "txt")

        if writing_html:
            output_file_object.write("<table border=1>\n")

        # the code column is requested with input_proteins_type only, which does not change between proteins:
        # ask for it once instead of once per input code
        input_code_column = utilities.get_code_column(input_proteins_type)

        for protein in list_input_protein_codes:

            input_code = protein.strip()

            list_proteinPiana = self.piana_access.get_list_protein_piana(proteinCode_value= input_code,
                                                                         proteinCodeType_value= input_code_column,
                                                                         tax_id_value= tax_id_value,
                                                                         source_db_info= "no")

            list_other_codes = []
            for proteinPiana in list_proteinPiana:
                # each call to get_list_protein_external_codes returns the list of output_proteins_type codes associated to proteinPiana
                list_other_codes.extend( self.piana_access.get_list_protein_external_codes(proteinPiana=proteinPiana,
                                                                                           protein_type_name= output_proteins_type,
                                                                                           alternative_type_names= list_alternative_types))
            # END OF for proteinPiana in list_proteinPiana:

            # one row per input code, written in a single call (same text as writing it piece by piece)
            if writing_html:
                cells = "".join(["<td>%s</td>" %( other_code) for other_code in list_other_codes])
                output_file_object.write("<tr><td>%s</td>%s</tr>" %(input_code, cells) )

            elif writing_txt:
                translated = "".join(["%s%s" %( PianaGlobals.tab_separators[format_mode], other_code) for other_code in list_other_codes])
                output_file_object.write("%s%s%s" %(input_code, translated, PianaGlobals.line_separators[format_mode]) )

        # END OF for protein  in list_input_protein_codes:

        if writing_html:
            output_file_object.write("</table>\n")
    
