"""
 File       : run_multiple_pianas.py
 Author     : R. Aragues
 Creation   : 14.10.2004
 Contents   : runs multiple independent piana.py instances (eg. for each protein in a file)
 Called from: command line

=======================================================================================================

This program makes calls to piana.py, using parameters provided in the command line, creating a separate network (and therefore
separate results files) for 1) each protein in the file or 2) each file in a directory


output written to files with prefix composed of results-prefix in command-line and 1) protein name or 2) file name

"""

# run_multiple_pianas.py: runs multiple independent piana.py instances
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues

import getopt
import glob
import os
import re
import readline
import shutil
import signal
import sys
import time

import PianaGlobals
from ScoringFunctionBenchmark import *

import utilities

#MAX_TIME_ALLOWED = 3600   # giving a process 60 minutes to finish (1 hour)
#MAX_TIME_ALLOWED = 7200   # giving a process 120 minutes to finish (2 hours)
#MAX_TIME_ALLOWED = 14400   # giving a process 240 minutes to finish (4 hours)
#MAX_TIME_ALLOWED = 28800   # giving a process 480 minutes to finish (8 hours)
MAX_TIME_ALLOWED = 86400   # giving 1 day finish to a process (24 hours)

STEP_TIME= 10 # number of seconds for each step (intervals that father checks for child completion)

verbose = 1

# Thresholds used when the user chooses -1 as hub-threshold:
#
#    these are the hub thresholds that will be progressively used to build the PianaGraph from which the protein
#    binding sites will be "inferred". We progressively apply these thresholds because we have seen that
#    the lower the threshold, the higher the accuracy in finding binding sites. However, the number of proteins that
#    can be decomposed increases when we increase the threshold, so our methodology is to start with a low threshold and then
#    increase it until an answer is found (the main loop decides whether an answer was found by looking at the files written by piana)
#     --> hub_threshold is the maximum number of interactions that a protein can have in order to be added to the graph
#
#  Attention! If you want these progressive thresholds to be applied, you have to set hub-threshold to -1 in the command line
thresholds_to_use = [10, 20, 30, 40, 70, 0]


# ----------------------
# Function usage()
# ----------------------
def usage():
    """
    prints to stdout the command line help of run_multiple_pianas.py

    The text is written with a single sys.stdout.write() call: the output is the same
    as printing each line in turn, and the code does not depend on the print syntax
    of a particular python version.

    The synopsis lists --input-dir as the alternative to --input-file (both are accepted
    by the command line parser and described in the "where" section)
    """
    help_lines = (
        "------------------------------------------------------------------------------------------------------------------------------",
        "This program  runs multiple independent piana.py instances (eg. for each protein in a file)\n",
        " \n",
        "Usage: python run_multiple_pianas.py (--input-file=input_file_name | --input-dir=input_dir)  --input-proteins-type=input_proteins_type  ",
        "              --input-proteins-species=input_proteins_species --depth=depth --hub-threshold=hub_threshold --output-proteins-type=output_proteins_type  ",
        "              --piana-dbname=piana_dbname --piana-dbhost=piana_dbhost --piana-dbuser=piana_dbuser --piana-dbpass=piana_dbpass ",
        "              --results-prefix=results_prefix  --results-dir=results_dir --spots-file-name=spots_file_name",
        "              --configuration-file=configuration_file --benchmark-conf-file=benchmark_conf_file --gold-std-dir=gold_std_dir --patch-mode=patch_mode",
        "              --expression-proteins-type=expression_proteins_type --over-expression-correspondences=over_expression_correspondences ",
        "              --infra-expression-correspondences=infra_expression_correspondences ",
        "              [--help] [--verbose] ",
        "\nwhere:",
        "     --help               : prints this message and exits",
        "     --verbose            : prints process info to stdout",
        "",
        "     This program can be used to run independent piana.py 1) for each protein in a input file or 2) for each file in an input directory",
        "            -> set only one of the two:",
        "            --------------------------------------------------------------------------------------------------",
        "            input_file_name : file name of input file containing one protein per line",
        "            input_dir       : directory for the files you want to run piana (don't write the ending slash!)",
        "            --------------------------------------------------------------------------------------------------",
        "",
        "     depth                : depth of the network to be built",
        "     hub_threshold        : maximum number of interactions allowed for a protein to be added to the network",
        "                            if num_ints_of_protein >= hub_threshold, no interactions will be added for that protein",
        "                            if hub_threshold== 0, no thresholds applied",
        "                            if hub_threshold== -1, progressive threshold applied (to be used for finding protein patches)",
        "     piana_dbname         : name of database piana to be used (required )",
        "     piana_dbhost         : name of host where database piana to be used is placed (required)",
        "     piana_dbuser         : username accessing the database (not required in most systems)",
        "     piana_dbpass         : password of username accessing the database (not required in most systems)",
        "     spots_file_name      : input file containing one spot per line, with isoelectric point and molecular weight",
        "                            if no spots-file is given in command line, no action performed",
        "     results_prefix       : prefix of results files, indicating a representative name of the experiment being made",
        "     results_dir          : directory where results will be saved (must end with '/') ",
        "     configuration_file   : file that sets piana parameters and execution commands",
        "     benchmark_conf_file  : file name of benchmark configuration file (only to be used by raragues)",
        "     gold_std_dir         : directory with gold stds for proteins in input_file",
        "                              --> number of proteins in input file must be equal to number of gold stds",
        "                              --> gold_std files must be name_of_protein.gold_std",
        "                              --> no need to say that protein names in input file must be the same as in input_file",
        "     patch_mode           : sets the mode for finding protein patches (only to be used by raragues)",
        "                              - train: creating comparison files used by ScoringFunctionBenchmark (needs a gold std and benchmark conf file)",
        "                              - eval: finding patches and evaluating how good the results are (needs a gold std and benchmark conf file)",
        "                              - exec: finding patches and printing decomposition for the proteins",
        "     input_proteins_species : species of proteins in input_file_name (used to fix species for codes with many species (eg geneName)",
        "     input_proteins_type  : type of code of proteins in input_file_name",
        "     output_proteins_type : type of protein code that will be used for output",
        "     expression_proteins_type  : type of code of proteins in expression files (see below)",
        "     over_expression_correspondences : file with correspondences between the files in input-dir and file-over-expressed piana argument",
        "     infra_expression_correspondences : file with correspondences between the files in input-dir and file-infra-expressed piana argument",
        "                            --> Attention!: these two arguments are only valid if giving an input-dir",
        "                            --> these arguments must be used when the expression files are different for each file in input-dir",
        "                                It is an easy way of setting which expression file corresponds to which file in input-dir",
        "                            --> the format for these two files is the following:",
        "                                   file_name_without_path<TAB>over_expressed_file_name_path",
        "                                   file_name_without_path<TAB>over_expressed_file_name_path",
        "                                   file_name_without_path<TAB>over_expressed_file_name_path",
        "                                   ........................................................",
        "                              -> Attention!: first column is just the file name. Second column is the full path to the expression file",
    )

    # every entry is one output line (some entries carry their own extra "\n")
    sys.stdout.write("\n".join(help_lines) + "\n")
 
    
# ---------------------------
# Reading input from user
# --------------------------- 


# Options for which the first value seen wins (so that the command line has preference
# over a configuration file): the variable is only set if it is still None and the
# value is not "blank".
#   option name --> (name of the module-level variable, converter applied to the value or None)
_FIRST_VALUE_WINS = {
    "input-file": ("input_file_name", None),
    "input-dir": ("input_dir", None),
    "piana-dbname": ("piana_dbname", None),
    "piana-dbhost": ("piana_dbhost", None),
    "piana-dbuser": ("piana_dbuser", None),
    "piana-dbpass": ("piana_dbpass", None),
    "input-proteins-type": ("input_proteins_type", None),
    "expression-proteins-type": ("expression_proteins_type", None),
    "over-expression-correspondences": ("over_expression_correspondences", None),
    "infra-expression-correspondences": ("infra_expression_correspondences", None),
    "input-proteins-species": ("input_proteins_species", None),
    "output-proteins-type": ("output_proteins_type", None),
    "depth": ("depth", int),
    "spots-file-name": ("spots_file_name", None),
    "hub-threshold": ("hub_threshold", int),
    "results-prefix": ("results_prefix", None),
    "results-dir": ("results_dir", None),
}

# Options whose variable is overwritten with whatever value is given.
#   option name --> name of the module-level variable
_ALWAYS_OVERWRITTEN = {
    "configuration-file": "configuration_file",
    "benchmark-conf-file": "benchmark_conf_file",
    "gold-std-dir": "gold_std_dir",
    "patch-mode": "patch_mode",
}

# Options holding a ":"-separated list, where "blank" and "all" both mean "all".
#   option name --> name of the module-level variable
_SOURCE_LIST_OPTIONS = {
    "list-source-dbs": "list_source_dbs",
    "list-source-methods": "list_source_methods",
}


def set_parameter_value(parameter_name= None, value=None):
    """
    checks if parameter name "parameter_name" has a related parameter, and in case it does, assigns "value" to it

    "parameter_name" is the option name without leading dashes (eg. "input-file")
    "value" is the string given for that option ("blank" stands for "no value")

    The related parameters are module-level variables of this script: they are written
    through globals(). Rules applied:

      - options in _FIRST_VALUE_WINS: set only if the variable is still None (a variable that
        does not exist yet counts as None) and value is not "blank". "depth" and
        "hub-threshold" are converted with int() (ValueError if value is not an integer)
      - options in _ALWAYS_OVERWRITTEN: variable = value
      - "output-proteins-species": variable = value unless value is "blank"
      - "list-source-dbs" and "list-source-methods": value.split(":"), or "all" if value is "blank" or "all"
      - "list-alternative-types": value.split(":"), or [] if value is "blank"
      - "verbose": verbose is set to 1
      - "help": prints usage and exits with status 2
      - any other parameter name is ignored

    In piana.py it is used by both parseArguments() and parse_configuration_file(). Here (run_multiple_pianas) it is only
    used by parse-arguments but I decided to keep it separate to make easier the update of parameters when piana.py adds new options
    to command line
    """
    settings = globals()

    if parameter_name in _FIRST_VALUE_WINS:
        variable_name, convert = _FIRST_VALUE_WINS[parameter_name]
        if settings.get(variable_name) is None and value != "blank":
            if convert is not None:
                value = convert(value)
            settings[variable_name] = value

    elif parameter_name in _ALWAYS_OVERWRITTEN:
        settings[_ALWAYS_OVERWRITTEN[parameter_name]] = value

    elif parameter_name in _SOURCE_LIST_OPTIONS:
        # these parameters can only be set through a configuration file: no need to check if they are None
        #   if the configuration file doesn't have a value for them, set to "all"
        if value != "blank" and value != "all":
            settings[_SOURCE_LIST_OPTIONS[parameter_name]] = value.split(":")
        else:
            settings[_SOURCE_LIST_OPTIONS[parameter_name]] = "all"

    elif parameter_name == "output-proteins-species":
        # this parameter can only be set through a configuration file: no need to check if it is None
        if value != "blank":
            settings["output_proteins_species"] = value

    elif parameter_name == "list-alternative-types":
        # this parameter will only be set through the configuration file... no need to check if it is None
        if value != "blank":
            settings["list_alternative_types"] = value.split(":")
        else:
            settings["list_alternative_types"] = []

    elif parameter_name == "verbose":
        settings["verbose"] = 1

    elif parameter_name == "help":
        # print help information and exit
        usage()
        sys.exit(2)
        

def parseArguments():
    """
    parses command line.

    Only long options are accepted (see usage() for their meaning). Each option found in
    sys.argv[1:] is handed to set_parameter_value(), with the leading "--" removed from its name.

    If the command line is not valid, an error message is written to stderr and the
    program exits with status 2.
    """
    long_options = ["verbose", "help",
                    "piana-dbname=", "piana-dbhost=", "piana-dbuser=", "piana-dbpass=",
                    "depth=", "hub-threshold=",
                    "input-file=", "input-proteins-type=", "input-proteins-species=",
                    "output-proteins-type=", "spots-file-name=",
                    "expression-proteins-type=", "over-expression-correspondences=",
                    "infra-expression-correspondences=",
                    "configuration-file=", "benchmark-conf-file=", "gold-std-dir=", "patch-mode=",
                    "results-prefix=", "results-dir=", "input-dir="]

    try:
        opts, unused_args = getopt.getopt(sys.argv[1:], "", long_options)

    except getopt.GetoptError:
        # the exception is read from sys.exc_info() so that this code does not depend on
        # the "except ... , name" / "except ... as name" syntax of a particular python version
        msg = sys.exc_info()[1]
        # report the problem and exit
        sys.stderr.write( "command line arguments are not correct: %s\n" %(msg))
        sys.exit(2)

    for option, value in opts:
        # getopt returns long options with their leading "--": remove exactly that prefix
        # (strip("--") would remove any number of dashes from both ends of the name)
        set_parameter_value(parameter_name= option[2:], value= value)
    # ENDOF for option,value in opts:

             

# --------
# --------
#  Main()               
# --------                               
# --------

# --
# Settings of this script: every one starts as None and is later given a value
# from the command line
# --

# where the input comes from
input_file_name = input_dir = None

# protein code types and species
input_proteins_type = input_proteins_species = None
output_proteins_type = output_proteins_species = None
list_alternative_types = None

# expression data
expression_proteins_type = None
over_expression_correspondences = infra_expression_correspondences = None

spots_file_name = None

# network building
depth = None
list_source_dbs = list_source_methods = None
hub_threshold = None

# results
results_prefix = None         # prefix that all results produced by piana will contain in their name
results_dir = None            # directory where results will be saved
configuration_file = None     # file that sets values for piana parameters and execution commands

# protein patches
benchmark_conf_file = gold_std_dir = patch_mode = None

# piana database parameters
piana_dbname = piana_dbhost = None
piana_dbuser = piana_dbpass = None


# read the command line into the module-level settings
parseArguments()

# there is nothing to run piana on unless an input file or an input directory was given
if input_dir is None and input_file_name is None:
    raise ValueError("Enter either an input file name or an input dir")

# piana is always called in batch mode, which requires a configuration file
if configuration_file is None:
    raise ValueError("Enter a configuration file name: this program does not work in interactive mode")


# if the user set over-expression-correspondences and infra-expression-correspondences, then we have to
# make sure that we are using the right expression files when calling PIANA

def _read_correspondences(correspondences_file_name):
    """
    reads a correspondences file and returns a dictionary {first column: second column}

    Each line has (at least) two columns separated by whitespace: the name of a file in the input dir
    (without path) and the path to its expression file. Blank lines are skipped.

    Raises ValueError if a line has only one column. The file is always closed before returning.
    """
    correspondences = {}

    correspondences_fd = open(correspondences_file_name, "r")
    try:
        line_number = 0
        for one_line in correspondences_fd:
            line_number += 1
            line_fields = one_line.split()   # split() already removes the trailing newline

            if not line_fields:
                # blank line: nothing to read
                continue

            if len(line_fields) < 2:
                raise ValueError("line %s of correspondences file %s does not have two columns\n" %(line_number,
                                                                                                   correspondences_file_name))

            correspondences[line_fields[0]] = line_fields[1]
    finally:
        correspondences_fd.close()

    return correspondences


dic_over_expression_correspondences = {}   # keys are the file names in input dir and contents the path
                                           # to the correspondent file-over-expressed argument for piana.py

if over_expression_correspondences is not None:
    dic_over_expression_correspondences = _read_correspondences(over_expression_correspondences)
# END OF if over_expression_correspondences is not None:


dic_infra_expression_correspondences = {}  # keys are the file names in input dir and contents the path
                                           # to the correspondent file-infra-expressed argument for piana.py

if infra_expression_correspondences is not None:
    dic_infra_expression_correspondences = _read_correspondences(infra_expression_correspondences)
# END OF if infra_expression_correspondences is not None:




# PIANA is going to be run several times.
# Depending on the arguments given by the user, one of these two things will happen:
#
#  1. the user gave an input_file_name: that means he wants to run PIANA independently for each protein in that file
#
#     --> create a temporary directory inside directory temp, with one file for each protein: then run piana for all files in that temporary dir
#
#  2. the user gave an input_dir: that means he wants to run PIANA independently for each file in that dir
#
#     --> run piana for all files in the input_dir

number_of_runs = 0


# If we are in case 1, create the temporary directory with one file per protein
if input_file_name:

    # the temporary directory is named after the input file (without the directories preceding it)
    temp_tail = os.path.basename(input_file_name)
    if not temp_tail:
        # without this check, an input file name ending in "/" would make input_dir be the whole ./temp directory
        raise ValueError("input file name %s does not name a file\n" %(input_file_name))

    input_dir = "./temp/" + temp_tail

    # Start from an empty directory: files left there by a previous run would otherwise be
    # picked up as input files for piana.
    #  (os.removedirs is not used because it only removes empty directories and it also
    #   removes ./temp itself when it becomes empty, which makes the directory creation fail)
    if os.path.isdir(input_dir):
        shutil.rmtree(input_dir)

    # int("770", 8) is the octal mode 770 (rwx for user and group), written in a way that is valid in every python version
    # makedirs also creates ./temp if it does not exist yet
    os.makedirs(input_dir, int("770", 8))

    input_file_fd = open(input_file_name, "r")
    try:
        for line in input_file_fd:
            # for each protein in the input file, create a file into the temp directory with content that protein
            protein = line.strip()

            if not protein:
                # blank line: there is no protein to run piana for
                continue

            # we create a temporary file with just one protein from input_file
            #  this file is then set as input file for one piana call
            temp_file_name = "%s/%s.%s.one_protein_file.txt" %(input_dir, protein, results_prefix)
            temp_fd = open(temp_file_name, "w")
            try:
                temp_fd.write("%s\n" %(protein) )
            finally:
                temp_fd.close()
    finally:
        input_file_fd.close()
# END OF if input_file_name:


# for each  file in the input_dir (either the one gave by the user or created from the input file), run piana.py
for one_file in glob.glob(input_dir + "/*.*"):

    if not os.path.isfile(one_file):
	# skip any directories
	continue

    number_of_runs += 1
    num_threshold_to_use = 0
    answer_found_for_file = 0
    while not answer_found_for_file:
        # this loop will only be a loop when using progressive threshold (hub_threshold = -1)
        #    --> used to use different thresholds (fixed by indexing list thresholds_to_use) till an answer is found for protein patches
        # In other cases, answer_found_for_file will always become 1 after one execution of piana
        piana_arguments = []

        piana_arguments.append("python2.3")
        piana_arguments.append("piana.py")
        # build the call to piana by adding command line atoms

        piana_arguments.append("--input-file=%s" %one_file)

        # only batch mode permitted with this script
        piana_arguments.append("--exec-mode=batch")

        if input_proteins_type is not None:
            piana_arguments.append("--input-proteins-type=%s" %input_proteins_type)

        if input_proteins_species is not None:
            piana_arguments.append("--input-proteins-species=%s" %input_proteins_species)

        if depth is not None:
            piana_arguments.append("--depth=%s" %depth)

        if output_proteins_type is not None:
            piana_arguments.append("--output-proteins-type=%s" %output_proteins_type)

        if piana_dbname is not None:
            piana_arguments.append("--piana-dbname=%s" %piana_dbname)

        if piana_dbhost is not None:
            piana_arguments.append("--piana-dbhost=%s" %piana_dbhost)

        if piana_dbuser is not None:
            piana_arguments.append("--piana-dbuser=%s" %piana_dbuser)

        if piana_dbpass is not None:
            piana_arguments.append("--piana-dbpass=%s" %piana_dbpass)

        if spots_file_name is not None:
            piana_arguments.append("--spots-file-name=%s" %spots_file_name)

        if hub_threshold is not None:
            if hub_threshold == -1:
                # progressive threshold applied till finding an answer (only used for finding protein patches
                threshold_being_used = thresholds_to_use[num_threshold_to_use]
                num_threshold_to_use += 1  # increasing the threshold by 10,
                                           # it will be applied in case we do not find any patches with the current threshold
            # END OF else: (if hub_threshold != -1:)
            elif hub_threshold is not None:
                # standard threshold applied
                threshold_being_used = hub_threshold
                

            piana_arguments.append("--hub-threshold=%s" %threshold_being_used)
        # END OF if hub_threshold is not None:
        else:
            threshold_being_used = "from_conf_file"

	
	directory, this_prefix = os.path.split(one_file)

        if results_prefix is not None:
            # create the results prefix for this specific file
            results_combined_prefix= this_prefix + "." + results_prefix
        else:
            results_combined_prefix= this_prefix

        piana_arguments.append("--results-prefix=%s" %results_combined_prefix)

        if expression_proteins_type is not None:
            piana_arguments.append("--expression-proteins-type=%s" %expression_proteins_type)

	file_name = one_file.split("/")[-1] # remove the path from the file so we can check its correspondent expression files

	if dic_over_expression_correspondences.has_key(file_name):
            piana_arguments.append("--file-over-expressed=%s" %dic_over_expression_correspondences[file_name])

	if dic_infra_expression_correspondences.has_key(file_name):
            piana_arguments.append("--file-infra-expressed=%s" %dic_infra_expression_correspondences[file_name])
	    
	    

        if results_dir is not None:
            piana_arguments.append("--results-dir=%s" %results_dir)

        if configuration_file is not None:
            piana_arguments.append("--configuration-file=%s" %configuration_file)

        if benchmark_conf_file is not None:
            piana_arguments.append("--benchmark-conf-file=%s" %benchmark_conf_file)

        if gold_std_dir is not None:
            # Notize that this argument is different in piana (a single gold std file) and run_piana_... (a directory with gold std files)
            piana_arguments.append("--gold-std=%s" %(gold_std_dir + this_prefix + ".gold_std"))
            
        if patch_mode is not None:
            piana_arguments.append("--patch-mode=%s" %patch_mode)

        if verbose:
            sys.stderr.write("\n======================PIANA RUN NUMBER %s (File=%s) (Threshold=%s)===================\n" %(
                number_of_runs,
                this_prefix,
                threshold_being_used))
            sys.stderr.write(" calling piana with arguments <<<<<%s>>>>>> \n" %piana_arguments)
            sys.stderr.write("\n=========================================================================================================\n")

        pid = os.fork()
        if pid == 0:
            # the child
            # system call to piana.py with arguments taken from command line
            os.execvp("python2.3", piana_arguments)
        else:
            # the father waits till completion of child
            time_to_finish = MAX_TIME_ALLOWED
            
            while time_to_finish > 0:
                # If the child takes too long to complete, we kill it... it normally means the computer does not have enough memory to handle that file
                
                (status_pid, status) = os.waitpid(pid, os.WNOHANG)   # check if child has already finish... if not, continue with code...
                
                if status_pid == pid:
                    # if the process finished normally  then break the while
                    # TO CHECK: I don't know why, but when the child process has finished, the value of status_pid is the pid of the child and when
                    #           the child didn't finish, the value of status_pid is 0. THis works, but it would be nice to know why...
                    if verbose:
                        sys.stderr.write(" Process finished in time" )

                    break
                else:
                    # process did not finish... give some more time to child...
                    
                    time.sleep(STEP_TIME)             
                    time_to_finish -= STEP_TIME
            # END OF while time_to_finish > 0:
            
            if verbose:
                sys.stderr.write("\nChecking if time was consumed completely...." )

            if time_to_finish <= 0:
            
                if verbose:
                    sys.stderr.write("Time left was lower than 0! " )
                # if time given to process was consumed, kill it (checking that it didn't finish between the last loop and here
                try:
                    answer_found_for_file = 1   # no hope for this file... consider it done
                    os.kill(pid, signal.SIGKILL)
                    killed_file_name = "/home/raragues/phd/piana/code/execs/temp/%s.file_killed" %(this_prefix)

                    killed_file_fd = file(killed_file_name, "w")

                    killed_file_fd.write("file %s killed: it took too long\n" %(this_prefix))
                    killed_file_fd.close()
                    if verbose:
                        sys.stderr.write(" Child killed because it took too long to complete\n" )

                    continue
                except:

                    # consider that the process finished if there is an error when killing the child
                    if verbose:
                        sys.stderr.write(" Child died before we could kill it: %s\n" %sys.exc_type)

                    pass
            # END OF if time_to_finish <= 0:
            
            if patch_mode == "train":
                # In training, we must check whether we should apply the next threshold or one was already found
                if hub_threshold == -1:
                    # progressive threshold being applied: check in comparison_file if there was an answer
                    #   if there was an answer, go for next file
                    #   if there wasn't an answer, increase threshold and try again with new threshold

                    # Ask the directory walker for every comparison file written for this protein
                    # (file names matching "<protein>.*" under the temp_comparison directory)
                    glob_comparison_file_name = utilities.GlobDirectoryWalker(
                        "/home/raragues/phd/piana/code/execs/temp_comparison",
                        "%s.*" %(protein))
                    list_comparison_file_name = list(glob_comparison_file_name)

                    # Exactly one comparison file is expected per protein (it is read from
                    # list_comparison_file_name[0] below): any other count means something strange happened
                    number_of_comparison_files = len(list_comparison_file_name)
                    if number_of_comparison_files == 0:
                        raise ValueError("How can I have no comparison file for protein %s\n" %protein)
                    if number_of_comparison_files > 1:
                        raise ValueError("How can I have more than one comparison file for protein %s\n" %protein)

                    # this benchmark object is only used to check whether the comparison file
                    # contains information or not
                    temp_scoring_function_stats = ScoringFunctionBenchmark(output_target_prefix= "dummy")

                    # file_has_information[0] is 1 if there was information for shared patches
                    # file_has_information[1] is 1 if there was information for patches interactions
                    file_has_information = temp_scoring_function_stats.get_information(
                        comparison_file_name= list_comparison_file_name[0],
                        protein_name = protein)

                    # NOTE(review): this branch flags completion through answer_found_for_protein, while the
                    #               other branches of this loop set answer_found_for_file. The loop header is
                    #               not visible from here -- confirm that it actually tests this flag.
                    if not file_has_information[0]:
                        # no answer at this threshold: only give up when there is no other threshold left.
                        #  (comparing an index against a length might seem wrong (length starts at 1,
                        #   index at 0) but note that the index is increased before using it, so here
                        #   we are actually checking if the next index to be used is valid or not)
                        # When thresholds remain, nothing is flagged here and the flag keeps its current value.
                        if hub_threshold != -1 or num_threshold_to_use == len(thresholds_to_use):
                            if verbose:
                                sys.stderr.write("No answer found for protein %s at any threshold: going to next protein\n" %(protein))
                            answer_found_for_protein = 1
                    else:
                        # there was an answer for this threshold: go for next protein
                        if verbose:
                            sys.stderr.write("Answer found for protein %s: going to next protein\n" %protein)
                        answer_found_for_protein = 1


                # END OF if hub_threshold == -1:
                else:
                    # progressive threshold not being applied (this is the else-branch of the
                    # "hub_threshold == -1" test, per the END OF marker just above): there is no other
                    # threshold to try, so flag this file as finished and report it when verbose
                    answer_found_for_file = 1
                    if verbose:
                        sys.stderr.write("No answer found for file %s at this threshold %s: going to next file\n" %(this_prefix, hub_threshold))
            # END OF if patch_mode == "train":
            
            elif patch_mode == "eval" or patch_mode == "exec":
                # check if stop condition was met: in case it was, break the loop (answer_found_for_file =1). Otherwise, try another hub_threshold 
                # (unless all hub_thresholds have been used or a fixed hub_threshold was set)

                
                
                file_name = "/home/raragues/phd/piana/code/execs/temp_results/" + str(this_prefix) + "." + \
                            str(threshold_being_used) +  ".clustering_results"

                if verbose:
                    sys.stderr.write("Checking file %s to see if a result has been found...\n" %(file_name))

                try:
                    # if the stop condition was met, there will be a file with results for it and no exception will occur
                    stop_cond_fd  = file(file_name, "r")
                    answer_found_for_file = 1
                    if verbose:
                        sys.stderr.write("Decomposition found for file at hub_threshold %s. Going to next file\n" %(thresholds_being_used) )
                except:
                    # if there is a exception it means there was no file with results which means nothing was found for this protein
                    # at the current parameters
                    if verbose:
                        sys.stderr.write("No result found...  checking if new hub thresholds exist...\n")
                    
                    if hub_threshold != -1 or num_threshold_to_use == len(thresholds_to_use):
                        # if no answer found, but all thresholds have been used (or it was not using multiple thresholds) , 
                        # we stop the loop, since we are not going to find an answer anyway...
                        #  (this if might seem wrong because it checks an index against a length (lenght starts at 1, index at 0)
                        #   but note that the index is increased before using it, so here we are actually checking if
                        #   next index to be used is valid or not)
                        answer_found_for_file = 1
                        if verbose:
                            sys.stderr.write("No decomposition found and no more hub thresholds to use: go to next file\n")
                    else:
                        if verbose:
                            sys.stderr.write("trying a new hub_threshold\n")
            # END OF elif patch_mode == "eval" or patch_mode == "exec":

            else:
                # no patch_mode set (neither the "train" nor the "eval"/"exec" branch matched):
                # we are not doing a patch decomposition, so there is no result to check for.
                # Therefore, set answer_found_for_file to 1 right away or the loop won't end
                answer_found_for_file = 1
            
        # END OF else: ( --> END OF FATHER CODE)
    # END OF while not answer_found_for_file
# END OF for line in input_file_fd:
