"""
File        : GraphPatchGroup.py
Author      : Ramon Aragues
Creation    : 09.2004
Contents    : implementation of GraphPatchGroup class
Called from : programs/classes that handle a GraphPatchGroup
Subclass    : of Graph.py

=======================================================================================================

This file implements class GraphPatchGroup

a GraphPatchGroup is the graph where nodes are PatchGroup and edges, interactions between those


it also implements the methods that calculate the properties of the graph:

 - matrix with "number of interactors shared between the nodes"
 - matrix with "number of proteins shared between the nodes"
 - matrix with "belongs to same protein or not between the nodes"

"""

from sets import *

from Graph import *
from GraphPatchGroupNodeAttribute import *
from GraphPatchGroupEdgeAttribute import *
from Patch import *

import utilities

# Module-level debugging switches; they are tested for truthiness (0 means off).
# NOTE(review): only the use of verbose_detailed is visible in this part of the file;
#               "verbose" and "verbose_differences" are presumably read further down -- confirm.
verbose = 1
verbose_detailed = 0     # when true, get_protein_pairs_sharing_patchgroup_id writes each patchgroup's protein list to stderr
verbose_differences = 0

#----------------------------------------------------------------------------------------------
class GraphPatchGroup(Graph):
#----------------------------------------------------------------------------------------------
    """
    
    """
    def __init__(self, graph_id=None ):
        """
        Creates an empty patchgroup graph identified by "graph_id"

        Only bookkeeping attributes are initialised here; the Graph constructor is called last
        with graphID=graph_id
        """
        # ---- results computed on demand: each one stays None until its getter builds it ----

        # filled by get_protein_pairs_sharing_patchgroup_id with tuples (low_member, high_member)
        self.pairs_sharing_patch = None

        # checked by get_interacting_proteins_pairs before computing the interacting pairs
        self.interacting_protein_pairs = None

        # filled by get_patchgroup_proteins:
        #
        #        { patchgroup_id1: [protein_ext_code1, protein_ext_code2, ...],
        #          patchgroup_id2: [protein_ext_code3, protein_ext_code2, ...],
        #          ..............................................
        #        }
        self.patchgroup_proteins = None

        # filled by get_protein_decomposition:
        #
        #        { protein_code1: [patchgroup_id1, patchgroup_id2, ...],
        #          protein_code2: [patchgroup_id3, patchgroup_id1, ...],
        #          .................................................
        #        }
        self.protein_decomposition = None

        # ---- translation between user ids and internal (dummy) ids ----
        #
        # Ids given by the user are not guaranteed to comply with internal requirements, so every
        # external id is mapped to a dummy id handed out by _get_dummy_patchgroup_id
        self.correspondence_user_dummy_patch_id = {}   # user patch id      -> dummy patchgroup id
        self.correspondence_dummy_user_patch_id = {}   # dummy patchgroup id -> user patch id
        self.counter_dummy_patchgroup_id = 0           # always holds the next unassigned dummy id

        Graph.__init__(self, graphID=graph_id)


    # TO DO!!!! How do I build a new PianaGraph based on a given patchgroup graph? Should it give the same PianaGraph that originated the patchgroup?
    #           Think about this... this can be used in the future to do protein-protein interaction prediction

    # ------------------------------------------
    #  Methods to create a patchgroup graph
    #------------------------------------------
    # These methods are inherited from class Graph
    #

    def _get_dummy_patchgroup_id(self, external_patchgroup_id= None):
        """
        returns the patchgroup id assigned to external_patchgroup_id and updates the correspondences dictionary
        
        """

        self.correspondence_dummy_user_patch_id[self.counter_dummy_patchgroup_id] = external_patchgroup_id
        self.correspondence_user_dummy_patch_id[external_patchgroup_id] = self.counter_dummy_patchgroup_id
        
        self.counter_dummy_patchgroup_id += 1

        return self.counter_dummy_patchgroup_id - 1

    def get_size(self):
        """
        returns size of GraphPatchGroup (ie number of links)
        """

        return len(self.edges_dic)
        
    def load_patchgroup_graph_from_piana(self, user_proteins_dict= None,
                                         species_name= None,
                                         list_cluster_interactions=None,
                                         list_interaction_methods= None,
                                         clustering_method= None,
                                         piana_access= None ):
        """

        Loads this GraphPatchGroup with information extracted from
        pianaDB. The user sets with dictionary user_proteins_dict for
        which proteins the GraphPatchGroup will be built

        "user_proteins_dict" contains (as keys) the proteinPianas for which the patchgroup graph will be constructed
            --> it can be "all": the graph is built for every protein that appears in at least one interaction
                found with list_interaction_methods (restricted to species_name unless species_name is "all")
            --> if not "all", each key is a user-selected proteinPiana

        "species_name" is the species used to filter proteins when user_proteins_dict is "all"
            --> "all" disables the species check
            --> any other value is looked up in the database, even if user_proteins_dict is not "all"

        "list_cluster_interactions" is a list of interacting cluster pairs (clusterID, clusterID)
            --> None is treated as "no interactions": nodes are added but no edges
            --> pairs where one of the clusters was not added as a node are ignored

        "list_interaction_methods" is a list of methods of finding protein interactions: it is only used
        (when user_proteins_dict is "all") to retrieve the protein-protein interactions

        "clustering_method" is the dbali method that you want to use for creating your patchgroup graph
            --> valid methods are those in PianaGlobals.method_names dbali*

        "piana_access" is the PianaDBaccess object used to access pianaDB

        raises ValueError if species_name is not "all" and no taxonomy is found for it

        """
        user_patchgroups_dict = {} # keys are the internal ids of the patchgroups added as nodes by this call

        if species_name != "all":
            set_user_tax_ids = Set(piana_access.get_taxonomies_from_species_name(species_name_value = species_name))
            if not set_user_tax_ids:
                raise ValueError("<%s> is not a valid species name (ie. not found in database)\n" %(species_name))


        if user_proteins_dict == "all":

            user_proteins_dict = {}
            species_match = {}   # proteinPiana -> does it belong to the user species? (avoids repeating the
                                 # taxonomy look-up for proteins that appear in many interactions)

            # fill user_proteins_dict with all proteins that have at least one interaction (in the list of source methods)
            # get_all_protein_protein_interactions returns list of (proteinPianaA, proteinPianaB, interactionPiana)
            list_protein_protein_interactions = piana_access.get_all_protein_protein_interactions(list_source_methods= list_interaction_methods)

            for protein_protein_interaction in list_protein_protein_interactions:

                protein_a= protein_protein_interaction[0]
                protein_b= protein_protein_interaction[1]

                if species_name != "all":

                    for protein in (protein_a, protein_b):
                        if protein not in species_match:
                            set_tax_id = Set(piana_access.get_protein_taxonomy_ids(proteinPiana_value =protein))
                            species_match[protein] = bool(set_tax_id.intersection(set_user_tax_ids))

                    if not species_match[protein_a] or not species_match[protein_b]:
                        # skip interaction if any of the two proteins is not of the species selected by user
                        continue
                # END OF if species_name != "all":

                # the dictionary is used as a set: only the keys matter
                user_proteins_dict[protein_a] = None
                user_proteins_dict[protein_b] = None
            # END OF for protein_protein_interaction in list_protein_protein_interactions:
        # END OF if user_proteins_dict == "all":


        # get the clusters of each selected protein, grouping proteins by cluster
        proteins_in_cluster = {}   # dictionary that will hold cluster_id as key, and content the proteins that have it
        for proteinPiana in user_proteins_dict:
            list_cluster_id= piana_access.get_protein_dbali_cluster(proteinPiana_value= proteinPiana, clustering_method= clustering_method,
                                                                    source_db="all")
            for cluster_id in list_cluster_id:
                proteins_in_cluster.setdefault(cluster_id, []).append(proteinPiana)
            # END OF for cluster_id in list_cluster_id:
        # END OF for proteinPiana in user_proteins_dict:

        # 1. First of all, create the patchgroup objects (ie GraphPatchGroupNodeAttribute) and add nodes to graph
        #   --> 1.1. create a patch for each patch (cluster) for each protein
        #   --> 1.2  add list of patches to the patchgroup object
        #   --> 1.3 add node with attribute to graph
        for cluster_id in proteins_in_cluster:

            list_patch_objects = []
            for proteinPiana in proteins_in_cluster[cluster_id]:
                new_patch_id = self._get_dummy_patchgroup_id(external_patchgroup_id= cluster_id)
                list_patch_objects.append(Patch(patch_id=new_patch_id, proteinPiana=proteinPiana))
            # END OF for proteinPiana in proteins_in_cluster[cluster_id]:

            # this must be the last id requested for cluster_id: the user->dummy correspondence keeps only the
            # most recent id, and the edges below are created from that correspondence
            new_patchgroup_id = self._get_dummy_patchgroup_id(external_patchgroup_id= cluster_id)

            # keep this patchgroup in dict to later know (when parsing the patchgroup interactions) that it is in one
            # of the proteins selected by user
            user_patchgroups_dict[new_patchgroup_id] = None

            new_patchgroup_attribute= GraphPatchGroupNodeAttribute(patch_group_id= new_patchgroup_id )

            new_patchgroup_attribute.add_list_patch_objects(new_list_patch_objects= list_patch_objects)

            new_node = GraphNode(nodeID= new_patchgroup_id, attribute = new_patchgroup_attribute, isRoot =1, graph = self, alternative_id= cluster_id)
            self.add_node(new_node)
        # END OF for cluster_id in proteins_in_cluster:


        # At this point, we have added all nodes for clusters of proteins that the user selected.
        #
        # Now, we have to add the edges between those clusters. Edges have been passed as a list to this method
        if list_cluster_interactions is None:
            list_cluster_interactions = []

        internal_id_of = self.correspondence_user_dummy_patch_id

        for pair_clusters in list_cluster_interactions:
            user_patch_id_a = pair_clusters[0]
            user_patch_id_b = pair_clusters[1]

            if user_patch_id_a not in internal_id_of or user_patch_id_b not in internal_id_of:
                # at least one of the clusters is unknown to this graph
                continue

            internal_patchgroup_id_a = internal_id_of[user_patch_id_a]
            internal_patchgroup_id_b = internal_id_of[user_patch_id_b]

            if internal_patchgroup_id_a in user_patchgroups_dict and internal_patchgroup_id_b in user_patchgroups_dict:
                # only adding edge if both patchgroups were previously added( ie. are in at least one of the proteins selected by user)
                #  --> we do not want edges between patchgroups that have not been added...

                new_edge_attribute= GraphPatchGroupEdgeAttribute() # empty attribute to maintain compatability with Graph class

                new_edge = self.get_edge(identifier1= internal_patchgroup_id_a,
                                         identifier2= internal_patchgroup_id_b,
                                         attribute_object=new_edge_attribute,
                                         get_mode="new")
                self.add_edge(new_edge)
            # END OF if both patchgroups are in user_patchgroups_dict
        # END OF for pair_clusters in list_cluster_interactions:
 
            
        
    # ------------------------------------------
    #  Methods to get patchgroup graph contents
    #------------------------------------------

        
    def get_patchgroup(self, patchgroup_id=None):
        """
        returns the attribute object (ie. the patchgroup) of the node identified by patchgroup_id

        the node is requested from the graph with get_mode="error"
        """
        patchgroup_node = self.get_node(identifier = patchgroup_id, get_mode="error")

        return patchgroup_node.get_node_attribute_object()


    def get_protein_decomposition(self, piana_access= None, protein_type_name= None, alternative_type_names= None):
        """
        gets the patchgroups that are in each protein (ie. protein decomposition into functional sites)

        returns a dictionary that follows structure:

                          { protein_code1: [patchgroup_id1, patchgroup_id2, ...],
                            protein_code2: [patchgroup_id3, patchgroup_id1, ...],
                            .................................................
                          }


        if piana_access is None, then uses internal graph identifiers for nodes
        """

        if self.protein_decomposition is not None:
            return self.protein_decomposition
        else:
            
            self.protein_decomposition = {}

            for patchnode in self.get_node_object_list():
                patchgroup = patchnode.get_node_attribute_object()
                patchgroup_id = patchgroup.get_patch_group_id()

                for proteinPiana in  patchgroup.get_list_proteinPiana():

                    if piana_access is not None:
                        list_protein_ext_code = piana_access.get_list_protein_external_codes(proteinPiana=proteinPiana,
                                                                                             protein_type_name=protein_type_name,
                                                                                             alternative_type_names=alternative_type_names)
                        if list_protein_ext_code:
                            protein_ext_code = list_protein_ext_code[0] # use only one code for clarity
                        else:
                            protein_ext_code = "no_code_found"
                    else:
                        protein_ext_code = proteinPiana

                    # if there was already an entry, append new patchgroup_id, otherwise create new list
                    if self.protein_decomposition.has_key(protein_ext_code):
                        self.protein_decomposition[protein_ext_code].append(patchgroup_id)
                    else:
                        self.protein_decomposition[protein_ext_code]= [patchgroup_id]

                # END OF for proteinPiana in  patchgroup.get_list_proteinPiana():
            # END OF for patchnode in self.get_node_object_list():

            return self.protein_decomposition

        
    def get_patchgroup_proteins(self, piana_access= None, protein_type_name= None, alternative_type_names= None, root_protein= None):
        """
        gets the proteins that are in each patchgroup (ie. the proteins that share a functional site)
        
        returns a dictionary that follows structure:

                          { patchgroup_id1: [protein_ext_code1, protein_ext_code2, ...],
                            patchgroup_id2: [protein_ext_code3, protein_ext_code2, ...],
                            ..............................................
                          }


        if piana_access is None, then uses internal graph identifiers for nodes

        if root_protein is not None, only returns patchgroup_ids that contain the root protein
        
        """

        
        if self.patchgroup_proteins is not None and piana_access is None:
            # this is not very clean... normally, I don't want to recreate the dictionary each time somebody calls this method, that's why
            # I return it if it is not None
            # However, when asking for the dictionary with the idea of printing it, I must recreate it, so the code types are those chosen by the user
            # How do I know if this method is being called for printing? Because the user will pass a piana_access... that explains the and piana_access...
            # therefore, if a piana_access is passed as argument, recreate the dictionary
            return self.patchgroup_proteins
        else:
        
            self.patchgroup_proteins = {}

            for patchnode in self.get_node_object_list():

                patchgroup = patchnode.get_node_attribute_object()

                proteinPianas_dic_in_patchgroup = patchgroup.get_dict_proteinPianas()

                if root_protein is not None:
                    # if there is a root protein and it does not appear in the patchgroup, then do not return the patchgroup
                    if proteinPianas_dic_in_patchgroup.has_key(root_protein):
                        continue

                patchgroup_id = patchgroup.get_patch_group_id()
                self.patchgroup_proteins[patchgroup_id] = []

                for proteinPiana in proteinPianas_dic_in_patchgroup:

                    if piana_access is not None:

                        list_protein_ext_code = piana_access.get_list_protein_external_codes(proteinPiana=proteinPiana,
                                                                                             protein_type_name= protein_type_name,
                                                                                             alternative_type_names= alternative_type_names)


                        if list_protein_ext_code:
                            protein_ext_code = list_protein_ext_code[0] # use only one code for clarity
                        else:
                            protein_ext_code = "no_code_found"

                    else:
                        protein_ext_code = proteinPiana

                    (self.patchgroup_proteins[patchgroup_id]).append(protein_ext_code)
                # END OF for proteinPiana in list_proteinPianas_in_cluster:
            # END OF for patchnode in self..get_node_object_list():

            return self.patchgroup_proteins



    # METHOD to get the interactions between patchgroups is inherited from class Graph: get_node_node_links()

    def get_protein_pairs_sharing_patchgroup_id(self, piana_access= None, protein_type_name= None, alternative_type_names= None,
                                                comparison_mode=None, root_protein=None, similar_proteins_dic= {}, proteins_allowed= None):
        """
        returns a list of protein pairs that share a patchgroup id

        list returned looks like this  [ [protein1, protein2], [protein1, protein3] , ...]

        first member of the pair is always the one that is "smaller"... (ie using <)
        
        if "piana_access" is None, then uses internal graph identifiers for nodes (ignoring protein_type_name and alternative_type_names)
        otherwise, the list returned will contain proteins of type protein_type_name (or "alternative_type_name" if nothing was found)

       
        
        "comparison_mode" determines how is the comparison performed:

           - 'standard': all against all, we are considering that both patchgroup graphs should be identical
           - 'training': If other_patchgroup_graph (ie. the gold standard) has no info about a protein, it is ignored (ie. no differences computed).
                          -> if there is no info about any of the proteins in other_patch_group_graph (ie. the gold standard) then this function returns None
           - 'root':     Only checking composition of root protein.
                          -> If there is no info of the root protein in other_patch_group_graph (ie. the gold standard) then this function returns None
                          -> if comparison_mode is set to root, then parameter "root_protein" is required

        "root_protein" sets which is the root protein that generated the patchgroup graph. Only used when comparison_mode == "root"


        "similar_proteins_dic" is required for comparison_mode root: it is a list of proteins that are similar to root_protein (ie. are the same protein
                                                                                                                                with different sequence)
        
        if "proteins_allowed" is not None, the list returned will only contain pairs that appear in that list. This is used to make sure we don't count
        as False Positives those predictions for which the Gold Standard didn't know anything

        TO DO!!!!!
        
        ATTENTION!!! the list returned does not contain duplicate pairs... this means that if two proteins A and B share two distinct patches,
        there will be only one pair [A, B]... this is not completely correct, but I think is better than adding duplicates because otherwise,
        when counting number of true positives, I will count each duplicate as true... even if the other graph just has one instance of the pair!!!
        The best thing to do would be to check that both graphs have the exact same number of that kind of pair, to count as well these cases...
        but this is too much work for what I think the advantage would be...  The way to check for that as well would be to have some kind of dictionary
        that counts how many times each pair appears, and count tp and fp based on those numbers... if other had 3 instances of A,B and self only has 1,
        then TP=1 and FP =0 and FN=2 (because there were two instances missed by self...)



        
        """
        if self.pairs_sharing_patch is not None:
            return self.pairs_sharing_patch
        else:
            
            self.pairs_sharing_patch = []

            patchgroup_proteins = self.get_patchgroup_proteins(piana_access=piana_access ,
                                                               protein_type_name= protein_type_name,
                                                               alternative_type_names= alternative_type_names)  # dictionary with patchgroup_ids as keys,
                                                                                                                # and list of protein codes as content

            for patchgroup_id in patchgroup_proteins:

                number_of_proteins_in_patchgroup = len( patchgroup_proteins[patchgroup_id] )

                if verbose_detailed:
                    sys.stderr.write("\nfor patchgroup id %s, list of proteins is: %s\n" %(patchgroup_id, patchgroup_proteins[patchgroup_id]))

                for i in range(number_of_proteins_in_patchgroup):
                    for j in range(i+1, number_of_proteins_in_patchgroup):

                        i_ext_code= patchgroup_proteins[patchgroup_id][i]
                        j_ext_code= patchgroup_proteins[patchgroup_id][j]

                        if comparison_mode == "training":
                            # in this mode, only considering pairs for which the golden standard has info
                            if proteins_allowed:
                                if i_ext_code not in proteins_allowed or j_ext_code not in proteins_allowed:
                                    continue
                        elif comparison_mode == "root":
                            # in this mode, only considering pairs for root_protein (and having info for it)
                            if i_ext_code != root_protein and j_ext_code != root_protein:
                                continue
                            elif proteins_allowed:
                                if i_ext_code not in proteins_allowed or j_ext_code not in proteins_allowed:
                                    continue

                            if similar_proteins_dic.has_key(i_ext_code) or similar_proteins_dic.has_key(j_ext_code):
                                continue

                        if i_ext_code <= j_ext_code:
                            low_member  = i_ext_code
                            high_member = j_ext_code
                        else:
                            low_member  = j_ext_code
                            high_member = i_ext_code

                        # TO DO!!! This pertenance check can be speed up by creating a dictionary that keeps as keys
                        # which pairs have been already added. Memory usage would increase, but it would be faster
                            
                        if (low_member, high_member) not in self.pairs_sharing_patch:
                            self.pairs_sharing_patch.append( (low_member, high_member) )

            return self.pairs_sharing_patch
    
    def get_interacting_proteins_pairs(self, piana_access= None, protein_type_name= None, alternative_type_names= None,
                                       comparison_mode="standard", root_protein=None, similar_proteins_dic= {}, proteins_allowed= None):
        """

        returns a list of proteins that interact in the format [ (protein1, protein2), (protein3, protein4) ...]
        
        if piana_access is None, then uses internal graph identifiers for nodes (ignoring protein_type_name and alternative_type_names)
        otherwise, the list returned will contain proteins of type protein_type_name (or alternative_type_name if nothing was found)

       
        
        "comparison_mode" determines which  pairs are returned:

           - 'standard': root_protein and list proteins_allowed are ignored: all pairs returned
           - 'training': only pairs with proteins that appear in list proteins_allowed
           - 'root':     only pairs where root_protein is a member and the other member is in list proteins_allowed

        "root_protein" sets which is the root protein that generated the patchgroup graph. Only used when comparison_mode == "root"


        "similar_proteins_dic" is required for comparison_mode root: it is a list of proteins that are similar to root_protein (ie. are the same protein
                                                                                                                                with different sequence)
        
        if "proteins_allowed" is not None, the list returned will only contain pairs between proteins that appear in that
        list. This is used to make sure we don't count as False Positives those predictions for which the Gold Standard didn't
        know anything

        Attention: returns a list of inmutable tuples, so they can be converted to sets afterwards
        
        TO DO!!!!!
        ATTENTION!!! the list returned does not contain duplicate pairs... this means that 


        """

        # TO DO!!! change proteins_allowed to a dictionary to speed up things...

        
        if self.interacting_protein_pairs is not None:
            return self.interacting_protein_pairs
        else:

            self.interacting_proteins_pairs = []


            # get list of pairs of patchgroups that interact
            interacting_patchgroups_pairs = self.get_node_node_links()

            # get which proteins are in each patchgroup (dictionary indexed by patchgroup_id and content list of proteins that have it)
            patchgroup_proteins = self.get_patchgroup_proteins(piana_access=piana_access ,
                                                               protein_type_name= protein_type_name,
                                                               alternative_type_names= alternative_type_names)

            for interacting_patchgroups_pair in interacting_patchgroups_pairs:

                one_patchgroup   = interacting_patchgroups_pair[0]
                other_patchgroup = interacting_patchgroups_pair[1]

                for one_protein_code in patchgroup_proteins[one_patchgroup]:
                    for other_protein_code in patchgroup_proteins[other_patchgroup]:

                        if comparison_mode == "training":
                            # in this mode, only considering pairs for which the golden standard has info
                            if proteins_allowed:
                                if one_protein_code not in proteins_allowed or other_protein_code not in proteins_allowed:
                                    continue
                            
                        elif comparison_mode == "root":
                            # in this mode, only considering pairs for root_protein (and having info for it)
                            if one_protein_code != root_protein and other_protein_code != root_protein:
                                continue
                            elif proteins_allowed:
                                if one_protein_code not in proteins_allowed or other_protein_code not in proteins_allowed:
                                    continue

                            if similar_proteins_dic.has_key(one_protein_code) or similar_proteins_dic.has_key(other_protein_code):
                                continue
                        
                        # TO DO!!! This pertenance check can be speed up by creating a dictionary that keeps as keys
                        # which pairs have been already added. Memory usage would increase, but it would be faster
                        if (one_protein_code, other_protein_code) not in self.interacting_proteins_pairs:
                            self.interacting_proteins_pairs.append( (one_protein_code, other_protein_code) )

            # END OF for interacting_patchgroups_pair in interacting_patchgroups_pairs:


            return self.interacting_proteins_pairs
        
    def has_info(self, protein_id):
        """
        returns 1 if this patchgroup has information about the decomposition of protein "protein_id"
        otherwise returns 0
        """

        temp_protein_decomposition = self.get_protein_decomposition()

        if temp_protein_decomposition.has_key(protein_id):
            return 1
        else:
            return 0

    def get_proteins_with_composition_info(self):
        """
        returns a list with proteins that are described in this GraphPatchGroup


        during the training, this method is used to get the list of proteins that have shared patches info in the Gold Standard, so we do not count in the
        statistics predictions for which we do not have any information (we don't want to count as false positives those predictions that we do not
        know if they are correct or not)
        """

        temp_protein_decomposition = self.get_protein_decomposition()
        
        return temp_protein_decomposition.keys()

    def get_proteins_with_int_info(self):
        """
        returns a list with the proteins that appear in at least one of the pairs returned by
        get_interacting_proteins_pairs() (each protein is listed only once)

        during the training, this list tells which proteins have ints info in the Gold Standard: predictions about
        proteins for which nothing is known are left out of the statistics (a prediction that cannot be checked
        must not be counted as a false positive)
        """

        # get_interacting_proteins_pairs is called without arguments: according to the original
        # author's note, that makes it return all the pairs that interact
        all_interacting_pairs = self.get_interacting_proteins_pairs()

        # the keys of a dictionary are used to keep each protein only once
        proteins_seen = {}

        for interacting_pair in all_interacting_pairs:
            for member in (interacting_pair[0], interacting_pair[1]):
                proteins_seen[member] = None

        return list(proteins_seen)
        
        
    # ------------------------------------------
    #  Methods to test patchgroup graph contents
    #------------------------------------------

                                
    def compare_to(self, other_patchgroup_graph= None, comparison_mode="standard", root_protein=None, similar_proteins_dic= {}, piana_access = None):
        """
        Compares self patchgroup graph to "other_patchgroup_graph"
           -> when training, self is the decomposition done by the clustering and other_patchgroup_graph is the gold standard

        returns a list [ number of protein patches divergence,
                         specificity shared patches, sensibility shared patches,
                         specificity patches interactions, sensibility patches interactions,
                         TPs for shared patches, FPs for shared patches, FNs for shared patches,
                         TPs for int, FPs for int, FNs for int                                     ]

           -> "specificity" is computed here as TP / (TP + FP)  (a quantity usually called precision)
           -> "sensibility" is computed here as TP / (TP + FN)  (a quantity usually called sensitivity or recall)
           -> a specificity or a sensibility is None when its denominator is 0 (ie. it cannot be evaluated)
           -> the number of protein patches divergence is the value returned by get_differences_in_protein_composition,
              which is None in modes 'training' and 'root' when the gold standard has no info to compare with

        if the module level flag "verbose" is set, a summary of each of the three comparisons is written to stderr


        "other_patchgroup_graph" is required, even though it defaults to None: methods are called on it unconditionally


        "comparison_mode" determines how is the comparison performed:

           - 'standard': all against all, we are considering that both patchgroup graphs should be identical
           - 'training': If other_patchgroup_graph (ie. the gold standard) has no info about a protein, it is ignored (ie. no differences computed).
           - 'root':     Only checking composition of root protein.
                          -> if comparison_mode is set to root, then parameter "root_protein" is required

        "root_protein" sets which is the root protein that generated the patchgroup graph. Only used when comparison_mode == "root"


        "similar_proteins_dic" is required for comparison_mode root: it is a dictionary whose keys are the proteins that are similar
        to root_protein (ie. are the same protein with different sequence). It is only forwarded to other methods here, never modified

        "piana_access" is used for debugging (it is only forwarded to get_differences_in_protein_shared_patches)


        Attention!!!!!
        this function only works for patchgroup graphs that have been created from the same pianaDB (same proteinPiana ids)!!!! If you need to use it
        for graphs created from different pianaDBs you need to give a piana_access object and set type to md5 as argument to all methods


        Important!!! To compare two patchgroup graphs, they should have been derived from the same set of proteins. In most cases self
        is compared to other, so the results would be the same if the gold standard covers other proteins: only the FNs would be affected

        """
        # When training, self is the decomposition done by the clustering and other_patchgroup_graph is the gold standard

        # there are several aspects involved in deciding how similar two patchgroup graphs are....
        # depending for what we want to tune the formula for, we could give more or less weight to each of these aspects
        # for example, if we want a formula that is very good at finding which proteins share the same functional site, then we could
        # give more weight to protein_shared_patches_comparison

        def ratio_or_none(number_of_hits, number_of_misses):
            # returns number_of_hits / (number_of_hits + number_of_misses) as a float, or None when nothing
            # was counted (0/0): in that case the ratio cannot be evaluated
            # Only ZeroDivisionError is caught, so that any other error is not silently hidden
            try:
                return number_of_hits / float(number_of_hits + number_of_misses)
            except ZeroDivisionError:
                return None

        # ---------------------------
        # keep in a list for which proteins the other_patchgroup_graph (ie. the gold standard during the training) and the training graph have info
        # ---------------------------
        # -> "*_gold_std" this will be used to avoid counting as FP those predictions that the gold standard doesn't know anything about
        # -> "*_predictions" this will be used to avoid counting as FN those "truths" that were impossible to predict (due to lack of information)

        # int refers to interactions information
        # composition refers to protein decomposition information

        proteins_with_composition_gold_std = other_patchgroup_graph.get_proteins_with_composition_info()
        proteins_with_composition_predictions = self.get_proteins_with_composition_info()

        proteins_with_int_gold_std = other_patchgroup_graph.get_proteins_with_int_info()
        proteins_with_int_predictions = self.get_proteins_with_int_info()

        # --
        # differences in protein composition
        # --
        protein_composition_comparison = self.get_differences_in_protein_composition(other_patchgroup_graph= other_patchgroup_graph,
                                                                                     comparison_mode= comparison_mode,
                                                                                     root_protein= root_protein)

        if verbose:
            sys.stderr.write("Comparison in difference of number of patches per protein: %s\n" %(protein_composition_comparison))

        # --
        # differences in protein shared patches
        # --

        protein_shared_patches_comparison = self.get_differences_in_protein_shared_patches(
                                                               other_patchgroup_graph= other_patchgroup_graph,
                                                               comparison_mode= comparison_mode,
                                                               root_protein= root_protein,
                                                               similar_proteins_dic = similar_proteins_dic,
                                                               proteins_with_composition_gold_std = proteins_with_composition_gold_std,
                                                               proteins_with_composition_predictions = proteins_with_composition_predictions,
                                                               piana_access = piana_access)

        shared_tps = protein_shared_patches_comparison[0]
        shared_fps = protein_shared_patches_comparison[1]
        shared_fns = protein_shared_patches_comparison[2]

        spec_patches = ratio_or_none(shared_tps, shared_fps)
        sens_patches = ratio_or_none(shared_tps, shared_fns)

        if verbose:
            sys.stderr.write("Shared patches comparison: TP = %s and FP = %s and FN = %s (spec=%s,sens=%s)\n" %(shared_tps,
                                                                                                                shared_fps,
                                                                                                                shared_fns,
                                                                                                                spec_patches, sens_patches))

        # --
        # differences in patches interactions
        # --

        patches_interactions_comparison = self.get_differences_in_patches_interactions(
                                                             other_patchgroup_graph= other_patchgroup_graph,
                                                             comparison_mode= comparison_mode,
                                                             root_protein= root_protein,
                                                             similar_proteins_dic = similar_proteins_dic,
                                                             proteins_with_int_gold_std = proteins_with_int_gold_std,
                                                             proteins_with_int_predictions = proteins_with_int_predictions)

        int_tps = patches_interactions_comparison[0]
        int_fps = patches_interactions_comparison[1]
        int_fns = patches_interactions_comparison[2]

        spec_int = ratio_or_none(int_tps, int_fps)
        sens_int = ratio_or_none(int_tps, int_fns)

        if verbose:
            sys.stderr.write("Patches interactions comparison: TP = %s and FP = %s and FN = %s (spec=%s,sens=%s)\n\n" %(int_tps,
                                                                                                                        int_fps,
                                                                                                                        int_fns,
                                                                                                                        spec_int, sens_int ))

        # --
        # return results
        # --
        return [protein_composition_comparison,
                spec_patches,
                sens_patches,
                spec_int,
                sens_int,
                shared_tps,
                shared_fps,
                shared_fns,
                int_tps,
                int_fps,
                int_fns]
    


    def get_differences_in_protein_composition(self, other_patchgroup_graph, comparison_mode="standard", root_protein=None):
        """
        computes differences between self and other_patchgroup_graph in terms of the number of patches that each protein has

        the value returned is the sum, over the proteins of self that are taken into account, of

               abs( number of patchgroups of the protein in self - number of patchgroups of the protein in other_patchgroup_graph )

        where the number of patchgroups of a protein is the length of its entry in the dictionary returned by get_protein_decomposition()

        proteins that only appear in other_patchgroup_graph are never taken into account


        "comparison_mode" determines how is the comparison performed:

           - 'standard': all against all, we are considering that both patchgroup graphs should be identical
                          -> a warning is written to stderr for each protein of self that other_patchgroup_graph does not have
           - 'training': If other_patchgroup_graph (ie. the gold standard) has no info about a protein, it is ignored (ie. no differences computed).
                          -> if there is no info about any of the proteins in other_patch_group_graph (ie. the gold standard) then this function returns None
           - 'root':     Only checking composition of root protein.
                          -> If there is no info of the root protein in other_patch_group_graph (ie. the gold standard) then this function returns None
                          -> if comparison_mode is set to root, then parameter "root_protein" is required

        "root_protein" sets which is the root protein that generated the patchgroup graph. Only used when comparison_mode == "root"


        raises ValueError if comparison_mode is not one of the modes above (this is checked before comparing anything, so it is
        raised even when self does not describe any protein) or if comparison_mode is 'root' and no root_protein is given


        Attention!!!!!
        this function only works for patchgroup graphs that have been created from the same pianaDB!!!! If you need to use it
        for graphs created from different pianaDBs you need to change the proteinPiana identifiers below to code type name md5

        """

        # When training, self is the decomposition done by the clustering and other_patchgroup_graph is the gold standard

        if comparison_mode not in ("standard", "training", "root"):
            raise ValueError("Incorrect comparison_mode used")

        if comparison_mode == "root" and root_protein is None:
            raise ValueError("comparison_mode root requires a root_protein parameter")

        this_proteins_patches  = self.get_protein_decomposition()
        other_proteins_patches = other_patchgroup_graph.get_protein_decomposition()

        number_of_differences = 0
        info_found = 0      # set to 1 as soon as one protein could be compared

        for this_protein in this_proteins_patches:

            if comparison_mode == "root" and this_protein != root_protein:
                # in comparison_mode root, only counting statistics about the root protein
                continue

            # membership is tested on the dictionary obtained above: asking other_patchgroup_graph.has_info()
            # would fetch the decomposition of the other graph again for each protein
            if this_protein in other_proteins_patches:
                info_found = 1
                number_of_differences += abs( len(this_proteins_patches[this_protein]) - len(other_proteins_patches[this_protein]) )

            elif comparison_mode == "standard":
                # in modes training and root, proteins unknown to the other graph are silently ignored
                sys.stderr.write("Warning: other_patchgroup_graph doesn't have protein %s\n" %(this_protein))

        # END OF for this_protein in this_proteins_patches:

        if comparison_mode != "standard" and not info_found:
            # nothing could be compared: None tells the caller that the difference cannot be evaluated
            return None

        return number_of_differences
        
        

        
    def get_differences_in_protein_shared_patches(self, other_patchgroup_graph, comparison_mode="standard", root_protein=None, similar_proteins_dic= {},
                                                  proteins_with_composition_gold_std=None, proteins_with_composition_predictions=None,
                                                  piana_access=None):
        """
        computes differences between self and other_patchgroup_graph in terms of the patches shared between proteins

        returns a list [True Positives, False Positives, False Negatives]  (ie. the result of calculate_tp_fp_fn)

        1. get the pairs of proteins that share a patch in self
        2. get the pairs of proteins that share a patch in other_patchgroup_graph

        3. pairs of self that also appear in other are True Positives
        4. pairs of self that do not appear in other are False Positives
        5. pairs of other that do not appear in self are False Negatives


        "comparison_mode", "root_protein" and "similar_proteins_dic" are forwarded unchanged to get_protein_pairs_sharing_patchgroup_id
        of both graphs: the values of comparison_mode used in this class are 'standard', 'training' and 'root'
           -> if comparison_mode is set to root, then parameter "root_protein" is required
           -> "similar_proteins_dic" holds the proteins that are similar to root_protein (ie. are the same protein with different sequence)

        "proteins_with_composition_gold_std" is the list of proteins for which the golden standard has composition information. It is given
        as "proteins_allowed" when getting the pairs of self (ie. a prediction is not meant to be considered a FP unless the golden
        standard had information about that protein)

        "proteins_with_composition_predictions" is the list of proteins for which composition predictions were made. It is given as
        "proteins_allowed" when getting the pairs of other_patchgroup_graph (ie. a truth is not meant to be considered a FN unless
        some kind of prediction was made for that protein)

        "piana_access" is used for debugging: when it is not None, both lists of pairs are printed to stdout, together with
        their translation to external codes (type uniacc, with md5 as alternative)

        Attention!!!!!
        this function only works for patchgroup graphs that have been created from the same pianaDB!!!! If you need to use it
        for graphs created from different pianaDBs you need to change the proteinPiana identifiers below to code type name md5

        """
        # When training, self is the decomposition done by the clustering and other_patchgroup_graph is the gold standard

        pairs_in_self = self.get_protein_pairs_sharing_patchgroup_id(comparison_mode= comparison_mode,
                                                                     root_protein= root_protein,
                                                                     similar_proteins_dic= similar_proteins_dic,
                                                                     proteins_allowed= proteins_with_composition_gold_std)

        pairs_in_other = other_patchgroup_graph.get_protein_pairs_sharing_patchgroup_id(comparison_mode= comparison_mode,
                                                                                        root_protein= root_protein,
                                                                                        similar_proteins_dic= similar_proteins_dic,
                                                                                        proteins_allowed= proteins_with_composition_predictions)

        if piana_access is not None:
            # this code only used for debugging to visualize the comparisons that are being done

            def external_code_of(proteinPiana):
                # translates one proteinPiana into a single external code
                return piana_access.get_list_protein_external_codes(proteinPiana= proteinPiana,
                                                                    protein_type_name= "uniacc",
                                                                    alternative_type_names= ["md5"],
                                                                    answer_mode= "single")

            ext_codes_in_self = [ (external_code_of(pair[0]), external_code_of(pair[1])) for pair in pairs_in_self ]
            ext_codes_in_other = [ (external_code_of(pair[0]), external_code_of(pair[1])) for pair in pairs_in_other ]

            print("comparing list %s (ext codes = %s)" %(pairs_in_self, ext_codes_in_self))
            print("\nwith list %s (ext codes = %s)" %(pairs_in_other, ext_codes_in_other))

        # END OF if piana_access is not None:

        return self.calculate_tp_fp_fn(this_list_of_pairs= pairs_in_self,
                                       other_list_of_pairs= pairs_in_other)


    def get_differences_in_patches_interactions(self, other_patchgroup_graph, comparison_mode="standard", root_protein=None, similar_proteins_dic= {},
                                                  proteins_with_int_gold_std=None, proteins_with_int_predictions=None):
        """
        computes differences between self and other_patchgroup_graph in terms of which proteins interact with which proteins

        returns a list [True Positives, False Positives, False Negatives]  (ie. the result of calculate_tp_fp_fn)

        Attention!
        Since we cannot actually compare patchgroup interactions (their identifiers are meaningless between different patchgroup graphs)
        we have to compare something else: protein interactions. This is, we consider that two proteins interact if patchgroups containing them interact

        By measuring the number of TP, FP and FN for proteins interactions we are not exactly calculating differences in patches interactions, but
        it is a good measure as well.

        TO CHECK!! Should the name of this method be changed to reflect exactly what it does?


        1. get the pairs of proteins that interact in self (get_interacting_proteins_pairs)
        2. get the pairs of proteins that interact in other_patchgroup_graph (get_interacting_proteins_pairs)

        3. pairs of self that also appear in other are True Positives
        4. pairs of self that do not appear in other are False Positives
        5. pairs of other that do not appear in self are False Negatives


        "comparison_mode", "root_protein" and "similar_proteins_dic" are forwarded unchanged to get_interacting_proteins_pairs of both
        graphs: the values of comparison_mode used in this class are 'standard', 'training' and 'root'
           -> if comparison_mode is set to root, then parameter "root_protein" is required
           -> "similar_proteins_dic" holds the proteins that are similar to root_protein (ie. are the same protein with different sequence)

        "proteins_with_int_gold_std" is the list of proteins for which the golden standard has interaction information. It is given as
        "proteins_allowed" when getting the pairs of self (ie. a prediction is not meant to be considered a FP unless the golden standard
        had information about that protein)

        "proteins_with_int_predictions" is the list of proteins for which predictions were made. It is given as "proteins_allowed" when
        getting the pairs of other_patchgroup_graph (ie. a truth is not meant to be considered a FN unless some kind of prediction was
        made for that protein)

        """
        # When training, self is the decomposition done by the clustering and other_patchgroup_graph is the gold standard

        # the pairs of self are obtained before the pairs of the other graph
        interacting_pairs_in_self = self.get_interacting_proteins_pairs(comparison_mode= comparison_mode,
                                                                        root_protein= root_protein,
                                                                        similar_proteins_dic= similar_proteins_dic,
                                                                        proteins_allowed= proteins_with_int_gold_std)

        interacting_pairs_in_other = other_patchgroup_graph.get_interacting_proteins_pairs(comparison_mode= comparison_mode,
                                                                                           root_protein= root_protein,
                                                                                           similar_proteins_dic= similar_proteins_dic,
                                                                                           proteins_allowed= proteins_with_int_predictions)

        return self.calculate_tp_fp_fn(this_list_of_pairs= interacting_pairs_in_self,
                                       other_list_of_pairs= interacting_pairs_in_other)

    
    def calculate_tp_fp_fn(self, this_list_of_pairs, other_list_of_pairs):
        """
        counts true positives, false positives and false negatives of "this_list_of_pairs" (eg. the predictions) with
        respect to "other_list_of_pairs" (eg. the reality)

        both arguments are lists of pairs following the format [ (id1, id2), (id3, id4), ...]

          -> in each pair, member [0] is expected to be always <= member [1]: pairs are compared as whole tuples, so
             (id1, id2) and (id2, id1) are counted as two different pairs
          -> a pair that is repeated inside a list is counted only once

        returns a list [number of true positives, number of false positives, number of false negatives]

        """
        # When training, self is the decomposition done by the clustering and other_patchgroup_graph is the gold standard

        predicted_pairs = Set(this_list_of_pairs)
        real_pairs = Set(other_list_of_pairs)

        pairs_in_both = predicted_pairs & real_pairs            # true positives
        pairs_only_predicted = predicted_pairs - real_pairs     # false positives
        pairs_only_real = real_pairs - predicted_pairs          # false negatives

        return [len(pairs_in_both), len(pairs_only_predicted), len(pairs_only_real)]



    # ------------------------------------------
    #  Methods to print GraphPatchGroup objects
    # ------------------------------------------

    
    def output_table_cir_assigned(self, output_target, piana_access,
                                  protein_type_name, alternative_type_names, cir_prefix, root_protein, gold_std,
                                  similar_proteins_dic  ):
        """
        
        Prints a table of CIRs and proteins that have it
        
        CIR_id<TAB>protein1_with_this_CIR<TAB>protein2_with_this_CIR<TAB>....
        
        CIR ids will be formed by concatenating 'cir_prefix' to the internal identifier of the CIR
        
        "output_target" is the file object where the table will be printed (sys.stdout to print to screen)
        
        "piana_access" is the PianaDBaccess object that is used to access PianaDB
        
        "protein_type_name" is the easy-to-remember type name that will be used for printing the proteins
        -> valid protein_type_name are those listed in PianaGlobals.valid_protein_types.keys()
        
        "alternative_type_names" can be used to set a list of alternative types in case no protein_type_name code is found
        --> user must provide pairs a list of valid easy-to-remember type names
        list_alternative_types can for example look like this: ["gi", "uniacc", "md5"]
        I suggest always placing md5 at the end of alternative types, so you never get a None in the output
        
        "cir_prefix" is the prefix that will precede the identifier of CIRs in this GraphPatchGroup
        -> useful to distinguish between CIRs from different GraphPatchGroup objects

        "root_protein" is the protein that was used to build the network. It is used to output only results that have a root protein

        "similar_proteins_dic" is a list of proteins that are identical to the root protein. Used to avoid printing identical proteins
        """

        # ATTENTION: the root protein thing only works if the root proteins are proteinPianas.... in case
        # the proteins in the input file to piana are not proteinPianas, the conversion must be made somewhere... probably in piana.py?
        # but then, root_protein would be a list and not just one protein
        # Furthermore, if building the network from more than 1 protein, this whole concept has to be rethinked...




        # if the gold_std_file_name is not None, that means that we are in mode "eval". Therefore, the output table
        # must contain the confirmation (or not) of the assignments by the Gold Standard
        if gold_std is not None:
            gold_std_patchgroup_graph = utilities.get_patchgroup_graph_from_file_name(file_name= gold_std)
        else:
            gold_std_patchgroup_graph = None



        if gold_std_patchgroup_graph is not None:
            # this is mode "eval", where we want to print the results giving confirmation from the gold standard
            
            answer_patchgroup_proteins = self.get_patchgroup_proteins(piana_access= None, protein_type_name= None,
                                                                      alternative_type_names= [], root_protein=root_protein)

            pairs_in_cir = {} # use to keep a dictionary of the form { patchgroup_id1:[ "p1\tp2", "p1\tp3", ...], patchgroup_id2:["p5\tp6", "p5\tp7", ...] , ...}
            
            for patchgroup_id in answer_patchgroup_proteins:
                
                if root_protein is not None:
                    # creatign pairs where the root protein is always in there
                    # we just have to check that we don't insert the pair (root,root)
                    # we are sure all the proteins are pairs with the root protein, because we asked get_patchgroup_proteins to only return
                    # pairs where the root protein appeared
                    for other_protein in answer_patchgroup_proteins[patchgroup_id]:
                        if other_protein != root_protein:
                            pair_string = "%s\t%s" %(root_protein, other_protein)

                            if pairs_in_cir.has_key(patchgroup_id):
                                pairs_in_cir[patchgroup_id].append( pair_string)
                            else:
                                pairs_in_cir[patchgroup_id] = [pair_string]
                        
                else:
                    raise ValueError("this is not implemented when root protein is not given: you have to change many things to make it valid")
            # END OF for patchgroup_id in answer_patchgroup_proteins:

            

            # get pairs of proteins that have a common CIR according to gold std
            # (the returned proteins are proteinPianas)
            gold_std_pairs_same_cir = gold_std_patchgroup_graph.get_protein_pairs_sharing_patchgroup_id(comparison_mode= "root",
                                                                                                        root_protein= root_protein,
                                                                                                        similar_proteins_dic= similar_proteins_dic,
                                                                                                        proteins_allowed=None)

            # create keys from those pairs
            gold_std_string_pairs_same_cir = {} # used to create keys "p1\tp2" of pairs confirmed by the gold std

            for pair in gold_std_pairs_same_cir:
                pair_key = "%s\t%s" %(pair[0], pair[1])
                gold_std_string_pairs_same_cir[pair_key] = None

            """
            Now, we have the dictionary of pairs in each cir (ie. patchgroup_id) and pairs confirmed by the gold std
            Simply print all pairs in the cirs, checking if they are confirmed by the gold std
            """

            pairs_already_printed = {} # used to avoid repeating the print of the same pairs

            for patchgroup_id in pairs_in_cir:

                for string_pair_proteinPianas in pairs_in_cir[patchgroup_id]:
                    # string_pair_proteinPianas is a string "p1<TAB>ps"
                    (protein_1, protein_2) = string_pair_proteinPianas.split()

                    partners_a = Set(piana_access.get_all_partners(proteinPiana_value= protein_1, list_source_dbs= "all", list_source_methods= "all"))
                    partners_b = Set(piana_access.get_all_partners(proteinPiana_value= protein_2, list_source_dbs= "all", list_source_methods= "all"))

                    # converting to ext codes (we do it here instead of doing it in get_patchgroup_proteins because otherwise we
                    # would not be able to check if it is confirmed by the gold standard
                    ext_code_1 = piana_access.get_list_protein_external_codes(proteinPiana= protein_1,
                                                                              protein_type_name= protein_type_name,
                                                                              alternative_type_names= alternative_type_names,
                                                                              answer_mode="single")
                    ext_code_2 = piana_access.get_list_protein_external_codes(proteinPiana= protein_2,
                                                                              protein_type_name= protein_type_name,
                                                                              alternative_type_names= alternative_type_names,
                                                                              answer_mode="single")
                    

                    num_partners_in_common = len( partners_a.intersection(partners_b) )

                    cir_string = cir_prefix + "-" + str(patchgroup_id)
                    pair_ext_codes_string = "%s\t%s" %(ext_code_1, ext_code_2)

                    if not pairs_already_printed.has_key(pair_ext_codes_string):

                        pairs_already_printed[pair_ext_codes_string] = None
                        
                        output_target.write("%s\t%s\t%s" %(num_partners_in_common, cir_string, pair_ext_codes_string ))
                        if gold_std_string_pairs_same_cir.has_key(string_pair_proteinPianas):
                            output_target.write("\tgold_std_matched\n" )
                        else:
                            output_target.write("\tgold_std_failed\n" )
                    # END OF if not pairs_already_printed.has_key(pair_string):
                # END OF for pair_proteins in pairs_in_cir[patchgroup_id]:
            # END OF for patchgroup_id in pairs_in_cir:

                        
                
        # END OF if gold_std_patchgroup_graph is not None:
        else:
            # this is mode "exec", where we want to print the results but we cannot give confirmation
        
            # get a dictionary with keys the patchgroup ids, and contents the proteins external codes that have that patchgroup id
            patchgroup_proteins_dict = self.get_patchgroup_proteins(piana_access= piana_access, protein_type_name= protein_type_name,
                                                                    alternative_type_names= alternative_type_names, root_protein=root_protein           )
            
            
            for patchgroup_id in patchgroup_proteins_dict:  
                
                output_target.write("%s.%s" %(cir_prefix, patchgroup_id)) # this prints the CIR identifier preceded by the prefix
                
                for protein_in_cluster in patchgroup_proteins_dict[patchgroup_id]:
                    output_target.write("\t%s" %(protein_in_cluster) )    # this prints each of the proteins that have that CIR
                
                output_target.write("\n")
            # END OF for patchgroup_id in patchgroup_proteins_dict:
        # END OF else: (if gold_std_patchgroup_graph is not None:)

 

    
    def output_table_cir_ints(self, output_target, cir_prefix, root_protein, gold_std):
        """
        Writes the table of CIR-CIR interactions of this GraphPatchGroup, one interaction per line:

        <cir_prefix>.<CIR_id1><TAB><cir_prefix>.<CIR_id2>

        The interacting pairs are whatever self.get_node_node_links() returns; positions 0 and 1
        of each returned link are used as the two CIR (patchgroup) internal identifiers.

        "output_target" is the object the table is written to: only its write() method is used
        (eg. an open file, or sys.stdout to print to screen)

        "cir_prefix" is prepended (followed by a dot) to both CIR identifiers of every line
        -> useful to distinguish between CIRs from different GraphPatchGroup objects

        "root_protein" and "gold_std" are accepted but not used by this method
        """
        # one row per link: the same prefix is applied to both ends of the interaction
        row_template = "%s.%s\t%s.%s\n"

        for interacting_cirs in self.get_node_node_links():
            first_cir_id = interacting_cirs[0]
            second_cir_id = interacting_cirs[1]

            output_target.write(row_template % (cir_prefix, first_cir_id, cir_prefix, second_cir_id))

