"""
File        : IRGraph.py
Author      : Ramon Aragues
Creation    : 6.2.2006
Contents    : implementation of a graph class that handles graphs of interacting regions (IRs)
Called from : programs/classes that find CIRs 

=======================================================================================================


IRGraph is the intermediate step between a PPI network and a CIR-CIR network.


The process is as follows:

1. given a ppi network
2. create its IRGraph (each interaction --> one IR for each protein)
3. use Clustering to cluster the IRGraph into CIRs
   
"""

# IRGraph.py :  is the intermediate step between a PPI network and a CIR-CIR network.
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues


from Graph import *
from PianaDBaccess import *
from IRGraphNodeAttribute import *

#----------------------------------------------------------------------------------------------
class IRGraph(Graph):
#----------------------------------------------------------------------------------------------

    """
    Implements a Graph where nodes are Interacting Regions of proteins
    """
    def __init__(self, graphID = None, piana_access_object = None, dic_root_proteins_g2={}):

        self.graph_id = graphID
        self.piana_access = piana_access_object
        self.node_id_counter = 0

        self.proteins_ir_was_created = {}   # keeps the proteins for which one IR was already created
                                            # follows structure:
                                            #        { proteinPiana_1: IR id that is representing it,
                                            #          proteinPiana_2: IR id that is representing it,
                                            #          ....................
                                            #        }
                                            # This dic will only be used with proteins that are not
                                            # the root protein and do not share any interactor with the
                                            # root protein (ie are in G2).

        self.dic_root_proteins_g2 = dic_root_proteins_g2

        Graph.__init__(self, self.graph_id)


    def _get_new_node_id(self):
        """
        This method is created to give an id to new nodes
        """
        new_node_id = self.node_id_counter
        self.node_id_counter += 1
        return new_node_id

    def get_protein_requirements(self, protein_object, proteinPiana):
        """
        returns a tuple (protein_a_is_new_ir, single_ir_protein)
        """
        protein_is_new_ir = 0  # used to control whether this protein needs a new IR to be created (==1) or not (==0)
        single_ir_protein = 0  # used to control whether this protein is represented by a single IR (==1) or multiple (==0)

        if protein_object.is_root() or self.dic_root_proteins_g2.has_key(proteinPiana):
            # it is a root protein or a root g2 protein.... we must create a new IR for it for this particular interaction
            protein_is_new_ir = 1
        else:
            # it is not a root protein and it doesn't share partners with root protein: it is a protein represented by a single IR
            #      - check if an IR already exists for this protein
            #          - if it exists, set it to use that IR for this interaction
            #          - if it doesn't create a new IR and set it to 'represent' this protein from now on

            single_ir_protein = 1
            if not self.proteins_ir_was_created.has_key(proteinPiana):
                # this protein is represented by a IR that has to be created
                protein_is_new_ir = 1
        # END OF else: (if protein_object.is_root() or dic_root_proteins_g2.has_key(proteinPiana):)

        return (protein_is_new_ir, single_ir_protein)

        
    
    def initialize_IRGraph_from_PianaGraph(self, piana_graph = None, root_protein='multiple'):
        """
        Method that initializes this IRGraph from a PianaGraph
        Creates two linked nodes (with IRGraphNodeAttribute) for each interaction


        "piana_graph": PianaGraph object that we want to create the IRGraph from
                       (raises ValueError if it is None)

        "root_protein": not read by this method. The behaviour is selected by self.dic_root_proteins_g2
                        (set in the constructor):

            -> if dic_root_proteins_g2 is empty, two new IRs (one per protein) are created for each interaction

            -> if dic_root_proteins_g2 is not empty, multiple IRs are generated only for those proteins that
               are root proteins or keys of dic_root_proteins_g2. Any other protein is represented in the
               IRGraph by just one Interacting Region, which is shared by all its interactions
               (self.proteins_ir_was_created remembers which IR represents each of those proteins)

        Endpoints are resolved one after the other (first start node, then end node): this way, a single-IR
        protein that interacts with itself gets one IR node linked to itself, and no unused extra node is
        added to the graph.
        """

        if piana_graph is None:
            raise ValueError ("No PianaGraph given to transform into IRGraph")

        # NOTE(review): 'verbose' and 'sys' are not defined in this file; presumably they are
        #               provided by one of the star imports -- confirm
        if verbose:
            sys.stderr.write("Initializing IRGraph from pianagraph\n")

        for one_edge in piana_graph.get_edge_object_list():
            # for each edge, create a link between two IR nodes

            # get proteinPianas and node object (ie protein object) from the original interaction in piana_graph
            proteinPiana_a = one_edge.get_start_node_id()
            proteinPiana_b = one_edge.get_end_node_id()

            protein_object_a = piana_graph.get_node(identifier= proteinPiana_a, get_mode="error")
            protein_object_b = piana_graph.get_node(identifier= proteinPiana_b, get_mode="error")

            if not self.dic_root_proteins_g2:
                #   -> create two different interacting regions for each interaction
                ir_id_a = self._add_new_ir_node(protein_object= protein_object_a, proteinPiana= proteinPiana_a)
                ir_id_b = self._add_new_ir_node(protein_object= protein_object_b, proteinPiana= proteinPiana_b)
            else:
                #   -> create multiple interacting regions only for root proteins or keys of dic_root_proteins_g2
                #      and reuse the existing IR (if any) for the other proteins
                ir_id_a = self._resolve_ir_id(protein_object= protein_object_a, proteinPiana= proteinPiana_a)
                ir_id_b = self._resolve_ir_id(protein_object= protein_object_b, proteinPiana= proteinPiana_b)
            # END OF else: (if not self.dic_root_proteins_g2:)

            # --
            # Now, add the edge (either between new nodes or between nodes that already existed...
            # --
            new_edge_attribute = GraphEdgeAttribute() # empty attribute: nothing to do with it at the moment
            new_edge= GraphEdge(node1_id= ir_id_a, node2_id= ir_id_b,
                                attribute_object= new_edge_attribute)
            self.add_edge(new_edge)
        # END OF for one_edge in piana_graph.get_edge_object_list():

    def _add_new_ir_node(self, protein_object, proteinPiana):
        """
        Creates a new IR node for protein "proteinPiana", adds it to this graph and returns its IR id
        """
        ir_id = self._get_new_node_id()

        # standard GraphNode with a IRGraphNodeAttribute
        ir_node_attribute = IRGraphNodeAttribute(ir_id= ir_id,
                                                 proteinPiana_value= proteinPiana,
                                                 is_root_value= protein_object.is_root())
        ir_node = GraphNode(nodeID= ir_id, attribute= ir_node_attribute, alternative_id= str(ir_id) + "." + str(proteinPiana) )
        self.add_node(ir_node)

        return ir_id

    def _resolve_ir_id(self, protein_object, proteinPiana):
        """
        Returns the IR id that protein "proteinPiana" must use for the current interaction,
        creating (and, for single-IR proteins, registering) a new IR node when required
        """
        (protein_is_new_ir, single_ir_protein) = self.get_protein_requirements(protein_object= protein_object,
                                                                               proteinPiana= proteinPiana)

        if single_ir_protein and not protein_is_new_ir:
            # an IR was already created for it in a previous iteration... retrieve it
            return self.proteins_ir_was_created[proteinPiana]

        ir_id = self._add_new_ir_node(protein_object= protein_object, proteinPiana= proteinPiana)

        if single_ir_protein:
            # this newly created IR represents the protein from now on
            self.proteins_ir_was_created[proteinPiana] = ir_id

        return ir_id




    #
    # THESE METHODS HAVE NOT BEEN ADAPTED TO IR GRAPHS
    #
    def print_IR_node_values(self,go_node = None):
        """
        prints the label of one GO node
        
        "go_node": node from a go graph
        """

        node_attribute=go_node.get_node_attribute_object()

        return "%s        proteinPiana %s : IRTerm %s"  %(node_attribute.get_go_name(),
                                                         node_attribute.get_proteinPiana(),
                                                         node_attribute.get_term_id())

    def print_IR_dot_file(self, output_target= None):
        """
        method that prints IRGraph in dot format to a file object "output_target"
        """
        
        output_target.write("graph G { graph [orientation=portrait, pack=true, overlap=scale]\n")
        output_target.write(" node [shape=box,fontsize=12,width=0.15,height=0.15,style=filled,fillcolor=lightblue];\n")

        for node in self.get_node_object_list():
            if node.get_node_attribute_object().get_is_root():
                output_target.write(""""%s" [fillcolor = %s]\n""" %(self.print_go_node_values(go_node = node),"yellow"))

        for edge in self.get_edge_object_list():
            start_node= edge.get_start_node_id()
            end_node= edge.get_end_node_id()
            output_target.write(""" "%s" -- "%s" [len=2];\n""" %(self.print_go_node_values(go_node = self.get_node(start_node)),
                                                                 self.print_go_node_values(go_node = self.get_node(end_node))))
        # END OF for edge in self.edges:
                                 
        # print graph termination    
        output_target.write( "}\n")   

        
                    

                        
        
    


