"""
 File       : GraphBuilder.py
 Author     : R. Aragues & D. Jaeggi
 Creation   : 31.07.2003
 Contents   : class for building a graph from a database
 Called from: Graph.py and piana.py

=======================================================================================================

A class that defines how a graph should be built and that provides functions to
allow a graph to be built automatically from a database by calling Graph.build_graph()


Common to all these methods and, importantly, problem independent, are methods to
recursively expand a graph and update a Graph object with the appropriate data. These
methods are contained in this class.

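Making a GraphBuilder instance problem specific relies mainly on making one method
problem specific: GraphBuilder.get_link_details(node_id). This method gets all the links for a node and
should be overridden by the user to be adapted to the data source and type being used.
GraphBuilder.create_node_attribute(node_id) must be overridden as well: it is used to create the
attribute of a root node for which no link was found.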

The concept is that, once a specific GraphBuilder class has been written (and possibly
GraphNodeAttribute and GraphEdgeAttribute) the following calls would be sufficient to generate
a complete graph (containing, in this case, all actors linked to Jack Nicholson by a
maximum hop of 2 co-actors):

my_graph = Graph("My Graph")
my_builder = MyGraphBuilder(list_node_id=["Jack Nicholson"], depth=2)
my_graph.build_graph(my_builder)

In this instance MyGraphBuilder may look something like:

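class MyGraphBuilder(GraphBuilder):
    def get_link_details(self, node_id):
        ....SQL statements....
        links = []
        for linked_actor, edge_id in linked_actors:
            new_link = self.LinkDetail(node_id, linked_actor, edge_id)
            links.append(new_link)
        return links

    def create_node_attribute(self, node_id):
        ....build and return the attribute object for node_id....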

That's all it needs to do, on a basic level. Easy peasy, non?!
Of course, more complex problems may need more complexity here, but
this is fundamentally all that needs to be done.
"""
# GraphBuilder.py: implements a class for building a graph from a database
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues


from GraphNode import *
from GraphEdge import *

# Debugging switches: a non-zero value makes the methods below write trace messages to sys.stderr

# read by GraphBuilder._build_single_node_graph: progress for each link and each edge being processed
verbose = 0
# read by GraphBuilder._get_all_links: trace of each depth level being expanded
verbose_get_all_links = 0
# read by GraphBuilder._build_single_node_graph: summary of the links found for each root node
verbose_build = 0
# read by GraphBuilder.build_graph: one message for each starting node being added
verbose_build_shallow = 0

class GraphBuilder:
    """

    The main GraphBuilder class.

    A subclass must be created to suit the problem type, overriding at least methods get_link_details() and create_node_attribute()
    
    """
    def __init__(self, list_node_id= None, depth= 0, hub_threshold= 0):
        """
        Keeps the parameters that will drive the construction of the graph.

        "list_node_id" is the list of node ids used as starting points (roots) of the graph.
                - it is kept by reference (no copy is made)

        "depth" is how many links away from each starting node the builder will go
                - 1-N : number of levels expanded around each starting node
                - 0   : intended to mean "no depth limit" (ie. build complete graph around the
                        nodes in list_node_id)
                        NOTE(review): _get_all_links expands range(self.depth) levels, so in this
                        class a depth of 0 expands nothing - confirm which behaviour is wanted

        "hub_threshold" limits the number of links a node can have in order to be expanded
                - if 0, no threshold is applied
                - if >0, the links of nodes having "hub_threshold" or more links are ignored

        Raises ValueError if list_node_id is None or empty.
        """
        # both None and an empty list leave the builder without any starting point
        if not list_node_id:
            raise ValueError("GraphBuilder needs a node to build the graph")

        self.list_node_id = list_node_id
        self.depth = depth
        self.hub_threshold = hub_threshold

        # the graph to fill is provided later on, through set_graph()
        self.graph = None

    def set_graph(self,graph_object):
        """
        Registers graph_object as the graph that build_graph() will fill in.

        This is called by Graph.build_graph(); any graph set previously is replaced.
        """
        self.graph = graph_object


    def build_graph(self):
        """
        The main graph building method. This is called by Graph.build_graph()

        For each node id in self.list_node_id, in order, the graph around that node is added
        to self.graph by calling _build_single_node_graph().

        Raises RuntimeError if no graph was set through set_graph().
        """
        # sys is not imported at the top of this module: it is imported here for the trace messages
        import sys

        if self.graph is None:
            raise RuntimeError("Graph must be set via set_graph before I can build it!")

        if verbose_build_shallow:
            number_of_nodes = len(self.list_node_id)

        # for each starting node in list_node_id, add edges and partner nodes till depth is reached
        for position, node_id in enumerate(self.list_node_id):

            if verbose_build_shallow:
                sys.stderr.write("Adding node %s to graph (%s out of %s)\n" %(node_id, position + 1, number_of_nodes))

            # expands graph from root node to depth self.depth
            self._build_single_node_graph(root_node_id= node_id)

        
    def _build_single_node_graph(self, root_node_id):
        """
        Method to build the graph of a single node.
        Called from build_graph
        """

        if verbose_build or verbose:
            sys.stderr.write("getting all links for node id = %s\n" %root_node_id)
            
        # links will be a list of linkDetail objects describing all links where root_node_id is
        # involved at distance depth
        #  _get_all_links will not return those nodes that do not respect the hub threshold
        links = self._get_all_links(root_node_id= root_node_id)

        if verbose_build:
            num_links = len(links)

            if num_links != 0:
                sys.stderr.write("************************node id = %s has %s links: %s\n" %(root_node_id,num_links , links))
            
        # using first link detail object to create root_node object and set it as root
        # the node will be again added in the loop, but there it was more time costly to
        # check if it was a root node
        
        # TO DO!!!!! This is not a very clean way of doing it, but I cannot think of another right now...
        # Maybe creating the node directly from the node_id and adding it... but I have to learn
        # better how to add attributes to a node... 
        
        if links:

            if links[0].node1_id == root_node_id:
                root_node_object= self.graph.get_node(identifier= links[0].node1_id,
                                                      attribute= links[0].get_node1_attribute(),
                                                      get_mode="new")
                root_node_object.set_as_root()
                
            elif links[0].node2_id == root_node_id:
                root_node_object= self.graph.get_node(identifier= links[0].node2_id,
                                                      attribute= links[0].get_node2_attribute(),
                                                      get_mode="new")
                root_node_object.set_as_root()
                
            self.graph.add_node(node_object= root_node_object)
        # END OF if links:
        
        else:
            # even if there are no links for this root node, we still have to make sure that the node appears in the Graph

            if verbose_build:
                sys.stderr.write("------root node id = %s didn't have edges------\n" %root_node_id)

            root_node_attribute = self.create_node_attribute(node_id= root_node_id)
            
            root_node_object = self.graph.get_node(identifier= root_node_id,
                                                   attribute= root_node_attribute,
                                                   get_mode="new")
            root_node_object.set_as_root()

            self.graph.add_node(node_object= root_node_object)
        # END OF else: (if links:)

        
        if verbose:
            total= len(links)
            sys.stderr.write("looping through all link detail objects: %s of them" %total)
            i = 0



        list_edges_to_add = []
        # For each link detail object (describing edge) associated to the node at depth self.depth, add the link and
        # the nodes associated to the link detail object to the graph
        for link in links:
            if not isinstance(link, self.LinkDetail):
                raise ValueError("get_links must return a list of LinkDetail objects")

            if verbose:
                sys.stderr.write( "Creating %s out of %s -- " %( i, total))
                i += 1

            # get_nodes creates a new node in case it doesn't exist in the graph
            graph_temp1_node_object = self.graph.get_node(identifier= link.node1_id, attribute= link.get_node1_attribute(), get_mode="new")

            self.graph.add_node(node_object= graph_temp1_node_object)


            graph_temp2_node_object = self.graph.get_node(identifier= link.node2_id, attribute= link.get_node2_attribute(), get_mode="new")
            self.graph.add_node(node_object= graph_temp2_node_object)

            edge = self.graph.get_edge(identifier1= graph_temp1_node_object,
                                       identifier2= graph_temp2_node_object,
                                       attribute_object= link.get_edge_attribute(),
                                       get_mode="new")

            list_edges_to_add.append(edge) # keeps list of edges that have to be added afterwards
                                           # adding the edges a posteriori increases speed of method
        # END OF for link in links:

        if verbose:
            total_to_add = len(list_edges_to_add)
            k=0

        for edge_to_add in list_edges_to_add:

            self.graph.add_edge(edge_object= edge_to_add)

            if verbose:
                sys.stderr.write( "Adding %s out of %s -- " %( k, total_to_add))
                k += 1
        # END OF for edge_to_add in list_edges_to_add:

       
    def _get_all_links(self, root_node_id):
        """
        This method handles the expansion around a root node:
          - making sure to go till depth self.depth
          - collecting the links that exist between the nodes that are found

        Starting from root_node_id (level 0), get_link_details() is called for each node of the
        current level. The nodes named in the returned LinkDetail objects that were not seen
        before form the next level, so a node is only expanded once, at the lowest level where
        it was found. self.depth levels are expanded (the depth comes from the constructor,
        there is no other default): with a depth of 0 or less nothing is expanded and an empty
        list is returned.

        When self.hub_threshold is not 0, a node returning hub_threshold or more links is
        skipped: its links are not collected and its partners are not queued through it.

        Returns a list of LinkDetail objects, in order of discovery, where each edge_id
        appears only once.

        uses get_link_details, which is specific to each database
        """
        # sys is not imported at the top of this module: it is imported here for the trace messages
        import sys

        # nodes found at the level being expanded: root node is at level 0
        current_level = [root_node_id]

        # visited_node_ids contains the nodes already expanded or queued for expansion
        # (dictionaries are used as sets, as everywhere else in this class)
        visited_node_ids = {root_node_id: 1}

        # links is the list returned; visited_link_ids has the edge ids already placed in it
        links = []
        visited_link_ids = {}

        # For each level of depth, we search linked nodes
        for i in range(self.depth):

            if verbose_get_all_links:
                sys.stderr.write( "Starting level %s "%i)
                sys.stderr.write("Have visited_nodes %s" %list(visited_node_ids.keys()))

            next_level = []

            for node in current_level:

                # get_link_details is the method that deals with the particular graph application:
                # it returns LinkDetail objects with the nodes involved in each link
                new_link_details = self.get_link_details(node_id= node)

                if self.hub_threshold != 0 and len(new_link_details) >= self.hub_threshold:
                    # skip the links for this node: it has reached the threshold established
                    #  (this check can be also done on get_link_details itself... we keep
                    #   this comparison just in case the user doesn't want to do it there...)
                    continue

                for new_link_detail in new_link_details:

                    for new_node_id in (new_link_detail.node1_id, new_link_detail.node2_id):

                        # the node is marked as soon as it is queued: a node reached through several
                        # links of the same level is therefore queried only once at the next level.
                        # Links are not affected by this, since they are filtered by edge id below
                        if new_node_id not in visited_node_ids:
                            visited_node_ids[new_node_id] = 1
                            next_level.append(new_node_id)

                    # we only keep the link if it was not kept previously
                    if new_link_detail.edge_id not in visited_link_ids:
                        visited_link_ids[new_link_detail.edge_id] = 1
                        links.append(new_link_detail)

                # END OF for new_link_detail in new_link_details:

            # END OF for node in current_level:

            if not next_level:
                # nothing left to expand: deeper levels would be empty as well
                break

            current_level = next_level

        # END OF for i in range(self.depth):
        return links
    
    def get_link_details(self, node_id):
        """
        To be overridden by a method specific to the database
        This should return a list of LinkDetail objects defining links to "node_id"

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("GraphBuilder.get_link_details must be overridden by method specific to subclass of GraphBuilder")
    
    def create_node_attribute(self, node_id):
        """
        To be overridden by a method specific to the subclass of GraphBuilder
        This should return the attribute object to attach to the node "node_id"
        (it is used for root nodes whose attribute cannot be taken from a link)

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("GraphBuilder.create_node_attribute must be overridden by method specific to subclass of GraphBuilder")




    #--------------------------------------------
    #
    # INTERNAL CLASSES TO GRAPHBUILDER 
    # 
    #--------------------------------------------
    
    class LinkDetail:
        """
        Internal class for storing and transmitting the details of one link.

        A LinkDetail holds the ids of the two nodes of the link and the id of its edge.
        One attribute object can be attached to each node and one to the edge: the three
        of them are None until set_attribute() is called.
        """
        def __init__(self, node1_id, node2_id, edge_id):
            # edge of the link
            self.edge_id = edge_id
            self.edge_attribute = None

            # first node of the link
            self.node1_id = node1_id
            self.node1_attribute = None

            # second node of the link
            self.node2_id = node2_id
            self.node2_attribute = None

        def __repr__(self):
            return "<Link with id %s between %s and %s>" %(self.edge_id, self.node1_id, self.node2_id)

        def set_attribute(self, attribute_object, node_id=None):
            """
            Attaches attribute_object to one of the components of the LinkDetail object

            The component is chosen from the type of attribute_object:
              - a GraphNodeAttribute is set to the node whose id is node_id (node1 is checked
                first, so it is the one modified when both nodes have the same id);
                ValueError is raised if node_id is neither of the two nodes
              - a GraphEdgeAttribute is set to the edge (node_id is not used)
              - anything else raises TypeError
            """
            if isinstance(attribute_object, GraphNodeAttribute):

                if node_id == self.node1_id:
                    self.node1_attribute = attribute_object
                    return

                if node_id == self.node2_id:
                    self.node2_attribute = attribute_object
                    return

                raise ValueError("Node %s not found in graph" %node_id)

            if isinstance(attribute_object, GraphEdgeAttribute):
                self.edge_attribute = attribute_object
                return

            raise TypeError("Attribute must be a Attribute object")

        def get_node_attribute(self, node_id_value):
            """
            Gets the attribute of the node whose id is node_id_value (node1 is checked first)

            Returns None if node_id_value is not one of the two nodes of the link
            """
            if self.node1_id == node_id_value:
                return self.node1_attribute

            if self.node2_id == node_id_value:
                return self.node2_attribute

            return None

        def get_node1_attribute(self):
            """
            Gets node1's attribute
            """
            return self.node1_attribute

        def get_node2_attribute(self):
            """
            Gets node2's attribute
            """
            return self.node2_attribute

        def get_edge_attribute(self):
            """
            Gets the attribute of the edge
            """
            return self.edge_attribute
