"""
File        : ConsolidateClusteringSimilarityFunction.py
Author      : Ramon Aragues
Creation    : 9.2.2006
Contents    : similarity function that determines similarity between CIR clusters (when consolidating)
Called from : CirGraphCluster.do_action()

=======================================================================================================

"""

# ConsolidateClusteringSimilarityFunction.py: similarity function that determines
#                                             similarity between CIR clusters
#                                             (when consolidating)
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues

import PianaGlobals
from GraphCluster import *
from Clustering import *
from ClusteringSimilarityFunction import *
import numarray

verbose = 0

#------------------------------------------------------------------------------------------------
class ConsolidateClusteringSimilarityFunction(ClusteringSimilarityFunction):
    """
    Similarity function that determines similarity between CIR clusters (used when consolidating)

    The similarity between two clusters is the product of two terms:
       - number of proteins shared by the partner clusters of both clusters (taken from the clustered graph)
       - number of interaction partners shared by the proteins of both clusters (taken from the original graph)
    """
    
    def __init__(self, path_length_threshold= None):

        """

        "path_length_threshold": Maximum distance between two clusters to be joined

        """

        # for CIR clustering, mode is irrelevant (similarity is always calculated looking at the cluster as a whole)
        ClusteringSimilarityFunction.__init__(self, dbaccess = None,
                                              mode = None,
                                              path_length_threshold_value = path_length_threshold)



    def get_proteinPianas_list(self, list_node_attribute=None):
        """
        Method that returns the list of distinct proteinPianas found in a list of node attributes

        "list_node_attribute": list of attributes of a cluster. Each attribute must implement get_proteinPiana()
        """
        # a dictionary is used to remove duplicates: several attributes can refer to the same proteinPiana
        proteinPiana_dic={}

        for node_attribute in list_node_attribute:
            proteinPiana_dic[node_attribute.get_proteinPiana()] = None

        return proteinPiana_dic.keys()


    def _get_single_partner_node(self, clustered_graph, cluster_id):
        """
        Private helper: returns the node of the only cluster that interacts with cluster "cluster_id"

        "clustered_graph" is the current ClusterGraph

        "cluster_id" is the id of the cluster whose partner is requested

        Raises ValueError if the cluster does not have exactly one neighbour in "clustered_graph"
        """
        list_partner_ids = clustered_graph.get_node(identifier=cluster_id).get_neighbour_ids()

        if len(list_partner_ids) != 1:
            # this covers both "no partner at all" and "more than one partner"
            raise ValueError("A CIR must have exactly one partner, but cluster %s has %s partners\n" %(cluster_id,
                                                                                                      len(list_partner_ids)))

        return clustered_graph.get_node(identifier=list_partner_ids[0])


    def _get_partners_in_original_graph(self, original_graph, list_proteinPianas):
        """
        Private helper: returns a Set with the ids of all nodes that are neighbours (in "original_graph")
        of at least one of the proteins in "list_proteinPianas"

        "original_graph" is the PianaGraph that originated this clustering

        "list_proteinPianas" is a list of node identifiers of "original_graph"
        """
        dic_partners = {}
        for proteinPiana in list_proteinPianas:
            protein_node = original_graph.get_node(identifier=proteinPiana)
            for one_neighbour in protein_node.get_neighbour_ids():
                dic_partners[one_neighbour] = None
        # END OF for proteinPiana in list_proteinPianas:

        return Set(dic_partners.keys())


    def calculate_similarity(self, list_node_attributes1, list_node_attributes2,
                             cluster1_id= None, cluster2_id=None,
                             clustered_graph=None, original_graph=None  ):
        """
        Method that returns similarity score from two lists of attributes of nodes that are being clustered
         ( called from Clustering.cluster_graph() to fill the similarity matrix)

        "list_node_attributes1" is a list of node attributes that belong to the same cluster1
        "list_node_attributes2" is a list of node attributes that belong to the same cluster2

        "cluster1_id" is the id for cluster 1 (used to get protein partners of cluster1)
        "cluster2_id" is the id for cluster 2 (used to get protein partners of cluster2)

        "clustered_graph" is the current ClusterGraph (used to get protein partners of clusters)

        "original_graph" is the PianaGraph that originated this clustering

        This method calculates how similar two IR clusters (ie two CIRs) are

        Returns an integer: the product of the two terms described below (0 if the first term is 0)

        Raises ValueError if cluster1 or cluster2 does not have exactly one partner in "clustered_graph"
        """

        # similarity is determined by
        #
        # number_of_protein_partners_in_common(cluster1, cluster2) x
        #                           number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)


        # 1. number_of_protein_partners_in_common(cluster1, cluster2) is obtained by looking to the current clustered graph
        #    1.1 --> get cluster that interacts with cluster1 (partner_c1)
        #    1.2 --> get cluster that interacts with cluster2 (partner_c2)
        #    1.3 --> get proteins in partner cluster1 (proteins_in_partner_c1)
        #    1.4 --> get proteins in partner cluster2 (proteins_in_partner_c2)
        #
        #    1.5 --> num_intersection_clusters_partners  = len(intersection(proteins_in_partner_c1, proteins_in_partner_c2))

        # 1.1
        partner_c1 = self._get_single_partner_node(clustered_graph=clustered_graph, cluster_id=cluster1_id)

        # 1.2
        partner_c2 = self._get_single_partner_node(clustered_graph=clustered_graph, cluster_id=cluster2_id)

        # 1.3
        list_ir_attributes_1 = partner_c1.get_node_attribute_object().get_list_elements()
        proteins_in_partner_c1 = Set(self.get_proteinPianas_list(list_node_attribute=list_ir_attributes_1 ))

        # 1.4
        list_ir_attributes_2 = partner_c2.get_node_attribute_object().get_list_elements()
        proteins_in_partner_c2 = Set(self.get_proteinPianas_list(list_node_attribute=list_ir_attributes_2 ))

        # 1.5
        num_intersection_clusters_partners= len(proteins_in_partner_c1.intersection(proteins_in_partner_c2))

        # single-argument print(...) behaves identically as a python 2 print statement
        print("\n======================================")
        print("IN CLUSTER %s, protein partners are %s" %(cluster1_id, proteins_in_partner_c1))
        print("IN CLUSTER %s, protein partners are %s" %(cluster2_id, proteins_in_partner_c2))
        print("num_intersection_clusters_partners): %s" %(num_intersection_clusters_partners))

        if num_intersection_clusters_partners == 0:
            # the formula is a product of two terms: if the first term is 0 the result is 0, and
            # therefore there is no need to calculate the second term
            return 0

        # 2. number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2) is obtained by looking to the
        # current clustered graph and to the original PPI network
        #    2.1 --> get proteins in cluster1 (proteins_c1)
        #    2.2 --> get proteins in cluster2 (proteins_c2)
        #    2.3 --> get proteins that interact (in original graph) with proteins in cluster1 (partners_of_proteins_in_c1)
        #    2.4 --> get proteins that interact (in original graph) with proteins in cluster2 (partners_of_proteins_in_c2)
        #
        #    2.5 -->  result = len(intersection(partners_of_proteins_in_c1, partners_of_proteins_in_c2))
        #

        # 2.1
        proteins_c1 = self.get_proteinPianas_list(list_node_attribute= list_node_attributes1)

        # 2.2
        proteins_c2 = self.get_proteinPianas_list(list_node_attribute= list_node_attributes2)

        # 2.3
        partners_of_proteins_in_c1 = self._get_partners_in_original_graph(original_graph=original_graph,
                                                                          list_proteinPianas=proteins_c1)

        # 2.4
        partners_of_proteins_in_c2 = self._get_partners_in_original_graph(original_graph=original_graph,
                                                                          list_proteinPianas=proteins_c2)

        # 2.5
        num_intersection_protein_partners = len(partners_of_proteins_in_c1.intersection(partners_of_proteins_in_c2))

        # print the partners of the proteins in each cluster (ie the sets used for the second term)
        print("protein partners for proteins in cluster %s are %s" %(cluster1_id, partners_of_proteins_in_c1))
        print("protein partners for proteins in cluster %s are %s" %(cluster2_id, partners_of_proteins_in_c2))
        print("num_intersection_protein_partners: %s" %(num_intersection_protein_partners))
        print("======================================\n")

        return num_intersection_clusters_partners*num_intersection_protein_partners
