"""
File        : dip2piana.py
Author      : Ramon Aragues
Creation    : 16.12.2003
Contents    :  inserts information from a DIP mysql database into a piana database
Called from : command line

=======================================================================================================

This file implements a program that fills up tables in database piana with information from a dip database

--> a DIP database must exist already with format shown in create_dip_tables.sql (directory code/dbCreation)

    this existing DIP database was previously populated using parseDIP.py


Attention!! The DIP database must be created from a single dip xin file! Otherwise, this parser will introduce false
interactions, because it uses node_id to identify the nodes in an interaction. If you want to be able to read information
from a database populated from different DIP xin files, then you have to:

 - Transform the from:node_id to:node_id fields in edge to from:node_uid to:node_uid in DipContentHandler.py
 - change the sql query to read node_uids instead of node_ids

 - same for edge_id and edge_uid

 (read email to Baldo and Joan from 03.10.2004 about DIP)


"""

# dip2piana.py: inserts information from a DIP mysql database into a piana database
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues

import sys
import getopt

import re
import readline
import MySQLdb

from PianaDBaccess import *
import PianaGlobals

verbose = 0

# ----------------------
# Function usage()
# ----------------------
def usage():
    """
    Print the command line help of dip2piana.py to standard output.

    Takes no arguments and returns nothing.
    """
    # Each call prints exactly one parenthesised string, which produces the same output
    # under the Python 2 print statement used by the rest of this file.
    print("\n--------------------------------------------------------------------------------------------------------------")
    print("This program fills up tables in database piana with information from DIP \n")
    print("\nUsage: python dip2piana.py  --piana-dbname=piana_dbname --piana-dbhost=piana_dbhost --piana-dbuser=piana_dbuser --piana-dbpass=piana_dbpass")
    print("              --dip-dbname=dip_dbname --dip-dbhost=dip_dbhost --dip-dbuser=dip_dbuser --dip-dbpass=dip_dbpass ")
    print("             --insert-interactions  --insert-protein-coreferences [--help] [--verbose]")
    print("\nwhere:")
    print("     piana_dbname : name of database piana to be used (required)")
    print("     piana_dbhost : name of host where database piana to be used is placed (required)")
    print("     piana_dbuser : username accessing the database (not required in most systems)")
    print("     piana_dbpass : password of username accessing the database (not required in most systems)")
    print("     dip_dbname   : name of the dip mysql database (required)")
    print("     dip_dbhost   : name of the machine with dip mysql server (required)")
    print("     dip_dbuser   : name of the mysql dip username (not required in most systems)")
    # this line used to repeat the dip_dbuser description
    print("     dip_dbpass   : password of the mysql dip username (not required in most systems)")
    print("     --insert-interactions: if flag set, inserts into PIANA the interactions in DIP")
    print("     --insert-protein-coreferences: if flag set, inserts into PIANA the dip coreferences between protein codes")
    print("     --help       : prints this message and exits")
    print("     --verbose    : prints process info to stdout")
    print("--------------------------------------------------------------------------------------------------------------")
        

   
# ---------------------------
# Function parseArguments()                                               
# --------------------------- 

def parseArguments():
    """
    Read the command line options from sys.argv and store them in the module-level configuration variables.

    Every "--xxx-db...=value" option is copied into the global of the same name; the flags --insert-interactions,
    --insert-protein-coreferences and --verbose set their global to 1. Globals of options that are not given
    are left untouched (the script initialises them before calling this function).

    On an unrecognised option, the getopt error and the usage text are printed and the program exits with status 2.
    --help prints the usage text and also exits with status 2.

    Raises ValueError if, after parsing, dip_dbname or dip_dbhost is still None.
    """

    global dip_dbname
    global dip_dbhost
    global dip_dbuser
    global dip_dbpass

    global piana_dbname
    global piana_dbhost
    global piana_dbuser
    global piana_dbpass

    global insert_interactions
    global insert_protein_coreferences

    global verbose

    long_options = ["verbose", "help",
                    "dip-dbname=", "dip-dbuser=", "dip-dbhost=", "dip-dbpass=",
                    "piana-dbname=", "piana-dbhost=", "piana-dbuser=", "piana-dbpass=",
                    "insert-interactions", "insert-protein-coreferences"]

    try:
        opts, args = getopt.getopt(sys.argv[1:], "", long_options)
    except getopt.GetoptError as bad_opt:
        # print the error (on its own line, so it does not run into the help text), the help information and exit
        sys.stderr.write("%s\n" % bad_opt)
        usage()
        sys.exit(2)

    for option, value in opts:

        if option == "--dip-dbhost":
            dip_dbhost = value

        elif option == "--dip-dbname":
            dip_dbname = value

        elif option == "--dip-dbuser":
            dip_dbuser = value

        elif option == "--dip-dbpass":
            dip_dbpass = value

        elif option == "--piana-dbname":
            piana_dbname = value

        elif option == "--piana-dbhost":
            piana_dbhost = value

        elif option == "--piana-dbuser":
            piana_dbuser = value

        elif option == "--piana-dbpass":
            piana_dbpass = value

        elif option == "--insert-interactions":
            insert_interactions = 1

        elif option == "--insert-protein-coreferences":
            insert_protein_coreferences = 1

        elif option == "--verbose":
            verbose = 1

        elif option == "--help":
            # print help information and exit
            usage()
            sys.exit(2)

    # check arguments: the connection to the dip database cannot be opened without these two
    if dip_dbname is None or dip_dbhost is None:
        raise ValueError("trying to establish a connection to dip database without giving a host or database name")


def _append_missing(target_list, candidates):
    """
    Append to target_list (in place) every element of candidates that it does not contain yet, keeping their order
    """
    for candidate in candidates:
        if candidate not in target_list:
            target_list.append(candidate)


def get_associated_list_proteinPianas(dip_cursor, piana_access, node_dip_id, ext_codes_wanted="no"):
    """
    from a dip node of database connected via dip_cursor, return a list of (proteinPiana, sourceDB) associated to it

    if ext_codes_wanted is 'yes' then it returns a list of (proteinPiana, sourceDB, unientry, uniacc, pir, gi)

    "dip_cursor" is the cursor used to query tables nodes and nodes_ext_names of the dip database
    "piana_access" is the object whose get_list_protein_piana() method translates a protein code into proteinPianas
    "node_dip_id" is the node_id of the dip node

    The proteinPianas are those found for the default name of the node, followed by those found for its other
    external codes (uniprot accession, pir, gi) that were not already in the list.

    An empty list is returned if the node does not appear in table nodes.
    """

    # -----------------------------------------------------------------------
    #  1. find the external identifiers for node_dip_id
    # -----------------------------------------------------------------------
    #     after the queries, we'll have:
    #
    #          protein_name[0][0] --> default name taken by DIP as node name (normally uniprot entry)
    #          protein_name[0][1] --> taxonomy id of the node
    #
    #          protein_otherNames[0][0] --> node_ext_swissProtAcc
    #          protein_otherNames[0][1] --> node_ext_pir
    #          protein_otherNames[0][2] --> node_ext_gi
    #          protein_otherNames[0][3] --> node_ext_refseq
    #
    #     the database has string "None" when there is no value for an external code
    #
    #     The queries go through the cursor received as argument (not through a global cursor) and let the
    #     database driver substitute the node id

    dip_cursor.execute("""select node_name, node_taxonomy from nodes where node_id = %s""", (node_dip_id,))
    protein_name = dip_cursor.fetchall()

    if not protein_name:
        # the node is not in the dip database: there is nothing to associate to it
        return []

    dip_cursor.execute("""select node_ext_sprot, node_ext_pir, node_ext_gi, node_ext_refseq from nodes_ext_names where node_ext_id = %s""",
                       (node_dip_id,))
    protein_otherNames = dip_cursor.fetchall()

    # -----------------------------------------------------------------------
    #  2. find the piana internal identifier for those protein codes
    # -----------------------------------------------------------------------

    protein_default_name = protein_name[0][0]         # if default name is unientry, it won't have ":" in the string
                                                      # if it is not a unientry, it will have PIR:pir_code
                                                      #                                        GNB:gi_code
                                                      # in dip20041003 this are all the possible default names (there is no SWP as default name)
    protein_taxonomy = int(protein_name[0][1])

    if protein_otherNames:
        ext_sprot = protein_otherNames[0][0]
        protein_pir = protein_otherNames[0][1]
        ext_gi = protein_otherNames[0][2]
    else:
        # no row with external names for this node: behave as if all its external codes were empty
        # NOTE(review): this used to fail with an IndexError -- confirm that ignoring the missing row is acceptable
        ext_sprot = "None"
        protein_pir = "None"
        ext_gi = 0

    protein_swissprot = "None"  # to be set if default name is unientry

    if len(ext_sprot) == 6:
        # a code of 6 characters is taken as an accession number
        protein_swissaccession = ext_sprot
    else:
        # in some cases, the code here is a swiss prot entry
        protein_swissaccession = "None"
        protein_swissprot = ext_sprot

    if ext_gi != 0:
        protein_gi = ext_gi
    else:
        # the dip database contains a 0 for gi, since it is an integer and doesn't allow a None string... take care of this
        protein_gi = "None"

    # refseq would be protein_otherNames[0][3]

    def proteinPianas_for(code_value, code_type):
        # We use the option source_db_info='yes' in order to keep record of the database that established the
        # relationship between a code and a proteinPiana. That info is used afterwards to set in the pianaDB
        # whether the interaction comes directly from dip ('dip') or from a code inference ('dip_c').
        # The result is copied so that the list owned by piana_access is never modified here.
        return list(piana_access.get_list_protein_piana(proteinCode_value=code_value,
                                                        proteinCodeType_value=code_type,
                                                        tax_id_value=protein_taxonomy, source_db_info="yes"))

    list_proteinPiana = []

    # obtaining proteinPianas from default name
    if ":" not in protein_default_name:

        # if there is no ":" in the default name, then it is a unientry
        protein_swissprot = protein_default_name
        list_proteinPiana = proteinPianas_for(protein_default_name, PianaGlobals.swissProtID_col)
    else:

        # there was a ":" in the default name. Therefore, this is not a unientry. Type of code is before ":" and code afterwards
        pair_type_code = protein_default_name.split(":")

        if pair_type_code[0] == "PIR":

            # it is a PIR code
            # DIP uses indistinctly Pir accessions and Pir entry codes: check for both...
            #   (most of the time, Pir Entry == Pir Accession. But in a few cases this isn't the case and
            #    since DIP uses both, we better retrieve proteinPianas for both (removing duplicates))
            list_proteinPiana = proteinPianas_for(pair_type_code[1], PianaGlobals.pirEntryID_col)
            _append_missing(list_proteinPiana, proteinPianas_for(pair_type_code[1], PianaGlobals.pirAccessionID_col))

        elif pair_type_code[0] == "GNB":

            # it is a GI code
            list_proteinPiana = proteinPianas_for(pair_type_code[1], PianaGlobals.giID_col)

        else:
            sys.stderr.write("Unknown type of code found in default name: %s\n" % protein_default_name)
    # END OF else: (if ":" not in protein_default_name:)

    # At this point, list_proteinPiana is filled with proteinPianas found for the default name used by DIP
    #
    # But we want to make sure that proteinPianas of other codes are also used. Of course, in most cases the proteinPianas
    # found from one code should be the same as the proteinPianas found for another code. But we all know how these biological
    # databases are...
    temp_list_proteinPiana = []

    if protein_swissaccession != "None":
        # retrieving proteinPianas for uniprot accession numbers
        temp_list_proteinPiana.extend(proteinPianas_for(protein_swissaccession, PianaGlobals.swissAccessionID_col))

    if protein_pir != "None":
        # retrieving proteinPianas for pir
        temp_list_proteinPiana.extend(proteinPianas_for(protein_pir, PianaGlobals.pirAccessionID_col))

    if protein_gi != "None":
        # retrieving proteinPianas for gi
        temp_list_proteinPiana.extend(proteinPianas_for(protein_gi, PianaGlobals.giID_col))

    # TO DO!!!! When refseq is parsed, try to get_list_protein_piana from it

    # adding those proteinPianas found from another code that are not already in list_proteinPiana
    _append_missing(list_proteinPiana, temp_list_proteinPiana)

    if ext_codes_wanted == "yes":
        # if user asked to get the list of ext codes, return it..

        completed_list = []
        for proteinPiana in list_proteinPiana:
            # proteinPiana[0] is the proteinPiana
            # proteinPiana[1] is the sourceDB
            completed_list.append((proteinPiana[0], proteinPiana[1],
                                   protein_swissprot, protein_swissaccession, protein_pir, protein_gi))

        return completed_list

    return list_proteinPiana


    
# --------
# --------
#  Main()               
# --------                               
# --------

# default values of the command line options: parseArguments() overwrites those given by the user
dip_dbname = None
dip_dbuser = None
dip_dbhost = None
dip_dbpass = None

piana_dbname = None
piana_dbuser = None
piana_dbhost = None
piana_dbpass = None

insert_interactions = 0
insert_protein_coreferences = 0

# parsing arguments from the command line
parseArguments()

# Initialisating connection to piana
piana_access = PianaDBaccess(dbname=piana_dbname, dbhost=piana_dbhost, dbuser=piana_dbuser, dbpassword= piana_dbpass)

# opening connection to MySQL DIP database and create a cursor to work with the database
#
# Only the credentials that were actually given are passed to MySQLdb. In particular, a password given
# without a user no longer results in an explicit user=None being handed to the driver.
dip_connection_args = {"db": dip_dbname, "host": dip_dbhost}

if dip_dbuser is not None:
    dip_connection_args["user"] = dip_dbuser

if dip_dbpass is not None:
    dip_connection_args["passwd"] = dip_dbpass

dipdb = MySQLdb.connect(**dip_connection_args)

dipcursor = dipdb.cursor()

if insert_protein_coreferences:
    # Insertion into PIANA of the coreferences DIP has between protein codes:
    #
    #   step 1 - read every dip node
    #   step 2 - read the codes of each node
    #   step 3 - translate those codes into proteinPianas
    #   step 4 - link each of those proteinPianas to each of the codes

    if verbose:
        sys.stderr.write("Inserting protein coreferences from DIP\n")

    # step 1
    sqlquery = """ select node_id, node_uid, node_taxonomy from nodes"""
    dipcursor.execute(sqlquery)
    dip_nodes = dipcursor.fetchall()

    for dip_node in dip_nodes:
        # dip_node is a row (node_id, node_uid, node_taxonomy)
        node_taxonomy = int(dip_node[2])

        # steps 2 and 3: rows (proteinPiana, source_db, swissprot_code, sprot accession, pir_code, gi_code)
        annotated_proteinPianas = get_associated_list_proteinPianas(dip_cursor=dipcursor,
                                                                    piana_access=piana_access,
                                                                    node_dip_id=dip_node[0],
                                                                    ext_codes_wanted="yes")

        # one entry per proteinPiana of this node. An entry coming from a source db other than
        # "completion" always (re)writes the entry; a "completion" one is only kept when the
        # proteinPiana has no entry yet
        codes_by_proteinPiana = {}
        for annotated in annotated_proteinPianas:
            from_completion = annotated[1] == "completion"

            if from_completion and annotated[0] in codes_by_proteinPiana:
                continue

            if from_completion:
                origin = 'completion'
            else:
                origin = 'standard'

            codes_by_proteinPiana[annotated[0]] = {'sourceDB': origin,
                                                   'unientry': annotated[2],
                                                   'uniacc': annotated[3],
                                                   'pir': annotated[4],
                                                   'gi': annotated[5]}

        # step 4
        for proteinPiana in codes_by_proteinPiana:
            codes = codes_by_proteinPiana[proteinPiana]

            sourcedb = 'dip'
            if codes['sourceDB'] == "completion":
                sourcedb = 'dip_c'

            # the dip_uid code of the node
            piana_access.insert_protein_id_intDB_code(protein_id_intDB_value="dip_uid:" + dip_node[1],
                                                      proteinPiana_value=proteinPiana,
                                                      intDB_source_value=sourcedb)

            # the other codes: each one is inserted only if it is set and the proteinPiana is not
            # yet among the proteinPianas piana returns for that code

            unientry = codes['unientry']
            if unientry != "None":
                already_linked = piana_access.get_list_protein_piana(proteinCode_value=unientry,
                                                                     proteinCodeType_value=PianaGlobals.swissProtID_col,
                                                                     tax_id_value=node_taxonomy,
                                                                     source_db_info="no")
                if proteinPiana not in already_linked:
                    piana_access.insert_swissProt_code(swissProt_code_value=unientry,
                                                       proteinPiana_value=proteinPiana,
                                                       swissProt_source_value=sourcedb)

            uniacc = codes['uniacc']
            if uniacc != "None":
                already_linked = piana_access.get_list_protein_piana(proteinCode_value=uniacc,
                                                                     proteinCodeType_value=PianaGlobals.swissAccessionID_col,
                                                                     tax_id_value=node_taxonomy,
                                                                     source_db_info="no")
                if proteinPiana not in already_linked:
                    piana_access.insert_swissAccession_code(swissAccession_code_value=uniacc,
                                                            proteinPiana_value=proteinPiana,
                                                            swissAccession_source_value=sourcedb,
                                                            isPrimary_value=0)

            gi_code = codes['gi']
            if gi_code != "None":
                already_linked = piana_access.get_list_protein_piana(proteinCode_value=gi_code,
                                                                     proteinCodeType_value=PianaGlobals.giID_col,
                                                                     tax_id_value=node_taxonomy,
                                                                     source_db_info="no")
                if proteinPiana not in already_linked:
                    piana_access.insert_gi_code(gi_code_value=gi_code,
                                                proteinPiana_value=proteinPiana,
                                                gi_source_value=sourcedb)

        # END OF for proteinPiana in codes_by_proteinPiana:

# END OF if insert_protein_coreferences


def _source_db_by_proteinPiana(list_proteinPiana):
    """
    From a list of pairs (proteinPiana, sourceDB), return a dictionary {proteinPiana: "standard" or "completion"}

    A proteinPiana is "completion" only if all of its pairs have "completion" as sourceDB: as soon as one
    pair comes from another source db, the proteinPiana is "standard", whatever the order of the pairs.
    """
    dic_proteinPiana = {}

    for pair in list_proteinPiana:
        if pair[1] != "completion":
            dic_proteinPiana[pair[0]] = "standard"
        elif pair[0] not in dic_proteinPiana:
            # don't overwrite if a standard db already had it
            # normally, this should not happen (that a code is both completed and standard) because
            # completion makes sure not to duplicate codes
            dic_proteinPiana[pair[0]] = "completion"

    return dic_proteinPiana


if insert_interactions:
    # file listing the interactions that could not be inserted (it is only written in verbose mode)
    log_fd = open("dip_parser.log", "w")

    try:
        if verbose:
            sys.stderr.write("Inserting interactions from DIP\n")

        # Obtaining all links in database DIP
        # after the query, we'll have a list of records with the following info:
        #   dip_interaction[0] --> edge_id
        #   dip_interaction[1] --> edge_uid
        #   dip_interaction[2] --> edge_from  (it is a dip ID, which is not coherent across DIP versions... )
        #   dip_interaction[3] --> edge_to (it is a dip ID, which is not coherent across DIP versions... )
        #   dip_interaction[4] --> edge_class   (class of record: protein, ..)
        sqlquery = """ select edge_id, edge_uid, edge_from, edge_to, edge_class from edges"""
        dipcursor.execute(sqlquery)
        dip_interactions = dipcursor.fetchall()

        # For each interaction in DIP we do the following:
        #  1. find the characteristics of the interaction
        #  2. find the piana internal identifiers for the proteins involved in the interaction
        #  3. insert a piana interaction with the internal identifiers
        if verbose:
            sys.stderr.write("Number of dip interactions is: %s \n" %( len(dip_interactions)))

        # initialised regardless of verbose, so that the counter always exists
        number_of_interactions_no_id = 0

        for dip_interaction in dip_interactions:

            # -----------------------------------------------------------------------
            #  1. find the characteristics of the interaction
            # -----------------------------------------------------------------------
            #     after the query, each record has the following info:
            #          one_int_chars[0] --> edge_exp_name (experiment type: yeast 2 hybrid, ...)
            #          one_int_chars[1] --> edge_exp_source (pubmed id)
            #
            #     the edge id is substituted by the database driver
            dipcursor.execute("""select edge_exp_name, edge_exp_source \
            from edges_experiments where edge_exp_id = %s""", (dip_interaction[0],))
            interaction_chars = dipcursor.fetchall()

            # get the methods and pmids for this interaction (dictionaries used as sets: only keys matter)
            list_methods = {}
            list_pmids = {}
            for one_int_chars in interaction_chars:
                list_methods[one_int_chars[0].strip()] = None
                # the pubmed id is what follows the first ":" of the source
                # NOTE(review): a source without ":" raises IndexError here -- confirm all sources have that format
                list_pmids[one_int_chars[1].split(":")[1].strip()] = None

            # -----------------------------------------------------------------------
            #  2. get a list of protein pianas for dip nodes "from:" and "to:"
            # -----------------------------------------------------------------------
            # list_proteinPiana_x is a list of tuples (proteinPiana, source_db)
            list_proteinPiana_a = get_associated_list_proteinPianas(dip_cursor=dipcursor, piana_access=piana_access,
                                                                    node_dip_id=dip_interaction[2])
            list_proteinPiana_b = get_associated_list_proteinPianas(dip_cursor=dipcursor, piana_access=piana_access,
                                                                    node_dip_id=dip_interaction[3])

            # dictionaries establishing completion/standard for the source db of all proteinPianas a and b
            # (the lookup is done on the proteinPiana itself, so a completion never hides a standard source)
            dic_proteinPiana_a = _source_db_by_proteinPiana(list_proteinPiana_a)
            dic_proteinPiana_b = _source_db_by_proteinPiana(list_proteinPiana_b)

            # -----------------------------------------------------------------------
            #  3. insert a piana interaction with the internal identifiers
            # -----------------------------------------------------------------------

            if verbose:
                sys.stderr.write("Inserting interaction (methods %s) between lists of proteinPianas: A <%s> and B <%s>\n" %(list_methods, list_proteinPiana_a, list_proteinPiana_b))

            # TO DO!!! Confidence value is now set to 1!!! Change to proper value. In fact, interactionConfidence_value will not be an argument
            #          since it will be an internal value calculated from several factors (sourceDB, method used, intersection of DBs, ...)

            for proteinPiana_a in list_proteinPiana_a:
                for proteinPiana_b in list_proteinPiana_b:
                    # proteinPiana_a is a pair (proteinPiana, source_db)
                    # proteinPiana_b is a pair (proteinPiana, source_db)

                    if dic_proteinPiana_a[proteinPiana_a[0]] == "completion" or dic_proteinPiana_b[proteinPiana_b[0]] == "completion":
                        # if the proteinPiana comes from an inference (using complete_piana.py) then state in the database that
                        # this interaction comes from a completion
                        source_db = "dip_c"
                    else:
                        source_db = "dip"

                    for method in list_methods:
                        for one_pmid in list_pmids:
                            piana_access.insert_interaction(proteinPianaA_value = proteinPiana_a[0],
                                                            isSourceA_value = 1,
                                                            proteinPianaB_value = proteinPiana_b[0],
                                                            isSourceB_value = 1,
                                                            interactionConfidence_value = 1,
                                                            methodDescription_value = method,
                                                            sourceDBDescription_value = source_db,
                                                            confidenceAssignedSourceDB_value=1,
                                                            pubmed_id_value= one_pmid)

                # END OF for proteinPiana_b in list_proteinPiana_b:
            # END OF for proteinPiana_a in list_proteinPiana_a:

            if verbose:
                if not list_proteinPiana_a or not list_proteinPiana_b:

                    number_of_interactions_no_id += 1
                    sys.stderr.write("Int not inserted (no proteinPiana found) -->  DIP int_uid %s between id<<%s>> and id<<%s>> \n" %(dip_interaction[1],
                                                                                                                               dip_interaction[2],
                                                                                                                               dip_interaction[3]))
                    log_fd.write("Int not inserted (no proteinPiana found) -->  DIP int_uid %s between <<%s>> and <<%s>> \n" %(dip_interaction[1],
                                                                                                                           dip_interaction[2],
                                                                                                                           dip_interaction[3]))
                # END OF if not list_proteinPiana_a or not list_proteinPiana_b:
            # END OF if verbose:

        # END OF for dip_interaction in dip_interactions:

    finally:
        # the log file is closed even if a query or an insertion fails
        log_fd.close()

    if verbose:
        sys.stderr.write( "All done!\n")
        sys.stderr.write("Number of dip interactions: %s \n" %( len(dip_interactions)))
        sys.stderr.write("Number of dip interactions NOT inserted (relation code-sequence not found): %s \n" %( number_of_interactions_no_id) )

# END OF if insert_interactions

                             

