"""
File        : psi2piana.py
Author      : Ramon Aragues
Creation    : 24.10.2005
Contents    : inserts information from a PSI mysql database into a piana database
Called from : command line

=======================================================================================================

This file implements a program that fills up tables in database piana with information from a psi database

--> a PSI database must exist already with format shown in create_psi_tables.sql (directory code/dbCreation)

    this existing PSI database was previously populated using parsePSI.py
"""

# psi2piana.py: inserts information from a PSI mysql database into a piana database
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues

import sys
import getopt

import re
import readline
import MySQLdb

from PianaDBaccess import *
import PianaGlobals


# sourceDB tag given to proteinPianas that were obtained from a geneName
# associated to more than one sequence
ambiguous_tag = "ambiguous"

# verbosity switches: verbose is raised by the --verbose command line flag,
# verbose_detailed has no command line flag in this file
verbose = verbose_detailed = 0

# NOTE(review): not referenced anywhere else in the visible part of this file
counting_discarded = 0

# ----------------------
# Function usage()
# ----------------------
def usage():
    """
    Write the command line help of psi2piana.py to standard output.

    The text is emitted with a single write so that the function does not
    depend on the print statement; the output is the same, line by line, as
    one print per entry of help_lines.
    """
    help_lines = (
        "\n--------------------------------------------------------------------------------------------------------------",
        "This program fills up tables in database piana with information from a PSI database\n",
        "\nUsage: python psi2piana.py  --piana-dbname=piana_dbname --piana-dbhost=piana_dbhost --piana-dbuser=piana_dbuser --piana-dbpass=piana_dbpass",
        "              --psi-dbname=psi_dbname --psi-dbhost=psi_dbhost --psi-dbuser=psi_dbuser --psi-dbpass=psi_dbpass ",
        "              --psi-db=psi_db --insert-psidb-ids --help --verbose",
        "\nwhere:",
        "     --insert-psidb-ids : if flag is set, parsers inserts in PIANA those internal ids used in the psi database (only hprd)",
        "     piana_dbname : name of database piana to be used (required)",
        "     piana_dbhost : name of host where database piana to be used is placed (required)",
        "     piana_dbuser : username accessing the database (not required in most systems)",
        "     piana_dbpass : password of username accessing the database (not required in most systems)",
        "     psi_dbname   : name of the psi mysql database (required)",
        "     psi_dbhost   : name of the machine with psi mysql server (required)",
        "     psi_dbuser   : name of the mysql psi username (not required in most systems)",
        # this entry used to repeat the psi_dbuser text; it describes the password
        "     psi_dbpass   : password of the mysql psi username (not required in most systems)",
        "     psi_db       : the psi database you are parsing (unfortunately, the standards are not that respected...)",
        "                          - hprd: when parsing the Human Protein Reference Database",
        "                          - mips: when parsing the MIPS Mammalian Protein-Protein Database",
        "                          - bind: when parsing the Biomolecular Interaction Network Database",
        "                          - (*) whatever name you want, just make sure it appears in source_databases & interaction_databases of PianaGlobals.py",
        "     --help       : prints this message and exits",
        "     --verbose    : prints process info to stdout",
        "--------------------------------------------------------------------------------------------------------------",
    )

    sys.stdout.write("\n".join(help_lines) + "\n")
        

   
# ---------------------------
# Function parseArguments()                                               
# --------------------------- 

def parseArguments():
    """
    Parse the command line (sys.argv) and store the values in module globals.

    Globals that can be set:
       psi_dbname, psi_dbhost, psi_dbuser, psi_dbpass         (--psi-db* options)
       piana_dbname, piana_dbhost, piana_dbuser, piana_dbpass (--piana-db* options)
       psi_db                                                 (--psi-db)
       insert_psidb_ids                                       (set to 1 by --insert-psidb-ids)
       verbose                                                (set to 1 by --verbose)

    Exits with status 2 on an unknown option or when --psi-db is missing, and
    with status 0 after printing the help requested through --help.

    Raises ValueError if the psi database name or host was not given.
    """
    global insert_psidb_ids
    global verbose

    # options carrying a value --> name of the module global that receives it
    value_options = {"--psi-dbhost": "psi_dbhost",
                     "--psi-dbname": "psi_dbname",
                     "--psi-dbuser": "psi_dbuser",
                     "--psi-dbpass": "psi_dbpass",
                     "--psi-db": "psi_db",
                     "--piana-dbname": "piana_dbname",
                     "--piana-dbhost": "piana_dbhost",
                     "--piana-dbuser": "piana_dbuser",
                     "--piana-dbpass": "piana_dbpass"}

    long_options = ["verbose", "help", "insert-psidb-ids", "psi-dbname=", "psi-dbuser=", "psi-dbhost=", "psi-dbpass=",
                    "piana-dbname=", "piana-dbhost=", "piana-dbuser=", "piana-dbpass=", "psi-db="]

    try:
        opts, args = getopt.getopt(sys.argv[1:], "", long_options)
    except getopt.GetoptError:
        # sys.exc_info() is used to fetch the exception without relying on a
        # version-specific "except ..., name" / "except ... as name" syntax
        bad_opt = sys.exc_info()[1]
        # print the error (terminated by a newline), the help information, and exit
        sys.stderr.write("%s\n" % bad_opt)
        usage()
        sys.exit(2)

    module_globals = globals()

    for option, value in opts:

        if option in value_options:
            module_globals[value_options[option]] = value

        elif option == "--insert-psidb-ids":
            insert_psidb_ids = 1

        elif option == "--verbose":
            verbose = 1

        elif option == "--help":
            # asking for help is not an error: print help information and exit successfully
            usage()
            sys.exit(0)

    # check arguments
    if psi_dbname is None or psi_dbhost is None:
        raise ValueError("trying to establish a connection to psi database without giving a host or database name")

    if psi_db is None:
        sys.stdout.write("--------------------------------\n")
        sys.stdout.write("You didn't set a name for your psi database\n")
        sys.stdout.write("--------------------------------\n")
        usage()
        sys.exit(2)
        


def get_associated_list_proteinPianas(psi_cursor, piana_access, node_psi_id):
    """
    from a psi node of the database connected via psi_cursor, return a list of (proteinPiana, sourceDB) associated to it

    psi_cursor   : cursor on the psi database (tables proteinCodes and proteinFeatures are queried through it)
    piana_access : object whose get_list_protein_piana() method translates external codes into proteinPianas
    node_psi_id  : counter_id of the psi node

    Search strategy:
       1. all proteinPianas associated to the spAcc_id, emblAcc_id, gi_id and sequence values of the node
       2. only if step 1 found nothing, use the description of the node (the last valid one returned by the query):
            - as a geneName: if exactly one proteinPiana is associated, use the results as they are;
              if more than one is associated, return each of them with sourceDB set to ambiguous_tag
            - if no geneName matched, as a protein description: results are used only if exactly one
              proteinPiana is associated (too risky to trust a description otherwise)

    Duplicated (proteinPiana, sourceDB) tuples are removed; the first occurrence is kept.
    An empty list is returned when nothing could be found.
    """
    # I have removed an argument ext_codes_wanted that was not being used in this file...

    # -----------------------------------------------------------------------
    #  1. find the external identifiers and the species for node_psi_id
    # -----------------------------------------------------------------------
    # the value is handed over to the driver as a query parameter instead of being pasted into the sql string
    psi_cursor.execute("""select spAcc_id, emblAcc_id, gi_id, description, sequence from proteinCodes where counter_id = %s""",
                       (node_psi_id,))
    list_ext_codes = psi_cursor.fetchall()

    psi_cursor.execute("""select tax_id from proteinFeatures where counter_id = %s""", (node_psi_id,))
    value_retrieved = psi_cursor.fetchall()

    # tax_id 0 is used when the node has no (usable) species information
    tax_id = 0
    if value_retrieved and _has_psi_value(value_retrieved[0][0]):
        tax_id = int(value_retrieved[0][0])

    # instead of dealing directly with the sql result, create a dictionary like this:
    #                             { 'spAcc_id': [P23423, q23423, ...],
    #                               'emblAcc_id': [...]
    #                               'gi_id': [2342,234234, ...]
    #                               'description': [sdfsdfsdfd,sdfsdfdsf, ...]
    #                               'sequence': [sdfsdfsdfsdfsdf,sdfsdfsdfsdfsdf]
    #                             }
    # columns are listed in the order they appear in the select above
    column_names = ('spAcc_id', 'emblAcc_id', 'gi_id', 'description', 'sequence')

    dic_all_codes = {}
    for column_name in column_names:
        dic_all_codes[column_name] = []

    for group_codes in list_ext_codes:
        for column_index in range(len(column_names)):
            # the database has placeholder strings when there is no value for an external code: skip them
            if _has_psi_value(group_codes[column_index]):
                dic_all_codes[column_names[column_index]].append(group_codes[column_index])

    # -----------------------------------------------------------------------
    #  2. search for proteinPianas using the 'good' codes
    # -----------------------------------------------------------------------
    code_types = (('spAcc_id', PianaGlobals.swissAccessionID_col),
                  ('emblAcc_id', PianaGlobals.emblAccessionID_col),
                  ('gi_id', PianaGlobals.giID_col),
                  ('sequence', PianaGlobals.proteinSequence_col))

    temp_list_proteinPiana = []

    for column_name, code_type in code_types:
        for code_value in dic_all_codes[column_name]:
            temp_list_proteinPiana.extend(piana_access.get_list_protein_piana(proteinCode_value=code_value,
                                                                              proteinCodeType_value=code_type,
                                                                              tax_id_value=tax_id, source_db_info="yes"))

    # -----------------------------------------------------------------------
    #  3. nothing found for 'good' codes: fall back to the description
    # -----------------------------------------------------------------------
    if not temp_list_proteinPiana and dic_all_codes['description']:
        # only a valid description is used (there is nothing to search for when the node has none)
        protein_description = dic_all_codes['description'][-1]

        # geneName_proteinPianas is a list of tuples (proteinPiana, sourceDB)
        geneName_proteinPianas = piana_access.get_list_protein_piana(proteinCode_value=protein_description,
                                                                     proteinCodeType_value=PianaGlobals.geneName_col,
                                                                     tax_id_value=tax_id, source_db_info="yes")

        different_geneName_pps = _distinct_protein_pianas(geneName_proteinPianas)

        if len(different_geneName_pps) == 1:
            # if there is only one proteinPiana associated to this geneName, use it (and trust it)
            temp_list_proteinPiana.extend(geneName_proteinPianas)

        elif len(different_geneName_pps) > 1:
            # several proteinPianas associated: we cannot trust them entirely... set sourceDB to ambiguous_tag
            temp_list_proteinPiana.extend([(one_pp, ambiguous_tag) for one_pp in different_geneName_pps])

        else:
            # if no geneName found, search for proteinPianas in table description
            description_proteinPianas = piana_access.get_list_protein_piana(proteinCode_value=protein_description,
                                                                            proteinCodeType_value=PianaGlobals.proteinDescription_col,
                                                                            tax_id_value=tax_id, source_db_info="yes")

            if len(_distinct_protein_pianas(description_proteinPianas)) == 1:
                # only considering it if there is only one proteinPiana associated to it
                temp_list_proteinPiana.extend(description_proteinPianas)

    # -----------------------------------------------------------------------
    #  4. remove duplicates, keeping the first occurrence of each pair
    # -----------------------------------------------------------------------
    dic_proteinPiana = {}
    list_proteinPiana = []

    for temp_proteinPiana in temp_list_proteinPiana:
        # temp_proteinPiana is a tuple (proteinPiana, sourceDBID) where sourceDBID might be set to ambiguous_tag
        key = "%s.%s" % (temp_proteinPiana[0], temp_proteinPiana[1])
        if key not in dic_proteinPiana:
            dic_proteinPiana[key] = None
            list_proteinPiana.append(temp_proteinPiana)

    return list_proteinPiana


def _has_psi_value(value):
    """Return True if value is a real psi database value (not empty and not the "NULL" / "None" placeholders)."""
    return bool(value) and value != "NULL" and value != "None"


def _distinct_protein_pianas(list_hits):
    """Return the different proteinPianas (first element of each tuple of list_hits) in first-seen order."""
    dic_seen = {}
    list_distinct = []
    for one_hit in list_hits:
        if one_hit[0] not in dic_seen:
            dic_seen[one_hit[0]] = None
            list_distinct.append(one_hit[0])
    return list_distinct



    
# --------
# --------
#  Main()               
# --------                               
# --------

# connection parameters of the psi database: no value until parseArguments() reads them
psi_dbname = psi_dbuser = psi_dbhost = psi_dbpass = None

# name of the psi database being parsed (value of --psi-db)
psi_db = None

# connection parameters of the piana database
piana_dbname = piana_dbuser = piana_dbhost = piana_dbpass = None

# raised to 1 by the --insert-psidb-ids flag
insert_psidb_ids = 0

# keys "a.b" of the protein pairs handled, used to count how many different interactions are inserted
testing_uniq = {}

# overwrite the defaults above with the values given in the command line
parseArguments()

# Initialising connection to piana
piana_access = PianaDBaccess(dbname=piana_dbname, dbhost=piana_dbhost, dbuser=piana_dbuser, dbpassword= piana_dbpass)

# opening connection to MySQL PSI database and create a cursor to work with the database
#
# only the credentials that were actually given are handed over to MySQLdb: in particular,
# a password given without a username no longer results in user=None being passed to connect()
psi_connection_args = {"db": psi_dbname, "host": psi_dbhost}

if psi_dbuser is not None:
    psi_connection_args["user"] = psi_dbuser

if psi_dbpass is not None:
    psi_connection_args["passwd"] = psi_dbpass

psidb = MySQLdb.connect(**psi_connection_args)

# psicursor is a module level name: it is used by the rest of the script
psicursor = psidb.cursor()


# TO DO!!! I am not using co-references provided by psi interaction databases

# Internal protein ids of the psi database are inserted only if the user asked for it,
# and only for HPRD (the other databases do not have 'interesting' internal ids)
if insert_psidb_ids and psi_db == "hprd":

    psicursor.execute(""" select counter_id, db_id from proteins""")

    # all rows are fetched before looping, since the same cursor is reused to find the proteinPianas
    for psi_counter_id, hprd_db_id in psicursor.fetchall():

        # list of tuples (proteinPiana, sourcedb) for this psi protein
        pps_for_protein = get_associated_list_proteinPianas(psi_cursor= psicursor, piana_access= piana_access,
                                                            node_psi_id=psi_counter_id)

        for pp_and_source in pps_for_protein:

            # proteinPianas that are not ambiguous keep the plain database name, the others get the _c suffix
            if pp_and_source[1] != ambiguous_tag:
                source_db = psi_db
            else:
                source_db = psi_db + "_c"

            # insert the hprd internal id into the piana database
            piana_access.insert_protein_id_intDB_code(protein_id_intDB_value="hprd_id:" + hprd_db_id.strip(),
                                                      proteinPiana_value= pp_and_source[0],
                                                      intDB_source_value=source_db)
# END OF if insert_psidb_ids and psi_db == "hprd":

# Retrieve every interaction (key + counter ids of both proteins) of the PSI database

if verbose:
    sys.stderr.write("Inserting interactions from PSI\n")

psicursor.execute(""" select interaction_key, proteinA_counter_id, proteinB_counter_id from interactions""")
psi_interactions = psicursor.fetchall()


# Each interaction of PSI will be processed as follows:
#  1. find the method of the interaction
#  2. find the psi external identifier for proteins involved in the interaction
#  3. find the piana internal identifier for those proteins
#  4. insert a piana interaction with the internal identifiers

# the statistics below are only kept (and only read) in verbose mode
if verbose:
    number_of_interactions_no_id = 0
    number_of_interactions_with_id = 0
    num_ints_parsed = 0
    num_total_ints = len(psi_interactions)
    sys.stderr.write("Number of psi interactions is: %s \n" % num_total_ints)

# the log file is created in the current directory and named after the psi database being parsed
log_file_name = "psi_log.%s" % psi_db
log_fd = open(log_file_name, "w")

def _classify_source_reliability(list_proteinPiana):
    """
    Return a dictionary {proteinPiana: "standard" | "unreliable"} for a list of tuples (proteinPiana, sourceDB).

    A proteinPiana is "unreliable" when its sourceDB is "completion" or ambiguous_tag, and "standard" otherwise.
    "standard" always wins: it overwrites a previous "unreliable" and is never overwritten by a later one.
    """
    dic_reliability = {}

    for one_pair in list_proteinPiana:
        if one_pair[1] != "completion" and one_pair[1] != ambiguous_tag:
            # even if there was a completion for this proteinPiana, overwrite it...
            dic_reliability[one_pair[0]] = "standard"
        elif one_pair[0] not in dic_reliability:
            # don't overwrite if a standard db already had it (the key is the proteinPiana, not the whole tuple)
            dic_reliability[one_pair[0]] = "unreliable"

    return dic_reliability


for psi_interaction in psi_interactions:
    #   psi_interaction[0] --> interaction_key
    #   psi_interaction[1] --> proteinA_counter_id
    #   psi_interaction[2] --> proteinB_counter_id

    if verbose:
        num_ints_parsed += 1

    # -----------------------------------------------------------------------
    #  1. find the method and pubmed id of the interaction
    # -----------------------------------------------------------------------
    #     after the queries, we'll have:
    #          list_methods -->  list of tuples (method,)
    #          list_pmids -->  list of tuples (pmid,)
    #
    #     the interaction key is passed as a query parameter (the driver takes care of quoting it)

    if verbose_detailed:
        sys.stderr.write("getting method and pubmed from psi database\n" )

    psicursor.execute("""select method from interactionMethod where interaction_key = %s """, (psi_interaction[0],))
    list_methods = psicursor.fetchall()

    psicursor.execute("""select pubmed_id from interactionFeatures where interaction_key = %s """, (psi_interaction[0],))
    list_pmids = psicursor.fetchall()

    # -----------------------------------------------------------------------
    #  2. get a list of protein pianas for psi nodes "from:" and "to:"
    # -----------------------------------------------------------------------
    #     list_proteinPiana_x is a list of tuples (proteinPiana, source_db)
    if verbose_detailed:
        sys.stderr.write("getting proteinPianas for psi A counter_id %s\n" %(psi_interaction[1]))

    list_proteinPiana_a= get_associated_list_proteinPianas(psi_cursor= psicursor, piana_access= piana_access, node_psi_id=psi_interaction[1])

    if verbose_detailed:
        sys.stderr.write("getting proteinPianas for psi B counter_id %s\n" %(psi_interaction[2]))

    list_proteinPiana_b= get_associated_list_proteinPianas(psi_cursor= psicursor, piana_access= piana_access, node_psi_id=psi_interaction[2])

    if verbose_detailed:
        sys.stderr.write("finding source for proteinPianas\n" )

    # dictionaries establishing unreliable/standard for the source db of all proteinPianas a and b
    dic_proteinPiana_a = _classify_source_reliability(list_proteinPiana_a)
    dic_proteinPiana_b = _classify_source_reliability(list_proteinPiana_b)

    # -----------------------------------------------------------------------
    #  3. insert a piana interaction with the internal identifiers
    # -----------------------------------------------------------------------

    if verbose:
        interaction_summary = "int %sof%s: %s (pps %s) and %s (pps %s) and methods %s\n" %(num_ints_parsed,
                                                                                           num_total_ints,
                                                                                           psi_interaction[1],
                                                                                           list_proteinPiana_a,
                                                                                           psi_interaction[2],
                                                                                           list_proteinPiana_b,
                                                                                           list_methods)
        sys.stderr.write(interaction_summary)
        log_fd.write(interaction_summary)

    # default values keep the shape of the sql results (tuples of one element), since
    # the insertion below reads method[0] and one_pmid[0]
    if len(list_methods) == 0:
        # there was no method associated... set a unclassified method
        list_methods = [("unclassified",)]

    if len(list_pmids) == 0:
        # there was no pubmed id associated... use 0
        list_pmids = [(0,)]

    # TO DO!!! Confidence value is now set to 1!!! Change to proper value. In fact, interactionConfidence_value will not be an argument
    #          since it will be an internal value calculated from several factors (sourceDB, method used, intersection of DBs, ...)

    for proteinPiana_a in list_proteinPiana_a:
        for proteinPiana_b in list_proteinPiana_b:
            # proteinPiana_a is a pair (proteinPiana, source_db)
            # proteinPiana_b is a pair (proteinPiana, source_db)

            if verbose:
                # the key always has the lowest proteinPiana first, so that a--b and b--a count as the same interaction
                if proteinPiana_a[0] <= proteinPiana_b[0]:
                    key = "%s.%s" %(proteinPiana_a[0], proteinPiana_b[0])
                else:
                    key = "%s.%s" %(proteinPiana_b[0], proteinPiana_a[0])

                testing_uniq[key] = None
            # END OF if verbose:

            if dic_proteinPiana_a[ proteinPiana_a[0] ] == "unreliable" or dic_proteinPiana_b[ proteinPiana_b[0]] == "unreliable":
                # if the proteinPiana comes from an inference (using complete_piana.py) or it is ambiguous (comes from a geneName)
                # then state in the database that this interaction is not completely trustable using the _c
                source_db = psi_db + "_c"
            else:
                source_db = psi_db

            for method in list_methods:
                # method is a tuple ('method', )
                if verbose:
                    insertion_message = "Inserting piana interaction %s -- %s with method %s and source db %s\n" %(proteinPiana_a[0],
                                                                                                                    proteinPiana_b[0],
                                                                                                                    method, source_db )
                    if verbose_detailed:
                        sys.stderr.write(insertion_message)

                    log_fd.write(insertion_message)
                # END OF if verbose:

                for one_pmid in list_pmids:
                    # one_pmid is a tuple ('pmid', )
                    piana_access.insert_interaction(proteinPianaA_value = proteinPiana_a[0],
                                                    isSourceA_value = 1,
                                                    proteinPianaB_value = proteinPiana_b[0],
                                                    isSourceB_value = 1,
                                                    interactionConfidence_value = 1,
                                                    methodDescription_value = method[0],
                                                    sourceDBDescription_value = source_db,
                                                    confidenceAssignedSourceDB_value=1,
                                                    pubmed_id_value= one_pmid[0])
                # END OF for one_pmid in list_pmids:
            # END OF for method in list_methods:

        # END OF for proteinPiana_b in list_proteinPiana_b:
    # END OF for proteinPiana_a in list_proteinPiana_a:

    if verbose:
        if not list_proteinPiana_a or not list_proteinPiana_b:
            # at least one of the proteins has no proteinPiana: nothing was inserted for this interaction
            number_of_interactions_no_id += 1

            not_inserted_message = "Int not inserted (no proteinPiana found) -->  PSI int %s between <<%s>> and <<%s>> \n" %(psi_interaction[0],
                                                                                                                              psi_interaction[1],
                                                                                                                              psi_interaction[2])
            sys.stderr.write(not_inserted_message)
            log_fd.write(not_inserted_message)
        else:
            number_of_interactions_with_id += 1
    # END OF if verbose:


# END OF for psi_interaction in psi_interactions:


if verbose:
    # the same statistics are reported on stderr and in the log file
    final_summary = "".join(["Number of psi interactions: %s \n" %( len(psi_interactions)),
                             "Number of psi interactions inserted: %s \n" %( number_of_interactions_with_id),
                             "Number of psi interactions NOT inserted: %s \n" %( number_of_interactions_no_id),
                             "Number of different piana interactions inserted: %s \n" %( len(testing_uniq))])

    sys.stderr.write( "All done!\n")
    sys.stderr.write(final_summary)
    log_fd.write(final_summary)

# the log file is opened regardless of the verbose flag, so it is closed regardless of it as well
log_fd.close()
                             

