"""
 File       : PianaDBaccess.py
 Author     : R. Aragues & J. Planas
 Creation   : 23.01.2004
 Contents   : class used as an interface with database piana
 Called from: all classes/programs that select/insert information from/into piana

=======================================================================================================

This class is the piana user interface to piana database. There are
lower levels that can be used to select/insert information from/into
database piana (e.g. PianaDB class) but they shouldn't be used unless
the programmer has a complete understanding of the system.

This class implements functions such as insert_interaction() that,
given all data about a particular interaction, will perform consistency
checks and all required insertions into the piana tables. It is done this
way to ensure that a user doesn't forget to update all tables that are
related to a new interaction, making the internal structure of the
database transparent to them.

Summarizing, this class should be the only means of interaction with
the database used by piana
"""

# PianaDBaccess.py: class used as an interface with database piana
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues


import re, sys, os
import Bio.SeqUtils.ProtParam

from GraphNodeAttribute import *
from GraphEdgeAttribute import *
from PianaDB import *
import PianaGlobals

import utilities


# Module-level debug switches (0 = silent).
# When `verbose` is non-zero, methods of PianaDBaccess write diagnostic
# messages to stderr.
verbose = 0
# NOTE(review): the two flags below are not read in the part of the file
# reviewed here; presumably they gate debug output for interaction insertion
# and for species handling respectively -- confirm against the rest of the module.
verbose_insert_interaction = 0
verbose_species= 0


class PianaDBaccess(object):
    """
    Class used as an interface with database piana
    """
    def __init__(self, dbname=None, dbhost=None, dbuser=None, dbpassword= None):
        """
        Stores the connection parameters and opens the connection to the piana database

        "dbname" is the name of the piana database to connect to (required: ValueError is raised if None)

        "dbhost" is the machine running the mysql server that holds the piana database
                 (required: ValueError is raised if None)

        "dbuser" is the mysql user (may be left to None)

        "dbpassword" is the mysql password (may be left to None)
        """

        if verbose:
            received_arguments = (dbname, dbhost, dbuser, dbpassword)
            sys.stderr.write("Arguments received in PianaDBaccess() are: %s, %s, %s, %s\n" % received_arguments)

        # a connection cannot be opened without a database name and a host
        if dbname is None:
            raise ValueError("A piana database name is needed to initialiate a connection")

        if dbhost is None:
            raise ValueError("A host name for the piana database is needed to initialiate a connection")

        # the parameters are kept on the object: __setstate__ and __getnewargs__ read them back
        self.dbname, self.dbhost = dbname, dbhost
        self.dbuser, self.dbpassword = dbuser, dbpassword

        # the actual connection is handled by the lower level class PianaDB
        self.db = PianaDB(dbname=self.dbname,
                          dbhost=self.dbhost,
                          dbuser=self.dbuser,
                          dbpassword=self.dbpassword)

    # ----
    # methods required for using pickle with piana objects
    # ----
    def __getstate__(self):
        odict = self.__dict__.copy() # copy the dict since we change it
        del odict['db']              # remove filehandle entry
        return odict

    def __setstate__(self, dict):
        self.__dict__ = dict
        self.__class__.__init__(self, self.dbname, self.dbhost, self.dbuser, self.dbpassword)
        
    def __getnewargs__(self):
        return (self.dbname, self.dbhost, self.dbuser, self.dbpassword)


    # --
    # not sure it is being used... 
    # --
    def get_piana_db(self):
        """
        Returns the PianaDB object used for this PianaDBaccess object
        
        """
        return self.db
    
    # ----------------------------------------
    # Access methods to PianaGlobals variables
    # ----------------------------------------

    def get_from_dict(self, dict_name, description_value):
        """
        returns ID for a given description of a dictionarized entity to find its "description_value"
        description_value must exist in the dictionary PianaGlobals.dict_name, otherwise nothin will be returned
        """
        
        for entity_id in dict_name.keys():

            normalized_description = description_value.lower()
            
            if normalized_description in dict_name[entity_id]:

                if verbose:
                    sys.stderr.write(" <%s> normalized is equal to <%s>\n " %(normalized_description, entity_id))

                return entity_id


        return "unknown"


    def get_methodID(self, methodDescription_value):
        """
        Translates the description of a method used to detect interactions
        ("methodDescription_value") into its methodID

        The lookup is done by get_from_dict() on dictionary PianaGlobals.method_names: if the
        description does not appear in that dictionary, "unknown" is returned
        """
        return self.get_from_dict(dict_name= PianaGlobals.method_names,
                                  description_value= methodDescription_value)


    def get_sourceDBID(self, sourceDBDescription_value):
        """
        Translates the description of an external source database ("sourceDBDescription_value")
        into its sourceDBID

        The lookup is done by get_from_dict() on dictionary PianaGlobals.source_databases: if the
        description does not appear in that dictionary, "unknown" is returned
        """
        return self.get_from_dict(dict_name= PianaGlobals.source_databases,
                                  description_value= sourceDBDescription_value)
    
    def get_interaction_source_database_color(self, database_name):
        """
        Returns the color code that PianaGlobals assigns to source database "database_name"

        The code is the first element of entry PianaGlobals.interaction_source_databases_colors[database_name]
        """
        color_entry = PianaGlobals.interaction_source_databases_colors[database_name]
        return color_entry[0]
    
    def get_interaction_line_style(self, interaction_type):
        """
        Returns the line style that PianaGlobals assigns to "interaction_type"

        The style is the first element of entry PianaGlobals.interaction_line_styles[interaction_type]

        interaction_type can be:
             - normal
             - expanded
        """
        style_entry = PianaGlobals.interaction_line_styles[interaction_type]
        return style_entry[0]
    
    def get_node_fill_color(self, node_type):
        """
        Returns the fill color code that PianaGlobals assigns to "node_type"

        The code is the first element of entry PianaGlobals.node_fill_colors[node_type]

        node_type can be:
             - root
             - normal
        """
        fill_color_entry = PianaGlobals.node_fill_colors[node_type]
        return fill_color_entry[0]
    
    def get_node_border_color(self, node_origin):
        """
        Returns the border color code that PianaGlobals assigns to "node_origin"

        The code is the first element of entry PianaGlobals.node_border_colors[node_origin]

        node_origin can be:
             - expanded
             - normal
        """
        border_color_entry = PianaGlobals.node_border_colors[node_origin]
        return border_color_entry[0]
   
    # ------------------------------------
    # Access (insertions and retrievals) methods to protein tables
    # ------------------------------------

    def update_table_column(self, proteinPiana= None, table= None, column= None, new_value= None):
        """
        Sets column "column" of table "table" to "new_value" for protein "proteinPiana"

        Meant for protein tables whose unique identifier is proteinPiana: the value currently held
        in the column is replaced with the new value provided (see its use in update_sequence_ip.py)

        Returns whatever insert_db_content returns for answer_mode "num_updated"
        (presumably the number of rows updated)
        """

        update_query = PianaGlobals.UpdateTableColumn.get_sqlquery(proteinPiana= proteinPiana,
                                                                   table= table,
                                                                   column= column,
                                                                   new_value= new_value)

        # insert_db_content is used even though this is an update: the sql statement is executed
        # and its result retrieved in exactly the same way
        return self.db.insert_db_content( update_query, answer_mode="num_updated" )

    def get_all_proteinPiana(self):
        """
        Returns the result of query SelectAllProteinPiana (all proteinPianas in pianaDB),
        requested as a list without duplicates
        """
        # TO DO!!! retrieve only those proteins of species tax_id_value

        all_proteins_query = PianaGlobals.SelectAllProteinPiana.get_sqlquery()

        return self.db.select_db_content( all_proteins_query, answer_mode="list", remove_duplicates="yes" )

    def get_list_protein_piana(self, proteinCode_value= None, proteinCodeType_value= None, tax_id_value=0, source_db_info="no" ):
        """
        Method used to retrieve a list of proteinPiana identifiers from a given protein "proteinCode_value" of type "proteinCodeType_value"

        For example, one geneName "mot1" could correspond to many proteinPianas, since different databases give different sequences to mot1
        Internally, we have to work with all proteinPianas, and then give the answer to the user in the type of code he chose
        
        There can be many proteinPianas [id, id, id] or just one [id] or cero []. 

        valid "proteinCodeType_value" are those columns in PianaGlobals (should look something like PianaGlobals.xxxx_col)


        "tax_id_value" fixes the species for the proteinPiana that will be returned. If tax_id_value is 0, all proteinPianas associated to
                       proteinCode_value will be returned.
                       In case you don't know the species of your code, set tax_id_value to 0. If your code implicitly points to a single
                       species, this will be OK.

                       In which cases tax_id_value has an effect on the proteinPianas that are returned?
                       For example, if a geneName is associated to several proteinPianas, only those that are of tax_id_value will be returned.

        "list_source_dbs" determines which protein source dbs are used to find the protein pianas
           - "all" will return all proteinPianas regardless of source
           - if a list is given, only those databases will be used to get proteinPianas

        "source_db_info" determines if information about the sourceDB that inserted the proteinPiana is returned or not
           - "no" will simply return a list of proteinPianas
           - "yes" will return a list of tuples (proteinPiana, sourceDBID)

        if protein_code being passed is a pdb code, the format of the code must be pdb_code.chain_id
         -> If the chain_id is None, write pdb_code. (leaving the dot to mark that chain is unknown)
        """

        if proteinCode_value is None or proteinCodeType_value is None:
            raise ValueError("trying to get list of protein pianas for code (%s) and/or type (%s) None" %(proteinCode_value, proteinCodeType_value ))
        
        if proteinCodeType_value == PianaGlobals.proteinSequence_col :
            
            # if the type passed is a sequence, fasten up things by setting the code to md5. To do so, 
            # get the md5 code for the fasta (using package utilities from /piana/code/utilities/utilities.py)
            proteinCode_value = utilities.sequence2md5(proteinCode_value)
            proteinCodeType_value = PianaGlobals.proteinMD5_col
            
        elif proteinCodeType_value == PianaGlobals.proteinPiana_col:
            # the type passed is a proteinPiana... a little strange since the user is asking to get a proteinPiana from a proteinPiana
            # but maybe it makes sense for automated methods... this line makes this trasparent by returning the same value as long() in a list
            if source_db_info == "yes":
                return [long(proteinCode_value), "protein"]
            else:
                return [long(proteinCode_value)]
        # END OF elif proteinCodeType_value == PianaGlobals.proteinPiana_col: (if proteinCodeType_value == PianaGlobals.proteinSequence_col : )
            
        if verbose:
            sys.stderr.write("Calling SelectProteinPiana with proteinCode %s and proteinCodeType %s\n" %(proteinCode_value,
                                                                                                       proteinCodeType_value))

        # set how many items are being selected, which depends on whethers source dd info is requested or not
        if source_db_info == "no":
            selected_items= 1
        else:
            selected_items= 2
            
        # if tax_id_value is 0 , call to standard SelectProteinPiana. Otherwise, fix a taxonomy by calling SelectProteinPianaTax
        if tax_id_value == 0:
            
            list_proteinPiana_value = self.db.select_db_content( PianaGlobals.SelectProteinPiana.get_sqlquery(protein_code= proteinCode_value,
                                                                                                              code_type= proteinCodeType_value,
                                                                                                              source_db_info= source_db_info)
                                                                 , answer_mode="list", remove_duplicates="yes",
                                                                 number_of_selected_elems= selected_items )
            
        else:

            list_proteinPiana_value = self.db.select_db_content( PianaGlobals.SelectProteinPianaTax.get_sqlquery(protein_code= proteinCode_value,
                                                                                                                 code_type= proteinCodeType_value,
                                                                                                                 taxonomy_value= tax_id_value,
                                                                                                                 source_db_info= source_db_info)
                                                                 , answer_mode="list", remove_duplicates="yes",
                                                                 number_of_selected_elems= selected_items  )

            # remove duplicates
            dic_proteins = {}
            for protein in list_proteinPiana_value:
                dic_proteins[protein] = None
            list_proteinPiana_value = dic_proteins.keys()
        # END OF else: (if tax_id_value == 0)


        return list_proteinPiana_value

    
    def get_new_protein_piana(self):
        """
        Hands out a new proteinPiana identifier, taken from table proteinPianaCounter

        Called every time a new sequence is going to be inserted in the database

        The value read from the counter is the one returned; the counter is then set to that
        value plus 1, ready for the next call

        proteinPiana is just an internal identifier used to uniquely identify proteins: each sequence is a different proteinPiana

        NOTE(review): reading the counter and updating it are two separate statements; whether this is
                      safe with several processes inserting at the same time depends on PianaDB -- confirm
        """

        # 1. read the identifier that is currently free
        counter_query = PianaGlobals.SelectNewProteinPiana.get_sqlquery()
        assigned_proteinPiana = self.db.select_db_content( counter_query, answer_mode="single" )

        # 2. leave the counter pointing to the identifier that will be handed out next time
        next_proteinPiana = assigned_proteinPiana + 1
        counter_update_query = PianaGlobals.InsertProteinPianaCounter.get_sqlquery(next_proteinPiana)
        self.db.insert_db_content( counter_update_query, answer_mode=None )

        return assigned_proteinPiana
   


    def insert_protein(self, proteinSequence_value, tax_id_value= None):
        """
        Method used to insert new proteins into pianaDB in `protein` table.

        Returns a proteinPiana: it is the code corresponding to the  (sequence, tax_id)
                                  --> if (sequence, tax) didn't exist already, creates a new proteinPiana
                                  --> if the (sequence, tax) is already present in pianaDB, returns the previous proteinPiana
                                      (nothing is inserted in that case)

        Raises ValueError if more than one proteinPiana is registered for the same (sequence, tax_id)
        --------------------------------------------------------------------------------------------------------------

        "proteinSequence_value" is the sequence of the protein. Since pianaDB is a sequence-based DB, it is mandatory.
                                Unwanted characters are removed (utilities.get_clean_sequence) before using it

        "tax_id_value" is the taxonomy id of the protein. There is one proteinPiana for each (sequence, tax_id):
                       therefore, tax_id is mandatory. However, when the user doesn't know the tax id for the
                       protein, we use a dummy tax_id (ie. 0) to allow inserting that sequence. Therefore, if
                       the tax_id is unknown, leave it to None (any false value is replaced with 0).

        We use MD5 codes (calculated here) instead of sequences to fasten up the process of comparing sequences

        MW and IP values for the protein are calculated using BioPython methods. If BioPython fails to
        calculate one of them, that value is stored as 0

        This method takes care of handling proteinPiana codes:

          1. It first looks for existence of the (sequence, tax_id) in table proteinCorrespondence,
          which is the registry of correspondences between (protein sequence, tax id) and proteinPiana identifiers. We need to keep this
          registry in order to make sure that proteinPiana identifiers do not change when updating the database, or building a
          new one from scratch.

          2. If the (sequence, tax_id) does not exist in the database, then obtain a new proteinPiana identifier. This is done with a method
          that looks into a counter table, returns its value and increases the counter for the next proteinPiana identifier.

              2.1 Update proteinPianaCorrespondence table with new proteinPiana and the (md5, tax_id)

              2.2 Insert the protein (sequence, md5, length, MW, IP, tax id) with the new proteinPiana into the database

        """
        # TO DO!!!!!! How do I keep the correspondences between proteinPianas of different piana dbs?
        #             With a registry file (/data/proteinCorrespondence/....) updated??? Writing to the file whenever I insert a
        #             new protein? Doing a database dump (select proteinPiana, proteinMD5) each time I want to create a new database?
        #             --> Do I need to keep the correspondences between different piana dbs... maybe not...

        if verbose:
            sys.stderr.write("converting sequence to md5\n")

        #remove unwanted characters from sequence
        cleaned_proteinSequence_value = utilities.get_clean_sequence(input_sequence= proteinSequence_value)

        # get the md5 code for the fasta (using package utilities from /piana/code/utilities/utilities.py)
        proteinMD5_value = utilities.sequence2md5(cleaned_proteinSequence_value)

        if verbose:
            sys.stderr.write("calculating length\n")

        # calculate the length
        proteinSequenceLength_value = len(cleaned_proteinSequence_value)

        if verbose:
            sys.stderr.write("creating biopython protein analyzer\n")

        # creating a BioPython protein analyzer
        analyzed_protein = Bio.SeqUtils.ProtParam.ProteinAnalysis(cleaned_proteinSequence_value)

        if verbose:
            sys.stderr.write("calculating molecular weight\n")

        try:
            # calculate the molecular weight
            proteinMW_value = analyzed_protein.molecular_weight()
        except Exception:
            # any error in the function sets weight to 0
            # (Exception instead of a bare except: Ctrl-C and sys.exit() must not be swallowed here)
            proteinMW_value = 0

        if verbose:
            sys.stderr.write("calculating isoelectric point\n")

        try:
            # this is a modified version of isoelectric_point
            #
            # I have done two main things to the program:
            #
            #    - introduce an attribute correction_step in SeqUtils.ProtParam.isoelectric_point()
            #         def isoelectric_point(self, correction_step= 0.001):
            #
            #    - rewrite SeqUtils.IsoelectricPoint by modifying the step-wise correction of the charge so
            #      it never gets blocked. The modification I've made has to be done on file
            #      python2.X/site-packages/Bio/SeqUtils/IsoelectricPoint.py
            #
            #      I keep a copy of the modified code under piana/code/utilities/modified_IsoelectricPoint.py

            # TO CHECK!!! Does the latest version of biopython need these modifications? as of version 2.3, it does...
            # NOTE(review): with a BioPython whose isoelectric_point() does not accept correction_step, this call
            #               presumably raises TypeError and the IP is silently stored as 0 -- confirm
            proteinIP_value= analyzed_protein.isoelectric_point(correction_step = 0.001)
        except Exception:
            # any error in the function sets isoelectric point to 0
            # (Exception instead of a bare except: Ctrl-C and sys.exit() must not be swallowed here)
            proteinIP_value = 0

        if verbose:
            sys.stderr.write("initiating insertion of information\n")


        if not tax_id_value:
            # if no tax_id given, set a dummy value to represent that sequence under any species
            tax_id_value = 0

        # 1. look for existence of the (sequence, tax_id) in table proteinCorrespondence
        #    to check for errors, try to get a list of proteinPiana... then take only one element
        list_proteinPiana_value = self.db.select_db_content(
            PianaGlobals.SelectProteinPianaCorrespondence.get_sqlquery(proteinMD5_value = proteinMD5_value,
                                                                       tax_id_value = tax_id_value),
            answer_mode="list", remove_duplicates="yes")

        if not list_proteinPiana_value:
            proteinPiana_value = None

        elif len(list_proteinPiana_value) == 1:
            proteinPiana_value= list_proteinPiana_value[0]

        else:
            raise ValueError("how can we have more than one proteinPiana for the same (sequence, tax id)? Error!")


        # 2. If the (sequence, tax_id) does not exist in the database, then obtain a new proteinPiana identifier.
        #    and insert it into the database.
        if proteinPiana_value is None:
            proteinPiana_value = self.get_new_protein_piana()

            # 2.1 Update proteinPianaCorrespondence table with new proteinPiana and the md5 value

            self.db.insert_db_content( PianaGlobals.InsertProteinPianaCorrespondence.get_sqlquery(proteinPiana_value = proteinPiana_value,
                                                                                                  proteinMD5_value = proteinMD5_value,
                                                                                                  tax_id_value=tax_id_value ),
                                       answer_mode= None )



            # 2.2 If it is a newly generated proteinPiana insert it into the database

            self.db.insert_db_content(
                PianaGlobals.InsertProtein.get_sqlquery(proteinPiana_value = proteinPiana_value,
                                                        tax_id_value = tax_id_value,
                                                        proteinSequence_value = cleaned_proteinSequence_value,
                                                        proteinMD5_value = proteinMD5_value,
                                                        proteinSequenceLength_value = proteinSequenceLength_value,
                                                        proteinMW_value = proteinMW_value,
                                                        proteinIP_value = proteinIP_value)
                , answer_mode=None )

        # END OF if proteinPiana_value is None:

        return proteinPiana_value

    def insert_protein_similarity(self, proteinPiana_a_value, proteinPiana_b_value ):
        """
        Registers that two proteinPianas are in fact the "same" protein (they are sufficiently
        similar to be considered the same for some situations)

        This is used to avoid comparing two proteins that are in fact the same.

        The two proteinPianas can be given in any order: InsertProteinSimilarity makes sure that
        the order proteinPianaA < proteinPianaB is respected
        """

        similarity_query = PianaGlobals.InsertProteinSimilarity.get_sqlquery(proteinPianaA_value=proteinPiana_a_value,
                                                                             proteinPianaB_value=proteinPiana_b_value)

        self.db.insert_db_content( similarity_query, answer_mode=None )

    def check_proteins_similarity(self, proteinPiana_a_value, proteinPiana_b_value ):
        """
        Tells whether two proteinPianas were registered as being the same protein

        Returns 1 if query SelectProteinSimilarity finds something for the pair (ie. there is an entry
        proteinPiana_a, proteinPiana_b in table proteinSimilarity) and 0 if the query result is None
        """
        similarity_query = PianaGlobals.SelectProteinSimilarity.get_sqlquery(proteinPianaA_value=proteinPiana_a_value,
                                                                             proteinPianaB_value=proteinPiana_b_value)

        found_entry = self.db.select_db_content(similarity_query,
                                                answer_mode ="single", number_of_selected_elems=2)

        if found_entry is None:
            return 0

        return 1

    def get_similar_proteins_dic(self, proteinPiana_value):
        """
        Returns a dictionary whose keys are the proteins that are similar to proteinPiana_value
        (all the values of the dictionary are None)

        Query SelectSimilarProteins is executed twice, with mode "higher" and then with mode
        "lower", and the proteins found by both executions are merged in the dictionary
        """
        # both queries are executed before any of the results is processed
        results_per_mode = []
        for search_mode in ("higher", "lower"):
            mode_query = PianaGlobals.SelectSimilarProteins.get_sqlquery(proteinPiana_value=proteinPiana_value,
                                                                         mode=search_mode)
            results_per_mode.append( self.db.select_db_content(mode_query, answer_mode ="list") )

        similar_proteins_dic = {}
        for mode_proteins in results_per_mode:
            for protein in mode_proteins:
                similar_proteins_dic[protein] = None

        return similar_proteins_dic
                  
    # ------------------------------------
    # Access (insertions and retrievals) methods to codes tables
    # ------------------------------------

    # --
    # insert methods (Access methods to codes tables)
    # --

    def insert_protein_code(self, code_table, proteinPiana, code_value, sourceDBID):
        """
        Generic entry point for inserting a correspondence between an external code and a proteinPiana:
        it just calls the insert method that is specific for the table received

        "code_table" is the table into which the code will be inserted
           -> valid code_table values are those in PianaGlobals.*_table
           -> tables handled here: swissAccession, emblAccession, emblPID, pirEntry, pirAccession,
              pdb, swissProt, geneName and gi. ValueError is raised for any other table

        "code_value" is the value of the external code that corresponds to the piana internal id
           -> swissAccession codes are inserted with isPrimary set to 0
           -> pdb codes must follow format pdb_code.chain_id (the code is split on "." to get both parts)

        "proteinPiana" is the proteinPiana of this protein

        "sourceDBID" is the external database that establishes the correspondence between these codes
        """

        # TO CHECK!!! I am not really sure why I am doing this... historical reasons, I guess...
        #             but this method is only being used by complete_piana.py
        #             All other parsers and programs use the specific methods
        #             described below, specifying exactly which is the type of code they are inserting

        if code_table == PianaGlobals.swissAccession_table:
            self.insert_swissAccession_code( swissAccession_code_value=code_value,
                                             proteinPiana_value= proteinPiana,
                                             swissAccession_source_value=sourceDBID,
                                             isPrimary_value=0)
            return

        if code_table == PianaGlobals.emblAccession_table:
            self.insert_emblAccession_code(emblAccession_code_value=code_value,
                                           proteinPiana_value= proteinPiana,
                                           emblAccession_source_value=sourceDBID)
            return

        if code_table == PianaGlobals.emblPID_table:
            self.insert_emblPID_code(emblPID_code_value=code_value,
                                     proteinPiana_value=proteinPiana,
                                     emblPID_source_value=sourceDBID)
            return

        if code_table == PianaGlobals.pirEntry_table:
            self.insert_pirEntry_code(pirEntry_code_value= code_value,
                                      proteinPiana_value= proteinPiana,
                                      pirEntry_source_value= sourceDBID)
            return

        if code_table == PianaGlobals.pirAccession_table:
            self.insert_pirAccession_code(pirAccession_code_value= code_value,
                                          proteinPiana_value= proteinPiana,
                                          pirAccession_source_value= sourceDBID)
            return

        if code_table == PianaGlobals.pdb_table:
            # pdb codes come as pdb_code.chain_id
            pdb_code_fields = code_value.split(".")
            self.insert_pdb_code(pdb_code_value= pdb_code_fields[0],
                                 proteinPiana_value= proteinPiana,
                                 chain_value= pdb_code_fields[1],
                                 pdb_source_value= sourceDBID )
            return

        if code_table == PianaGlobals.swissProt_table:
            self.insert_swissProt_code(swissProt_code_value=code_value,
                                       proteinPiana_value= proteinPiana,
                                       swissProt_source_value=sourceDBID)
            return

        if code_table == PianaGlobals.geneName_table:
            self.insert_geneName_code(geneName_code_value= code_value,
                                      proteinPiana_value= proteinPiana,
                                      geneName_source_value= sourceDBID)
            return

        if code_table == PianaGlobals.gi_table:
            self.insert_gi_code( gi_code_value= code_value,
                                 proteinPiana_value= proteinPiana,
                                 gi_source_value= sourceDBID)
            return

        # none of the known tables matched
        raise ValueError("Trying to insert an unknown type (ie table) of protein code (%s)\n" %(code_table))
            
    def insert_swissProt_code(self, swissProt_code_value, proteinPiana_value, swissProt_source_value):
        """
        Inserts in table swissProt the correspondence between swissProt code "swissProt_code_value"
        and "proteinPiana_value"

        "swissProt_source_value" is inserted as well (to keep the database of origin of that code)
        """
        swissProt_query = PianaGlobals.InsertProteinExternalCode.get_sqlquery(table_name= PianaGlobals.swissProt_table,
                                                                              code_value= swissProt_code_value,
                                                                              proteinPiana_value= proteinPiana_value,
                                                                              codeSource_value= swissProt_source_value )

        self.db.insert_db_content( swissProt_query, answer_mode=None )
        
    def insert_swissAccession_code(self, swissAccession_code_value, proteinPiana_value, swissAccession_source_value, isPrimary_value):
        """
        Inserts in table swissAccession the correspondence between swissProt accession number
        "swissAccession_code_value" and "proteinPiana_value"

        "swissAccession_source_value" is inserted as well (to keep the database of origin of that code)

        "isPrimary_value" indicates whether it is the primary accession code or not

        """
        swissAccession_query = PianaGlobals.InsertProteinSwissAccessionCode.get_sqlquery(table_name= PianaGlobals.swissAccession_table,
                                                                                         code_value= swissAccession_code_value,
                                                                                         proteinPiana_value= proteinPiana_value,
                                                                                         isPrimary_value= isPrimary_value,
                                                                                         codeSource_value= swissAccession_source_value )

        self.db.insert_db_content( swissAccession_query, answer_mode=None )
        
        
    def insert_geneName_code(self, geneName_code_value, proteinPiana_value, geneName_source_value):
        """
        Register gene name "geneName_code_value" as an external code of protein "proteinPiana_value" (table geneName)

        "geneName_source_value" is stored together with the code, to keep track of the database the code came from
        """
        # build the insertion statement first, then hand it over to the database layer
        sql_statement = PianaGlobals.InsertProteinExternalCode.get_sqlquery(
            table_name=PianaGlobals.geneName_table,
            code_value=geneName_code_value,
            proteinPiana_value=proteinPiana_value,
            codeSource_value=geneName_source_value)

        self.db.insert_db_content(sql_statement, answer_mode=None)
        
    
    def insert_emblPID_code(self, emblPID_code_value, proteinPiana_value, emblPID_source_value):
        """
        Insert correspondence between embl pid "emblPID_code_value" and "proteinPiana_value" in table emblPID

        "emblPID_code_value" can be given as "code.version": the text before the first dot is inserted as
        the code and the text between the first and second dots is inserted as the (integer) version.
        When the value has no dot at all, version 0 is inserted.

        "emblPID_source_value" is inserted as well (to keep origin database of that code)

        Raises ValueError if the text following the first dot is not an integer (eg "CODE." or "CODE.x")
        """
        # raw string: "\." in a normal string literal is an invalid escape sequence
        splited = re.split(r"\.", emblPID_code_value)
        emblPID_code_value = splited[0]

        try:
            version = int( splited[1] )
        except IndexError:
            # no dot in the code: there is no version information
            version = 0

        self.db.insert_db_content( PianaGlobals.InsertProteinEMBLCode.get_sqlquery(table_name= PianaGlobals.emblPID_table,
                                                                                   code_value= emblPID_code_value,
                                                                                   proteinPiana_value= proteinPiana_value,
                                                                                   version_value= version,
                                                                                   codeSource_value= emblPID_source_value )
                                   , answer_mode=None )

    def insert_emblAccession_code(self, emblAccession_code_value, proteinPiana_value, emblAccession_source_value):
        """
        Insert correspondence between embl accession "emblAccession_code_value" and "proteinPiana_value" in table emblAccession

        "emblAccession_code_value" can be given as "code.version": the text before the first dot is inserted as
        the code and the text between the first and second dots is inserted as the (integer) version.
        When the value has no dot at all, version 0 is inserted.

        "emblAccession_source_value" is inserted as well (to keep origin database of that code)

        Raises ValueError if the text following the first dot is not an integer (eg "CODE." or "CODE.x")
        """
        # raw string: "\." in a normal string literal is an invalid escape sequence
        splited = re.split(r"\.", emblAccession_code_value)
        emblAccession_code_value = splited[0]

        try:
            version = int( splited[1] )
        except IndexError:
            # no dot in the code: there is no version information
            version = 0

        self.db.insert_db_content( PianaGlobals.InsertProteinEMBLCode.get_sqlquery(table_name= PianaGlobals.emblAccession_table,
                                                                                   code_value= emblAccession_code_value,
                                                                                   proteinPiana_value= proteinPiana_value,
                                                                                   version_value= version,
                                                                                   codeSource_value= emblAccession_source_value )
                                   , answer_mode=None )
    
    def insert_interPro_code(self, interProID_code_value, proteinPiana_value, interProDescription_value, interPro_source_value ):
        """
        Register interPro id "interProID_code_value" (and its description "interProDescription_value") for
        protein "proteinPiana_value" in table interPro

        "interPro_source_value" is stored together with the code, to keep track of the database the code came from
        """
        # build the insertion statement first, then hand it over to the database layer
        sql_statement = PianaGlobals.InsertProteinInterProCode.get_sqlquery(
            table_name=PianaGlobals.interPro_table,
            code_value=interProID_code_value,
            proteinPiana_value=proteinPiana_value,
            description_value=interProDescription_value,
            codeSource_value=interPro_source_value)

        self.db.insert_db_content(sql_statement, answer_mode=None)

    def insert_pdb_code(self, pdb_code_value, proteinPiana_value, chain_value, pdb_source_value ):
        """
        Register pdb code "pdb_code_value" with chain "chain_value" as an external code of protein
        "proteinPiana_value" (table pdb)

        Code and chain are handed separately to the query builder
        (internally, the pdb code is formed by "pdb_code_value" + "." + "chain_value")

        "pdb_source_value" is stored together with the code, to keep track of the database the code came from
        """
        # build the insertion statement first, then hand it over to the database layer
        sql_statement = PianaGlobals.InsertProteinPdbCode.get_sqlquery(
            table_name=PianaGlobals.pdb_table,
            code_value=pdb_code_value,
            proteinPiana_value=proteinPiana_value,
            chain_value=chain_value,
            codeSource_value=pdb_source_value)

        self.db.insert_db_content(sql_statement, answer_mode=None)


    def insert_gi_code(self, gi_code_value, proteinPiana_value, gi_source_value):
        """
        Register gi "gi_code_value" as an external code of protein "proteinPiana_value" (table gi)

        "gi_source_value" is stored together with the code, to keep track of the database the code came from
        """
        # build the insertion statement first, then hand it over to the database layer
        sql_statement = PianaGlobals.InsertProteinExternalCode.get_sqlquery(
            table_name=PianaGlobals.gi_table,
            code_value=gi_code_value,
            proteinPiana_value=proteinPiana_value,
            codeSource_value=gi_source_value)

        self.db.insert_db_content(sql_statement, answer_mode=None)
 
        
    
    def insert_pirEntry_code(self, pirEntry_code_value, proteinPiana_value, pirEntry_source_value):
        """
        Register pir entry "pirEntry_code_value" as an external code of protein "proteinPiana_value" (table pirEntry)

        "pirEntry_source_value" is stored together with the code, to keep track of the database the code came from

        Every code is inserted as being complete (isComplete is always 1)
        """
        # isComplete_value should say whether the code is complete or not: since it is not clear
        # how to work that out, all codes are flagged as complete (1)
        sql_statement = PianaGlobals.InsertProteinPIRCode.get_sqlquery(
            table_name=PianaGlobals.pirEntry_table,
            code_value=pirEntry_code_value,
            proteinPiana_value=proteinPiana_value,
            isComplete_value=1,
            codeSource_value=pirEntry_source_value)

        self.db.insert_db_content(sql_statement, answer_mode=None)

    def insert_pirAccession_code(self, pirAccession_code_value, proteinPiana_value, pirAccession_source_value):
        """
        Register pir accession "pirAccession_code_value" as an external code of protein "proteinPiana_value"
        (table pirAccession)

        "pirAccession_source_value" is stored together with the code, to keep track of the database the code came from

        Every code is inserted as being complete (isComplete is always 1)
        """
        # isComplete_value should say whether the code is complete or not: since it is not clear
        # how to work that out, all codes are flagged as complete (1)
        sql_statement = PianaGlobals.InsertProteinPIRCode.get_sqlquery(
            table_name=PianaGlobals.pirAccession_table,
            code_value=pirAccession_code_value,
            proteinPiana_value=proteinPiana_value,
            isComplete_value=1,
            codeSource_value=pirAccession_source_value)

        self.db.insert_db_content(sql_statement, answer_mode=None)
            
    def insert_protein_id_intDB_code(self, protein_id_intDB_value, proteinPiana_value, intDB_source_value):
        """
        Register code "protein_id_intDB_value" as an external code of protein "proteinPiana_value"
        (table PianaGlobals.protein_id_intDB_table)

        "intDB_source_value" is stored together with the code, to keep track of the database the code came from
        """
        # build the insertion statement first, then hand it over to the database layer
        sql_statement = PianaGlobals.InsertProteinExternalCode.get_sqlquery(
            table_name=PianaGlobals.protein_id_intDB_table,
            code_value=protein_id_intDB_value,
            proteinPiana_value=proteinPiana_value,
            codeSource_value=intDB_source_value)

        self.db.insert_db_content(sql_statement, answer_mode=None)


    # --
    # get methods (Access methods to codes tables)
    # --

    def get_protein_mw(self, proteinPiana=None):
        """
        Retrieve from the database the molecular weight of protein proteinPiana

        The value is selected in answer mode "single"
        """
        # TO DO!!!! If not found in db, or == 0, it could be calculated here!!!! using biopython...
        mw_query = PianaGlobals.SelectProteinTableColumn.get_sqlquery(
            proteinTable_column_value=PianaGlobals.proteinMW_col,
            proteinPiana_value=proteinPiana)

        return self.db.select_db_content(mw_query, answer_mode="single")
        

    def get_protein_ip(self, proteinPiana=None):
        """
        Retrieve from the database the isoelectric point of protein proteinPiana

        The value is selected in answer mode "single"
        """
        # TO DO!!!! If not found in db, or == 0, it could be calculated here!!!! using biopython...
        ip_query = PianaGlobals.SelectProteinTableColumn.get_sqlquery(
            proteinTable_column_value=PianaGlobals.proteinIP_col,
            proteinPiana_value=proteinPiana)

        return self.db.select_db_content(ip_query, answer_mode="single")
 
    def get_protein_sequence(self, proteinPiana= None):
        """
        Retrieve from the database the sequence of protein proteinPiana

        The value is selected in answer mode "single"
        """
        sequence_query = PianaGlobals.SelectProteinTableColumn.get_sqlquery(
            proteinTable_column_value=PianaGlobals.proteinSequence_col,
            proteinPiana_value=proteinPiana)

        return self.db.select_db_content(sequence_query, answer_mode="single")
   
    def get_protein_sequenceLength(self, proteinPiana_value = None):
        """
        Retrieve from the database the length of the sequence of protein proteinPiana_value

        The value is selected in answer mode "single"
        """
        length_query = PianaGlobals.SelectProteinTableColumn.get_sqlquery(
            proteinTable_column_value=PianaGlobals.proteinSequenceLength_col,
            proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(length_query, answer_mode="single")
        
    def get_list_protein_external_codes(self, proteinPiana=None, protein_type_name=None, alternative_type_names=[], answer_mode= "list", source_db_info="no" ):
        """

        returns the list of external protein codes in type "protein_type_name" that correspond to protein proteinPiana.

        If no code is found for "protein_type_name" , it successively tries with types listed in alternative_type_names
           -> if an alternative type name code is found, the list returned will contain strings of the form "type_name:external_code"
           -> it will only return the list of codes for the first alternative type found
           -> eg, alternative_type_names could be ["uniacc", "geneName", "md5"]

        "proteinPiana" is the internal piana code for the protein you want to get the external codes

        "protein_type_name" is an easy-to-remember protein code type
           -> valid protein_type_name values are those in PianaGlobals.valid_protein_types.keys()
           -> it can be "all", meaning that the list returned will contain all codes in all types (all codes will be preceded by type name)
               -> alternative_type_names are ignored if protein_type_name is all

        "alternative_type_names" is a list of easy-to-remember protein code type
           for example, alternative_type_names could be ["uniacc", "gi", "proteinPiana"]
           I recommend writing always md5 at the end to make sure you don't have Nones in your output
              -> alternative_type_names are ignored if protein_type_name is all
              -> all the names are translated to (table, column) before any of them is searched, so an
                 unknown name is reported by utilities.get_code_table_column() even if an earlier name has codes
              -> the list is only read, never modified (the default [] is therefore safe to share between calls)

        "answer_mode" can be "single" (if you just want one external code) or "list" (if you want all external codes for that proteinPiana)
           -> any value different from "list" is treated as "single" when labelling alternative codes

        "source_db_info" determines if information about the sourceDB that inserted the code is returned or not
           - "no" will simply return the codes
           - "yes" will return the codes together with their source, as produced by get_code_value()
             (this mode does not allow alternative_type_names nor protein_type_name=='all': alternative types
              are always searched without source information)

        Returns whatever get_code_value() returned for "protein_type_name" when it found something. Otherwise,
        returns the labelled codes of the alternative types ("type_name:code"), or the empty result of the
        first search when no alternative type has codes either.

        Raises ValueError if protein_type_name is "all" and source_db_info is "yes"

        Attention!!! This method does not work if you set "protein_type_name" to 'all' and "answer_mode" to 'single'!!!!
                     (each type found overwrites the previous one: only the last code found is returned)
        Attention!!! This method does not work if you ask for all external codes (protein_type_name="all") with their source db infos (source_db_info="yes")!!!!
        """

        # TO DO!!! Not very intuitive to call it get_list_* and then allow the method to return only one code as an element

        if source_db_info == "yes" and protein_type_name == "all":
            raise ValueError("this method does not allow asking for all external codes with their source db infos (can be changed, but has not been implemented")

        if protein_type_name != "all":
            # set column and table for the protein type name that will be used for output
            pair_table_column = utilities.get_code_table_column(code_type_name= protein_type_name)

            # obtain proteinPiana in the code desired by the user (stated as argument protein_type_name to this method)
            list_protein_external_codes = self.get_code_value(proteinPiana_value = proteinPiana,
                                                              proteinCode_table = pair_table_column[0],
                                                              proteinCode_col = pair_table_column[1],
                                                              answer_mode = answer_mode,
                                                              source_db_info= source_db_info)
        else:
            # protein_type_name is "all": set alternative_type_names to all possible type names (except for fasta, which is an alias to sequence)
            # A new list is built instead of calling keys().remove(): keys() is not guaranteed to be a
            # list (it is a view without remove() in python 3)
            alternative_type_names = [ type_name for type_name in PianaGlobals.valid_protein_types.keys()
                                       if type_name != "fasta" ]
            list_protein_external_codes = []

        # if no external codes found for protein_type_name (or it is equal to "all"), find other codes (from the list of alternative types)
        #                                       ---> the type of code will precede the code (eg: "proteinPiana:111111")
        if not list_protein_external_codes:

            # transform easy-to-remember type names to tuples (type_name, [table, column]) for alternative types
            list_alternative_types = [ (type_name, utilities.get_code_table_column(code_type_name= type_name))
                                       for type_name in alternative_type_names ]

            # find alternative codes
            for type_name, pair_table_column in list_alternative_types:
                list_temp_external_code = self.get_code_value(proteinPiana_value = proteinPiana,
                                                              proteinCode_table = pair_table_column[0],
                                                              proteinCode_col = pair_table_column[1],
                                                              answer_mode = answer_mode)

                if not list_temp_external_code:
                    # nothing found for this type: try with the next one
                    continue

                code_type_label = type_name + ":"

                if answer_mode == "list":
                    # append the protein codes found (describing the type being used)
                    for temp_external_code in list_temp_external_code:
                        list_protein_external_codes.append( code_type_label + str(temp_external_code))
                else:
                    list_protein_external_codes = code_type_label + str(list_temp_external_code)

                if protein_type_name != "all":
                    # if mode not all, we only want the protein codes for the first alternative type found... exit loop
                    break
            # END OF for type_name, pair_table_column in list_alternative_types:
        # END OF if not list_protein_external_codes:

        # although it says list_*, in answer_mode "single" it is a single element
        return list_protein_external_codes


    def get_code_value(self, proteinCode_table, proteinCode_col, proteinPiana_value= None, answer_mode = None, source_db_info="no"):
        """
        Method used to retrieve a protein external code from column "proteinCode_col" of table "proteinCode_table"
        for a given  proteinPiana (it is a generalization to obtain external code from a given table that corresponds to protein proteinPiana)

        "answer_mode" can be 'single' (if you just want one external code) or 'list' (if you want all external codes from that table)

        "source_db_info" determines if information about the sourceDB that inserted the code is returned or not
           - "no" will simply return the codes
           - "yes" asks for two elements per code (the code and its source) instead of one

        Special cases:
           - if "proteinCode_col" is the proteinPiana column, no query is done: "proteinPiana_value" itself is
             returned (paired with source "piana" if source_db_info is "yes")
           - if "proteinCode_table" is the swissAccession table, answer_mode 'single' returns the primary accession
             number, and only falls back to a non-primary one when no primary accession is found

        in those cases where there is only one external code for "proteinPiana", "answer_mode" 'single' will return it as a single element and "list"
        as the first (and only) element of a list

        If nothing is found, the method is expected to return None (in "answer_mode" "single") and an empty list (in "answer_mode" "list"),
        as reported by self.db.select_db_content()

        Raises ValueError if "proteinPiana_value" or "answer_mode" are None, or if "answer_mode" is neither 'single' nor 'list'
        when asking for proteinPiana or swissAccession codes (for other tables, answer_mode is passed as received to the database layer)

        """
        # TO CHECK! I don't remember the difference between this method and get_list_external_codes
        #           (apart from allowing the use of alternative_types)

        if proteinPiana_value is None or answer_mode is None:
            raise ValueError("proteinPiana (%s) and answer_mode (%s) cannot be None when trying to retrieve a external code" %(proteinPiana_value,
                                                                                                                               answer_mode))

        # number of columns requested per code: the code alone, or the code and its source
        if source_db_info == "yes":
            selected_items = 2
        else:
            selected_items = 1

        if proteinCode_col == PianaGlobals.proteinPiana_col:
            # in case the type of value asked is proteinPiana , just return same value
            # looks strange to return a value that has been passed by the user, but it has to be here to
            # be transparent to the type of values being used
            if source_db_info == "yes":
                own_code = (proteinPiana_value, "piana")
            else:
                own_code = proteinPiana_value

            if answer_mode == "single":
                code_value = own_code
            elif answer_mode == "list":
                code_value = [own_code]
            else:
                # without this check, code_value would be unbound when reaching the return statement
                raise ValueError("answer_mode (%s) must be 'single' or 'list' when retrieving proteinPiana codes" %(answer_mode))

        elif proteinCode_table == PianaGlobals.swissAccession_table:
            # swissAccession requires special treatment, because we only want to retrieve the primary accession number (for answer_mode="single")
            if answer_mode == "single":
                # first try with the primary accession number. If it is not found, try searching for other swiss accessions
                # (this shouldn't be happening, but due to code completion algorithms, there are proteins that have swissaccessions
                #  assigned that do not appear as being primary)
                for return_primary in (1, 0):
                    code_value = self.db.select_db_content( PianaGlobals.SelectSwissAccession.get_sqlquery(proteinPiana_value = proteinPiana_value,
                                                                                                           return_primary= return_primary,
                                                                                                           source_db_info= source_db_info)
                                                            , answer_mode= answer_mode,
                                                            number_of_selected_elems= selected_items )
                    if code_value:
                        break

            elif answer_mode == "list":
                code_value = self.db.select_db_content( PianaGlobals.SelectSwissAccession.get_sqlquery(proteinPiana_value = proteinPiana_value,
                                                                                                       return_primary=0,
                                                                                                       source_db_info= source_db_info)
                                                        , answer_mode= answer_mode,
                                                        remove_duplicates="yes",
                                                        number_of_selected_elems= selected_items)
            else:
                # without this check, code_value would be unbound when reaching the return statement
                raise ValueError("answer_mode (%s) must be 'single' or 'list' when retrieving swissAccession codes" %(answer_mode))

        else:
            # all other values retrieved with standard method
            code_value = self.db.select_db_content( PianaGlobals.SelectProteinCode.get_sqlquery(proteinCode_table=proteinCode_table,
                                                                                                proteinCode_col=proteinCode_col,
                                                                                                proteinPiana_value=proteinPiana_value,
                                                                                                source_db_info= source_db_info),
                                                    answer_mode = answer_mode,
                                                    remove_duplicates="yes",
                                                    number_of_selected_elems= selected_items)

        return code_value
    
    
    # ----------------------------------------------------------
    # Access (insertions and retrievals) methods to protein attributes relationship tables
    # ----------------------------------------------------------


    # --
    # insert methods
    # --

    def insert_protein_species(self, tax_id, proteinPiana_value, proteinSpeciesSource_value):
        """
        Register that protein "proteinPiana_value" belongs to the species "tax_id" (table PianaGlobals.proteinSpecies_table)

        "tax_id" is the taxonomy id from ncbi

        "proteinPiana_value" is the internal piana identifier for the protein

        "proteinSpeciesSource_value" is the external database that has set this correspondence (must appear in PianaGlobals.source_databases)
           -> it is translated with self.get_sourceDBID() before being inserted
        """
        sql_statement = PianaGlobals.InsertProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinSpecies_table,
            attribute_value=tax_id,
            proteinPiana_value=proteinPiana_value,
            attributeSource_value=self.get_sourceDBID(proteinSpeciesSource_value))

        self.db.insert_db_content(sql_statement, answer_mode=None)

    def insert_protein_go(self, go_id, proteinPiana_value, proteinGoSource_value):
        """
        Register that protein "proteinPiana_value" is annotated with GO (Gene Ontology) term "go_id"
        (table PianaGlobals.proteinGo_table)

        "go_id" is the GO term id

        "proteinPiana_value" is the internal piana identifier for the protein

        "proteinGoSource_value" is the external database that has set this correspondence
           -> it is translated with self.get_sourceDBID() before being inserted
        """
        sql_statement = PianaGlobals.InsertProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinGo_table,
            attribute_value=go_id,
            proteinPiana_value=proteinPiana_value,
            attributeSource_value=self.get_sourceDBID(proteinGoSource_value))

        self.db.insert_db_content(sql_statement, answer_mode=None)
        
  
    def insert_protein_cog(self, cog_id, proteinPiana_value, proteinCogSource_value):
        """
        Register that protein "proteinPiana_value" belongs to COG (Cluster of Orthologous Genes) "cog_id"
        (table PianaGlobals.proteinCog_table)

        "cog_id" is the COG term id

        "proteinPiana_value" is the internal piana identifier for the protein

        "proteinCogSource_value" is the external database that has set this correspondence
           -> it is translated with self.get_sourceDBID() before being inserted
        """
        sql_statement = PianaGlobals.InsertProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinCog_table,
            attribute_value=cog_id,
            proteinPiana_value=proteinPiana_value,
            attributeSource_value=self.get_sourceDBID(proteinCogSource_value))

        self.db.insert_db_content(sql_statement, answer_mode=None)
  
    def insert_protein_dbali_cluster(self, dbali_cluster_id_value , proteinPiana_value, clustering_method_value, patch_residues_value,
                                     protein_dbali_cluster_source):
        """
        Register that protein "proteinPiana_value" belongs to DBAli cluster "dbali_cluster_id_value"

        "dbali_cluster_id_value" is the cluster id given by DBAli to that protein

        "proteinPiana_value" is the internal piana identifier for the protein

        "clustering_method_value" is the method followed by DBAli to establish the correspondence
          -> the method must be listed in PianaGlobals.pibase_dbali_methods

        "patch_residues_value" is the list of residues (string comma-separated) in the protein that correspond to that DBAli cluster

        "protein_dbali_cluster_source" is the external database that has set this correspondence
          -> it is handed to the query builder as received
        """
        sql_statement = PianaGlobals.InsertProteinDBAliRelationship.get_sqlquery(
            dbali_cluster_id_value=dbali_cluster_id_value,
            proteinPiana_value=proteinPiana_value,
            clustering_method_value=clustering_method_value,
            patch_residues_value=patch_residues_value,
            protein_dbali_cluster_source=protein_dbali_cluster_source)

        self.db.insert_db_content(sql_statement, answer_mode=None)
  
    def insert_protein_scop(self, cf, sf, fa, proteinPiana_value, proteinScopSource_value):
        """
        Register the SCOP classification ("cf", "sf", "fa") of protein "proteinPiana_value"

        "cf" is the fold id
        "sf" is the superfamily id
        "fa" is the family id

        "proteinPiana_value" is the internal piana identifier for the protein

        "proteinScopSource_value" is the external database that has set this correspondence
           -> it is translated with self.get_sourceDBID() before being inserted
        """
        sql_statement = PianaGlobals.InsertProteinScopRelationship.get_sqlquery(
            cf_value=cf,
            sf_value=sf,
            fa_value=fa,
            proteinPiana_value=proteinPiana_value,
            source_value=self.get_sourceDBID(proteinScopSource_value))

        self.db.insert_db_content(sql_statement, answer_mode=None)
  

    def insert_protein_cath(self, cath_id_value, res_start_value, res_end_value, segmentID_value,  proteinPiana_value, proteinCathSource_value):
        """
        Register one segment of CATH domain "cath_id_value" for protein "proteinPiana_value"

        "cath_id_value" is the CATH id

        "res_start_value" is the residue where the CATH domain starts
        "res_end_value" is the residue where the CATH domain ends
        "segmentID_value" indicates which segment of the domain we are inserting (there can be several separate segments for one domain)

        "proteinPiana_value" is the internal piana identifier for the protein

        "proteinCathSource_value" is the external database that has set this correspondence
           -> it is translated with self.get_sourceDBID() before being inserted
        """
        sql_statement = PianaGlobals.InsertProteinCathRelationship.get_sqlquery(
            cathID_value=cath_id_value,
            res_start_value=res_start_value,
            res_end_value=res_end_value,
            segmentID_value=segmentID_value,
            proteinPiana_value=proteinPiana_value,
            source_value=self.get_sourceDBID(proteinCathSource_value))

        self.db.insert_db_content(sql_statement, answer_mode=None)

    def insert_protein_ec(self, ec_id, proteinPiana_value, proteinECSource_value):
        """
        Inserts into the database the EC code "ec_id" of protein "proteinPiana_value"

        "ec_id" is the EC id

        "proteinPiana_value" is the internal piana identifier of the protein

        "proteinECSource_value" is the external database that established this correspondence
           --> it is passed through self.get_sourceDBID() before being written to the statement
        """
        # EC codes are stored through the generic protein attribute statement, on the EC table
        sqlquery = PianaGlobals.InsertProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinEC_table,
            attribute_value=ec_id,
            proteinPiana_value=proteinPiana_value,
            attributeSource_value=self.get_sourceDBID(proteinECSource_value))

        self.db.insert_db_content(sqlquery, answer_mode=None)
        
    def insert_protein_description(self, description, proteinPiana_value, proteinDescriptionSource_value):
        """
        Inserts into the database the description (text string) of protein "proteinPiana_value"

        "description" is the text string that describes the protein

        "proteinPiana_value" is the internal piana identifier of the protein

        "proteinDescriptionSource_value" is the external database that established this correspondence
           --> it is passed through self.get_sourceDBID() before being written to the statement
        """
        # descriptions are stored through the generic protein attribute statement, on the description table
        sqlquery = PianaGlobals.InsertProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinDescription_table,
            attribute_value=description,
            proteinPiana_value=proteinPiana_value,
            attributeSource_value=self.get_sourceDBID(proteinDescriptionSource_value))

        self.db.insert_db_content(sqlquery, answer_mode=None)
        
    def insert_protein_function(self, function, proteinPiana_value, proteinFunctionSource_value):
        """
        Inserts into the database the function (text string) of protein "proteinPiana_value"

        "function" is the text string that describes the function of the protein

        "proteinPiana_value" is the internal piana identifier of the protein

        "proteinFunctionSource_value" is the external database that established this correspondence
           --> it is passed through self.get_sourceDBID() before being written to the statement
        """
        # functions are stored through the generic protein attribute statement, on the function table
        sqlquery = PianaGlobals.InsertProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinFunction_table,
            attribute_value=function,
            proteinPiana_value=proteinPiana_value,
            attributeSource_value=self.get_sourceDBID(proteinFunctionSource_value))

        self.db.insert_db_content(sqlquery, answer_mode=None)
        
    def insert_protein_keyword(self, keyword, proteinPiana_value, proteinKeywordSource_value):
        """
        Inserts into the database a keyword associated to protein "proteinPiana_value"

        "keyword" is the text string with the keyword associated to the protein

        "proteinPiana_value" is the internal piana identifier of the protein

        "proteinKeywordSource_value" is the external database that established this correspondence
           --> it is passed through self.get_sourceDBID() before being written to the statement
        """
        # keywords are stored through the generic protein attribute statement, on the keyword table
        sqlquery = PianaGlobals.InsertProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinKeyword_table,
            attribute_value=keyword,
            proteinPiana_value=proteinPiana_value,
            attributeSource_value=self.get_sourceDBID(proteinKeywordSource_value))

        self.db.insert_db_content(sqlquery, answer_mode=None)
        
        
    def insert_protein_subcellularLocation(self, subcellularLocation, proteinPiana_value, proteinSubcellularLocationSource_value):
        """
        Inserts into the database the cellular location of protein "proteinPiana_value"

        "subcellularLocation" is the text string with the cellular location of the protein

        "proteinPiana_value" is the internal piana identifier of the protein

        "proteinSubcellularLocationSource_value" is the external database that established this correspondence
           --> it is passed through self.get_sourceDBID() before being written to the statement
        """
        # locations are stored through the generic protein attribute statement, on the subcellular location table
        sqlquery = PianaGlobals.InsertProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinSubcellularLocation_table,
            attribute_value=subcellularLocation,
            proteinPiana_value=proteinPiana_value,
            attributeSource_value=self.get_sourceDBID(proteinSubcellularLocationSource_value))

        self.db.insert_db_content(sqlquery, answer_mode=None)

    # --
    # select methods
    # --

    def get_protein_taxonomy_ids(self, proteinPiana_value):
        """
        Returns a list with the taxonomy ids (as defined by ncbi) related to "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein
        """
        # taxonomy ids are read from the species table through the generic protein attribute statement
        sqlquery = PianaGlobals.SelectProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinSpecies_table,
            attribute_relationship_col=PianaGlobals.speciesNCBI_col,
            proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")

    def get_protein_kingdoms(self, proteinPiana_value):
        """
        Returns a list with the kingdoms (as defined by ncbi) related to "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein
        """
        sqlquery = PianaGlobals.SelectProteinKingdom.get_sqlquery(proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")


    def get_protein_species_names(self, proteinPiana_value):
        """
        Returns a list with species names (as defined by ncbi) related to "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier for the protein

        The taxonomy ids of the protein are obtained with get_protein_taxonomy_ids() and the result
        of get_species_names_from_taxonomies() for those ids is returned
        """
        list_taxonomy_ids = self.get_protein_taxonomy_ids(proteinPiana_value=proteinPiana_value)

        # attention: "self" must not be passed explicitly when calling a bound method. Doing so
        #            raised a TypeError, since list_taxonomy_ids was receiving two values
        return self.get_species_names_from_taxonomies(list_taxonomy_ids=list_taxonomy_ids)

    
    def get_species_names_from_taxonomies(self, list_taxonomy_ids = []):
        """
        Returns the species names (without repetitions) that are related to the taxonomy ids in "list_taxonomy_ids"

        "list_taxonomy_ids" is a list of taxonomy ids: one database query is done for each of them
        """
        # species names are used as dictionary keys so that a name shared by several
        # taxonomy ids appears only once in the result
        unique_species = {}

        for ncbi_tax_id in list_taxonomy_ids:
            sqlquery = PianaGlobals.SelectSpeciesNamefromSpeciesNCBI.get_sqlquery(speciesNCBI_value=ncbi_tax_id)

            for species_name in self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes"):
                unique_species[species_name] = None
        # END OF for ncbi_tax_id in list_taxonomy_ids:

        return unique_species.keys()


    def get_taxonomies_from_species_name(self, species_name_value = None):
        """
        Returns a list with the taxonomy ids that correspond to the species name "species_name_value"

        "species_name_value" is passed unchanged (even if it is None) to the select statement
        """
        sqlquery = PianaGlobals.SelectSpeciesNCBIfromSpeciesName.get_sqlquery(speciesName_value=species_name_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
    
    def get_protein_go_accession(self, proteinPiana_value):
        """
        Returns a list with all GO accessions (codes that look like "GO:234324") that are assigned to protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier for the protein

        In case no GO term is found, returns an empty list.
        """
        # accessions are obtained in two steps: first the internal GO term ids of the
        # protein (self.get_protein_go_term_id), then the accession of each term id
        list_term_id = self.get_protein_go_term_id(proteinPiana_value)

        # accessions are used as dictionary keys to make sure there are no repetitions
        # between the accessions retrieved for different term ids
        dic_gos = {}

        for term_id in list_term_id:
            # NOTE(review): the other callers of SelectXFromGoTermId in this class do not
            #               pass go_table --> confirm it is accepted by get_sqlquery()
            sqlquery = PianaGlobals.SelectXFromGoTermId.get_sqlquery(go_table=PianaGlobals.go_table,
                                                                     x_col=PianaGlobals.go_acc_col,
                                                                     term_id_value=term_id)

            # the statement has to be executed: extending a list directly with the value returned
            # by get_sqlquery() added the characters of the SQL string instead of the accessions
            for go_accession in self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes"):
                dic_gos[go_accession] = None
        # END OF for term_id in list_term_id:

        return list(dic_gos)

    def get_protein_go_name(self, go_term_id_value):
        """
        Returns the GO name (column PianaGlobals.go_name_col) of the GO term "go_term_id_value"

        "go_term_id_value" is the GO term id (not a proteinPiana) for which the name is retrieved

        The statement is executed with answer_mode="single"
        """
        sqlquery = PianaGlobals.SelectXFromGoTermId.get_sqlquery(x_col=PianaGlobals.go_name_col,
                                                                 term_id_value=go_term_id_value)

        go_name = self.db.select_db_content(sqlquery, answer_mode="single", remove_duplicates="yes")

        return go_name
       
    def get_protein_go_term_id(self, proteinPiana_value, term_type_value=None):
        """
        Returns a list with all GO term ids (internal go identifiers) that are assigned to protein "proteinPiana_value"

        In case no GO term is found, returns an empty list.

        "proteinPiana_value" is the internal piana identifier of the protein

        "term_type_value" restricts the term type category of the GO terms returned. It can be None or one of:

          - "molecular_function"
          - "biological_process"
          - "cellular_component"

        if term_type_value is None, all GO terms are returned, whatever their term type category is
        """
        # only the statement depends on term_type_value: it is executed in the same way in both cases
        if term_type_value is not None:
            sqlquery = PianaGlobals.SelectGoTermIdUsingTermType.get_sqlquery(proteinPiana=proteinPiana_value,
                                                                             term_type=term_type_value)
        else:
            # no term type restriction: read the GO ids straight from the protein-GO relationship table
            sqlquery = PianaGlobals.SelectProteinAttributeRelationship.get_sqlquery(
                attribute_relationship_table=PianaGlobals.proteinGo_table,
                attribute_relationship_col=PianaGlobals.goID_col,
                proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
    
    def get_proteins_sharing_go(self, proteinPiana_value):
        """
        Returns a list with proteinsPianas that have the same go_term_id as "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier for the protein

        A protein can have several go term ids: one statement is executed for each of them and the
        results are merged (without repetitions). Empty list returned if the protein has no go term id
        """
        # Getting the go_term_ids assigned to proteinPiana_value
        list_term_id = self.get_protein_go_term_id(proteinPiana_value)

        # results are used as dictionary keys to make sure there are no repetitions
        # between the results retrieved for different go term ids
        dic_protein = {}

        for go_term_id in list_term_id:
            # NOTE(review): x_col is goID_col, therefore this statement appears to select GO ids
            #               from the protein-GO table instead of proteinPianas --> confirm which column was intended
            # NOTE(review): the other callers of SelectXFromGoTermId in this class do not
            #               pass go_table --> confirm it is accepted by get_sqlquery()
            sqlquery = PianaGlobals.SelectXFromGoTermId.get_sqlquery(go_table=PianaGlobals.proteinGo_table,
                                                                     x_col=PianaGlobals.goID_col,
                                                                     term_id_value=go_term_id)

            # the statement has to be executed through the database layer: answer_mode and
            # remove_duplicates were being passed to list.extend(), which raised a TypeError
            for protein in self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes"):
                dic_protein[protein] = None
        # END OF for go_term_id in list_term_id:

        return list(dic_protein)

    def get_go_depth(self, term_id_value):
        """
        Returns the depth value in the GO tree (column PianaGlobals.go_distance2root) for "term_id_value"

        "term_id_value" is the GO term id
           --> if it is None (e.g. the proteinPiana didn't have a go assigned), no query is done
               and PianaGlobals.huge_depth is returned
        """
        if term_id_value is not None:
            sqlquery = PianaGlobals.SelectXFromGoTermId.get_sqlquery(x_col=PianaGlobals.go_distance2root,
                                                                     term_id_value=term_id_value)

            return self.db.select_db_content(sqlquery, answer_mode="single", remove_duplicates="yes")

        # no term id given: the depth is considered huge
        return PianaGlobals.huge_depth


    def get_term2term_distance(self, term1, term2):
        """
        Returns the distance in the GO tree between "term1" and "term2"

        "term1" and "term2" are GO term ids

        If both terms are equal, returns 0 without accessing the database. If the database
        returns an empty (false) value for the pair, returns PianaGlobals.huge_distance
        """
        if term1 == term2:
            # a term is at distance 0 from itself
            return 0

        sqlquery = PianaGlobals.Select_go_term2term_distance.get_sqlquery(term1_id=term1,
                                                                          term2_id=term2)

        db_distance = self.db.select_db_content(sqlquery, answer_mode="single", remove_duplicates="yes")

        if not db_distance:
            # nothing (or a false value) retrieved for this pair of terms
            return PianaGlobals.huge_distance

        return db_distance

    
    def get_protein_cog(self, proteinPiana_value):
        """
        Returns a list with all COG identifiers that are assigned to protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        In case no COG identifier is found, returns an empty list.
        """
        # COG ids are read from the COG table through the generic protein attribute statement
        sqlquery = PianaGlobals.SelectProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinCog_table,
            attribute_relationship_col=PianaGlobals.cogID_col,
            proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")

         
    def get_all_protein_dbali_cluster(self, clustering_method):
        """
        Returns a list with all tuples (proteinPiana, dbali cluster ID) for a specific clustering method

        "clustering_method" is the clustering method for which the tuples are retrieved
        """
        sqlquery = PianaGlobals.SelectAllDBAliClusterRelationship.get_sqlquery(clustering_method_value=clustering_method)

        # two elements are selected for each row (number_of_selected_elems=2)
        return self.db.select_db_content(sqlquery,
                                         answer_mode="list",
                                         remove_duplicates="yes",
                                         number_of_selected_elems=2)
       
    def get_protein_dbali_cluster(self, proteinPiana_value, clustering_method, source_db):
        """
        Returns a list with all the dbali cluster IDs of protein "proteinPiana_value" for a specific clustering method

        "proteinPiana_value" is the internal piana identifier of the protein

        "clustering_method" refers to the parameters used for the clustering. Valid values are shown in PianaGlobals.pibase_dbali_methods

        "source_db" can be 'dbali', 'blast_transfer' or 'all'. It can be used to limit the dbali clusters returned
        to those found through a specific technique
        """
        sqlquery = PianaGlobals.SelectDBAliClusterRelationship.get_sqlquery(proteinPiana_value=proteinPiana_value,
                                                                            clustering_method_value=clustering_method,
                                                                            source_db_value=source_db)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
       
    def get_pair_method_protein_dbali_cluster(self, proteinPiana_value):
        """
        Returns a list of tuples (dbali cluster ID, clustering_method) for protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein
        """
        sqlquery = PianaGlobals.SelectPairMethodDBAliClusterRelationship.get_sqlquery(proteinPiana_value=proteinPiana_value)

        # two elements are selected for each row (number_of_selected_elems=2)
        return self.db.select_db_content(sqlquery,
                                         answer_mode="list",
                                         remove_duplicates="yes",
                                         number_of_selected_elems=2)
        
    def get_proteins_with_scop(self, cf=None, sf=None, fa=None):
        """
        Returns a list of the proteinPianas that have a given SCOP code

        "cf", "sf" and "fa" are the SCOP categories used to select the proteins
           --> if any of the three categories is None, that category is ignored
           --> if all the categories are set, the proteins returned have those three categories
        """
        sqlquery = PianaGlobals.SelectProteinsWithScop.get_sqlquery(cf=cf, sf=sf, fa=fa)

        return self.db.select_db_content(sqlquery,
                                         answer_mode="list",
                                         remove_duplicates="yes",
                                         number_of_selected_elems=1)
        
    def get_protein_scop_cf_sf_fa(self, proteinPiana_value):
        """
        Returns a list of tuples (cf, sf, fa) for protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein
        """
        sqlquery = PianaGlobals.SelectProteinScopRelationship.get_sqlquery(proteinPiana_value=proteinPiana_value)

        # three elements are selected for each row (number_of_selected_elems=3)
        return self.db.select_db_content(sqlquery,
                                         answer_mode="list",
                                         remove_duplicates="yes",
                                         number_of_selected_elems=3)
    
    def get_protein_scop_cf(self, proteinPiana_value):
        """
        Returns a list with the SCOP cf codes of protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein
        """
        sqlquery = PianaGlobals.SelectProteinScopCfRelationship.get_sqlquery(proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery,
                                         answer_mode="list",
                                         remove_duplicates="yes",
                                         number_of_selected_elems=1)
    
    def get_protein_scop_sf(self, proteinPiana_value):
        """
        Returns a list with the SCOP sf codes of protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein
        """
        sqlquery = PianaGlobals.SelectProteinScopSfRelationship.get_sqlquery(proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery,
                                         answer_mode="list",
                                         remove_duplicates="yes",
                                         number_of_selected_elems=1)
    
    def get_protein_scop_fa(self, proteinPiana_value):
        """
        Returns a list with the SCOP fa codes of protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein
        """
        sqlquery = PianaGlobals.SelectProteinScopFaRelationship.get_sqlquery(proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery,
                                         answer_mode="list",
                                         remove_duplicates="yes",
                                         number_of_selected_elems=1)
    
    def get_protein_cath(self, proteinPiana_value, residue_value=None):
        """
        Returns a list of cathIDs for protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier for the protein

        "residue_value" can be used to limit the caths returned to those defined around a residue
           --> If "residue_value" is None, returns all cath values for the protein
           --> If "residue_value" is not None, returns only the caths that have a segment containing
               that residue. The residues where the segment starts and ends are part of the segment
           --> "residue_value" is compared with integers: it must be a number
        """
        if residue_value is None:
            return self.db.select_db_content(
                PianaGlobals.SelectProteinCathRelationship.get_sqlquery(proteinPiana_value=proteinPiana_value)
                , answer_mode="list", remove_duplicates="yes")

        list_triplet_cath_resstart_resend = self.db.select_db_content(
            PianaGlobals.SelectProteinResiduesCathRelationship.get_sqlquery(proteinPiana_value=proteinPiana_value)
            , answer_mode="list", remove_duplicates="yes", number_of_selected_elems=3)

        # cath ids are used as dictionary keys: a cath with several segments
        # around the residue must appear only once in the result
        dict_protein_cath = {}

        for triplet_cath_resstart_resend in list_triplet_cath_resstart_resend:
            # Here, triplet_cath_resstart_resend has
            #   [0] --> cath_id
            #   [1] --> res start
            #   [2] --> res end
            res_start = int(triplet_cath_resstart_resend[1])

            # boundaries are inclusive: res start is the residue where the segment starts and
            # res end the residue where it ends, and both belong to the segment (a strict
            # comparison was excluding them)
            if res_start <= residue_value and residue_value <= int(triplet_cath_resstart_resend[2]):
                dict_protein_cath[triplet_cath_resstart_resend[0]] = None
        # END OF for triplet_cath_resstart_resend in list_triplet_cath_resstart_resend:

        return list(dict_protein_cath)
            
    def get_protein_ec(self, proteinPiana_value):
        """
        Returns a list with all EC identifiers that are assigned to protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        In case no EC identifier is found, returns an empty list.
        """
        # EC ids are read from the EC table through the generic protein attribute statement
        sqlquery = PianaGlobals.SelectProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinEC_table,
            attribute_relationship_col=PianaGlobals.ecID_col,
            proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
        
    def get_protein_description(self, proteinPiana_value):
        """
        Returns a list with all descriptions (text strings) that are assigned to protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        In case no description is found, returns an empty list.
        """
        # descriptions are read from the description table through the generic protein attribute statement
        sqlquery = PianaGlobals.SelectProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinDescription_table,
            attribute_relationship_col=PianaGlobals.proteinDescription_col,
            proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
        
    def get_protein_function(self, proteinPiana_value):
        """
        Returns a list with all functions (text strings) that are assigned to protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        In case no function is found, returns an empty list.
        """
        # functions are read from the function table through the generic protein attribute statement
        sqlquery = PianaGlobals.SelectProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinFunction_table,
            attribute_relationship_col=PianaGlobals.proteinFunction_col,
            proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
        
    def get_protein_keyword(self, proteinPiana_value):
        """
        Returns a list with all keywords (text strings) that are assigned to protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        In case no keyword is found, returns an empty list.
        """
        # keywords are read from the keyword table through the generic protein attribute statement
        sqlquery = PianaGlobals.SelectProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinKeyword_table,
            attribute_relationship_col=PianaGlobals.proteinKeyword_col,
            proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
        
        
    def get_protein_subcellularLocation(self, proteinPiana_value):
        """
        Returns a list with all cellular locations (text strings) that are assigned to protein "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        In case no cellular location is found, returns an empty list.
        """
        # locations are read from the subcellular location table through the generic protein attribute statement
        sqlquery = PianaGlobals.SelectProteinAttributeRelationship.get_sqlquery(
            attribute_relationship_table=PianaGlobals.proteinSubcellularLocation_table,
            attribute_relationship_col=PianaGlobals.proteinSubcellularLocation_col,
            proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")


    def check_keywords_in_protein(self, list_proteinPiana, protein_code=None, protein_code_type=None, keywords=[]):
        """
        Looks for the keywords given by the user in all the text information of a protein (ie. descriptions,
        functions and keyword list)

        Returns the words of "keywords" that appear somewhere in the protein information, without
        repetitions (empty if nothing is found)

        "list_proteinPiana" is the list of proteins to check (for just one protein, write [your_proteinPiana])

        External codes can be used instead: in that case, set list_proteinPiana to None and set
        protein_code and protein_code_type:

             "protein_code" is the protein external code
             "protein_code_type" is the type of code used for protein_code
                --> it has to be one of the types listed in PianaGlobals.valid_protein_types
                --> attention: the tax id is not fixed here (0 is used), therefore using geneNames is not recommended

        "keywords" is a list of strings
           --> the keywords must be written in lower case
           --> the protein texts are lowered before the comparison: the case of the words in the protein info does not matter
           --> a keyword matches when it is a substring of one of the texts
           --> for example, keywords could be ['cancer', 'onco', 'apoptosis']
        """

        # TO DO!!! allow the user to set the tax_id of protein_code
        #          or use only proteinPianas for checking keywords...

        if list_proteinPiana is None:
            # no proteinPianas given: translate the external code to proteinPianas
            code_column = utilities.get_code_column(protein_code_type)
            list_proteinPiana = self.get_list_protein_piana(proteinCode_value=protein_code,
                                                            proteinCodeType_value=code_column,
                                                            tax_id_value=0)

        # gather every text field (descriptions, functions and keywords) of every protein
        protein_texts = []
        for piana_id in list_proteinPiana:
            protein_texts += self.get_protein_description(proteinPiana_value=piana_id)
            protein_texts += self.get_protein_function(proteinPiana_value=piana_id)
            protein_texts += self.get_protein_keyword(proteinPiana_value=piana_id)
        # END OF for piana_id in list_proteinPiana:

        # keywords found are used as dictionary keys, so that each one is returned only once
        found_keywords = {}
        for lowered_text in [protein_text.lower() for protein_text in protein_texts]:
            for keyword in keywords:
                if keyword in lowered_text:
                    found_keywords[keyword] = None

        return found_keywords.keys()
        
    # -------------------------------------------
    # Access methods that retrieve proteins with a common characteristic to a given proteinPiana
    #
    # These methods are used by PianaGraphExpansion classes
    # -------------------------------------------

    def get_proteins_sharing_ec(self, proteinPiana_value ):
        """
        Returns a list of the proteinPiana proteins that share an EC identifier with "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        Empty list returned if nothing is found
        """
        if verbose:
            sys.stderr.write("Inside get_proteins_sharing_ec with proteinPiana= %s\n" %proteinPiana_value)

        # the generic "common code" statement is applied to the EC table and its EC id column
        sqlquery = PianaGlobals.SelectCommonCodeProteins.get_sqlquery(proteinCode_table=PianaGlobals.proteinEC_table,
                                                                      proteinCodeID_col=PianaGlobals.ecID_col,
                                                                      proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")

    def get_proteins_sharing_cog(self, proteinPiana_value ):
        """
        Returns a list of the proteinPiana proteins that share a COG identifier with "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        Empty list returned if nothing is found
        """
        if verbose:
            sys.stderr.write("Inside get_proteins_sharing_cog with proteinPiana= %s\n" %proteinPiana_value)

        # the generic "common code" statement is applied to the COG table and its COG id column
        sqlquery = PianaGlobals.SelectCommonCodeProteins.get_sqlquery(proteinCode_table=PianaGlobals.proteinCog_table,
                                                                      proteinCodeID_col=PianaGlobals.cogID_col,
                                                                      proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
    

    def get_proteins_sharing_scop(self, proteinPiana_value ):
        """
        Returns a list of the proteinPiana proteins that share a SCOP family with "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        Empty list returned if nothing is found
        """
        if verbose:
            sys.stderr.write("Inside get_proteins_sharing_scop with proteinPiana= %s\n" %proteinPiana_value)

        # the generic "common code" statement is applied to the SCOP table and its family (fa) column
        sqlquery = PianaGlobals.SelectCommonCodeProteins.get_sqlquery(proteinCode_table=PianaGlobals.proteinScop_table,
                                                                      proteinCodeID_col=PianaGlobals.fa_col,
                                                                      proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
    

    def get_proteins_sharing_interpro(self, proteinPiana_value ):
        """
        Returns a list of the proteinPiana proteins that share an interpro identifier with "proteinPiana_value"

        "proteinPiana_value" is the internal piana identifier of the protein

        Empty list returned if nothing is found
        """
        if verbose:
            sys.stderr.write("Inside get_proteins_sharing_interpro with proteinPiana= %s\n" %proteinPiana_value)

        # the generic "common code" statement is applied to the interpro table and its interpro id column
        sqlquery = PianaGlobals.SelectCommonCodeProteins.get_sqlquery(proteinCode_table=PianaGlobals.interPro_table,
                                                                      proteinCodeID_col=PianaGlobals.interProID_col,
                                                                      proteinPiana_value=proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
    
    
    # -------------------------------------------
    # Access methods that retrieve all proteins with a common given characteristic (not used by expansion methods)
    # -------------------------------------------

    def get_proteins_sharing_species(self, species_name_value= None, taxonomy_value= None ):
        """
        Returns a list of proteinPiana proteins that have species taxonomy_value (or speciesName)

        The user can input "taxonomy_value" (9606 for human, etc) or a "speciesName" string ('human', 'yeast', ...)

        In case both species_name_value and taxonomy_value are set to something different from None, taxonomy_value is used

        When only "species_name_value" is given, it is translated with get_taxonomies_from_species_name(): the proteins
        of every taxonomy returned by that method are collected

        The result is always a real list without duplicates (never a dictionary view, regardless of the python version)

        Empty list returned if nothing is found

        Raises ValueError if both "species_name_value" and "taxonomy_value" are None
        """
        # decide which taxonomies have to be queried (taxonomy_value has priority over species_name_value)
        if taxonomy_value is not None:
            list_taxonomy_value = [taxonomy_value]
        elif species_name_value is not None:
            list_taxonomy_value = self.get_taxonomies_from_species_name(species_name_value = species_name_value)
        else:
            raise ValueError("a taxonomy or a species name is needed to get the proteins that have it")

        # keys of this dictionary are the proteins found: using a dictionary removes duplicates across taxonomies
        dic_proteins = {}

        # a dedicated loop variable is used so that the argument "taxonomy_value" is not overwritten
        for one_taxonomy in list_taxonomy_value:
            sqlquery = PianaGlobals.SelectCommonCharacteristicProteins.get_sqlquery( proteinCode_table = PianaGlobals.proteinSpecies_table,
                                                                                     proteinCodeID_col = PianaGlobals.speciesNCBI_col,
                                                                                     proteinCodeID_value = one_taxonomy)

            for protein in self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes"):
                dic_proteins[protein] = None
        # END OF for one_taxonomy in list_taxonomy_value:

        # list() guarantees that the caller gets a list, as promised above (keys() is a view in python 3)
        return list(dic_proteins)
        
    # -------------------------------------------
    # Access (insertions and retrievals) methods to protein attributes tables
    # -------------------------------------------

    def insert_species(self, tax_id, tax_name, tax_comment, tax_kingdom=None, source_db=None):
        """
        Inserts a species into Piana

        "tax_id" is the ncbi taxonomy identifier (eg. 1)

        "tax_name" is the name given to the species by ncbi (eg. 'human')

        "tax_comment" is the associated comment to the species (eg 'nothing')

        "tax_kingdom" is the kingdom of the species (eg. 'Eukaryota')
           -> if it is None, the string "unknown" is inserted instead

        "source_db" is the external database that is giving this data (eg. 'uniprot')
           -> it is translated with get_sourceDBID() before the insertion
        """
        kingdom_to_insert = tax_kingdom
        if kingdom_to_insert is None:
            kingdom_to_insert = "unknown"

        sqlquery = PianaGlobals.InsertSpecies.get_sqlquery(speciesNCBIid_value= tax_id,
                                                           speciesName_value= tax_name,
                                                           speciesDescription_value= tax_comment,
                                                           speciesKingdom_value= kingdom_to_insert,
                                                           speciesSource_value= self.get_sourceDBID(source_db))

        self.db.insert_db_content(sqlquery, answer_mode=None)

        
    def insert_species_kingdom(self, tax_id, tax_name, tax_kingdom, source_db):
        """
        Sets the kingdom "tax_kingdom" for the species with tax id "tax_id"

        If get_species_names_from_taxonomies() finds something for "tax_id", only the kingdom is updated
        (tax name and source_db will be ignored)

        Otherwise, "tax_id", "tax_name", "tax_kingdom" and "source_db" are inserted through insert_species(),
        with an empty string as comment
        """
        known_names = self.get_species_names_from_taxonomies(list_taxonomy_ids = [tax_id])

        if not known_names:
            # there was no entry for this tax id: insert all the information
            self.insert_species(tax_id=tax_id, tax_name=tax_name, tax_comment="", tax_kingdom=tax_kingdom, source_db=source_db)
            return

        # there is already an entry for this tax id: just update its kingdom
        sqlquery = PianaGlobals.UpdateSpeciesKingdom.get_sqlquery(speciesNCBIid_value= tax_id,
                                                                  speciesKingdom_value= tax_kingdom)
        self.db.insert_db_content(sqlquery, answer_mode=None)
        

    def insert_go(self, go_id, go_name, acc, term_type, distance2root, source_db):
        """
        Inserts a GO (Gene Ontology) entry into Piana

        "go_id" is the go term id

        "go_name" is the text string associated to the term id

        "acc" is the go accession (code that looks like 'GO:234234')

        "term_type" is the type of GO term
         -> can be one of the following (or None)
          - "molecular_function"
          - "biological_process"
          - "cellular_component"

        "distance2root" is the distance between this term id and the root of the GO hierarchy

        "source_db" is the external database that is giving this information
         -> it is translated with get_sourceDBID() before the insertion
        """
        sqlquery = PianaGlobals.InsertGo.get_sqlquery(goID_value= go_id,
                                                      go_name_value= go_name,
                                                      go_acc_value= acc,
                                                      go_term_type_value=term_type,
                                                      go_distance2root_value=distance2root,
                                                      goSource_value= self.get_sourceDBID(source_db))

        self.db.insert_db_content(sqlquery, answer_mode=None)
        
    def insert_go_term2term_distance(self,term1_id,term2_id,distance):
        """
        Inserts the distance "distance" between GO terms "term1_id" and "term2_id"

        This is the distance between those terms in the GO hierarchy

        Note: no answer_mode is passed to insert_db_content here, so its default is used
        """
        sqlquery = PianaGlobals.InsertGo_term2term_distance.get_sqlquery(term1_id_value=term1_id,
                                                                         term2_id_value=term2_id,
                                                                         distance_value=distance)
        self.db.insert_db_content(sqlquery)
        
    def insert_cog(self, cog_id, cog_description, cog_function, source_db):
        """
        Inserts a COG (Cluster of Orthologous Genes) entry into Piana

        "cog_id" is the COG identifier

        "cog_description" is the text string describing the COG

        "cog_function" is the text string describing the function of genes in this cluster

        "source_db" is the database that is giving this information
          -> it is translated with get_sourceDBID() before the insertion
        """
        sqlquery = PianaGlobals.InsertCog.get_sqlquery(cogID_value= cog_id,
                                                       cogDescription_value= cog_description,
                                                       cogFunction_value= cog_function,
                                                       cogSource_value= self.get_sourceDBID(source_db))

        self.db.insert_db_content(sqlquery, answer_mode=None)
        
    # -----------------------------------------------------
    # Access (insertions and retrievals) methods to protein external databases tables
    # -----------------------------------------------------
    
    def insert_uniprotInfo(self, proteinPiana_value, swissProtID_value, swissAccessionID_value, data_class_value, description_value, geneName_value,
                           organism_value, organelle_value, proteinSequenceLength_value, proteinMW_value):
        """
        Inserts the uniprot information of a protein into piana (all info found in uniprot... this is independent
        from uniprot entries and uniprot accession numbers)

        (not used very often... it is here to be able to query piana about uniprot info)

        All arguments are handed unchanged to PianaGlobals.InsertUniprotInfo

        For a description of these fields, please refer to the uniprot manual
        """
        # every argument of this method maps to the InsertUniprotInfo keyword with the same name
        uniprot_fields = { "proteinPiana_value": proteinPiana_value,
                           "swissProtID_value": swissProtID_value,
                           "swissAccessionID_value": swissAccessionID_value,
                           "data_class_value": data_class_value,
                           "description_value": description_value,
                           "geneName_value": geneName_value,
                           "organism_value": organism_value,
                           "organelle_value": organelle_value,
                           "proteinSequenceLength_value": proteinSequenceLength_value,
                           "proteinMW_value": proteinMW_value }

        sqlquery = PianaGlobals.InsertUniprotInfo.get_sqlquery(**uniprot_fields)

        self.db.insert_db_content(sqlquery, answer_mode=None)
         
    def insert_cell_fitness(self, fitnessScore_value, reaction_value, conditions_value, cellFitnessSource_value, proteinPiana_value):
        """
        Inserts a row into table cell_fitness (cell mutants fitness under stress conditions (sorbitol) )

        "cellFitnessSource_value" is translated with get_sourceDBID() before the insertion; the other
        arguments are inserted as they are

        Used by parser fitness2piana.py. Takes data from webpage http://genomics.lbl.gov/YeastFitnessData/websitefiles/cel_index.html
        """
        source_id = self.get_sourceDBID(cellFitnessSource_value)

        sqlquery = PianaGlobals.InsertCellFitness.get_sqlquery( fitnessScore_value= fitnessScore_value,
                                                                reaction_value = reaction_value,
                                                                conditions_value = conditions_value,
                                                                cellFitnessSource_value= source_id,
                                                                proteinPiana_value= proteinPiana_value)

        self.db.insert_db_content(sqlquery, answer_mode=None)

    def get_fitness_score(self, proteinPiana_value):
        """
        Returns a list with fitness scores from table cellFitness (fitness of a cell with a mutant) for a given "proteinPiana_value"

        There can be several cell_fitness values for a single proteinPiana, depending on the conditions of the experiment

        The database layer is asked for a list answer with remove_duplicates="yes"

        Empty list returned if nothing is found
        """
        sqlquery = PianaGlobals.SelectFitnessScore.get_sqlquery( proteinPiana_value= proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
    
    def get_fitness_reaction(self, proteinPiana_value):
        """
        Returns a list with fitness reactions from table cell_fitness (fitness of a cell with a mutant) for a given proteinPiana "proteinPiana_value"

        There can be several reaction values for a single proteinPiana, depending on the conditions of the experiment

        The database layer is asked for a list answer with remove_duplicates="yes"

        Empty list returned if nothing is found
        """
        sqlquery = PianaGlobals.SelectFitnessReaction.get_sqlquery( proteinPiana_value= proteinPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
    
    def get_fitness_conditions(self, proteinPiana_value):
        """
        Returns a list with the fitness conditions from table cell_fitness (fitness of a cell with a mutant) for a given protein "proteinPiana_value"

        There can be several conditions values for a single proteinPiana

        The database layer is asked for a list answer with remove_duplicates="yes"

        Empty list returned if nothing is found
        """
        sqlquery = PianaGlobals.SelectFitnessConditions.get_sqlquery( proteinPiana_value= proteinPiana_value)

        cell_conditions = self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
        return cell_conditions
    
    def get_fitness_score_reaction_conditions(self, proteinPiana_value):
        """
        Returns the fitness score, reaction and conditions from table cell_fitness for a given "proteinPiana_value"

        Returns list with triplets: [ [fitness_score1, reaction1, conditions1], [fitness_score2, reaction2, conditions2], ..]

        The database layer is asked for a list answer with remove_duplicates="yes"

        Empty list returned if nothing is found
        """
        sqlquery = PianaGlobals.SelectFitnessScoreReactionConditions.get_sqlquery( proteinPiana_value= proteinPiana_value)

        list_triplets = self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
        return list_triplets

   
    # ------------------------------------
    # Access  methods (insertions, deletions, retrievals) to interaction tables
    # ------------------------------------

    def delete_interaction(self, interactionPiana_value):
        """
        Deletes the entry of table interaction that has interactionPiana = "interactionPiana_value"

        Attention! This only deletes the row in table interaction: before deleting an interactionPiana, make sure
        that the rows of the other interaction tables that refer to "interactionPiana_value" are deleted as well
        """
        # deletions are sent through insert_db_content, with no answer expected
        sqlquery = PianaGlobals.DeleteInteraction.get_sqlquery(interactionPiana_value= interactionPiana_value)
        self.db.insert_db_content(sqlquery, answer_mode = None)


    def delete_interaction_sourceDB_for_sourceDBID(self, sourceDBID_value):
        """
        Deletes entries of table interactionSourceDB with sourceDBID = "sourceDBID_value"

        Before deleting, a select is performed to remember which entries are going to be removed

        Returns the list obtained from that select (the interactionPiana that have been deleted from table interactionSourceDB)
        """
        # 1. keep track of what is about to be deleted
        select_query = PianaGlobals.SelectInteractionSourceDB.get_sqlquery(sourceDBID_value= sourceDBID_value)
        list_deleted = self.db.select_db_content(select_query, answer_mode = "list")

        # 2. do the deletion (insert_db_content can also be used for deleting... since it follows same format as insertions)
        delete_query = PianaGlobals.DeleteInteractionSourceDB.get_sqlquery(sourceDBID_value= sourceDBID_value)
        self.db.insert_db_content(delete_query, answer_mode = None)

        return list_deleted

        
    def delete_interaction_method_for_sourceDBID(self, sourceDBID_value):
        """
        Deletes entries of table interactionMethod with sourceDBID = "sourceDBID_value"

        Nothing is returned
        """
        # deletions are sent through insert_db_content, with no answer expected
        sqlquery = PianaGlobals.DeleteInteractionMethod.get_sqlquery(sourceDBID_value= sourceDBID_value)
        self.db.insert_db_content(sqlquery, answer_mode = None)
        
    def delete_interaction_scores_for_sourceDBID(self, sourceDBID_value):
        """
        Deletes entries of table interactionScores with sourceDBID = "sourceDBID_value"

        Nothing is returned
        """
        # deletions are sent through insert_db_content, with no answer expected
        sqlquery = PianaGlobals.DeleteInteractionScores.get_sqlquery(sourceDBID_value= sourceDBID_value)
        self.db.insert_db_content(sqlquery, answer_mode = None)
        
    def delete_interaction_protein_source_for_sourceDBID(self, sourceDBID_value):
        """
        Deletes entries of table interactionProteinSource with sourceDBID = "sourceDBID_value"

        Nothing is returned
        """
        # deletions are sent through insert_db_content, with no answer expected
        sqlquery = PianaGlobals.DeleteInteractionProteinSource.get_sqlquery(sourceDBID_value= sourceDBID_value)
        self.db.insert_db_content(sqlquery, answer_mode = None)

 
    def insert_interaction(self, proteinPianaA_value, isSourceA_value, proteinPianaB_value, isSourceB_value, interactionConfidence_value,
                           methodDescription_value, sourceDBDescription_value, confidenceAssignedSourceDB_value, pubmed_id_value="unknown"  ):

        """
        Inserts a new interaction into piana database and returns its interactionPiana.

        "proteinPianaA_value" is the proteinPiana for one side of the interaction
        "isSourceA_value" sets if the interaction goes from A to B (1) or not (0)

        "proteinPianaB_value" is the proteinPiana for the other side of the interaction
        "isSourceB_value" sets if the interaction goes from B to A (1) or not (0)

        if "isSourceA_value" and "isSourceB_value" are 1, the interaction is bi-directional

        "interactionConfidence_value" is the reliability of the interaction (not being used currently)

        "methodDescription_value" is the method name that detected this interaction (eg. 'yeast two hybrids')
          --> must appear in PianaGlobals.method_names

        "sourceDBDescription_value" is the source database that contains this interaction (eg. 'DIP')
          --> must appear in PianaGlobals.interaction_databases

        "confidenceAssignedSourceDB_value" is the reliability assigned to the interaction by the source database

        "pubmed_id_value" is the pubmed identifier for the article where this interaction was described

        Raises TypeError if "proteinPianaA_value" or "proteinPianaB_value" is a string

        Things this method does are:

           - makes sure the order proteinPianaA < proteinPianaB is respected (swapping both sides if needed)
           - searches the methodID corresponding to the method description
           - searches the sourceDBID corresponding to the sourceDB description

            1. check if interaction exists already, retrieve interactionPiana in case it does
            2. if interactionPiana is None:
                   insert information of table interaction and retrieve new interactionPiana
            3. insert interactionSourceDB with interactionPiana
            4. insert interactionMethod according to this sourceDB with interactionPiana
            5. insert interactionFeatures (pubmed id) according to this sourceDB with interactionPiana


        Attention! The user is not allowed to limit the insertions to those from a specific database or method. If he wants to
        filter the database, do it afterwards... the pianaDB will contain all interactions regardless of origin
         (if one day somebody wants to change this, he will have to receive arguments list_source_dbs and list_source_methods
          and use them below to limit the insertion)

        """
        # TO DO!!!! interactionConfidence_value should not be given by user, but calculated internally from the other confidences
        #           availables... therefore, it shouldn't be an argument of this method

        for proteinPiana_candidate in (proteinPianaA_value, proteinPianaB_value):
            if isinstance(proteinPiana_candidate, str):
                raise TypeError("insert_interaction needs numeric values for proteinPianaA_value and proteinPianaB_value")

        # making sure that order proteinPianaA < proteinPianaB is respected: each protein is swapped together with its isSource flag
        if proteinPianaB_value < proteinPianaA_value:
            (proteinPianaA_value, isSourceA_value,
             proteinPianaB_value, isSourceB_value) = (proteinPianaB_value, isSourceB_value,
                                                      proteinPianaA_value, isSourceA_value)

        # translating descriptions into internal identifiers: first the method, then the source database
        methodID_value = self.get_methodID(methodDescription_value = methodDescription_value)
        sourceDBID_value = self.get_sourceDBID(sourceDBDescription_value = sourceDBDescription_value)

        if verbose_insert_interaction:
            sys.stderr.write("Inserting interaction from PianaDBaccess with values: \n")

            debug_lines = ( ("proteinPianaA_value: %s\n", proteinPianaA_value),
                            ("isSourceA_value: %s\n", isSourceA_value),
                            ("proteinPianaB_value: %s\n", proteinPianaB_value),
                            ("isSourceB_value: %s\n", isSourceB_value),
                            ("interactionConfidence_value: %s\n", interactionConfidence_value),
                            ("methodID_value: %s\n", methodID_value),
                            ("sourceDBID_value: %s\n", sourceDBID_value),
                            ("pubmed_id_value: %s\n", pubmed_id_value) )

            for debug_template, debug_value in debug_lines:
                sys.stderr.write(debug_template %debug_value)


        # TO DO!!!!!!! I am not taking into account if interactions are bi-directional or not!!!!

        # 1. check if interaction exists already, retrieve interactionPiana in case it does (ignoring source dbs and methods)
        interaction_id = self.get_interactionPiana(proteinPianaA_value=proteinPianaA_value,
                                                   proteinPianaB_value=proteinPianaB_value,
                                                   list_source_dbs= "all",
                                                   inverse_dbs= "no",
                                                   list_source_methods= "all",
                                                   inverse_methods= "no")

        # 2. if interaction wasn't in DB: insert new information into table interaction_table
        if interaction_id is None:
            interaction_query = PianaGlobals.InsertInteraction.get_sqlquery(proteinPianaA_value=proteinPianaA_value,
                                                                            isSourceA_value=isSourceA_value,
                                                                            proteinPianaB_value=proteinPianaB_value,
                                                                            isSourceB_value=isSourceB_value,
                                                                            interactionConfidence_value=interactionConfidence_value )
            interaction_id = self.db.insert_db_content(interaction_query, answer_mode = "last_id")
        # END OF if interaction_id is None:

        # 3. inserting information into table interactionSourceDB_table
        #    in case the row (interactionPiana, sourceDBID) already exists, the insertion is ignored (INSERT IGNORE)
        source_db_query = PianaGlobals.InsertInteractionSourceDB.get_sqlquery(sourceDBID_value=sourceDBID_value,
                                                                              confidenceAssignedSourceDB_value=confidenceAssignedSourceDB_value,
                                                                              interactionPiana_value=interaction_id)
        self.db.insert_db_content(source_db_query, answer_mode = None)

        # 4. inserting information into table interactionMethod_table
        #    in case the row (interactionPiana, methodID, sourceDBID) already exists, the insertion is ignored (INSERT IGNORE)
        method_query = PianaGlobals.InsertInteractionMethod.get_sqlquery(methodID_value=methodID_value,
                                                                         interactionPiana_value=interaction_id,
                                                                         sourceDBID_value=sourceDBID_value)
        self.db.insert_db_content(method_query, answer_mode = None)

        # 5. inserting information into table interactionFeatures_table
        #    in case the row (interactionPiana, pubmedID, sourceDBID) already exists, the insertion is ignored (INSERT IGNORE)
        features_query = PianaGlobals.InsertInteractionFeatures.get_sqlquery(pubmedID_value=pubmed_id_value,
                                                                             interactionPiana_value=interaction_id,
                                                                             sourceDBID_value=sourceDBID_value)
        self.db.insert_db_content(features_query, answer_mode = None)

        return interaction_id
    
    def insert_interaction_scores(self, interactionPiana_value, sourceDBDescription_value,
                                  equiv_nscore_value, equiv_nscore_transferred_value,
                                  equiv_fscore_value,
                                  equiv_pscore_value, equiv_hscore_value, array_score_value, array_score_transferred_value,
                                  experimental_score_value, experimental_score_transferred_value, database_score_value, database_score_transferred_value,
                                  textmining_score_value, textmining_score_transferred_value, combined_score_value  ):
        """
        Inserts the scores of interaction "interactionPiana_value" into table interactionScores_table

        "sourceDBDescription_value" is translated with get_sourceDBID() before the insertion; all the
        score arguments are handed unchanged to PianaGlobals.InsertInteractionScores

        This table only holds information for interactions contained in STRING

        Refer to the string manual for description of the different arguments
        """
        # obtaining the sourceDBID corresponding to sourceDBDescription_value
        sourceDBID_value = self.get_sourceDBID(sourceDBDescription_value = sourceDBDescription_value)

        # every score maps to the InsertInteractionScores keyword with the same name as the argument of this method
        score_fields = { "interactionPiana_value": interactionPiana_value,
                         "sourceDBID_value": sourceDBID_value,
                         "equiv_nscore_value": equiv_nscore_value,
                         "equiv_nscore_transferred_value": equiv_nscore_transferred_value,
                         "equiv_fscore_value": equiv_fscore_value,
                         "equiv_pscore_value": equiv_pscore_value,
                         "equiv_hscore_value": equiv_hscore_value,
                         "array_score_value": array_score_value,
                         "array_score_transferred_value": array_score_transferred_value,
                         "experimental_score_value": experimental_score_value,
                         "experimental_score_transferred_value": experimental_score_transferred_value,
                         "database_score_value": database_score_value,
                         "database_score_transferred_value": database_score_transferred_value,
                         "textmining_score_value": textmining_score_value,
                         "textmining_score_transferred_value": textmining_score_transferred_value,
                         "combined_score_value": combined_score_value }

        # inserting information into table interactionScores_table
        sqlquery = PianaGlobals.InsertInteractionScores.get_sqlquery(**score_fields)
        self.db.insert_db_content(sqlquery, answer_mode = None)
        

    def insert_interaction_protein_source(self, interactionPiana_value = None, proteinPianaSource_value = None, sourceDBDescription_value= None):
        """
        Inserts the protein source of an interaction into table interactionProteinSource_table

        "proteinPianaSource_value" is the protein that originated the predicted interaction "interactionPiana_value"

        "sourceDBDescription_value" is translated with get_sourceDBID() before the insertion

        This table only has information for interactions that were obtained by an expansion prediction
        """
        # obtaining the sourceDBID corresponding to sourceDBDescription_value
        sourceDBID_value = self.get_sourceDBID(sourceDBDescription_value = sourceDBDescription_value)

        sqlquery = PianaGlobals.InsertInteractionProteinSource.get_sqlquery(interactionPiana_value = interactionPiana_value,
                                                                            proteinPianaSource_value = proteinPianaSource_value,
                                                                            sourceDBID_value=sourceDBID_value)

        self.db.insert_db_content(sqlquery, answer_mode = None)


    def get_interactionPiana(self, proteinPianaA_value, proteinPianaB_value,
                             list_source_dbs= "all", inverse_dbs="no", list_source_methods= "all", inverse_methods="no"):
        """
        Returns the interactionPiana for the interaction between proteins "proteinPianaA_value" and "proteinPianaB_value", considering only those
        interaction source databases listed in "list_source_dbs" ('all' returns all interactions from all databases) and those interaction
        source methods listed in "list_source_methods" ('all' returns all interactions from all methods)

        "inverse_dbs" and "inverse_methods" are handed unchanged to PianaGlobals.SelectInteractionPiana

        No need to respect order of proteinPianaA and proteinPianaB: place them as you want, it will be taken care of.

        If there is not such interaction, returns None
        """
        sqlquery = PianaGlobals.SelectInteractionPiana.get_sqlquery(proteinPianaA_value=proteinPianaA_value,
                                                                    proteinPianaB_value=proteinPianaB_value,
                                                                    list_source_dbs=list_source_dbs,
                                                                    inverse_dbs= inverse_dbs,
                                                                    list_source_methods=list_source_methods,
                                                                    inverse_methods= inverse_methods )

        # a single value is requested from the database layer
        return self.db.select_db_content(sqlquery, answer_mode = "single")


    def get_all_partners(self, proteinPiana_value= None, use_self_ints="yes",
                         list_source_dbs= "all", inverse_dbs="no", list_source_methods= "all", inverse_methods="no",
                         threshold = 0):
        """
        Returns a list with all partners (proteinPianas) of "proteinPiana_value", considering only those
        interaction source databases listed in "list_source_dbs" ('all' gets all interactions from all databases) and those
        interaction source methods listed in "list_source_methods" ('all' gets all interactions from all methods).
        "inverse_dbs" and "inverse_methods" are forwarded unchanged to the query builder.

        "use_self_ints" controls whether "proteinPiana_value" can appear in its own list of partners:
           - if use_self_ints == "no", "proteinPiana_value" is removed from the partners before returning
           - any other value (default "yes") leaves it in the list in case it exists

        "threshold" is used to limit the number of partners that can be returned:
           - if "threshold" == 0, then always return all partners
           - if threshold!=0 and number of partners > threshold, then return empty list (no partners)
           (the number of partners is counted after the self interaction has been removed, if it was asked for)

        Returns a new list (never a dictionary view) without duplicates.
        Returns empty list if no partner is found.

        Raises ValueError if "proteinPiana_value" is None
        """
        if proteinPiana_value is None:
            raise ValueError("arguments to get_all_partners cannot be None")

        # dictionary keys are used as a duplicate-free collection of partners
        dic_partners = {}

        # get partners in table for both possible cases: proteinPiana can be the lowest value (A) or the highest (B)
        for partner_side in ("A", "B"):
            side_partners = self.db.select_db_content(
                PianaGlobals.SelectPartners.get_sqlquery( proteinPiana_value= proteinPiana_value,
                                                          partner_side= partner_side,
                                                          list_source_dbs=list_source_dbs,
                                                          inverse_dbs= inverse_dbs,
                                                          list_source_methods=list_source_methods,
                                                          inverse_methods= inverse_methods),
                answer_mode="list", remove_duplicates="yes")

            for partner in side_partners:
                dic_partners[partner] = None
        # END OF for partner_side in ("A", "B"):

        if use_self_ints == "no":
            # if user asked to remove self interactions, do it... (pop with a default does nothing
            # in case itself wasn't there, without hiding any unrelated error)
            dic_partners.pop(proteinPiana_value, None)
        # END OF if use_self_ints == "no":

        if threshold != 0 and len(dic_partners) > threshold:
            # too many partners for the threshold set by the user: return no partners at all
            return []

        # list() makes sure a real list is returned regardless of the python version
        return list(dic_partners)

            
    def get_all_g2_partners(self, proteinPiana_value= None, use_self_ints="yes",
                            list_source_dbs= "all", inverse_dbs="no", list_source_methods= "all", inverse_methods="no",
                            threshold = 0):
        """
        Returns a list with the partners (proteinPianas) of the partners of "proteinPiana_value" (ie. proteins reached
        in two steps), considering only those interaction source databases listed in "list_source_dbs" ('all' gets all
        interactions from all databases) and those interaction source methods listed in "list_source_methods" ('all' gets
        all interactions from all methods)

             --> If A interacts with B and C, and B interacts with D and C with no other protein, then D is returned

             --> NOTE(review): this method does not filter its result in any way: it is the plain union of what
                               get_all_partners() returns for each partner at distance 1. Therefore, if
                               get_all_partners() of B returns A (A is a partner of B), A itself is in the result
                               as well, and the same goes for partners at distance 1 that interact with each other.
                               Confirm whether callers expect those to be removed.

        All arguments are passed unchanged to get_all_partners(), both for the call that gets the partners at
        distance 1 and for the calls that get the partners of each of them.

        "threshold" is used to limit the number of partners that each of the proteins at distance 1 can have
           - if "threshold" == 0, then always return all partners for all proteins
           - if threshold!=0 and number of partners > threshold, then return empty list (no partners) for that particular protein

           --> ATTENTION!!!! threshold does not apply to the partners being returned by this method, but to the individual calls
                             that this method makes to get_all_partners. Therefore, this method can return whichever number of
                             partners, guaranteeing that no single protein has added to the list more than "threshold" partners

        Returns a new list (never a dictionary view) without duplicates.
        Returns empty list if no partner at distance 2 is found

        Raises ValueError if "proteinPiana_value" is None
        """
        if proteinPiana_value is None:
            raise ValueError("arguments to get_all_g2_partners cannot be None")

        list_partners_g1 = self.get_all_partners( proteinPiana_value= proteinPiana_value,
                                                  use_self_ints= use_self_ints,
                                                  list_source_dbs= list_source_dbs, inverse_dbs=inverse_dbs,
                                                  list_source_methods= list_source_methods, inverse_methods=inverse_methods,
                                                  threshold = threshold)

        # dictionary keys are used as a duplicate-free collection of partners
        dic_partners_g2 = {}
        for partner_g1 in list_partners_g1:

            list_partners_of_g1 = self.get_all_partners( proteinPiana_value= partner_g1,
                                                         use_self_ints= use_self_ints,
                                                         list_source_dbs= list_source_dbs, inverse_dbs=inverse_dbs,
                                                         list_source_methods= list_source_methods, inverse_methods=inverse_methods,
                                                         threshold = threshold)

            for temp_partner_g2 in list_partners_of_g1:
                dic_partners_g2[temp_partner_g2] = None
            # END OF for temp_partner_g2 in list_partners_of_g1:
        # END OF for partner_g1 in list_partners_g1:

        # list() makes sure a real list is returned regardless of the python version
        return list(dic_partners_g2)

    def get_all_protein_protein_interactions(self, use_self_ints="yes",
                                             list_source_dbs= "all", inverse_dbs="no", list_source_methods= "all", inverse_methods="no"):
        """
        Returns a list of triplets (proteinPianaA, proteinPianaB, interactionPiana), considering only those
        interaction source databases listed in "list_source_dbs" ('all' gets all interactions from all databases) and those
        interaction source methods listed in "list_source_methods" ('all' gets all interactions from all methods)

        All arguments are handed unchanged to the sql query builder: this method does no filtering of its own.

        "use_self_ints" decides whether interactions between a protein and itself are returned.
           --> NOTE(review): the handling of this argument is done inside the query builder and cannot be seen from
                             this method. get_all_partners() removes self interactions when use_self_ints == "no":
                             confirm that the query builder follows the same convention.

        if no interaction is found, returns empty list
        """
        # build the sql statement first, and then ask the database for 3 elements per row
        sqlquery = PianaGlobals.SelectAllProteinProteinInteractions.get_sqlquery( use_self_ints = use_self_ints,
                                                                                  list_source_dbs=list_source_dbs,
                                                                                  inverse_dbs= inverse_dbs,
                                                                                  list_source_methods=list_source_methods,
                                                                                  inverse_methods= inverse_methods)

        return self.db.select_db_content(sqlquery,
                                         answer_mode = "list", remove_duplicates="yes", number_of_selected_elems=3)
         
    def get_partners_of_proteins_sharing_cog(self, proteinPiana_value= None,
                                             list_source_dbs= "all", inverse_dbs="no", list_source_methods= "all", inverse_methods="no"):
        """
        Returns interaction partners (proteinPianas) of those proteins that share the cogID with "proteinPiana_value",
        considering only those interaction source databases listed in "list_source_dbs" ('all' gets all interactions from
        all databases) and those interaction source methods listed in "list_source_methods" ('all' gets all interactions
        from all methods). "inverse_dbs" and "inverse_methods" are forwarded unchanged to the query builder.

        The database is queried twice (partner side "A" and partner side "B") and both answers are merged.

        Returns a new list (never a dictionary view) without duplicates. The lists returned by the database
        are not modified.
        Returns empty list if nothing is found on any of the two sides

        Raises ValueError if "proteinPiana_value" is None
        """
        if proteinPiana_value is None:
            raise ValueError("arguments to get_partners_of_proteins_sharing_cog cannot be None")

        # dictionary keys are used as a duplicate-free collection of partners
        dic_partners = {}

        for partner_side in ("A", "B"):
            side_partners = self.db.select_db_content(
                PianaGlobals.SelectPartnersProteinSharingCog.get_sqlquery( proteinPiana_value, partner_side,
                                                                           list_source_dbs=list_source_dbs,
                                                                           inverse_dbs= inverse_dbs,
                                                                           list_source_methods=list_source_methods,
                                                                           inverse_methods= inverse_methods),
                answer_mode="list", remove_duplicates="yes")

            # an empty (or missing) answer for one side simply contributes no partners
            for partner in side_partners or ():
                dic_partners[partner] = None
        # END OF for partner_side in ("A", "B"):

        # list() makes sure a real list is returned regardless of the python version
        return list(dic_partners)

    def get_partner(self, interaction_id= None, proteinPiana_value= None):
        """
        Returns the partner (proteinPiana) of "proteinPiana_value" in interaction "interaction_id"

        The partner is looked for on side "A" of the interaction first: side "B" is only queried when
        side "A" gave no answer (None).

        There is no check on source databases or methods: the user calls this method with an interactionPiana
        he already retrieved, and he only retrieves interactionPianas that respect his db and method restrictions

        Raises ValueError if any of the arguments is None
        Raises Exception if no partner is found on any of the two sides
        """

        # TO DO!! change argument name interaction_id to interactionPiana_value

        if None in (interaction_id, proteinPiana_value):
            raise ValueError("arguments to get_partner cannot be None")

        for partner_side in ("A", "B"):
            found_partner = self.db.select_db_content(
                PianaGlobals.SelectInteractionPartner.get_sqlquery( proteinPiana_value ,interaction_id, partner_side),
                answer_mode="single")

            if found_partner is not None:
                return found_partner
        # END OF for partner_side in ("A", "B"):

        # none of the sides had a partner: the interaction and the protein do not match
        raise Exception("Strange error: not finding partner of proteinPiana_value %s which was supposed to have an interaction %s" \
                        %(proteinPiana_value, interaction_id ))

    def get_interaction_sourceDB_list(self, interactionPiana_value):
        """
        Returns the list of sourceDB (without duplicates) where interaction "interactionPiana_value" appears

        Returns empty list if nothing is found

        Attention! This method has no list_source_dbs or list_source_methods arguments on purpose: the user can find out
        whether the interaction appears in other databases, even if he limited the network to a list of source databases
          -> this does not change which interactions are added to the network: it only affects the edge attribute, which
          will hold the complete list of sourceDBs and methods, regardless of restrictions imposed by user.
        """
        sqlquery = PianaGlobals.SelectInteractionSourceDB.get_sqlquery(interactionPiana_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")

    def get_interaction_methodID_list(self, interactionPiana_value):
        """
        Returns the list of methodID (without duplicates) for interaction "interactionPiana_value", not
        restricting the search to any source database (the query is built with sourceDBID_value = None)

        Returns empty list if nothing is found

        Attention! This method has no list_source_dbs or list_source_methods arguments on purpose: the user can find out
        whether the interaction appears in other databases or methods, even if he limited the network to a list of sources
          -> this does not change which interactions are added to the network: it only affects the edge attribute, which
          will hold the complete list of sourceDBs and methods, regardless of restrictions imposed by user.
          -> Right now this method is only used to check if an interaction can be deleted
             (./dbModification/delete_interactions_from_db.py)
        """
        # TO DO!!! Right now, the edge attribute does not contain a list of methodIDs

        sqlquery = PianaGlobals.SelectInteractionMethod.get_sqlquery(interactionPiana_value= interactionPiana_value,
                                                                     sourceDBID_value = None)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")
  
    def get_interaction_methodID_list_for_sourceDB(self, interactionPiana_value, sourceDBID_value):
        """
        Returns the list of methodID (without duplicates) for interaction "interactionPiana_value", taking
        into account only source database "sourceDBID_value"

        Returns empty list if nothing is found

        Attention! This method has no list_source_dbs argument: the sourceDB is fixed by the arguments, so it is up to
        the user not to ask for the methodIDs of a database he is not interested in...
        """
        sqlquery = PianaGlobals.SelectInteractionMethod.get_sqlquery(interactionPiana_value= interactionPiana_value,
                                                                     sourceDBID_value = sourceDBID_value)

        return self.db.select_db_content(sqlquery, answer_mode="list", remove_duplicates="yes")

    def lock_tables(self, table_list= None):
        """
        Locks the mysql tables listed in "table_list", by sending the sql statement built
        by PianaGlobals.LockTables to the database

        Raises ValueError if "table_list" is None
        """
        if table_list is None:
            # nothing to lock: this is considered a programming error
            raise ValueError("lock tables called without tables to lock")

        lock_query = PianaGlobals.LockTables.get_sqlquery(table_list)
        self.db.select_db_content(lock_query, answer_mode="single")

    def unlock_tables(self):
        """
        Releases the locks on tables that were set with method lock_tables(), by sending the sql
        statement built by PianaGlobals.UnlockTables to the database
        """
        unlock_query = PianaGlobals.UnlockTables.get_sqlquery()
        self.db.select_db_content(unlock_query, answer_mode="single")
