"""
File        : PSIContentHandler.py
Author      : Ramon Aragues
Creation    : 19.10.2005
Contents    : XML content handler for PSI xml files
Called from : parsePSI.py

=======================================================================================================

This file implements the content handler for PSI xml files.


Basically, it does the following:

- each label <proteinInteractor> is inserted into a psi database for keeping relationship between internal
  identifiers of the database and the external references

- each label <interaction> is parsed to retrieve the interactions and insert them into the psi database


Attention!!! PSI is beautifully nice but most databases do not
respect it, therefore this parser is not guaranteed to work with all PSI
datafiles. It has been tested with BIND, MIPS and HPRD.

To facilitate the parsing, the user has to say in the command line which is the database he is parsing.
If you want to parse a database that is not one of the dbs listed, you have three options:
   - telling the parser that you are parsing the database that most resembles your database
   - giving the parser a random name(eg the name of the database you are parsing), which will make it
     apply the general rules of PSI. I don't think it will work, but try it...
   - modifying the code...

In case you encounter problems parsing other PSI datafiles, you'll
have to modify the code. Some things you might want to look at when
modifying the code are:

   --> identifiers used to refer to external databases
   --> where does each database place its information
   

Complete step-by-step description on how to use this parser is given in README.populate_piana_db

"""

# PSIContentHandler.py: XML content handler for PSI xml files
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues


import re
import sys
from xml.sax import make_parser, SAXException
from xml.sax.handler import ContentHandler

import MySQLdb

from PianaDB import *


verbose_detailed = 0  # set to 1 to trace every parsing step on stderr


class PSIContentHandler(ContentHandler):
    """SAX content handler that extracts proteins and interactions from PSI xml files.

    The handler understands three dialects, selected through ``psi_db``:

    - "hprd": proteins are described in top-level <proteinInteractor> elements
      (which carry the database's own id) and interactions refer to them
      through <proteinInteractorRef>.
    - "bind" and "mips": proteins are described inside each
      <proteinParticipant> of an interaction and have no id of their own, so
      an internal counter is generated for every participant.

    With ``output_mode == "psiDB"`` everything read is inserted into the psi
    MySQL database.  With any other output mode no database is opened: the
    database writes become no-ops and BIND/MIPS participants are numbered
    with a local counter, so the file can still be parsed and printed.

    Columns for which no value is known are filled with the literal string
    "NULL" (not SQL NULL); this is the convention of the psi database tables.

    NOTE: the handler never commits or closes ``psi_dbconnection``; callers
    that need it must do so themselves.
    """

    # characters that may separate several PubMed ids packed in one attribute
    _PUBMED_ID_SEPARATORS = re.compile(r'[;,.:\s\-]')

    # MIPS lists alternative gene names as "PAPOLA or PAP or PLAP": only split
    # on the standalone word, never on an "or" inside a name
    _NAME_ALTERNATIVES = re.compile(r'\s+or\s+')

    def __init__(self, output_mode,  verbose= 0, psi_db = None, psi_dbhost= None, psi_dbname= None, psi_dbuser= None, psi_dbpass= None):
        """Initialise the parsing state and, for psiDB mode, open the MySQL connection.

        output_mode -- "psiDB" to insert into the psi database, "text" to print on stderr
        verbose     -- when true, print what is found even in psiDB mode
        psi_db      -- name of the database being parsed ("hprd", "bind", "mips", ...)
        psi_dbhost, psi_dbname, psi_dbuser, psi_dbpass -- MySQL connection parameters

        Raises ValueError in psiDB mode when no host or no database name is given.
        """
        ContentHandler.__init__(self)

        self.output_mode = output_mode
        self.psi_db = psi_db
        self.verbose = verbose

        self.mobile = None

        self.current_counter = None # only used by BIND and MIPS: hprd uses its own ids

        self.buffer = ''
        self.currentEntity = None   # top-level element being read: "proteinInteractor", "interaction" or None
        self.currentLabel = None    # sub-element context inside the current entity
        self.tag = None             # which participant text ("gene_name"/"gene_description") is expected in buffer
        self.organismLabel = 0      # 1 while we are inside an <organism> element
        self.method = []
        self.protein_db_id = None
        self.protein_sequence = None
        self.list_pubmed_ids = ['unknown']
        self.current_tax_id = 0
        self.codes_associated = []
        self.protein_participants = []

        self.number_ints_inserted = 0
        self.number_ints_with_one_participant = 0

        # no database unless psiDB mode is requested
        self.psi_dbconnection = None
        self.cursor = None
        self._local_counter = 0     # source of internal ids when there is no database

        if self.output_mode == "psiDB":
            if psi_dbname is None or psi_dbhost is None:
                raise ValueError("trying to establish a connection to psi database without giving a host or database name")

            # only pass the credentials that were given: a password always travels with the user argument
            connection_args = {"db": psi_dbname, "host": psi_dbhost}
            if psi_dbuser is not None or psi_dbpass is not None:
                connection_args["user"] = psi_dbuser
            if psi_dbpass is not None:
                connection_args["passwd"] = psi_dbpass

            self.psi_dbconnection = MySQLdb.connect(**connection_args)
            self.cursor = self.psi_dbconnection.cursor()

            if verbose_detailed:
                sys.stderr.write("connections to mysql database established\n")

    # ------------------------------------------------------------------
    # database helpers
    # ------------------------------------------------------------------

    def _execute(self, sqlquery, parameters):
        """Run a parameterized statement; does nothing when no database is open."""
        if self.cursor is None:
            return
        self.cursor.execute(sqlquery, parameters)

    def _insert_protein_code(self, counter_id, sequence="NULL", description="NULL", sp_acc="NULL", embl_acc="NULL", gi="NULL"):
        """Insert one proteinCodes row; unknown columns keep the "NULL" placeholder string."""
        if counter_id is None:
            # there is no protein to attach the code to
            return
        self._execute("INSERT IGNORE INTO proteinCodes "
                      "(counter_id, sequence, description, spAcc_id, emblAcc_id, gi_id) "
                      "VALUES (%s, %s, %s, %s, %s, %s);",
                      (counter_id, sequence, description, sp_acc, embl_acc, gi))

    def _insert_protein_tax_id(self, counter_id, tax_id):
        """Insert the taxonomy id of a protein into proteinFeatures."""
        if counter_id is None:
            return
        self._execute("INSERT IGNORE INTO proteinFeatures "
                      "(counter_id, organism, tax_id) "
                      "VALUES (%s, %s, %s);",
                      (counter_id, "NULL", str(tax_id)))

    def _register_protein(self, db_id):
        """Insert db_id into the proteins table and return the counter id generated for it."""
        self.cursor.execute("INSERT IGNORE INTO proteins (db_id) VALUES (%s);", (db_id,))
        self.cursor.execute("Select LAST_INSERT_ID()")
        return self.cursor.fetchall()[0][0]

    def _counter_for_db_id(self, db_id):
        """Return the counter id stored for db_id, or None when the protein is not in the database."""
        self.cursor.execute("SELECT counter_id from proteins where db_id=%s;", (db_id,))
        answer = self.cursor.fetchall()
        if answer:
            return answer[0][0]
        return None

    def _new_protein_counter(self):
        """Return a fresh internal protein id (BIND and MIPS have no ids of their own)."""
        if self.cursor is None:
            self._local_counter += 1
            return self._local_counter
        # we just want a new counter_id to be generated: the db_id itself is irrelevant
        return self._register_protein("irrelevant")

    # ------------------------------------------------------------------
    # text helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _clean_description(text):
        """Return text without quotes (replaced by spaces) and surrounding whitespace."""
        return text.replace("'", " ").replace('"', " ").strip()

    @classmethod
    def _split_mips_full_name(cls, full_name):
        """Split a MIPS <fullName> text into its individual names and descriptions.

        The text looks like
                   YWHAE; 14-3-3 protein epsilon
                   PAPOLA or PAP or PLAP; Poly(A) polymerase alpha
        i.e. a first field with or-separated names followed by ;-separated
        descriptions.  Empty pieces are dropped.
        """
        fields = full_name.replace("'", " ").replace('"', " ").split(";")
        pieces = cls._NAME_ALTERNATIVES.split(fields[0]) + fields[1:]
        stripped_pieces = [piece.strip() for piece in pieces]
        return [piece for piece in stripped_pieces if piece]

    # ------------------------------------------------------------------
    # SAX callbacks
    # ------------------------------------------------------------------

    def startElement(self, labelName, attrs):
        """Called whenever the parser enters an xml element: set the flags that say where we are.

        labelName -- name of the element being opened
        attrs     -- attributes of the element (anything offering get())
        """
        if self.currentEntity !="interaction" and  labelName == "proteinInteractor":
            # description of a protein at the main level (eg HPRD). MIPS and BIND place
            # proteinInteractor labels inside interactions, where they are handled by the interaction itself
            self.currentEntity = "proteinInteractor"
            self.protein_db_id = attrs.get('id')  # this is the id given by the psi db
            self.codes_associated = []

        elif labelName == "interaction":
            # a list of proteins that interact: start with empty lists
            self.currentEntity = "interaction"
            self.protein_participants = []
            self.method = []

        elif labelName == "organism":
            self._start_organism(attrs)

        elif self.currentEntity == "proteinInteractor":
            self._start_inside_protein(labelName, attrs)

        elif self.currentEntity == "interaction":
            self._start_inside_interaction(labelName, attrs)

        # we are just starting a new element: its text will be accumulated by characters()
        self.buffer = ''

    def _start_organism(self, attrs):
        """Remember that we are inside <organism> and read the taxonomy id of the protein."""
        # flagged so that <names> under organism is not mistaken for a protein description
        self.organismLabel = 1

        try:
            self.current_tax_id = int(attrs.get('ncbiTaxId'))
        except (TypeError, ValueError):
            # attribute missing or not a number: 0 is the "unknown" value set by __init__
            self.current_tax_id = 0

        if self.currentEntity == "interaction":
            # proteins described inside the interaction (BIND, MIPS): store the tax id right away.
            # HPRD stores self.current_tax_id after reading the whole protein description
            self._insert_protein_tax_id(self.current_counter, self.current_tax_id)

    def _start_inside_protein(self, labelName, attrs):
        """Handle an element opened inside a main-level <proteinInteractor> (eg HPRD)."""
        if labelName == "names" and not self.organismLabel:
            # the description of the protein is one level below, in <shortLabel>
            self.currentLabel="description_names"

        elif labelName == "shortLabel" and self.currentLabel=="description_names":
            self.currentLabel = "description_shortlabel"

        elif labelName == "secondaryRef":
            # HPRD has the ext reference as a secondaryRef tag
            current_db = attrs.get('db')
            current_code= attrs.get('id')

            # only Uniprot references are kept (PDB, Unigene, Ref-Seq... are not parsed);
            # a reference without id carries no information
            if current_code and current_db in ("SP", "Swiss-Prot"):
                self.codes_associated.append( (current_code, current_db))

    def _start_inside_interaction(self, labelName, attrs):
        """Handle an element opened inside an <interaction>.

        The order of the checks matters: context flags set by an outer element
        (bibref, interactionDetection, participant) decide how inner elements are read.
        """
        if verbose_detailed:
            sys.stderr.write("interaction label was found and label now is %s!\n" %(labelName))

        if labelName == "proteinInteractorRef":
            # -- this is the way HPRD uses to list the participants!!! --
            protein_code = (attrs.get('ref') or "").strip()
            ext_db = self.psi_db

            if protein_code and protein_code != "-":
                if verbose_detailed:
                    sys.stderr.write("appending participant <%s> of db <%s>\n" %( protein_code, ext_db) )
                self.protein_participants.append( (protein_code, ext_db) )

        elif labelName == "bibref":
            self.currentLabel = "bibref"

        elif labelName == "primaryRef" and self.currentLabel == "bibref":
            if (attrs.get('db') or "").strip() == "PubMed":
                string_pubmed_id = attrs.get('id')
                if string_pubmed_id is None:
                    self.list_pubmed_ids = ['unknown']
                else:
                    # one attribute can hold several ids; an empty attribute means unknown
                    found_ids = [one_id for one_id in self._PUBMED_ID_SEPARATORS.split(string_pubmed_id) if one_id]
                    self.list_pubmed_ids = found_ids or ['unknown']

        elif labelName == "interactionDetection":
            self.currentLabel = "interactionDetection"

        elif labelName == "interactionDetectionMethod" and self.psi_db=="bind":
            # BIND does not have interactionDetection labels: interactionDetectionMethod
            # is what indicates the start of a method description
            self.currentLabel = "interactionDetection"

        elif self.currentLabel == "interactionDetection":
            if labelName == "shortLabel" and self.psi_db in ("hprd", "mips"):
                # in HPRD and MIPS, the method name is the text inside short label
                self.currentLabel = "method_name"

            if labelName == "secondaryRef" and self.psi_db == "bind":
                # in BIND, the method name is the value of attribute 'secondary'
                if attrs.get('secondary'):
                    self.method.append( attrs.get('secondary').strip() )

        elif labelName == "proteinParticipant":
            # a new protein of the interaction starts here
            self.currentLabel = "participant"

            if verbose_detailed:
                sys.stderr.write("proteinParticipant label found!\n")

            if self.psi_db in ("bind", "mips"):
                # no internal ids in BIND and MIPS: create one, the codes found later are attached to it.
                # Nothing to be done for hprd: its proteins were inserted from the protein descriptions
                self.current_counter = self._new_protein_counter()
                self.protein_participants.append((self.current_counter, "internal_id"))

        elif self.currentLabel == "participant":
            self._start_inside_participant(labelName, attrs)

    def _start_inside_participant(self, labelName, attrs):
        """Handle an element opened inside a <proteinParticipant> (BIND and MIPS only).

        HPRD gets here as well but its only tag inside a participant is
        proteinInteractorRef, which is handled before.
        """
        if labelName == "shortLabel" and  self.psi_db == "bind" and not self.organismLabel:
            # in BIND, this is the geneName (we consider it the protein description...)
            self.tag = "gene_name"

        if labelName == "fullName" and  self.psi_db == "mips" and not self.organismLabel:
            # in MIPS, this is the protein description
            self.tag = "gene_description"

        if labelName != "primaryRef":
            return

        # external ids of the protein.
        # in MIPS, db can be SP, TREMBL, GB (id="AF058918") or PIR, with special cases such as
        #   --> <primaryRef db="SP" id="Q9HBZ2 (human) or sp" />
        #   --> <primaryRef db="TREMBL" id="-" />
        #   --> <primaryRef db="TREMBL" />
        #   --> <primaryRef db="SP" id="P29354_2" />
        #   --> <primaryRef db="TREMBL" id=" P97582" />
        # in BIND, db is "Entrez Protein"
        current_db = (attrs.get('db') or "").strip()
        current_code = (attrs.get('id') or "").strip()

        if not current_code or current_code == "-" or not current_db:
            return

        if current_db in ("SP", "TREMBL"):
            # only the first six characters are kept as the accession (see the special cases above)
            self._insert_protein_code(self.current_counter, sp_acc=current_code[0:6])

        elif current_db == "GB":
            self._insert_protein_code(self.current_counter, embl_acc=current_code)

        elif current_db == "Entrez Protein":
            self._insert_protein_code(self.current_counter, gi=current_code)

    def endElement(self, labelName):
        """Called whenever the parser exits an xml element: use the text read and the flags set.

        labelName -- name of the element being closed
        """
        #------------------------------
        # Assigning values from buffer
        #------------------------------
        if self.currentEntity == "proteinInteractor":

            if labelName == "sequence":
                self.protein_sequence = self.buffer.replace("\n", "").replace(" ", "").strip()

        elif self.currentEntity == "interaction":

            if labelName == "shortLabel" and self.tag == "gene_name":
                # --> only BIND will go inside this if: the text is the protein description
                self.tag = None
                stripped_desc = self._clean_description(self.buffer)
                if stripped_desc:
                    self._insert_protein_code(self.current_counter, description=stripped_desc)

            if labelName == "fullName" and self.tag == "gene_description":
                # --> only MIPS will go inside this if: names and descriptions of the protein
                self.tag = None
                for one_description in self._split_mips_full_name(self.buffer):
                    self._insert_protein_code(self.current_counter, description=one_description)

        #------------------------------
        # Processing all information read for a certain element
        #------------------------------
        if self.currentEntity != "interaction" and labelName == "shortLabel" and self.currentLabel == "description_shortlabel":
            # description of the protein (only for HPRD): close the label so that other
            # <names> or <shortLabel> are not mistaken for a protein description
            self.currentLabel= None
            stripped_desc = self._clean_description(self.buffer)
            if stripped_desc:
                self.codes_associated.append((stripped_desc, "description"))

        if self.currentEntity != "interaction" and labelName == "proteinInteractor":
            # closing a main-level protein: MIPS and BIND will never go inside this if
            self._finish_protein()

        elif labelName == "organism":
            self.organismLabel = 0

        elif labelName == "interaction":
            self._finish_interaction()

        elif labelName == "shortLabel":
            if self.currentLabel == "method_name":
                self.method.append(self.buffer)
                self.currentLabel = None

        elif labelName == "proteinParticipant":
            # we are not in a participant anymore
            self.currentLabel= None

        elif labelName == "interactionDetection" or labelName == "interactionDetectionMethod":
            # we are not in a interactionDetection anymore
            self.currentLabel= None

        elif labelName == "bibref":
            # we are not in a bibref anymore
            self.currentLabel= None

        # buffer contents were used: empty it after each element
        self.buffer = ''

    def _finish_protein(self):
        """Print and/or store the protein just read, then reset the protein state."""
        if self.output_mode == "text" or self.verbose:
            sys.stderr.write("==========================================\n")
            sys.stderr.write("Protein with the following information found:\n")
            sys.stderr.write("protein_db_id: <%s>\n" %self.protein_db_id)
            for code_associated in self.codes_associated:
                sys.stderr.write("protein: <%s> (%s)\n" %(code_associated[0], code_associated[1]))
            sys.stderr.write("==========================================\n")

        if self.output_mode == "psiDB":
            self._store_protein()

        # since the protein has been parsed, initialize all variables related to it
        self.currentEntity = None
        self.currentLabel = None
        self.tag = None
        self.organismLabel = 0
        self.currentAtt = None
        self.protein_db_id = None
        self.swissacc = None
        self.protein_sequence = None

    def _store_protein(self):
        """Insert into the database the protein just read with its codes, sequence and tax id."""
        if self.protein_db_id is None:
            # psi format without an internal id for proteins: nothing to key the protein on
            if self.verbose:
                sys.stderr.write("proteinInteractor without id was not inserted\n")
            return

        # get the counter id of the protein (once), inserting the protein if it was not in the db
        counter_id = self._counter_for_db_id(self.protein_db_id)
        if counter_id is None:
            counter_id = self._register_protein(self.protein_db_id)

        for code, code_type in self.codes_associated:
            if code_type in ("Swiss-Prot", "SP"):
                self._insert_protein_code(counter_id, sp_acc=code)
            elif code_type == "description":
                self._insert_protein_code(counter_id, description=code)

        if self.protein_sequence:
            self._insert_protein_code(counter_id, sequence=self.protein_sequence)

        self._insert_protein_tax_id(counter_id, self.current_tax_id)

    def _finish_interaction(self):
        """Print and/or store the interaction just read, then reset the interaction state."""
        if self.output_mode == "text" or self.verbose:

            if len(self.protein_participants) > 1:
                sys.stderr.write("\n==========================================\n")
                sys.stderr.write("Interaction with the following information found:\n")
                sys.stderr.write("methods: %s\n" %self.method)

                for participant in self.protein_participants:
                    sys.stderr.write("%s (%s) interacts with " %(participant[0], participant[1]))

                sys.stderr.write("\n==========================================\n")

        if self.output_mode == "psiDB":
            self._store_interaction()

        if self.verbose:
            sys.stderr.write("Number of interactions inserted: %s\n" %(self.number_ints_inserted))
            sys.stderr.write("Number of interactions not printed because there were less than two known participants: %s\n" %(
                              self.number_ints_with_one_participant))

        # interaction has been closed... initialize all variables related to it
        self.currentEntity = None
        self.list_pubmed_ids = ['unknown']
        self.method = []
        self.currentLabel = None
        self.organismLabel = 0
        self.tag = None

    def _store_interaction(self):
        """Insert into the database every pair of known participants of the interaction just read."""
        participants_counter_ids = []

        if self.psi_db == "hprd":
            # in HPRD, participants are db ids: their counter ids are looked up in the
            # database and participants that are not found are ignored
            for participant in self.protein_participants:
                counter_id = self._counter_for_db_id(participant[0])
                if counter_id is not None:
                    participants_counter_ids.append(counter_id)

        elif self.psi_db == "bind" or self.psi_db == "mips":
            # in BIND and MIPS, the protein participants are already the counter ids
            participants_counter_ids = [participant[0] for participant in self.protein_participants]

        if len(participants_counter_ids) < 2:
            self.number_ints_with_one_participant += 1
            return

        for position, first_id in enumerate(participants_counter_ids):
            for second_id in participants_counter_ids[position + 1:]:

                # the smallest id always goes first so that A-B and B-A share the same key
                if first_id <= second_id:
                    proteinA, proteinB = first_id, second_id
                else:
                    proteinA, proteinB = second_id, first_id

                interaction_key = "%s.%s" %(proteinA, proteinB)

                self._execute("INSERT IGNORE INTO interactions "
                              "(interaction_key, proteinA_counter_id, proteinB_counter_id) "
                              "VALUES (%s, %s, %s);",
                              (interaction_key, proteinA, proteinB))

                self.number_ints_inserted += 1

                for one_pmid in self.list_pubmed_ids:
                    self._execute("INSERT IGNORE INTO interactionFeatures "
                                  "(interaction_key, pubmed_id) "
                                  "VALUES (%s, %s);",
                                  (interaction_key, one_pmid))

                for one_method in self.method:
                    self._execute("INSERT IGNORE INTO interactionMethod "
                                  "(interaction_key, method) "
                                  "VALUES (%s, %s);",
                                  (interaction_key, one_method))

    def characters(self, charsRead):
        """Called whenever the parser finds chars between two tags: keep them in the buffer."""
        self.buffer += charsRead
            
