"""
 File       : PianaGlobals.py
 Author     : R. Aragues & D. Jaeggi
 Creation   : 2003
 Contents   : class that controls piana vocabularies, default values and database names for tables and columns
 Called from: all across piana modules

=======================================================================================================
File that sets piana vocabularies, default values and database names for tables and columns
"""

# PianaGlobals.py: class that controls piana vocabularies, default values and database names for tables and columns
#
# Copyright (C) 2005  Ramon Aragues
# author email: ramon.aragues@upf.edu and boliva@imim.es
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#    http://www.gnu.org/copyleft/gpl.html
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# University Pompeu Fabra, hereby disclaims all copyright
# interest in the program 'PIANA'
# (software for working with protein-protein interaction networks) written 
# by Ramon Aragues


# import functions needed to obtain SQL statements
#  --> calls to classes that generate sql statements are done through PianaGlobals
#      This is done this way just to centralize all interactions with piana databases
#      in this file
from PianaInsertSQL import *
from PianaSelectSQL import *


# ==========================================================
#                   PIANA DATABASE DESCRIPTION
# ==========================================================
# This will be used to retrieve values from the database.
# The variable names will remain unchanged if the database changes.
# In case the column names change, then the only thing to modify in
# the program will be the strings of this list.
# -----------------------------------------------------------------


# ================
# DATABASE TABLES
# ================

# --- Core protein tables ---------------------------------------------
protein_table = "protein"
proteinConflicts_table = "proteinConflicts"
proteinCorrespondence_table = "proteinCorrespondence"
proteinSimilarity_table = "proteinSimilarity"
proteinPianaCounter_table = "proteinPianaCounter"

# --- Tables holding external protein identifiers ---------------------
swissProt_table = "swissProt"
swissAccession_table = "swissAccession"
geneName_table = "geneName"
emblAccession_table = "emblAccession"
emblPID_table = "emblPID"
pdb_table = "pdb"
gi_table = "gi"
pirEntry_table = "pirEntry"
pirAccession_table = "pirAccession"
protein_id_intDB_table = "protein_id_intDB"

# --- Tables relating a protein to one of its attributes --------------
proteinEC_table = "proteinEC"
proteinCog_table = "proteinCog"
proteinGo_table = "proteinGo"
interPro_table = "interPro"
proteinScop_table = "proteinScop"
proteinCath_table = "proteinCath"
proteinProsite_table = "proteinProsite"
proteinSpecies_table = "proteinSpecies"
proteinDescription_table = "proteinDescription"
proteinKeyword_table = "proteinKeyword"
proteinFunction_table = "proteinFunction"
proteinSubcellularLocation_table = "proteinSubcellularLocation"
proteinDBAliCluster_table = "proteinDBAliCluster"

# --- Tables describing the attributes themselves ---------------------
species_table = "species"
ec_table = "ec"
cog_table = "cog"
go_table = "go"
go_term2term_distance_table = "go_term2term_distance"

# --- Tables populated from external protein databases ----------------
uniprotInfo_table = "uniprotInfo"
cellFitness_table = "cellFitness"

# --- Interaction tables ----------------------------------------------
interaction_table = "interaction"
interactionMethod_table = "interactionMethod"
interactionSourceDB_table = "interactionSourceDB"
interactionFeatures_table = "interactionFeatures"
interactionScores_table = "interactionScores"
interactionProteinSource_table = "interactionProteinSource"


# ================
# DATABASE COLUMNS
# ================
#
# This is the place where the column names in piana databases are centralized
#
# Therefore, if there are any column name changes, or columns added to a piana database, it must be reflected here 
#

# ---------------------------
# Protein Descriptors columns
# ---------------------------

# protein_table columns
#   (the table also has a speciesNCBI field, named as in species_table)
proteinPiana_col = "proteinPiana"
proteinSequence_col = "proteinSequence"
proteinMD5_col = "proteinMD5"
proteinSequenceLength_col = "proteinSequenceLength"
proteinMW_col = "proteinMW"
proteinIP_col = "proteinIP"

# proteinConflicts_table columns
#   (plus a proteinPiana field, named as in protein_table)
conflictID_col = "conflictID"
description_col = "description"

# proteinCorrespondence_table declares no column names of its own; it reuses
#   - proteinPiana and proteinMD5, named as in protein_table
#   - speciesNCBI, named as in species_table

# proteinSimilarity_table declares no column names of its own; it reuses
#   - proteinPianaA_col and proteinPianaB_col, named as in interaction_table

# proteinPianaCounter_table columns
proteinPianaCounter_col = "proteinPianaCounter"

# ------------------------------------
# Protein External identifiers columns
# ------------------------------------
# Shared column name for the source database identifier.
# Every "<table>Source_col" constant in this file is bound to this same value,
# so renaming the column only requires changing this one string.
sourceDBID_col = "sourceDBID"

# Unless stated otherwise, each table below also has a proteinPiana field,
# named as in protein_table.

# Columns of table swissProt_table
swissProtID_col = "swissProtID"
swissProtSource_col = sourceDBID_col

# Columns of table swissAccession_table
swissAccessionID_col = "swissAccessionID"
isPrimary_col = "isPrimary"
swissAccessionSource_col = sourceDBID_col

# Columns of table geneName_table
geneName_col = "geneName"
geneNameSource_col = sourceDBID_col

# Columns of table emblAccession_table
emblAccessionID_col = "emblAccessionID"
emblAccessionVersion_col = "emblAccessionVersion"
emblAccessionSource_col = sourceDBID_col

# Columns of table emblPID_table
emblPID_col = "emblPID"
emblPIDVersion_col = "emblPIDVersion"
emblPIDSource_col = sourceDBID_col

# Columns of table gi_table
giID_col = "giID"
giSource_col = sourceDBID_col

# Columns of table pdb_table
pdbID_col = "pdbID"
chain_col = "chain"
pdb_chain_col = "pdb_chain"
pdbSource_col = sourceDBID_col

# Column shared by pirEntry_table and pirAccession_table.
# (It used to be assigned once per table with the same value; a single
# definition is enough and avoids the two copies drifting apart.)
isComplete_col = "isComplete"

# Columns of table pirEntry_table
#   (plus isComplete_col, defined above)
pirEntryID_col = "pirEntryID"
pirEntrySource_col = sourceDBID_col

# Columns of table pirAccession_table
#   (plus isComplete_col, defined above)
pirAccessionID_col = "pirAccessionID"
pirAccessionSource_col = sourceDBID_col

# Columns of table protein_id_intDB_table
#     intdbID is composed of a string identifying the type of code
#     and the code itself (eg. dip_uid:203N)
#      -> valid strings for identifying codes are:
#            - dip_uid
#            - hprd_id
#
#   (the table also has proteinPiana, named as in protein_table, and
#    sourceDBID_col, named as everywhere else)
intdbID_col = "intdbID"

# ---------------------------------------
# Protein attributes relationship columns
# ---------------------------------------

# Unless stated otherwise, each relationship table below also has a
# proteinPiana field, named as in protein_table.
# All "...Source_col" names are bound to the shared sourceDBID_col.

# proteinEC_table columns (plus ecID, named as in ec_table)
proteinECSource_col = sourceDBID_col

# proteinCog_table columns (plus cogID, named as in cog_table)
proteinCogSource_col = sourceDBID_col

# proteinGo_table columns
proteinGoSource_col = sourceDBID_col

# interPro_table columns
interProID_col = "interProID"
interProDescription_col = "interProDescription"
interProSource_col = sourceDBID_col

# Prosite columns
#   NOTE(review): the original comment named the table "prosite_table"; the
#   only Prosite table declared in this file is proteinProsite_table - confirm.
prositeID_col = "prositeID"
prositeSource_col = sourceDBID_col

# proteinScop_table columns
#   NOTE(review): cf/sf/fa presumably stand for SCOP fold / superfamily /
#   family - confirm against the database schema.
proteinScopSource_col = sourceDBID_col
cf_col = "cf"
sf_col = "sf"
fa_col = "fa"

# proteinCath_table columns
#   NOTE(review): c/a/t/h presumably stand for the CATH hierarchy levels -
#   confirm against the database schema.
proteinCathSource_col = sourceDBID_col
c_col = "c"
a_col = "a"
t_col = "t"
h_col = "h"
cathID_col = "cathID"
segmentID_col = "segmentID"
res_start_col = "res_start"
res_end_col = "res_end"

# proteinSpecies_table columns (plus speciesNCBI, named as in species_table)
proteinSpeciesSource_col = sourceDBID_col

# proteinDescription_table columns
proteinDescription_col = "proteinDescription"
proteinDescriptionSource_col = sourceDBID_col

# proteinKeyword_table columns
proteinKeyword_col = "proteinKeyword"
proteinKeywordSource_col = sourceDBID_col

# proteinFunction_table columns
proteinFunction_col = "proteinFunction"
proteinFunctionSource_col = sourceDBID_col

# proteinSubcellularLocation_table columns
proteinSubcellularLocation_col = "proteinSubcellularLocation"
proteinSubcellularLocationSource_col = sourceDBID_col

# proteinDBAliCluster_table columns
#   (plus sourceDBID_col, named as everywhere else)
DBAliclusterID_col = "DBAliclusterID"
DBAliclusteringMethod_col = "DBAliclusteringMethod"
patchResidues_col = "patchResidues"

# ---------------------------------------
# Protein attributes columns
# ---------------------------------------

# species_table columns
speciesNCBI_col = "speciesNCBI"
speciesName_col = "speciesName"
speciesDescription_col = "speciesDescription"
speciesKingdom_col = "speciesKingdom"
speciesSource_col = sourceDBID_col

# ec_table columns
ecID_col = "ecID"
ecDescription_col = "ecDescription"
ecSource_col = sourceDBID_col

# cog_table columns
cogID_col = "cogID"
cogDescription_col = "cogDescription"
cogFunction_col = "cogFunction"
cogSource_col = sourceDBID_col

# go_table columns
#   Note: go_distance2root is the only column constant here without the
#   "_col" suffix; the name is kept as is because other modules may use it.
goID_col = "goID"
go_name_col = "name"
go_acc_col = "acc"
go_term_type_col = "term_type"
go_distance2root = "distance2root"
goSource_col = sourceDBID_col

# go_term2term_distance_table columns
go_term2term_distance_term1_col = "go_term1"
go_term2term_distance_term2_col = "go_term2"
go_term2term_distance_col = "distance"
           
# --------------------------------------
# external protein databases Columns   
# --------------------------------------

# uniprotInfo_table columns
#   swissProtID_col and description_col are also assigned earlier in this file
#   (swissProt_table and proteinConflicts_table sections) with the same values;
#   they are re-bound here so that this table's column list is complete.
#   The table also reuses proteinPiana, proteinMW and proteinSequenceLength,
#   named as in protein_table.
swissProtID_col = "swissProtID"
data_class_col = "data_class"
description_col = "description"
organism_col = "organism"
organelle_col = "organelle"

        
# cellFitness_table columns
#   (plus a proteinPiana field, named as in protein_table)
fitnessScore_col = "fitnessScore"
reaction_col = "reaction"
conditions_col = "conditions"
cellFitnessSource_col = sourceDBID_col


# --------------------
# Interaction Columns
# --------------------


# Columns of table interaction_table
#   The B-side names used to carry a trailing blank ("proteinPianaB ",
#   "isSourceB "), unlike every other column constant in this file. The blank
#   is not part of the column name and breaks exact string comparisons and
#   dictionary lookups, so it has been removed.
interactionPiana_col = "interactionPiana"
proteinPianaA_col = "proteinPianaA"
isSourceA_col = "isSourceA"
proteinPianaB_col = "proteinPianaB"
isSourceB_col = "isSourceB"
interactionConfidence_col = "interactionConfidence"

# Columns of table interactionSourceDB_table
#   - sourceDBID: uses the shared sourceDBID_col defined with the external
#     identifier columns (the former "sourceDBID_col = sourceDBID_col"
#     self-assignment was a no-op and has been dropped)
#   - interactionPiana field same as in interaction_table
confidenceAssigned_col = "confidenceAssigned"

# Columns of table interactionMethod_table
#   - interactionPiana field same as in interaction_table
#   - sourceDBID field same as in interactionSourceDB_table
methodID_col = "methodID"

# Columns of table interactionFeatures_table
#   - interactionPiana field same as in interaction_table
#   - sourceDBID field same as in interactionSourceDB_table
pubmedID_col = "pubmedID"

# Columns of table interactionScores_table
#   - interactionPiana field same as in interaction_table
#   - sourceDBID field same as in interactionSourceDB_table
equiv_nscore_col = "equiv_nscore"
equiv_nscore_transferred_col = "equiv_nscore_transferred"
equiv_fscore_col = "equiv_fscore"
equiv_pscore_col = "equiv_pscore"
equiv_hscore_col = "equiv_hscore"
array_score_col = "array_score"
array_score_transferred_col = "array_score_transferred"
experimental_score_col = "experimental_score"
experimental_score_transferred_col = "experimental_score_transferred"
database_score_col = "database_score"
database_score_transferred_col = "database_score_transferred"
textmining_score_col = "textmining_score"
textmining_score_transferred_col = "textmining_score_transferred"
combined_score_col = "combined_score"

# Columns of table interactionProteinSource_table (no names of its own)
#   - interactionPiana field same as in interaction_table
#   - proteinPiana field same as in protein_table
#   - sourceDBID field same as in interactionSourceDB_table

# Columns of table go_parent_relationship
go_parent_col = "go_parent"
go_son_col = "go_son"
 



# ==========================================================
# DEFAULTS VALUES USED IN PIANA
# ==========================================================


# ----------------------------------------------------------
# GO hierarchy constants
#   NOTE(review): presumably "larger than any real value" placeholders for
#   GO distances and depths - confirm against the code that uses them.
# ----------------------------------------------------------
huge_distance = 1000
huge_depth = 1000

# ----------------------------------------------------------
# Clustering default
# ----------------------------------------------------------
huge_value = 100

# ==========================================================
# DICTIONARIES OF EXTERNAL KEYWORDS
# ==========================================================
#
# The following dictionaries link the external keywords that refer
# to a given entity to the keyword that Piana uses internally
# to refer to that entity
#
# The keys of the dictionaries are the piana identifiers, and each
# list contains the values that can be used by the user
# 
# Attention! All strings must be in lowercase!! (because of the way of checking for similar strings)
# Attention! Remember that strings cannot be longer than 20 chars!!!  (database limitations)
# ---------------------------------------------------------------


# --------
# Source databases  (databases from which piana has taken information)
#
#   --  source db normalized names should not be longer than 20 characters
#       (if you need to change the length, you can change field sourceDBID_col in piana database table interactionSourceDB_table)
#
#  --> (the 'xxxx_c' means the interactions come from the database xxxx but the 
#       interaction is not reliable, either because it was obtained from a 'completed'
#       code or from an ambiguous gene name )
# --------
# Attention! Use lowercase
# Attention! Use short names

# For more information on these databases, please refer to piana/README.populate_piana_db
#  -> interactions added with parser expansion2piana are labeled as 'expansion', and appear as so in the network
#  -> 'user' is the label that can be used by users who do not want to introduce a new label here...
#  -> 'user' is the short label used to minimize the size of pianaDB_limited
#     

# Normalized source database label -> list of names accepted for it.
# Each list starts with the normalized label itself; "genbank", "genpept"
# and "swissprot" accept one extra alias each.
source_databases = {
    "bind": ["bind"],
    "bind_c": ["bind_c"],
    "blast_transfer": ["blast_transfer"],
    "cog": ["cog"],
    "completion": ["completion"],
    "dbali": ["dbali"],
    "dip": ["dip"],
    "dip_c": ["dip_c"],
    "expansion": ["expansion"],
    "expansion_c": ["expansion_c"],
    "fitness": ["fitness"],
    # NOTE(review): "gi" is listed as an alias of both genbank and genpept
    "genbank": ["genbank", "gi"],
    "genpept": ["genpept", "gi"],
    "go": ["go"],
    "hprd": ["hprd"],
    "hprd_c": ["hprd_c"],
    "mips": ["mips"],
    "mips_c": ["mips_c"],
    "ori": ["ori"],
    "ori_c": ["ori_c"],
    "pdbsprotec": ["pdbsprotec"],
    "ncbi": ["ncbi"],
    "ncbi_nr": ["ncbi_nr"],
    "ncbi_pdbaa": ["ncbi_pdbaa"],
    "ncbi_sprot": ["ncbi_sprot"],
    "pir": ["pir"],
    "pibase": ["pibase"],
    "pibase_c": ["pibase_c"],
    "posas": ["posas"],
    "posas_c": ["posas_c"],
    "string": ["string"],
    "string_c": ["string_c"],
    "scop": ["scop"],
    "stelzl": ["stelzl"],
    "stelzl_c": ["stelzl_c"],
    "swissprot": ["swissprot", "sprot"],
    "trembl": ["trembl"],
    "user": ["user"],
    "vidal": ["vidal"],
    "vidal_c": ["vidal_c"],
    # NOTE(review): possibly the short label meant to keep pianaDB_limited
    # small (see the header comment of this section) - confirm.
    "s": ["s"],
}

# --------
# Interaction databases (interaction databases from which piana has taken information)
#
# If parsing a new interaction database into a PIANA database, a label for it has to be
# added here (and don't forget to add it as well in source_databases, and color section)
#    --> see README.populate_piana_db for more info (section parse your own data)
#
# this list is used by method output_interactions_table
#  --> (the 'xxxx_c' means the interactions come from the database xxxx but the 
#       interaction is not reliable, either because it was obtained from a 'completed'
#       code or from an ambiguous gene name )
# --------
# Attention! Use lowercase
# Attention! Use short names

# For more information on these databases, please refer to piana/README.populate_piana_db
#     --> 'user' is the label that can be used by users who do not want to introduce a new label here...

# Labels of the interaction databases, each followed by its "_c" variant
# where one exists ("user" has none).
interaction_databases = [
    "dip",
    "dip_c",
    "expansion",
    "expansion_c",
    "mips",
    "mips_c",
    "hprd",
    "hprd_c",
    "bind",
    "bind_c",
    "ori",
    "ori_c",
    "pibase",
    "pibase_c",
    "posas",
    "posas_c",
    "stelzl",
    "stelzl_c",
    "string",
    "string_c",
    "user",
    "vidal",
    "vidal_c",
]

# --------
# Definition of methods used to find interactions
#   -- any new insertion of an interaction must make sure that the method name
#      appears here and is normalized
#
#   -- method_names follows the structure of a dictionary, with keys being the normalized name and content the different
#      names found in databases to refer to that name (e.g. method affinity is called in DIP "Affinity chromatogra").
#
#   --  method normalized names should not be longer than 20 characters
#       (if you need to do so, you can change field methodID_col in piana database table interactionMethod_table)
#
# --------
# Attention! Use lowercase for keys and values, even if the external database uses upper case
#            for the method description
#
# Attention! Remember that the method name (ie. the dictionary key) cannot be longer than 20 chars!!! 

# TO DO!!!!!!!! Check Protein Interactions Standard Initiative to see how they deal with method names

# TO DO!!!!!!!!!! Check if there are duplicated methods... I've added new methods that I didn't see in the list
#                 coming from DIP. In particular, make sure immunoprecipitation methods are not duplicated

# TO DO!!! cluster the methods... do not write so specific methods... for example, many immunoprecipitation...

# ATTENTION!!! Do not change the description of methods! Otherwise, we won't be able to transform to standard form
#              the methods as described in external databases
#              (e.g. MIPS describes method as "coip: coimmunoprecipitation")
#              If you have a new description for the method, just add a new element to the list

# Normalized method name -> list of names accepted for that method.
#
# Changes with respect to the previous version of this dictionary (existing
# descriptions are untouched, as required by the ATTENTION note above):
#   - "invivo" and "xdifract" now also accept their own normalized name, like
#     every other entry does (appended, so existing positions do not move)
#   - the repeated "flagcoip" (immprec) and "y2h" (y2h) aliases are listed once
#   - "scop_fa_pred" gained an all-lowercase copy of its description, because
#     this file states that all strings must be lowercase for the matching to
#     work; the original mixed-case description is kept as well
method_names = {
    "3dstruct": ["3dstruct", "three dimensional structure", "3d structure"],
    "adhesion": ["adhesion", "interaction adhesion", "interaction adhesion assay"],
    "affinchrom": ["affinchrom", "affinity chromatogra", "affinity chromatografy",
                   "affinity chromatography", "affinity chromatography technologies"],
    "alanine": ["alanine", "alanine scanning"],
    "atomic": ["atomic", "atomic force microsc", "atomic force microscopy"],
    "biacore": ["biacore", "biacore sensor chip"],
    "calcium": ["calcium", "calcium mobilization", "calcium mobilization assay"],
    "chemotaxis": ["chemotaxis"],
    "colocalization": ["colocalization", "colocalization/visualisation technologies"],
    "competition": ["competition", "competition binding"],
    "copurif": ["copurif", "copurification"],
    "cosediment": ["cosediment", "cosedimentation"],
    "crosslink": ["crosslink", "cross-linking", "cross-linking studies", "cross linking"],
    "denatur": ["denatur", "denaturing gel elect", "denaturing gel electrophoresis"],
    "density": ["density", "density gradient sed", "gradient sedimentation",
                "density gradient sedimentation"],
    "electron": ["electron", "electron microscopy"],
    "electronres": ["electronres", "electron resonance", "resonance energy transfer"],
    "electromob": ["electromob", "emsa:electrophoretic mobility"],
    "elisa": ["elisa", "elisa: enzyme-linked immunosorbent assay"],
    "experimental": ["experimental", "experimental knowledge based"],
    "expCog1": ["expcog1", "simple expansion for proteins sharing cog"],
    "expCog2": ["expcog2", "double expansion for proteins sharing cog"],
    "expScop1": ["expscop1", "simple expansion for proteins sharing scop"],
    "expScop2": ["expscop2", "double expansion for proteins sharing scop"],
    "expInterpro1": ["expinterpro1", "simple expansion for proteins sharing interpro"],
    "expInterpro2": ["expinterpro2", "double expansion for proteins sharing interpro"],
    "farwestern": ["farwestern", "far-western blot", "far western blotting", "far western"],
    "fluanis": ["fluanis", "fluorescence anisotropy"],
    "fludepol": ["fludepol", "fluorescence depolar", "fluorescence depolarization"],
    "flumicro": ["flumicro", "fluorescent microscopy"],
    "fluspec": ["fluspec", "fluorescence spectro", "fluorescence spectroscopy"],
    "fret": ["fret", "fret analysis"],
    "gelfilt": ["gelfilt", "gel filtration chrom", "gel filtration chromatography"],
    "gelretard": ["gelretard", "gel retardation assa", "gel retardation assays"],
    "genetic": ["genetic"],
    "immblot": ["immblot", "immunoblotting"],
    "immflu": ["immflu", "immunofluorescence"],
    "immloc": ["immloc", "immunolocalization"],
    "immprec": ["immprec", "immunoprecipitation", "immuno", "coip: coimmunoprecipitation",
                "flagcoip", "flag co-immunoprecipitation", "co-immunoprecipitation"],
    "immstain": ["immstain", "immunostaining"],
    "ion": ["ion", "ion-exchange chromat", "ion-exchange chromatography"],
    "isothermal": ["isothermal", "isothermal titration", "isothermal titration calorimetry"],
    "lab": ["lab"],
    "lambda": ["lambda", "lambda fusion"],
    "light": ["light", "light scattering"],
    "massspec": ["massspec", "mass spectrometry", "mass spectrometrics",
                 "mass spectrometric s", "mass spectrometric screening"],
    "microtiter": ["microtiter", "microtiter plate bin"],
    "microarray": ["microarray"],
    "monoclon": ["monoclon", "monoclonal antibody", "monoclonal antibody blockade"],
    "natgel": ["natgel", "native gel", "native gel electroph", "native gel electrophoresis"],
    "neutron": ["neutron", "neutron scattering"],
    "nmr": ["nmr"],
    "nuctrans": ["nuctrans", "nuclear translocatio", "nuclear translocation",
                 "nuclear translocation assay"],
    "otherbiochem": ["otherbiochem", "other biochemical", "biochemical"],
    "otherbiophy": ["otherbiophy", "other biophysical", "biophysical"],
    "overlay": ["overlay", "filter overlay assay"],
    "phage": ["phage", "phage display"],
    "photon": ["photon", "photon correlation s", "photon correlation spectroscopy"],
    "pi_dbalig05c75i0": ["pi_dbalig05c75i0",
                         "1st: 0.5 global struct similarity,  75% local struct coverage and no seq identity"],
    "pi_dbalig075c75i50": ["pi_dbalig075c75i50",
                           "2nd: 0.75 global struct similarity, 75% local struct coverage and 50% local seq identity"],
    "pi_dbalig05c75i50": ["pi_dbalig05c75i50",
                          "3rd: 0.5 global struct similarity,  75% local struct coverage and 50% local seq identity"],
    "plasmon": ["plasmon", "surface plasmon reso", "surface plasmon resonance"],
    "pred_struct": ["pred_struct", "predicted from structural similarity"],
    "proteome": ["proteome"],
    "radiography": ["radiography", "autoradiography"],
    "scop_fa_pred": ["scop_fa_pred",
                     "predicted because their SCOP families have been found to interact",
                     "predicted because their scop families have been found to interact"],
    "spot": ["spot", "peptide spot assay"],
    "tandaffin": ["tandaffin", "tandem affinity puri", "tandem affinity purification",
                  "tandem affinity purification (tap)"],
    "tapcoip": ["tapcoip", "tap"],
    "transcript": ["transcript", "transcription assay"],
    "transcoexp": ["transcoexp", "transient coexpressi", "transient coexpression"],
    "ubiquitin": ["ubiquitin", "split-ubiquitin syst", "split-ubiquitin system"],
    "unclassified": ["unclassified"],
    "vitrobind": ["vitrobind", "in vitro binding", "vt"],
    "invivo": ["vv", "in vivo", "invivo"],
    "vivokinas": ["vivokinas", "in vivo kinase activ", "in vivo kinase activity"],
    "xdifract": ["x-ray diffraction", "xdifract"],
    "xcryst": ["xcryst", "x-ray crystallograph", "x-ray: x-ray crystallography",
               "x-ray crystallography"],
    "xscat": ["xscat", "x-ray scattering"],
    "y2h": ["y2h", "two hybrid test", "yeast2hybrids", "yeast two hybrids", "two hybrid", "2h"],
}

# --------
# Definition of pibase methods and databases
#  -> write here the names of dbs and methods of pibase patches (only used to tell user which are his options)
# --------
# Attention! Use lowercase
# Attention! Use short names
# Names of the pibase/dbali methods and of the pibase patches databases.
# The two lists have the same length and their entries end in the same
# g..c..i.. suffixes, in the same order.
pibase_dbali_methods = [
    "pi_dbalig05c75i0",
    "pi_dbalig075c75i50",
    "pi_dbalig05c75i50",
]
pibase_databases = [
    "pibase_patchesDB_g05c75i0",
    "pibase_patchesDB_g075c75i50",
    "pibase_patchesDB_g05c75i50",
]


# --------
# Definition of expansion types
# --------
# Attention! Use lowercase
# Attention! Use short names
# Normalized expansion type -> list of names accepted for it
# (the normalized name itself, then its "expansionsame..." alias).
expansion_types = {
    "cog": ["cog", "expansionsamecog"],
    "ec": ["ec", "expansionsameec"],
    "interpro": ["interpro", "expansionsameinterpro"],
    "scop": ["scop", "expansionsamescop"],
}

# --------
# Definition of types of protein codes that are used in piana
#
# --> these are names that are easy for users to remember
#     They are translated to piana database tables and columns using this dictionary
#
#   it follows the structure:
#
#                  { easy-to-remember-name1: { table_name1_in_pianaDB: column_name1_for_code},
#                    easy-to-remember-name2: { table_name2_in_pianaDB: column_name2_for_code},
#                    ......................................................................
#                  }
#
#   --> this dictionary is used by utilities.get_code_type
#                                  utilities.get_code_table_column_type
#                                  utilities.get_code_*
#                                  piana.py
#                                  .....
# --------
# Attention! fasta type is linked to the sequence of the protein
# Attention! for pdb code, the format of the code must be pdb_code.chain_id
#            --> If the chain_id is None, write pdb_code. (leaving the dot)
#                if the chain is None, piana will try to find it either under chain="" or chain="A"
#
# Attention! unientry refers to uniprot entries, uniacc refers to uniprot accession numbers
#
# Attention! any new addition to valid_protein_types requires adding a description to
#            protein_types_description

# TO DO!!! should I write "unknown" here as an easy-to-remember name?


# Easy-to-remember protein code type -> {table name: column holding the code}.
# Each inner dictionary has exactly one entry. "sequence" and "fasta" both
# point at the sequence column of protein_table.
valid_protein_types = {
    "proteinPiana": {protein_table: proteinPiana_col},
    "sequence": {protein_table: proteinSequence_col},
    "fasta": {protein_table: proteinSequence_col},
    "md5": {protein_table: proteinMD5_col},
    "unientry": {swissProt_table: swissProtID_col},
    "uniacc": {swissAccession_table: swissAccessionID_col},
    "geneName": {geneName_table: geneName_col},
    "emblacc": {emblAccession_table: emblAccessionID_col},
    "emblpid": {emblPID_table: emblPID_col},
    "pdb.chain": {pdb_table: pdb_chain_col},
    "interpro": {interPro_table: interProID_col},
    "pirEntry": {pirEntry_table: pirEntryID_col},
    "pirAccession": {pirAccession_table: pirAccessionID_col},
    "protein_intDB": {protein_id_intDB_table: intdbID_col},
    "gi": {gi_table: giID_col},
}

# Human-readable description for every key of valid_protein_types.
# Long descriptions are split over adjacent string literals; the embedded
# "\n" characters are part of the text and mark the intended line breaks.
protein_types_description = {
    "proteinPiana": "internal PIANA identifier (eg. 12812)",
    "sequence": "the sequence of the protein",
    "fasta": ("the sequence of the protein preceded\n"
              "by a title line"),
    "md5": ("the md5 checksum of the protein sequence\n"
            "followed by 8 aminoacids (first 4 and last\n"
            "4 of the sequence)"),
    "unientry": ("the uniprot entry of the protein\n"
                 "(eg. BCLX_HUMAN)"),
    "uniacc": ("the uniprot accession number of the\n"
               "protein (eg. Q07817)"),
    "geneName": "the gene name for the protein (eg. bclx)",
    "emblacc": ("the (genbank, embl, DNA DBJ) accession for the protein\n"
                "(eg. BAA22171.1, AAA16451)"),
    "emblpid": "the embl protein identifier (eg. AAB17354) (deprecated)",
    "pdb.chain": "the pdb code for the protein (eg. 1r2d.A)",
    "interpro": "the interpro code for the protein (eg. IPR012238)",
    "pirEntry": "the PIR entry for the protein (eg. B47537)",
    "pirAccession": "the PIR accession for the protein (eg. B47537)",
    "protein_intDB": ("protein id used in a interactionDB\n"
                      "(must preceded by type of id) (eg. dip_uid:304N)\n"
                      "--> valid id types are: dip_uid, hprd_id"),
    "gi": "the NCBI gi code for the protein (eg. 1622940)",
}
# ----------------------------------------------------------
# Globals for scoring functions in protein decomposition
# ----------------------------------------------------------

# Names accepted as scoring functions for protein decomposition
valid_scoring_function_names = [
    "quotient",
    "lineal",
]

# ==========================================================
# COLOR CODES FOR OUTPUT NETWORK
# ==========================================================
#
# The following dictionaries set the color code that will be used
# for output
#

# For more information on these databases, please refer to piana/README.populate_piana_db

# colors for interactions depending on the database they come from
# Maps a source database name to a one-element list holding its color name.
# Every source database has a matching "<name>_c" key with the same color
# (see piana/README.populate_piana_db for what the databases are);
# "intersection", "user" and the empty name "" have no "_c" variant.
#
# Fix: the key "hprd" used to be listed twice, so the second entry silently
# replaced the first and "hprd_c" was never defined. The second entry is now
# "hprd_c", in line with all the other databases.
interaction_source_databases_colors = {
    "dip":          ["red"],
    "dip_c":        ["red"],
    "bind":         ["grey"],
    "bind_c":       ["grey"],
    "string":       ["magenta"],
    "string_c":     ["magenta"],
    "ori":          ["green"],
    "ori_c":        ["green"],
    "posas":        ["lightblue"],
    "posas_c":      ["lightblue"],
    "expansion":    ["orange"],
    "expansion_c":  ["orange"],
    "hprd":         ["blue"],
    "hprd_c":       ["blue"],
    "mips":         ["darkgreen"],
    "mips_c":       ["darkgreen"],
    "pibase":       ["pink"],
    "pibase_c":     ["pink"],
    "intersection": ["cyan"],
    "user":         ["yellow"],
    "stelzl":       ["brown"],
    "stelzl_c":     ["brown"],
    "vidal":        ["brown"],
    "vidal_c":      ["brown"],
    "":             ["black"],
}

# line styles depending on the type of interaction they are
# Each value is a one-element list holding the line style name
interaction_line_styles = {
    "extended":   ["dashed"],
    "propagated": ["dotted"],
    "normal":     ["solid"],
}

# colors for node background fill depending on the type of node they are
# Each value is a one-element list holding the fill color name
node_fill_colors = {
    "root":         ["yellow"],
    "keyword":      ["red"],
    "root_keyword": ["orange"],
    "normal":       ["lightblue"],
}

# colors for node borders depending on the origin of the node
# Each value is a one-element list holding the border color name
node_border_colors = {
    "expanded":        ["blue"],
    "over_expressed":  ["red"],
    "infra_expressed": ["green"],
    "normal":          ["black"],
}

# colors for node font colors depending on the **** of the node
#node_font_colors  = {"....":["red"],
#                      "normal":["black"]
#                      }

# ==========================================================
# PARAMETERS FOR OUTPUT .DOT NETWORK
# ==========================================================
#
# The following variables set the attributes that will be used
# for the output .dot network. You can modify them here as you wish.
#
# For a better understanding of the different parameters, take a look
# at the neato documentation: http://www.graphviz.org

# Graph-level settings (presumably written out as the Graphviz attributes of
# the same name -- confirm against the code that produces the .dot file)
dot_orientation = "portrait"
dot_pack = "true"
dot_overlap = "scale"

# Node appearance settings; every value, numeric ones included, is a string
dot_node_shape = "box"
dot_node_font_size = "10"
dot_node_width = "0.15"
dot_node_height = "0.15"
dot_node_style = "filled"
# Fill color of standard nodes: the first entry of node_fill_colors["normal"].
# Colors for non-standard nodes are set in node_fill_colors.
dot_standard_node_fill_color = node_fill_colors["normal"][0]

# Edge length setting, stored as a string. The identifier is misspelled
# ("lenght"); it is left unchanged because it is a module-level name that
# other code may reference.
dot_edge_lenght = "1"

# ====================================
# PARAMETERS FOR OUTPUTTING INFORMATION
# ====================================
#
# Set here which separators will be used depending on the format mode chosen by user

# Column separator for each format mode
tab_separators = {
    "html": " ",
    "txt": "\t",
}

# Line terminator for each format mode
line_separators = {
    "html": "<br>",
    "txt": "\n",
}

# Set here which values will be used depending on the format mode chosen by user

# Labels for the expression state of a protein; the same strings appear as
# keys of node_border_colors
over_expressed_protein = "over_expressed"
infra_expressed_protein = "infra_expressed"

# Text used for a positive / negative fitness result, keyed by format mode
# (the "dot" mode uses an empty string)
positive_fitness_value = {
    "html": "yes",
    "txt": "yes",
    "dot": "",
}

negative_fitness_value = {
    "html": "no",
    "txt": "no",
    "dot": "",
}

# Text used for a positive / negative location result, keyed by format mode
# (the "dot" mode uses an empty string)
positive_location_value = {
    "html": "yes",
    "txt": "same",
    "dot": "",
}

negative_location_value = {
    "html": "no",
    "txt": "different",
    "dot": "",
}

# Text used for a positive / negative species result, keyed by format mode
# (the "dot" mode uses an empty string)
positive_species_value = {
    "html": "yes",
    "txt": "same",
    "dot": "",
}

negative_species_value = {
    "html": "no",
    "txt": "different",
    "dot": "",
}

# Text used for a positive / negative root result, keyed by format mode
# (the "dot" mode uses an empty string)
positive_root_value = {
    "html": "yes",
    "txt": "is-root",
    "dot": "",
}

negative_root_value = {
    "html": "no",
    "txt": "not-root",
    "dot": "",
}
