# ---------------------------------------------------------------------
# File       : general_template.piana_conf
# Author     : Ramon Aragues
# Creation   : 2.10.2004
# Updated    : 6.4.2007
# Contents   : template to be used to create your own piana 
#              configuration files
# ---------------------------------------------------------------------
# This file is used to configure parameters for piana.py for some
# specific use described below
#
# if you want piana.py to be configured by this file, set command line
# option --configuration-file=this_file_name
#
# configuration files can be used for both execution modes: batch or
# interactive
#
#   -> in execution mode interactive, the execution section will be 
#      ignored (ie. will only read parameters section)
#
# ----------description of this particular configuration file ---------
#
# This is a template for creating your own piana configuration files:
# edit, move, delete options in this file to tell piana what you want
# to do: in your configuration file, write here what is the use it has
# (eg creates a network from an input file and prints the interactions
# table to the screen)
#  
# 
# In your configuration file, write here which are the parameters that
# must be set through the command line
# - apart from this configuration file, the user must use piana.py
#   command line options:
# 
# (For example, you could write something like this:
#
#         --> input-file-name
#         --> input-id-type
#         --> output-id-type
#         --> results-prefix
#         --> piana-dbname
#         --> piana-dbhost
#         --> piana-dbuser (depends on the system)
#         --> piana-dbpass (depends on the system)
#         --> depth
# )
#
#   These parameters are required in the command line! In this file,
#   they are set to blank, obliging the user to set them on the
#   command line (although, they could have values assigned and still
#   be ignored, since the command line has preference over the
#   parameter values in this file)
# -------------------------------------------------------------------
#
#
#
# Attention! 
#
#     - All non-configuration lines in this file must start with '#'
#       (unless empty line)
#
#     - A configuration line that is preceded by '#' is not taken 
#       into account
#
#     - Configuration file parameters equal to blank are ignored
#
# -----------------------------------------------------------------
#

# Remark: in many parameters and commands, the PIANA reference card is
# mentioned as the place to look for method names, id types, etc. To
# print the PIANA reference card, run
# "python piana.py --print-configuration-file --piana-dbname=your_pianaDB --piana-dbhost=your_piana_host"
#
# ------------------------------------------------------------------


# ==================================================================
# configuration of execution parameters
# ------------------------------------------------------------------
# set here the input parameters for this specific configuration that 
# are not required in the command-line
# ==================================================================

# exec-mode can be interactive or batch.
#  - if interactive is chosen, the execution commands of this file are
#    ignored, and the user can choose commands from a text menu.
#  - if batch is chosen, piana.py  will execute the commands described 
#    in the execution commands section of this file

exec-mode=batch


# ***************************
# Memory usage
# ***************************
# Networks can be built and managed in different ways. This parameter
# can have the following values:
#         "high": all information of the network is stored in memory.
#		   - slower to build the network
#                  - faster when information is printed more than once.
#	  "low": information is retrieved from database when needed.
#		   - faster to build the network
#                  - slower to print and to post-process the network.
# By default, if this parameter is not used, its value will be "high"

memory-usage=high


# ****************************
# Input proteins configuration
# ****************************
# Proteins can be added to the network using commands (eg add-protein
# and add-proteins-file), via the command line by setting parameter 
# input-file or setting it here (ie. input parameters section)
# - If no input-file is set, the network will be empty unless you add
#   proteins or interactions afterwards
# - If an input-file is set, then the initial network will be built
#   using proteins in this file (more proteins can be added afterwards
#   using PIANA commands)

#
# Set here the file name that contains one protein per line
#    - all proteins must be of the same type 
#    - if you have proteins of different code types (eg. gis and uniprot)
#      then you must separate them into different files to make
#      sure each file only contains proteins of the same type
#      Then, you can use command add-proteins-file to add each
#      of your files
#    - example input files are in piana/code/execs/dummy_input_files

input-file=blank

# Set here the type of code for proteins that will be used by default 
# This parameter is also used (and required) if you set an input-file
#
# - valid input id types are those defined in
#   the PIANA reference card
#     -> type "python piana.py --print-configuration-file --piana-dbname=your_pianaDB --piana-dbhost=your_piana_host"
#        to print the PIANA reference card

input-id-type=blank

# Set here the protein species that will be used by default 
#  --> valid values are all and NCBI species names
#      (egs: all, yeast, human, Candida albicans SC5314,...)
#      -> you must write the complete name or it won't work.
#         If you don't know which is the complete name for
#         your species you can look at the website:
#    http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Taxonomy
#    (alternatively, you can look into table species of your
#    PIANA mysql database)
#
# This parameter is required if you set an input-file
#
#  -> Normally, this can be set to all, since the protein code
#  already refers to a unique species.  Use this parameter to fix the
#  protein species when the protein code you are using does not
#  implicitly refer to a species: for example one geneName can refer
#  to different proteins of many species
#
#  -> if you write an unknown species name or a species name
#     that is ambiguous (ie. corresponds to more than one tax id)
#     PIANA will halt the execution
#

input-proteins-species=all


# *****************************
# Piana Database configuration 
# *****************************
#

# Set here which piana database to use, where it is and how to access
# it:
# - piana-dbname (eg pianaDB_limited) and piana-dbhost (eg sefarad)
#   are required
# - piana-dbuser and piana-dbpass are required in mysql password
#   protected systems
#
# If your system uses a mysql port different than the default one, you
# can change it by setting the correct value for variable piana_dbport
# on file piana/code/utils/piana_configuration_parameters.py

piana-dbname=blank
piana-dbhost=blank
piana-dbuser=blank
piana-dbpass=blank

# (the following parameter is only for advanced users... leave it to
#   blank if you don't understand what it does)
#
#  Set here if you want to use a secondary PIANA database or not
#    -> this secondary database will be used if the primary database
#       (which is set above) does not contain any interactions for
#       a given protein of interest.
#    -> for proteins with known interactions in the primary database,
#       the secondary database will not be used
#    -> this is a parameter that lets you do something like this:
#       "I want to build a network for my proteins of interest. For
#       those proteins for which experimental interactions are known
#       I only want to use those. If no experimental interactions 
#       are known for a given protein, then I want to look for 
#       predictions for that specific protein and add them to the 
#       network." In this scenario, the user has two synchronized
#       PIANA databases, with one containing only experimental
#       interactions and the other one containing experimental
#       and predicted interactions (see section "PIANA databases 
#       contain all the information PIANA needs" on file 
#       README.piana_tutorial for more information on why would 
#       you want to have two separated synchronized PIANA databases)
#    -> if you do not want to generalize the use of the secondary
#       database you have the possibility of forcing the use of
#       the secondary database only for a specific protein or 
#       proteins using argument piana-db of commands add-protein
#       and add-proteins-file
#
#    -> Attention! The primary and the secondary PIANA databases
#       must be synchronized. Otherwise you cannot use this feature
#       By synchronized I mean that they must differ only in the
#       interaction tables: their protein information must be the
#       same (same proteinPianas, same values in the protein tables,
#       same protein databases have been parsed for both, etc)(see 
#       section "PIANA databases contain all the information PIANA 
#       needs" on file README.piana_tutorial for a detailed description
#       on how to keep two separate synchronized PIANA databases)
#
#    -> the secondary database that will be used is to be set by you 
#       on file piana/code/utilities/piana_configuration_parameters.py, 
#       section "PIANA secondary database"
#          -> Attention: even if you set use-secondary-db to 'no',
#             if piana_configuration_parameters.py has a secondary
#             db set, PIANA will create the connection to the 
#             secondary DB. It will not use it (since the parameter
#             use-secondary-db is set to 'no') but it will create
#             the connection. If you do not want to create that
#             connection, you must set to None all parameters in
#             piana_configuration_parameters.py related to the
#             secondary DB.
#          -> I am thinking about including all the secondary 
#             database parameters in the piana configuration files
#             (ie. here) but I don't want the 'standard' PIANA user
#              to be confused, so maybe I'll leave it like this...
#
#    -> Attention! There are two cases in which this parameter might not
#       have the effect you expected (ie. predictions might not be added
#       for a protein which had no experimental interactions). These two
#       cases are:
#          -> the protein interacts with itself according to experimental
#             evidence: PIANA will consider that it has experimental 
#             interactions and will not look into the secondary DB
#             -> you can avoid this by setting the input parameter 
#                use-self-ints to 'no'
#          -> even when telling PIANA not to use self interactions,
#             it might happen that there is an experimental interaction 
#             between two different proteinPianas (ie. sequences) which
#             translated to your protein identifier become a single node
#             in the network (ie. two proteinPianas pointing to the same
#             identifier). Since PIANA does not know beforehand that that
#             interaction is in fact a self-interaction, it will use it
#             and therefore will not look at predictions for that protein.
#
#       You can solve this problem by deciding which are the proteins 
#       for which you want to use the predictions and then setting
#       argument piana-db to secondary in commands add-protein and 
#       add-proteins-file
#
#  Valid values for use-secondary-db are yes, no or blank (which is
#                                                          defaulted to no)
#
use-secondary-db=blank


# *****************************
# Network options
# *****************************

# Set here the depth to which the network will be developed (ie. how
# many interaction steps will be searched for each root protein)
#
#     --> Setting depth to 1, the partners of the proteins in the input
#         file will be used to build the network. Setting depth to 2,
#         the partners of the partners of the seed proteins will be 
#         used. Etc, etc.
#     --> Setting depth to 0 will search for interactions between
#         the input proteins
#     --> Setting depth to -1 will not build a network (useful when
#         using commands that do not require a network, such as
#         translating between protein identifiers)
#  

depth=blank

# Set here the hub threshold
#
# Use this option to limit the interactions that will be added to 
# the network
#
# If a protein has hub-threshold interactions or more, these
# interactions will not be added
# -> this parameter is used to avoid inserting in the network
#    those proteins that bind to "everything"
#  -> set it to 0 if you don't want any thresholds to be applied
#
#  Attention: this threshold applies at the time of creating the
#             network. Due to the idiosyncrasy of PIANA (PIANA
#             keeps interactions between protein sequences, not
#             between protein identifiers), you might observe
#             in some cases that your output network does not
#             respect this threshold. Read README.piana_tutorial
#             for more details on this.

hub-threshold=blank


# Set here whether self interactions should be added to network or
# not
#  -> in some analyses, self interactions perturb the results
#     because they cause all proteins to be at all possible
#     distances from a given protein
#
#  -> a self interaction is a protein that is known to interact
#     with itself
#
#  -> valid values are 'yes' (ie use all interactions) and 'no' (ie
#     do not add self interactions to network)

use-self-ints=yes


# Set here the interaction databases that you want to use

# -> list-source-dbs=all will use interactions from all source
#    databases in your PIANA database
#    -> write colon-separated database names to limit the source
#       databases
#        - valid interaction database names are those defined 
#          in PianaGlobals.interaction_databases
#          --> do python2.3 piana.py --help to get a list of
#              valid database names
#        - for example, write list-source-dbs=dip:string:mips
#                       or for just one database...
#                             list-source-dbs=dip
#
# -> if inverse-dbs is yes, then this parameter does the opposite: 
#    dbs in list-source-dbs will not be taken into account.
#
#      - if list-source-dbs is all, inverse-dbs is ignored (it 
#        doesn't make sense to ignore all databases)
#
#
# Attention: any configuration different from (all, no, no) will 
#            slow down PIANA, since it has to introduce 
#            restrictions when searching for interactions
# 

list-source-dbs=all
inverse-dbs=no


# Set here the interaction methods that you want to use
#   -> list-source-methods=all will use interactions from all types 
#      of methods in piana-dbname
#   -> write colon-separated method names to limit the methods
#        - valid method names are those defined in 
#          PianaGlobals.method_names.keys()
#        - for example, write list-source-methods=y2h:copurif
#                           or for just one method...
#                             list-source-methods=y2h
#
# -> if inverse-methods is yes, then this parameter does the opposite: 
#    methods in list-source-methods will not be taken into account
# -> if list-source-methods is all, inverse-methods is ignored 
#    (it doesn't make sense to ignore all methods)

list-source-methods=all
inverse-methods=no


# *****************************
# Output options
# *****************************
#
# Set here what the output results look like
#


# Set here the type of protein code to be used in your output
#
#   - valid output id types are those defined in 
#     the PIANA reference card
#     -> type "python piana.py --print-configuration-file --piana-dbname=your_pianaDB --piana-dbhost=your_piana_host"
#        to print the PIANA reference card
#
output-id-type=blank

# Set here alternative types of protein identifiers for your output
#
#   - alternative-id-types determines which protein identifier types 
#     will be used in case no code is found for output-id-type
#
#      -> write a colon-separated list of easy-to-remember id types  
#        (for example: uniacc:unientry:gi:md5 )
#      -> valid id types are those defined in 
#         the PIANA reference card
#      -> alternative-id-types cannot be set through the command line
#      -> it is recommended to write md5 as last id type code to 
#         be used, so output  has the protein md5 at least
#           -> md5 is a checksum of the protein sequence (sequence is 
#              transformed to a unique code (shorter than the sequence 
#              itself))
#
# Attention! If you do not set at least one type of code for which you 
#            can be sure there will be a value (eg. md5) PIANA might
#            have an error when outputting results (because it won't
#            know which name to use for that protein)
#

alternative-id-types=md5

# Set here which species you want your output proteins to be
#
#   - output-proteins-species determines the species that the 
#     output_proteins must be in order to be printed
#         
#      valid values are:   
#              -> all: will print network proteins regardless of the 
#                      species
#              -> and those names in the NCBI database 
#      (egs: all, yeast, human, Candida albicans SC5314,...)
#
#     -> you must write the complete name or it won't work.
#         If you don't know which is the complete name for
#         your species you can look at the website:
#    http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Taxonomy
#    (alternatively, you can look into table species of your
#    PIANA mysql database)
#
#     -> this parameter only affects what you get in the output! The  
#        network is allowed to contain proteins from other species.
#        You can control which species is used to build the network 
#        with parameter input-proteins-species
#
#     -> when printing interactions, setting this command to a species
#        guarantees that at least one of the two proteins belongs to
#        that species. Then, you can make the distinction between
#        interactions where both proteins are of the same species
#        by looking to the species field ('same' when both proteins
#        are of the same species, 'different' when only one of the
#        two proteins is of the species you chose)
#
#     -> when printing information, only proteins of this species 
#        will be shown
#
#     -> connecting information (ie. linker proteins) of proteins is 
#        not affected by output-proteins-species
#
#     -> setting a species is recommended (ie. do it) when using geneName
#         as protein code (duplications across species)
#
#     -> this currently applies to outputting the following information:
#               - proteins information
#               - translating codes
#               - printing interactions and network 
#               - printing expansions (not to mode "add" in expansions)
#

output-proteins-species=all

# Set here the results prefix that will identify your results
#
#   - results-prefix will precede each results file. A file extension 
#     describing the results file content will be added to results-prefix 
#     (eg. interaction table will be printed to results-prefix.print-table)
#
results-prefix=blank

# Set here the directory where your results will be saved
#
#   - results-dir is the directory where results will be saved 
#     (e.g. /home/pepito/piana/results/ )
#       -> if it is equal to blank, results are saved in piana execution 
#          directory
#       -> you must place the slash at the end of the path! 
# 

results-dir=./


# *********************************
# Extra data to be added to network
# *********************************

# Set here the keywords that you want to use for your analysis: this
# is a way that PIANA provides to detect proteins related to specific
# words
#
#   -> write a colon-separated list of keywords 
#        - these are the keywords that are used to detect whether the
#          proteins of the network are related to something you are 
#          interested in
#        - for example, if you are interested in cancer, you could write: 
#          list-keywords=cancer:carcinoma:onco:tumor:apoptosis 
#
#   -> the network produced by command print-network will highlight in 
#      red those proteins that contain one or more keyword in their 
#      function, description or name
#   -> the output produced by commands print-*-prots-info will:
#          - in format-mode 'txt': write tokens user_keyword=word for 
#            those proteins that contain the keyword in their function, 
#            description or name
#          - in format-mode 'html': underline the proteins  that contain 
#            the keyword in their function, description or name
#   -> for command print-table, when using format-mode 'html', this list 
#      of keywords is used to underline the proteins that contain the 
#      keyword
#
#   -> list-keywords cannot be set through the command line: it must be
#      done here
#   -> if you do not want to use keywords for your analysis, leave this 
#      to blank

list-keywords=blank


# Set here the names of files that contain proteins that you have found to  
# be over/under expressed. 
#
# These files will be used for several things:
#
#   1) to highlight over/under expressed proteins in the network 
#      (in the image file (dot or node attributes for cytoscape) 
#      over expressed proteins will appear as a box with red border. 
#      under-expressed proteins as a box with green border)
#        -> Colors can be changed in PianaGlobals.node_border_colors 
#           dictionary
#      Text and HTML tables will also provide information on the
#      expression associated to each protein in the network.
#
#   2) to be able to select proteins from the network that are over/under
#      expressed (eg. in PIANA commands such as match-proteins-to-pathways)
#
# -> these files contain one protein per line, using a code of type 
#    indicated in parameter expression-id-type
#    -> first column must be the protein name. Subsequent columns
#       can have extra information (which will be ignored)
#    -> expression-id-type can be any of the PIANA id types 
#        --> you can  do $> python piana.py --help, which 
#            will display valid input values for proteins id types
#
# -> input-proteins-species will be used to retrieve the proteins in these
#    files: therefore, if you are using a list of geneNames for your
#    over/under expressed lists, set your input-proteins-species to a
#    valid value (other ids are less ambiguous and do not necessarily 
#    need to set the species)

file-over-expressed=blank
file-infra-expressed=blank
expression-id-type=blank

# Set here the file names that contain proteins that you want
# to label as "special" in your results
#
#   -> you should use this parameter to highlight specific proteins
#      that are 'important' to  you. For example, if you are 
#      creating a network for proteins obtained in an experiment, and
#      you have a list of proteins that are interesting because
#      they are known to be related to a disease, you can set 
#      here the files that contain those disease-related proteins.
#      These proteins can then be visualized in cytoscape with
#      a different color by mapping their labels to colours
#      (cytoscape attributes *.special_prots.noa)
#      Text and HTML tables will also provide information on 
#      the labels associated to each protein
#
#  -> Moreover, you can add one extra column to special files. This
#     extra column will be taken as a property of the protein, 
#     and will be printed along the protein in output files.
#     For example, if you have data on which is the probability of
#     a protein of being related to cancer, you can create a special
#     file that contains rows in the form of:
#             protein<TAB>probability<NEWLINE>
#             .......<TAB>...........<NEWLINE>
#     These probabilities will then be printed in output files using
#     different formats. For example, when classifying network proteins
#     according to their connections to root proteins, an extra column
#     will be added that says label=prob@0.67
#
#  -> for each file with relevant proteins you must specify the 
#     type of protein identifier you wish to use and a label.
#     The label will be used when printing out results so that
#     you know in which file was that network protein found.
#     For example, you could have files for 5 pathways of 
#     interest, with the corresponding labels, and then 
#     your results will show to which of the 5 pathways
#     does each protein belong to.
#
#  -> the format you must follow is the following:
#     special-proteins=path_to_file1,id_type1,label1:path_to_file2,id_type_2,label2:...
#      -> Eg. special-proteins=/home/brain_cancer.txt,geneName,brain:/home/liver_cancer.txt,geneName,liver
#      -> Attention! Do not end the line with ':'
#      -> Attention! Paths must be absolute paths (i.e. ../../xxxx.txt and ./dfsdf.txt are wrong!!)
#
#  -> each special file is simply a text file with one protein per
#     line (and an optional extra column)
#
special-proteins=blank


>>> Do not remove this line: marks transition from parameters to commands <<<


# ==========================================================================
# configuration of execution commands
# ==========================================================================
# Set here the commands that piana.py  will execute
#
# these commands can be ordered as desired by the user: it is up to
# the user to make sure the command sequence makes sense (eg. not
# asking to write a table without building a network first)
#
# commands that can be used are those listed in piana.py --help
#
# some commands require extra information that can be set in this file
# as well (eg. command "species-network" requires a species_name,
# which will be provided as well in this configuration file)
#
# even if you don't want to give any value to the command arguments,
# you must leave the argument and set it to blank
#
# You should choose the commands you want to execute from the list
# following these lines. Remove those commands that you are not
# interested in, and set arguments appropriately for those commands that
# you need. You can see some configuration file examples in this same
# directory under *.piana_conf
#
# ==========================================================================
#
# the following commands perform the actions described in the
# description of this particular configuration file
#
#  - The commands will be executed in the same order as they appear in this 
#    file
#
#  - All commands must be followed by ";", even if they do not have arguments
#    --> the command arguments are separated by ";"
#    --> configuration lines with arguments should not finish with ";"
#
#  - if you don't want the configuration line to set a given argument, write 
#    "blank" after the "=" sign
#
#  - default names for output files (used in case you set it to blank) are: 
#    results_prefix.command_name_creating_output[.format_mode]
#    --> in some cases, extra information will be added to the results file
#          name (eg. "_compact" or "_extended" output mode)
#    --> format_mode is usually added to indicate whether it is a text file 
#          or an html page that has to be visualized with a browser
#
#  - in all commands that set id_type, if nothing is found for id_type 
#    (or input-id-type) alternative-id-types are used instead (and will 
#     print protein codes as "alternative_id_type:protein_id" )
#
# ============================================================================

# *************
# create-report: this command makes PIANA keep a report with links to results
# *************
#
# Place this command before the commands for which you want their results
# to be 'centralized' in a global PIANA report. Once PIANA has finished, 
# you will find under the name you chose (see below) an HTML page describing
# the results and providing links to all files produced by PIANA. You can
# open this file using any internet browser. 
#
#
# It is recommended that if you want a global report you place this command
# at the beginning of the execution section of your configuration file.
#
# Attention! This command is designed for results printed in HTML
#            format. If you are outputting results in TXT format you will
#            still be able to access the results from the report but
#            raw text is not very nice to read on a browser...
#
# Attention! In order for the global report to point correctly to the
#            results files, all PIANA files must be in the same directory.
# 
#  - If you do not want the report to be written to a default file name,
#    then you must specify a value for argument file-name
#     - leave it to blank if you want the report to be written to the
#       default name (ie. results_prefix + "create-report.html")
#       Otherwise, the report will be saved to file_name in directory 
#       results-dir (specified in parameters section)
#       ( an extension '.html' will be automatically added by PIANA to 
#         this file name )
#
#   Attention! if you are asking PIANA to produce more than one report,
#              you must specify a file-name in order to distinguish
#              between them. Otherwise, PIANA will only write (to
#              the default name) one report (for the last set of
#              commands executed).
#

create-report;file-name=blank


# *************
# reset-network: this command resets the network currently in memory
# *************
#
#   Attention! It just resets the network (the input and output parameters are 
#   not reset: input-id-type, output-id-type, etc will be the same)
#   If you need to work with different parameters, I recommend doing a different 
#   configuration file
#
#   - no required parameters
#
#   - it can be used to do operations on several different networks with a unique 
#     configuration file
#     (eg. build one network, get its results, reset the network, build a new 
#      network, get new results)

reset-network;


# ************
# save-network: this command saves the current network into a disk file
# ************
#
#  - file_name is required
#     - the network will be saved to file_name in directory results-dir 
#       (specified in parameters section)
#     - the saved file is not human-readable: it is managed by python  
#       (using cPickle)
#

save-network;file-name=file_name

# ************
# load-network: this command loads into memory the network that was  
#               saved in a file using save-network
# ************
#
#   - file_name is required
#       - the network will be loaded from file_name in directory 
#         results-dir (specified in parameters section)
#
#   Attention! A network can only be loaded to be used with the same
#   database from which the network was created (due to internal piana
#   distribution: proteinPiana identifiers are not coherent across
#   different piana databases)

load-network;file-name=file_name

# ***********
# add-protein: this command adds a protein (and its interactions from
#              the piana database) to the network
# ***********
#     
#   - network doesn't have to exist previously: it can be built from 
#     a single protein through "add-protein"
#
#   - required parameters (either from command line or from the "parameters" 
#     section of this file) are:
#       -> depth and input-id-type (in case argument 
#          id-type is blank in the execution below)
#
#   - protein_name is required. 
#
#   - if protein code type used for protein-name is different from 
#     input-id-type, then set id-type with the new type of 
#     identifier as shown (eg. input-id-type is geneName but here
#     you want to use GenBank gi to refer to your protein). 
#     if protein-name used is of input-id-type, then set 
#     id-type to blank
#      -> Valid id types are blank (ie use input-id-type) 
#         and those defined in the PIANA reference card
#           --> see comment on input-id-type
#
#   - if the species of this protein is different from 
#     input-proteins-species, then set species-name to the new species 
#     as shown. 
#     if the species of the protein is input-proteins-species, then set 
#     species-name to blank
#     Valid protein species are blank (ie use input-proteins-species), 
#     all (use all species) and those defined in NCBI (eg. human, yeast,
#     ...)	
#      -> this parameter is mainly useful for protein codes that do not 
#         implicitly establish their species (eg. geneName)
#
#
#  - if you do not want to use the primary PIANA database (the one you specified
#    in the input parameters) for retrieving interactions for this protein you 
#    can set a different one using argument 'piana-db' 
#    (reasons why you would want to do this explained on input parameter
#     use-secondary-db)
#      -> valid values are blank (ie. use primary) and secondary
#         -> if you set it to secondary, the database that will be used to
#            retrieve interactions for this protein will be the one that is
#            written as secondary on variables piana_secondary_* of file 
#            piana/code/utilities/piana_configuration_parameters.py
#         -> Attention: primary and secondary database must be synchronized
#            Read more about this on description of input parameter 
#            use-secondary-db
#         -> Attention: if you want to force a specific PIANA database for a
#            protein, make sure of the following:
#                -> either both databases are completely synchronized (the  
#                   secondary PIANA database is a superset of the primary, both
#                   for proteins and interactions)
#                -> or make sure that this protein does not appear in other 
#                   input lists to this network.
#

add-protein;protein-name=protein_name;id-type=blank;species-name=blank;piana-db=blank


# *****************
# add-proteins-file: this command adds proteins (and their
#                    interactions from the piana database) from 
#                    a file to the network
# *****************
#   
#   - network doesn't have to exist previously: it can be built 
#     with "add-proteins-file"
#
#   - complete_path_to_file is required
#     -> file with input proteins must have one protein per line 
#     -> proteins in this file must all be of the same type
#
#   - if the type of protein code used in the file is different 
#     from input-id-type then set id-type
#      -> valid id types are blank (ie use 
#         input-id-type) or those defined in 
#         the PIANA reference card
#           --> see comment on input-id-type
#
#   - if species-name is different from input-proteins-species, 
#     then set the new species as shown. Otherwise, set to blank
#     Valid protein species are blank (ie use input-proteins-species), 
#     all (use all species) and those defined in NCBI
#      -> this parameter is mainly useful for protein codes that 
#         do not implicitly establish their species (eg. geneName)
#
#  - if you do not want to use the primary PIANA database (the one you specified
#    in the input parameters) to retrieve interactions for the proteins in this
#    file you can set a different one using argument 'piana-db' 
#    (reasons why you would want to do this explained on input parameter
#     use-secondary-db)
#      -> valid values are blank (use primary) and secondary
#         -> if you set it to secondary, the database that will be used to
#            retrieve interactions for these proteins will be the one that is
#            written as secondary on variables piana_secondary_* of file 
#            piana/code/utilities/piana_configuration_parameters.py
#         -> Attention: primary and secondary database must be synchronized
#            Read more about this on description of input parameter 
#            use-secondary-db
#         -> Attention: if you want to force a specific PIANA database for a
#            list of proteins, make sure of the following:
#                -> either both databases are completely synchronized (the  
#                   secondary PIANA database is a superset of the primary, both
#                   for proteins and interactions)
#                -> or make sure that none of the proteins in this list appears
#                   in other input lists to this network.
#                
#
#   Attention!!! proteins in this file must all be of the same
#   type of identifier. If you have proteins that are of a different code type, 
#   you must split the proteins in as many files as different types of
#   identifiers there are, and add file by file separately using this command

add-proteins-file;file-name=complete_path_to_file;id-type=blank;species-name=blank;piana-db=blank


# *********************
# add-interactions-file: this command adds interactions from a file to
#                        the network
# *********************
#   
#   - network doesn't have to exist previously: it can be built with 
#     "add-interactions-file"
#
#   - complete_path_to_file is required
#      -> file with input interactions must have one interaction per 
#         line
#      -> the input file format must be the following:   
#     protein_a<TAB>protein_b<TAB>source_db<TAB>method<TAB>confidence
#           -> the format is described in detail on file 
#              piana/code/dbParsers/piana_text_intParser/README.piana_interaction_data_format
#               -> source_db must be a db appearing in 
#                  PianaGlobals.interaction_databases
#                        -> if you are using interactions from a 
#                           database that does not appear in this 
#                           list and you do not want to add a label 
#                           to PianaGlobals.interaction_databases 
#                           you can use 'user' as your source db
#               -> method must be a method appearing in 
#                  PianaGlobals.method_names
#
#   - if the type of protein code used in the file is different 
#     from input-id-type then set id-type
#      -> valid id types are blank (ie use input-id-type) 
#         or those defined in the PIANA reference card
#           --> see comment on input-id-type
#
#   - proteins in the interactions file must all be of the same type
#
#   - this command does not add any interactions from the piana 
#     database: if you want as well the interactions from the 
#     database you must create a file with proteins and use 
#     command add-proteins-file
#
#   Attention: all interactions in the file will be added!
#              No restrictions applied... that means list-source-dbs
#              list-source-methods and use-self-ints have no effect
#              on this command
#              You are responsible for having the interactions you
#              want to use on your interactions file
#              (if you think it is important for you to apply
#               restrictions to your file, send me an email and 
#               I will do it...)

add-interactions-file;file-name=complete_path_to_file;id-type=blank


# ***************
# species-network: this command builds a network for all proteins in a
#                  given species
# ***************
#   
#   Executing this command will replace the existing network with a new
#   network. Moreover, this command ignores the input list of proteins
#   and the species set in the input section: it will build a network 
#   using all proteins and all interactions of a given species.   
#
#   - a network must not previously exist (ie. commands build-network 
#     and add-protein* not active)
#
#   - to set the species for which you want to load the network, 
#     you have two options (one of the two arguments must be set 
#     to blank and the other to a correct value):
#
#       - tax_id  
#         -> valid taxonomy ids are those defined by the NCBI  
#           (9606 for human, 7227 for Drosophila melanogaster, ... )
#
#       - species_name 
#          -> valid species names are those in the NCBI database 
#             (human, yeast, Arabidopsis thaliana, ...)
#          -> if the species name given has multiple corresponding
#             taxonomy ids, the network will contain proteins 
#             from these multiple taxonomy ids 
#             (eg. "rat" is tax_id 10116 and 10114)
#
#         Attention: if both arguments are different from blank, 
#                    tax_id will be used
#                    if none of the arguments is set to a value,
#                    an error will be raised
#
#  Attention! use this command at your own discretion... 
#             networks can be huge

species-network;species-name=blank;tax-id=blank

# ***********************
# database-method-network: this command builds a network for all
#                          interactions in a given database and/or 
#                          a given method
# ***********************
#
#    This command can be useful to build a network that contains 
#    all interactions in a given database... for example, if you 
#    want to visualize the network for a database that you have 
#    inserted into a piana database
#   
#   - a network must not previously exist 
#     (ie. commands add-protein* can not appear in the same 
#      configuration file as database-method-network)
#       --> this command does not require a list of proteins, 
#           since it takes all interactions for a given database 
#           and method
#
#   - database_name is required
#     -> valid databases names are all (all databases taken into 
#        consideration) and those in 
#        PianaGlobals.interaction_databases
#     -> use at your own discretion... networks can be huge
#
#   - method_name is required
#     -> valid method names are all (all methods are taken into 
#        consideration) and those in PianaGlobals.method_names
#     -> use at your own discretion... networks can be huge
#
#   - species_name is required: network will only contain 
#     interactions between proteins of species_name
#     -> valid species names are all and those in the NCBI 
#        database 
#     -> if the species name given has multiple corresponding 
#        taxonomy ids, the network will contain proteins from 
#        these multiple taxonomy ids (eg. "rat" is tax_id 10116 
#        and 10114)
#
#   Attention! hub_threshold parameter does not affect this 
#              command: all interactions will be added regardless 
#              of the hub_threshold value
#

database-method-network;database-name=database_name;method-name=method_name;species-name=species_name


# ***********
# print-table: this command prints a table with all interactions in
#              current network
# ***********
#   
#   - if you do not want the table to be printed to default name 
#     then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a 
#         file name or screen if you want the results printed 
#         to stdout
#
#   - if you want the type of protein code used for printing 
#     the table different from output-id-type then set 
#     id-type.
#     -> valid id types are blank (ie use output-id-type) 
#        or those defined in the PIANA reference card 
#           --> see comment on input-id-type
#
#   - print-mode is required: set which proteins will appear in 
#     output
#      -> all: prints all interactions in the network
#      -> all_root: prints all interactions in the network where 
#         at least one of the proteins is a root protein
#      -> only_root: prints only interactions between root proteins 
#         in network
#      -> connecting will print only interactions between root 
#         proteins and those proteins that connect more than one 
#         root protein (linker proteins)
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#
# -> the output for all print-table commands in format-mode == html  
#    is described in the output file itself
#
# -> the output for all print-table commands in format-mode == txt  
#    is (all in one line... separated in lines for the sake of clarity):
#
#    connectivity=num of root prots connected by the not-root-prot of the pair (CURRENTLY NOT WORKING!)
#    <TAB>
#    protein_1=protein_1 code using the type chosen by user
#    <TAB>
#    neighbours_1=number of neighbours protein 1
#    <TAB>
#    root_1=(is-root or not-root) for protein 1
#    <TAB>
#    expression_1=(None, over_expressed or infra_expressed) expression info prot 1
#    <TAB>
#    protein_2=protein_2 code using the type chosen by user
#    <TAB>
#    neighbours_2=number of neighbours protein 2
#    <TAB>
#    root_2=is-root or not-root for protein 2
#    <TAB>
#    expression_2=(None, over_expressed or infra_expressed) expression info prot 2
#    <TAB>
#    location=proteins are in same cellular location (y or n)
#    <TAB>
#    species=proteins are of same species (y or n)
#    <TAB>
#    db=source database where interaction appears
#    <TAB>
#    db=source database where interaction appears
#    <TAB>
#    db=..............
#    <TAB>
#    method=method used to detect interaction
#    <TAB>
#    method=method used to detect interaction
#    <TAB>
#    method=..............
#    <NEW_LINE>

print-table;output-target=blank;id-type=blank;print-mode=print_mode;format-mode=format_mode

# ***************************
# print-table-db-intersection: this command prints a table with
#                              interactions that appear in the 
#                              intersection of several databases
# ***************************
#  
#  This command will only print those interactions that appear in all
#  the databases given by the user as argument. For example, if the
#  network only has two interactions, one extracted from dip and the
#  other one extracted from dip and mips and the user sets list-dbs to
#  dip:mips, then only the second interaction will be printed by this
#  command
#   
#  - if you do not want the table to be printed to default name then 
#    you should set output-target to your own file name
#     -> output-target can be blank (ie. use default name), a file 
#        name or screen if you want the results printed to stdout
#
#  - if you want the type of protein code used for printing the 
#    table different from output-id-type then set id-type.
#     -> valid id-type values are blank (ie use 
#        output-id-type) or those defined in 
#        the PIANA reference card.
#          -> see comment on input-id-type
#
#   - list-dbs is required: used to set the databases where the 
#     interactions must appear in order to be printed
#         -> valid database names are those defined in 
#            PianaGlobals.interaction_databases
#         -> for example: list-dbs=dip:string:ori
#
#   - print-mode is required: set which proteins will appear in 
#     output
#      -> all prints all interactions in the network
#      -> all_root prints all interactions in the network where at 
#         least one of the proteins is a root protein
#      -> only_root prints only interactions between root proteins 
#         in network
#      -> connecting will print only interactions between root 
#         proteins and those proteins that connect more than one 
#         root protein
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   - for obtaining several intersections, just repeat this command 
#     changing the dbnames
#
#   - the default results file name will describe the intersecting 
#     dbs
#
#   output format described in command print-table


print-table-db-intersection;output-target=blank;id-type=blank;print-mode=print_mode;list-dbs=dbname1:dbname2:dbname3:...;format-mode=format_mode


# *************
# print-network: this command prints a  file with all the interactions
#                in current network. Output formats: DOT and SIF
# *************
#   
#   - if you do not want the network to be printed to default name 
#     then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a file
#          name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     network different from output-id-type then set id-type.
#      -> valid id-type values are blank 
#        (ie use output-id-type) or those defined in 
#        the PIANA reference card.
#           --> see comment on input-id-type
#
#   - print-mode is required: set which proteins will appear in output
#      -> all prints all interactions in the network
#      -> all_root prints all interactions in the network where at 
#         least one of the proteins is a root protein
#      -> only_root prints only interactions between root proteins 
#         in network
#      -> connecting prints only proteins that are either root or 
#         linkers (proteins that connect root nodes between them)
#
#   - format-mode is required: defines the format of the output
#       - dot : DOT format (compatible with graphviz package and others)
#       - sif : SIF format (compatible with cytoscape and others)    
#
#
#   -> The parameters that PIANA will use to generate the .DOT file 
#      can be easily changed in file  piana/code/PianaDB/PianaGlobals.py 
#      -> section "PARAMETERS FOR OUTPUT .DOT NETWORK" describes all 
#         the parameters that you can modify
#
#
#   --> The color of the node box is an indication of the type of 
#       protein
#       Node fill colors can be easily changed in file 
#       piana/code/PianaDB/PianaGlobals.py 
#         (dictionary node_fill_colors in section COLOR CODES FOR 
#          OUTPUT NETWORK)
#
#       Currently, these are the meanings of the colors:
#            - blue: standard protein
#            - yellow: root protein
#            - red: protein that contains a keyword (see list-keywords 
#                   in input parameters) in its function, description 
#                   or name
#            - orange: root protein that contains a keyword (see 
#                      list-keywords in input parameters)  in its 
#                      function, description or name
#
#   --> The color of the border of the node box is an indication 
#       on how that node was added to the network
#       Border colors can be easily changed in file 
#       piana/code/PianaDB/PianaGlobals.py 
#         (dictionary node_border_colors in section 
#          COLOR CODES FOR OUTPUT NETWORK)
#
#       Currently, these are the meanings of the border colors:
#            - black: protein from the database
#            - blue: protein added to the network after a prediction 
#                    based on interologs
#            - green: protein found in the file with under expressed 
#                     proteins (from a microarray experiment)
#            - red: protein found in the file with over expressed 
#                   proteins (from a microarray experiment)
#       
#
#   --> The color of the edge line is an indication of the source 
#       database that had that interaction
#       Edge colors can be easily changed in file 
#       piana/code/PianaDB/PianaGlobals.py 
#         (dictionary interaction_source_databases_colors in section 
#          COLOR CODES FOR OUTPUT NETWORK)
#
#       Currently, these are the meanings of the edge line colors:
#            - red: DIP
#            - green: ori (predictions by distant 
#                     sequence/structure patterns similarity)
#            - magenta: STRING
#            - orange: prediction using interologs 
#                     (either by COG, SCOP, ...)
#            - dark green: MIPS
#            - blue: HPRD
#            - grey: BIND
#            - yellow: user (an interaction added by user with 
#                      command add-interactions-file given label 
#                      'user')
#            - cyan: interaction appears in more than one database 
#                    (you can see the list of all the dbs where it 
#                    appears by looking to the result file .print-table)
#
#      Attention!: since the colors change depending on the graphics
#      card, we have created a GIF image indicating to which database
#      corresponds each edge color:
#      piana/docs/documentation/network_colors.gif
#
#      Attention!: if you add a new database, you have to add the 
#      name of the database and a new color
#      in dictionary interaction_source_databases_colors
#
#   --> The style of the edge line is an indication on how that 
#       interaction was added to the network
#       Edge styles can be easily changed in file 
#       piana/code/PianaDB/PianaGlobals.py 
#         (dictionary interaction_line_styles in section 
#          COLOR CODES FOR OUTPUT NETWORK)
#
#       Currently, these are the meanings of the edge line styles:
#            - solid: interaction from the database
#            - dashed: added to the network as a db interaction 
#                      of a node that was added when doing 
#                      predictions
#            - dotted: prediction by interologs
#
#   The output of this command can be used to create an image of the
#   network (read piana/code/execs/README.visualize_piana_network)


print-network;output-target=blank;id-type=blank;print-mode=print_mode;format-mode=format_mode

# *****************************
# print-network-db-intersection: this command prints a DOT or SIF file with
#                                interactions that appear in the 
#                                intersection of several databases
# *****************************
#  
#  This command will only print those interactions that appear in all
#  the databases given by the user as argument. For example, if the
#  network only has two interactions, one extracted from dip and the
#  other one extracted from dip and mips and the user sets list-dbs to
#  dip:mips, then only the second interaction will be printed by this
#  command
#   
#   - if you do not want the network to be printed to default name 
#     then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file name 
#     or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     network different from output-id-type then set 
#     id-type.
#      -> valid id-type values are blank (ie use 
#         output-id-type) or those defined in 
#         the PIANA reference card.
#           --> see comment on input-id-type
#
#   - list-dbs is required: used to set the databases where the 
#     interactions must appear in order to be printed
#         -> valid database names are those defined in 
#            PianaGlobals.interaction_databases
#         -> for example: list-dbs=dip:string:ori
#
#   - print-mode is required: set which proteins will appear in 
#     output
#      -> all prints all interactions in the network
#      -> all_root prints all interactions in the network where 
#         at least one of the proteins is a root protein
#      -> only_root prints only interactions between root proteins 
#         in network
#      -> connecting prints only proteins that are either root or 
#         linkers (proteins that connect root nodes between them)
#
#   - format-mode is required: defines the format of the output
#       - dot : DOT format (compatible with graphviz package and others)
#       - sif : SIF format (compatible with cytoscape and others)    
#
#   - for obtaining several intersections, just repeat this command 
#     changing the dbnames
#
#   - the default results file name will describe the intersecting dbs
#
# Read the description of command print-network for a detailed
# explanation of the output of this command
#
# The output of this command can be used to create an image of the
# network (read piana/code/execs/README.visualize_piana_network)


print-network-db-intersection;output-target=blank;id-type=blank;print-mode=print_mode;list-dbs=dbname1:dbname2:dbname3:...;format-mode=dot


# ********************
# print-all-prots-info: this command prints information (protein
#                       description, other codes, ...) about all 
#                       proteins in network
# ********************
#   
#   - if you do not want the information to be printed to default
#     name then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file 
#     name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     information different from output-id-type then set 
#     id-type.
#      -> valid id-type values are blank (ie use 
#         output-id-type) or those defined in 
#         the PIANA reference card.
#           --> see comment on input-id-type
#
#   - output-mode is required: used to set how information will be 
#     printed
#      -> valid output-mode values are: 
#           - extended (multiple lines, all available information)
#           - compact (one line, connected root proteins and 
#             description)
#      -> default results file name will describe if output_mode 
#         is extended or compact by placing '.compact.' or 
#         '.extended.' in the file name
#      -> in compact mode, when a user keyword from list-keywords 
#         appears, "user_keyword=the_word_that_appears" will be
#          printed to the protein information line
#      -> in compact mode, a list of the protein names associated 
#         to the protein is printed after the protein information. 
#         The first name, the one that is used to identify the 
#         protein, is the "unique" name that PIANA has assigned 
#         to that protein by means of inference and name checking
#      -> Please, read README.piana_tutorial section "PIANA and 
#         protein names" for better understanding how PIANA 
#         handles protein names
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#
#   the output for print-all-prots-info and print-root-prots-info 
#   commands  in format-mode == txt and output-mode 'compact' is:
#
#     (format-mode == txt and output-mode 'compact'  is the only 
#      type of output you might find yourself parsing... 
#      the other outputs are thought for looking directly at them, 
#      as html tables have column titles)
#
#    (for clarity, the format described here appears in multiple 
#     lines... the real output is all in the same line: one line 
#     of information for each protein)
#
#  protein name
#  <TAB>
#  ['protein description 1', 'prot desc 2', ...] (a protein can have several descriptions associated)
#  <TAB>
#  ['protein function 1', 'prot funct 2', ...] (a protein can have several functions associated)
#  <TAB>
#  root=value (where value is 1 when the protein is a root protein, and 0 otherwise)
#  <TAB>
#  expression=expression_value (None, over_expressed or infra_expressed)
#  <TAB>
#  special_labels=['special_label_1', 'special_label_2', ...]
#  <TAB>
#  tax_ids=['tax_id1', 'tax_id2', ...]
#  <TAB>
#  id_type:protein<TAB>id_type:protein<TAB>.... (as many fields as identifiers this protein has)
#                                                           -> valid id types are described in the PIANA reference card
#  <NEWLINE>
#
#  (if you need more information about these  proteins, 
#   just look for it in the results files of command print-all-prots-info)

print-all-prots-info;output-target=blank;id-type=blank;output-mode=output_mode;format-mode=format_mode

# *********************
# print-root-prots-info: this command prints information (protein
#                        description, other codes, ...) about root 
#                        proteins in network
# *********************
#   
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file 
#     name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     information different from output-id-type then set 
#     id-type.
#      -> valid id-type values are blank (ie use 
#         output-id-type) or those defined in 
#         the PIANA reference card.
#           --> see comment on input-id-type
#
#   - output-mode is required: used to set how information will be 
#     printed
#      -> valid output-mode values are: extended (multiple lines, 
#         all available information) or compact (one line, 
#         connected root proteins and description)
#      -> default results file name will describe if output_mode 
#         is extended or compact
#      -> in compact mode, when a user keyword from list-keywords 
#         appears, "user_keyword=the_word_that_appears" will be 
#         printed to the protein information line
#      -> in compact mode, a list of the protein names associated 
#         to the protein is printed after the protein information. 
#         The first name, the one that is used to identify the 
#         protein, is the "unique" name that PIANA has assigned 
#         to that protein by means of inference and name checking
#      -> Please, read README.piana_tutorial section "PIANA and 
#         protein names" for better understanding how PIANA 
#         handles protein names
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
# the output of this command is described in command 
# print-all-prots-info
  

print-root-prots-info;output-target=blank;id-type=blank;output-mode=output_mode;format-mode=format_mode


# ************************
# print-connect-prots-info: this command identifies linker proteins
#                           (proteins that connect root nodes between 
#                           them) and prints information (protein 
#                           description, other codes, linked roots, 
#                            ...) about them
# ************************
#   
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file 
#     name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     information different from output-id-type then set 
#     id-type.
#      -> valid id-type values are blank (ie use 
#         output-id-type) or those defined in 
#         the PIANA reference card.
#           --> see comment on input-id-type
#
#   - output-mode is required: used to set how information will be 
#     printed
#      -> valid output-mode values are: extended (multiple lines, 
#         all available information) or compact (one line, 
#         connected root proteins and description)
#      -> default results file name will describe if output_mode 
#         is extended or compact
#      -> in compact mode, when a user keyword from list-keywords 
#         appears, "user_keyword=the_word_that_appears" will be 
#         printed to the protein information line
#      -> in compact mode, a list of the protein names associated 
#         to the protein is printed after the protein information. 
#         The first name, the one that is used to identify the 
#         protein, is the "unique" name that PIANA has assigned 
#         to that protein by means of inference and name checking
#      -> Please, read README.piana_tutorial section "PIANA and 
#         protein names" for better understanding how PIANA 
#         handles protein names
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   the output of this command looks like this:
#
#       (for clarity, the format described here appears in multiple 
#        lines... the real output is all in the same line: one line of
#        information for each protein)
#
#  <NEWLINE>
#  'protein name' connects # root_proteins (with # being the number of root proteins that this linker protein connects)
#  <TAB>
#  ['root_protein_1', 'root_protein_2', ...]
#  <TAB>
#  ['source interaction database 1', 'source inter db 2', ...]  (list of interaction dbs where these interactions were retrieved from)
#  <TAB>
#  ['protein description 1', 'protein desc 2', ...]  (a protein can have several descriptions associated)
#  <TAB>
#  ['protein function 1', 'protein func 2', ...] (a protein can have several functions associated)
#  <TAB>
#  expression=expression_value (None, over_expressed or infra_expressed)
#  <TAB>
#  special_labels=['special_label_1', 'special_label_2', ...]
#  <TAB>
#  tax_ids=['tax_id1', 'tax_id2', ...]
#  <NEWLINE>
#
#  (if you need more information about these linker proteins, 
#   just look for it in the results files of command print-all-prots-info)
 
print-connect-prots-info;output-target=blank;id-type=blank;output-mode=output_mode;format-mode=format_mode

# ************************
# classify-network-proteins: this command writes to a file all 
#                            network proteins with a label
#                            indicating whether they are a root
#                            protein, a linker or a partner, as 
#                            well as "special" labels associated
#                            to them.
# ************************
#  
#  This command can be useful for some analyses where you need to 
#  know all proteins in the network and whether they were originally
#  given by the user (root proteins), they connect two or more of the
#  root proteins (linker proteins) or they are just connected to
#  one root protein (partner proteins).
#
#    Moreover, next to the classification information, if wished, this  
#  command also writes labels associated to the protein, according to  
#  input parameter special-proteins. Leave this input parameter to blank
#   if you do not want this information
#
#    Moreover, next to the previous information, if wished, this command
#  also writes expression information associated to the protein, 
#  according to the expression files of parameters file-over-expressed
#  and file-infra-expressed
#
#  Attention: this command has not been prepared to interpret networks
#             that were built for depths 2 or higher. A protein that 
#             is connected to a root protein via another protein will
#             be considered as a partner.
#
# 
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file 
#     name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     information different from output-id-type then set 
#     id-type.
#      -> valid id-type values are blank (ie use 
#         output-id-type) or those defined in 
#         the PIANA reference card.
#           --> see comment on input-id-type
#
#   the output of this command looks like this (everything in one line):
#
#  protein_id<TAB>class=value<TAB>expression=value<TAB>
#  label=value1<TAB>label=value2<TAB>...<NEWLINE>
#  
#    -> where class value can be: "root", "linker_N" or "partner"
#            -> N will have as value the number of root proteins 
#               connected by that linker
#    -> and label values are labels specified in special-proteins 
#       parameter
#    -> and expression value can be over_and_under_expressed, 
#       over_expressed, under_expressed or None	
#
#  (if you need more information about these proteins, 
#   just look for it in the results files of command print-all-prots-info)
 
classify-network-proteins;output-target=blank;id-type=blank

# ***************************
# protein-code-2-protein-code: transforms codes from input-file (which
#                              are of type input-id-type) to
#                              output-id-type
# ***************************
#
#   This command is thought to be used independently from other
#   commands: it uses piana modules to transform proteins from one
#   code to another. It doesn't make use of the network itself, it
#   just outputs a table with protein code equivalences. Moreover,
#   most of the input and output parameters are not used when
#   executing this command (ie. all parameters ignored except for
#   input-file, input-id-type and output-id-type). For
#   example, even if you set output-proteins-species to yeast, the
#   output of this command can contain proteins from all species.
#   (reason: this is not building a network, and therefore, this
#    command is considered as an 'extra' to PIANA and does not
#    use the other parameters)
#
#   - if you do not want the information to be printed to a default 
#     file name then you should set output-target to your own file 
#     name
#      -> output-target can be blank (ie. use default name), a file 
#         name or screen if you want the results printed to stdout
#
#   - retrieves proteins to be "translated" from input-file (which 
#     is set through the command line or above in this file)
#     -> uses input-id-type as the type of code of proteins 
#        in the input file
#     -> uses output-id-type as the type of code to which 
#        proteins will be "translated"
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format

protein-code-2-protein-code;output-target=blank;format-mode=format_mode


# ***********************
# protein-code-2-all-info: gets information for proteins in input-file 
#                         (which are of type input-id-type)
# ***********************
#
#   This command is thought to be used as a stand alone tool: it uses
#   piana modules to get information from proteins. It doesn't make use
#   of the network itself, it just outputs a table with protein
#   information. Moreover, most of the input and output parameters 
#   are not used when executing this command (see comments on previous
#   command protein-code-2-protein-code)
#
#   The format followed for the output is described in command 
#   print-all-prots-info
#
#   - if you do not want the information to be printed to a default 
#     file name then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a file 
#         name or screen if you want the results printed to stdout
#
#   - will output information for proteins in input-file (which is 
#     set through the command line or above in this file)
#     -> uses input-id-type as the type of code of proteins 
#        in the input file
#
#   - if you want the type of protein code used for printing the 
#     proteins different from output-id-type then set 
#     id-type.
#      -> valid id types are blank (ie use output-id-type) 
#         or those defined in the PIANA reference card.
#           --> see comment on input-id-type
#
#   - format-mode is required: set which format will be used for printing
#     the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   - output-mode is required: used to set how information will be 
#     printed
#      -> valid output-mode values are: extended (multiple lines, 
#         all available information), compact (one line, 
#         connected root proteins and description) 
#      -> default results file name will describe if output_mode 
#         is extended or compact
#      -> in compact mode, when a user keyword from list-keywords 
#         appears, "user_keyword=the_word_that_appears" will be 
#         printed to the protein information line
#      -> in compact mode, a list of the protein names associated 
#         to the protein is printed after the protein information. 
#         The first name, the one that is used to identify the 
#         protein, is the "unique" name that PIANA has assigned 
#         to that protein by means of inference and name checking
#      -> Please, read README.piana_tutorial section "PIANA and 
#         protein names" for better understanding how PIANA 
#         handles protein names
#

protein-code-2-all-info;output-target=blank;id-type=blank;format-mode=format_mode;output-mode=output_mode


# ***************************
# create-random-files: creates files that contain random
#                      proteins
# ***************************
#
#   This command is thought to be used independently from other
#   commands: it uses piana modules to create text files that
#   contain random groups of proteins. It doesn't make use of 
#   the network itself, it just creates the files. Moreover,
#   most of the input and output parameters are not used when
#   executing this command
#
#   This command can be useful for certain bioinformatics
#   experiments where you have to check how a random group
#   of proteins would perform in a given analysis. You can
#   use this command to create the random group of proteins 
#   (and write it to a text file) and then run piana again 
#   (with a separate configuration file) to use these random 
#   proteins as input file
#
#   This command can also be used to get all proteins in 
#   the pianaDB for which there are known interactions
#
#   If you set the appropriate arguments to this command, you
#   can get information about these random proteins on
#   their expression (according to the expression input parameters)
#   and their special labels (according to input parameter 
#   special-proteins)
#
#   output-id-type will be used as the identifier for
#   the proteins written to the files
#  
#   output-proteins-species will be used to make sure that the random
#   proteins are of that species. If the species is not 
#   relevant to you, set it to all
#
#   - if you do not want the files to be named with the default name
#     (ie. results_dir/results_prefix.*) then you should set
#     files-prefix to the prefix you want to use for your files
#     (eg. my_random_file)
#      -> output files names are formed by adding consecutive integers
#         to results-prefix (set in the input parameters section)
#      -> the output files will contain one protein per line, using the
#         type of identifier specified by the user on parameter 
#         output-id-type (from the parameters section of the
#         configuration file)
#
#   - num-files is required: set how many files with random proteins
#     you want to create
#
#   - num-prots is required: set how many proteins will be written
#     to each file (ie. the size of your random group)
#          --> if num-prots is 0, no random sampling is done: all
#              proteins that respect the criteria are written to
#              the output file
#                  --> ie. one can use this command to create a
#                          file that has all proteins in PIANA that
#                          have interactions, with special labels
#                          attached to them.
#
#   - force-ints is required: set whether you require the random
#     proteins to have interactions or not
#     Valid values are:
#       - yes: only proteins that have at least one interaction
#               will be used to create the random groups
#       - no: having interactions is not required in order to 
#              appear in the random groups
#
#       -> all input parameters are used to decide whether a 
#          protein has interactions or not. IE. use-self-ints,
#          list-source-dbs, etc etc are relevant to this command!
#
#   - check-expression is required: set whether you want to write next
#     to the proteins info about the expression of that protein 
#       - yes: expression (from expression input parameters ) will be 
#              written next to the protein if it appears in the 
#              expression files
#
#              -> a field expression=value will be added to the
#                 protein line
#       - no: file will only contain one column
#
#   - check-special is required: set whether you want to write next
#     to the proteins the labels that indicate what type of special
#     protein they are.
#       - yes: labels (from parameter special-proteins) will be written
#              next to the protein if it appears in the special-proteins
#              files
#       - no: file will only contain one column
#
#
#   The output of this command will therefore be:
#      (all in one line, separated by TABs and ending with NEWLINE)
#
#   column 1 (always appears): protein 
#   column N (optional): expression=expression_value (expression_value 
#                                                     can be:
#                                                    - None
#                                                    - over_expressed
#                                                    - under_expressed
#                                                    - over_and_under_expressed)
#   column N+1 (optional): label=special_label
#   column N+2 (optional): label=special_label
#   column N+3 (optional): ....
#        

create-random-files;files-prefix=files_prefix;num-files=num_files;num-prots=num_prots;force-ints=force_ints;check-expression=check_expression;check-special=check_special


# *******************
# expand-interactions: this command predicts interactions of proteins
#                      in the network using interologs (or other 
#                      methods)
# *******************
#   
#   This command propagates interactions between nodes that share 
#   a certain characteristic. For example, this command can be 
#   used to transfer interactions between proteins that have the 
#   same domain (scop) or that belong to the same orthologous 
#   group (cog)
#   
#   Each expand-interactions piana command does the following:
#   
#   For each protein in the network:
#   
#   1. find interactions of this protein in the current network
#   2. find proteins in the database that share a certain 
#      characteristic with this protein (e.g cog code)
#   3. for each protein that shares that characteristic:
#       - find interactions for protein that shares the 
#         characteristic in the database
#       - find interactions for protein that shares the 
#         characteristic in the network
#       - assign to protein being processed all interactions
#         of protein that shares the characteristic
#       - assign to protein that shares that characteristic 
#         all interactions of protein being processed
#   
#   This process can be repeated more than once, to reach 
#   far-fetched deductions 
#   
#     For example, if root protein is A, and if we know 
#     that C and D (yeast) interact, and that A =cog= C 
#     and B =cog= D
#   
#        - simple expansion will predict that A interacts 
#          with D
#        - double expansion will predict that A interacts 
#          with D and that A interacts with B
#          (ie double expansion predicts interactions from 
#           a previous prediction)
#          (this is achieved by executing two consecutive 
#           expand-interactions piana commands)
#
#   - the new interactions (predictions) can be added to 
#     the network or printed out to a results file 
#
#   - expansion-type is required: used to set the type 
#     of expansion to perform
#      -> valid expansion-type values are those defined 
#         in PianaGlobals.expansion_types (currently can 
#         be cog, scop (ie. scop family), interpro or ec)
#      -> if two proteins share expansion-type, interactions 
#         are interpropagated
#
#   - expansion-nodes is required: used to define which 
#     proteins will be expanded
#      -> valid expansion-nodes values are: all (all 
#         proteins in network are expanded) or root 
#         (only root proteins are expanded)
#      -> if you are looking for new interactions (predictions) 
#         for your input proteins, use root
#      -> if you want to expand all the proteins in the 
#         network (partners of root proteins as well) use all
#      -> root proteins are the proteins used to build the 
#         network (eg. the proteins in input-file)
#
#   - expansion-threshold is required: used to avoid propagating 
#     interactions when there are too many nodes that share the 
#     expansion type
#      -> valid values are: 0 (no thresholds applied) and 
#         positive integers
#      -> depending on the expansion type, the expansion-threshold 
#         to be used varies
#
#   - exp-output-mode is required: used to define if predictions 
#     should be added to network or printed to file
#      -> valid exp-output-mode values are: add (add predictions 
#         to network) and print (print to output-target)
#        -> 'add' will add to the network the predictions found
#            by expansion
#        -> 'print' will print to output-target (or to default 
#           results file) the list of predictions found by expansion
#      -> for example, if you wanted to get predictions for root 
#         nodes using double cog expansion you would first use 
#         command expand-interactions with expansion-nodes=all 
#         and exp-output-mode=add and then another command 
#         expand-interactions with expansion-nodes=root 
#         and exp-output-mode=print. By doing this "double 
#         expansion" you will be predicting interactions based 
#         on a previous expansion
#
#       - if exp-output-mode is add, the following arguments 
#           can be ignored: leave them to blank:
#       - if exp-output-mode is "print" then :
#
#        -> if you do not want the information to be printed 
#           to a default file name then you should set output-target 
#           to your own file name
#             -> output-target can be blank (ie. use default name), 
#                a file name or screen if you want the results printed 
#                to stdout
#
#        -> if you want the type of protein code used for printing 
#           the information different from output-id-type then 
#           set id-type.
#             -> valid id-type values are blank (ie use 
#                output-id-type) or those defined in 
#                the PIANA reference card
#           --> see comment on input-id-type
#
#        -> the results will follow the following format (one 
#           interaction per line):
#
#  protein1<TAB>protein2<TAB>expansion_type<TAB>source_interactionPiana<TAB>source_proteinPiana
#
#           This file can then be used to insert predictions into
#           a PIANA database using parser expansion2piana.py
#           
#           If you are going to insert these predictions into a 
#           PIANA database, I recommend that your output type
#           for protein codes is proteinPiana (to make sure that
#           the prediction refers to that protein sequence). In any
#           case, never use geneNames for creating a list of
#           predictions that is going to be inserted into a
#           PIANA database: geneNames do not implicitly contain
#           the species and can be ambiguous within a species.
#    
#           To learn more about inserting predictions into 
#           PIANA databases, read README.populate_piana_db and
#           README.piana_examples
#
#
#   - We do not recommend doing predictions based on predictions: 
#     ie. we do not recommend executing command expand-interactions 
#     on networks that were built from a database with predictions. 
#     To avoid this, you can use parameters list-source-dbs and 
#     list-source-methods or do what we do internally in our lab: 
#     have two piana databases, one with only experimentally 
#     detected interactions and another one with all interactions.
#
#   - for expansions, I recommend using program 
#     run_piana_protein_by_protein.py instead of piana.py
#     -> the result will be the same if you work with all proteins 
#        at the same time as if you do it one by one
#     -> it is much faster to manage the expansion separately 
#        for each protein
#     -> read README.piana_examples for more info on this
#    

expand-interactions;expansion-type=expansion_type;expansion-nodes=expansion_nodes;expansion-threshold=expansion_threshold;exp-output-mode=mode;output-target=blank;id-type=blank


# *******************
# find-shortest-route: this command finds the shortest route between
#                      two proteins in the network
# *******************
#
#  ATTENTION: this command requires the piana 'advanced mode' or 
#             'developer mode'. By default, all users work in
#             'simple mode'. Therefore, if you want to use this
#             command you'll have to modify your working mode as
#             indicated in section 'PIANA types of users' of file
#             README.piana_tutorial
#     
#   - network must exist before running this command
#
#   - if you do not want the information to be printed to default name 
#     then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file name 
#     or screen if you want the results printed to stdout
#
#   - protein_a_name and protein_b_name are required. 
#
#   - if protein code type used for protein-name is different from 
#     input-id-type, then set id-type with the new type 
#     as shown. 
#     if protein-name used is of input-id-type, then set
#      id-type to blank
#      -> Valid id types are blank (ie use input-id-type) 
#         and those defined in the PIANA reference card
#           --> see comment on input-id-type
#
#   - the output will be written using output-id-type (read from
#     the input parameters section)
#
#     
#   format of the txt output is:
#
#    first line:  protein_a=protein_a_name<TAB>protein_b=protein_b_name<TAB>distance=distance_of_route
#    second line: START=protein_a_name<-->protein_in_route<-->protein_in_route<-->...<-->END=protein_b_name


find-shortest-route;protein-a-name=protein_a_name;protein-b-name=protein_b_name;id-type=blank;output-target=blank


# *******************
# find-distance-group: this command finds a group of proteins that are
#                      at distance N from a query protein
# *******************
#     
#   - network must exist before running this command
#
#   - if you do not want the information to be printed to default name 
#     then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file name 
#     or screen if you want the results printed to stdout
#
#   - protein_name is required. 
#
#   - if protein code type used for protein-name is different from 
#     input-id-type, then set id-type with the new type 
#     as shown. 
#     if protein-name used is of input-id-type, then set
#      id-type to blank
#      -> Valid id types are blank (ie use input-id-type) 
#         and those defined in the PIANA reference card
#           --> see comment on input-id-type
#
#   - distance is required: set the distance between your query 
#     protein and the group of proteins you are searching
#       - when distance is "all", groups 1, 2 and 3 are printed 
#         out
#  
#   - info is used to choose the information that will be printed 
#     next to the proteins at distance N
#      - values admitted are: 
#                     - blank: no info printed
#                     - all: all info known about protein
#                     - scop: scop codes  
#                     - cath: cath codes
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#

find-distance-group;protein-name=protein_name;id-type=blank;distance=distance;output-target=blank;info=blank;format-mode=format_mode


# **************************
# match-proteins-to-pathways: this command finds which pathways
#                             have proteins that appear in the 
#                             current network
# **************************
#
#   this command can be used to study the pathways that are
#   'involved' in the current network, where 'involved' refers
#   to how many proteins of the current network appear as well
#   in a given pathway
#
#   pathways have to be defined by the user, and each pathway
#   must be represented by a different file where all pathway
#   members have been written. Read below for details
#
#
#   A network must exist before running this command.
#
#   - pathways-dir must be set to the directory that holds the
#     files with the pathways. Each pathway is defined by a 
#     different file, and the name of the file must uniquely
#     identify the pathway. The file name will be used 
#     in the output to identify the matched pathways.
#
#   - if protein code type used for the pathways proteins is 
#     different from input-id-type, then set pathway-type  
#     with the new type as shown. 
#     if identifiers used are of input-id-type, then set
#     pathway-type to blank
#      -> Valid id types are blank (ie use input-id-type) 
#         and those defined in the PIANA reference card
#           --> see comment on input-id-type
#
#   - force-expression can be yes or no
#       - no: all proteins in the network will be matched 
#         against the proteins in the pathways
#
#       - yes: only proteins in the network that are over/under
#         expressed will be matched against the proteins in the
#         pathways
#             -> if you set force-expression to yes, then you 
#                must have set as well the expression files
#                in the input parameters section
#   
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a file 
#         name or screen if you want the results printed to stdout
#   
#   - if you do not want the proteins to be printed out using the
#     identifier type output-id-type, then you must write a
#     new id type in parameter id-type. Otherwise, leave
#     it to blank
#     -> Valid id types are blank (ie use output-id-type) 
#        and those defined in the PIANA reference card
#           --> see comment on input-id-type
#
#     Attention! If you want to get a report with interaction files
#                for each protein, it is mandatory that immediately
#                before this command you make a call to print-table
#                using the same type of protein identifier 
#                (eg. uniacc) as the one you are using here
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#

match-proteins-to-pathways;pathways-dir=pathways_dir;pathway-type=blank;force-expression=force_expression;output-target=blank;format-mode=format_mode;id-type=blank


# **************************
# match-proteins-to-gos: this command finds which GO terms
#                        appear more frequently in the current network
# **************************
#
#   this command can be used to study the GO terms that are
#   'involved' in the current network, where 'involved' refers
#   to the frequency a GO term appears in the network
#
#
#   A network must exist before running this command.
#
#   - if protein code type you want for outputting results is 
#     different from output-id-type, then set id-type  
#     with the new type as shown. 
#     if protein-name used is of output-id-type, then set
#     id-type to blank
#      -> Valid id types are blank (ie use output-id-type) 
#         and those defined in the PIANA reference card
#           --> see comment on input-id-type
#
#   - force-expression can be yes or no
#       - no: all proteins in the network will be used for 
#             calculating GO frequencies
#
#       - yes: only proteins in the network that are over/under
#         expressed will be used for calculating GO frequencies
#             
#             -> if you set force-expression to yes, then you 
#                must have set as well the expression files
#                in the input parameters section
#   
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a file 
#         name or screen if you want the results printed to stdout
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   - term-type sets the kind of GO terms that will be used for 
#     calculating the frequencies
#      -> term-type can be "molecular_function", "biological_process" 
#         or "cellular_component"
#
#   - level-threshold is the lowest level of the go terms that will 
#     be retrieved for calculating the frequency
#      -> all GO terms associated to each protein will be retrieved, 
#         then parents of those GO terms until reaching 
#         level-threshold will also be retrieved
#
#      -> GO is a hierarchy organized from an initial root level 
#         (level 0) whose terms become increasingly specific. 
#         Therefore, the higher the level used the more specific the 
#         GO terms will be. To obtain relevant GO terms use level 
#         thresholds between 3 and 5. However, it all depends on how
#         general you want to be in the interpretation of the network.
#
#
#   - go-dbname, go-dbhost, go-dbuser and go-dbpass are the mysql 
#     parameters for your external GO database (in the same way
#     piana-dbname, piana-dbhost, etc)
#      -> attention! this database is different from your standard 
#         pianaDB. This is a different database that has to be created
#         by following the instructions on README.populate_piana_db 
#         section "parse GO"
    
match-proteins-to-gos;id-type=blank;force-expression=force_expression;output-target=blank;format-mode=format_mode;term-type=term_type;level-threshold=level_threshold;go-dbname=go_dbname;go-dbhost=go_dbhost;go-dbuser=go_dbuser;go-dbpass=go_dbpass


# ***********************
# match-proteins-to-spots: this command identifies spots in a 2D gel
#                          by matching MW and/or IP to proteins in 
#                          the network
# ***********************
#
#    we have spot ids from a 2D electrophoresis gel, with their
#    molecular weights (MW) and isoelectric points (IP). Some of those
#    spots were identified by mass spectrometry but other spots were
#    unassigned. We can use PIANA to identify some of those
#    unassigned spots, by comparing the MW and IP of the spots with
#    the MW and IP of the proteins in the network.
#      
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a file 
#         name or screen if you want the results printed to stdout
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   - if you want the type of protein code used for printing the 
#     information different from output-id-type then set 
#     id-type.
#      -> valid id-type values are blank (ie use 
#         output-id-type) or those defined in 
#         the PIANA reference card.
#           --> see comment on input-id-type
#
#   - spots-file-name is a file name following the structure 
#     (one spot per line): 
#         spot_id<TAB>molecular_weight<TAB>isoelectric_point
#      -> where decimals are expressed with "."
#      -> complete_path_to_spots_file can be blank if 
#         spots-file-name was set in the command line
#
#   - list-mw-error and list-ip-error set the error bounds 
#     admitted for the matching of molecular weight and 
#     isoelectric point
#      -> the number of error bounds for mw and ip must 
#         be identical: values can be different, but the 
#         number of values not
#      -> use "." for decimals 
#      -> set to blank if you prefer to use the default 
#         error bounds (which are hard-coded in piana.py)
#      -> to set your own error bounds, write colon-separated 
#         values 
#   (e.g. list-mw-error=0.01:0.02:0.05;list-ip-error=0.1:0.2:0.5)
#     
# Attention!: correspondences that appear in a given error level will
#             not be shown in higher error levels

match-proteins-to-spots;output-target=blank;format-mode=format_mode;id-type=blank;spots-file-name=complete_path_to_spots_file;list-mw-error=blank;list-ip-error=blank


# ********************
# cluster-by-go-terms: this command clusters the proteins of the
#                      network using GO terms
# ********************
#
#   - In order to cluster a network using go terms, a 
#     protein-protein interaction network must previously exist    
#      
#   - if you do not want the clustered network to be printed to 
#     the default file name then you should set output-target to 
#     your own file name
#      -> output-target can be blank (ie. use default name), a 
#         file name or screen if you want the results printed to 
#         stdout
#
#   - term-type sets the kind of GO terms that will be used for
#      the clustering (required)
#      -> term-type can be molecular_function, biological_process 
#         or cellular_component
#
#   - score-threshold is the lowest score obtained by the 
#     similarity function allowed for continuing the clustering
#      -> can be any real number from 0 to 100 (0 will group 
#         all proteins, 100 will not group any proteins). 
#         To obtain a relevant clustered network use score 
#         thresholds between 0.1 and 1
#
#   - sim-mode sets how to calculate distances between two 
#     clusters
#          - random takes a random element from each cluster 
#            and evaluates similarity between them
#          - min takes the minimal distance between elements 
#            of each cluster
#          - max takes the maximal distance between elements
#            of each cluster
#          - average takes the average distance between all 
#            elements of each cluster
#
#   - level-threshold is the lowest level of the go term in 
#     the cluster allowed for continuing the clustering
#      -> GO is a hierarchy organized from an initial root 
#         level (ie. 0) whose terms become increasingly 
#         specific. 
#         Therefore, the higher the level used the less 
#         clustering will be performed. To obtain a relevant 
#         clustered network use level thresholds between 1 
#         and 3. It all depends on how general you want to 
#         be in the interpretation of the network.
#
#   - distance-threshold is the maximum distance allowed 
#     between two proteins in order to be clustered
#      -> can be any integer between 1 and ...
#
#   - rep-term sets which of the GO terms of the cluster 
#     will be used for printing output
#      -> can be min (term of minimal depth in the hierarchy) 
#         or max (maximal depth)
#
#   - print-id sets which id will be used for identifying 
#     the clusters in the printed output
#      -> can be "no" (default id: go term name) or "yes" 
#         (a more complex id)
#

cluster-by-go-terms;output-target=blank;term-type=term_type;score-threshold=score_threshold;sim-mode=sim_mode;level-threshold=level_threshold;distance-threshold=distance_threshold;rep-term=rep_term;print-id=print_id

# ******************
# calculate-imotifs: calculate iMotifs of a protein as described in the paper
#                    "Characterization of protein hubs by inferring interacting motifs
#                     from binary protein interactions" 
# ******************
#
#  - similarity-mode is the type of similarity function that will be applied to do the clustering
#
#        -> valid values are: (in all cases, term1 is 1 if number_of_protein_partners_in_common>0, and 0 otherwise)
#
#            - 'num_ints': number of common interaction partners (N in the paper)
#                            ->  number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)
#
#
#            - 'min_per':  Rmin in the paper
#                          ->  number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/
#                                       min(number_of_protein_partners(proteins in cluster1), number_of_protein_partners(proteins in cluster2))
#
#            
#            - 'max_per':  Rmax in the paper
#                          -> number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/
#                                       max(number_of_protein_partners(proteins in cluster1), number_of_protein_partners(proteins in cluster2))
#
#            - 'combined_per': Rave in the paper
#                             -> (min_per + max_per) / 2
#
#
# - min-score is the minimum score allowed in order to fuse two clusters. Any positive integer is a valid value
#
#
# - imotif-mode determines which proteins will be grouped in iMotifs
#     --> valid values are:
#        - all: PIANA will try to group all proteins in the network in iMotifs
#        - roots: PIANA will try to group only root proteins in iMotifs
#
#     Note: imotif-mode is an important parameter, because it will determine the way the iMotifs and iMotif-iMotif interactions
#           will be formed
#   
#           In terms of which proteins will appear in each iMotif, the consequence of using imotif-mode roots is that iMotifs
#           will only have more than one protein in them if a root protein appears in that iMotif. In imotif-mode all, 
#           iMotifs will be formed for all proteins, regardless of whether they have a root protein or not
#
#           In terms of iMotif-iMotif interactions, in imotif-mode all we will get extra lines "imotif_imotif" in files *.results
#           describing iMotif-iMotif interactions inferred from the clusters interactions. Read comments in 
#           piana/code/Graph/GraphCluster/GraphCluster.py (method print_imotif_imotif_interactions) for more
#           info on this.
#
# - num-ints-thres sets the threshold for considering a protein well described in terms of interactions
#     --> this is the minimum number of interactions that a protein must have in order to be considered in the clustering
#     --> valid values are any integer from 1 upwards
#
# - common-ints-thres sets the threshold for considering that two proteins share a relevant number of interaction partners
#     --> this is the minimum number of interactions that two proteins must share in order to be considered
#     --> valid values are any integer from 1 upwards
#
# - global-map can be used to produce files with all interactions in the form of *.results files
#              (leave to blank unless you are evaluating the results)
#
#      These files will be used to calculate with train_and_test_imotif.py whether we improve the trivial
#      results for PDB interactions. See README.imotif_evaluation_explained for more info
#
#      -> valid values are blank (don't do anything) or a directory name 
#              --> global result files are written to this directory
#              --> directory name must end with the slash!!!!
#
#      -> attention: if global-map is different from blank, execution is stopped after creating the results file for interactions,
#         and iMotifs will not be identified for the protein
#
#  Attention! Results from this command are always written using the proteinPiana identifier. You must then translate them
#             yourself to your preferred type of identifier (e.g. using protein_code_2_protein_code.piana_conf)

calculate-imotifs;similarity-mode=similarity_mode;min-score=min_score;imotif-mode=imotif_mode;num-ints-thres=num_ints_thres;common-ints-thres=common_ints_thres;global-map=blank


# ****
# exit: this command exits 
# ****
#

# - required in all piana configuration files, unless interactive mode
#   used

exit;


##                                               ##
# THE FOLLOWING OPTIONS ARE CURRENTLY UNAVAILABLE #
##                                               ##

# *****************
# modify-parameters: this command modifies some parameter values
#                    (NOT WORKING!!!)
# *********************
#
#   ATTENTION! DO NOT USE, NOT WORKING 
#   
#
# use this command to modify parameter values at any point of
# piana execution. 
#
# - set to blank those parameters that you do not want to modify


modify-parameters;results-prefix=blank;file-over-expressed=blank;file-infra-expressed=blank;expression-id-type=blank