# ---------------------------------------------------------------------
# File       : general_template.piana_conf
# Author     : Ramon Aragues
# Creation   : 2.10.2004
# Contents   : template to be used to create your own piana 
#              configuration files
# ---------------------------------------------------------------------
# This file is used to configure parameters for piana.py for some
# specific use described below
#
# if you want piana.py to be configured by this file, set command line
# option --configuration-file=this_file_name
#
# configuration files can be used for both execution modes: batch or
# interactive
#
#   -> in execution mode interactive, the execution section will be 
#      ignored (ie. will only read parameters section)
#
# ----------description of this particular configuration file ---------
#
# This is a template for creating your own piana configuration files:
# edit, move, delete options in this file to tell piana what you want
# to do: in your configuration file, write here what it is used for
# (eg creates a network from an input file and prints the interactions
# table to the screen)
#  
# 
# In your configuration file, write here which are the parameters that
# must be set through the command line
# - apart from this configuration file, the user must use piana.py
#   command line options:
# 
# (For example, you could write something like this:
#
#         --> input-file-name
#         --> input-proteins-type
#         --> output-proteins-type
#         --> results-prefix
#         --> piana-dbname
#         --> piana-dbhost
#         --> piana-dbuser (depends on the system)
#         --> piana-dbpass (depends on the system)
#         --> depth
# )
#
#   These parameters are required in the command line! In this file,
#   they are set to blank, obliging the user to set them on the
#   command line (although, they could have values assigned and still
#   be ignored, since the command line has preference over the
#   parameter values in this file)
# -------------------------------------------------------------------
#
#
#
# Attention! 
#
#     - All non-configuration lines in this file must start with '#'
#       (unless empty line)
#
#     - A configuration line that is preceded by '#' is not taken 
#       into account
#
#     - Configuration file parameters equal to blank are ignored
#
# -----------------------------------------------------------------
#

# Remark: in many parameters and commands, there is a reference to
# file /piana/code/PianaDB/PianaGlobals.py. In that file you'll find
# (apart from other things) the valid input values for parameters. You
# can alternatively type piana.py --help, which will display valid
# input values for proteins types and interaction database names

#
# ------------------------------------------------------------------


# ==================================================================
# configuration of execution parameters
# ------------------------------------------------------------------
# set here the input parameters for this specific configuration that 
# are not required in the command-line
# ==================================================================

# exec-mode can be interactive or batch.
#  - if interactive is chosen, the execution commands of this file are
#    ignored, and the user can choose commands from a text menu.
#  - if batch is chosen, piana.py  will execute the commands described 
#    in the execution commands section of this file

exec-mode=batch


# ****************************
# Input proteins configuration
# ****************************
# Proteins can be added to the network using commands (eg add-protein
# and add-proteins-file), via the command line by setting parameter 
# input-file or setting it here (ie. input parameters section)
# - If no input-file is set, the network will be empty unless you add
#   proteins or interactions afterwards
# - If an input-file is set, then the initial network will be built
#   using proteins in this file (more proteins can be added afterwards
#   using PIANA commands)

#
# Set here the file name that contains one protein per line
#    - all proteins must be of the same type 
#    - if you have proteins in different code type (eg. gis and uniprot)
#      then you must separate them into different files to make
#      sure each file only contains proteins of the same type
#      Then, you can use command add-proteins-file to add each
#      of your files
#    - example input files are in piana/code/execs/dummy_input_files

input-file=blank

# Set here the type of code for proteins that will be used by default 
# This parameter is also used (and required) if you set an input-file
#
# - valid input protein types are those defined in
#   PianaGlobals.valid_protein_types.keys()
#     -> You can alternatively do $> python piana.py --help, which
#        will display valid input values for proteins types

input-proteins-type=blank

# Set here the protein species that will be used by default 
#  --> valid values are all and NCBI species names
#      (egs: all, yeast, human, Candida albicans SC5314,...)
#      -> you must write the complete name or it won't work.
#         If you don't know which is the complete name for
#         your species you can look at the website:
#    http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Taxonomy
#    (alternatively, you can look into table species of your
#    PIANA mysql database)
#
# This parameter is required if you set an input-file
#
#  -> Normally, this can be set to all, since the protein code
#  already refers to a unique species.  Use this parameter to fix the
#  protein species when the protein code you are using does not
#  implicitly refer to a species: for example one geneName can refer
#  to different proteins of many species
#
#  -> if you write an unknown species name or a species name
#     that is ambiguous (ie. corresponds to more than one tax id)
#     PIANA will halt the execution
#

input-proteins-species=all


# *****************************
# Piana Database configuration 
# *****************************
#

# Set here which piana database to use, where it is and how to access
# it:
# - piana-dbname (eg pianaDB_limited) and piana-dbhost (eg sefarad)
#   are required
# - piana-dbuser and piana-dbpass are required in mysql password
#   protected systems

piana-dbname=blank
piana-dbhost=blank
piana-dbuser=blank
piana-dbpass=blank


# *****************************
# Network options
# *****************************

# Set here the depth to which the network will be developed (ie. how
# many interaction steps will be searched for each root protein)
#  - recommended depth = 1 (too slow otherwise) 

depth=blank

# Set here the hub threshold
#
# Use this option to limit the interactions that will be added to 
# the network
#
# If a protein has hub-threshold interactions or more, these
# interactions will not be added
# -> this parameter is used to avoid inserting in the network
#    those proteins that bind to "everything"
#  -> set it to 0 if you don't want any thresholds to be applied

hub-threshold=blank


# Set here whether self interactions should be added to network or
# not
#  -> in some analyses, self interactions perturb the results
#     because they cause all proteins to be at all possible
#     distances from a given protein
#
#  -> a self interaction is a protein that is known to interact
#     with itself
#
#  -> valid values are 'yes' (ie use all interactions) and 'no' (ie
#     do not add self interactions to network)

use-self-ints=yes


# Set here the interaction databases that you want to use

# -> list-source-dbs=all will use interactions from all source
#    databases in piana-dbname
#    -> write colon-separated database names to limit the source
#       databases
#        - valid interaction database names are those defined 
#          in PianaGlobals.interaction_databases
#          --> do python2.3 piana.py --help to get a list of
#              valid database names
#        - for example, write list-source-dbs=dip:string:mips
#                       or for just one database...
#                             list-source-dbs=dip
#
# -> if inverse-dbs is yes, then this parameter does the opposite: 
#    dbs in list-source-dbs will not be taken into account.
#
#      - if list-source-dbs is all, inverse-dbs is ignored (it 
#        doesn't make sense to ignore all databases)
#
# -> if ignore-unreliable is yes, then those databases tagged as 
#    unreliable (databases ending with _c) will not be used
#    for this PIANA run
#      - to learn more about unreliable databases, please refer
#        to README.piana_tutorial section "setting databases to use"
#
#      - if list-source-dbs is not all, ignore-unreliable is not
#        taken into account: if you are setting a list of source
#        dbs you are responsible for enumerating the databases
#        you want to use, both reliable and unreliable
#
# Attention: any configuration different from (all, no, no) will 
#            slow down PIANA, since it has to introduce 
#            restrictions when searching for interactions
# 

list-source-dbs=all
inverse-dbs=no
ignore-unreliable=no


# Set here the interaction methods that you want to use
#   -> list-source-methods=all will use interactions from all types 
#      of methods in piana-dbname
#   -> write colon-separated method names to limit the methods
#        - valid method names are those defined in 
#          PianaGlobals.method_names.keys()
#        - for example, write list-source-methods=y2h:copurif
#                           or for just one method...
#                             list-source-methods=y2h
#
# -> if inverse-methods is yes, then this parameter does the opposite: 
#    methods in list-source-methods will not be taken into account
# -> if list-source-methods is all, inverse-methods is ignored 
#    (it doesn't make sense to ignore all methods)

list-source-methods=all
inverse-methods=no


# *****************************
# Output options
# *****************************
#
# Set here what the output results look like
#


# Set here the type of protein code to be used in your output
#
#   - valid output protein types are those defined in 
#     PianaGlobals.valid_protein_types.keys()
#      --> You can alternatively do 
#        $> python piana.py --help
#          which will display valid input values for proteins types
#
output-proteins-type=blank

# Set here alternative types of protein codes for your output
#
#   - list-alternative-types determines which protein code types will 
#     be used in case no code is found for output-proteins-type
#
#      -> write a colon-separated list of easy-to-remember type names 
#        (for example: uniacc:unientry:gi:md5 )
#      -> valid protein types are those defined in 
#         PianaGlobals.valid_protein_types.keys()
#      -> list-alternative-types cannot be set through the command line
#      -> it is recommended to write md5 as last protein type code to 
#         be used, so output  has the protein md5 at least
#           -> md5 is a checksum of the protein sequence (sequence is 
#              transformed to a unique code (shorter than the sequence 
#              itself))
#
# Attention! If you do not set at least one type of code for which you 
#            can be sure there will be a value (eg. md5) PIANA might
#            have an error when outputting results (because it won't
#            know which name to use for that protein)
#

list-alternative-types=md5

# Set here which species you want your output proteins to be
#
#   - output-proteins-species determines the species that the 
#     output_proteins must be in order to be printed
#         
#      valid values are:   
#              -> all: will print network proteins regardless of the 
#                      species
#              -> and those names in the NCBI database 
#      (egs: all, yeast, human, Candida albicans SC5314,...)
#     -> you must write the complete name or it won't work.
#         If you don't know which is the complete name for
#         your species you can look at the website:
#    http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Taxonomy
#    (alternatively, you can look into table species of your
#    PIANA mysql database)
#
#     -> this parameter only affects what you get in the output! The  
#        network is allowed to contain proteins from other species.
#        You can control which species is used to build the network 
#        with parameter input-proteins-species
#
#     -> when printing interactions, setting this parameter to a species
#        guarantees that at least one of the two proteins belongs to
#        that species. Then, you can tell whether both proteins of an
#        interaction are of the same species by looking at the species
#        field ('same' when both proteins are of the same species,
#        'different' when only one of the two proteins is of the
#        species you chose)
#
#     -> when printing information, only proteins of this species 
#        will be shown
#
#     -> connecting information (ie. linker proteins) of proteins is 
#        not affected by output-proteins-species
#
#     -> setting a species is recommended (ie. do it) when using geneName
#         as protein code (duplications across species)
#
#     -> this currently applies to outputting the following information:
#               - proteins information
#               - translating codes
#               - interactions and network in DOT format
#               - printing expansions (not to mode "add" in expansions)
#

output-proteins-species=all

# Set here the results prefix that will identify your results
#
#   - results-prefix will precede each results file. A file extension 
#     describing the results file content will be added to results-prefix 
#     (eg. interaction table will be printed to results-prefix.print-table)
#
results-prefix=blank

# Set here the directory where your results will be saved
#
#   - results-dir is the directory where results will be saved 
#     (e.g. /home/pepito/piana/results/ )
#       -> if it is equal to blank, results are saved in piana execution 
#          directory
#       -> you must place the slash at the end of the path! 
# 

results-dir=./


# ***********************
# Extra data to highlight
# ***********************

# Set here the keywords that you want to use for your analysis: this
# is a way that PIANA provides to detect proteins related to specific
# words
#
#   -> write a colon-separated list of keywords 
#        - these are the keywords that are used to detect if the
#          proteins of the network are related to something you are 
#          interested in
#        - for example, if you are interested in cancer, you could write: 
#          list-keywords=cancer:carcinoma:onco:tumor:apoptosis 
#
#   -> the network produced by command print-network will highlight in 
#      red those proteins that contain one or more keyword in their 
#      function, description or name
#   -> the output produced by commands print-*-prots-info will:
#          - in format-mode 'txt': write tokens user_keyword=word for 
#            those proteins that contain the keyword in their function, 
#            description or name
#          - in format-mode 'html': underline the proteins  that contain 
#            the keyword in their function, description or name
#   -> for command print-table, when using format-mode 'html', this list 
#      of keywords is used to underline the proteins that contain the 
#      keyword
#
#   -> list-keywords cannot be set through the command line: it must be
#      done here
#   -> if you do not want to use keywords for your analysis, leave this 
#      to blank

list-keywords=blank

# Set here the file names of proteins that you have found to be over/under 
# expressed. These file names will be used to highlight those proteins in 
# the network, (in the dot file, over expressed proteins will appear as a 
# box with red border. under-expressed proteins as a box with green border)
# You can change the colors in PianaGlobals.node_border_colors dictionary
#
#  -> these files contain one protein per line, using a code of type 
#     indicated in parameter expression-protein-type
#     -> expression-protein-type can be any of the PIANA codes 
#         --> you can  do $> python piana.py --help, which 
#             will display valid input values for proteins types
#
#  -> input-proteins-species will be used to retrieve the proteins in these
#     files: therefore, if you are using a list of geneNames for your
#     over/under expressed lists, set your input-proteins-species to a
#     valid value (other codes do not necessarily require it)

file-over-expressed=blank
file-infra-expressed=blank
expression-protein-type=blank

>>> Do not remove this line: marks transition from parameters to commands <<<


# ==========================================================================
# configuration of execution commands
# ==========================================================================
# Set here the commands that piana.py  will execute
#
# these commands can be ordered as desired by the user: it is up to
# the user to make sure the command sequence makes sense (eg. not
# asking to write a table without building a network first)
#
# commands that can be used are those listed in piana.py --help
#
# some commands require extra information that can be set in this file
# as well (eg. command "species-network" requires a species_name,
# which will be provided as well in this configuration file)
#
# even if you don't want to give any value to the command arguments,
# you must leave the argument and set it to blank
#
# You should choose the commands you want to execute from the list
# following these lines. Remove those commands that you are not
# interested in, and set arguments appropriately for those commands that
# you need. You can see some configuration file examples in this same
# directory under *.piana_conf
#
# ==========================================================================
#
# the following commands perform the actions described in the
# description of this particular configuration file
#
#  - The commands will be executed in the same order as they appear in this 
#    file
#
#  - All commands must be followed by ";", even if they do not have arguments
#    --> the command arguments are separated by ";"
#    --> configuration lines with arguments should not finish with ";"
#
#  - if you don't want the configuration line to set a given argument, write 
#    "blank" after the "=" sign
#
#  - default names for output files (used in case you set it to blank) are: 
#    results_prefix.command_name_creating_output[.format_mode]
#    --> in some cases, extra information will be added to the results file
#          name (eg. "_compact" or "_extended" output mode)
#    --> format_mode is usually added to indicate whether it is a text file 
#          or an html page that has to be visualized with a browser
#
#  - in all commands that set protein_type, if nothing is found for protein_type 
#    (or input-proteins-type) list-alternative-types are used instead (and will 
#     print protein codes as "alternative_type_name:protein_code" )
#
# ============================================================================


# *************
# reset-network: this command resets the network currently in memory
# *************
#
#   Attention! It just resets the network (the input and output parameters are 
#   not reset: input-proteins-type, output-proteins-type, etc will be the same)
#   If you need to work with different parameters, I recommend doing a different 
#   configuration file
#
#   - no required parameters
#
#   - it can be used to do operations on several different networks with a unique 
#     configuration file
#     (eg. build one network, get its results, reset the network, build a new
#      network, get new results)

reset-network;


# ************
# save-network: this command saves the current network into a disc file
# ************
#
#  - file_name is required
#     - the network will be saved to file_name in directory results-dir 
#       (specified in parameters section)
#     - the saved file is not human-readable: it is managed by python  
#       (using cPickle)
#

save-network;file-name=file_name

# ************
# load-network: this command loads into memory the network that was  
#               saved in a file using save-network
# ************
#
#   - file_name is required
#       - the network will be loaded from file_name in directory 
#         results-dir (specified in parameters section)
#
#   Attention! A network can only be loaded to be used with the same
#   database from which the network was created (due to internal piana
#   distribution: proteinPiana identifiers are not coherent across
#   different piana databases)

load-network;file-name=file_name

# ***********
# add-protein: this command adds a protein (and its interactions from
#              the piana database) to the network
# ***********
#     
#   - network doesn't have to exist previously: it can be built from 
#     a single protein through "add-protein"
#
#   - required parameters (either from command line or from "parameters 
#     section of this file") are:
#       -> depth and input-proteins-type (in case argument 
#          protein-type is blank in the execution below)
#
#   - protein_name is required. 
#
#   - if protein code type used for protein-name is different from 
#     input-proteins-type, then set protein-type with the new type as 
#     shown. 
#     if protein-name used is of input-proteins-type, then set 
#     protein-type to blank
#      -> Valid protein types are blank (ie use input-proteins-type) 
#         and those defined in PianaGlobals.valid_protein_types
#
#   - if the species of this protein is different from 
#     input-proteins-species, then set species-name to the new species 
#     as shown. 
#     if the species of the protein is input-proteins-species, then set 
#     species-name to blank
#     Valid protein species are blank (ie use input-proteins-species), 
#     all (use all species) and those defined in NCBI (eg. human, yeast,
#     ...)	
#      -> this parameter is mainly useful for protein codes that do not 
#         implicitly establish their species (eg. geneName)
#

add-protein;protein-name=protein_name;protein-type=blank;species-name=blank


# *****************
# add-proteins-file: this command adds proteins (and their
#                    interactions from the piana database) from 
#                    a file to the network
# *****************
#   
#   - network doesn't have to exist previously: it can be built 
#     with "add-proteins-file"
#
#   - complete_path_to_file is required
#     -> file with input proteins must have one protein per line 
#     -> proteins in this file must all be of the same type
#
#   - if the type of protein code used in the file is different 
#     from input-proteins-type then set protein-type
#      -> valid protein types are blank (ie use 
#         input-proteins-type) or those defined in 
#         PianaGlobals.valid_protein_types
#
#   - if species-name is different from input-proteins-species, 
#     then set the new species as shown. Otherwise, set to blank
#     Valid protein species are blank (ie use input-proteins-species), 
#     all (use all species) and those defined in NCBI
#      -> this parameter is mainly useful for protein codes that 
#         do not implicitly establish their species (eg. geneName)
#
#   Attention!!! proteins in this file must all be of the same
#   type. If you have proteins that are of a different code type, you
#   must split the proteins in as many files as different types of
#   code there are, and add file by file separately using this command

add-proteins-file;file-name=complete_path_to_file;protein-type=blank;species-name=blank


# *********************
# add-interactions-file: this command adds interactions from a file to
#                        the network
# *********************
#   
#   - network doesn't have to exist previously: it can be built with 
#     "add-interactions-file"
#
#   - complete_path_to_file is required
#      -> file with input interactions must have one interaction per 
#         line
#      -> the input file format must be the following:   
#     protein_a<TAB>protein_b<TAB>source_db<TAB>method<TAB>confidence
#           -> the format is described in detail on file 
#              piana/code/dbParsers/piana_text_intParser/README.piana_interaction_data_format
#               -> source_db must be a db appearing in 
#                  PianaGlobals.interaction_databases
#                        -> if you are using interactions from a 
#                           database that does not appear in this 
#                           list and you do not want to add a label 
#                           to PianaGlobals.interaction_databases 
#                           you can use 'user' as your source db
#               -> method must be a method appearing in 
#                  PianaGlobals.method_names
#
#   - if the type of protein code used in the file is different 
#     from input-proteins-type then set protein-type
#      -> valid protein types are blank (ie use input-proteins-type) 
#         or those defined in PianaGlobals.valid_protein_types
#
#   - proteins in the interactions file must all be of the same type
#
#   - this command does not add any interactions from the piana 
#     database: if you want as well the interactions from the 
#     database you must create a file with proteins and use 
#     command add-proteins-file
#
#   Attention: all interactions in the file will be added!
#              No restrictions applied... that means list-source-dbs
#              list-source-methods and use-self-ints have no effect
#              on this command
#              You are responsible for having the interactions you
#              want to use on your interactions file
#              (if you think it is important for you to apply
#               restrictions to your file, send me an email and 
#               I will do it...)

add-interactions-file;file-name=complete_path_to_file;protein-type=blank


# ***************
# species-network: this command builds a network for all proteins in a
#                  given species
# ***************
#   
#   Executing this command will replace the existing network with a new
#   network. Moreover, this command ignores the input list of proteins
#   and the species set in the input section: it will build a network 
#   using all proteins and all interactions of a given species.   
#
#   - a network must not previously exist (ie. commands build-network 
#     and add-protein* not active)
#
#   - to set the species for which you want to load the network, 
#     you have two options (one of the two arguments must be set 
#     to blank and the other to a correct value):
#
#       - tax_id  
#         -> valid taxonomy ids are those defined by the NCBI  
#           (9606 for human, 7227 for Drosophila melanogaster, ... )
#
#       - species_name 
#          -> valid species names are those in the NCBI database 
#             (human, yeast, Arabidopsis thaliana, ...)
#          -> if the species name given has multiple corresponding
#             taxonomy ids, the network will contain proteins 
#             from these multiple taxonomy ids 
#             (eg. "rat" is tax_id 10116 and 10114)
#
#         Attention: if both arguments are different from blank, 
#                    tax_id will be used
#                    if none of the arguments is set to a value,
#                    an error will be raised
#
#  Attention! use this command at your own discretion... 
#             networks can be huge

species-network;species-name=blank;tax-id=blank

# ***********************
# database-method-network: this command builds a network for all
#                          interactions in a given database and/or 
#                          a given method
# ***********************
#
#    This command can be useful to build a network that contains 
#    all interactions in a given database... for example, if you 
#    want to visualize the network for a database that you have 
#    inserted into a piana database
#   
#   - a network must not previously exist 
#     (ie. commands add-protein* can not appear in the same 
#      configuration file as database-method-network)
#       --> this command does not require a list of proteins, 
#           since it takes all interactions for a given database 
#           and method
#
#   - database_name is required
#     -> valid databases names are all (all databases taken into 
#        consideration) and those in 
#        PianaGlobals.interaction_databases
#     -> use at your own discretion... networks can be huge
#
#   - method_name is required
#     -> valid method names are all (all methods are taken into 
#        consideration) and those in PianaGlobals.method_names
#     -> use at your own discretion... networks can be huge
#
#   - species_name is required: network will only contain 
#     interactions between proteins of species_name
#     -> valid species names are all and those in the NCBI 
#        database 
#     -> if the species name given has multiple corresponding 
#        taxonomy ids, the network will contain proteins from 
#        these multiple taxonomy ids (eg. "rat" is tax_id 10116 
#        and 10114)
#
#   Attention! hub-threshold parameter does not affect this
#              command: all interactions will be added regardless
#              of the hub-threshold value
#

database-method-network;database-name=database_name;method-name=method_name;species-name=species_name


# ***********
# print-table: this command prints a table with all interactions in
#              current network
# ***********
#   
#   - if you do not want the table to be printed to default name 
#     then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a 
#         file name or screen if you want the results printed 
#         to stdout
#
#   - if you want the type of protein code used for printing 
#     the table different from output-proteins-type then set 
#     protein-type.
#     -> valid protein types are blank (ie use output-proteins-type)
#        or those defined in PianaGlobals.valid_protein_types.keys()
#       -> You can alternatively do $> python piana.py --help, which
#          will display valid input values for proteins types
#
#   - print-mode is required: set which proteins will appear in 
#     output
#      -> all: prints all interactions in the network
#      -> all_root: prints all interactions in the network where 
#         at least one of the proteins is a root protein
#      -> only_root: prints only interactions between root proteins 
#         in network
#      -> connecting will print only interactions between root 
#         proteins and those proteins that connect more than one 
#         root protein (linker proteins)
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#
# -> the output for all print-table commands in format-mode == html  
#    is described in the output file itself
#
# -> the output for all print-table commands in format-mode == txt  
#    is (all in one line... separated in lines for the sake of clarity):
#
#    connectivity=num of root prots connected by the not-root-prot of the pair
#    <TAB>
#    protein_1=protein_1 code using the type chosen by user
#    <TAB>
#    neighbours_1=number of neighbours protein 1
#    <TAB>
#    root_1=(is-root or not-root) for protein 1
#    <TAB>
#    expression_1=(None, over_expressed or infra_expressed) expression info prot 1
#    <TAB>
#    fitness_1=fitness information protein 1 (ignore this value)
#    <TAB>
#    protein_2=protein_2 code using the type chosen by user
#    <TAB>
#    neighbours_2=number of neighbours protein 2
#    <TAB>
#    root_2=is-root or not-root for protein 2
#    <TAB>
#    expression_2=(None, over_expressed or infra_expressed) expression info prot 2
#    <TAB>
#    fitness_2=fitness information protein 2 (ignore this value)
#    <TAB>
#    location=proteins are in same cellular location (y or n)
#    <TAB>
#    species=proteins are of same species (y or n)
#    <TAB>
#    db=source database where interaction appears
#    <TAB>
#    db=source database where interaction appears
#    <TAB>
#    db=..............
#    <TAB>
#    method=method used to detect interaction
#    <TAB>
#    method=method used to detect interaction
#    <TAB>
#    method=..............
#    <NEW_LINE>

print-table;output-target=blank;protein-type=blank;print-mode=print_mode;format-mode=format_mode

# ***************************
# print-table-db-intersection: this command prints a table with
#                              interactions that appear in the 
#                              intersection of several databases
# ***************************
#  
#  This command will only print those interactions that appear in all
#  the databases given by the user as argument. For example, if the
#  network only has two interactions, one extracted from dip and the
#  other one extracted from dip and mips and the user sets list-dbs to
#  dip:mips, then only the second interaction will be printed by this
#  command
#   
#  - if you do not want the table to be printed to default name then 
#    you should set output-target to your own file name
#     -> output-target can be blank (ie. use default name), a file 
#        name or screen if you want the results printed to stdout
#
#  - if you want the type of protein code used for printing the 
#    table different from output-proteins-type then set protein-type.
#     -> valid protein-type values are blank (ie use 
#        output-proteins-type) or those defined in 
#        PianaGlobals.valid_protein_types.keys().
#        -> You can alternatively do $> python piana.py --help, which
#           will display valid input values for proteins types
#
#   - list-dbs is required: used to set the databases where the 
#     interactions must appear in order to be printed
#         -> valid database names are those defined in 
#            PianaGlobals.interaction_databases
#         -> for example: list-dbs=dip:string:ori
#
#   - print-mode is required: set which proteins will appear in 
#     output
#      -> all prints all interactions in the network
#      -> all_root prints all interactions in the network where at 
#         least one of the proteins is a root protein
#      -> only_root prints only interactions between root proteins 
#         in network
#      -> connecting will print only interactions between root 
#         proteins and those proteins that connect more than one 
#         root protein
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   - for obtaining several intersections, just repeat this command 
#     changing the dbnames
#
#   - the default results file name will describe the intersecting 
#     dbs
#
#   output format described in command print-table


print-table-db-intersection;output-target=blank;protein-type=blank;print-mode=print_mode;list-dbs=dbname1:dbname2:dbname3:...;format-mode=format_mode


# *************
# print-network: this command prints a DOT file with all interactions
#                in current network
# *************
#   
#   - if you do not want the network to be printed to default name 
#     then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a file
#          name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     network different from output-proteins-type then set protein-type.
#      -> valid protein-type values are blank 
#        (ie use output-proteins-type) or those defined in 
#        PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do $> python piana.py --help, 
#             which will display valid input values for proteins types
#
#   - print-mode is required: set which proteins will appear in output
#      -> all prints all interactions in the network
#      -> all_root prints all interactions in the network where at 
#         least one of the proteins is a root protein
#      -> only_root prints only interactions between root proteins 
#         in network
#      -> connecting prints only proteins that are either root or 
#         linkers (proteins that connect root nodes between them)
#
#   - format-mode is always equal to dot (it will have other formats 
#     in the future...)
#
#   -> The parameters that PIANA will use to generate the .DOT file 
#      can be easily changed in file  piana/code/PianaDB/PianaGlobals.py 
#      -> section "PARAMETERS FOR OUTPUT .DOT NETWORK" describes all 
#         the parameters that you can modify
#
#
#   --> The color of the node box is an indication of the type of 
#       protein
#       Node fill colors can be easily changed in file 
#       piana/code/PianaDB/PianaGlobals.py 
#         (dictionary node_fill_colors in section COLOR CODES FOR 
#          OUTPUT NETWORK)
#
#       Currently, these are the meanings of the colors:
#            - blue: standard protein
#            - yellow: root protein
#            - red: protein that contains a keyword (see list-keywords 
#                   in input parameters) in its function, description 
#                   or name
#            - orange: root protein that contains a keyword (see 
#                      list-keywords in input parameters)  in its 
#                      function, description or name
#
#   --> The color of the border of the node box is an indication 
#       on how that node was added to the network
#       Border colors can be easily changed in file 
#       piana/code/PianaDB/PianaGlobals.py 
#         (dictionary node_border_colors in section 
#          COLOR CODES FOR OUTPUT NETWORK)
#
#       Currently, these are the meanings of the border colors:
#            - black: protein from the database
#            - blue: protein added to the network after a prediction 
#                    based on interologs
#            - green: protein found in the file with under expressed 
#                     proteins (from a microarray experiment)
#            - red: protein found in the file with over expressed 
#                   proteins (from a microarray experiment)
#       
#
#   --> The color of the edge line is an indication of the source 
#       database that had that interaction
#       Edge colors can be easily changed in file 
#       piana/code/PianaDB/PianaGlobals.py 
#         (dictionary interaction_source_databases_colors in section 
#          COLOR CODES FOR OUTPUT NETWORK)
#
#       Currently, these are the meanings of the edge line colors:
#            - red: DIP
#            - green: ori (predictions from distant 
#                     sequence/structure patterns similarity)
#            - magenta: STRING
#            - orange: prediction using interologs 
#                     (either by COG, SCOP, ...)
#            - dark green: MIPS
#            - blue: HPRD
#            - grey: BIND
#            - yellow: user (an interaction added by user with 
#                      command add-file-interactions given label 
#                      'user')
#            - cyan: interaction appears in more than one database 
#                    (you can see the list of all the dbs where it 
#                    appears by looking at the result file .print-table)
#
#      Attention!: since the colors change depending on the graphics
#      card, we have created a GIF image indicating which database
#      each edge color corresponds to:
#      piana/docs/documentation/network_colors.gif
#
#      Attention!: if you add a new database, you have to add the 
#      name of the database and a new color
#      in dictionary interaction_source_databases_colors
#
#   --> The style of the edge line is an indication on how that 
#       interaction was added to the network
#       Edge styles can be easily changed in file 
#       piana/code/PianaDB/PianaGlobals.py 
#         (dictionary interaction_line_styles in section 
#          COLOR CODES FOR OUTPUT NETWORK)
#
#       Currently, these are the meanings of the edge line styles:
#            - solid: interaction from the database
#            - dashed: added to the network as a db interaction 
#                      of a node that was added when doing 
#                      predictions
#            - dotted: prediction by interologs
#
#   The output of this command can be used to create an image of the
#   network (read piana/code/execs/README.visualize_piana_network)


print-network;output-target=blank;protein-type=blank;print-mode=print_mode;format-mode=dot

# *****************************
# print-network-db-intersection: this command prints a DOT file with
#                                interactions that appear in the 
#                                intersection of several databases
# *****************************
#  
#  This command will only print those interactions that appear in all
#  the databases given by the user as argument. For example, if the
#  network only has two interactions, one extracted from dip and the
#  other one extracted from dip and mips and the user sets list-dbs to
#  dip:mips, then only the second interaction will be printed by this
#  command
#   
#   - if you do not want the network to be printed to default name 
#     then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file name 
#     or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     network different from output-proteins-type then set 
#     protein-type.
#      -> valid protein-type values are blank (ie use 
#         output-proteins-type) or those defined in 
#         PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do 
#             $> python piana.py --help 
#             which will display valid input values for 
#             proteins types
#
#   - list-dbs is required: used to set the databases where the 
#     interactions must appear in order to be printed
#         -> valid database names are those defined in 
#            PianaGlobals.interaction_databases
#         -> for example: list-dbs=dip:string:ori
#
#   - print-mode is required: set which proteins will appear in 
#     output
#      -> all prints all interactions in the network
#      -> all_root prints all interactions in the network where 
#         at least one of the proteins is a root protein
#      -> only_root prints only interactions between root proteins 
#         in network
#      -> connecting prints only proteins that are either root or 
#         linkers (proteins that connect root nodes between them)
#
#   - format-mode is always equal to dot (it will have other formats 
#     in the future...)
#
#   - for obtaining several intersections, just repeat this command 
#     changing the dbnames
#
#   - the default results file name will describe the intersecting dbs
#
# Read the description of command print-network for a detailed
# explanation of the output of this command
#
# The output of this command can be used to create an image of the
# network (read piana/code/execs/README.visualize_piana_network)


print-network-db-intersection;output-target=blank;protein-type=blank;print-mode=print_mode;list-dbs=dbname1:dbname2:dbname3:...;format-mode=dot


# ********************
# print-all-prots-info: this command prints information (protein
#                       description, other codes, ...) about all 
#                       proteins in network
# ********************
#   
#   - if you do not want the information to be printed to default
#     name then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file 
#     name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     information different from output-proteins-type then set 
#     protein-type.
#      -> valid protein-type values are blank (ie use 
#         output-proteins-type) or those defined in 
#         PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do 
#             $> python piana.py --help
#              which will display valid input values 
#              for proteins types
#
#   - output-mode is required: used to set how information will be 
#     printed
#      -> valid output-mode values are: 
#           - extended (multiple lines, all available information)
#           - compact (one line, connected root proteins and 
#             description)
#      -> default results file name will describe if output_mode 
#         is extended or compact by placing '.compact.' or 
#         '.extended.' in the file name
#      -> in compact mode, when a user keyword from list-keywords 
#         appears, "user_keyword=the_word_that_appears" will be
#          printed to the protein information line
#      -> in compact mode, a list of the protein names associated 
#         to the protein is printed after the protein information. 
#         The first name, the one that is used to identify the 
#         protein, is the "unique" name that PIANA has assigned 
#         to that protein by means of inference and name checking
#      -> Please, read README.piana_tutorial section "PIANA and 
#         protein names" for better understanding how PIANA 
#         handles protein names
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#
#   the output for print-all-prots-info and print-root-prots-info 
#   commands  in format-mode == txt and output-mode 'compact' is:
#
#     (format-mode == txt and output-mode 'compact'  is the only 
#      type of output you might find yourself parsing... 
#      the other outputs are thought for looking directly at them, 
#      as html tables have column titles)
#
#    (for clarity, the format described here appears in multiple 
#     lines... the real output is all in the same line: one line 
#     of information for each protein)
#
#  protein name
#  <TAB>
#  ['protein description 1', 'prot desc 2', ...] (a protein can have several descriptions associated)
#  <TAB>
#  ['protein function 1', 'prot funct 2', ...] (a protein can have several functions associated)
#  <TAB>
#  root=value (where value is 1 when the protein is a root protein, and 0 otherwise)
#  <TAB>
#  expression=expression_value (None, over_expressed or infra_expressed)
#  <TAB>
#  fitness=fitness_value (ignore this field)
#  <TAB>
#  user_keyword=keyword<TAB>user_keyword=keyword<TAB>.... (as many fields as user keywords appear in the protein function or description)
#  <TAB>
#  protein_code_type:protein<TAB>protein_code_type:protein<TAB>.... (as many fields as codes that this protein has)
#                                                                    (valid protein_code_type values are PianaGlobals.valid_protein_types.keys())
#  <NEWLINE>
#

print-all-prots-info;output-target=blank;protein-type=blank;output-mode=output_mode;format-mode=format_mode

# *********************
# print-root-prots-info: this command prints information (protein
#                        description, other codes, ...) about root 
#                        proteins in network
# *********************
#   
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file 
#     name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     information different from output-proteins-type then set 
#     protein-type.
#      -> valid protein-type values are blank (ie use 
#         output-proteins-type) or those defined in 
#         PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do 
#             $> python piana.py --help
#             which will display valid input values for 
#             proteins types
#
#   - output-mode is required: used to set how information will be 
#     printed
#      -> valid output-mode values are: extended (multiple lines, 
#         all available information) or compact (one line, 
#         connected root proteins and description)
#      -> default results file name will describe if output_mode 
#         is extended or compact
#      -> in compact mode, when a user keyword from list-keywords 
#         appears, "user_keyword=the_word_that_appears" will be 
#         printed to the protein information line
#      -> in compact mode, a list of the protein names associated 
#         to the protein is printed after the protein information. 
#         The first name, the one that is used to identify the 
#         protein, is the "unique" name that PIANA has assigned 
#         to that protein by means of inference and name checking
#      -> Please, read README.piana_tutorial section "PIANA and 
#         protein names" for better understanding how PIANA 
#         handles protein names
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
# the output of this command is described in command 
# print-all-prots-info
  

print-root-prots-info;output-target=blank;protein-type=blank;output-mode=output_mode;format-mode=format_mode


# ************************
# print-connect-prots-info: this command identifies linker proteins
#                           (proteins that connect root nodes between 
#                           them) and prints information (protein 
#                           description, other codes, linked roots, 
#                            ...) about them
# ************************
#   
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file 
#     name or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     information different from output-proteins-type then set 
#     protein-type.
#      -> valid protein-type values are blank (ie use 
#         output-proteins-type) or those defined in 
#         PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do 
#             $> python piana.py --help
#             which will display valid input values for 
#             proteins types
#
#   - output-mode is required: used to set how information will be 
#     printed
#      -> valid output-mode values are: extended (multiple lines, 
#         all available information) or compact (one line, 
#         connected root proteins and description)
#      -> default results file name will describe if output_mode 
#         is extended or compact
#      -> in compact mode, when a user keyword from list-keywords 
#         appears, "user_keyword=the_word_that_appears" will be 
#         printed to the protein information line
#      -> in compact mode, a list of the protein names associated 
#         to the protein is printed after the protein information. 
#         The first name, the one that is used to identify the 
#         protein, is the "unique" name that PIANA has assigned 
#         to that protein by means of inference and name checking
#      -> Please, read README.piana_tutorial section "PIANA and 
#         protein names" for better understanding how PIANA 
#         handles protein names
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   the output of this command looks like this:
#
#       (for clarity, the format described here appears in multiple 
#        lines... the real output is all in the same line: one line of
#        information for each protein)
#
#  <NEWLINE>
#  'protein name' connects # root_proteins (with # being the number of root proteins that this linker protein connects)
#  <TAB>
#  ['root_protein_1', 'root_protein_2', ...]
#  <TAB>
#  ['source interaction database 1', 'source inter db 2', ...]  (list of interaction dbs where these interactions were retrieved from)
#  <TAB>
#  ['protein description 1', 'protein desc 2', ...]  (a protein can have several descriptions associated)
#  <TAB>
#  ['protein function 1', 'protein func 2', ...] (a protein can have several functions associated)
#  <TAB>
#  expression=expression_value (None, over_expressed or infra_expressed)
#  <TAB>
#  fitness=fitness_value (ignore this field)
#  <NEWLINE>
#
#  (if you need more information about these linker proteins, 
#   just look for it in the results files of command print-all-prots-info)
 
print-connect-prots-info;output-target=blank;protein-type=blank;output-mode=output_mode;format-mode=format_mode


# ***************************
# protein-code-2-protein-code: transforms codes from input-file (which
#                              are of type input-proteins-type) to
#                              output-proteins-type
# ***************************
#
#   This command is thought to be used independently from other
#   commands: it uses piana modules to transform proteins from one
#   code to another. It doesn't make use of the network itself, it
#   just outputs a table with protein code equivalences. Moreover,
#   most of the input and output parameters are not used when
#   executing this command (ie. all parameters ignored except for
#   input-file, input-proteins-type and output-proteins-type). For
#   example, even if you set output-proteins-species to yeast, the
#   output of this command can contain proteins from all species.
#   (reason: this is not building a network, and therefore, this
#    command is considered as an 'extra' to PIANA and does not
#    use the other parameters)
#
#   - if you do not want the information to be printed to a default 
#     file name then you should set output-target to your own file 
#     name
#      -> output-target can be blank (ie. use default name), a file 
#         name or screen if you want the results printed to stdout
#
#   - retrieves proteins to be "translated" from input-file (which 
#     is set through the command line or above in this file)
#     -> uses input-proteins-type as the type of code of proteins 
#        in the input file
#     -> uses output-proteins-type as the type of code to which 
#        proteins will be "translated"
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format

protein-code-2-protein-code;output-target=blank;format-mode=format_mode


# ***********************
# protein-code-2-all-info: gets information for proteins in input-file 
#                         (which are of type input-proteins-type)
# ***********************
#
#   This command is thought to be used as a stand alone tool: it uses
#   piana modules to get information from proteins. It doesn't make use
#   of the network itself, it just outputs a table with protein
#   information. Moreover, most of the input and output parameters 
#   are not used when executing this command (see comments on previous
#   command protein-code-2-protein-code)
#
#   The format followed for the output is described in command 
#   print-all-prots-info
#
#   - if you do not want the information to be printed to a default 
#     file name then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a file 
#         name or screen if you want the results printed to stdout
#
#   - will output information for proteins in input-file (which is 
#     set through the command line or above in this file)
#     -> uses input-proteins-type as the type of code of proteins 
#        in the input file
#
#   - if you want the type of protein code used for printing the 
#     proteins different from output-proteins-type then set 
#     protein-type.
#      -> valid protein types are blank (ie use output-proteins-type) 
#         or those defined in PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do $> python piana.py --help, which 
#             will display valid input values for proteins types
#
#   - format-mode is required: set which format will be used for printing
#     the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   - output-mode is required: used to set how information will be 
#     printed
#      -> valid output-mode values are: extended (multiple lines, 
#         all available information) or compact (one line, 
#         connected root proteins and description)
#      -> default results file name will describe if output_mode 
#         is extended or compact
#      -> in compact mode, when a user keyword from list-keywords 
#         appears, "user_keyword=the_word_that_appears" will be 
#         printed to the protein information line
#      -> in compact mode, a list of the protein names associated 
#         to the protein is printed after the protein information. 
#         The first name, the one that is used to identify the 
#         protein, is the "unique" name that PIANA has assigned 
#         to that protein by means of inference and name checking
#      -> Please, read README.piana_tutorial section "PIANA and 
#         protein names" for better understanding how PIANA 
#         handles protein names
#

protein-code-2-all-info;output-target=blank;protein-type=blank;format-mode=format_mode;output-mode=output_mode


# *******************
# expand-interactions: this command predicts interactions of proteins
#                      in the network using interologs (or other 
#                      methods)
# *******************
#   
#   This command propagates interactions between nodes that share 
#   a certain characteristic. For example, this command can be 
#   used to transfer interactions between proteins that have the 
#   same domain (scop) or that belong to the same orthologous 
#   group (cog)
#   
#   Each expand-interactions piana command does the following:
#   
#   For each protein in the network:
#   
#   1. find interactions of this protein in the current network
#   2. find proteins in the database that share a certain 
#      characteristic with this protein (e.g cog code)
#   3. for each protein that shares that characteristic:
#       - find interactions for protein that shares the 
#         characteristic in the database
#       - find interactions for protein that shares the 
#         characteristic in the network
#       - assign to protein being processed all interactions
#         of protein that shares the characteristic
#       - assign to protein that shares that characteristic 
#         all interactions of protein being processed
#   
#   This process can be repeated more than once, to reach 
#   far-fetched deductions 
#   
#     For example, if root protein is A, and if we know 
#     that C and D (yeast) interact, and that A =cog= C 
#     and B =cog= D
#   
#        - simple expansion will predict that A interacts 
#          with D
#        - double expansion will predict that A interacts 
#          with D and that A interacts with B
#          (ie double expansion predicts interactions from 
#           a previous prediction)
#          (this is achieved by executing two consecutive 
#           expand-interactions piana commands)
#
#   - the new interactions (predictions) can be added to 
#     the network or printed out to a results file 
#
#   - expansion-type is required: used to set the type 
#     of expansion to perform
#      -> valid expansion-type values are those defined 
#         in PianaGlobals.expansion_types (currently can 
#         be cog, scop (ie. scop family), interpro or ec)
#      -> if two proteins share expansion-type, interactions 
#         are interpropagated
#
#   - expansion-nodes is required: used to define which 
#     proteins will be expanded
#      -> valid expansion-nodes values are: all (all 
#         proteins in network are expanded) or root 
#         (only root proteins are expanded)
#      -> if you are looking for new interactions (predictions) 
#         for your input proteins, use root
#      -> if you want to expand all the proteins in the 
#         network (partners of root proteins as well) use all
#      -> root proteins are the proteins used to build the 
#         network (eg. the proteins in input-file)
#
#   - expansion-threshold is required: used to avoid propagating 
#     interactions when there are too many nodes that share the 
#     expansion type
#      -> valid values are: 0 (no thresholds applied) and 
#         positive integers
#      -> depending on the expansion type, the expansion-threshold 
#         to be used varies
#
#   - exp-output-mode is required: used to define if predictions 
#     should be added to network or printed to file
#      -> valid exp-output-mode values are: add (add predictions 
#         to network) and print (print to output-target)
#        -> 'add' will add to the network the predictions found
#            by expansion
#        -> 'print' will print to output-target (or to default 
#           results file) the list of predictions found by expansion
#      -> for example, if you wanted to get predictions for root 
#         nodes using double cog expansion you would first use 
#         command expand-interactions with expansion-nodes=all 
#         and exp-output-mode=add and then another command 
#         expand-interactions with expansion-nodes=root 
#         and exp-output-mode=print. By doing this "double 
#         expansion" you will be predicting interactions based 
#         on a previous expansion
#
#       - if exp-output-mode is add, the following arguments 
#           can be ignored (leave them set to blank)
#       - if exp-output-mode is "print" then:
#
#        -> if you do not want the information to be printed 
#           to a default file name then you should set output-target 
#           to your own file name
#             -> output-target can be blank (ie. use default name), 
#                a file name or screen if you want the results printed 
#                to stdout
#
#        -> if you want the type of protein code used for printing 
#           the information different from output-proteins-type then 
#           set protein-type.
#             -> valid protein-type values are blank (ie use 
#                output-proteins-type) or those defined in 
#                PianaGlobals.valid_protein_types
#
#        -> the results will follow the following format (one 
#           interaction per line):
#
#  protein1<TAB>protein2<TAB>expansion_type<TAB>source_interactionPiana<TAB>source_proteinPiana
#
#           This file can then be used to insert predictions into
#           a PIANA database using parser expansion2piana.py
#           
#           If you are going to insert these predictions into a 
#           PIANA database, I recommend that your output type
#           for protein codes is proteinPiana (to make sure that
#           the prediction refers to that protein sequence). In any
#           case, never use geneNames for creating a list of
#           predictions that is going to be inserted into a
#           PIANA database: geneNames do not implicitly contain
#           the species and can be ambiguous within a species.
#    
#           To learn more about inserting predictions into 
#           PIANA databases, read README.populate_piana_db and
#           README.piana_examples
#
#
#   - We do not recommend doing predictions based on predictions: 
#     ie. we do not recommend executing command expand-interactions 
#     on networks that were built from a database with predictions. 
#     To avoid this, you can use parameters list-source-dbs and 
#     list-source-methods or do what we do internally in our lab: 
#     have two piana databases, one with only experimentally 
#     detected interactions and another one with all interactions.
#
#   - for expansions, I recommend using program 
#     run_piana_protein_by_protein.py instead of piana.py
#     -> the result will be the same if you work with all proteins 
#        at the same time as if you do it one by one
#     -> it is much faster to manage the expansion separately 
#        for each protein
#     -> read README.piana_examples for more info on this
#    

expand-interactions;expansion-type=expansion_type;expansion-nodes=expansion_nodes;expansion-threshold=expansion_threshold;exp-output-mode=mode;output-target=blank;protein-type=blank

# *******************
# find-shortest-route: this command finds the shortest route between
#                      two proteins in the network
# *******************
#
#  ATTENTION: this command requires the piana 'advanced mode' or 
#             'developer mode'. By default, all users work in
#             'simple mode'. Therefore, if you want to use this
#             command you'll have to modify your working mode as
#             indicated in section 'PIANA types of users' of file
#             README.piana_tutorial
#     
#   - network must exist before running this command
#
#   - if you do not want the information to be printed to default name 
#     then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file name 
#     or screen if you want the results printed to stdout
#
#   - protein_a_name and protein_b_name are required. 
#
#   - if protein code type used for protein-name is different from 
#     input-proteins-type, then set protein-type with the new type 
#     as shown. 
#     if protein-name used is of input-proteins-type, then set
#      protein-type to blank
#      -> Valid protein types are blank (ie use input-proteins-type) 
#         and those defined in PianaGlobals.valid_protein_types
#
#   - the output will be written using output-proteins-type (read from
#     the input parameters section)
#
#     
#   format of the txt output is:
#
#    first line:  protein_a=protein_a_name<TAB>protein_b=protein_b_name<TAB>distance=distance_of_route
#    second line: START=protein_a_name<-->protein_in_route<-->protein_in_route<-->...<-->END=protein_b_name


find-shortest-route;protein-a-name=protein_a_name;protein-b-name=protein_b_name;protein-type=blank;output-target=blank

# *******************
# find-distance-group: this command finds a group of proteins that are
#                      at distance N from a query protein
# *******************
#     
#   - network must exist before running this command
#
#   - if you do not want the information to be printed to default name 
#     then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file name 
#     or screen if you want the results printed to stdout
#
#   - protein_name is required. 
#
#   - if protein code type used for protein-name is different from 
#     input-proteins-type, then set protein-type with the new type 
#     as shown. 
#     if protein-name used is of input-proteins-type, then set
#      protein-type to blank
#      -> Valid protein types are blank (ie use input-proteins-type) 
#         and those defined in PianaGlobals.valid_protein_types
#
#   - distance is required: set the distance between your query 
#     protein and the group of proteins you are searching
#       - when distance is "all", groups 1, 2 and 3 are printed 
#         out
#  
#   - info is used to choose the information that will be printed 
#     next to the proteins at distance N
#      - values admitted are: 
#                     - blank: no info printed
#                     - all: all info known about protein
#                     - scop: scop codes  
#                     - cath: cath codes
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#

find-distance-group;protein-name=protein_name;protein-type=blank;distance=distance;output-target=blank;info=blank;format-mode=format_mode


# ***********************
# match-proteins-to-spots: this command identifies spots in a 2D gel
#                          by matching MW and/or IP to proteins in 
#                          the network
# ***********************
#
#    we have spot ids from a 2D electrophoresis gel, with their
#    molecular weights (MW) and isoelectric points (IP). Some of those
#    spots were identified by mass spectrometry but other spots were
#    unassigned. We can use PIANA to identify some of those
#    unassigned spots, by comparing the MW and IP of the spots with
#    the MW and IP of the proteins in the network.
#      
#   - if you do not want the information to be printed to default 
#     name then you should set output-target to your own file name
#      -> output-target can be blank (ie. use default name), a file 
#         name or screen if you want the results printed to stdout
#
#   - format-mode is required: set which format will be used for 
#     printing the output
#      -> txt: prints flat text
#      -> html: prints in html format
#
#   - if you want the type of protein code used for printing the 
#     information different from output-proteins-type then set 
#     protein-type.
#      -> valid protein-type values are blank (ie use 
#         output-proteins-type) or those defined in 
#         PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do 
#             $> python piana.py --help
#             which will display valid input values for 
#             proteins types
#
#   - spots-file-name is a file name following the structure 
#     (one spot per line): 
#         spot_id<TAB>molecular_weight<TAB>isoelectric_point
#      -> where decimals are expressed with "."
#      -> complete_path_to_spots_file can be blank if 
#         spots-file-name was set in the command line
#
#   - list-mw-error and list-ip-error set the error bounds 
#     admitted for the matching of molecular weight and 
#     isoelectric point
#      -> the number of error bounds for mw and ip must 
#         be identical: values can be different, but the 
#         number of values not
#      -> use "." for decimals 
#      -> set to blank if you prefer to use the default 
#         error bounds (which are hard-coded in piana.py)
#      -> to set your own error bounds, write colon-separated 
#         values 
#   (e.g. list-mw-error=0.01:0.02:0.05;list-ip-error=0.1:0.2:0.5)
#     
# Attention!: correspondences that appear in a given error level will
#             not be shown in higher error levels

match-proteins-to-spots;output-target=blank;format-mode=format_mode;protein-type=blank;spots-file-name=complete_path_to_spots_file;list-mw-error=blank;list-ip-error=blank


# ********************
# cluster-by-go-terms: this command clusters the proteins of the
#                      network using GO terms
# ********************
#
#   - In order to cluster a network using go terms, a 
#     protein-protein interaction network must previously exist    
#      
#   - if you do not want the clustered network to be printed to 
#     the default file name then you should set output-target to 
#     your own file name
#      -> output-target can be blank (ie. use default name), a 
#         file name or screen if you want the results printed to 
#         stdout
#
#   - term-type sets the kind of GO terms that will be used for
#      the clustering (required)
#      -> term-type can be molecular_function, biological_process 
#         or cellular_component
#
#   - score-threshold is the lowest score obtained by the 
#     similarity function allowed for continuing the clustering
#      -> can be any real number from 0 to 100 (0 will group 
#         all proteins, 100 will not group any proteins). 
#         To obtain a relevant clustered network use score 
#         thresholds between 0.1 and 1
#
#   - sim-mode sets how to calculate distances between two 
#     clusters
#          - random takes a random element from each cluster 
#            and evaluates similarity between them
#          - min takes the minimal distance between elements 
#            of each cluster
#          - max takes the maximal distance between elements
#            of each cluster
#          - average takes the average distance between all 
#            elements of each cluster
#
#   - level-threshold is the lowest level of the go term in 
#     the cluster allowed for continuing the clustering
#      -> GO is a hierarchy organized from an initial root 
#         level (ie. 0) that increasingly makes more specific 
#         the terms. 
#         Therefore, the higher the level used the less 
#         clustering will be performed. To obtain a relevant 
#         clustered network use level thresholds between 1 
#         and 3. It all depends on how general you want to 
#         be in the interpretation of the network.
#
#   - distance-threshold is the maximum distance allowed 
#     between two proteins in order to be clustered
#      -> can be any integer between 1 and ...
#
#   - rep-term sets which of the GO terms of the cluster 
#     will be used for printing output
#      -> can be min (term of minimal depth in the hierarchy) 
#         or max (maximal depth)
#
#   - print-id sets which id will be used for identifying 
#     the clusters in the printed output
#      -> can be "no" (default id: go term name) or "yes" 
#         (a more complex id)
#

cluster-by-go-terms;output-target=blank;term-type=term_type;score-threshold=score_threshold;sim-mode=sim_mode;level-threshold=level_threshold;distance-threshold=distance_threshold;rep-term=rep_term;print-id=print_id

# ****
# exit: this command exits 
# ****
#

# - required in all piana configuration files, unless interactive mode
#   used

exit;

##                                               ##
# THE FOLLOWING OPTIONS ARE CURRENTLY UNAVAILABLE #
##                                               ##


# *****************
# train-cirs: outputs results that can be used to train/evaluate CIRs
#             (NOT WORKING!!!)
# *********************
#
#   ATTENTION! DO NOT USE, NOT WORKING 
#   
#  - similarity-mode is the type of similarity function that will be applied to do the clustering
#
#        -> valid values are: (in all cases, term1 is 1 if number_of_protein_partners_in_common>0, and 0 otherwise)
#
#            - 'num_ints': based on global number of common interaction partners
#                            ->  number_of_protein_partners_in_common(cluster1, cluster2) x
#                                   number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)
#
#            - 'combined_per': based on which percentage of the protein partners in both clusters are also common partners between them
#                            ->   number_of_protein_partners_in_common(cluster1, cluster2) x
#
#                    1/2( number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/number_of_protein_partners(proteins in cluster1)
#                         + number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/number_of_protein_partners(proteins in cluster2)
#                       )
#                    
#            - 'min_per': based on which percentage of the protein partners are shared with the other protein (taking the protein with fewer partners)
#            
#                            ->  number_of_protein_partners_in_common(cluster1, cluster2) x
#                                   number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/
#                                       min(number_of_protein_partners(proteins in cluster1), number_of_protein_partners(proteins in cluster2))
#
#            
#            - 'max_per': based on which percentage of the protein partners are shared with the other protein (taking the protein with more partners)
#            
#                            ->  number_of_protein_partners_in_common(cluster1, cluster2) x
#                                   number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/
#                                       max(number_of_protein_partners(proteins in cluster1), number_of_protein_partners(proteins in cluster2))
#
#
# - min_score is the minimum score allowed in order to fuse two clusters. Any positive integer is a valid value
#
# - cir_method is the method used to create CIRs
#     --> valid values are:
#        - irs: by decomposing the proteins into Interacting Regions and then clustering them
#        - prots: by adding 'clusters' to the original network of proteins
#

train-cirs;similarity-mode=similarity_mode;min-score=min_score;cir-method=cir_method


# *****************
# modify-parameters: this command modifies parameter values
#                    (NOT WORKING!!!)
# *********************
#
#   ATTENTION! DO NOT USE, NOT WORKING 
#   
# - use this command to modify any parameter value at any point of
#   piana execution
# - set to blank those parameters that you do not want to modify


modify-parameters;input-proteins-type=input_proteins_type;depth=depth;output-proteins-type=output_proteins_type


# ********************
# find-protein-patches: this command divides proteins into patches  
#                       (NOT WORKING!!!)
# *********************
#
#   ATTENTION! DO NOT USE, NOT WORKING 
#   
# - clustering-steps: set the maximum number of clustering steps you
#   wish to perform
#      -> clustering_steps can be any positive integer
#
#   - score-threshold: set the score threshold for considering two 
#     patches identical
#      -> score_threshold can be any positive integer
#
#   - ranked-parameters-file: set here the file name of the file 
#     that contains the ranked parameters obtained by the training
#        -> this file is an ordered (from best to worst) list of 
#           which weights and stop conditions performed best in the 
#           training
#        -> each line of this file looks like this:
#           w_patches=1 w_prots=2 w_belong=1 score_links=huge 
#           av_spec_shared=0 av_sens_shared=0 av_fvalue_shared=0 
#           av_spec_int=1 av_sens_int=1 av_fvalue_int=1 ....
#
#           where the w_* are the weights to be used, the score_links 
#           (can be also score_clusters) is the stop condition and the 
#           rest of fields are the results obtained with those 
#           parameters during the training
#        -> this file can be created by training piana with a 
#           benchmark-configuration-file
# 
#   - save-mode: set how patches and their interactions are saved
#      -> valid values for save-mode are "memory" (everything saved on 
#         memory) and "disc" (everything saved on disc)
#      -> for retrieving results, see commands print-protein-patches, 
#         print-shared-patches and print-patches-interactions
#
#   - disc-name: if save-mode == disc, set a file name prefix for 
#     patchgroup graphs that will be saved to disc
#      -> this argument is ignored if save-mode is memory
#

find-protein-patches;clustering-steps=number_clustering_steps;score-threshold=score_threshold;ranked-parameters-file=file_name;save-mode=save_mode;disc-name=disc_name


# *********************
# print-protein-patches: this command prints for each protein, which
#                        patches does it have at a certain level 
#                        (NOT WORKING!!!)
# *********************
#
#   ATTENTION! DO NOT USE, NOT WORKING 
#   
#   - if you do not want the information to be printed to default name 
#     then you should set output-target to your own file name
#     output-target can be blank (ie. use default name), a file name 
#     or screen if you want the results printed to stdout
#
#   - if you want the type of protein code used for printing the 
#     information different from output-proteins-type then set 
#     protein-type.
#      -> valid protein-type values are blank (ie use output-proteins-type)
#         or those defined in PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do $> python piana.py --help, which 
#             will display valid input values for proteins types
#
#   - Set the clustering level you want to print out with clustering_level
#      -> clustering_level can be -1 (last clustering performed) or any 
#         positive integer lower than clustering_levels in 
#         find-protein-patches
#
#   - default name will contain the clustering level that is being printed

print-protein-patches;output-target=blank;protein-type=blank;clustering-level=clustering_level_to_print

# ********************
# print-shared-patches: this command prints for each patch, which proteins 
#                       do have it at a certain level   (NOT WORKING!!!)
# *********************
#
#   ATTENTION! DO NOT USE, NOT WORKING 
#   
#   - if you do not want the information to be printed to default name then 
#     you should set output-target to your own file name. output-target can 
#     be blank (ie. use default name), a file name or screen if you want 
#     the results printed to stdout
#
#   - if you want the type of protein code used for printing the information 
#     different from output-proteins-type then set protein-type.
#      -> valid protein-type values are blank (ie use output-proteins-type) or 
#         those defined in PianaGlobals.valid_protein_types.keys().
#         --> You can alternatively do $> python piana.py --help, which will 
#             display valid input values for proteins types
#
#   - Set the clustering level you want to print out with clustering_level
#      -> clustering_level can be -1 (last clustering performed) or any 
#         positive integer lower than clustering_levels in find-protein-patches
#
#   - default name will contain the clustering level that is being printed

print-shared-patches;output-target=blank;protein-type=blank;clustering-level=clustering_level_to_print

# **************************
# print-patches-interactions: this command prints the patch-patch interactions 
#                             network at a certain level   (NOT WORKING!!!)
# *********************
#
#   ATTENTION! DO NOT USE, NOT WORKING 
#    
#   - if you do not want the information to be printed to default name then
#     you should set output-target to your own file name. output-target can 
#     be blank (ie. use default name), a file name or screen if you want the 
#     results printed to stdout
#  
#   - Set the clustering level you want to print out with clustering_level
#      -> clustering_level can be -1 (last clustering performed) or any 
#         positive integer lower than clustering_levels in 
#         find-protein-patches
#
#   - default name will contain the clustering level that is being printed
#      -> for level number -1 (last clustering performed) the default name
#         will finish by ".last"
#      -> for all other level numbers  the default name will finish by ".??" 
#         (?? being the level number)

print-patches-interactions;output-target=blank;clustering-level=clustering_level_to_print

# **************************
# print-patches-network: this command prints the .dot patch-patch interactions 
#                        network at a certain level   (NOT WORKING!!!)
# *********************
#
#   ATTENTION! DO NOT USE, NOT WORKING 
#    
#   - if you do not want the information to be printed to default name then you 
#     should set output-target to your own file name. output-target can be blank 
#     (ie. use default name), a file name or screen if you want the results 
#     printed to stdout
#  
#   - Set the clustering level you want to print out with clustering_level
#      -> clustering_level can be -1 (last clustering performed) or any positive 
#         integer lower than clustering_levels in find-protein-patches
#
#   - default name will contain the clustering level that is being printed
#      -> for level number -1 (last clustering performed) the default name
#         will finish by ".last"
#      -> for all other level numbers  the default name will finish by ".??" 
#         (?? being the level number)

print-patches-network;output-target=blank;clustering-level=clustering_level_to_print