# --------------------------------------------------------------------- # File : general_template.piana_conf # Author : Ramon Aragues # Creation : 2.10.2004 # Contents : template to be used to create your own piana # configuration files # --------------------------------------------------------------------- # This file is used to configure parameters for piana.py for some # specific use described below # # if you want piana.py to be configured by this file, set command line # option --configuration-file=this_file_name # # configuration files can be used for both execution modes: batch or # interactive # # -> in execution mode interactive, the execution section will be # ignored (ie. will only read parameters section) # # ----------description of this particular configuration file --------- # # This is a template for creating your own piana configuration files: # edit, move, delete options in this file to tell piana what you want # to do: in your configuration file, write here what is the use it has # (eg creates a network from an input file and prints the interactions # table to the screen) # # # In your configuration file, write here which are the parameters that # must be set through the command line # - apart from this configuration file, the user must use piana.py # command line options: # # (For example, you could write something like this: # # --> input-file-name # --> input-proteins-type # --> output-proteins-type # --> results-prefix # --> piana-dbname # --> piana-dbhost # --> piana-dbuser (depends on the system) # --> piana-dbpass (depends on the system) # --> depth # ) # # These parameters are required in the command line! In this file, # they are set to blank, obliging the user to set them on the # command line (although, they could have values assigned and still # be ignored, since the command line has preference over the # parameter values in this file) # ------------------------------------------------------------------- # # # # Attention! 
# # - All non-configuration lines in this file must start with '#' # (unless empty line) # # - A configuration line that is preceded by '#' is not taken # into account # # - Configuration file parameters equal to blank are ignored # # ----------------------------------------------------------------- # # Remark: in many parameters and commands, there is a reference to # file /piana/code/PianaDB/PianaGlobals.py. In that file you'll find # (apart from other things) the valid input values for parameters. You # can alternatively type piana.py --help, which will display valid # input values for proteins types and interaction database names # # ------------------------------------------------------------------ # ================================================================== # configuration of execution parameters # ------------------------------------------------------------------ # set here the input parameters for this specific configuration that # are not required in the command-line # ================================================================== # exec-mode can be interactive or batch. # - if interactive is chosen, the execution commands of this file are # ignored, and the user can choose commands from a text menu. # - if batch is chosen, piana.py will execute the commands described # in the execution commands section of this file exec-mode=batch # **************************** # Input proteins configuration # **************************** # Proteins can be added to the network using commands (eg add-protein # and add-proteins-file), via the command line by setting parameter # input-file or setting it here (ie. 
input parameters section) # - If no input-file is set, the network will be empty unless you add # proteins or interactions afterwards # - If an input-file is set, then the initial network will be built # using proteins in this file (more proteins can be added afterwards # using PIANA commands) # # Set here the file name that contains one protein per line # - all proteins must be of the same type # - if you have proteins in different code type (eg. gis and uniprot) # then you must separate them into different files to make # sure each file only contains proteins of the same type # Then, you can use command add-proteins-file to add each # of your files # - example input files are in piana/code/execs/dummy_input_files input-file=blank # Set here the type of code for proteins that will be used by default # This parameter is also used (and required) if you set an input-file # # - valid input protein types are those defined in # PianaGlobals.valid_protein_types.keys() # -> You can alternatively do $> python piana.py --help, which # will display valid input values for proteins types input-proteins-type=blank # Set here the protein species that will be used by default # --> valid values are all and NCBI species names # (egs: all, yeast, human, Candida albicans SC5314,...) # -> you must write the complete name or it won't work. # If you don't know which is the complete name for # your species you can look at the website: # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Taxonomy # (alternatively, you can look into table species of your # PIANA mysql database) # # This parameter is required if you set an input-file # # -> Normally, this can be set to all, since the protein code # already refers to a unique species. 
Use this parameter to fix the # protein species when the protein code you are using does not # implicitly refer to a species: for example one geneName can refer # to different proteins of many species # # -> if you write an unknown species name or a species name # that is ambiguous (ie. corresponds to more than one tax id) # PIANA will halt the execution # input-proteins-species=all # ***************************** # Piana Database configuration # ***************************** # # Set here which piana database to use, where it is and how to access # it: # - piana-dbname (eg pianaDB_limited) and piana-dbhost (eg sefarad) # are required # - piana-dbuser and piana-dbpass are required in mysql password # protected systems piana-dbname=blank piana-dbhost=blank piana-dbuser=blank piana-dbpass=blank # ***************************** # Network options # ***************************** # Set here the depth to which the network will be developed (ie. how # many interaction steps will be searched for each root protein) # - recommended depth = 1 (too slow otherwise) depth=blank # Set here the hub threshold # # Use this option to limit the interactions that will be added to # the network # # If a protein has hub-threshold interactions or more, these # interactions will not be added # -> this parameter is used to avoid inserting in the network # those proteins that bind to "everything" # -> set it to 0 if you don't want any thresholds to be applied hub-threshold=blank # Set here whether self interactions should be added to network or # not # -> in some analysis, self interactions perturb the results # because it causes all proteins to be at all possible # distances from a given protein # # -> a self interaction is a protein that is known to interact # with itself # # -> valid values are 'yes' (ie use all interactions) and no (ie # do not add self interactions to network) use-self-ints=yes # Set here the interaction databases that you want to use # -> list-source-dbs=all will use 
interactions from all source # databases in piana-dbname # -> write colon-separated database names to limit the source # databases # - valid interaction database names are those defined # in PianaGlobals.interaction_databases # --> do python2.3 piana.py --help to get a list of # valid database names # - for example, write list-source-dbs=dip:string:mips # or for just one database... # list-source-dbs=dip # # -> if inverse-dbs is yes, then this parameter does the opposite: # dbs in list-source-dbs will not be taken into account. # # - if list-source-dbs is all, inverse-dbs is ignored (it # doesn't make sense to ignore all databases) # # -> if ignore-unreliable is yes, then those databases tagged as # unreliable (databases ending with _c) will not be used # for this PIANA run # - to learn more about unreliable databases, please refer # to README.piana_tutorial section "setting databases to use" # # - if list-source-dbs is not all, ignore-unreliable is not # taken into account: if you are setting a list of source # dbs you are responsible for enumerating the databases # you want to use, both reliable and unreliable # # Attention: any configuration different from (all, no, no) will # slow down PIANA, since it has to introduce # restrictions when searching for interactions # list-source-dbs=all inverse-dbs=no ignore-unreliable=no # Set here the interaction methods that you want to use # -> list-source-methods=all will use interactions from all types # of methods in piana-dbname # -> write colon-separated database names to limit the methods # - valid method names are those defined in # PianaGlobals.method_names.keys() # - for example, write list-source-methods=y2h:copurif # or for just one method... 
# list-source-methods=y2h # # -> if inverse-methods is yes, then this parameter does the opposite: # methods in list-source-methods will not be taken into account # -> if list-source-methods is all, inverse-methods is ignored # (it doesn't make sense to ignore all methods) list-source-methods=all inverse-methods=no # ***************************** # Output options # ***************************** # # Set here how the output results look like # # Set here the type of protein code to be used in your output # # - valid output protein types are those defined in # PianaGlobals.valid_protein_types.keys() # --> You can alternatively do # $> python piana.py --help # which will display valid input values for proteins types # output-proteins-type=blank # Set here alternative types of protein codes for your output # # - list-alternative-types determines which protein code types will # be used in case no code is found for output-proteins-type # # -> write a colon-separated list of easy-to-remember type names # (for example: uniacc:unientry:gi:md5 ) # -> valid protein types are those defined in # PianaGlobals.valid_protein_types.keys() # -> list-alternative-types cannot be set through the command line # -> it is recommended to write md5 as last protein type code to # be used, so output has the protein md5 at least # -> md5 is a checksum of the protein sequence (sequence is # transformed to a unique code (shorter than the sequence # itself)) # # Attention! If you do not set at least one type of code for which you # can be sure there will be a value (eg. 
md5) PIANA might # have an error when outputting results (because it won't # know which name to use for that protein) # list-alternative-types=md5 # Set here which species you want your output proteins to be # # - output-proteins-species determines the species that the # output_proteins must be in order to be printed # # valid values are: # -> all: will print network proteins regardless of the # species # -> and those names in the NCBI database # (egs: all, yeast, human, Candida albicans SC5314,...) # -> you must write the complete name or it won't work. # If you don't know which is the complete name for # your species you can look at the website: # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Taxonomy # (alternatively, you can look into table species of your # PIANA mysql database) # # -> this parameter only affects what you get in the output! The # network is allowed to contain proteins from other species. # You can control which species is used to build the network # with parameter input-proteins-species # # -> when printing interactions, setting this parameter to a species # guarantees that at least one of the two proteins belongs to # that species. Then, you can make the distinction between # interactions where both proteins are of the same species # by looking at the species field ('same' when both proteins # are of the same species, 'different' when only one of the # two proteins is of the species you chose) # # -> when printing information, only proteins of this species # will be shown # # -> connecting information (ie. linker proteins) of proteins is # not affected by output-proteins-species # # -> setting a species is recommended (ie. 
do it) when using geneName # as protein code (duplications across species) # # -> this currently applies to outputting the following information: # - proteins information # - translating codes # - interactions and network in DOT format # - printing expansions (not to mode "add" in expansions) # output-proteins-species=all # Set here the results prefix that will identify your results # # - results-prefix will precede each results file. A file extension # describing the results file content will be added to results-prefix # (eg. interaction table will be printed to results-prefix.print-table) # results-prefix=blank # Set here the directory where your results will be saved # # - results-dir is the directory where results will be saved # (e.g. /home/pepito/piana/results/ ) # -> if it is equal to blank, results are saved in piana execution # directory # -> you must place the slash at the end of the path! # results-dir=./ # *********************** # Extra data to highlight # *********************** # Set here the keywords that you want to use for your analysis: this # is a way that PIANA provides to detect proteins related to specific # words # # -> write a colon-separated list of keywords # - these are the keywords that are used to detect if the # proteins of the network are related to something you are # interested in # - for example, if you are interested in cancer, you could write: # list-keywords=cancer:carcinoma:onco:tumor:apoptosis # # -> the network produced by command print-network will highlight in # red those proteins that contain one or more keywords in their # function, description or name # -> the output produced by commands print-*-prots-info will: # - in format-mode 'txt': write tokens user_keyword=word for # those proteins that contain the keyword in their function, # description or name # - in format-mode 'html': underline the proteins that contain # the keyword in their function, description or name # -> for command print-table, when using 
format-mode 'html', this list # of keywords is used to underline the proteins that contain the # keyword # # -> list-keywords cannot be set through the command line: it must be # done here # -> if you do not want to use keywords for your analysis, leave this # to blank list-keywords=blank # Set here the file names of proteins that you have found to be over/under # expressed. These file names will be used to highlight those proteins in # the network (in the dot file, over expressed proteins will appear as a # box with red border. under-expressed proteins as a box with green border) # You can change the colors in PianaGlobals.node_border_colors dictionary # # -> these files contain one protein per line, using a code of type # indicated in parameter expression-protein-type # -> expression-protein-type can be any of the PIANA codes # --> you can do $> python piana.py --help, which # will display valid input values for proteins types # # -> input-proteins-species will be used to retrieve the proteins in these # files: therefore, if you are using a list of geneNames for your # over/under expressed lists, set your input-proteins-species to a # valid value (other codes do not necessarily require it) file-over-expressed=blank file-infra-expressed=blank expression-protein-type=blank >>> Do not remove this line: marks transition from parameters to commands <<< # ========================================================================== # configuration of execution commands # ========================================================================== # Set here the commands that piana.py will execute # # these commands can be ordered as desired by the user: it is up to # the user to make sure the command sequence makes sense (eg. not # asking to write a table without building a network first) # # commands that can be used are those listed in piana.py --help # # some commands require extra information that can be set in this file # as well (eg. 
command "species-network" requires a species_name, # which will be provided as well in this configuration file) # # even if you don't want to give any value to the command arguments, # you must leave the argument and set it to blank # # You should choose the commands you want to execute from the list # following these lines. Remove those commands that you are not # interested in, and set arguments appropriately for those commands that # you need. You can see some configuration file examples in this same # directory under *.piana_conf # # ========================================================================== # # the following commands perform the actions described in the # description of this particular configuration file # # - The commands will be executed in the same order as they appear in this # file # # - All commands must be followed by ";", even if they do not have arguments # --> the command arguments are separated by ";" # --> configuration lines with arguments should not finish with ";" # # - if you don't want the configuration line to set a given argument, write # "blank" after the "=" sign # # - default names for output files (used in case you set it to blank) are: # results_prefix.command_name_creating_output[.format_mode] # --> in some cases, extra information will be added to the results file # name (eg. "_compact" or "_extended" output mode) # --> format_mode is usually added to indicate whether it is a text file # or an html page that has to be visualized with a browser # # - in all commands that set protein_type, if nothing is found for protein_type # (or input-proteins-type) list-alternative-types are used instead (and will # print protein codes as "alternative_type_name:protein_code" ) # # ============================================================================ # ************* # reset-network: this command resets the network currently in memory # ************* # # Attention! 
It just resets the network (the input and output parameters are # not reset: input-proteins-type, output-proteins-type, etc will be the same) # If you need to work with different parameters, I recommend creating a different # configuration file # # - no required parameters # # - it can be used to do operations on several different networks with a unique # configuration file # (eg. build one network, get its results, reset the network, build a new # network, get new results) reset-network; # ************ # save-network: this command saves the current network into a disk file # ************ # # - file_name is required # - the network will be saved to file_name in directory results-dir # (specified in parameters section) # - the saved file is not human-readable: it is managed by python # (using cPickle) # save-network;file-name=file_name # ************ # load-network: this command loads into memory the network that was # saved in a file using save-network # ************ # # - file_name is required # - the network will be loaded from file_name in directory # results-dir (specified in parameters section) # # Attention! A network can only be loaded to be used with the same # database from which the network was created (due to internal piana # distribution: proteinPiana identifiers are not coherent across # different piana databases) load-network;file-name=file_name # *********** # add-protein: this command adds a protein (and its interactions from # the piana database) to the network # *********** # # - network doesn't have to exist previously: it can be built from # a single protein through "add-protein" # # - required parameters (either from command line or from "parameters # section of this file") are: # -> depth and input-proteins-type (in case argument # this_protein_code_type is blank in the execution below) # # - protein_name is required. 
# # - if protein code type used for protein-name is different from # input-proteins-type, then set protein-type with the new type as # shown. # if protein-name used is of input-proteins-type, then set # protein-type to blank # -> Valid protein types are blank (ie use input-proteins-type) # and those defined in PianaGlobals.valid_protein_types # # - if the species of this protein is different from # input-proteins-species, then set species-name to the new species # as shown. # if the species of the protein is input-proteins-species, then set # species-name to blank # Valid protein species are blank (ie use input-proteins-species), # all (use all species) and those defined in NCBI (eg. human, yeast, # ...) # -> this parameter is mainly useful for protein codes that do not # implicitly establish their species (eg. geneName) # add-protein;protein-name=protein_name;protein-type=blank;species-name=blank # ***************** # add-proteins-file: this command adds proteins (and their # interactions from the piana database) from # a file to the network # ***************** # # - network doesn't have to exist previously: it can be built # with "add-proteins-file" # # - complete_path_to_file is required # -> file with input proteins must have one protein per line # -> proteins in this file must all be of the same type # # - if the type of protein code used in the file is different # from input-proteins-type then set protein-type # -> valid protein types are blank (ie use # input-proteins-type) or those defined in # PianaGlobals.valid_protein_types # # - if species-name is different from input-proteins-species, # then set the new species as shown. Otherwise, set to blank # Valid protein species are blank (ie use input-proteins-species), # all (use all species) and those defined in NCBI # -> this parameter is mainly useful for protein codes that # do not implicitly establish their species (eg. geneName) # # Attention!!! proteins in this file must all be of the same # type. 
If you have proteins that are of a different code type, you # must split the proteins in as many files as different types of # code there are, and add file by file separately using this command add-proteins-file;file-name=complete_path_to_file;protein-type=blank;species-name=blank # ********************* # add-interactions-file: this command adds interactions from a file to # the network # ********************* # # - network doesn't have to exist previously: it can be built with # "add-interactions-file" # # - complete_path_to_file is required # -> file with input interactions must have one interaction per # line # -> the input file format must be the following: # protein_a protein_b source_db method confidence # (five fields in this order; the field separator is # specified in the README referenced below) # -> the format is described in detail on file # piana/code/dbParsers/piana_text_intParser/README.piana_interaction_data_format # -> source_db must be a db appearing in # PianaGlobals.interaction_databases # -> if you are using interactions from a # database that does not appear in this # list and you do not want to add a label # to PianaGlobals.interaction_databases # you can use 'user' as your source db # -> method must be a method appearing in # PianaGlobals.method_names # # - if the type of protein code used in the file is different # from input-proteins-type then set protein-type # -> valid protein types are blank (ie use input-proteins-type) # or those defined in PianaGlobals.valid_protein_types # # - proteins in the interactions file must all be of the same type # # - this command does not add any interactions from the piana # database: if you want as well the interactions from the # database you must create a file with proteins and use # command add-proteins-file # # Attention: all interactions in the file will be added! # No restrictions applied... 
that means list-source-dbs # list-source-methods and use-self-ints have no effect # on this command # You are responsible for having the interactions you # want to use on your interactions file # (if you think it is important for you to apply # restrictions to your file, send me an email and # I will do it...) add-interactions-file;file-name=complete_path_to_file;protein-type=blank # *************** # species-network: this command builds a network for all proteins in a # given species # *************** # # Executing this command will replace the existing network with a new # network. Moreover, this command ignores the input list of proteins # and the species set in the input section: it will build a network # using all proteins and all interactions of a given species. # # - a network must not previously exist (ie. commands build-network # and add-protein* not active) # # - to set the species for which you want to load the network, # you have two options (one of the two arguments must be set # to blank and the other to a correct value): # # - tax_id # -> valid taxonomy ids are those defined by the NCBI # (9606 for human, 7227 for drosophila meg, ... ) # # - species_name # -> valid species names are those in the NCBI database # (human, yeast, Arabidopsis thaliana, ...) # -> if the species name given has multiple corresponding # taxonomy ids, the network will contain proteins # from these multiple taxonomy ids # (eg. "rat" is tax_id 10116 and 10114) # # Attention: if both arguments are different from blank, # tax_id will be used # if none of the arguments is set to a value, # an error will be raised # # Attention! use this command at your own discretion... 
# networks can be huge species-network;species-name=blank;tax-id=blank # *********************** # database-method-network: this command builds a network for all # interactions in a given database and/or # a given method # *********************** # # This command can be useful to build a network that contains # all interactions in a given database... for example, if you # want to visualize the network for a database that you have # inserted into a piana database # # - a network must not previously exist # (ie. commands add-protein* can not appear in the same # configuration file as database-method-network) # --> this command does not require a list of proteins, # since it takes all interactions for a given database # and method # # - database_name is required # -> valid databases names are all (all databases taken into # consideration) and those in # PianaGlobals.interaction_databases # -> use at your own discretion... networks can be huge # # - method_name is required # -> valid method names are all (all methods are taken into # consideration) and those in PianaGlobals.method_names # -> use at your own discretion... networks can be huge # # - species_name is required: network will only contain # interactions between proteins of species_name # -> valid species names are all and those in the NCBI # database # -> if the species name given has multiple corresponding # taxonomy ids, the network will contain proteins from # these multiple taxonomy ids (eg. "rat" is tax_id 10116 # and 10114) # # Attention! 
hub-threshold parameter does not affect this # command: all interactions will be added regardless # of the hub-threshold value # database-method-network;database-name=database_name;method-name=method_name;species-name=species_name # *********** # print-table: this command prints a table with all interactions in # current network # *********** # # - if you do not want the table to be printed to default name # then you should set output-target to your own file name # -> output-target can be blank (ie. use default name), a # file name or screen if you want the results printed # to stdout # # - if you want the type of protein code used for printing # the table different from output-proteins-type then set # protein-type. # -> valid protein types are blank (ie use output-proteins-type) # or those defined in PianaGlobals.valid_protein_types.keys() # -> You can alternatively do $> python piana.py --help, which # will display valid input values for proteins types # # - print-mode is required: set which proteins will appear in # output # -> all: prints all interactions in the network # -> all_root: prints all interactions in the network where # at least one of the proteins is a root protein # -> only_root: prints only interactions between root proteins # in network # -> connecting will print only interactions between root # proteins and those proteins that connect more than one # root protein (linker proteins) # # - format-mode is required: set which format will be used for # printing the output # -> txt: prints flat text # -> html: prints in html format # # # -> the output for all print-table commands in format-mode == html # is described in the output file itself # # -> the output for all print-table commands in format-mode == txt # is (all in one line... 
separated in lines for the sake of clarity): # # connectivity=num of root prots connected by the not-root-prot of the pair # # protein_1=protein_1 code using the type chosen by user # # neighbours_1=number of neighbours protein 1 # # root_1=(is-root or not-root) for protein 1 # # expression_1=(None, over_expressed or infra_expressed) expression info prot 1 # # fitness_1=fitness information protein 1 (ignore this value) # # protein_2=protein_2 code using the type chosen by user # # neighbours_2=number of neighbours protein 2 # # root_2=is-root or not-root for protein 2 # # expression_2=(None, over_expressed or infra_expressed) expression info prot 2 # # fitness_2=fitness information protein 2 (ignore this value) # # location=proteins are in same cellular location (y or n) # # species=proteins are of same species (y or n) # # db=source database where interaction appears # # db=source database where interaction appears # # db=.............. # # method=method used to detect interaction # # method=method used to detect interaction # # method=.............. # print-table;output-target=blank;protein-type=blank;print-mode=print_mode;format-mode=format_mode # *************************** # print-table-db-intersection: this command prints a table with # interactions that appear in the # intersection of several databases # *************************** # # This command will only print those interactions that appear in all # the databases given by the user as argument For example, if the # network only has two interactions, one extracted from dip and the # other one extracted from dip and mips and the user sets list-dbs to # dip:mips, then only the second interaction will be printed by this # command # # - if you do not want the table to be printed to default name then # you should set output-target to your own file name # -> output-target can be blank (ie. 
use default name), a file # name or screen if you want the results printed to stdout # # - if you want the type of protein code used for printing the # table different from output-proteins-type then set protein-type. # -> valid protein-type values are blank (ie use # output-proteins-type) or those defined in # PianaGlobals.valid_protein_types.keys(). # -> You can alternatively do $> python piana.py --help, which # will display valid input values for proteins types # # - list-dbs is required: used to set the databases where the # interactions must appear in order to be printed # -> valid database names are those defined in # PianaGlobals.interaction_databases # -> for example: list-dbs=dip:string:ori # # - print-mode is required: set which proteins will appear in # output # -> all prints all interactions in the network # -> all_root prints all interactions in the network where at # least one of the proteins is a root protein # -> only_root prints only interactions between root proteins # in network # -> connecting will print only interactions between root # proteins and those proteins that connect more than one # root protein # # - format-mode is required: set which format will be used for # printing the output # -> txt: prints flat text # -> html: prints in html format # # - for obtaining several intersections, just repeat this command # changing the dbnames # # - the default results file name will describe the intersecting # dbs # # output format described in command print-table print-table-db-intersection;output-target=blank;protein-type=blank;print-mode=print_mode;list-dbs=dbname1:dbname2:dbname3:...;format-mode=format_mode # ************* # print-network: this command prints a DOT file with all interactions # in current network # ************* # # - if you do not want the network to be printed to default name # then you should set output-target to your own file name # -> output-target can be blank (ie. 
use default name), a file # name or screen if you want the results printed to stdout # # - if you want the type of protein code used for printing the # network different from output-proteins-type then set protein-type. # -> valid protein-type values are blank # (ie use output-proteins-type) or those defined in # PianaGlobals.valid_protein_types.keys(). # --> You can alternatively do $> python piana.py --help, # which will display valid input values for proteins types # # - print-mode is required: set which proteins will appear in output # -> all prints all interactions in the network # -> all_root prints all interactions in the network where at # least one of the proteins is a root protein # -> only_root prints only interactions between root proteins # in network # -> connecting prints only proteins that are either root or # linkers (proteins that connect root nodes between them) # # - format-mode is always equal to dot (it will have other formats # in the future...) # # -> The parameters that PIANA will use to generate the .DOT file # can be easily changed in file piana/code/PianaDB/PianaGlobals.py # -> section "PARAMETERS FOR OUTPUT .DOT NETWORK" describes all # the parameters that you can modify # # # --> The color of the node box is an indication of the type of # protein # Node fill colors can be easily changed in file # piana/code/PianaDB/PianaGlobals.py # (dictionary node_fill_colors in section COLOR CODES FOR # OUTPUT NETWORK) # # Currently, these are the meanings of the colors: # - blue: standard protein # - yellow: root protein # - red: protein that contains a keyword (see list-keywords # in input parameters) in its function, description # or name # - orange: root protein that contains a keyword (see # list-keywords in input parameters) in its # function, description or name # # --> The color of the border of the node box is an indication # on how that node was added to the network # Border colors can be easily changed in file #
piana/code/PianaDB/PianaGlobals.py # (dictionary node_border_colors in section # COLOR CODES FOR OUTPUT NETWORK) # # Currently, these are the meanings of the border colors: # - black: protein from the database # - blue: protein added to the network after a prediction # based on interologs # - green: protein found in the file with under expressed # proteins (from a microarray experiment) # - red: protein found in the file with over expressed # proteins (from a microarray experiment) # # # --> The color of the edge line is an indication of the source # database that had that interaction # Edge colors can be easily changed in file # piana/code/PianaDB/PianaGlobals.py # (dictionary interaction_source_databases_colors in section # COLOR CODES FOR OUTPUT NETWORK) # # Currently, these are the meanings of the edge line colors: # - red: DIP # - green: ori (predictions by distant # sequence/structure patterns similarity) # - magenta: STRING # - orange: prediction using interologs # (either by COG, SCOP, ...)
# - dark green: MIPS # - blue: HPRD # - grey: BIND # - yellow: user (an interaction added by user with # command add-file-interactions given label # 'user') # - cyan: interaction appears in more than one database # (you can see the list of all the dbs where it # appears by looking to the result file .print-table) # # Attention!: since the colors change depending on the graphics # card, we have created a GIF image indicating to which database # corresponds each edge color: # piana/docs/documentation/network_colors.gif # # Attention!: if you add a new database, you have to add the # name of the database and a new color # in dictionary interaction_source_databases_colors # # --> The style of the edge line is an indication on how that # interaction was added to the network # Edge styles can be easily changed in file # piana/code/PianaDB/PianaGlobals.py # (dictionary interaction_line_styles in section # COLOR CODES FOR OUTPUT NETWORK) # # Currently, these are the meanings of the edge line styles: # - solid: interaction from the database # - dashed: added to the network as a db interaction # of a node that was added when doing # predictions # - dotted: prediction by interologs # # The output of this command can be used to create an image of the # network (read piana/code/execs/README.visualize_piana_network) print-network;output-target=blank;protein-type=blank;print-mode=print_mode;format-mode=dot # ***************************** # print-network-db-intersection: this command prints a DOT file with # interactions that appear in the # intersection of several databases # ***************************** # # This command will only print those interactions that appear in all # the databases given by the user as argument. For example, if the # network only has two interactions, one extracted from dip and the # other one extracted from dip and mips and the user sets list-dbs to # dip:mips, then only the second interaction will be printed by this # command # # - if you do not want
the network to be printed to default name # then you should set output-target to your own file name # output-target can be blank (ie. use default name), a file name # or screen if you want the results printed to stdout # # - if you want the type of protein code used for printing the # network different from output-proteins-type then set # protein-type. # -> valid protein-type values are blank (ie use # ouput-proteins-type) or those defined in # PianaGlobals.valid_protein_types.keys(). # --> You can alternatively do # $> python piana.py --help # which will display valid input values for # proteins types # # - list-dbs is required: used to set the databases where the # interactions must appear in order to be printed # -> valid database names are those defined in # PianaGlobals.interaction_databases # -> for example: list-dbs=dip:string:ori # # - print-mode is required: set which proteins will appear in # output # -> all prints all interactions in the network # -> all_root prints all interactions in the network where # at least one of the proteins is a root protein # -> only_root prints only interactions between root proteins # in network # -> connecting prints only proteins that are either root or # linkers (proteins that connect root nodes between them) # # - format-mode is always equal to dot (it will have other formats # in the future...) 
# # - for obtaining several intersections, just repeat this command # changing the dbnames # # - the default results file name will describe the intersecting dbs # # Read the description of command print-network for a detailed # explanation of the output of this command # # The output of this command can be used to create an image of the # network (read piana/code/execs/README.visualize_piana_network) print-network-db-intersection;output-target=blank;protein-type=blank;print-mode=print_mode;list-dbs=dbname1:dbname2:dbname3:...;format-mode=dot # ******************** # print-all-prots-info: this command prints information (protein # description, other codes, ...) about all # proteins in network # ******************** # # - if you do not want the information to be printed to default # name then you should set output-target to your own file name # output-target can be blank (ie. use default name), a file # name or screen if you want the results printed to stdout # # - if you want the type of protein code used for printing the # information different from output-proteins-type then set # protein-type. # -> valid protein-type values are blank (ie use # ouput-proteins-type) or those defined in # PianaGlobals.valid_protein_types.keys(). # --> You can alternatively do # $> python piana.py --help # which will display valid input values # for proteins types # # - output-mode is required: used to set how information will be # printed # -> valid output-mode values are: # - extended (multiple lines, all available information) # - compact (one line, connected root proteins and # description) # -> default results file name will describe if output_mode # is extended or compact by placing '.compact.' or # '.extended.' 
in the file name # -> in compact mode, when a user keyword from list-keywords # appears, "user_keyword=the_word_that_appears" will be # printed to the protein information line # -> in compact mode, a list of the protein names associated # to the protein is printed after the protein information. # The first name, the one that is used to identify the # protein, is the "unique" name that PIANA has assigned # to that protein by means of inference and name checking # -> Please, read README.piana_tutorial section "PIANA and # protein names" for better understanding how PIANA # handles protein names # # - format-mode is required: set which format will be used for # printint the output # -> txt: prints flat text # -> html: prints in html format # # # the output for print-all-prots-info and print-root-prots-info # commands in format-mode == txt and output-mode 'compact' is: # # (format-mode == txt and output-mode 'compact' is the only # type of output you might find yourself parsing... # the other outputs are thought for looking directly at them, # as html tables have column titles) # # (for clarity, the format described here appears in multiple # lines... the real output is all in the same line: one line # of information for each protein) # # protein name # # ['protein description 1', 'prot desc 2', ...] (a protein can have several descriptions associated) # # ['protein function 1', 'prot funct 2', ...] (a protein can have several functions associated) # # root=value (where value is 1 when the protein is a root protein, and 0 otherwise) # # expression=expression_value (None, over_expressed or infra_expressed) # # fitness=fitness_value (ignore this field) # # user_keyword=keyworduser_keyword=keyword.... (as many fields as user keywords appear in the protein function or description) # # protein_code_type:proteinprotein_code_type:protein.... 
(as many fields as codes that this protein has) # valid protein_code_type are PianaGlobals.valid_protein_types.keys()) # # print-all-prots-info;output-target=blank;protein-type=blank;output-mode=output_mode;format-mode=format_mode # ********************* # print-root-prots-info: this command prints information (protein # description, other codes, ...) about root # proteins in network # ********************* # # - if you do not want the information to be printed to default # name then you should set output-target to your own file name # output-target can be blank (ie. use default name), a file # name or screen if you want the results printed to stdout # # - if you want the type of protein code used for printing the # information different from output-proteins-type then set # protein-type. # -> valid protein-type values are blank (ie use # ouput-proteins-type) or those defined in # PianaGlobals.valid_protein_types.keys(). # --> You can alternatively do # $> python piana.py --help # which will display valid input values for # proteins types # # - output-mode is required: used to set how information will be # printed # -> valid output-mode values are: extended (multiple lines, # all available information) or compact (one line, # connected root proteins and description) # -> default results file name will describe if output_mode # is extended or compact # -> in compact mode, when a user keyword from list-keywords # appears, "user_keyword=the_word_that_appears" will be # printed to the protein information line # -> in compact mode, a list of the protein names associated # to the protein is printed after the protein information. 
# The first name, the one that is used to identify the # protein, is the "unique" name that PIANA has assigned # to that protein by means of inference and name checking # -> Please, read README.piana_tutorial section "PIANA and # protein names" for better understanding how PIANA # handles protein names # # - format-mode is required: set which format will be used for # printing the output # -> txt: prints flat text # -> html: prints in html format # # the output of this command is described in command # print-all-prots-info print-root-prots-info;output-target=blank;protein-type=blank;output-mode=output_mode;format-mode=format_mode # ************************ # print-connect-prots-info: this command identifies linker proteins # (proteins that connect root nodes between # them) and prints information (protein # description, other codes, linked roots, # ...) about them # ************************ # # - if you do not want the information to be printed to default # name then you should set output-target to your own file name # output-target can be blank (ie. use default name), a file # name or screen if you want the results printed to stdout # # - if you want the type of protein code used for printing the # information different from output-proteins-type then set # protein-type. # -> valid protein-type values are blank (ie use # ouput-proteins-type) or those defined in # PianaGlobals.valid_protein_types.keys(). 
# --> You can alternatively do # $> python piana.py --help # which will display valid input values for # proteins types # # - output-mode is required: used to set how information will be # printed # -> valid output-mode values are: extended (multiple lines, # all available information) or compact (one line, # connected root proteins and description) # -> default results file name will describe if output_mode # is extended or compact # -> in compact mode, when a user keyword from list-keywords # appears, "user_keyword=the_word_that_appears" will be # printed to the protein information line # -> in compact mode, a list of the protein names associated # to the protein is printed after the protein information. # The first name, the one that is used to identify the # protein, is the "unique" name that PIANA has assigned # to that protein by means of inference and name checking # -> Please, read README.piana_tutorial section "PIANA and # protein names" for better understanding how PIANA # handles protein names # # - format-mode is required: set which format will be used for # printing the output # -> txt: prints flat text # -> html: prints in html format # # the output of this command looks like this: # # (for clarity, the format described here appears in multiple # lines... the real output is all in the same line: one line of # information for each protein) # # # 'protein name' connects # root_proteins (with # being the number of root proteins that this linker protein connects) # # ['root_protein_1', 'root_protein_2', ...] # # ['source interaction database 1', 'source inter db 2', ...] (list of interaction dbs where these interactions were retrieved from) # # ['protein description 1', 'protein desc 2', ...] (a protein can have several descriptions associated) # # ['protein function 1', 'protein func 2', ...]
(a protein can have several functions associated) # # expression=expression_value (None, over_expressed or infra_expressed) # # fitness=fitness_value (ignore this field) # # # (if you need more information about these linker proteins, # just look for it in the results files of command print-all-prots-info) print-connect-prots-info;output-target=blank;protein-type=blank;output-mode=output_mode;format-mode=format_mode # *************************** # protein-code-2-protein-code: transforms codes from input-file (which # are of type input-proteins-type) to # output-proteins-type # *************************** # # This command is thought to be used independently from other # commands: it uses piana modules to transform proteins from one # code to another. It doesn't make use of the network itself, it # just outputs a table with protein code equivalences. Moreover, # most of the input and output parameters are not used when # executing this command (ie. all parameters ignored except for # input-file, input-proteins-type and output-proteins-type). For # example, even if you set output-proteins-species to yeast, the # output of this command can contain proteins from all species. # (reason: this is not building a network, and therefore, this # command is considered as an 'extra' to PIANA and does not # use the other parameters) # # - if you do not want the information to be printed to a default # file name then you should set output-target to your own file # name # -> output-target can be blank (ie. 
use default name), a file # name or screen if you want the results printed to stdout # # - retrieves proteins to be "translated" from input-file (which # is set through the command line or above in this file) # -> uses input-proteins-type as the type of code of proteins # in the input file # -> uses output-proteins-type as the type of code to which # proteins will be "translated" # # - format-mode is required: set which format will be used for # printing the output # -> txt: prints flat text # -> html: prints in html format protein-code-2-protein-code;output-target=blank;format-mode=format_mode # *********************** # protein-code-2-all-info: gets information for proteins in input-file # (which are of type input-proteins-type) # *********************** # # This command is thought to be used as a stand alone tool: it uses # piana modules to get information from proteins It doesn't make use # of the network itself, it just outputs a table with protein # information. Moreover, most of the input and output parameters # are not used when executing this command (see comments on previous # command protein-code-2-protein-code) # # The format followed for the output is described in command # print-all-prots-info # # - if you do not want the information to be printed to a default # file name then you should set output-target to your own file name # -> output-target can be blank (ie. use default name), a file # name or screen if you want the results printed to stdout # # - will output information for proteins in input-file (which is # set through the command line or above in this file) # -> uses input-proteins-type as the type of code of proteins # in the input file # # - if you want the type of protein code used for printing the # proteins different from output-proteins-type then set # protein-type. # -> valid protein types are blank (ie use ouput-proteins-type) # or those defined in PianaGlobals.valid_protein_types.keys(). 
# --> You can alternatively do $> python piana.py --help, which # will display valid input values for proteins types # # - format-mode is required: set which format will be used for printing # the output # -> txt: prints flat text # -> html: prints in html format # # - output-mode is required: used to set how information will be # printed # -> valid output-mode values are: extended (multiple lines, # all available information) or compact (one line, # connected root proteins and description) # -> default results file name will describe if output_mode # is extended or compact # -> in compact mode, when a user keyword from list-keywords # appears, "user_keyword=the_word_that_appears" will be # printed to the protein information line # -> in compact mode, a list of the protein names associated # to the protein is printed after the protein information. # The first name, the one that is used to identify the # protein, is the "unique" name that PIANA has assigned # to that protein by means of inference and name checking # -> Please, read README.piana_tutorial section "PIANA and # protein names" for better understanding how PIANA # handles protein names # protein-code-2-all-info;output-target=blank;protein-type=blank;format-mode=format_mode;output-mode=output_mode # ******************* # expand-interactions: this command predicts interactions of proteins # in the network using interologs (or other # methods) # ******************* # # This command propagates interactions between nodes that share # a certain characteristic. For example, this command can be # used to transfer interactions between proteins that have the # same domain (scop) or that belong to the same orthologous # group (cog) # # Each expand-interactions piana command does the following: # # For each protein in the network: # # 1. find interactions of this protein in the current network # 2. find proteins in the database that share a certain # characteristic with this protein (e.g cog code) # 3. 
for each protein that shares that characteristic: # - find interactions for protein that shares the # characteristic in the database # - find interactions for protein that shares the # characteristic in the network # - assign to protein being processed all interactions # of protein that shares the characteristic # - assign to protein that shares that characteristic # all interactions of protein being processed # # This process can be repeated more than once, to reach # far-fetched deductions # # For example, if root protein is A, and if we know # that C and D (yeast) interact, and that A =cog= C # and B =cog= D # # - simple expansion will predict that A interacts # with D # - double expansion will predict that A interacts # with D and that A interacts with B # (ie double expansion predicts interactions from # a previous prediction) # (this is achieved by executing two consecutive # expand-interactions piana commands # # - the new interactions (predictions) can be added to # the network or printed out to a results file # # - expansion_type is required: use to know the type # of expansion to perform # -> valid expansion-type values are those defined # in PianaGlobals.expansion_types (currently can # be cog, scop (ie. scop family), interpro or ec) # -> if two proteins share expansion-type, interactions # are interpropagated # # - expansion-nodes is required: used to define which # proteins will be expanded # -> valid expansion-nodes values are: all (all # proteins in network are expanded) or root # (only root proteins are expanded) # -> if you are looking for new interactions (predictions) # for your input proteins, use root # -> if you want to expand all the proteins in the # network (partners of root proteins as well) use all # -> root proteins are the proteins used to build the # network (eg. 
the proteins in input-file) # # - expansion-threshold is required: used to avoid propagating # interactions when there are too many nodes that share the # expansion type # -> valid values are: 0 (no thresholds applied) and # positive integers # -> depending on the expansion type, the expansion-threshold # to be used varies # # - exp-output-mode is required: used to define if predictions # should be added to network or printed to file # -> valid exp-output-mode values are: add (add predictions # to network) and print (print to output-target) # -> 'add' will add to the network the predictions found # by expansion # -> 'print' will print to output-target (or to default # results file) the list of predictions found by expansion # -> for example, if you wanted to get predictions for root # nodes using double cog expansion you would first use # command expand-interactions with expansion-nodes=all # and mode=add and then, another command # expand-interactions with expansion-nodes=root # and mode=print doing this "double expansion" you will # be predicting interactions based on a previous expansion # # - if exp-output-mode is add, the following arguments # can be ignored: leave them to blank: # - if exp-output-mode is "print" then : # # -> if you do not want the information to be printed # to a default file name then you should set output-target # to your own file name # -> output-target can be blank (ie. use default name), # a file name or screen if you want the results printed # to stdout # # -> if you want the type of protein code used for printing # the information different from output-proteins-type then # set protein-type. 
# -> valid protein-type values are blank (ie use # output-proteins-type) or those defined in # PianaGlobals.valid_protein_types # # -> the results will follow the following format (one # interaction per line): # # protein1<TAB>protein2<TAB>expansion_type<TAB>source_interactionPiana<TAB>source_proteinPiana # # This file can then be used to insert predictions into # a PIANA database using parser expansion2piana.py # # If you are going to insert these predictions into a # PIANA database, I recommend that your output type # for protein codes is proteinPiana (to make sure that # the prediction refers to that protein sequence). In any # case, never use geneNames for creating a list of # predictions that is going to be inserted into a # PIANA database: geneNames do not implicitly contain # the species and can be ambiguous within a species. # # To learn more about inserting predictions into # PIANA databases, read README.populate_piana_db and # README.piana_examples # # # - We do not recommend doing predictions based on predictions: # ie. we do not recommend executing command expand-interactions # on networks that were built from a database with predictions. # To avoid this, you can use parameters list-source-dbs and # list-source-methods or do what we do internally in our lab: # have two piana databases, one with only experimentally # detected interactions and another one with all interactions.
# # - for expansions, I recommend using program # run_piana_protein_by_protein.py instead of piana.py # -> the result will be the same whether you work with all proteins # at the same time or you do it one by one # -> it is much faster to manage the expansion separately # for each protein # -> read README.piana_examples for more info on this # expand-interactions;expansion-type=expansion_type;expansion-nodes=expansion_nodes;expansion-threshold=expansion_threshold;exp-output-mode=mode;output-target=blank;protein-type=blank # ******************* # find-shortest-route: this command finds the shortest route between # two proteins in the network # ******************* # # ATTENTION: this command requires the piana 'advanced mode' or # 'developer mode'. By default, all users work in # 'simple mode'. Therefore, if you want to use this # command you'll have to modify your working mode as # indicated in section 'PIANA types of users' of file # README.piana_tutorial # # - network must exist before running this command # # - if you do not want the information to be printed to default name # then you should set output-target to your own file name # output-target can be blank (ie. use default name), a file name # or screen if you want the results printed to stdout # # - protein_a_name and protein_b_name are required. # # - if protein code type used for protein-name is different from # input-proteins-type, then set protein-type with the new type # as shown.
# if protein-name used is of input-proteins-type, then set # protein-type to blank # -> Valid protein types are blank (ie use input-proteins-type) # and those defined in PianaGlobals.valid_protein_types # # - the output will be written using output-proteins-type (read from # the input parameters section) # # # format of the txt output is: # # first line: protein_a=protein_a_nameprotein_b=protein_b_namedistance=distance_of_route # second line: START=protein_a_name<-->protein_in_route<-->protein_in_route<-->...<-->END=protein_b_name find-shortest-route;protein-a-name=protein_a_name;protein-b-name=protein_b_name;protein-type=blank;output-target=blank # ******************* # find-distance-group: this command finds a group of proteins that are # at distance N from a query protein # ******************* # # - network must exist before running this command # # - if you do not want the information to be printed to default name # then you should set output-target to your own file name # output-target can be blank (ie. use default name), a file name # or screen if you want the results printed to stdout # # - protein_name is required. # # - if protein code type used for protein-name is different from # input-proteins-type, then set protein-type with the new type # as shown. 
# if protein-name used is of input-proteins-type, then set # protein-type to blank # -> Valid protein types are blank (ie use input-proteins-type) # and those defined in PianaGlobals.valid_protein_types # # - distance is required: set the distance between your query # protein and the group of proteins you are searching # - when distance is "all", groups 1, 2 and 3 are printed # out # # - info is used to choose the information that will be printed # next to the proteins at distance N # - values admitted are: # - blank: no info printed # - all: all info known about protein # - scop: scop codes # - cath: cath codes # # - format-mode is required: set which format will be used for # printing the output # -> txt: prints flat text # -> html: prints in html format # find-distance-group;protein-name=protein_name;protein-type=blank;distance=distance;output-target=blank;info=blank;format-mode=format_mode # *********************** # match-proteins-to-spots: this command identifies spots in a 2D gel # by matching MW and/or IP to proteins in # the network # *********************** # # we have spot ids from a 2D electrophoresis gel, with their # molecular weights (MW) and isoelectric points (IP). Some of those # spots were identified by mass spectrometry but other spots were # unassigned. We can use PIANA to identify some of those # unassigned spots, by comparing the MW and IP of the spots with # the MW and IP of the proteins in the network. # # - if you do not want the information to be printed to default # name then you should set output-target to your own file name # -> output-target can be blank (ie. use default name), a file # name or screen if you want the results printed to stdout # # - format-mode is required: set which format will be used for # printing the output # -> txt: prints flat text # -> html: prints in html format # # - if you want the type of protein code used for printing the # information different from output-proteins-type then set # protein-type.
# -> valid protein-type values are blank (ie use # output-proteins-type) or those defined in # PianaGlobals.valid_protein_types.keys(). # --> You can alternatively do # $> python piana.py --help # which will display valid input values for # proteins types # # - spots-file-name is a file name following the structure # (one spot per line): # spot_id<TAB>molecular_weight<TAB>isoelectric_point # -> where decimals are expressed with "." # -> complete_path_to_spots_file can be blank if # spots-file-name was set in the command line # # - list-mw-error and list-ip-error set the error bounds # admitted for the matching of molecular weight and # isoelectric point # -> the number of error bounds for mw and ip must # be identical: values can be different, but the # number of values not # -> use "." for decimals # -> set to blank if you prefer to use the default # error bounds (which are hard-coded in piana.py) # -> to set your own error bounds, write colon-separated # values # (e.g. list-mw-error=0.01:0.02:0.05;list-ip-error=0.1:0.2:0.5) # # Attention!: correspondences that appear in a given error level will # not be shown in higher error levels match-proteins-to-spots;output-target=blank;format-mode=format_mode;protein-type=blank;spots-file-name=complete_path_to_spots_file;list-mw-error=blank;list-ip-error=blank # ******************** # cluster-by-go-terms: this command clusters the proteins of the # network using GO terms # ******************** # # - In order to cluster a network using go terms, a # protein-protein interaction network must previously exist # # - if you do not want the clustered network to be printed to # the default file name then you should set output-target to # your own file name # -> output-target can be blank (ie.
use default name), a # file name or screen if you want the results printed to # stdout # # - term-type sets the kind of GO terms that will be used for # the clustering (required) # -> term-type can be molecular_function, biological_process # or cellular_component # # - score-threshold is the lowest score obtained by the # similarity function allowed for continuing the clustering # -> can be any real number from 0 to 100 (0 will group # all proteins, 100 will not group any proteins). # To obtain a relevant clustered network use score # thresholds between 0.1 and 1 # # - sim-mode sets how to calculate distances between two # clusters # - random takes a random element from each cluster # and evaluates similarity between them # - min takes the minimal distance between elements # of each cluster # - max takes the maximal distance between elements # of each cluster # - average takes the average distance between all # elements of each cluster # # - level-threshold is the lowest level of the go term in # the cluster allowed for continuing the clustering # -> GO is a hierarchy organized from an initial root # level (ie. 0), with terms becoming increasingly # specific at higher levels. # Therefore, the higher the level used the less # clustering will be performed. To obtain a relevant # clustered network use level thresholds between 1 # and 3. It all depends on how general you want to # be in the interpretation of the network. # # - distance-threshold is the maximum distance allowed # between two proteins in order to be clustered # -> can be any integer between 1 and ... 
# # - rep-term sets which of the GO terms of the cluster # will be used for printing output # -> can be min (term of minimal depth in the hierarchy) # or max (maximal depth) # # - print-id sets which id will be used for identifying # the clusters in the printed output # -> can be "no" (default id: go term name) or "yes" # (a more complex id) # cluster-by-go-terms;output-target=blank;term-type=term_type;score-threshold=score_threshold;sim-mode=sim_mode;level-threshold=level_threshold;distance-threshold=distance_threshold;rep-term=rep_term;print-id=print_id # **** # exit: this command exits # **** # # - required in all piana configuration files, unless interactive mode # is used exit; ## ## # THE FOLLOWING OPTIONS ARE CURRENTLY UNAVAILABLE # ## ## # ***************** # train-cirs: outputs results that can be used to train/evaluate CIRs (NOT WORKING!!!) # ********************* # # ATTENTION! DO NOT USE, NOT WORKING # # - similarity-mode is the type of similarity function that will be applied to do the clustering # # -> valid values are: (in all cases, term1 is 1 if number_of_protein_partners_in_common>0, and 0 otherwise) # # - 'num_ints': based on global number of common interaction partners # -> number_of_protein_partners_in_common(cluster1, cluster2) x # number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2) # # - 'combined_per': based on which percentage of the protein partners in both clusters are also common partners between them # -> number_of_protein_partners_in_common(cluster1, cluster2) x # # 1/2( number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/number_of_protein_partners(proteins in cluster1) # + number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/number_of_protein_partners(proteins in cluster2) # ) # # - 'min_per': based on which percentage of the protein partners are shared with the other protein (taking the protein with fewer partners) # # -> 
number_of_protein_partners_in_common(cluster1, cluster2) x # number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/ # min(number_of_protein_partners(proteins in cluster1), number_of_protein_partners(proteins in cluster2)) # # # - 'max_per': based on which percentage of the protein partners are shared with the other protein (taking the protein with more partners) # # -> number_of_protein_partners_in_common(cluster1, cluster2) x # number_of_protein_partners_in_common(proteins in cluster1, proteins in cluster2)x100/ # max(number_of_protein_partners(proteins in cluster1), number_of_protein_partners(proteins in cluster2)) # # # - min_score is the minimum score allowed in order to fuse two clusters. Any positive integer is a valid value # # - cir_method is the method used to create CIRs # --> valid values are: # - irs: by decomposing the proteins into Interacting Regions and then clustering them # - prots: by adding 'clusters' to the original network of proteins # train-cirs;similarity-mode=similarity_mode;min-score=min_score;cir-method=cir_method # ***************** # modify-parameters: this command modifies parameter values # (NOT WORKING!!!) # ********************* # # ATTENTION! DO NOT USE, NOT WORKING # # - use this command to modify any parameter value at any point of # piana execution # - set to blank those parameters that you do not want to modify modify-parameters;input-proteins-type=input_proteins_type;depth=depth;output-proteins-type=output_proteins_type # ******************** # find-protein-patches: this command divides proteins into patches # (NOT WORKING!!!) # ********************* # # ATTENTION! 
DO NOT USE, NOT WORKING # # - clustering-steps: set the maximum number of clustering steps you # wish to perform # -> clustering_steps can be any positive integer # # - score-threshold: set the score threshold for considering two # patches identical # -> score_threshold can be any positive integer # # - ranked-parameters-file: set here the file name of the file # that contains the ranked parameters obtained by the training # -> this file is an ordered (from best to worst) list of # which weights and stop conditions performed best in the # training # -> each line of this file looks like this: # w_patches=1 w_prots=2 w_belong=1 score_links=huge # av_spec_shared=0 av_sens_shared=0 av_fvalue_shared=0 # av_spec_int=1 av_sens_int=1 av_fvalue_int=1 .... # # where the w_* are the weights to be used, the score_links # (can be also score_clusters) is the stop condition and the # rest of fields are the results obtained with those # parameters during the training # -> this file can be created by training piana with a # benchmark-configuration-file # # - save-mode: set how patches and their interactions are saved # -> valid values for save-mode are "memory" (everything saved on # memory) and "disc" (everything saved on disc) # -> for retrieving results, see commands print-protein-patches, # print-shared-patches and print-patches-interactions # # - disc-name: if save-mode == disc, set a file name prefix for # patchgroup graphs that will be saved to disc # -> this argument is ignored if save-mode is memory # find-protein-patches;clustering-steps=number_clustering_steps;score-threshold=score_threshold;ranked-parameters-file=file_name;save-mode=save_mode;disc-name=disc_name # ********************* # print-protein-patches: this command prints for each protein, which # patches does it have at a certain level # (NOT WORKING!!!) # ********************* # # ATTENTION! 
DO NOT USE, NOT WORKING # # - if you do not want the information to be printed to default name # then you should set output-target to your own file name # -> output-target can be blank (ie. use default name), a file name # or screen if you want the results printed to stdout # # - if you want the type of protein code used for printing the # information different from output-proteins-type then set # protein-type. # -> valid protein-type values are blank (ie use output-proteins-type) # or those defined in PianaGlobals.valid_protein_types.keys(). # --> You can alternatively do $> python piana.py --help, which # will display valid input values for proteins types # # - Set the clustering level you want to print out with clustering_level # -> clustering_level can be -1 (last clustering performed) or any # positive integer lower than clustering_levels in # find-protein-patches # # - default name will contain the clustering level that is being printed print-protein-patches;output-target=blank;protein-type=blank;clustering-level=clustering_level_to_print # ******************** # print-shared-patches: this command prints for each patch, which proteins # have it at a certain level (NOT WORKING!!!) # ********************* # # ATTENTION! DO NOT USE, NOT WORKING # # - if you do not want the information to be printed to default name then # you should set output-target to your own file name; output-target can # be blank (ie. use default name), a file name or screen if you want # the results printed to stdout # # - if you want the type of protein code used for printing the information # different from output-proteins-type then set protein-type. # -> valid protein-type values are blank (ie use output-proteins-type) or # those defined in PianaGlobals.valid_protein_types.keys(). 
# --> You can alternatively do $> python piana.py --help, which will # display valid input values for proteins types # # - Set the clustering level you want to print out with clustering_level # -> clustering_level can be -1 (last clustering performed) or any # positive integer lower than clustering_levels in find-protein-patches # # - default name will contain the clustering level that is being printed print-shared-patches;output-target=blank;protein-type=blank;clustering-level=clustering_level_to_print # ************************** # print-patches-interactions: this command prints the patch-patch interactions # network at a certain level (NOT WORKING!!!) # ********************* # # ATTENTION! DO NOT USE, NOT WORKING # # - if you do not want the information to be printed to default name then # you should set output-target to your own file name output-target can # be blank (ie. use default name), a file name or screen if you want the # results printed to stdout # # - Set the clustering level you want to print out with clustering_level # -> clustering_level can be -1 (last clustering performed) or any # positive integer lower than clustering_levels in # find-protein-patches # # - default name will contain the clustering level that is being printed # -> for level number -1 (last clustering performed) the default name # will finish by ".last" # -> for all other level numbers the default name will finish by ".??" # (?? being the level number) print-patches-interactions;output-target=blank;clustering-level=clustering_level_to_print # ************************** # print-patches-network: this command prints the .dot patch-patch interactions # network at a certain level (NOT WORKING!!!) # ********************* # # ATTENTION! DO NOT USE, NOT WORKING # # - if you do not want the information to be printed to default name then you # should set output-target to your own file name output-target can be blank # (ie. 
use default name), a file name or screen if you want the results # printed to stdout # # - Set the clustering level you want to print out with clustering_level # -> clustering_level can be -1 (last clustering performed) or any positive # integer lower than clustering_levels in find-protein-patches # # - default name will contain the clustering level that is being printed # -> for level number -1 (last clustering performed) the default name # will finish by ".last" # -> for all other level numbers the default name will finish by ".??" # (?? being the level number) print-patches-network;output-target=blank;clustering-level=clustering_level_to_print