# File     : create_piana_tables.sql
# Author   : Ramon Aragues & Joan Planas
# Creation : 12.12.2003
# Contents : script that creates the tables used for PIANA
# Usage    : mysql --host=sefarad --database=pianaDB < create_piana_tables.sql
# Comments :
#
# To create tables on your database, do:
#
# mysql --host=sefarad --database=pianaDB < create_piana_tables.sql
#
# Before running this script you must drop the previous database
# (if there was already one with the name you want to use)
#
# command line options host and database are needed
# otherwise, the script does not know where to create the tables
#


####################################################################
#                                                                  #
#                        PROTEIN DESCRIPTORS                       #
#                                                                  #
####################################################################



# ------------------------------------------------------------------
# create table protein
# ------------------------------------------------------------------
# This table will hold all sequences entered into this DB as well as 
# the information intrinsically related to the sequence.
# The internal identifier, the sequence itself, the MD5 code of that
# sequence (obtained from a hash digest), the source of
# the entry and its taxonomy id are stored in this table.
#
#
# proteinPiana is the unique identifier
#   --> in fact, proteinPiana is just an integer that is unique
#       for each pair (sequence, tax_id)
#       Therefore, a protein entity is defined by its 
#       sequence and species.
#
# when speciesNCBI is not known, use as 'dummy' tax id 0
#   (tax_id and speciesNCBI are two terms for the same concept)
#
# Theoretical molecular weight (MW) and isoelectric point (IP) are
# also stored in this table for each sequence
# ------------------------------------------------------------------

# One row per protein entity, i.e. per (sequence, species) pair.
CREATE TABLE protein (
    proteinPiana          INT UNSIGNED       NOT NULL,            # internal id, unique per (sequence, tax id) pair
    proteinSequence       MEDIUMTEXT         NOT NULL,            # the sequence itself
    speciesNCBI           MEDIUMINT UNSIGNED NOT NULL,            # NCBI tax id (0 is the dummy value when unknown)
    proteinMD5            CHAR(40)           NOT NULL,            # MD5 code of the sequence
    proteinSequenceLength MEDIUMINT UNSIGNED NOT NULL,
    proteinMW             FLOAT              NOT NULL DEFAULT 0,  # theoretical molecular weight
    proteinIP             FLOAT              NOT NULL DEFAULT 0,  # isoelectric point

    PRIMARY KEY (proteinPiana),

    # secondary lookups by digest and by species
    KEY (proteinMD5),
    KEY (speciesNCBI)
);

	
# ------------------------------------------------------------------
# create table proteinConflicts
# ------------------------------------------------------------------
# This table will hold all possible conflict descriptions. 
# conflictID field in proteins table refers to this table
# ------------------------------------------------------------------

# Conflict descriptions, one row per conflictID.
CREATE TABLE proteinConflicts (
    conflictID   TINYINT UNSIGNED NOT NULL DEFAULT 0,
    proteinPiana INT UNSIGNED     NOT NULL,
    description  LONGTEXT,                             # free text, may be NULL

    PRIMARY KEY (conflictID),
    KEY (proteinPiana)
);
	

# ------------------------------------------------------------------
# create table proteinCorrespondence
# ------------------------------------------------------------------
# This table holds the correspondence between proteinPiana values and
# (proteinMD5, tax_id) values. It is used to guarantee that proteinPiana
#  identifiers remain the same for any piana database 
#
# Its values are loaded (when starting a new database) from a data
# file in piana/data/proteinCorrespondences/proteinPiana_md5_correspondence.txt
# ------------------------------------------------------------------

# Maps each proteinPiana to its (proteinMD5, speciesNCBI) pair.
CREATE TABLE proteinCorrespondence (
    proteinPiana INT UNSIGNED       NOT NULL,
    proteinMD5   CHAR(40)           NOT NULL,
    speciesNCBI  MEDIUMINT UNSIGNED NOT NULL,   # NCBI tax id

    PRIMARY KEY (proteinPiana),
    KEY (proteinMD5),
    KEY (speciesNCBI)
);

# Not using it: (this is a mechanism for keeping coherence between different 
#                versions of PIANA databases  but I am currently not using it)
# Filling proteinCorrespondence with data from file holding 
# proteinPiana -- MD5 correspondences

# LOAD DATA LOCAL INFILE '../../data/proteinCorrespondences/proteinPiana_md5_correspondence.txt'  REPLACE INTO TABLE proteinCorrespondence;

# ------------------------------------------------------------------
# create table proteinSimilarity
# ------------------------------------------------------------------
# This table holds pairs of proteins (ie. sequences) that appear to  
# be the same protein (very similar sequences, same codes, ...)
#
# Not very complete at this point: in the future the idea is to
# do a blast all for proteinPianas and then fill this table
# Right now, only completion (complete_piana.py) algorithms
#  insert information into this table.
# 
# proteinPianaA is always lower than proteinPianaB
#
# ------------------------------------------------------------------

# Pairs of proteinPiana values considered to be the same protein.
# The ordering A < B is a convention for writers, it is not enforced here.
CREATE TABLE proteinSimilarity (
    proteinPianaA INT UNSIGNED NOT NULL,
    proteinPianaB INT UNSIGNED NOT NULL,

    PRIMARY KEY (proteinPianaA, proteinPianaB)
);



# ------------------------------------------------------------------
# create table proteinPianaCounter
# ------------------------------------------------------------------
# This table holds the value that a new proteinPiana will take
# It is controlled from PianaDBaccess to make sure that it is 
# increased by 1 when a new proteinPiana enters the database
#
# Initially, it will contain the number of entries in 
# proteinCorrespondence + 1 
# ------------------------------------------------------------------

# Holds the value that the next new proteinPiana will take.
# NOTE(review): this column is a signed INT while proteinPiana columns
# are INT UNSIGNED - confirm the smaller range is acceptable.
CREATE TABLE proteinPianaCounter (
    proteinPianaCounter INT NOT NULL
);


# Seed the counter with (largest proteinPiana in proteinCorrespondence) + 1.
# max() yields NULL on an empty table and NULL + 1 is still NULL, which the
# NOT NULL counter column rejects under strict SQL mode (and silently turns
# into 0 otherwise). coalesce makes an empty table start the counter at 1,
# which matches the documented rule: number of entries + 1.
insert into proteinPianaCounter (proteinPianaCounter)
select coalesce(max(proteinPiana), 0) + 1 from proteinCorrespondence;


####################################################################
#                                                                  #
#              EXTERNAL PROTEIN CODES  TABLES                      #
#                                                                  #
# All protein identifiers obtained from external databases.        # 
# The following section will relate these codes with proteins of   #
# the  first  documented section in this script                    #
#                                                                  #
#    The ID suffix is reserved for these codes that are unique     #
#                   identifiers of their own DB                    #
#                                                                  #
#  If you change the structure of these tables, you ll have to     #
#  change accordingly classes that manage their access:            #
#   - PianaInsertSQL - InsertProteinExternalCode                   #
#   - PianaInsertSQL - InsertProteinSwissAccessionCode             #
#  and modify PianaGlobals with new names                          #
#                                                                  #
#   so ... DO NOT CHANGE THE ORDER OF THE COLUMNS!!!               #
#     --> methods in PianaInsertSQL  won't work if you do it       #
#                                                                  #
#   Column order is db_id, proteinPiana and sourceDBID             #
#                                                                  #
####################################################################


# ------------------------------------------------------------------
# create table swissProt 
# ------------------------------------------------------------------
# This table will hold SwissProt DB names used in pianaDB 
# Note: this table holds UNIPROT entry names
#       The 'old' name is explained by the fact that PIANA existed
#        before UNIPROT was called UNIPROT
# ------------------------------------------------------------------
# EXAMPLE OF CODE: AOFA_HUMAN
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! 
#  (see column order to follow above)

# UniProt (formerly SwissProt) entry names attached to each proteinPiana.
# Column order (code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE swissProt (
    # Widened from 15 to 16 characters: UniProtKB entry names can be up to
    # 16 characters long (TrEMBL names are built from a 10-character
    # accession, an underscore and a species code of up to 5 characters).
    swissProtID  VARCHAR(16)  NOT NULL,   # entry name, e.g. AOFA_HUMAN
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (swissProtID, proteinPiana, sourceDBID),

    KEY (swissProtID),                    # same leading column as the primary key
    KEY (proteinPiana)
);


# ------------------------------------------------------------------
# create table swissAccession
# ------------------------------------------------------------------
# This table will hold SwissProt accession codes used in 
# pianaDB
# Note: this table holds UNIPROT accession numbers
#       The name is explained by the fact that PIANA existed before
#       UNIPROT was called UNIPROT
#
# if isPrimary == 1: primary accession number used by swissProt
# if isPrimary == 0: secondary accession numbers
# ------------------------------------------------------------------
# EXAMPLE OF CODE: P21397
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)
	
# UniProt (formerly SwissProt) accession numbers attached to each proteinPiana.
# Column order must be kept: code, proteinPiana, then the remaining columns.
CREATE TABLE swissAccession (
    # Widened from char(6) to varchar(10): UniProt accession numbers are
    # either 6 or 10 characters long, so the old width could not hold the
    # 10-character form (e.g. A0A022YWF9).
    swissAccessionID VARCHAR(10)  NOT NULL,            # accession, e.g. P21397
    proteinPiana     INT UNSIGNED NOT NULL,
    isPrimary        INT          NOT NULL DEFAULT 0,  # 1 = primary accession, 0 = secondary
    sourceDBID       VARCHAR(20)  NOT NULL,

    PRIMARY KEY (swissAccessionID, proteinPiana, sourceDBID),

    KEY (swissAccessionID),                            # same leading column as the primary key
    KEY (proteinPiana)
);


# ------------------------------------------------------------------
# create table geneName
# ------------------------------------------------------------------
# This table will hold all gene names used in external DBs for the
# proteins stored in piana
# ------------------------------------------------------------------
# EXAMPLE OF CODE: MAO-A
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)
	
# Gene names attached to each proteinPiana.
# Column order (code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE geneName (
    geneName     VARCHAR(100) NOT NULL,   # e.g. MAO-A
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (geneName, proteinPiana, sourceDBID),

    KEY (geneName),                       # same leading column as the primary key
    KEY (proteinPiana)
);



# ------------------------------------------------------------------
# create table emblAccession 
# ------------------------------------------------------------------
# This table will hold embl accession numbers used in external DBs 
# and referring any protein of those stored in piana.
# IMPORTANT: "embl accession numbers" stands for the accession nums
# of any of those DBs in the "nucleotide sequence DB consortium",
# that is EMBL itself, GenBank and DDBJ
# ------------------------------------------------------------------
# EXAMPLE OF CODE: Y00312 
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)

# EMBL / GenBank / DDBJ accession numbers attached to each proteinPiana.
# Column order must be kept: code, proteinPiana, then the remaining columns.
CREATE TABLE emblAccession (
    emblAccessionID      VARCHAR(12)        NOT NULL,            # e.g. Y00312
    proteinPiana         INT UNSIGNED       NOT NULL,
    emblAccessionVersion MEDIUMINT UNSIGNED NOT NULL DEFAULT 0,
    sourceDBID           VARCHAR(20)        NOT NULL,

    PRIMARY KEY (emblAccessionID, proteinPiana, sourceDBID),

    KEY (emblAccessionID),                                       # same leading column as the primary key
    KEY (proteinPiana)
);


# ------------------------------------------------------------------
# create table emblPID
# ------------------------------------------------------------------
# This table will hold embl proteinID numbers used in external DBs 
# and referring any protein of those stored in piana.
# IMPORTANT: "embl proteinID numbers" stands for the proteinID nums
# of any of those DBs in the "nucleotide sequence DB consortium",
# that is EMBL itself, GenBank and DDBJ
# ------------------------------------------------------------------
# EXAMPLE OF CODE: CAA68412.1 
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)

# EMBL / GenBank / DDBJ protein ids attached to each proteinPiana.
# Column order must be kept: code, proteinPiana, then the remaining columns.
CREATE TABLE emblPID (
    emblPID        VARCHAR(12)        NOT NULL,            # e.g. CAA68412.1
    proteinPiana   INT UNSIGNED       NOT NULL,
    emblPIDVersion MEDIUMINT UNSIGNED NOT NULL DEFAULT 0,
    sourceDBID     VARCHAR(20)        NOT NULL,

    PRIMARY KEY (emblPID, proteinPiana, sourceDBID),

    KEY (emblPID),                                         # same leading column as the primary key
    KEY (proteinPiana)
);

# ------------------------------------------------------------------
# create table pdb
# ------------------------------------------------------------------
# This table will hold PDB codes (and chain identifiers)
# referring to proteins in table protein
# ------------------------------------------------------------------
# EXAMPLE OF CODE: 101m (pdb code) T(chain)
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)

# PDB codes and chains attached to each proteinPiana.
# Column order must be kept: code, proteinPiana, then the remaining columns.
CREATE TABLE pdb (
    pdbID        VARCHAR(12)  NOT NULL,   # pdb code, e.g. 101m
    proteinPiana INT UNSIGNED NOT NULL,
    chain        VARCHAR(4),              # chain, e.g. T (may be NULL)
    pdb_chain    VARCHAR(10)  NOT NULL,   # presumably pdb code and chain combined - TODO confirm
    sourceDBID   VARCHAR(20)  NOT NULL,

    # the key is built on pdb_chain, not on pdbID
    PRIMARY KEY (pdb_chain, proteinPiana, sourceDBID),

    KEY (pdb_chain),                      # same leading column as the primary key
    KEY (proteinPiana)
);



# ------------------------------------------------------------------
# create table gi 
# ------------------------------------------------------------------
# This table will hold gi codes used in external DBs and referring 
# any protein of those stored in piana
# ------------------------------------------------------------------
# EXAMPLE OF CODE: 63879, 63880
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)


# gi codes attached to each proteinPiana.
# Column order (code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE gi (
    giID         INT          NOT NULL,   # signed integer, e.g. 63879
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (giID, proteinPiana, sourceDBID),

    KEY (giID),                           # same leading column as the primary key
    KEY (proteinPiana)
);



# ------------------------------------------------------------------
# create table pirEntry
# ------------------------------------------------------------------
# This table will hold PIR entry codes used in external DBs and 
# referring any protein of those stored in piana
# ------------------------------------------------------------------
# EXAMPLE OF CODE: A36175; FESC
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)

# PIR entry codes attached to each proteinPiana.
# Column order must be kept: code, proteinPiana, then the remaining columns.
CREATE TABLE pirEntry (
    pirEntryID   VARCHAR(6)   NOT NULL,   # e.g. A36175 or FESC
    proteinPiana INT UNSIGNED NOT NULL,
    isComplete   TINYINT,                 # may be NULL
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (pirEntryID, proteinPiana, sourceDBID),

    KEY (pirEntryID),                     # same leading column as the primary key
    KEY (proteinPiana)
);



# ------------------------------------------------------------------
# create table pirAccession  
# ------------------------------------------------------------------
# This table will hold PIR accession codes used in external DBs and 
# referring any protein of those stored in piana	
#
# Although this code is sequence based, it is assigned when PIR imports 
# sequences by direct submission or from external sources
# Sequences can be merged posteriorly, so a single entry in PIR could 
# have more than one accession number (as a result of 
# merging sequences)

# ------------------------------------------------------------------
# EXAMPLE OF CODE: A00242 (pirEntryID = FESC)
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)

# PIR accession codes attached to each proteinPiana.
# Column order must be kept: code, proteinPiana, then the remaining columns.
CREATE TABLE pirAccession (
    pirAccessionID VARCHAR(10)  NOT NULL,   # e.g. A00242
    proteinPiana   INT UNSIGNED NOT NULL,
    isComplete     TINYINT,                 # may be NULL
    sourceDBID     VARCHAR(20)  NOT NULL,

    PRIMARY KEY (pirAccessionID, proteinPiana, sourceDBID),

    KEY (pirAccessionID),                   # same leading column as the primary key
    KEY (proteinPiana)
);


# ------------------------------------------------------------------
# create table protein_id_intDB
# ------------------------------------------------------------------
# This table will hold ids used in interaction databases (eg. DIP, HPRD)
# for referring to proteins (ie. internal ids for interaction DBS)
# The id must be preceded by a string identifying the type of code
#  (eg. dipuid:234N)
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! (see column order to follow above)

# Ids used by interaction databases (eg. DIP, HPRD) for each proteinPiana.
# Column order (code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE protein_id_intDB (
    intdbID      VARCHAR(40)  NOT NULL,   # id prefixed by its type, e.g. dipuid:234N
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (intdbID, proteinPiana, sourceDBID),

    KEY (intdbID),                        # same leading column as the primary key
    KEY (proteinPiana)
);


####################################################################
#                                                                  #
#                 PROTEIN ATTRIBUTES TABLES                        #
#                                                                  #
# These tables contain more in detail descriptions of protein      #
# attributes. For example, according to proteinSpecies one finds   #
# that a given protein belongs to one Species. If more information #
# is needed about this species, the user will look into the table  #
# species created below                                            #
#                                                                  #
####################################################################


# ------------------------------------------------------------------
# create table species
# ------------------------------------------------------------------
# This table will hold all scientific names and their related NCBI
# taxon code for further identification
#
# mediumint unsigned allows tax ids up to 16777215 (about 16.7 million)
# THIS FIELD REPRESENTS NCBI TAXON CODE FOR EACH SPECIES
# ------------------------------------------------------------------

# Scientific names and their NCBI taxon codes.
CREATE TABLE species (
    speciesNCBI        MEDIUMINT UNSIGNED NOT NULL,   # NCBI taxon code
    speciesName        VARCHAR(100)       NOT NULL,
    speciesDescription VARCHAR(255),                  # may be NULL
    speciesKingdom     VARCHAR(40)        NOT NULL,
    sourceDBID         VARCHAR(20)        NOT NULL,

    # the same taxon code may appear with several names and sources
    PRIMARY KEY (speciesNCBI, speciesName, sourceDBID),

    KEY (speciesNCBI),                                # same leading column as the primary key
    KEY (speciesName)
);

# ------------------------------------------------------------------
# create table ec
# ------------------------------------------------------------------
# This table will hold all ec codes and their description
#
# ------------------------------------------------------------------

# EC codes and their descriptions, one row per ecID.
CREATE TABLE ec (
    # Widened from 10 to 15 characters so that it matches proteinEC.ecID
    # (varchar(15)). With 10 characters, codes such as 1.14.13.179
    # (11 characters) could not be stored here although proteinEC accepts them.
    ecID          VARCHAR(15) NOT NULL,
    ecDescription VARCHAR(255),           # may be NULL
    sourceDBID    VARCHAR(20) NOT NULL,

    PRIMARY KEY (ecID)
);

# ------------------------------------------------------------------
# create table go
# ------------------------------------------------------------------
# This table will hold all go terms and their description
#
#
# ------------------------------------------------------------------

# GO terms and their descriptions, one row per goID.
CREATE TABLE go (
    goID          INT UNSIGNED NOT NULL,

    # descriptive columns, all nullable
    name          VARCHAR(255),
    acc           VARCHAR(12),
    term_type     VARCHAR(50),

    distance2root INT UNSIGNED NOT NULL,
    sourceDBID    VARCHAR(20)  NOT NULL,

    PRIMARY KEY (goID)
);

# ------------------------------------------------------------------
# create table go_term2term_distance
# ------------------------------------------------------------------
# This table contains the distance between two go terms.
# The distance is the number of steps needed to go from
# go_term1 to go_term2
# ------------------------------------------------------------------

# Distance (number of steps) between pairs of GO terms.
CREATE TABLE go_term2term_distance (
    go_term1 INT UNSIGNED NOT NULL,
    go_term2 INT UNSIGNED NOT NULL,
    distance INT UNSIGNED NOT NULL,   # steps from go_term1 to go_term2

    PRIMARY KEY (go_term1, go_term2),

    KEY (go_term1),                   # same leading column as the primary key
    KEY (go_term2)
);

# ------------------------------------------------------------------
# create table cog
# ------------------------------------------------------------------
# This table will hold all cog codes and their description
#
# cogFunction is a letter: if you want to know to which function it
# corresponds to, you have to check it by hand in file fun.txt
#
# ------------------------------------------------------------------

# COG codes and their descriptions, one row per cogID.
CREATE TABLE cog (
    cogID          VARCHAR(10) NOT NULL,
    cogDescription VARCHAR(255),          # may be NULL
    cogFunction    VARCHAR(10),           # function letter code, may be NULL
    sourceDBID     VARCHAR(20) NOT NULL,

    PRIMARY KEY (cogID)
);


####################################################################
#                                                                  #
#              PROTEIN-ATTRIBUTES RELATIONSHIP TABLES              #
#                                                                  #
#   The following tables are devoted to establishing relationships #
# between different protein attributes from external databases     #
# and a given unitary entity in this database, a protein identified# 
# by the internal piana identifier                                 #
#    The ID suffix is reserved for these codes that are unique     #
#                   identifiers of their own DB                    #
#                                                                  #
#   DO NOT CHANGE THE ORDER OF THE COLUMNS!!!                      #
#     --> method PianaInsertSQL.InsertProteinAttributeRelationship #
#                                                                  #
#         does not work if these tables do not follow the order:   #
#                                                                  #
#                 - attribute code                                 #
#                 - proteinPiana                                   #
#                 - sourceDBID                                     #
#                                                                  #
####################################################################


# ------------------------------------------------------------------
# create table proteinGo
# ------------------------------------------------------------------
# This table will hold GO terms used in pianaDB and their relationship
# with pianaDB proteins
# ------------------------------------------------------------------
# EXAMPLE OF CODE: 
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! 
#    (see column order to follow above)

# Links GO terms to proteinPiana values.
# Column order (attribute code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE proteinGo (
    goID         INT UNSIGNED NOT NULL,
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (goID, proteinPiana, sourceDBID),

    KEY (goID),                           # same leading column as the primary key
    KEY (proteinPiana)
);

# ------------------------------------------------------------------
# create table proteinCog
# ------------------------------------------------------------------
# This table will hold COG codes used in pianaDB and their
# relationship with pianaDB proteins
# ------------------------------------------------------------------
# EXAMPLE OF CODE: COG0001
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! 
#     (see column order to follow above)

# Links COG codes to proteinPiana values.
# Column order (attribute code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE proteinCog (
    cogID        VARCHAR(10)  NOT NULL,   # e.g. COG0001
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (cogID, proteinPiana, sourceDBID),

    KEY (cogID),                          # same leading column as the primary key
    KEY (proteinPiana)
);

# ------------------------------------------------------------------
# create table proteinDescription
# ------------------------------------------------------------------
# This table will hold all protein descriptions used in 
# external DBs having  a protein of those stored in piana
# ------------------------------------------------------------------
# EXAMPLE: Monoamine oxidase
# ------------------------------------------------------------------
#   DO NOT CHANGE THE ORDER OF THE COLUMNS!!!   
#     (see column order to follow above)
	
# Protein descriptions attached to each proteinPiana.
# Column order (attribute, proteinPiana, sourceDBID) must be kept.
# No primary key is declared, so duplicate rows are not rejected.
CREATE TABLE proteinDescription (
    proteinDescription VARCHAR(255) NOT NULL,   # e.g. Monoamine oxidase
    proteinPiana       INT UNSIGNED NOT NULL,
    sourceDBID         VARCHAR(20)  NOT NULL,

    KEY (proteinPiana),
    KEY (proteinDescription)
);

# ------------------------------------------------------------------
# create table proteinKeyword
# ------------------------------------------------------------------
# This table will hold all protein keywords used in 
# external DBs having  a protein of those stored in piana
# ------------------------------------------------------------------
# EXAMPLE: 
# ------------------------------------------------------------------
#   DO NOT CHANGE THE ORDER OF THE COLUMNS!!!   
#      (see column order to follow above)
	
# Protein keywords attached to each proteinPiana.
# Column order (attribute, proteinPiana, sourceDBID) must be kept.
# No primary key is declared, so duplicate rows are not rejected.
CREATE TABLE proteinKeyword (
    proteinKeyword VARCHAR(50)  NOT NULL,
    proteinPiana   INT UNSIGNED NOT NULL,
    sourceDBID     VARCHAR(20)  NOT NULL,

    KEY (proteinPiana)
);

# ------------------------------------------------------------------
# create table proteinFunction
# ------------------------------------------------------------------
# This table will hold all protein functions described in 
# external DBs having a protein of those stored in piana
#
# proteinFunction removed from primary key and index because it 
# has to be of type text, and text types cannot go into primary key
# ------------------------------------------------------------------
# EXAMPLE: 
# ------------------------------------------------------------------
# removed the primary key because of type... 
# ...text cannot be primary key
#
#   DO NOT CHANGE THE ORDER OF THE COLUMNS!!!   
#     (see column order to follow above)                    

# Protein function texts attached to each proteinPiana.
# Column order (attribute, proteinPiana, sourceDBID) must be kept.
# proteinFunction is a text column and is neither indexed nor part of a key.
CREATE TABLE proteinFunction (
    proteinFunction TEXT         NOT NULL,
    proteinPiana    INT UNSIGNED NOT NULL,
    sourceDBID      VARCHAR(20)  NOT NULL,

    KEY (proteinPiana)
);

# ------------------------------------------------------------------
# create table proteinSubcellularLocation
# ------------------------------------------------------------------
# This table will hold all protein subcellular locations described in
# external DBs having a protein of those stored in piana
# ------------------------------------------------------------------
# EXAMPLE: 
# ------------------------------------------------------------------
#   DO NOT CHANGE THE ORDER OF THE COLUMNS!!!     
#    (see column order to follow above) 
	
# Subcellular locations attached to each proteinPiana.
# Column order (attribute, proteinPiana, sourceDBID) must be kept.
# No primary key is declared, so duplicate rows are not rejected.
CREATE TABLE proteinSubcellularLocation (
    proteinSubcellularLocation VARCHAR(255) NOT NULL,
    proteinPiana               INT UNSIGNED NOT NULL,
    sourceDBID                 VARCHAR(20)  NOT NULL,

    KEY (proteinPiana)
);

# ------------------------------------------------------------------
# create table proteinEC
# ------------------------------------------------------------------
# This table will hold known EC codes for proteins proteinPiana
#
# One protein can have multiple ecID
# One EC code can have multiple proteinPiana
# 
# 
# ------------------------------------------------------------------
# EXAMPLE OF CODE: EC 1.4.3.4
# ------------------------------------------------------------------
#   DO NOT CHANGE THE ORDER OF THE COLUMNS!!!    
#     (see column order to follow above)  
	
# Many-to-many link between EC codes and proteinPiana values.
# Column order (attribute code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE proteinEC (
    ecID         VARCHAR(15)  NOT NULL,   # e.g. EC 1.4.3.4
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (ecID, proteinPiana, sourceDBID),

    KEY (ecID),                           # same leading column as the primary key
    KEY (proteinPiana)
);


# ------------------------------------------------------------------
# create table proteinSpecies
# ------------------------------------------------------------------
# This table is a bidirectional link table between protein and species 
#
# This table will hold the primary keys of both tables protein and 
# species 
#
# speciesNCBI is the NCBI taxon code
# ------------------------------------------------------------------
#   DO NOT CHANGE THE ORDER OF THE COLUMNS!!!    
#     (see column order to follow above)  

# Link table between protein and species.
# Column order (attribute code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE proteinSpecies (
    speciesNCBI  MEDIUMINT UNSIGNED NOT NULL,   # NCBI taxon code
    proteinPiana INT UNSIGNED       NOT NULL,
    sourceDBID   VARCHAR(20)        NOT NULL,

    # note that the key leads with proteinPiana, not with speciesNCBI
    PRIMARY KEY (proteinPiana, speciesNCBI, sourceDBID),

    KEY (proteinPiana),                         # same leading column as the primary key
    KEY (speciesNCBI)
);

# ------------------------------------------------------------------
# create table proteinProsite
# ------------------------------------------------------------------
# This table will hold Prosite ids for proteins
# ------------------------------------------------------------------
# This table is the correspondence between proteinPiana and prosite ids
#
# ------------------------------------------------------------------
#   DO NOT CHANGE THE ORDER OF THE COLUMNS!!!    
#     (see column order to follow above)  

# Correspondence between proteinPiana values and Prosite ids.
# Column order (attribute code, proteinPiana, sourceDBID) must be kept.
CREATE TABLE proteinProsite (
    prositeID    VARCHAR(20)  NOT NULL,
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    # note that the key leads with proteinPiana, not with prositeID
    PRIMARY KEY (proteinPiana, prositeID, sourceDBID),

    KEY (prositeID),
    KEY (proteinPiana)                    # same leading column as the primary key
);

# ------------------------------------------------------------------
# create table interPro
# ------------------------------------------------------------------
# This table will hold interPro information
# referring to proteins in table protein
# ------------------------------------------------------------------
# EXAMPLE OF CODE: IPR001254
# ------------------------------------------------------------------
#  DO NOT CHANGE THE ORDER OF THE COLUMNS!!! 
#     (see column order to follow above)

# InterPro codes attached to each proteinPiana.
# Column order must be kept: code, proteinPiana, then the remaining columns.
CREATE TABLE interPro (
    interProID          VARCHAR(12)  NOT NULL,   # e.g. IPR001254
    proteinPiana        INT UNSIGNED NOT NULL,
    interProDescription VARCHAR(24)  NOT NULL,
    sourceDBID          VARCHAR(20)  NOT NULL,

    PRIMARY KEY (interProID, proteinPiana, sourceDBID),

    KEY (interProID),                            # same leading column as the primary key
    KEY (proteinPiana)
);


# ------------------------------------------------------------------
# create table proteinDBAliCluster
# ------------------------------------------------------------------
# This table will hold DBAli clusters for binding sites 
# of pibase
# ------------------------------------------------------------------
# This table is a bidirectional link table between protein 
# and dbali cluster 
#
# Attention! cluster ids are not relevant by themselves... 
# the only important thing is to know if two proteins share 
# the cluster or not 
#
# clusteringMethod can be used for determining the method used,
# but clusterIDs from different clusteringMethod values cannot 
# be compared
# 
#
# ------------------------------------------------------------------ 

# Link table between proteins and DBAli clusters.
# Cluster ids are only comparable within the same DBAliclusteringMethod.
CREATE TABLE proteinDBAliCluster (
    DBAliclusterID        VARCHAR(20)  NOT NULL,
    proteinPiana          INT UNSIGNED NOT NULL,
    patchResidues         TEXT,                    # may be NULL
    DBAliclusteringMethod VARCHAR(20)  NOT NULL,
    sourceDBID            VARCHAR(20)  NOT NULL,

    # sourceDBID is not part of the key for this table
    PRIMARY KEY (proteinPiana, DBAliclusterID, DBAliclusteringMethod),

    KEY (proteinPiana),                            # same leading column as the primary key
    KEY (DBAliclusterID),
    KEY (DBAliclusteringMethod)
);

# ------------------------------------------------------------------
# create table proteinScop
# ------------------------------------------------------------------
# This table will hold SCOP codes used in pianaDB and their
# relationship with pianaDB proteins
# ------------------------------------------------------------------
# EXAMPLE OF CODE:
# ------------------------------------------------------------------
# This table is different from other protein attributes, 
# since there are three relevant codes: cf, sf, fa
# Instead of creating three tables, we will create methods 
# that handle this particularity

# SCOP codes attached to each proteinPiana.
# The three SCOP codes (cf, sf, fa) are kept together in one table.
CREATE TABLE proteinScop (
    cf           INT          NOT NULL,
    sf           INT          NOT NULL,
    fa           INT          NOT NULL,
    proteinPiana INT UNSIGNED NOT NULL,
    sourceDBID   VARCHAR(20)  NOT NULL,

    PRIMARY KEY (cf, sf, fa, proteinPiana, sourceDBID),

    KEY (cf),                             # same leading column as the primary key
    KEY (sf),
    KEY (fa),
    KEY (proteinPiana)
);

# ------------------------------------------------------------------
# create table proteinCath
# ------------------------------------------------------------------
# This table will hold CATH codes used in pianaDB and their
# relationship with pianaDB proteins
#
# res_start and res_end define the residues that delimit this
# segment (a cath domain can have several segments)
#
# Attention! Even if the source database does not give information 
# about segmentID, insert something (eg. 1) since the primary key 
#  needs it
# ------------------------------------------------------------------
# EXAMPLE OF CODE:
# ------------------------------------------------------------------
# This table is different from other protein attributes, since
# there are four relevant codes:  C, A, T and H
# Instead of creating four tables, we will create methods
# that handle this particularity

create table proteinCath(

	# the four CATH codes (C, A, T and H). These columns are nullable
	c integer,
	a integer,
	t integer,
	h integer,
	# CATH identifier the segment belongs to
	cathID varchar(20) not null,
	# segment number within the cath domain. It is part of the primary
	# key, so insert something (eg. 1) even when the source database
	# gives no segment information
	segmentID integer unsigned not null,
	# residues that delimit this segment
	res_start integer unsigned not null,
	res_end integer unsigned not null,
	# PIANA internal identifier of the protein
	proteinPiana integer unsigned not null,
	# source database that provided this assignment
	sourceDBID varchar(20) not null,

	# there is no separate index on cathID: it is the leftmost column
	# of the primary key, which already serves lookups by cathID
	index(c),
	index(a),
	index(t),
	index(h),
	index(proteinPiana),

	primary key(cathID, segmentID, proteinPiana, sourceDBID, res_start, res_end)

	) ;


####################################################################
#                                                                  #
#              EXTERNAL PROTEIN DATABASES TABLES                   #
#                                                                  #
#   The following tables are devoted to storing information found  #
#   on external protein databases.                                 #
#                                                                  #
#   I am not sure why I am creating a new table for it instead of  #
#   inserting information in  external codes tables... maybe a     #
#   matter of separating information... maybe something else...    #
#                                                                  #
####################################################################

# ------------------------------------------------------------------
# create table uniprotInfo
# ------------------------------------------------------------------
# This table will hold information found in uniProt (Swissprot +
# TrEMBL) for a given protein. It does not contain all information,
# only those fields that are not found somewhere else in piana
# 
#
# For a description of all uniprot fields read 
#                       http://us.expasy.org/sprot/userman.html
# 
#
# IMPORTANT:
# Molecular weight and sequence length are those given by Uniprot.
# In table protein, the values are calculated with internal 
# algorithms and therefore these values might differ between them
# ------------------------------------------------------------------
	
create table uniprotInfo(

	# PIANA internal identifier of the protein described by this entry
	proteinPiana integer unsigned not null,
	# uniprot entry identifiers. swissProtID is the primary key
	swissProtID varchar(15) not null,
	swissAccessionID varchar(15) not null,
	# optional descriptive fields taken from the uniprot entry
	data_class varchar(13),
	description varchar(255),
	geneName varchar(255),
	organism varchar(50),
	organelle varchar(50),
	# sequence length and molecular weight as given by uniprot. They
	# might differ from the values calculated for table protein
	proteinSequenceLength mediumint unsigned,
	proteinMW float,

	index(proteinPiana),

	# the key column is spelled exactly as it is declared above
	# (swissProtID) instead of relying on case-insensitive matching
	primary key (swissProtID)

	) ;

# ------------------------------------------------------------------
# create table cellFitness
# ------------------------------------------------------------------
# This table will hold information about the fitness of cell mutants
# under different conditions using sorbitol
# ------------------------------------------------------------------

create table cellFitness(

	# fitness score of the cell mutant (0 when not given)
	fitnessScore float not null default 0,
	# reaction of the mutant, presumably sensitive (sen) or resistant
	# (res) - TODO confirm the meaning against the loading code.
	# The literals use single quotes so that the statement also parses
	# when the ANSI_QUOTES sql mode is enabled
	reaction enum('sen', 'res') not null,
	# conditions under which the fitness was measured
	conditions varchar(20) not null,
	# source database that provided this measure
	sourceDBID varchar(20) not null,
	# PIANA internal identifier of the protein
	proteinPiana integer unsigned not null,

	# there is no separate index on proteinPiana: it is the leftmost
	# column of the primary key, which already serves those lookups
	index(conditions),
	index(reaction),

	primary key (proteinPiana, conditions)

	) ;


####################################################################
#                                                                  #
#                        INTERACTION TABLES                        #
#                                                                  #
#  Tables containing information about the interactions            #
####################################################################




# ------------------------------------------------------------------
# create table interaction
# ------------------------------------------------------------------
# This table will hold all the interactions for which we have 
# information
#
# Contains the two proteins involved in the 
# interaction, as well as interactionConfidence.
#
# The order proteinPianaA < proteinPianaB must be respected
#
# interactionConfidence is a value that gives a measure of the 
# trustworthiness of this interaction. It is dynamically calculated 
# from field interactionSourceDB.confidenceAssigned
# and parameters of PianaGlobals: confidenceDB and confidenceMethod
#  (currently, interactionConfidence is always 1: it will be 
#   calculated as described above in the future)
#
# isSourceA and isSourceB are used to indicate the direction of the 
# interaction
# if isSourceA ==1 and isSourceB ==1 then it is a bidirectional 
# interaction
# if isSourceA ==0 or isSourceB ==0 then it is just monodirectional
#  (currently, both proteins are always considered as being Source)
# ------------------------------------------------------------------

create table interaction (

     # internal identifier of the interaction, generated by the database
     interactionPiana integer not null auto_increment,
     # the two proteins involved in the interaction. The order
     # proteinPianaA < proteinPianaB must be respected by the inserting
     # code (the table itself does not enforce it).
     # isSourceA and isSourceB indicate the direction of the
     # interaction: both set to 1 means bidirectional
     proteinPianaA integer unsigned not null,
     isSourceA integer not null default 1,
     proteinPianaB integer unsigned not null,
     isSourceB integer not null default 1,
     # measure of the trustworthiness of this interaction
     interactionConfidence float not null,

     # the two composite indexes also serve lookups on their leftmost
     # column alone, so there are no separate single column indexes on
     # proteinPianaA and proteinPianaB
     index(proteinPianaA, proteinPianaB),
     index(proteinPianaB, proteinPianaA),

     primary key (interactionPiana)

     ) ;



# ------------------------------------------------------------------
# create table interactionSourceDB
# ------------------------------------------------------------------
# This table will hold (interactionPiana, sourceDBID)
# with an extra value confidenceAssigned that will be used 
# to store confidences given by dbs to interactions 
#  (e.g. combined score of String)
# When the db doesn t give a confidence value, default is 1
#
# confidenceAssigned is the score given by the sourceDBID to this 
# interaction
# 
# ------------------------------------------------------------------

create table interactionSourceDB(

     # source database that describes the interaction
     sourceDBID varchar(20) not null,
     # interaction being described (interaction.interactionPiana)
     interactionPiana integer not null,
     # score given by the source database to this interaction.
     # It is 1 when the database does not give a confidence value
     confidenceAssigned float not null default 1,

     # there is no separate index on interactionPiana: it is the
     # leftmost column of the primary key, which already serves
     # those lookups
     index(sourceDBID),

     primary key (interactionPiana, sourceDBID)

     ) ;

# ------------------------------------------------------------------
# create table interactionMethod
# ------------------------------------------------------------------
# This table will hold (interactionPiana, methodName)
# For each interactionPiana, it will state the methodName that 
# was used to find it. It can have several methods for one 
# interactionPiana (i.e. an interaction that was found through 
# several different methods)
# 
# methodID values are controlled by method PianaDBaccess.get_methodID()
# and variable PianaGlobals.method_names 
# methodID should never be inserted without a previous call to 
# get_methodID(). Otherwise you risk introducing a methodID that 
# will not be recognized by the system
#
# The confidence given to methods is a parameter of PianaGlobals
# ------------------------------------------------------------------

create table interactionMethod(

     # method that was used to find the interaction. Values must come
     # from PianaDBaccess.get_methodID()
     methodID varchar(20) not null,
     # interaction that was found (interaction.interactionPiana)
     interactionPiana integer not null,
     # source database that reported the method for this interaction
     sourceDBID  varchar(20) not null,

     # there are no separate indexes on methodID and interactionPiana:
     # methodID is the leftmost column of the composite index below and
     # interactionPiana is the leftmost column of the primary key
     index(sourceDBID),
     index(methodID, interactionPiana),

     primary key (interactionPiana, methodID, sourceDBID)

     ) ;

# ------------------------------------------------------------------
# create table interactionFeatures
# ------------------------------------------------------------------
# This table will hold (interactionPiana, feature1, feature2, ..)
# For each interactionPiana, it will have an entry with different 
# features we might be interested in
# 
# ------------------------------------------------------------------
# Note: in case you add more features to this table, you ll have
#       to pay attention to the primary key, since right now
#       PianaDBaccess.insert_interaction relies on -insert ignore-
#       in order not to insert duplicated rows... if there are
#       more than one feature, this cannot be handled this way

create table interactionFeatures(

     # the only feature stored so far for an interaction
     pubmedID varchar(20) not null,
     # interaction the feature belongs to (interaction.interactionPiana)
     interactionPiana integer not null,
     # source database that provided the feature
     sourceDBID varchar(20) not null,

     # there is no separate index on interactionPiana: it is the
     # leftmost column of the primary key, which already serves
     # those lookups
     index(pubmedID, interactionPiana),

     # PianaDBaccess.insert_interaction relies on this key together
     # with -insert ignore- in order not to insert duplicated rows
     primary key (interactionPiana, pubmedID, sourceDBID)

     ) ;


# ------------------------------------------------------------------
# create table interactionScores
# ------------------------------------------------------------------
# This table will hold confidence scores given by source databases
# (e.g. STRING gives scores for several potential interaction predictors)
# ------------------------------------------------------------------

create table interactionScores(

     # source database that gives the scores (e.g. STRING)
     sourceDBID varchar(20) not null,
     # interaction being scored (interaction.interactionPiana)
     interactionPiana integer not null,

     # one nullable integer column per score given by the source
     # database
     equiv_nscore  integer,
     equiv_nscore_transferred  integer,
     equiv_fscore integer,
     equiv_pscore   integer,
     equiv_hscore  integer,
     array_score  integer,
     array_score_transferred  integer,
     experimental_score  integer,
     experimental_score_transferred  integer,
     database_score  integer,
     database_score_transferred  integer,
     textmining_score  integer,
     textmining_score_transferred  integer,
     combined_score integer,

     # there is no separate index on interactionPiana: it is the
     # leftmost column of the primary key, which already serves
     # those lookups
     index(sourceDBID),

     primary key (interactionPiana, sourceDBID)

     ) ;

# ------------------------------------------------------------------
# create table interactionProteinSource
# ------------------------------------------------------------------
# This table will hold the protein source of expansion 
# interaction predictions
# 
# ------------------------------------------------------------------

create table interactionProteinSource(

     # predicted interaction (interaction.interactionPiana)
     interactionPiana integer not null,
     # protein that was the source of the expansion prediction
     proteinPiana integer unsigned not null,
     # source database that provided the prediction
     sourceDBID varchar(20) not null,

     # there is no separate index on interactionPiana: it is the
     # leftmost column of the primary key, which already serves
     # those lookups
     index(proteinPiana),
     index(sourceDBID),

     primary key (interactionPiana, proteinPiana, sourceDBID)

     ) ;
	
#
# END
#
