Package biana :: Package utilities :: Module TsvReader
[hide private]
[frames | no frames]

Source Code for Module biana.utilities.TsvReader

 1   
 2  from FormattedFileProcessor import FormattedFileProcessor 
 3  import sets 
 4   
class TsvReader(FormattedFileProcessor):
    """
    Read/process TSV (tab separated) formatted files.

    The first line of the file is used as the header; column names are
    lower-cased before they are matched against requested fields.
    """

    def __init__(self, input_file_name):
        """
        input_file_name: path of the TSV file to read
        """
        FormattedFileProcessor.__init__(self, input_file_name=input_file_name, input_type="tsv")
        return

    def process(self, out_method, fields_to_include=None):
        """
        Read and process the input file line by line.

        out_method: callable receiving one tab-joined, newline-terminated
                    string per data row; if None, rows are collected in a
                    dictionary and returned instead
        fields_to_include: names of the columns to keep (matched
                    case-insensitively, kept in the given order); if None,
                    every column of the row is kept

        Returns None when out_method is given. Otherwise returns a tuple
        (columns, id_to_value):
            columns: column name -> position inside each stored row
            id_to_value: value of the first kept column -> list of kept
                    values for that row (a later row with the same key
                    replaces an earlier one)

        Raises KeyError if a requested field is missing from the header and
        IndexError if a row has fewer values than a requested column needs.
        """
        # NOTE(review): self.input_file_name is presumably stored by
        # FormattedFileProcessor.__init__ -- it is not assigned in this class.
        input_file = open(self.input_file_name)
        # try/finally instead of a trailing close() so the handle is released
        # even when a malformed row raises part-way through the file.
        try:
            header = input_file.readline()
            cols = [c.lower() for c in header.strip().split('\t')]
            if fields_to_include is None:
                first_column = cols[0]
            else:
                fields_to_include = [f.lower() for f in fields_to_include]
                first_column = fields_to_include[0]
            columns = dict(zip(cols, range(len(cols))))
            id_to_value = {}
            for line in input_file:
                # NOTE: strip() removes all surrounding whitespace, so empty
                # fields at the very start or end of a line are dropped.
                vals = line.strip().split('\t')
                if fields_to_include is None:
                    # No selection requested: keep the row as read. This also
                    # covers the out_method case, which previously failed
                    # with a TypeError by iterating over None.
                    selected = vals
                else:
                    selected = [vals[columns[f]] for f in fields_to_include]
                if out_method is None:
                    id_to_value[vals[columns[first_column]]] = selected
                else:
                    out_method("%s\n" % "\t".join(selected))
        finally:
            input_file.close()

        if out_method is not None:
            return
        if fields_to_include is not None:
            # Stored rows follow the order of fields_to_include, so the
            # name -> position map must be built from that same order
            # (not from the header order) to index them correctly.
            columns = dict(zip(fields_to_include, range(len(fields_to_include))))
        return columns, id_to_value
55