ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/io_module.py
Revision: 56
Committed: Mon Mar 21 23:51:57 2011 UTC (8 years, 8 months ago) by clausted
File size: 16867 byte(s)
Log Message:
Change to field names printed by writebiosensor() function.
Line User Rev File contents
1 clausted 7 """
2 clausted 47 io_module
3     ---------
4 clausted 7
5 clausted 47 Input/Output module for converting files to Biosensor Array class.
6     Supported sensorgram types include Biosensor, CLAMP, SPRit, and Plexera ICM.
7     Supported microarray types include GAL and ISB Map.
8    
9     .. moduleauthor:: Christopher Lausted,
10     Institute for Systems Biology,
11 clausted 54 OSPRAI developers.
12    
13 clausted 47 Examples::
14    
15     >>> import io_module as io
16     >>> ba1 = io.readsprit("spritdata.txt")
17     >>> ba1 = io.readclamp("spritdata.txt")
18     >>> ba1 = io.readicmtxt("icmdata.txt")
19     >>> ba1 = io.readbiosensor("icmdata.txt")
20 clausted 49 >>> ba1 = io.readcsv("rawdata.csv")
21 clausted 47 >>> ba2 = io.applygal(ba1, "galfile.gal")
22     >>> ba2 = io.applykey(ba1, "keyfile.tsv")
23     >>> ba3 = io.applymethod(ba2, "icmmethod.xls")
24     >>> io.writesprit(ba3, "newspritfile.txt")
25     >>> io.writeclamp(ba3, "newclampfile.txt")
26 clausted 7 """
27 clausted 56 __version__ = "110321"
28 clausted 7
29 clausted 47
30 clausted 7 ## Import libraries.
31     from copy import deepcopy
32     from os import getcwd
33     from time import mktime
34 clausted 56 import re
35 clausted 7 import numpy as np ## Numpy array library.
36     import ba_class as ba ## Our Biosensor Array class.
37     reload(ba)
38    
def readsprit(fname):
    """
    Read a SPRit text file into a ba class.

    The file has two header lines followed by two tab-delimited columns
    (time, signal).  The sensorgrams of all ROIs are stacked one after
    another; a new ROI starts wherever the time value drops.
    Here is a very simple example with 2 rois and 3 datapoints::

        Elapsed Time (Seconds)  Average Intensity (Pixel Intensity)
        BEGIN
        0.000000e+000   2.863145e+003
        5.013000e+000   2.863367e+003
        1.002500e+001   2.862950e+003
        0.000000e+000   2.862875e+003
        5.013000e+000   2.862510e+003

    :param fname: Path of the SPRit text file.
    :returns: A BiosensorArray sized (rois, datapoints-per-roi).  A tiny
              1x1 BiosensorArray is returned if the file cannot be opened,
              lacks the BEGIN signature, or holds no data rows.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path.
    with fp:
        ## Check header lines for signature text.
        fp.readline()        # 'Elapsed Time...'
        txt = fp.readline()  # 'BEGIN'
        if 'BEGIN' not in txt:
            print("Error: Second line is not BEGIN.")
            return ba0
        ## Keep only lines with content; blank lines cannot be parsed.
        datalines = [line for line in fp if line.strip()]

    if not datalines:
        print("Error: No datapoints found in file %s" % fname)
        return ba0

    ## Change text into an Nx2 array (time, signal).
    sprtable = np.zeros((len(datalines),2), dtype=float)
    for i, line in enumerate(datalines):
        fields = line.split('\t')
        sprtable[i,0] = float(fields[0])  ## Time column.
        sprtable[i,1] = float(fields[1])  ## Signal column.

    ## Find number of datapoints per ROI: the index where time first drops.
    ## If time never drops the file holds a single ROI, so every row
    ## belongs to it (previously the final datapoint was lost here).
    rows = len(sprtable)
    for i in range(1, len(sprtable)):
        if sprtable[i,0] < sprtable[i-1,0]:
            rows = i
            break
    rois = len(sprtable) // rows

    ## Move data the old-fashioned iterative way.
    ba0 = ba.BiosensorArray(rois,rows)
    for i in range(rois):
        for j in range(rows):
            k = i*rows + j  ## Row of sprtable holding roi i, datapoint j.
            ba0.roi[i].time[j] = sprtable[k,0]
            ba0.roi[i].value[j] = sprtable[k,1]

    return ba0
## End of readsprit() function
99 clausted 7
100    
def writesprit(ba0, fname):
    """
    Write a ba class to a SPRit text file.

    Two header lines are written, then one "time<TAB>value" line per
    datapoint, with the ROIs stacked one after another.

    :param ba0: Object whose ``roi`` sequence holds items with ``time``
                and ``value`` sequences.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## The with-block guarantees the file is flushed and closed; the
    ## original "fp.close" lacked parentheses and never closed it.
    with open(fname, 'w') as fp:
        fp.write("Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\r\n")
        fp.write("BEGIN\r\n")
        for roi in ba0.roi:
            ## The first ROI's length sets the datapoint count for all ROIs.
            for dpoint in range(len(ba0.roi[0].time)):
                fp.write("%f\t%f\r\n" % (roi.time[dpoint], roi.value[dpoint]))
    print("File %s saved in %s." % (fname, getcwd()))
    return
## End of writesprit() function
116 clausted 7
117    
def readclamp(fname):
    """
    Placeholder for reading a Clamp text file into a ba class.

    Nothing is parsed yet: the function only prints a notice and
    returns None.  The intended input has two tab-delimited columns per
    SPR flowcell/roi, preceded by a varying number of header lines with
    injection information.
    A very simple example with 2 rois and 3 datapoints::

        Vers 3.41 Data
        Conc1   0       0       0       0
        Start1  301.5   301.5   301.5   301.5
        Stop1   949.8   949.8   949.8   949.8
        RInd1   0       0       0       0
        Conc2   0       0       0       0
        Start2  986.4   0       0       0
        Stop2   1626    0       0       0
        RInd2   0       0       0       0
        Flow    1       1       1       1
        Time1   Data1   Time2   Data2
        0.094   0.062   0.094   0.053
        1.094   0.026   1.094   0.05
        2.094   0.119   2.094   0.055

    :param fname: Path of the Clamp file (currently unused).
    :returns: None.
    """
    notice = "This feature is under construction."
    print(notice)
    return None
## End of readclamp() function
144 clausted 7
145    
def writeclamp(ba0, fname):
    """
    Write a ba class to a Clamp text file.

    Layout: a version line, four lines (Conc, Start, Stop, RInd) per
    injection, one Flow line, a column header line and then one line per
    datapoint holding a "time<TAB>value" pair for every ROI.

    :param ba0: Object whose ``roi`` sequence holds items with ``injconc``,
                ``injstart``, ``injstop``, ``injrind``, ``flow``, ``name``,
                ``time`` and ``value``.  The first ROI sets the number of
                injections and datapoints.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## Header label paired with the ROI attribute that supplies its values.
    injfields = (("Conc", "injconc"), ("Start", "injstart"),
                 ("Stop", "injstop"), ("RInd", "injrind"))

    ## The with-block guarantees the file is flushed and closed; the
    ## original "fp.close" lacked parentheses and never closed it.
    with open(fname, 'w') as fp:
        ## First header line.
        fp.write("Vers 3.41 Data\r\n")

        ## Next write injection information.  Every line is terminated
        ## here, so a second injection no longer runs on from the RInd
        ## line of the first and zero injections leave no blank line.
        for inj in range(len(ba0.roi[0].injconc)):
            for label, attr in injfields:
                fp.write("%s%i" % (label, inj+1))
                for roi in ba0.roi:
                    fp.write("\t%.3f" % getattr(roi, attr)[inj])
                fp.write("\r\n")

        ## Next write flowrate line.
        fp.write("Flow")
        for roi in ba0.roi:
            fp.write("\t%.3f" % roi.flow)
        fp.write("\r\n")

        ## Write sensorgram data header line.
        columns = []
        for i, roi in enumerate(ba0.roi):
            columns.append("Time%i" % (i+1))
            columns.append(roi.name)
        fp.write("\t".join(columns))
        fp.write("\r\n")

        ## Write sensorgram data lines.  Three decimal places.
        for dpoint in range(len(ba0.roi[0].time)):
            pairs = ["%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                     for roi in ba0.roi]
            fp.write("\t".join(pairs))
            fp.write("\r\n")

    print("File %s saved in %s." % (fname, getcwd()))
    return
## End of writeclamp() function
189 clausted 7
190    
def readicmtxt(fname):
    """
    Read a ICM text file into a ba class.

    Each line holds a date/time stamp followed by tab-delimited values.
    Times are stored relative to the stamp on the first line.
    Here is a very simple example of the tab-delimited format::

        03/05/2010 13:37:21.312 249.408 0.000   0.000
        03/05/2010 13:37:22.312 249.306 0.000   0.000

    :param fname: Path of the ICM text file.
    :returns: A BiosensorArray sized (tabs-in-first-line, lines).  A tiny
              1x1 BiosensorArray is returned if the file cannot be opened
              or contains no data lines.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read all lines with content and close.  Blank lines are skipped
    ## because the time stamp parser cannot handle them.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        print("Error: File %s contains no data." % fname)
        return ba0

    dpoints = len(txtfile)
    rois = txtfile[0].count("\t")  ## Usually 25.

    ## Create and size ba object.
    print("This ICM file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Determine experiment start time so we can make other times relative.
    x = txtfile[0].split("\t")  ## E.g. "03/05/2010 13:37:20.218"
    t0 = __icmtime2sec(x[0])    ## E.g. 1267825040.22

    ## Move the data from txtfile to ba0.
    ## NOTE(review): column j is stored in roi[j] for j = 1..rois-1, so
    ## roi[0] is never filled and the last column is never read.  This
    ## looks like an off-by-one but is kept as-is; confirm against real
    ## ICM files before changing the mapping.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        tx = __icmtime2sec(x[0]) - t0
        for j in range(1,rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j])

    return ba0
## End of readicmtxt
232 clausted 7
233    
def __icmtime2sec(timetxt):
    """
    Convert a stamp such as "03/05/2010 15:19:27.312" to seconds.

    The date part is read as month/day/year and the clock part as
    hour:minute:second.  The whole-minute instant is converted with
    time.mktime() (DST flag fixed at 0) and the fractional seconds
    field is added on top.

    :param timetxt: Text of the form "MM/DD/YYYY hh:mm:ss.sss".
    :returns: Seconds as a float.
    """
    datepart, clockpart = timetxt.split(" ")
    month, day, year = datepart.split("/")
    hour, minute, seconds = clockpart.split(":")
    ## Seconds are left at zero in the tuple so the fraction survives.
    whole_minute = mktime((int(year), int(month), int(day),
                           int(hour), int(minute), 0, 0, 0, 0))
    return whole_minute + float(seconds)
## End of __icmtime2sec
244 clausted 7
245    
def readbiosensor(fname):
    """
    Read a Biacore-style text file into a ba class.

    The header line names an _X (time) and a _Y (value) column for each
    ROI; the remaining lines hold the tab-delimited numbers.
    Here is a very simple example of the tab-delimited format::

        Ab1 Fc=4- 1_X   Ab1 Fc=4 -1_Y   Ab2 Fc=4 -1_X   Ab2 Fc=4 -1_Y
        13.1    23.7644 93.1    0.713912
        13.6    23.4265 93.6    0.0541172
        14.1    23.1625 94.1    0.332768
        14.6    23.5752 94.6    0.849459

    :param fname: Path of the Biosensor text file.
    :returns: A BiosensorArray sized (rois, datapoints) with each ROI's
              ``name`` and ``desc`` taken from the header.  A tiny 1x1
              BiosensorArray is returned if the file cannot be opened or
              the header is not valid.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path, including the
    ## invalid-header return which used to leave it open.
    with fp:
        ## Read header line.  Check number of pairs of _X and _Y labels.
        txthdr = fp.readline()
        cols = txthdr.count("Fc=")
        ## The +1 accounts for the last label, which ends the line and so
        ## has no tab after it.
        xys = txthdr.count("_X\t") + txthdr.count("_Y\t") + 1
        if (cols != xys) or ((cols % 2) != 0):
            print("Error: This is not a valid Biosensor file.")
            return ba0
        ## Keep only lines with content; blank lines cannot be parsed.
        txtfile = [line for line in fp if line.strip()]

    dpoints = len(txtfile)
    rois = cols // 2

    ## Create and size ba object.
    print("This Biosensor file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Get names of ROIs from header like "Ab1 Fc=4 - 1_Y"
    txthdr = txthdr.replace(" -","-").replace("- ","-")  ## Remove unwanted spaces.
    names = txthdr.split("\t")                           ## Use tab delimiter.
    for j in range(rois):
        name = names[j*2+1].replace("_Y","").strip(" -")
        left, _sep, right = name.partition("Fc=")
        ba0.roi[j].name = left             ## Text left of Fc.
        ba0.roi[j].desc = ("Fc=" + right)  ## Text right of Fc.

    ## Move the data from txtfile to ba0.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        for j in range(rois):
            ba0.roi[j].time[i] = float(x[j*2+0])
            ba0.roi[j].value[i] = float(x[j*2+1])

    return ba0
## End of readbiosensor()
302 clausted 9
303    
def writebiosensor(ba0, fname):
    """
    Write a ba class to a Biosensor text file.

    The header holds an "<name> Fc=<n>-1_X" and "<name> Fc=<n>-1_Y"
    label for every ROI (n counts from 1).  Each following line holds a
    "time<TAB>value" pair per ROI with three decimal places.  No line
    break is written after the last line.

    :param ba0: Object whose ``roi`` sequence holds items with ``name``,
                ``time`` and ``value``.  The first ROI sets the number of
                datapoints.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """

    ## This is like a simplified Clamp file.
    ## The with-block guarantees the file is flushed and closed; the
    ## original "fp.close" lacked parentheses and never closed it.
    with open(fname, 'w') as fp:
        ## Write sensorgram data header line.
        for i, roi in enumerate(ba0.roi):
            if i > 0:
                fp.write("\t")
            name = roi.name.strip()
            ## Drops every non-word character (spaces, '=', '-', ...).
            name = re.sub(r'\W+', '', name)
            ## The next two patterns need a space or '=' and so cannot
            ## match after the substitution above; kept unchanged.
            name = re.sub(r'_ ', '', name)
            name = re.sub(r'Fc=[0-9]+', '', name)
            name = re.sub(r'[0-9]+_[XY]', '', name)
            label = name + " Fc=" + str(i+1) + "-1"
            fp.write(label + "_X\t" + label + "_Y")

        ## Write sensorgram data lines.  Three decimal places.
        for dpoint in range(len(ba0.roi[0].time)):
            fp.write("\r\n")
            pairs = ["%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                     for roi in ba0.roi]
            fp.write("\t".join(pairs))

    print("File %s saved in %s." % (fname, getcwd()))
    return
## End of writebiosensor()
334 clausted 9
335    
def readcsv(fname):
    """
    Read a comma-separated value text file into a ba class.

    The first column contains time data while the others contain response
    data, one column per ROI.  Every ROI receives the same time values.
    Here is a very simple example of the format::

        1.0001, 23.7644, 0.7139
        2.0001, 23.4265, 0.0541
        3.0001, 23.1625, 0.3327
        4.0001, 23.5752, 0.8494

    :param fname: Path of the csv file.
    :returns: A BiosensorArray sized (columns-1, rows).  A tiny 1x1
              BiosensorArray is returned if the file cannot be opened or
              contains no data lines.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read all lines with content and close.  Blank lines (such as a
    ## trailing empty line) are skipped because they cannot be parsed.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        print("Error: File %s contains no data." % fname)
        return ba0

    dpoints = len(txtfile)                 ## Rows in the csv file.
    rois = len(txtfile[0].split(',')) - 1  ## Columns are comma-delimited.

    ## Move the data from txtfile to ba0.
    ba0 = ba.BiosensorArray(rois, dpoints)
    for i, txtline in enumerate(txtfile):
        x = txtline.split(',')
        for j in range(rois):
            ba0.roi[j].time[i] = float(x[0])
            ba0.roi[j].value[i] = float(x[j+1])

    print("The file %s has %i ROIs and %i datapoints." % (fname, rois, dpoints))
    return ba0
## End of readcsv()
373    
374    
def applygal(ba0, fname):
    """
    Placeholder for reading a Gal file and applying its microarray
    information.

    *(This function is under construction)*

    Neither argument is used yet; a notice is printed and None is
    returned.
    """
    notice = "This feature is under construction."
    print(notice)
    return None
## End of applygal()
384 clausted 7
385    
def applykey(ba0, fname):
    """
    Read a Key file and apply its microarray information.

    Multiple background ROIs are not yet supported.
    The file is tab-delimited with one header line and six columns.
    Here is a very simple example::

        No.  Description1  Description2  Background ROI  Col  Row
        1    Rat TNF       Antibody01    2               1    1
        2    ratIgG        Antibody02    4               2    1
        3    Hum TNF       Antibody03    4               3    1
        4    humIgG        Antibody04    2               4    1

    :param ba0: Object with a ``rois`` count and a ``roi`` sequence.  It is
                not modified.
    :param fname: Path of the key file.
    :returns: A deep copy of ba0 whose ROIs carry ``name``, ``desc``,
              ``bgroi``, ``spotx`` and ``spoty`` from the key file, or ba0
              itself (unchanged) if the file cannot be opened or fails
              validation.
    """

    ## Try to open file.  Return unchanged ba object if it fails.
    try:
        fp = open(fname, "r")
    except IOError:
        print('Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path.
    with fp:
        ## Check header line for signature text.
        txt = fp.readline()
        if "No." not in txt:
            print("Error: The header line is unfamiliar.")
            return ba0
        ## Split every non-blank line into its columns.  Blank lines are
        ## skipped because int() cannot parse their empty first column.
        keytable = [line.strip().split('\t') for line in fp if line.strip()]

    if not keytable:
        print("Error: The key file contains no entries.")
        return ba0

    ## Use list comprehensions to get one list for each column.
    ids = [int(x[0]) for x in keytable]    ## First column is integer.
    desc1 = [x[1] for x in keytable]       ## Second column is text.
    desc2 = [x[2] for x in keytable]       ## Third column is text.
    bg = [int(x[3]) for x in keytable]     ## Fourth column is integer.  Change base1 to base0?
    col = [int(x[4]) for x in keytable]    ## Fifth column is integer.
    row = [int(x[5]) for x in keytable]    ## Sixth column is integer.

    ## Check if key file looks valid.  These tests are not very thorough!
    keys = 1 + max(ids) - min(ids)
    if ba0.rois != keys:
        print("Error: We have %i ROIs but %i keyfile entries." % (ba0.rois, keys))
        return ba0
    ## Identifiers index ba0.roi as id-1, so they must start at 1;
    ## otherwise id 0 would wrap around to the last ROI or a large id
    ## would run off the end.
    if min(ids) != 1:
        print("Error: Identifiers (id) must start at 1 but start at %i." % min(ids))
        return ba0
    if (min(bg) < min(ids)) or (max(bg) > max(ids)):
        print("Error: at least one specified background ROI does not exist")
        print("Identifiers (id) range from %i to %i." % (min(ids), max(ids)))
        print("Background references range from %i to %i." % (min(bg), max(bg)))
        return ba0

    ## Create new object and put data in it.
    ba1 = deepcopy(ba0)
    for i, key in enumerate(ids):
        roi = ba1.roi[key-1]
        roi.name = desc1[i]
        roi.desc = desc2[i]
        roi.bgroi = [bg[i]]  ## ToDo: Decide if bg will be id or index.  Base1 now.
        roi.spotx = col[i]
        roi.spoty = row[i]

    print("Successfully loaded information for %i ROIs." % keys)
    return ba1
## End of applykey()
452 clausted 7
453    
def applymethod(ba0, fname):
    """
    Placeholder for reading a ICM Analyte/Method xls file and applying
    its information.

    *(This feature is under construction.)*

    Neither argument is used yet; a notice is printed and None is
    returned.
    """
    notice = "This feature is under construction."
    print(notice)
    return None
## End of applymethod()
463 clausted 14
464    
465 clausted 7
def outputbindinglevels(ba0, fname, interval, *baselines):
    """
    Measure the binding level changes at multiple intervals along a
    sensorgram and write the data to a file.

    If interval is 500 and the baselines are 100 and 1100 then the binding
    levels are the changes between 100-600s and between 1100-1600s.
    Each level averages a 30s window (15s either side of the time point).

    One line is written per ROI: its index, its name, then one change per
    baseline, all tab-delimited.

    :param ba0: Object whose ``roi`` sequence holds items with ``index``,
                ``name`` and a ``time2val(t1, t2)`` method (presumably
                returning the values recorded between t1 and t2 --
                confirm in ba_class).
    :param fname: Path of the file to create or overwrite.
    :param interval: Time between a baseline and its end point.
    :param baselines: Baseline times, passed as separate positional
                      arguments.
    :returns: None.
    """
    ## The with-block guarantees the file is flushed and closed; the
    ## original "fp.close" lacked parentheses and never closed it.
    with open(fname, 'w') as fp:
        ## Measurements
        for iroi in ba0.roi:
            fp.write("%i\t%s" % (iroi.index, iroi.name))
            for t1 in baselines:
                t2 = t1 + interval
                y1 = np.average(iroi.time2val(t1-15, t1+15))
                y2 = np.average(iroi.time2val(t2-15, t2+15))
                fp.write("\t%.3f" % (y2-y1))
            fp.write("\n")
    print("File %s saved in %s." % (fname, getcwd()))
    return
## End of outputbindinglevels()
489 clausted 7
490 clausted 14
## This module is a library; running it directly only prints a notice.
if __name__ == '__main__':
    print("This module isn't a stand-alone app.")
494    
495 clausted 47 ################################# End of module #################################