ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/io_module.py
Revision: 51
Committed: Wed Feb 9 01:09:05 2011 UTC (8 years, 4 months ago) by clausted
File size: 16581 byte(s)
Log Message:
Fix minor docstring error.
Line User Rev File contents
1 clausted 7 """
2 clausted 47 io_module
3     ---------
4 clausted 7
5 clausted 47 Input/Output module for converting files to Biosensor Array class.
6     Supported sensorgram types include Biosensor, CLAMP, SPRit, and Plexera ICM.
7     Supported microarray types include GAL and ISB Map.
8    
9     .. moduleauthor:: Christopher Lausted,
10     Institute for Systems Biology,
11     OSPRAI developers.
12     Examples::
13    
14     >>> import io_module as io
15     >>> ba1 = io.readsprit("spritdata.txt")
16     >>> ba1 = io.readclamp("clampdata.txt")
17     >>> ba1 = io.readicmtxt("icmdata.txt")
18     >>> ba1 = io.readbiosensor("biosensordata.txt")
19 clausted 49 >>> ba1 = io.readcsv("rawdata.csv")
20 clausted 47 >>> ba2 = io.applygal(ba1, "galfile.gal")
21     >>> ba2 = io.applykey(ba1, "keyfile.tsv")
22     >>> ba3 = io.applymethod(ba2, "icmmethod.xls")
23     >>> io.writesprit(ba3, "newspritfile.txt")
24     >>> io.writeclamp(ba3, "newclampfile.txt")
25 clausted 7 """
26 clausted 50 __version__ = "110208"
27 clausted 7
28 clausted 47
29 clausted 7 ## Import libraries.
30     from copy import deepcopy
31     from os import getcwd
32     from time import mktime
33     import numpy as np ## Numpy array library.
34     import ba_class as ba ## Our Biosensor Array class.
35     reload(ba)
36    
def readsprit(fname):
    """
    Read a SPRit text file into a ba class.

    The file has two tab-delimited columns (time, signal) and two header
    lines; the second header line must contain ``BEGIN``.  The sensorgrams
    of all ROIs are stacked one after another: a new ROI begins where the
    time column drops below the previous value.  Rows left over after the
    last complete ROI are ignored.
    Here is a very simple example (columns are tab-delimited)::

        Elapsed Time (Seconds)    Average Intensity (Pixel Intensity)
        BEGIN
        0.000000e+000    2.863145e+003
        5.013000e+000    2.863367e+003
        1.002500e+001    2.862950e+003
        0.000000e+000    2.862875e+003
        5.013000e+000    2.862510e+003

    :param fname: Path of the SPRit text file to read.
    :returns: A BiosensorArray holding the data, or a tiny 1x1
              BiosensorArray if the file cannot be opened, lacks the
              BEGIN header line, or contains no datapoints.
    """

    ## Tiny ba object returned whenever the file cannot be used.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The "with" block closes the handle on every exit path.
    with fp:
        ## Check header lines for signature text.
        fp.readline()                   ## 'Elapsed Time...'
        txt = fp.readline()             ## 'BEGIN'
        if 'BEGIN' not in txt:
            print("Error: Second line is not BEGIN.")
            return ba0
        ## Keep the data lines, skipping blank ones (e.g. a trailing newline).
        txtfile = [line for line in fp.readlines() if line.strip()]

    ## Without data there is nothing to size the array with.
    if not txtfile:
        print("Error: File %s contains no datapoints." % fname)
        return ba0

    ## Change text into an Nx2 array of (time, signal).
    sprtable = np.zeros((len(txtfile), 2), dtype=float)
    for i, line in enumerate(txtfile):
        txt = line.split('\t')
        sprtable[i, 0] = float(txt[0])  ## Time column.
        sprtable[i, 1] = float(txt[1])  ## Signal column.

    ## Find the number of datapoints per ROI: the first row where the time
    ## drops back.  If it never drops, the whole table is a single ROI.
    rows = len(sprtable)
    for i in range(1, len(sprtable)):
        if sprtable[i, 0] < sprtable[i-1, 0]:
            rows = i
            break
    rois = len(sprtable) // rows

    ## Move data into the ba object one element at a time.
    ba0 = ba.BiosensorArray(rois, rows)
    k = 0
    for i in range(rois):
        for j in range(rows):
            ba0.roi[i].time[j] = sprtable[k, 0]
            ba0.roi[i].value[j] = sprtable[k, 1]
            k += 1

    return ba0
97 clausted 7
98    
def writesprit(ba0, fname):
    """
    Write a ba class to a SPRit text file.

    The output has the two SPRit header lines followed by one
    tab-delimited "time, value" line per datapoint, with the ROIs stacked
    one after another.  Lines are terminated with CR LF.  The number of
    datapoints written for every ROI is taken from the first ROI.

    :param ba0: Object whose ``roi`` sequence holds ``time`` and ``value``
                sequences.
    :param fname: Path of the file to write (overwritten if it exists).
    :returns: None.
    """
    ## The "with" block makes sure the file is flushed and closed.
    with open(fname, 'w') as fp:
        fp.write("Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\r\n")
        fp.write("BEGIN\r\n")
        for roi in ba0.roi:
            ## Every ROI is written with the length of the first one.
            for dpoint in range(len(ba0.roi[0].time)):
                fp.write("%f\t%f\r\n" % (roi.time[dpoint], roi.value[dpoint]))
    print("File %s saved in %s." % (fname, getcwd()))
    return
114 clausted 7
115    
def readclamp(fname):
    """
    Read a Clamp text file into a ba class.

    *(This function is under construction: it only prints a notice and
    returns None; fname is not read.)*

    A Clamp file has two tab-delimited columns per SPR flowcell/roi and a
    varying number of header lines with injection information.
    Here is a very simple example with 2 rois and 3 datapoints
    (columns are tab-delimited)::

        Vers 3.41 Data
        Conc1    0    0    0    0
        Start1   301.5    301.5    301.5    301.5
        Stop1    949.8    949.8    949.8    949.8
        RInd1    0    0    0    0
        Conc2    0    0    0    0
        Start2   986.4    0    0    0
        Stop2    1626    0    0    0
        RInd2    0    0    0    0
        Flow     1    1    1    1
        Time1    Data1    Time2    Data2
        0.094    0.062    0.094    0.053
        1.094    0.026    1.094    0.05
        2.094    0.119    2.094    0.055
    """
    notice = "This feature is under construction."
    print(notice)
    return None
142 clausted 7
143    
def writeclamp(ba0, fname):
    """
    Write a ba class to a Clamp text file.

    The output consists of a version line, four lines per injection
    (ConcN, StartN, StopN, RIndN), a Flow line, a column-header line
    ("TimeN" followed by the ROI name for each ROI) and one line per
    datapoint.  Fields are tab-delimited, numbers use three decimal
    places and every line is terminated with CR LF.  The number of
    injections and datapoints is taken from the first ROI.

    :param ba0: Object whose ``roi`` sequence holds the attributes
                ``injconc``, ``injstart``, ``injstop``, ``injrind``,
                ``flow``, ``name``, ``time`` and ``value``.
    :param fname: Path of the file to write (overwritten if it exists).
    :returns: None.
    """
    rois = ba0.roi

    ## First header line.
    lines = ["Vers 3.41 Data"]

    ## Next the injection information.  Each entry becomes its own line so
    ## that consecutive injections never run together on one line.
    injfields = (("Conc", "injconc"), ("Start", "injstart"),
                 ("Stop", "injstop"), ("RInd", "injrind"))
    for inj in range(len(rois[0].injconc)):
        for label, attr in injfields:
            cells = ["%s%i" % (label, inj+1)]
            cells.extend("%.3f" % getattr(roi, attr)[inj] for roi in rois)
            lines.append("\t".join(cells))

    ## Next the flowrate line.
    lines.append("\t".join(["Flow"] + ["%.3f" % roi.flow for roi in rois]))

    ## Sensorgram data header line.
    lines.append("\t".join(["Time%i\t%s" % (i+1, roi.name)
                            for i, roi in enumerate(rois)]))

    ## Sensorgram data lines.  Three decimal places.
    for dpoint in range(len(rois[0].time)):
        lines.append("\t".join(["%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                                for roi in rois]))

    ## The "with" block makes sure the file is flushed and closed.
    with open(fname, 'w') as fp:
        fp.write("\r\n".join(lines) + "\r\n")
    print("File %s saved in %s." % (fname, getcwd()))
    return
187 clausted 7
188    
def readicmtxt(fname):
    """
    Read a ICM text file into a ba class.

    Each line starts with a "MM/DD/YYYY hh:mm:ss.sss" timestamp followed
    by tab-delimited signal values.  Times are stored in seconds relative
    to the timestamp of the first line.
    Here is a very simple example (columns are tab-delimited)::

        03/05/2010 13:37:21.312    249.408    0.000    0.000
        03/05/2010 13:37:22.312    249.306    0.000    0.000

    :param fname: Path of the ICM text file to read.
    :returns: A BiosensorArray holding the data, or a tiny 1x1
              BiosensorArray if the file cannot be opened or is empty.
    """

    ## Tiny ba object returned whenever the file cannot be used.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read all data lines, skipping blank ones (e.g. a trailing newline).
    with fp:
        txtfile = [line for line in fp.readlines() if line.strip()]

    ## An empty file has no first line to size the array from.
    if not txtfile:
        print("Error: File %s contains no datapoints." % fname)
        return ba0
    dpoints = len(txtfile)
    rois = txtfile[0].count("\t")       ## Usually 25.

    ## Create and size ba object.
    print("This ICM file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Determine experiment start time so we can make other times relative.
    x = txtfile[0].split("\t")          ## E.g. "03/05/2010 13:37:20.218"
    t0 = __icmtime2sec(x[0])            ## E.g. 1267825040.22

    ## Move the data from txtfile to ba0.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        tx = __icmtime2sec(x[0]) - t0
        ## NOTE(review): column j goes to roi[j], so roi[0] is never filled
        ## and the last column is never read.  Presumably intentional
        ## (1-based ROI numbers, trailing tab) -- confirm against real files.
        for j in range(1, rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j])

    return ba0
230 clausted 7
231    
def __icmtime2sec(timetxt):
    """
    Convert an ICM timestamp such as "03/05/2010 15:19:27.312" to seconds.

    The date and the hour/minute part go through ``mktime`` (local time,
    DST flag forced to 0); the fractional seconds field is added on top.

    :param timetxt: Text of the form "MM/DD/YYYY hh:mm:ss.sss".
    :returns: Seconds as a float.
    """
    datepart, clockpart = timetxt.split(" ")
    month, day, year = datepart.split("/")
    hour, minute, seconds = clockpart.split(":")
    ## Seconds are left at zero here so the fraction survives the addition.
    whole_minutes = (int(year), int(month), int(day),
                     int(hour), int(minute), 0, 0, 0, 0)
    return mktime(whole_minutes) + float(seconds)
242 clausted 7
243    
def readbiosensor(fname):
    """
    Read a Biacore-style text file into a ba class.

    The header line holds one "<name> Fc=<desc>_X" and one
    "<name> Fc=<desc>_Y" label per ROI; every following line holds one
    time/value pair per ROI.
    Here is a very simple example (columns are tab-delimited)::

        Ab1 Fc=4- 1_X    Ab1 Fc=4 -1_Y    Ab2 Fc=4 -1_X    Ab2 Fc=4 -1_Y
        13.1    23.7644    93.1    0.713912
        13.6    23.4265    93.6    0.0541172
        14.1    23.1625    94.1    0.332768
        14.6    23.5752    94.6    0.849459

    :param fname: Path of the Biosensor text file to read.
    :returns: A BiosensorArray holding the data, or a tiny 1x1
              BiosensorArray if the file cannot be opened or the header
              line is not valid.
    """

    ## Tiny ba object returned whenever the file cannot be used.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The "with" block closes the handle on every exit path.
    with fp:
        ## Read header line.  Check number of pairs of _X and _Y labels.
        ## The +1 accounts for the last label, which is not followed by a tab.
        txthdr = fp.readline()
        cols = txthdr.count("Fc=")
        xys = txthdr.count("_X\t") + txthdr.count("_Y\t") + 1
        if (cols != xys) or ((cols % 2) != 0):
            print("Error: This is not a valid Biosensor file.")
            return ba0
        ## Keep the data lines, skipping blank ones (e.g. a trailing newline).
        txtfile = [line for line in fp.readlines() if line.strip()]

    dpoints = len(txtfile)
    rois = cols // 2

    ## Create and size ba object.
    print("This Biosensor file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Get names of ROIs from header like "Ab1 Fc=4 - 1_Y".
    ## Drop the line terminator first so it does not end up in the last desc.
    txthdr = txthdr.rstrip("\r\n")
    txthdr = txthdr.replace(" -", "-").replace("- ", "-")  ## Remove unwanted spaces.
    names = txthdr.split("\t")                              ## Use tab delimiter.
    for j in range(rois):
        name = names[j*2+1].replace("_Y", "").strip(" -")
        name = name.partition("Fc=")            ## Now a 3-tuple.
        ba0.roi[j].name = name[0]               ## Text left of Fc.
        ba0.roi[j].desc = ("Fc=" + name[2])     ## Text right of Fc.

    ## Move the data from txtfile to ba0.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        for j in range(rois):
            ba0.roi[j].time[i] = float(x[j*2+0])
            ba0.roi[j].value[i] = float(x[j*2+1])

    return ba0
300 clausted 9
301    
def writebiosensor(ba0, fname):
    """
    Write a ba class to a Biosensor text file.

    This is like a simplified Clamp file: one header line with a
    "<name> <desc>_X" and "<name> <desc>_Y" label per ROI, then one line
    per datapoint with a time/value pair per ROI.  Fields are
    tab-delimited, numbers use three decimal places, and lines are
    separated by CR LF (the last line has no terminator).  The number of
    datapoints is taken from the first ROI.

    :param ba0: Object whose ``roi`` sequence holds the attributes
                ``name``, ``desc``, ``time`` and ``value``.
    :param fname: Path of the file to write (overwritten if it exists).
    :returns: None.
    """
    ## Sensorgram data header line.
    labels = []
    for roi in ba0.roi:
        name = roi.name.strip() + " " + roi.desc.strip()
        labels.append("%s_X\t%s_Y" % (name, name))
    lines = ["\t".join(labels)]

    ## Sensorgram data lines.  Three decimal places.
    for dpoint in range(len(ba0.roi[0].time)):
        lines.append("\t".join(["%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                                for roi in ba0.roi]))

    ## The "with" block makes sure the file is flushed and closed.
    with open(fname, 'w') as fp:
        fp.write("\r\n".join(lines))
    print("File %s saved in %s." % (fname, getcwd()))
    return
326 clausted 9
327    
def readcsv(fname):
    """
    Read a comma-separated value text file into a ba class.

    The first column contains time data while the others contain response
    data, one column per ROI.  Every ROI gets the same time values.
    Here is a very simple example of the format::

        1.0001, 23.7644, 0.7139
        2.0001, 23.4265, 0.0541
        3.0001, 23.1625, 0.3327
        4.0001, 23.5752, 0.8494

    :param fname: Path of the csv file to read.
    :returns: A BiosensorArray holding the data, or a tiny 1x1
              BiosensorArray if the file cannot be opened or is empty.
    """

    ## Tiny ba object returned whenever the file cannot be used.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read all data lines, skipping blank ones (e.g. a trailing newline).
    with fp:
        txtfile = [line for line in fp.readlines() if line.strip()]

    ## An empty file has no first line to size the array from.
    if not txtfile:
        print("Error: File %s contains no datapoints." % fname)
        return ba0
    dpoints = len(txtfile)                      ## Rows in the csv file.
    rois = len(txtfile[0].split(',')) - 1       ## Columns are comma-delimited.

    ## Move the data from txtfile to ba0.
    ba0 = ba.BiosensorArray(rois, dpoints)
    for i, txtline in enumerate(txtfile):
        x = txtline.split(',')
        tx = float(x[0])                        ## Shared time column.
        for j in range(rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j+1])

    print("The file %s has %i ROIs and %i datapoints." % (fname, rois, dpoints))
    return ba0
365    
366    
def applygal(ba0, fname):
    """
    Read a Gal file and apply its microarray information.

    *(This function is under construction: it only prints a notice and
    returns None; neither ba0 nor fname is used.)*
    """
    notice = "This feature is under construction."
    print(notice)
    return None
376 clausted 7
377    
def applykey(ba0, fname):
    """
    Read a Key file and apply its microarray information.

    The key file is tab-delimited with one header line (which must contain
    "No.") and six columns.  Multiple background ROIs are not yet supported.
    Here is a very simple example (columns are tab-delimited)::

        No.    Description1    Description2    Background ROI    Col    Row
        1      Rat TNF         Antibody01      2                 1      1
        2      ratIgG          Antibody02      4                 2      1
        3      Hum TNF         Antibody03      4                 3      1
        4      humIgG          Antibody04      2                 4      1

    :param ba0: The ba object to annotate.  It is not modified.
    :param fname: Path of the key file to read.
    :returns: A deep copy of ba0 with name, desc, bgroi, spotx and spoty
              set for each listed ROI, or ba0 itself (unchanged) if the
              file cannot be opened or fails validation.
    """

    ## Try to open file.  Return unchanged ba object if it fails.
    try:
        fp = open(fname, "r")
    except IOError:
        print('Cannot open file %s for reading' % fname)
        return ba0

    ## The "with" block closes the handle on every exit path.
    with fp:
        ## Check header line for signature text.
        txt = fp.readline()
        if "No." not in txt:
            print("Error: The header line is unfamiliar.")
            return ba0
        ## Convert remaining text to a 2d list, skipping blank lines.
        keytable = [line.strip().split('\t')
                    for line in fp.readlines() if line.strip()]

    ## A header-only file has nothing to apply.
    if not keytable:
        print("Error: The key file contains no entries.")
        return ba0

    ## Use list comprehensions to get one list for each column.
    ids = [int(x[0]) for x in keytable]     ## First column is integer.
    desc1 = [x[1] for x in keytable]        ## Second column is text.
    desc2 = [x[2] for x in keytable]        ## Third column is text.
    bg = [int(x[3]) for x in keytable]      ## Fourth column is integer. Change base1 to base0?
    col = [int(x[4]) for x in keytable]     ## Fifth column is integer.
    row = [int(x[5]) for x in keytable]     ## Sixth column is integer.

    ## Check if key file looks valid.  These tests are not very thorough!
    keys = 1 + max(ids) - min(ids)
    if ba0.rois != keys:
        print("Error: We have %i ROIs but %i keyfile entries." % (ba0.rois, keys))
        return ba0
    if (min(bg) < min(ids)) or (max(bg) > max(ids)):
        print("Error: at least one specified background ROI does not exist")
        print("Identifiers (id) range from %i to %i." % (min(ids), max(ids)))
        print("Background references range from %i to %i." % (min(bg), max(bg)))
        return ba0

    ## Create new object and put data in it.
    ## NOTE(review): indexing with id-1 assumes identifiers start at 1.
    ba1 = deepcopy(ba0)
    for i, key in enumerate(ids):
        target = ba1.roi[key-1]
        target.name = desc1[i]
        target.desc = desc2[i]
        target.bgroi = [bg[i]]  ## ToDo: Decide if bg will be id or index. Base1 now.
        target.spotx = col[i]
        target.spoty = row[i]

    print("Successfully loaded information for %i ROIs." % keys)
    return ba1
444 clausted 7
445    
def applymethod(ba0, fname):
    """
    Read a ICM Analyte/Method xls file and apply its information.

    *(This feature is under construction: it only prints a notice and
    returns None; neither ba0 nor fname is used.)*
    """
    notice = "This feature is under construction."
    print(notice)
    return None
455 clausted 14
456    
457 clausted 7
def outputbindinglevels(ba0, fname, interval, *baselines):
    """
    Measure the binding level changes at multiple intervals along a sensorgram.

    Write the data to a file, one tab-delimited line per ROI: the ROI
    index, the ROI name, then one binding level per baseline.  If interval
    is 500 and the baselines are 100 and 1100 then the binding levels are
    the changes between 100-600s and between 1100-1600s.  Each level is
    the difference of two averages taken over a window of +/-15s (30s of
    data) around the start and end time.

    :param ba0: Object whose ``roi`` sequence holds ``index``, ``name``
                and a ``time2val(t_start, t_end)`` method.
    :param fname: Path of the file to write (overwritten if it exists).
    :param interval: Seconds between a baseline time and its end time.
    :param baselines: Baseline times, passed as separate arguments.
    :returns: None.
    """
    ## The "with" block makes sure the file is flushed and closed.
    with open(fname, 'w') as fp:
        ## Measurements
        for iroi in ba0.roi:
            fields = ["%i\t%s" % (iroi.index, iroi.name)]
            for t1 in baselines:
                t2 = t1 + interval
                y1 = np.average(iroi.time2val(t1-15, t1+15))
                y2 = np.average(iroi.time2val(t2-15, t2+15))
                fields.append("%.3f" % (y2-y1))
            fp.write("\t".join(fields) + "\n")
    print("File %s saved in %s." % (fname, getcwd()))
    return
481 clausted 7
482 clausted 14
## This module is a library; running it directly only prints a notice.
if __name__ == "__main__":
    print("This module isn't a stand-alone app.")
486    
487 clausted 47 ################################# End of module #################################