ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/io_module.py
Revision: 54
Committed: Wed Feb 16 21:29:02 2011 UTC (8 years, 4 months ago) by clausted
File size: 16560 byte(s)
Log Message:
Added fit_module and mdl_module to sphinx documentation.
Line User Rev File contents
1 clausted 7 """
2 clausted 47 io_module
3     ---------
4 clausted 7
5 clausted 47 Input/Output module for converting files to Biosensor Array class.
6     Supported sensorgram types include Biosensor, CLAMP, SPRit, and Plexera ICM.
7     Supported microarray types include GAL and ISB Map.
8    
9     .. moduleauthor:: Christopher Lausted,
10     Institute for Systems Biology,
11 clausted 54 OSPRAI developers.
12    
13 clausted 47 Examples::
14    
15     >>> import io_module as io
16     >>> ba1 = io.readsprit("spritdata.txt")
17     >>> ba1 = io.readclamp("clampdata.txt")
18     >>> ba1 = io.readicmtxt("icmdata.txt")
19     >>> ba1 = io.readbiosensor("biosensordata.txt")
20 clausted 49 >>> ba1 = io.readcsv("rawdata.csv")
21 clausted 47 >>> ba2 = io.applygal(ba1, "galfile.gal")
22     >>> ba2 = io.applykey(ba1, "keyfile.tsv")
23     >>> ba3 = io.applymethod(ba2, "icmmethod.xls")
24     >>> io.writesprit(ba3, "newspritfile.txt")
25     >>> io.writeclamp(ba3, "newclampfile.txt")
26 clausted 7 """
27 clausted 54 __version__ = "110214"
28 clausted 7
29 clausted 47
30 clausted 7 ## Import libraries.
31     from copy import deepcopy
32     from os import getcwd
33     from time import mktime
34     import numpy as np ## Numpy array library.
35     import ba_class as ba ## Our Biosensor Array class.
36     reload(ba)
37    
def readsprit(fname):
    """
    Read a SPRit text file into a ba class.
    It has two tab-delimited columns and two header lines.
    The sensorgrams of all ROIs are stacked one after another; a new ROI
    starts wherever the time column drops back below the previous value.
    Here is a very simple example with 2 rois and 3 datapoints::

        Elapsed Time (Seconds)  Average Intensity (Pixel Intensity)
        BEGIN
        0.000000e+000   2.863145e+003
        5.013000e+000   2.863367e+003
        1.002500e+001   2.862950e+003
        0.000000e+000   2.862875e+003
        5.013000e+000   2.862510e+003

    :param fname: Path of the SPRit text file to read.
    :returns: A BiosensorArray sized (rois, datapoints).  A tiny 1x1
              BiosensorArray is returned if the file cannot be opened,
              lacks the BEGIN header line, or holds no data lines.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path.
    with fp:
        ## Check header lines for signature text.
        fp.readline()          # 'Elapsed Time...'
        txt = fp.readline()    # 'BEGIN'
        if 'BEGIN' not in txt:
            print("Error: Second line is not BEGIN.")
            return ba0
        ## Keep data lines only; a trailing blank line must not crash float().
        txtfile = [line for line in fp if line.strip()]

    if not txtfile:
        print("Error: File %s contains no data." % fname)
        return ba0

    ## Change text into an Nx2 array (time, signal).
    sprtable = np.zeros((len(txtfile), 2), dtype=float)
    for i, line in enumerate(txtfile):
        txt = line.split('\t')
        sprtable[i,0] = float(txt[0])    ## Time column.
        sprtable[i,1] = float(txt[1])    ## Signal column.

    ## Find number of datapoints per ROI: see when the time drops back.
    ## If it never drops, the whole table is a single ROI.
    rows = len(sprtable)
    for i in range(1, len(sprtable)):
        if sprtable[i,0] < sprtable[i-1,0]:
            rows = i
            break
    rois = len(sprtable) // rows

    ## Move data the old-fashioned iterative way.
    ba0 = ba.BiosensorArray(rois, rows)
    k = 0
    for i in range(rois):
        for j in range(rows):
            ba0.roi[i].time[j] = sprtable[k,0]
            ba0.roi[i].value[j] = sprtable[k,1]
            k += 1

    return ba0
98 clausted 7
99    
def writesprit(ba0, fname):
    """
    Write a ba class to a SPRit text file.

    The file gets the two SPRit header lines followed by one
    tab-delimited "time, value" line per datapoint, ROI after ROI.
    The datapoint count of the first ROI is used for every ROI.

    :param ba0: Object whose ``roi`` sequence holds items with ``time``
                and ``value`` sequences.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## The with-block guarantees the data is flushed and the file closed.
    with open(fname, 'w') as fp:
        fp.write("Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\r\n")
        fp.write("BEGIN\r\n")
        dpoints = len(ba0.roi[0].time) if len(ba0.roi) > 0 else 0
        for roi in ba0.roi:
            for dpoint in range(dpoints):
                fp.write("%f\t%f\r\n" % (roi.time[dpoint], roi.value[dpoint]))
    print("File %s saved in %s." % (fname, getcwd()))
    return
115 clausted 7
116    
def readclamp(fname):
    """
    Read a Clamp text file into a ba class.

    *(This function is under construction: it only prints a notice and
    returns None; fname is not read.)*

    A Clamp file has two tab-delimited columns per SPR flowcell/roi and
    a varying number of header lines with injection information.
    Here is a very simple example with 2 rois and 3 datapoints::

        Vers 3.41 Data
        Conc1   0       0       0       0
        Start1  301.5   301.5   301.5   301.5
        Stop1   949.8   949.8   949.8   949.8
        RInd1   0       0       0       0
        Conc2   0       0       0       0
        Start2  986.4   0       0       0
        Stop2   1626    0       0       0
        RInd2   0       0       0       0
        Flow    1       1       1       1
        Time1   Data1   Time2   Data2
        0.094   0.062   0.094   0.053
        1.094   0.026   1.094   0.05
        2.094   0.119   2.094   0.055
    """
    notice = "This feature is under construction."
    print(notice)
    return None
143 clausted 7
144    
def writeclamp(ba0, fname):
    """
    Write a ba class to a Clamp text file.

    Layout: a version line, then for every injection one ConcN, StartN,
    StopN and RIndN row (one tab-delimited value per ROI), a Flow row,
    a "TimeN <roi name>" column header and finally the sensorgram rows
    with three decimal places.  Every row ends with CR-LF.

    The injection count is taken from the first ROI and the datapoint
    count from the first ROI's time sequence, so ba0.roi must not be
    empty.

    :param ba0: Object whose ``roi`` sequence holds items with the
                attributes injconc, injstart, injstop, injrind, flow,
                name, time and value.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## (row label, ROI attribute) for the four rows written per injection.
    injrows = (("Conc", "injconc"), ("Start", "injstart"),
               ("Stop", "injstop"), ("RInd", "injrind"))

    ## The with-block guarantees the data is flushed and the file closed.
    with open(fname, 'w') as fp:
        ## First header line.
        fp.write("Vers 3.41 Data\r\n")

        ## Next write injection information.  Each row is terminated here so
        ## that ConcN of the next injection starts on its own line.
        for inj in range(len(ba0.roi[0].injconc)):
            for label, attr in injrows:
                fp.write("%s%i" % (label, inj+1))
                for roi in ba0.roi:
                    fp.write("\t%.3f" % getattr(roi, attr)[inj])
                fp.write("\r\n")

        ## Next write flowrate line.
        fp.write("Flow")
        for roi in ba0.roi:
            fp.write("\t%.3f" % roi.flow)
        fp.write("\r\n")

        ## Write sensorgram data header line.
        fp.write("\t".join("Time%i\t%s" % (i+1, roi.name)
                           for i, roi in enumerate(ba0.roi)))
        fp.write("\r\n")

        ## Write sensorgram data lines.  Three decimal places.
        for dpoint in range(len(ba0.roi[0].time)):
            fp.write("\t".join("%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                               for roi in ba0.roi))
            fp.write("\r\n")

    print("File %s saved in %s." % (fname, getcwd()))
    return
188 clausted 7
189    
def readicmtxt(fname):
    """
    Read a ICM text file into a ba class.
    Each line starts with a date/time stamp followed by tab-delimited
    values.  Times are stored in seconds relative to the first line.
    Here is a very simple example of the tab-delimited format::

        03/05/2010 13:37:21.312 249.408 0.000   0.000
        03/05/2010 13:37:22.312 249.306 0.000   0.000

    :param fname: Path of the ICM text file to read.
    :returns: A BiosensorArray sized (tabs in first line, data lines).
              A tiny 1x1 BiosensorArray is returned if the file cannot
              be opened or holds no data lines.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read the data lines and close.  Blank lines (e.g. a trailing
    ## newline at the end of the file) carry no timestamp and are skipped.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        print("Error: File %s contains no data." % fname)
        return ba0
    dpoints = len(txtfile)
    rois = txtfile[0].count("\t")    ## Usually 25.

    ## Create and size ba object.
    print("This ICM file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Determine experiment start time so we can make other times relative.
    x = txtfile[0].split("\t")     ## E.g. "03/05/2010 13:37:20.218"
    t0 = __icmtime2sec(x[0])       ## E.g. 1267825040.22

    ## Move the data from txtfile to ba0.
    ## NOTE(review): columns 1..rois-1 are copied into roi[1]..roi[rois-1];
    ## roi[0] and the last column x[rois] are left untouched.  This looks
    ## like an off-by-one but is kept as-is because callers may rely on the
    ## current ROI numbering -- confirm against a real ICM file.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        tx = __icmtime2sec(x[0]) - t0
        for j in range(1, rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j])

    return ba0
231 clausted 7
232    
def __icmtime2sec(timetxt):
    """
    Take a time of the form "03/05/2010 15:19:27.312" and return seconds.

    The date is read as month/day/year.  The whole-minute part is
    converted with time.mktime() (local time, DST flag forced to 0) and
    the fractional seconds field is added afterwards, so the result is
    a float.  Leading/trailing whitespace and repeated blanks between
    date and clock are tolerated.

    :param timetxt: Timestamp text, "MM/DD/YYYY HH:MM:SS.sss".
    :returns: Seconds as a float.
    :raises ValueError: If the text does not have the expected form.
    """
    ## split() without argument ignores surrounding whitespace/newlines.
    cal, clock = timetxt.split()
    mm, dd, yy = cal.split("/")
    hh, minute, ss = clock.split(":")
    ## Seconds are passed as 0 because mktime() needs integers; the
    ## (possibly fractional) seconds are added to the result instead.
    stime = (int(yy), int(mm), int(dd), int(hh), int(minute), 0, 0, 0, 0)
    return mktime(stime) + float(ss)
243 clausted 7
244    
def readbiosensor(fname):
    """
    Read a Biacore-style text file into a ba class.
    The header line names an _X (time) and a _Y (value) column for each
    ROI; every label must contain "Fc=".
    Here is a very simple example of the tab-delimited format::

        Ab1 Fc=4- 1_X   Ab1 Fc=4 -1_Y   Ab2 Fc=4 -1_X   Ab2 Fc=4 -1_Y
        13.1    23.7644 93.1    0.713912
        13.6    23.4265 93.6    0.0541172
        14.1    23.1625 94.1    0.332768
        14.6    23.5752 94.6    0.849459

    :param fname: Path of the Biosensor text file to read.
    :returns: A BiosensorArray with name/desc taken from the header and
              time/value taken from the data lines.  A tiny 1x1
              BiosensorArray is returned if the file cannot be opened
              or the header is not recognised.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path, including the
    ## invalid-header return below.
    with fp:
        ## Read header line.  Check number of pairs of _X and _Y labels.
        ## The last _Y label is followed by the line end, not a tab: +1.
        txthdr = fp.readline()
        cols = txthdr.count("Fc=")
        xys = txthdr.count("_X\t") + txthdr.count("_Y\t") + 1
        if (cols != xys) or ((cols % 2) != 0):
            print("Error: This is not a valid Biosensor file.")
            return ba0
        ## Keep data lines only; blank lines would crash float().
        txtfile = [line for line in fp if line.strip()]

    dpoints = len(txtfile)
    rois = cols // 2

    ## Create and size ba object.
    print("This Biosensor file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Get names of ROIs from header like "Ab1 Fc=4 - 1_Y"
    ## Drop the line end first so it does not end up in the last ROI's desc.
    txthdr = txthdr.rstrip("\r\n")
    txthdr = txthdr.replace(" -","-").replace("- ","-")    ## Remove unwanted spaces.
    names = txthdr.split("\t")                             ## Use tab delimiter.
    for j in range(rois):
        name = names[j*2+1].replace("_Y","").strip(" -")
        name = name.partition("Fc=")            ## Now a 3-tuple.
        ba0.roi[j].name = name[0]               ## Text left of Fc.
        ba0.roi[j].desc = ("Fc=" + name[2])     ## Text right of Fc.

    ## Move the data from txtfile to ba0.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        for j in range(rois):
            ba0.roi[j].time[i] = float(x[j*2+0])
            ba0.roi[j].value[i] = float(x[j*2+1])

    return ba0
301 clausted 9
302    
def writebiosensor(ba0, fname):
    """
    Write a ba class to a Biosensor text file.

    This is like a simplified Clamp file: one header line with an
    "<name> <desc>_X" and "<name> <desc>_Y" label per ROI, then one
    tab-delimited row per datapoint with three decimal places.  Rows
    are separated by CR-LF and the last row has no line terminator.
    The datapoint count is taken from the first ROI, so ba0.roi must
    not be empty.

    :param ba0: Object whose ``roi`` sequence holds items with the
                attributes name, desc, time and value.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## The with-block guarantees the data is flushed and the file closed.
    with open(fname, 'w') as fp:
        ## Write sensorgram data header line.
        labels = []
        for roi in ba0.roi:
            name = roi.name.strip() + " " + roi.desc.strip()
            labels.append("%s_X\t%s_Y" % (name, name))
        fp.write("\t".join(labels))

        ## Write sensorgram data lines.  Three decimal places.
        for dpoint in range(len(ba0.roi[0].time)):
            fp.write("\r\n")
            fp.write("\t".join("%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                               for roi in ba0.roi))

    print("File %s saved in %s." % (fname, getcwd()))
    return
327 clausted 9
328    
def readcsv(fname):
    """
    Read a comma-separated value text file into a ba class.
    The first column contains time data while the others contain response data.
    Every ROI receives the same time column.
    Here is a very simple example of the format::

        1.0001, 23.7644, 0.7139
        2.0001, 23.4265, 0.0541
        3.0001, 23.1625, 0.3327
        4.0001, 23.5752, 0.8494

    :param fname: Path of the csv file to read.
    :returns: A BiosensorArray sized (columns - 1, rows).  A tiny 1x1
              BiosensorArray is returned if the file cannot be opened
              or holds no data lines.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read the data lines and close.  Blank lines (e.g. a trailing
    ## newline at the end of the file) would crash float() and are skipped.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        print("Error: File %s contains no data." % fname)
        return ba0
    dpoints = len(txtfile)                     ## Rows in the csv file.
    rois = len(txtfile[0].split(',')) - 1      ## Columns are comma-delimited.

    ## Move the data from txtfile to ba0.
    ba0 = ba.BiosensorArray(rois, dpoints)
    for i, txtline in enumerate(txtfile):
        x = txtline.split(',')
        tx = float(x[0])                       ## Shared time column.
        for j in range(rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j+1])

    print("The file %s has %i ROIs and %i datapoints." % (fname, rois, dpoints))
    return ba0
366    
367    
def applygal(ba0, fname):
    """
    Read a Gal file and apply its microarray information.

    *(This function is under construction: it only prints a notice and
    returns None; neither ba0 nor fname is used.)*
    """
    notice = "This feature is under construction."
    print(notice)
    return None
377 clausted 7
378    
def applykey(ba0, fname):
    """
    Read a Key file and apply its microarray information.
    Multiple background ROIs are not yet supported.
    The file is tab-delimited with one header line containing "No.".
    Here is a very simple example::

        No. Description1    Description2    Background ROI  Col Row
        1   Rat TNF         Antibody01      2               1   1
        2   ratIgG          Antibody02      4               2   1
        3   Hum TNF         Antibody03      4               3   1
        4   humIgG          Antibody04      2               4   1

    :param ba0: Array object with a ``rois`` count and a ``roi`` sequence.
                It is never modified.
    :param fname: Path of the key file to read.
    :returns: A deep copy of ba0 whose ROIs carry name, desc, bgroi
              (one-element list, base-1 identifier), spotx and spoty
              from the key file.  The unchanged ba0 itself is returned
              if the file cannot be opened or fails validation.
    """

    ## Try to open file.  Return unchanged ba object if it fails.
    try:
        fp = open(fname, "r")
    except IOError:
        print('Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path.
    with fp:
        ## Check header lines for signature text.
        txt = fp.readline()
        if "No." not in txt:
            print("Error: The header line is unfamiliar.")
            return ba0
        ## One list of cells per non-blank line.
        keytable = [line.strip().split('\t') for line in fp if line.strip()]

    if not keytable:
        print("Error: The key file contains no entries.")
        return ba0

    ## Use list comprehensions to get one list for each column.
    try:
        ids = [int(x[0]) for x in keytable]    ## First column is integer.
        desc1 = [x[1] for x in keytable]       ## Second column is text.
        desc2 = [x[2] for x in keytable]       ## Third column is text.
        bg = [int(x[3]) for x in keytable]     ## Fourth column is integer. Change base1 to base0?
        col = [int(x[4]) for x in keytable]    ## Fifth column is integer.
        row = [int(x[5]) for x in keytable]    ## Sixth column is integer.
    except (ValueError, IndexError):
        print("Error: The key file has a malformed line.")
        return ba0

    ## Check if key file looks valid.  These tests are not very thorough!
    keys = 1 + max(ids) - min(ids)
    if ba0.rois != keys:
        print("Error: We have %i ROIs but %i keyfile entries." % (ba0.rois, keys))
        return ba0
    ## Identifiers are used as base-1 indices below, so anything outside
    ## 1..rois would raise IndexError or silently wrap to a negative index.
    if (min(ids) < 1) or (max(ids) > ba0.rois):
        print("Error: Identifiers (id) must range from 1 to %i." % ba0.rois)
        return ba0
    if (min(bg) < min(ids)) or (max(bg) > max(ids)):
        print("Error: at least one specified background ROI does not exist")
        print("Identifiers (id) range from %i to %i." % (min(ids), max(ids)))
        print("Background references range from %i to %i." % (min(bg), max(bg)))
        return ba0

    ## Create new object and put data in it.
    ba1 = deepcopy(ba0)
    for i, ident in enumerate(ids):
        target = ba1.roi[ident-1]
        target.name = desc1[i]
        target.desc = desc2[i]
        target.bgroi = [bg[i]]    ## ToDo: Decide if bg will be id or index. Base1 now.
        target.spotx = col[i]
        target.spoty = row[i]

    print("Successfully loaded information for %i ROIs." % keys)
    return ba1
445 clausted 7
446    
def applymethod(ba0, fname):
    """
    Read a ICM Analyte/Method xls file and apply its information.

    *(This feature is under construction: it only prints a notice and
    returns None; neither ba0 nor fname is used.)*
    """
    notice = "This feature is under construction."
    print(notice)
    return None
456 clausted 14
457    
458 clausted 7
def outputbindinglevels(ba0, fname, interval, *baselines):
    """
    Measure the binding level changes at multiple intervals along a sensorgram.
    Write the data to a file.  If interval is 500 and baselines are 100, 1100
    then binding levels are changes between 100-600s and between 1100-1600s.
    Average 30s of data (15s either side of each time).

    One line is written per ROI: its index, its name, and one binding
    level (three decimal places) per baseline, all tab-delimited.

    :param ba0: Object whose ``roi`` sequence holds items with ``index``,
                ``name`` and a ``time2val(t_start, t_stop)`` method.
                (Presumably time2val returns the values inside that time
                window -- confirm in ba_class.)
    :param fname: Path of the file to create or overwrite.
    :param interval: Seconds between a baseline and its end measurement.
    :param baselines: Any number of baseline times in seconds.
    :returns: None.
    """
    ## The with-block guarantees the data is flushed and the file closed.
    with open(fname, 'w') as fp:
        ## Measurements
        for iroi in ba0.roi:
            fp.write("%i\t%s" % (iroi.index, iroi.name))
            for t1 in baselines:
                t2 = t1 + interval
                y1 = np.average(iroi.time2val(t1-15, t1+15))
                y2 = np.average(iroi.time2val(t2-15, t2+15))
                fp.write("\t%.3f" % (y2-y1))
            fp.write("\n")
    print("File %s saved in %s." % (fname, getcwd()))
    return
482 clausted 7
483 clausted 14
## Script entry point: this module is meant to be imported, so running it
## directly only prints a notice.
if "__main__" == __name__:
    print("This module isn't a stand-alone app.")
487    
488 clausted 47 ################################# End of module #################################