ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/io_module.py
Revision: 50
Committed: Wed Feb 9 00:32:35 2011 UTC (8 years, 8 months ago) by clausted
File size: 16595 byte(s)
Log Message:
Remove deprecated matplotlib graph from ba_class.  Other minor changes.
Line File contents
1 """
2 io_module
3 ---------
4
5 Input/Output module for converting files to Biosensor Array class.
6 Supported sensorgram types include Biosensor, CLAMP, SPRit, and Plexera ICM.
7 Supported microarray types include GAL and ISB Map.
8
9 .. moduleauthor:: Christopher Lausted,
10 Institute for Systems Biology,
11 OSPRAI developers.
12 Examples::
13
14 >>> import io_module as io
15 >>> ba1 = io.readsprit("spritdata.txt")
16 >>> ba1 = io.readclamp("clampdata.txt")
17 >>> ba1 = io.readicmtxt("icmdata.txt")
18 >>> ba1 = io.readbiosensor("biosensordata.txt")
19 >>> ba1 = io.readcsv("rawdata.csv")
20 >>> ba2 = io.applygal(ba1, "galfile.gal")
21 >>> ba2 = io.applykey(ba1, "keyfile.tsv")
22 >>> ba3 = io.applymethod(ba2, "icmmethod.xls")
23 >>> io.writesprit(ba3, "newspritfile.txt")
24 >>> io.writeclamp(ba3, "newclampfile.txt")
25 """
26 __version__ = "110208"
27
28
29 ## Import libraries.
30 from copy import deepcopy
31 from os import getcwd
32 from time import mktime
33 import numpy as np ## Numpy array library.
34 import ba_class as ba ## Our Biosensor Array class.
35 reload(ba)
36
def readsprit(fname):
    """
    Read a SPRit text file into a ba class.
    It has two tab-delimited columns and two header lines.
    The sensorgrams of all ROIs are stacked vertically; a new ROI starts
    wherever the time column drops back below the previous value.
    Here is a very simple example::

        Elapsed Time (Seconds)  Average Intensity (Pixel Intensity)
        BEGIN
        0.000000e+000   2.863145e+003
        5.013000e+000   2.863367e+003
        1.002500e+001   2.862950e+003
        0.000000e+000   2.862875e+003
        5.013000e+000   2.862510e+003

    :param fname: Path of the SPRit text file to read.
    :returns: A ``ba.BiosensorArray`` holding the data.  A tiny 1x1 array is
        returned if the file cannot be opened, if the second line does not
        contain ``BEGIN``, or if there are no data rows.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path, including errors.
    with fp:
        ## Check header lines for signature text.
        fp.readline()           # 'Elapsed Time...'
        txt = fp.readline()     # 'BEGIN'
        if ('BEGIN' not in txt):
            print("Error: Second line is not BEGIN.")
            return ba0
        ## Keep only non-blank lines; a trailing empty line is not data.
        txtfile = [line for line in fp if line.strip()]

    if not txtfile:
        print("Error: No data found in file %s" % fname)
        return ba0

    ## Change text into array.
    sprtable = np.zeros((len(txtfile),2), dtype=float)  ## Redimension Nx2
    for i, line in enumerate(txtfile):
        txt = line.split('\t')
        sprtable[i,0] = float(txt[0])  ## Time column.
        sprtable[i,1] = float(txt[1])  ## Signal column.

    ## Find number of datapoints per ROI.
    ## If the time never drops back, the whole table is a single ROI.
    rows = len(sprtable)
    for i in range(1, len(sprtable)):
        ## See when the time drops back to zero.
        if (sprtable[i,0] < sprtable[i-1,0]):
            rows = i
            break
    rois = len(sprtable) // rows

    ## Move data the old-fashioned iterative way.
    ba0 = ba.BiosensorArray(rois,rows)
    k = 0
    for i in range(rois):
        for j in range(rows):
            ba0.roi[i].time[j] = sprtable[k,0]
            ba0.roi[i].value[j] = sprtable[k,1]
            k += 1

    return ba0
    ## End of readsprit() function.
97
98
def writesprit(ba0, fname):
    """
    Write a ba class to a SPRit text file.

    The file gets the two SPRit header lines followed by one
    ``time<TAB>value`` line per datapoint, with the ROIs stacked one after
    another.  Every ROI is written with as many datapoints as the first ROI.

    :param ba0: Biosensor array whose ``roi`` items provide ``time`` and
        ``value`` sequences.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## The with-block guarantees the file is flushed and closed.
    with open(fname, 'w') as fp:
        fp.write("Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\r\n")
        fp.write("BEGIN\r\n")
        for roi in ba0.roi:
            for dpoint in range(len(ba0.roi[0].time)):
                fp.write("%f\t%f\r\n" % (roi.time[dpoint], roi.value[dpoint]))
    print("File %s saved in %s." % (fname, getcwd()))
    return
    ## End of writesprit() function.
114
115
def readclamp(fname):
    """
    Placeholder for reading a Clamp text file into a ba class.

    The reader is not implemented yet: the function only prints a notice and
    returns None; ``fname`` is not opened.

    A Clamp file has two tab-delimited columns per SPR flowcell/roi, preceded
    by a varying number of header lines with injection information.
    Here is a very simple example with 2 rois and 3 datapoints::

        Vers 3.41 Data
        Conc1   0       0       0       0
        Start1  301.5   301.5   301.5   301.5
        Stop1   949.8   949.8   949.8   949.8
        RInd1   0       0       0       0
        Conc2   0       0       0       0
        Start2  986.4   0       0       0
        Stop2   1626    0       0       0
        RInd2   0       0       0       0
        Flow    1       1       1       1
        Time1   Data1   Time2   Data2
        0.094   0.062   0.094   0.053
        1.094   0.026   1.094   0.05
        2.094   0.119   2.094   0.055
    """
    notice = "This feature is under construction."
    print(notice)
    return None
    ## End of readclamp() function.
142
143
def writeclamp(ba0, fname):
    """
    Write a ba class to a Clamp text file.

    Layout: a version line, then for every injection the four lines
    ``ConcN``, ``StartN``, ``StopN`` and ``RIndN`` (one value per ROI), a
    ``Flow`` line, a ``TimeN<TAB>name`` column-header line, and finally one
    line per datapoint holding a time/value pair for each ROI.
    The number of injections and datapoints is taken from the first ROI.

    :param ba0: Biosensor array whose ``roi`` items provide ``injconc``,
        ``injstart``, ``injstop``, ``injrind``, ``flow``, ``name``, ``time``
        and ``value``.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## Label / attribute pairs for the four lines written per injection.
    injfields = (("Conc", "injconc"), ("Start", "injstart"),
                 ("Stop", "injstop"), ("RInd", "injrind"))

    ## The with-block guarantees the file is flushed and closed.
    with open(fname, 'w') as fp:
        ## First header line.
        fp.write("Vers 3.41 Data\r\n")

        ## Next write injection information.
        ## Each line is terminated here so that consecutive injections
        ## do not run together on one line.
        for inj in range(len(ba0.roi[0].injconc)):
            for label, attr in injfields:
                fp.write("%s%i" % (label, inj+1))
                for roi in ba0.roi:
                    fp.write("\t%.3f" % getattr(roi, attr)[inj])
                fp.write("\r\n")

        ## Next write flowrate line.
        fp.write("Flow")
        for roi in ba0.roi:
            fp.write("\t%.3f" % roi.flow)
        fp.write("\r\n")

        ## Write sensorgram data header line.
        for i, roi in enumerate(ba0.roi):
            if (i > 0):
                fp.write("\t")
            fp.write("Time%i" % (i+1))
            fp.write("\t")
            fp.write(roi.name)
        fp.write("\r\n")

        ## Write sensorgram data lines.  Three decimal places.
        for dpoint in range(len(ba0.roi[0].time)):
            for i, roi in enumerate(ba0.roi):
                if (i > 0):
                    fp.write("\t")
                fp.write("%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint]))
            fp.write("\r\n")

    ## Print message.
    print("File %s saved in %s." % (fname, getcwd()))
    return
    ## End of writeclamp() function.
187
188
def readicmtxt(fname):
    """
    Read a ICM text file into a ba class.
    Each line starts with a date/time stamp followed by tab-delimited values.
    Times are stored in seconds relative to the first line of the file.
    Here is a very simple example of the tab-delimited format::

        03/05/2010 13:37:21.312   249.408   0.000   0.000
        03/05/2010 13:37:22.312   249.306   0.000   0.000

    :param fname: Path of the ICM text file to read.
    :returns: A ``ba.BiosensorArray`` sized by the number of tabs in the
        first line and the number of non-blank lines.  A tiny 1x1 array is
        returned if the file cannot be opened or holds no data.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read the non-blank lines; the with-block closes the file even on error.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        print("Error: No data found in file %s" % fname)
        return ba0
    dpoints = len(txtfile)
    rois = txtfile[0].count("\t")  ## Usually 25.

    ## Create and size ba object.
    print("This ICM file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Determine experiment start time so we can make other times relative.
    x = txtfile[0].split("\t")   ## E.g. "03/05/2010 13:37:20.218"
    t0 = __icmtime2sec(x[0])     ## E.g. 1267825040.22

    ## Move the data from txtfile to ba0.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        tx = __icmtime2sec(x[0]) - t0
        ## NOTE(review): this loop leaves roi[0] unfilled and never reads the
        ## last column x[rois].  That looks like an off-by-one unless real ICM
        ## lines end with a trailing tab -- confirm against an actual ICM file
        ## before changing the column-to-ROI mapping.
        for j in range(1,rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j])

    return ba0
    ## End of readicmtxt.
230
231
def __icmtime2sec(timetxt):
    """
    Take a time of the form "03/05/2010 15:19:27.312" and return seconds.

    The date is read as month/day/year.  The whole-minute part is converted
    with ``time.mktime`` (local time, DST flag forced to 0) and the
    fractional seconds are added afterwards.  Leading/trailing whitespace,
    such as a line's newline, is ignored.

    :param timetxt: Text holding a date and a clock time separated by
        whitespace.
    :returns: Seconds since the epoch as a float.
    :raises ValueError: If the text does not have the expected form.
    """
    ## split() without argument tolerates surrounding whitespace/newlines.
    cal, clock = timetxt.split()
    month, day, year = cal.split("/")
    hour, minute, seconds = clock.split(":")
    stime = (int(year), int(month), int(day), int(hour), int(minute), 0, 0, 0, 0)
    ## Seconds are added as a float to keep the millisecond fraction.
    return mktime(stime) + float(seconds)
    ## End of __icmtime2sec.
242
243
def readbiosensor(fname):
    """
    Read a Biacore-style text file into a ba class.
    The header line names an ``_X`` (time) and ``_Y`` (response) column for
    every ROI; each following line holds the tab-delimited values.
    Here is a very simple example of the tab-delimited format::

        Ab1 Fc=4- 1_X   Ab1 Fc=4 -1_Y   Ab2 Fc=4 -1_X   Ab2 Fc=4 -1_Y
        13.1    23.7644 93.1    0.713912
        13.6    23.4265 93.6    0.0541172
        14.1    23.1625 94.1    0.332768
        14.6    23.5752 94.6    0.849459

    :param fname: Path of the Biosensor text file to read.
    :returns: A ``ba.BiosensorArray`` with names, descriptions, times and
        values filled in.  A tiny 1x1 array is returned if the file cannot
        be opened or the header is not recognized.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path, including the
    ## invalid-header return below.
    with fp:
        ## Read header line.  Check number of pairs of _X and _Y labels.
        txthdr = fp.readline()
        cols = txthdr.count("Fc=")
        ## The +1 accounts for the last _Y label, which ends the line
        ## instead of being followed by a tab.
        xys = txthdr.count("_X\t") + txthdr.count("_Y\t") + 1
        if ((cols != xys) or ((cols%2) != 0)):
            print("Error: This is not a valid Biosensor file.")
            return ba0
        ## Keep only non-blank data lines.
        txtfile = [line for line in fp if line.strip()]

    dpoints = len(txtfile)
    rois = cols // 2

    ## Create and size ba object.
    print("This Biosensor file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Get names of ROIs from header like "Ab1 Fc=4 - 1_Y".
    ## Drop the line ending first so it does not end up in the last desc.
    txthdr = txthdr.rstrip("\r\n")
    txthdr = txthdr.replace(" -","-").replace("- ","-")  ## Remove unwanted spaces.
    names = txthdr.split("\t")                           ## Use tab delimiter.
    for j in range(rois):
        name = names[j*2+1].replace("_Y","").strip(" -")
        name = name.partition("Fc=")          ## Now a 3-tuple.
        ba0.roi[j].name = name[0]             ## Text left of Fc.
        ba0.roi[j].desc = ("Fc=" + name[2])   ## Text right of Fc.

    ## Move the data from txtfile to ba0.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        for j in range(rois):
            ba0.roi[j].time[i] = float(x[j*2+0])
            ba0.roi[j].value[i] = float(x[j*2+1])

    return ba0
    ## End of readbiosensor().
300
301
def writebiosensor(ba0, fname):
    """
    Write a ba class to a Biosensor text file.

    This is like a simplified Clamp file: one header line with a
    ``<name> <desc>_X`` / ``<name> <desc>_Y`` column pair per ROI, followed
    by one line per datapoint with a time/value pair for each ROI.  The
    number of datapoints is taken from the first ROI and the file does not
    end with a line break.

    :param ba0: Biosensor array whose ``roi`` items provide ``name``,
        ``desc``, ``time`` and ``value``.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## The with-block guarantees the file is flushed and closed.
    with open(fname, 'w') as fp:
        ## Write sensorgram data header line.
        headers = []
        for roi in ba0.roi:
            name = roi.name.strip() + " " + roi.desc.strip()
            headers.append("%s_X\t%s_Y" % (name, name))
        fp.write("\t".join(headers))

        ## Write sensorgram data lines.  Three decimal places.
        ## The line break is written before each row, not after it.
        for dpoint in range(len(ba0.roi[0].time)):
            fp.write("\r\n")
            fp.write("\t".join("%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                               for roi in ba0.roi))

    ## Print message.
    print("File %s saved in %s." % (fname, getcwd()))
    return
    ## End of writebiosensor().
326
327
def readcsv(fname):
    """
    Read a comma-separated value text file into a ba class.
    The first column contains time data while the others contain response data.
    Every ROI receives the same time column.
    Here is a very simple example of the comma-delimited format::

        1.0001, 23.7644, 0.7139
        2.0001, 23.4265, 0.0541
        3.0001, 23.1625, 0.3327
        4.0001, 23.5752, 0.8494

    :param fname: Path of the csv file to read.
    :returns: A ``ba.BiosensorArray`` with one ROI per response column.
        A tiny 1x1 array is returned if the file cannot be opened or holds
        no data.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1,1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read the non-blank lines; the with-block closes the file even on error.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        print("Error: No data found in file %s" % fname)
        return ba0
    dpoints = len(txtfile)                  ## Rows in the csv file.
    rois = len(txtfile[0].split(',')) - 1   ## Columns are comma-delimited.

    ## Move the data from txtfile to ba0.
    ba0 = ba.BiosensorArray(rois, dpoints)
    for i, txtline in enumerate(txtfile):
        x = txtline.split(',')
        tx = float(x[0])                    ## Shared time value of this row.
        for j in range(rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j+1])

    print("The file %s has %i ROIs and %i datapoints." % (fname, rois, dpoints))
    return ba0
    ## End of readcsv().
365
366
def applygal(ba0, fname):
    """
    Placeholder for applying the microarray information of a Gal file.

    *(This function is under construction)*

    Neither argument is used yet: the function prints a notice and returns
    None.
    """
    notice = "This feature is under construction."
    print(notice)
    return None
    ## End of applygal().
376
377
def applykey(ba0, fname):
    """
    Read a Key file and apply its microarray information.
    Multiple background ROIs are not yet supported.
    Here is a very simple example (tab-delimited)::

        No.  Description1  Description2  Background ROI  Col  Row
        1    Rat TNF       Antibody01    2               1    1
        2    ratIgG        Antibody02    4               2    1
        3    Hum TNF       Antibody03    4               3    1
        4    humIgG        Antibody04    2               4    1

    :param ba0: Biosensor array to annotate.  It is not modified.
    :param fname: Path of the key file to read.
    :returns: A deep copy of ``ba0`` with ``name``, ``desc``, ``bgroi``,
        ``spotx`` and ``spoty`` set for every listed ROI.  The unchanged
        ``ba0`` itself is returned if the file cannot be opened or fails
        validation.
    """

    ## Try to open file.  Return unchanged ba object if it fails.
    try:
        fp = open(fname, "r")
    except IOError:
        print('Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the file on every exit path.
    with fp:
        ## Check header line for signature text.
        txt = fp.readline()
        if ("No." not in txt):
            print("Error: The header line is unfamiliar.")
            return ba0
        ## Read the non-blank lines into a 2d list (rows of tab-split fields).
        keytable = [line.strip().split('\t') for line in fp if line.strip()]

    if not keytable:
        print("Error: The key file contains no entries.")
        return ba0

    ## Use list comprehensions to get one list for each column.
    ids   = [int(x[0]) for x in keytable]   ## First column is integer.
    desc1 = [x[1] for x in keytable]        ## Second column is text.
    desc2 = [x[2] for x in keytable]        ## Third column is text.
    bg    = [int(x[3]) for x in keytable]   ## Fourth column is integer.  Change base1 to base0?
    col   = [int(x[4]) for x in keytable]   ## Fifth column is integer.
    row   = [int(x[5]) for x in keytable]   ## Sixth column is integer.

    ## Check if key file looks valid.  These tests are not very thorough!
    keys = 1 + max(ids) - min(ids)
    if (ba0.rois != keys):
        print("Error: We have %i ROIs but %i keyfile entries." % (ba0.rois, keys))
        return ba0
    ## Identifiers are used as base-1 indices below, so they must start at 1;
    ## otherwise roi[id-1] would be out of range or wrap around to the end.
    if ((min(ids) < 1) or (max(ids) > ba0.rois)):
        print("Error: Identifiers (id) must range from 1 to %i." % ba0.rois)
        return ba0
    if ((min(bg) < min(ids)) or (max(bg) > max(ids))):
        print("Error: at least one specified background ROI does not exist")
        print("Identifiers (id) range from %i to %i." % (min(ids), max(ids)))
        print("Background references range from %i to %i." % (min(bg), max(bg)))
        return ba0

    ## Create new object and put data in it.
    ba1 = deepcopy(ba0)
    for i, roiid in enumerate(ids):
        roi = ba1.roi[roiid-1]
        roi.name = desc1[i]
        roi.desc = desc2[i]
        roi.bgroi = [bg[i]]  ## ToDo: Decide if bg will be id or index.  Base1 now.
        roi.spotx = col[i]
        roi.spoty = row[i]

    print("Successfully loaded information for %i ROIs." % keys)
    return ba1
    ## End of applykey().
444
445
def applymethod(ba0, fname):
    """
    Placeholder for applying an ICM Analyte/Method xls file.

    *(This feature is under construction.)*

    Neither argument is used yet: the function prints a notice and returns
    None.
    """
    notice = "This feature is under construction."
    print(notice)
    return None
    ## End of applymethod().
455
456
457
def outputbindinglevels(ba0, fname, interval, *baselines):
    """
    Measure the binding level changes at multiple intervals along a sensorgram.
    Write the data to a file.  If interval is 500 and baselines are 100 and
    1100 then binding levels are changes between 100-600s and between
    1100-1600s.  Each level is the average of 30s of data (15s either side
    of the time point), obtained from the ROI's ``time2val`` method.

    Each output line holds the ROI index and name followed by one
    tab-delimited binding level (three decimals) per baseline.

    :param ba0: Biosensor array whose ``roi`` items provide ``index``,
        ``name`` and ``time2val(t_start, t_end)``.
    :param fname: Path of the file to create or overwrite.
    :param interval: Seconds between a baseline time and its measurement.
    :param baselines: One or more baseline times in seconds.
    :returns: None.
    """
    ## The with-block guarantees the file is flushed and closed.
    with open(fname, 'w') as fp:
        ## Measurements.
        for iroi in ba0.roi:
            fp.write("%i\t%s" % (iroi.index, iroi.name))
            for t1 in baselines:
                t2 = t1 + interval
                y1 = np.average(iroi.time2val(t1-15, t1+15))
                y2 = np.average(iroi.time2val(t2-15, t2+15))
                fp.write("\t%.3f" % (y2-y1))
            fp.write("\n")
    print("File %s saved in %s." % (fname, getcwd()))
    return
    ## End of outputbindinglevels().
481
482
## Running this file directly only reports that it is a library module.
if __name__ == '__main__':
    print("This module isn't a stand-alone app.")
486
487 ################################# End of module #################################