ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/io_module.py
Revision: 56
Committed: Mon Mar 21 23:51:57 2011 UTC (8 years, 3 months ago) by clausted
File size: 16867 byte(s)
Log Message:
Change to field names printed by writebiosensor() function.
Line File contents
1 """
2 io_module
3 ---------
4
5 Input/Output module for converting files to Biosensor Array class.
6 Supported sensorgram types include Biosensor, CLAMP, SPRit, and Plexera ICM.
7 Supported microarray types include GAL and ISB Map.
8
9 .. moduleauthor:: Christopher Lausted,
10 Institute for Systems Biology,
11 OSPRAI developers.
12
13 Examples::
14
15 >>> import io_module as io
16 >>> ba1 = io.readsprit("spritdata.txt")
17 >>> ba1 = io.readclamp("spritdata.txt")
18 >>> ba1 = io.readicmtxt("icmdata.txt")
19 >>> ba1 = io.readbiosensor("icmdata.txt")
20 >>> ba1 = io.readcsv("rawdata.csv")
21 >>> ba2 = io.applygal(ba1, "galfile.gal")
22 >>> ba2 = io.applykey(ba1, "keyfile.tsv")
23 >>> ba3 = io.applymethod(ba2, "icmmethod.xls")
24 >>> io.writesprit(ba3, "newspritfile.txt")
25 >>> io.writeclamp(ba3, "newclampfile.txt")
26 """
27 __version__ = "110321"
28
29
30 ## Import libraries.
31 from copy import deepcopy
32 from os import getcwd
33 from time import mktime
34 import re
35 import numpy as np ## Numpy array library.
36 import ba_class as ba ## Our Biosensor Array class.
37 reload(ba)
38
def readsprit(fname):
    """
    Read a SPRit text file into a ba class.
    It has two tab-delimited columns and two header lines.
    Here is a very simple example with 2 rois and 3 datapoints::

        Elapsed Time (Seconds)    Average Intensity (Pixel Intensity)
        BEGIN
        0.000000e+000    2.863145e+003
        5.013000e+000    2.863367e+003
        1.002500e+001    2.862950e+003
        0.000000e+000    2.862875e+003
        5.013000e+000    2.862510e+003

    :param fname: Path of the SPRit text file to read.
    :returns: A BiosensorArray holding one ROI per block of rows.  A 1x1
              BiosensorArray is returned if the file cannot be opened, the
              second line does not contain BEGIN, or there are no data rows.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the handle on every exit path.
    with fp:
        ## Check header lines for signature text.
        fp.readline()        ## 'Elapsed Time...' (content is not checked)
        txt = fp.readline()  ## 'BEGIN'
        if 'BEGIN' not in txt:
            print("Error: Second line is not BEGIN.")
            return ba0
        ## Keep the data lines; blank lines cannot be parsed as numbers.
        txtfile = [line for line in fp if line.strip()]

    if not txtfile:
        print("Error: File %s contains no data." % fname)
        return ba0

    ## Change text into an Nx2 array of (time, signal).
    sprtable = np.zeros((len(txtfile), 2), dtype=float)
    for i, line in enumerate(txtfile):
        fields = line.split('\t')
        sprtable[i, 0] = float(fields[0])  ## Time column.
        sprtable[i, 1] = float(fields[1])  ## Signal column.

    ## Find number of datapoints per ROI: the first ROI ends where the time
    ## drops back.  If the time never drops, the file holds a single ROI and
    ## every row belongs to it.
    rows = len(sprtable)
    for i in range(1, len(sprtable)):
        if sprtable[i, 0] < sprtable[i-1, 0]:
            rows = i
            break
    rois = len(sprtable) // rows

    ## Move data the old-fashioned iterative way.
    ba0 = ba.BiosensorArray(rois, rows)
    k = 0
    for i in range(rois):
        for j in range(rows):
            ba0.roi[i].time[j] = sprtable[k, 0]
            ba0.roi[i].value[j] = sprtable[k, 1]
            k += 1

    return ba0
## End of readsprit() function
99
100
def writesprit(ba0, fname):
    """
    Write a ba class to a SPRit text file.

    Two header lines are written, followed by one "time<TAB>value" row per
    datapoint, ROI after ROI.  The number of datapoints written for every ROI
    is the length of the first ROI's time vector.

    :param ba0: Object whose ``roi`` sequence provides ``time`` and ``value``.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## The with-block guarantees the file is flushed and closed.
    with open(fname, 'w') as fp:
        fp.write("Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\r\n")
        fp.write("BEGIN\r\n")
        for roi in ba0.roi:
            for dpoint in range(len(ba0.roi[0].time)):
                fp.write("%f\t%f\r\n" % (roi.time[dpoint], roi.value[dpoint]))
    print("File %s saved in %s." % (fname, getcwd()))
    return
## End of writesprit() function
116
117
def readclamp(fname):
    """
    Placeholder for reading a Clamp text file into a ba class.

    A Clamp file has two tab-delimited columns per SPR flowcell/roi and a
    varying number of header lines with injection information.
    Here is a very simple example with 2 rois and 3 datapoints::

        Vers 3.41 Data
        Conc1     0       0       0       0
        Start1    301.5   301.5   301.5   301.5
        Stop1     949.8   949.8   949.8   949.8
        RInd1     0       0       0       0
        Conc2     0       0       0       0
        Start2    986.4   0       0       0
        Stop2     1626    0       0       0
        RInd2     0       0       0       0
        Flow      1       1       1       1
        Time1     Data1   Time2   Data2
        0.094     0.062   0.094   0.053
        1.094     0.026   1.094   0.05
        2.094     0.119   2.094   0.055

    The reader is not implemented yet: fname is ignored, a notice is
    printed, and None is returned.
    """
    notice = "This feature is under construction."
    print(notice)
    return None
## End of readclamp() function
144
145
def writeclamp(ba0, fname):
    """
    Write a ba class to a Clamp text file.

    The file consists of a version line, four header rows per injection
    (Conc, Start, Stop, RInd), a Flow row, a column-title row, and then one
    row per datapoint holding a time/value pair for every ROI.  Every row is
    terminated with CR LF.  The injection count and datapoint count are taken
    from the first ROI.

    :param ba0: Object whose ``roi`` sequence provides ``name``, ``time``,
                ``value``, ``flow``, ``injconc``, ``injstart``, ``injstop``
                and ``injrind``.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    rois = ba0.roi

    def _row(label, values):
        ## One complete header row: label, then one %.3f field per ROI.
        return label + "".join("\t%.3f" % v for v in values) + "\r\n"

    ## The with-block guarantees the file is flushed and closed.
    with open(fname, 'w') as fp:
        ## First header line.
        fp.write("Vers 3.41 Data\r\n")

        ## Next write injection information.  Each row carries its own line
        ## ending so that consecutive injections do not run together.
        for inj in range(len(rois[0].injconc)):
            num = inj + 1
            fp.write(_row("Conc%i" % num, [roi.injconc[inj] for roi in rois]))
            fp.write(_row("Start%i" % num, [roi.injstart[inj] for roi in rois]))
            fp.write(_row("Stop%i" % num, [roi.injstop[inj] for roi in rois]))
            fp.write(_row("RInd%i" % num, [roi.injrind[inj] for roi in rois]))

        ## Next write flowrate line.
        fp.write(_row("Flow", [roi.flow for roi in rois]))

        ## Write sensorgram data header line.
        titles = ["Time%i\t%s" % (i + 1, roi.name) for i, roi in enumerate(rois)]
        fp.write("\t".join(titles) + "\r\n")

        ## Write sensorgram data lines.  Three decimal places.
        for dpoint in range(len(rois[0].time)):
            pairs = ["%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                     for roi in rois]
            fp.write("\t".join(pairs) + "\r\n")

    print("File %s saved in %s." % (fname, getcwd()))
    return
## End of writeclamp() function
189
190
def readicmtxt(fname):
    """
    Read a ICM text file into a ba class.
    Here is a very simple example of the tab-delimited format::

        03/05/2010 13:37:21.312    249.408    0.000    0.000
        03/05/2010 13:37:22.312    249.306    0.000    0.000

    The first field of each line is a timestamp; times stored in the result
    are seconds relative to the timestamp of the first line.

    :param fname: Path of the ICM text file to read.
    :returns: A BiosensorArray sized (tabs in first line) x (data lines).
              A 1x1 BiosensorArray is returned if the file cannot be opened
              or contains no data lines.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read the data lines and close.  Blank lines carry no timestamp and
    ## cannot be parsed, so they are skipped.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        print("Error: File %s contains no data." % fname)
        return ba0

    dpoints = len(txtfile)
    rois = txtfile[0].count("\t")  ## Usually 25.

    ## Create and size ba object.
    print("This ICM file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Determine experiment start time so we can make other times relative.
    t0 = __icmtime2sec(txtfile[0].split("\t")[0])  ## E.g. 1267825040.22

    ## Move the data from txtfile to ba0.
    ## NOTE(review): column j is stored in roi[j] for j = 1 .. rois-1, so
    ## roi[0] is never filled and column number `rois` is never read.
    ## Confirm this matches the real ICM layout (e.g. lines ending in a
    ## trailing tab) before changing it.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        tx = __icmtime2sec(x[0]) - t0
        for j in range(1, rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j])

    return ba0
## End of readicmtxt
232
233
def __icmtime2sec(timetxt):
    """
    Convert a timestamp such as "03/05/2010 15:19:27.312" to seconds.

    The date is read as month/day/year.  Whole minutes are converted with
    time.mktime() (local time, DST flag 0) and the fractional seconds field
    is added afterwards.
    """
    datepart, timepart = timetxt.split(" ")
    month, day, year = [int(field) for field in datepart.split("/")]
    hour, minute, seconds = timepart.split(":")
    whole = mktime((year, month, day, int(hour), int(minute), 0, 0, 0, 0))
    return whole + float(seconds)
## End of __icmtime2sec
244
245
def readbiosensor(fname):
    """
    Read a Biacore-style text file into a ba class.
    Here is a very simple example of the tab-delimited format::

        Ab1 Fc=4- 1_X    Ab1 Fc=4 -1_Y    Ab2 Fc=4 -1_X    Ab2 Fc=4 -1_Y
        13.1    23.7644    93.1    0.713912
        13.6    23.4265    93.6    0.0541172
        14.1    23.1625    94.1    0.332768
        14.6    23.5752    94.6    0.849459

    Each ROI occupies a pair of columns (_X time, _Y value).  The ROI name is
    the header text left of "Fc=" and the description is "Fc=" plus the text
    to its right.

    :param fname: Path of the Biosensor text file to read.
    :returns: A BiosensorArray with one ROI per column pair.  A 1x1
              BiosensorArray is returned if the file cannot be opened or the
              header line fails validation.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the handle on every exit path, including the
    ## invalid-header return.
    with fp:
        ## Read header line.  Check number of pairs of _X and _Y labels.
        ## The +1 accounts for the final label, which is not tab-terminated.
        txthdr = fp.readline()
        cols = txthdr.count("Fc=")
        xys = txthdr.count("_X\t") + txthdr.count("_Y\t") + 1
        if (cols != xys) or (cols % 2 != 0):
            print("Error: This is not a valid Biosensor file.")
            return ba0
        ## Keep the data lines; blank lines cannot be parsed as numbers.
        txtfile = [line for line in fp if line.strip()]

    dpoints = len(txtfile)
    rois = cols // 2

    ## Create and size ba object.
    print("This Biosensor file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Get names of ROIs from header like "Ab1 Fc=4 - 1_Y".
    ## Drop the line ending first so it does not end up in the last desc.
    txthdr = txthdr.rstrip("\r\n")
    txthdr = txthdr.replace(" -", "-").replace("- ", "-")  ## Remove unwanted spaces.
    names = txthdr.split("\t")                             ## Use tab delimiter.
    for j in range(rois):
        label = names[j*2+1].replace("_Y", "").strip(" -")
        left, _sep, right = label.partition("Fc=")
        ba0.roi[j].name = left            ## Text left of Fc.
        ba0.roi[j].desc = "Fc=" + right   ## Text right of Fc.

    ## Move the data from txtfile to ba0.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        for j in range(rois):
            ba0.roi[j].time[i] = float(x[j*2+0])
            ba0.roi[j].value[i] = float(x[j*2+1])

    return ba0
## End of readbiosensor()
302
303
def writebiosensor(ba0, fname):
    """
    Write a ba class to a Biosensor text file.

    The header holds a "<name> Fc=<n>-1_X" / "<name> Fc=<n>-1_Y" column pair
    per ROI, where <n> is the 1-based ROI position.  Each following line holds
    a time/value pair per ROI with three decimal places.  Lines are separated
    by CR LF and the last line has no terminator.  The datapoint count is
    taken from the first ROI.

    :param ba0: Object whose ``roi`` sequence provides ``name``, ``time``
                and ``value``.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """

    ## This is like a simplified Clamp file.
    ## The with-block guarantees the file is flushed and closed.
    with open(fname, 'w') as fp:
        ## Write sensorgram data header line.
        titles = []
        for i, roi in enumerate(ba0.roi):
            name = roi.name.strip()
            ## \W+ removes every character that is not a letter, digit or
            ## underscore (spaces, '=' and '-' included).
            name = re.sub(r'\W+', '', name)
            ## NOTE(review): after the substitution above no space or '='
            ## remains, so the next two patterns cannot match.  They are
            ## kept unchanged; confirm the intended order.
            name = re.sub(r'_ ', '', name)
            name = re.sub(r'Fc=[0-9]+', '', name)
            name = re.sub(r'[0-9]+_[XY]', '', name)
            tag = name + " Fc=" + str(i+1)
            titles.append(tag + "-1_X\t" + tag + "-1_Y")
        fp.write("\t".join(titles))

        ## Write sensorgram data lines.  Three decimal places.
        for dpoint in range(len(ba0.roi[0].time)):
            pairs = ["%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                     for roi in ba0.roi]
            fp.write("\r\n" + "\t".join(pairs))

    print("File %s saved in %s." % (fname, getcwd()))
    return
## End of writebiosensor()
334
335
def readcsv(fname):
    """
    Read a comma-separated value text file into a ba class.
    The first column contains time data while the others contain response data.
    Here is a very simple example of the format::

        1.0001, 23.7644, 0.7139
        2.0001, 23.4265, 0.0541
        3.0001, 23.1625, 0.3327
        4.0001, 23.5752, 0.8494

    :param fname: Path of the csv file to read.
    :returns: A BiosensorArray with one ROI per response column; every ROI
              shares the time values of the first column.  A 1x1
              BiosensorArray is returned if the file cannot be opened or
              contains no data lines.
    """

    ## Try to open file.  Return tiny ba object if it fails.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read the data lines and close.  Blank lines cannot be parsed as
    ## numbers, so they are skipped.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        print("Error: File %s contains no data." % fname)
        return ba0

    dpoints = len(txtfile)                 ## Rows in the csv file.
    rois = len(txtfile[0].split(',')) - 1  ## Columns are comma-delimited.

    ## Move the data from txtfile to ba0.
    ba0 = ba.BiosensorArray(rois, dpoints)
    for i, txtline in enumerate(txtfile):
        x = txtline.split(',')
        tx = float(x[0])
        for j in range(rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j+1])

    print("The file %s has %i ROIs and %i datapoints." % (fname, rois, dpoints))
    return ba0
## End of readcsv()
373
374
def applygal(ba0, fname):
    """
    Placeholder for reading a Gal file and applying its microarray
    information.

    *(This function is under construction)*

    Both arguments are currently ignored: a notice is printed and None is
    returned.
    """
    notice = "This feature is under construction."
    print(notice)
    return None
## End of applygal()
384
385
def applykey(ba0, fname):
    """
    Read a Key file and apply its microarray information.
    Multiple background ROIs are not yet supported.
    Here is a very simple example::

        No.    Description1    Description2    Background ROI    Col    Row
        1      Rat TNF         Antibody01      2                 1      1
        2      ratIgG          Antibody02      4                 2      1
        3      Hum TNF         Antibody03      4                 3      1
        4      humIgG          Antibody04      2                 4      1

    :param ba0: Array to annotate.  It is not modified.
    :param fname: Path of the tab-delimited key file.
    :returns: A deep copy of ba0 with ``name``, ``desc``, ``bgroi``,
              ``spotx`` and ``spoty`` set on each ROI.  The original ba0 is
              returned unchanged if the file cannot be opened, the header is
              unfamiliar, the file has no entries, or validation fails.
    """

    ## Try to open file.  Return unchanged ba object if it fails.
    try:
        fp = open(fname, "r")
    except IOError:
        print('Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the handle on every exit path.
    with fp:
        ## Check header line for signature text.
        txt = fp.readline()
        if "No." not in txt:
            print("Error: The header line is unfamiliar.")
            return ba0
        ## One list of tab-separated fields per entry.  Blank lines hold no
        ## entry and are skipped.
        keytable = [line.strip().split('\t') for line in fp if line.strip()]

    if not keytable:
        print("Error: The key file %s has no entries." % fname)
        return ba0

    ## Use list comprehensions to get one list for each column.
    ids = [int(x[0]) for x in keytable]   ## First column is integer.
    desc1 = [x[1] for x in keytable]      ## Second column is text.
    desc2 = [x[2] for x in keytable]      ## Third column is text.
    bg = [int(x[3]) for x in keytable]    ## Fourth column is integer.  Change base1 to base0?
    col = [int(x[4]) for x in keytable]   ## Fifth column is integer.
    row = [int(x[5]) for x in keytable]   ## Sixth column is integer.

    ## Check if key file looks valid.  These tests are not very thorough!
    lowest, highest = min(ids), max(ids)
    keys = 1 + highest - lowest
    if ba0.rois != keys:
        print("Error: We have %i ROIs but %i keyfile entries." % (ba0.rois, keys))
        return ba0
    if (min(bg) < lowest) or (max(bg) > highest):
        print("Error: at least one specified background ROI does not exist")
        print("Identifiers (id) range from %i to %i." % (lowest, highest))
        print("Background references range from %i to %i." % (min(bg), max(bg)))
        return ba0

    ## Create new object and put data in it.  Identifiers are base 1.
    ba1 = deepcopy(ba0)
    for i, ident in enumerate(ids):
        spot = ba1.roi[ident-1]
        spot.name = desc1[i]
        spot.desc = desc2[i]
        spot.bgroi = [bg[i]]  ## ToDo: Decide if bg will be id or index.  Base1 now.
        spot.spotx = col[i]
        spot.spoty = row[i]

    print("Successfully loaded information for %i ROIs." % keys)
    return ba1
## End of applykey()
452
453
def applymethod(ba0, fname):
    """
    Placeholder for reading a ICM Analyte/Method xls file and applying its
    information.

    *(This feature is under construction.)*

    Both arguments are currently ignored: a notice is printed and None is
    returned.
    """
    notice = "This feature is under construction."
    print(notice)
    return None
## End of applymethod()
463
464
465
def outputbindinglevels(ba0, fname, interval, *baselines):
    """
    Measure the binding level changes at multiple intervals along a sensorgram.
    Write the data to a file.  If interval is 500 and baselines are [100, 1100]
    then binding levels are changes between 100-600s and between 1100-1600s.
    Average 30s of data.

    One line is written per ROI: index, name, then one change value (three
    decimal places) per baseline, all tab-delimited.

    :param ba0: Object whose ``roi`` items provide ``index``, ``name`` and a
                ``time2val(start, stop)`` method.
    :param fname: Path of the file to create or overwrite.
    :param interval: Seconds between the baseline and the later measurement.
    :param baselines: One or more baseline times in seconds.
    :returns: None.
    """
    ## The with-block guarantees the file is flushed and closed.
    with open(fname, 'w') as fp:
        ## Measurements
        for iroi in ba0.roi:
            fp.write("%i\t%s" % (iroi.index, iroi.name))
            for t1 in baselines:
                t2 = t1 + interval
                ## Each level is the mean over a window of +/-15 s.
                y1 = np.average(iroi.time2val(t1-15, t1+15))
                y2 = np.average(iroi.time2val(t2-15, t2+15))
                fp.write("\t%.3f" % (y2-y1))
            fp.write("\n")
    print("File %s saved in %s." % (fname, getcwd()))
    return
## End of outputbindinglevels()
489
490
## Running this file directly only reports that it is meant to be imported.
if __name__ == '__main__':
    print("This module isn't a stand-alone app.")

################################# End of module #################################
495 ################################# End of module #################################