"""
io_module
---------

Input/Output module for converting files to Biosensor Array class.
Supported sensorgram types include Biosensor, CLAMP, SPRit, and Plexera ICM.
Supported microarray types include GAL and ISB Map.

.. moduleauthor:: Christopher Lausted,
                  Institute for Systems Biology,
                  OSPRAI developers.

Examples::

    >>> import io_module as io
    >>> ba1 = io.readsprit("spritdata.txt")
    >>> ba1 = io.readclamp("clampdata.txt")
    >>> ba1 = io.readicmtxt("icmdata.txt")
    >>> ba1 = io.readbiosensor("biosensordata.txt")
    >>> ba1 = io.readcsv("rawdata.csv")
    >>> ba2 = io.applygal(ba1, "galfile.gal")
    >>> ba2 = io.applykey(ba1, "keyfile.tsv")
    >>> ba3 = io.applymethod(ba2, "icmmethod.xls")
    >>> io.writesprit(ba3, "newspritfile.txt")
    >>> io.writeclamp(ba3, "newclampfile.txt")
"""
26 |
__version__ = "110208" |
27 |
|
28 |
|
29 |
## Import libraries. |
30 |
from copy import deepcopy |
31 |
from os import getcwd |
32 |
from time import mktime |
33 |
import numpy as np ## Numpy array library. |
34 |
import ba_class as ba ## Our Biosensor Array class. |
35 |
reload(ba) |
36 |
|
37 |
def readsprit(fname):
    """
    Read a SPRit text file into a ba class.

    The file has two tab-delimited columns (time, signal) and two header
    lines.  The sensorgrams of all ROIs are stacked vertically in the same
    two columns; the first place where the time value decreases marks the
    length of one sensorgram.
    Here is a very simple (truncated) example::

        Elapsed Time (Seconds)  Average Intensity (Pixel Intensity)
        BEGIN
        0.000000e+000   2.863145e+003
        5.013000e+000   2.863367e+003
        1.002500e+001   2.862950e+003
        0.000000e+000   2.862875e+003
        5.013000e+000   2.862510e+003

    :param fname: Path of the SPRit text file.
    :returns: A ba.BiosensorArray sized (rois, datapoints).  A tiny 1x1
              array is returned if the file cannot be opened, the second
              line does not contain BEGIN, or no data rows are found.
              Rows of an incomplete final ROI are dropped.
    """

    ## Tiny fallback object returned on any failure.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block guarantees the handle is closed on every exit path.
    with fp:
        fp.readline()        ## 'Elapsed Time...' header, not needed.
        txt = fp.readline()  ## 'BEGIN'
        if 'BEGIN' not in txt:
            print("Error: Second line is not BEGIN.")
            return ba0
        ## Skip blank lines (e.g. a trailing newline) that float() rejects.
        txtfile = [line for line in fp if line.strip()]

    if not txtfile:
        print("Error: No data found in file %s." % fname)
        return ba0

    ## Change text into an Nx2 array of (time, signal).
    sprtable = np.zeros((len(txtfile), 2), dtype=float)
    for i, line in enumerate(txtfile):
        cols = line.split('\t')
        sprtable[i, 0] = float(cols[0])  ## Time column.
        sprtable[i, 1] = float(cols[1])  ## Signal column.

    ## Find the number of datapoints per ROI: the first row where time
    ## decreases.  Default to the whole table so that a single-ROI file
    ## (time never decreases) keeps all of its datapoints.
    rows = len(sprtable)
    for i in range(1, len(sprtable)):
        if sprtable[i, 0] < sprtable[i - 1, 0]:
            rows = i
            break
    rois = len(sprtable) // rows

    ## Move data the old-fashioned iterative way.
    ba0 = ba.BiosensorArray(rois, rows)
    k = 0
    for i in range(rois):
        for j in range(rows):
            ba0.roi[i].time[j] = sprtable[k, 0]
            ba0.roi[i].value[j] = sprtable[k, 1]
            k += 1

    return ba0
    ## End of readsprit() function
97 |
|
98 |
|
99 |
def writesprit(ba0, fname):
    """
    Write a ba class to a SPRit text file.

    The output has the two SPRit header lines followed by one
    tab-delimited "time, value" row per datapoint, with the ROIs stacked
    one after another.  Every ROI is written with the datapoint count of
    the first ROI.

    :param ba0: Object whose ``roi`` sequence holds ``time`` and ``value``
                sequences.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    ## The with-block actually closes (and flushes) the file; the previous
    ## bare "fp.close" expression never called the method.
    with open(fname, 'w') as fp:
        fp.write("Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\r\n")
        fp.write("BEGIN\r\n")
        for roi in ba0.roi:
            for dpoint in range(len(ba0.roi[0].time)):
                fp.write("%f\t%f\r\n" % (roi.time[dpoint], roi.value[dpoint]))
    print("File %s saved in %s." % (fname, getcwd()))
    return
    ## End of writesprit() function
114 |
|
115 |
|
116 |
def readclamp(fname):
    """
    Read a Clamp text file into a ba class.

    *(This function is under construction: it only prints a notice and
    returns None.)*

    A Clamp file has two tab-delimited columns per SPR flowcell/roi.
    It has a varying number of header lines with injection information.
    Here is a very simple example with 2 rois and 3 datapoints::

        Vers 3.41 Data
        Conc1   0       0       0       0
        Start1  301.5   301.5   301.5   301.5
        Stop1   949.8   949.8   949.8   949.8
        RInd1   0       0       0       0
        Conc2   0       0       0       0
        Start2  986.4   0       0       0
        Stop2   1626    0       0       0
        RInd2   0       0       0       0
        Flow    1       1       1       1
        Time1   Data1   Time2   Data2
        0.094   0.062   0.094   0.053
        1.094   0.026   1.094   0.05
        2.094   0.119   2.094   0.055

    :param fname: Path of the Clamp file (currently unused).
    :returns: None.
    """

    ## Parenthesised single-argument print is valid on Python 2 and 3.
    print("This feature is under construction.")
    return
    ## End of readclamp() function
142 |
|
143 |
|
144 |
def writeclamp(ba0, fname):
    """
    Write a ba class to a Clamp text file.

    Layout: a version line, then Conc/Start/Stop/RInd lines for every
    injection, a Flow line, a column header line ("TimeN" followed by the
    ROI name) and finally one row per datapoint holding a time/value pair
    for each ROI.  All numbers are written with three decimal places.
    The injection count and datapoint count are taken from the first ROI.

    :param ba0: Object whose ``roi`` sequence holds ``injconc``,
                ``injstart``, ``injstop``, ``injrind``, ``flow``, ``name``,
                ``time`` and ``value``.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    rois = ba0.roi

    def _row(label, values):
        ## One complete header row: label, a tab-prefixed %.3f value per
        ## ROI, and the line terminator.
        return label + "".join("\t%.3f" % v for v in values) + "\r\n"

    ## The with-block actually closes the file; the previous bare
    ## "fp.close" expression never called the method.
    with open(fname, 'w') as fp:
        ## First header line.
        fp.write("Vers 3.41 Data\r\n")

        ## Injection information.  Every row carries its own terminator so
        ## that RIndN and ConcN+1 no longer run together on one line when
        ## there is more than one injection.
        for inj in range(len(rois[0].injconc)):
            num = inj + 1
            fp.write(_row("Conc%i" % num, [roi.injconc[inj] for roi in rois]))
            fp.write(_row("Start%i" % num, [roi.injstart[inj] for roi in rois]))
            fp.write(_row("Stop%i" % num, [roi.injstop[inj] for roi in rois]))
            fp.write(_row("RInd%i" % num, [roi.injrind[inj] for roi in rois]))

        ## Flowrate line.
        fp.write(_row("Flow", [roi.flow for roi in rois]))

        ## Sensorgram data header line.
        fp.write("\t".join("Time%i\t%s" % (i + 1, roi.name)
                           for i, roi in enumerate(rois)))
        fp.write("\r\n")

        ## Sensorgram data lines.  Three decimal places.
        for dpoint in range(len(rois[0].time)):
            fp.write("\t".join("%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                               for roi in rois))
            fp.write("\r\n")

    print("File %s saved in %s." % (fname, getcwd()))
    return
    ## End of writeclamp() function
187 |
|
188 |
|
189 |
def readicmtxt(fname):
    """
    Read a ICM text file into a ba class.

    Each line starts with a date/time stamp followed by tab-delimited
    signal values.  Times are stored in seconds relative to the stamp on
    the first line.
    Here is a very simple example of the tab-delimited format::

        03/05/2010 13:37:21.312 249.408 0.000   0.000
        03/05/2010 13:37:22.312 249.306 0.000   0.000

    :param fname: Path of the ICM text file.
    :returns: A ba.BiosensorArray sized (tabs in first line, lines).
              A tiny 1x1 array is returned if the file cannot be opened
              or contains no data lines.
    """

    ## Tiny fallback object returned on any failure.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read every non-blank line; the with-block closes the handle.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        ## An empty file used to raise IndexError on txtfile[0].
        print("Error: No data found in file %s." % fname)
        return ba0

    dpoints = len(txtfile)
    rois = txtfile[0].count("\t")  ## Usually 25.

    ## Create and size ba object.
    print("This ICM file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Determine experiment start time so we can make other times relative.
    first = txtfile[0].split("\t")   ## E.g. "03/05/2010 13:37:20.218"
    t0 = __icmtime2sec(first[0])     ## E.g. 1267825040.22

    ## Move the data from txtfile to ba0.
    ## NOTE(review): columns 1..rois-1 are stored in roi[1..rois-1], so
    ## roi[0] is never filled and column number "rois" is never read.
    ## This may be intentional (1-based ROI numbering, trailing tab in
    ## the file) or an off-by-one -- confirm against a real ICM file
    ## before changing it.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        tx = __icmtime2sec(x[0]) - t0
        for j in range(1, rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j])

    return ba0
    ## End of readicmtxt
230 |
|
231 |
|
232 |
def __icmtime2sec(timetxt): |
233 |
""" |
234 |
Take a time of the form "03/05/2010 15:19:27.312" and return seconds. |
235 |
""" |
236 |
cal, clock = timetxt.split(" ") |
237 |
mm, dd, yy = cal.split("/") |
238 |
hh, min, ss = clock.split(":") |
239 |
stime = (int(yy), int(mm), int(dd), int(hh), int(min), 0, 0, 0, 0) |
240 |
return mktime(stime) + float(ss) |
241 |
"""End of __icmtime2sec""" |
242 |
|
243 |
|
244 |
def readbiosensor(fname):
    """
    Read a Biacore-style text file into a ba class.

    The header line names an _X (time) and a _Y (response) column for
    every ROI; each following line holds the corresponding value pairs.
    Here is a very simple example of the tab-delimited format::

        Ab1 Fc=4- 1_X   Ab1 Fc=4 -1_Y   Ab2 Fc=4 -1_X   Ab2 Fc=4 -1_Y
        13.1    23.7644 93.1    0.713912
        13.6    23.4265 93.6    0.0541172
        14.1    23.1625 94.1    0.332768
        14.6    23.5752 94.6    0.849459

    :param fname: Path of the Biosensor text file.
    :returns: A ba.BiosensorArray sized (rois, datapoints) with each ROI's
              ``name`` (text left of "Fc=") and ``desc`` ("Fc=" and the
              text to its right) filled in.  A tiny 1x1 array is returned
              if the file cannot be opened or the header is not valid.
    """

    ## Tiny fallback object returned on any failure.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the handle on every exit path, including the
    ## invalid-header return that used to leak it.
    with fp:
        ## Read header line.  Check number of pairs of _X and _Y labels.
        ## The +1 accounts for the last label, which ends the line instead
        ## of being followed by a tab.
        txthdr = fp.readline()
        cols = txthdr.count("Fc=")
        xys = txthdr.count("_X\t") + txthdr.count("_Y\t") + 1
        if cols != xys or cols % 2 != 0:
            print("Error: This is not a valid Biosensor file.")
            return ba0
        ## Skip blank lines (e.g. a trailing newline) that float() rejects.
        txtfile = [line for line in fp if line.strip()]

    dpoints = len(txtfile)
    rois = cols // 2

    ## Create and size ba object.
    print("This Biosensor file has %i datapoints for %i ROIs." % (dpoints, rois))
    ba0 = ba.BiosensorArray(rois, dpoints)

    ## Get names of ROIs from header like "Ab1 Fc=4 - 1_Y".
    ## Drop the line terminator first so the last ROI's desc does not end
    ## in a newline, then remove unwanted spaces around the dash.
    txthdr = txthdr.rstrip("\r\n").replace(" -", "-").replace("- ", "-")
    names = txthdr.split("\t")  ## Use tab delimiter.
    for j in range(rois):
        label = names[j * 2 + 1].replace("_Y", "").strip(" -")
        left, _sep, right = label.partition("Fc=")
        ba0.roi[j].name = left            ## Text left of Fc.
        ba0.roi[j].desc = "Fc=" + right   ## Text right of Fc.

    ## Move the data from txtfile to ba0.
    for i, txtline in enumerate(txtfile):
        x = txtline.split("\t")
        for j in range(rois):
            ba0.roi[j].time[i] = float(x[j * 2])
            ba0.roi[j].value[i] = float(x[j * 2 + 1])

    return ba0
    ## End of readbiosensor()
300 |
|
301 |
|
302 |
def writebiosensor(ba0, fname):
    """
    Write a ba class to a Biosensor text file.

    This is like a simplified Clamp file: one header line naming an _X and
    a _Y column per ROI ("<name> <desc>_X", "<name> <desc>_Y"), followed
    by one row per datapoint with a time/value pair for each ROI, written
    with three decimal places.  Rows are separated by CR LF and the file
    does not end with a line terminator.  The datapoint count is taken
    from the first ROI.

    :param ba0: Object whose ``roi`` sequence holds ``name``, ``desc``,
                ``time`` and ``value``.
    :param fname: Path of the file to create or overwrite.
    :returns: None.
    """
    rois = ba0.roi

    ## The with-block actually closes the file; the previous bare
    ## "fp.close" expression never called the method.
    with open(fname, 'w') as fp:
        ## Write sensorgram data header line.
        labels = [roi.name.strip() + " " + roi.desc.strip() for roi in rois]
        fp.write("\t".join("%s_X\t%s_Y" % (label, label) for label in labels))

        ## Write sensorgram data lines.  Three decimal places.
        for dpoint in range(len(rois[0].time)):
            fp.write("\r\n")
            fp.write("\t".join("%.3f\t%.3f" % (roi.time[dpoint], roi.value[dpoint])
                               for roi in rois))

    print("File %s saved in %s." % (fname, getcwd()))
    return
    ## End of writebiosensor()
326 |
|
327 |
|
328 |
def readcsv(fname):
    """
    Read a comma-separated value text file into a ba class.

    The first column contains time data while the others contain response
    data, one column per ROI.  Every ROI receives the same time values.
    Here is a very simple example of the format::

        1.0001, 23.7644, 0.7139
        2.0001, 23.4265, 0.0541
        3.0001, 23.1625, 0.3327
        4.0001, 23.5752, 0.8494

    :param fname: Path of the csv file.
    :returns: A ba.BiosensorArray sized (columns - 1, rows).  A tiny 1x1
              array is returned if the file cannot be opened or contains
              no data lines.
    """

    ## Tiny fallback object returned on any failure.
    ba0 = ba.BiosensorArray(1, 1)
    try:
        fp = open(fname, "r")
    except IOError:
        print('Error: Cannot open file %s for reading' % fname)
        return ba0

    ## Read every non-blank line; the with-block closes the handle.
    with fp:
        txtfile = [line for line in fp if line.strip()]
    if not txtfile:
        ## An empty file used to raise IndexError on txtfile[0].
        print("Error: No data found in file %s." % fname)
        return ba0

    dpoints = len(txtfile)                  ## Rows in the csv file.
    rois = len(txtfile[0].split(',')) - 1   ## Columns are comma-delimited.

    ## Move the data from txtfile to ba0.
    ba0 = ba.BiosensorArray(rois, dpoints)
    for i, txtline in enumerate(txtfile):
        x = txtline.split(',')
        tx = float(x[0])  ## Shared time value for this row.
        for j in range(rois):
            ba0.roi[j].time[i] = tx
            ba0.roi[j].value[i] = float(x[j + 1])

    print("The file %s has %i ROIs and %i datapoints." % (fname, rois, dpoints))
    return ba0
    ## End of readcsv()
365 |
|
366 |
|
367 |
def applygal(ba0, fname):
    """
    Read a Gal file and apply its microarray information.

    *(This function is under construction: it only prints a notice and
    returns None.)*

    :param ba0: Biosensor array object (currently unused).
    :param fname: Path of the Gal file (currently unused).
    :returns: None.
    """
    ## Parenthesised single-argument print is valid on Python 2 and 3.
    print("This feature is under construction.")
    return
    ## End of applygal()
376 |
|
377 |
|
378 |
def applykey(ba0, fname):
    """
    Read a Key file and apply its microarray information.

    Multiple background ROIs are not yet supported.
    Here is a very simple example::

        No. Description1    Description2    Background ROI  Col Row
        1   Rat TNF         Antibody01      2               1   1
        2   ratIgG          Antibody02      4               2   1
        3   Hum TNF         Antibody03      4               3   1
        4   humIgG          Antibody04      2               4   1

    :param ba0: Biosensor array object providing ``rois`` and ``roi``.
                It is not modified.
    :param fname: Path of the tab-delimited key file.
    :returns: A deep copy of ba0 with ``name``, ``desc``, ``bgroi``,
              ``spotx`` and ``spoty`` set for every keyed ROI.  The
              unchanged ba0 itself is returned if the file cannot be
              opened, has an unfamiliar header, has no entries, or fails
              the validity checks.
    """

    ## Try to open file.  Return unchanged ba object if it fails.
    try:
        fp = open(fname, "r")
    except IOError:
        print('Cannot open file %s for reading' % fname)
        return ba0

    ## The with-block closes the handle on every exit path.
    with fp:
        ## Check header line for signature text.
        txt = fp.readline()
        if "No." not in txt:
            print("Error: The header line is unfamiliar.")
            return ba0
        ## One list of column strings per entry.  Blank lines (e.g. a
        ## trailing newline) are skipped because int('') would raise.
        keytable = [line.strip().split('\t') for line in fp if line.strip()]

    if not keytable:
        ## max()/min() of an empty list would raise ValueError below.
        print("Error: The key file %s contains no entries." % fname)
        return ba0

    ## One list for each column.  "ids" rather than "id" so the builtin is
    ## not shadowed.
    ids = [int(x[0]) for x in keytable]    ## First column is integer.
    desc1 = [x[1] for x in keytable]       ## Second column is text.
    desc2 = [x[2] for x in keytable]       ## Third column is text.
    bg = [int(x[3]) for x in keytable]     ## Fourth column is integer. Change base1 to base0?
    col = [int(x[4]) for x in keytable]    ## Fifth column is integer.
    row = [int(x[5]) for x in keytable]    ## Sixth column is integer.

    ## Check if key file looks valid.  These tests are not very thorough!
    ## NOTE(review): the identifiers are used below as 1-based indices
    ## (roi[id-1]) but only their span is checked here, not that they
    ## start at 1.
    keys = 1 + max(ids) - min(ids)
    if ba0.rois != keys:
        print("Error: We have %i ROIs but %i keyfile entries." % (ba0.rois, keys))
        return ba0
    if (min(bg) < min(ids)) or (max(bg) > max(ids)):
        print("Error: at least one specified background ROI does not exist")
        print("Identifiers (id) range from %i to %i." % (min(ids), max(ids)))
        print("Background references range from %i to %i." % (min(bg), max(bg)))
        return ba0

    ## Create new object and put data in it.
    ba1 = deepcopy(ba0)
    for i, ident in enumerate(ids):
        target = ba1.roi[ident - 1]
        target.name = desc1[i]
        target.desc = desc2[i]
        target.bgroi = [bg[i]]  ## ToDo: Decide if bg will be id or index. Base1 now.
        target.spotx = col[i]
        target.spoty = row[i]

    print("Successfully loaded information for %i ROIs." % keys)
    return ba1
    ## End of applykey()
444 |
|
445 |
|
446 |
def applymethod(ba0, fname):
    """
    Read a ICM Analyte/Method xls file and apply its information.

    *(This feature is under construction: it only prints a notice and
    returns None.)*

    :param ba0: Biosensor array object (currently unused).
    :param fname: Path of the method file (currently unused).
    :returns: None.
    """
    ## Parenthesised single-argument print is valid on Python 2 and 3.
    print("This feature is under construction.")
    return
    ## End of applymethod()
455 |
|
456 |
|
457 |
|
458 |
def outputbindinglevels(ba0, fname, interval, *baselines):
    """
    Measure the binding level changes at multiple intervals along a sensorgram.

    Write the data to a file.  If interval is 500 and baselines are
    [100, 1100] then binding levels are changes between 100-600s and
    between 1100-1600s.  Each level is the average of 30s of data
    (15s either side of the time point).

    The output has one tab-delimited line per ROI: its index, its name and
    one binding level (three decimal places) per baseline.

    :param ba0: Object whose ``roi`` items provide ``index``, ``name`` and
                a ``time2val(t_start, t_end)`` method.
    :param fname: Path of the file to create or overwrite.
    :param interval: Time between a baseline point and its measurement.
    :param baselines: Baseline times, one binding level is written for each.
    :returns: None.
    """
    ## The with-block actually closes the file; the previous bare
    ## "fp.close" expression never called the method.
    with open(fname, 'w') as fp:
        ## Measurements
        for iroi in ba0.roi:
            fp.write("%i\t%s" % (iroi.index, iroi.name))
            for t1 in baselines:
                t2 = t1 + interval
                y1 = np.average(iroi.time2val(t1 - 15, t1 + 15))
                y2 = np.average(iroi.time2val(t2 - 15, t2 + 15))
                fp.write("\t%.3f" % (y2 - y1))
            fp.write("\n")
    print("File %s saved in %s." % (fname, getcwd()))
    return
    ## End of outputbindinglevels()
481 |
|
482 |
|
483 |
## Here are a few lines to test this module. |
484 |
if __name__ == '__main__':
    ## This module is a library; running it directly only prints a notice.
    ## Parenthesised single-argument print is valid on Python 2 and 3.
    print("This module isn't a stand-alone app.")
486 |
|
487 |
################################# End of module ################################# |