root/osprai/osprai/trunk/converter.py
Revision: 6
Committed: Fri Apr 9 16:52:52 2010 UTC by clausted
File size: 33635 byte(s)
Log Message:
Added CONSTANTS to represent data file type identification integers.  These should be used throughout the code in the future to make it more readable.  Also added some default injection data to make it possible to convert a SPRit file to a CLAMP file without having an Analyte/injection file.  Also note a changed dictionary key IDcal->ID in one instance. --CGL
1 """
2 Converter: Perform the datafile reading, pre-processing and writing.
3 Yuhang Wan, Christopher Lausted
4 Last modified on 100407 (yymmdd)
5
6 Typical Pipeline:
7 >import converter as cv
8 >dat = cv.datafile_read("example.txt")
9 >dat = cv.outlier_remove(dat)
10 >dat = cv.calibrate(dat)
11 >dat = cv.background_subtract(dat)
12 >dat = cv.curve_split(dat, 100, 800, 200)
13 >dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
14 >dat = cv.data_output_biosensor(dat, "/dir", 0)
15 """
__version__ = "100407"

## Import libraries.
import numpy as np
import pylab as plt
import sys
import xlrd
import time
import os
import copy
#import matplotlib.pyplot as plt
import SPRdataclass as spr

## Global constants and variables.
FMT_UNKNOWN = 0
FMT_SPRIT   = 1  ## Old Plexera SPRit format.
FMT_ICMTXT  = 2  ## Plexera Instrument Control Module text.
FMT_CLAMP34 = 3  ## Morton & Myszka CLAMP Version 3.4+
FMT_DAMXLS  = 4  ## Plexera Data Analysis Module Excel export.

def check_format_input(fname):
    """Examine the format of an input .txt data file."""
    ## Modified 100407 CGL.

    ## Open text file. Exit if file not found.
    try:
        fp = open(fname, "r")
    except IOError:
        print "Error: Unable to open file %s for reading." % fname
        sys.exit(0)

    ## Examine the first line of the text file to guess the format.
    fmt = FMT_UNKNOWN
    sprit_line1 = "Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
    clamp_line1 = "Vers 3.4"
    line1 = fp.readline()
    if (sprit_line1 in line1):
        fmt = FMT_SPRIT
    elif (line1[2] == "/" and line1[5] == "/"):
        ## This format has a date mm/dd/yyyy in the first column.
        fmt = FMT_ICMTXT
    elif (clamp_line1 in line1):
        fmt = FMT_CLAMP34

    fp.close()
    return fmt, line1

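## Illustrative sketch (not part of the original pipeline; "example.txt" is
## only a placeholder name): the returned format code can be compared
## against the FMT_* constants defined above.
##   fmt, first_line = check_format_input("example.txt")
##   if fmt == FMT_SPRIT:
##       print "This looks like an old SPRit file."
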
def sprit_format(fname):
    """Read the SPRit formatted data and reshape it.
    Return the data in the form of a list.
    """
    # Open the file; if the file does not exist, then exit.
    try:
        fp = open(fname, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fname
        sys.exit(0)
    status = {'DatainputType':'SPRit data'}
    # Skip the first line of the input file.
    Tmpstr = fp.readline()
    # Check whether the second line is BEGIN.
    TmpStr = fp.readline()
    if ("BEGIN" not in TmpStr):
        print "Warning: Second line of data file is not 'BEGIN'"
        ##sys.exit(0)
    # Count the lines of each data spot.
    TmpStr = fp.readline()  # skip the first "0.000000e+000"
    num_line = 1            # so we start to count from 1
    # Count until the next "0.000000e+000" is read.
    while True:
        TmpStr = fp.readline()
        if TmpStr.split('\t')[0] == "0.000000e+000":
            break
        else:
            num_line = num_line + 1
    # Count the spots in each file.
    num_spot = 0
    # Skip the first two lines.
    fp.seek(0)
    for i in range(0, 2):
        TmpStr = fp.readline()
    text = fp.readlines()
    for i in text:
        num_spot += i.count('0.000000e+000')
    fp.close()
    #---------------------- Reshaping the data --------------------
    # Reshape the data in string form: num_line*num_spot.
    Shaped_str = np.reshape(text, (num_spot, -1))
    tmpshape = np.shape(Shaped_str)
    # Reshape the data in numerical form: num_line*(num_spot+1).
    # Only the first column is time: time spot1 spot2 ... spotn.
    time = np.zeros(num_line)
    Tmpdata = Shaped_str[0, :]
    for n, i in enumerate(Tmpdata):
        time[n] = float(i.split('\t')[0])
    Shaped_data_1 = np.zeros((tmpshape[0]+1, tmpshape[1]))
    Shaped_data_1[0, :] = time
    for n1, i in enumerate(Shaped_str):
        for n2, j in enumerate(i):
            Shaped_data_1[n1+1][n2] = float(Shaped_str[n1][n2].split('\t')[1])
    # Reshape the data in numerical form:
    # time1 spot1 time2 spot2 ... timen spotn.
    Shaped_data_2 = np.zeros((tmpshape[0]*2, tmpshape[1]))
    for i in range(tmpshape[0]):
        Shaped_data_2[i*2] = Shaped_data_1[0]
        Shaped_data_2[i*2+1] = Shaped_data_1[i+1]

    ## status = {'Datainput Type':'SPRit data'}
    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data_1, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj

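## For reference, the Shaped_data_1 matrix built above is laid out row-wise
## as [time, spot1, spot2, ..., spotN]; the numbers below are invented for
## illustration only:
##   Shaped_data_1[0] = [0.0, 1.0, 2.0, ...]    # elapsed time (s)
##   Shaped_data_1[1] = [1052.3, 1050.9, ...]   # intensity of spot 1
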
def plexera_icm_format(fname):
    """Read the txt data exported from the Plexera ICM software and reshape it.
    Return the data in the form of a list.
    """
    fp = file(fname, 'r')
    Tmpdata, tmptime = [], []
    status = {'DatainputType':'Plexera ICM data'}
    for eachline in fp:
        Tmpdata.append(eachline.split('\t'))
    num_line, num_spot = np.shape(Tmpdata)
    # Initialize the Shaped_data.
    Shaped_data = np.zeros((num_spot, num_line))
    num_spot = num_spot - 1  # the real number of spots
    Tmpdata = np.transpose(Tmpdata)
    # Delete the trailing '\r\n' of the data in the last column of the datafile.
    for n, i in enumerate(Tmpdata[-1]):
        Tmpdata[-1][n] = i.rstrip()
    # Time conversion: convert the first column of the datafile
    # (date and time information) into numerical time.
    for i in Tmpdata[0]:
        tmptime_str = i.split(' ')[1]
        timelist = tmptime_str.split(':')
        now = float(timelist[0])*60*60 + float(timelist[1])*60 + float(timelist[2])
        tmptime.append(now)
    Shaped_data[0] = tmptime
    Shaped_data[0] = Shaped_data[0] - Shaped_data[0][0]
    # Assign the signal data.
    for i in range(num_spot):
        Shaped_data[i+1] = Tmpdata[i+1]

    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
    return dataobj

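## Worked example of the time conversion above (illustration only): a clock
## time of "01:02:03" becomes 1*3600 + 2*60 + 3 = 3723.0 seconds, and the
## whole time axis is then shifted so that the first sample sits at t = 0.
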
def dam_single_sheet(sh, DataList, ROIinfo, start, end, colsize):
    """Retrieve and shape the data from one single sheet of the spreadsheet.
    Return the data and ROI information in the form of lists.
    """
    ## and return the minimal spot Id (start_spot)
    ## Retrieve the head of the spreadsheet; SpotList stores the spot Id of each sheet.

    Head = sh.row_values(1)
    SpotList = []
    num_spot = sh.row_values(0).count('Relative Time')

    for i in range(num_spot):
        TmpHead = Head[colsize*i]
        # There are now 5 columns per ROI for Raw Intensity
        # and 4 columns per ROI for Satellite subtracted Intensity.
        DataList.append(sh.col_values(2+colsize*i, start, end))

        key, val = [], []
        tmpinfo = TmpHead.split('\n')
        for j in tmpinfo:
            key.append(j.split(': ')[0])
            val.append(j.split(': ')[1])
        tmpdic = dict(zip(key, val))
        tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
        ROIinfo.append(tmpdic)

    return DataList, ROIinfo

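## The per-ROI column header parsed above is expected to hold newline-
## separated "key: value" pairs.  A hypothetical header such as
##   "Block: 0\nRow: 2\nColumn: 4"
## would yield tmpdic['Position'] == (1, 3, 5).  (Any field names beyond
## Block/Row/Column are illustrative, not a format specification.)
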
def dam_spreadsheet_format(book):
    """Read the spreadsheet exported from the Plexera DAM software and reshape it.
    Return the shaped data and ROI information.
    """
    ROIinfo, newROIinfo, n_sheet = [], [], 0
    DataList = []
    status = {'DatainputType':'Plexera DAM data'}
    sh0 = book.sheet_by_index(0)
    if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
        print 'This is the satellite subtracted data.'
        colsize = 4
        status['DAM spreadsheet'] = 'Satellite subtracted Intensity'
    elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
        print 'This is the raw intensity data.'
        colsize = 5
        status['DAM spreadsheet'] = 'Raw Intensity'

    # Find the first row that holds signal data.
    for i in range(2, sh0.nrows):
        if sum(sh0.col_values(2, 1, i)) > 0:
            break
    start, end = i-1, sh0.nrows
    num_line = end - start
    ## print 'start: ', start
    # Initialize and assign the time axis.
    Time = sh0.col_values(1, start, end)
    # In case we lose the time axis during the DAM processing.
    if sum(Time) == 0: Time = np.arange(num_line)
    DataList.append(Time)

    # List DataList stores the Shaped_data of each ROI in each sheet;
    # list ROIinfo stores the spot id and the ligand name of each ROI in each sheet.
    for i in range(book.nsheets):
        sh = book.sheet_by_index(i)
        if sh.ncols != 0:
            DataList, ROIinfo = dam_single_sheet(sh, DataList, ROIinfo, start, end, colsize)
            n_sheet = n_sheet + 1
        else:
            break
    # Calculate the dimensions of the Shaped_data and initialize the matrix.
    num_spot = len(DataList) - 1
    num_line = end - start
    Shaped_data = np.zeros((num_spot+1, num_line))
    Shaped_data[0] = DataList[0]

    # In case the ROIs do not count from spot1.
    newROIinfo = copy.deepcopy(ROIinfo)
    StartspotList = np.array([i['ID'] for i in ROIinfo])
    start_spot = min(StartspotList)
    for i in range(num_spot):
        tmp = ROIinfo[i]['ID']
        try:
            j = int(ROIinfo[i]['ID']) - (start_spot-1)
        except ValueError:
            ##return tmp
            if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
                j = int(ROIinfo[i]['ID'][4:]) - (start_spot-1)
            else:
                print 'Illegal galfile format.\n', 'ID:', tmp
                ##return tmp

        Shaped_data[j] = DataList[i+1]
        newROIinfo[j] = ROIinfo[i]

    # Pack the data and related information into an SPRdata object.
    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj

def read_clamp_data(fname):
    """Read the Clamp format data.
    Return the data in the form of a list.
    """
    fp = open(fname, 'r')
    Tmpstr = fp.readline()
    # Examine the file head,
    # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
    ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
    try:
        name = Tmpstr.split(' ')[2].strip()
        ROIdic['ROI name'] = name
        stmp = Tmpstr.split(' ')[3]
        rest = Tmpstr[Tmpstr.find(stmp):].strip()
        ROIdic['concentration'] = rest.split(':')
    except IndexError:
        pass

    for Tmpstr in fp:
        try:
            float(Tmpstr.split('\t')[0])
            n_conc = (Tmpstr.count('\t')+1)/2
            ##print Tmpstr, 'the beginning of data'
            break
        except ValueError:
            if Tmpstr.startswith('Conc1'):
                n_conc = Tmpstr.count('\t')
                Conc = Tmpstr.strip().split('\t')[1:]
            elif Tmpstr.startswith('Start1'):
                Start = Tmpstr.strip().split('\t')[1:]
                n_conc = Tmpstr.count('\t')
            elif Tmpstr.startswith('Stop1'):
                Stop = Tmpstr.strip().split('\t')[1:]
                n_conc = Tmpstr.count('\t')

    # Write the data into a matrix.
    Tmpdata = []
    for eachline in fp:
        a = eachline.strip().split('\t')
        Tmpdata.append(np.array(a, dtype='f'))
    Data = np.transpose(Tmpdata)

    # Write the information.
    ROIinfo = [ROIdic]
    status = {'DatainputType':'Clamp data'}
    sampleinfo, injinfo = [], []
    injpara = [map(float, i) for i in [Start, Stop]]

    if len(Conc) >= 1:
        for i, c in enumerate(Conc):
            sampledic = {}
            name = 'Sample '+str(i+1)
            sampledic = dict(zip(['Name', 'Concentration'], [name, c]))
            ## sampledic['Name'] = name
            ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
            sampleinfo.append(sampledic)
            injdic = dict(zip(['ton', 'toff'], [float(Start[i]), float(Stop[i])]))
            injinfo.append(injdic)
    dataobj = spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    return dataobj

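## A minimal sketch of the file head handled above (values invented for
## illustration; compare data_output_clamp below, which writes this layout):
##   Vers 3.41 cTBA-1 Conc:250 nM
##   Conc1<TAB>250
##   Start1<TAB>100
##   Stop1<TAB>400
##   Time<TAB>Data
## Everything after the first line that begins with a number is read as data.
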
def keyfile_read(fkey):
    """Function: Read the key file for the old SPR instrument.
    Input: the filename.
    Return: the ROI information.
    """
    try:
        fp = open(fkey, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fkey
        sys.exit(0)
    ROIinfo = []
    firstline = fp.readline()  # skip the first line
    # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        ROIdic['ID'] = tmplist[0]    # spot id
        ROIdic['Name'] = tmplist[1]  # name of the protein immobilized
        try:
            ROIdic['Conc'] = tmplist[2]
            ROIdic['Background Spot'] = int(tmplist[3])
        except IndexError:
            pass
        ROIinfo.append(ROIdic)

    fp.close()
    print 'There are %d ROIs in the Key file.' % len(ROIinfo)
    return ROIinfo

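## Assumed key-file layout (one header line, then one tab-separated line per
## ROI; the last two columns are optional).  This is inferred from the
## parsing above, not from a format specification:
##   <ID><TAB><Name><TAB><Conc><TAB><Background spot id>
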
def galfile_read(fname):
    """Function: Read the GAL file.
    Input: the filename.
    Return: the ROI information.
    """
    fp = file(fname, 'r')
    ROIinfo = []
    Tmpstr = fp.readline()  # skip the first line
    Tmpstr = fp.readline()
    Tmpn = int(Tmpstr.split('\t')[0])
    for i in range(Tmpn):
        labelline = fp.readline()

        if labelline.startswith('"Block1'):
            a = labelline.strip().split(',')
            columnsize, rowsize = int(a[3]), int(a[5])
            ## print labelline

    def IDcal(p):
        id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
        return id

    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        pos = tuple(map(int, tmplist[:3]))
        ROIdic['Position'] = pos
        ROIdic['IDcal'] = IDcal(pos)
        ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]
        ROIdic['Name'] = tmplist[3]
        ROIdic['ID'] = tmplist[4]
        try:
            ROIdic['Conc'] = tmplist[5]
            ROIdic['Family'] = tmplist[6]
            blist = tmplist[7][1:-1].split(';')  # always something like '"1,1,2; 1,2,2"'
            Backn = len(blist)
            Bid = []

            for i in blist:
                if '' in i.split(','):
                    break
                Bpos = tuple(map(int, i.split(',')))
                Bid.append(IDcal(Bpos))

            ROIdic['Background Spot'] = tuple(Bid)
        except IndexError:
            pass

        if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
            print 'The spot ID should be consistent with the position. Please check the ID.'
            break
        else:
            ROIdic['ID'] = ROIdic['IDcal']
            ROIinfo.append(ROIdic)
    fp.close()
    print 'There are %d ROIs in the Gal file.' % len(ROIinfo)

    return ROIinfo

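## Worked example of IDcal(): with columnsize=10 and rowsize=10, the
## position (block, row, column) = (1, 2, 3) gives
##   id = (1-1)*10*10 + (2-1)*10 + 3 = 13,
## so the matching GAL entry is expected to carry the ID "spot13".
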
def method_read(fname):
    """Function: Read the analyte table.
    Input: the filename.
    Return: the sample information.
    """
    # Create a concentration dictionary for the sample injected into the flowcell.
    # Export a table containing the concentration of the sample injected, and the duration.
    fp = file(fname, 'r')
    sampleinfo = []
    ##TmpTable,TmpTable2={},{}
    labelline = fp.readline()
    for n, eachline in enumerate(fp):
        sampledic = {}
        sampledic['Location'] = eachline.split('\t')[0]
        sampledic['Name'] = eachline.split('\t')[1]
        sampledic['Concentration'] = float(eachline.split('\t')[2])
        sampledic['Duration'] = float(eachline.split('\t')[4])
        sampledic['Flow Rate'] = float(eachline.split('\t')[3])
        sampledic['Analyte Series'] = eachline.split('\t')[6]
        sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
        sampleinfo.append(sampledic)

    print 'Name', '\t', 'Concentration'
    for i in sampleinfo:
        print i['Name'], '\t', i['Concentration']

    return sampleinfo

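## Assumed analyte-table layout, inferred from the column indices used
## above (one header line, then tab-separated columns in this order):
##   Location  Name  Concentration  Flow Rate  Duration  (unused)
##   Analyte Series  Buffer Blank Series
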
def method_read_fake():
    """Return placeholder sample information (default injection data) so a
    file can still be converted without an analyte/injection file."""
    sampleinfo = []
    sampleinfo.append({'Location':0, 'Name':0, 'Concentration':0, 'Duration':0, 'Flow Rate':0, 'Analyte Series':0, 'Buffer Blank Series':0})
    return sampleinfo

def datafile_read(fname):
    """Function: Read the raw data from the Plexera instrument,
    and pack the raw data (Shaped_data) into a data object.
    Therefore, the initial dataformat = 0.
    Input: the filename
    Return: the packed SPR.DataPass type data object
    """
    #=================== Check the format of the input file ===================
    if fname.split('.')[-1] == 'txt':

        Format_input, Firstline = check_format_input(fname)
        if Format_input == 1:    # for the SPRit file format
            dataobj = sprit_format(fname)
            print '-'*10, ' This is a SPRit data file. ', '-'*10

        elif Format_input == 2:  # for the Plexera ICM file format
            dataobj = plexera_icm_format(fname)
            print '-'*10, ' This is a Plexera ICM exported data file. ', '-'*10

        elif Format_input == 3:  # for the Clamp datafile format
            dataobj = read_clamp_data(fname)
            print '-'*10, ' This is a Clamp file. ', '-'*10

        else:
            print '-'*10, ' Unreadable file input! ', '-'*10
            sys.exit(0)

        if Format_input == 1 or Format_input == 2:
            flag = str.upper(raw_input('Load the key file? (y/n): '))
            if flag == 'Y':
                fkey = raw_input('Input the path of the key file: ')
                ROIinfo = keyfile_read(fkey)
                dataobj.updateROIinfo(ROIinfo)

    elif fname.split('.')[-1] == 'xls':
        # For the DAM spreadsheet file format.
        print '-'*10, ' This is a Plexera DAM exported data file. ', '-'*10
        book = xlrd.open_workbook(fname)
        # Shape the data in the spreadsheet, whether a single sheet or multiple sheets.
        dataobj = dam_spreadsheet_format(book)
        flag = str.upper(raw_input('Load the gal file? (y/n): '))
        if flag == 'Y':
            fgal = raw_input('Input the path of the gal file: ')
            ROIinfo = galfile_read(fgal)
            dataobj.updateROIinfo(ROIinfo)

    # Ask for the analyte/injection table; fall back to the default
    # injection data so a file can be converted without an analyte file.
    flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
    if flag == 'Y':
        fprotocol = raw_input('Input the path of the analyte table: ')
        sampleinfo = method_read(fprotocol)
        dataobj.updateSampleinfo(sampleinfo)
    else:
        dataobj.updateSampleinfo(method_read_fake())

    return dataobj

def outlier_remove(obj):
    ##dataobj.viewAll()
    """Function: Remove unwanted data points from noisy periods.
    Return: the data object with clean data.
    """
    dataobj = copy.deepcopy(obj)
    print ('Select the time area that you want to remove.')
    tmpstr = raw_input('in the format of "200:240, 500:510" :')
    areatodel = tmpstr.split(',')
    data = dataobj.data
    for i in areatodel:
        ##if
        t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
        t = data[0]  # for dataformat=0 Shaped_data
        tmpind = plt.find((t > t1) & (t < t2))  # find the outlier indices
        ## data[1:,tmpind] = 0
        tmplist = data.tolist()
        tmplist2 = []
        for j in tmplist:
            del j[min(tmpind):max(tmpind)]
            tmplist2.append(j)
        data = np.array(tmplist2)
        ## tmplist = tmplist
    dataobj.data = data
    # dataobj.status = 'Outlier removed'
    dataobj.updateStatus(outlier_remove=True)
    return dataobj

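## Example of the expected interactive input (illustration only): entering
## "200:240, 500:510" removes the points recorded while 200 < t < 240 and
## 500 < t < 510 from every row of the data matrix.
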
def calibrate(obj):
    """Function: Calibrate the intensity response.
    Return: the data object with calibrated data.
    Note: at present, this function is valid only when the whole
    procedure includes a calibration procedure.
    """
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    caldata = copy.deepcopy(data)
    t = data[0]
    if dataobj.dataformat != 0:
        print 'We only calibrate the raw data.'
        return
    else:
        t1 = input('Enter the time position of the TOP response during calibration:')
        s1 = input('The refractive index of the TOP response is: ')
        t2 = input('Enter the time position of the BOTTOM response during calibration:')
        s2 = input('The refractive index of the BOTTOM response is: ')
        ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
        ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]

        for i, y in enumerate(data[1:]):
            y1, y2 = y[ind1], y[ind2]
            slope = (s1-s2)/(y1-y2)
            offset = s1 - slope*y1
            caldata[i+1] = slope*y + offset
        dataobj.data = caldata
        dataobj.updateStatus(calibrate=True)
        return dataobj

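## Worked example of the two-point calibration above (made-up numbers): if
## the TOP response is s1=1.3350 at intensity y1=2000 and the BOTTOM
## response is s2=1.3330 at y2=1000, then
##   slope  = (1.3350-1.3330)/(2000-1000) = 2e-6
##   offset = 1.3350 - 2e-6*2000 = 1.3310
## and every intensity y is mapped to slope*y + offset.
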
def get_background_value(obj, bgids):
    """Get the averaged value of the background spots for each ROI."""
    bgsignal = obj.data[0]*0
    for j in bgids:
        if j == obj.ROIinfo[j-1]['ID']:
            bgsignal = bgsignal + obj.data[j]
        elif j != obj.ROIinfo[j-1]['ID']:
            for n, eachspot in enumerate(obj.ROIinfo):
                if j == eachspot['ID']:
                    bgsignal = bgsignal + obj.data[n+1]
    bgsignal = bgsignal/len(bgids)
    return bgsignal

def background_subtract(obj, *bgspot):
    """Function: Perform the background subtraction for the UNSPLIT curve.
    Input besides "obj" is the id of the spot taken as background.
    The following inputs are acceptable:
    1. background_subtract(obj): the default background in the Gal file
       will be subtracted.
    2. background_subtract(obj, 1, 6): the average value of spot1
       and spot6 will be subtracted.
    """
    dataobj = copy.deepcopy(obj)
    ROIinfo = obj.ROIinfo
    data = dataobj.data

    if dataobj.dataformat == 0:
        newdata = np.zeros(np.shape(data))
        newdata[0] = data[0]  # the time scale
        if bgspot == ():
            # The average of the default background spots listed in the gal file
            # is subtracted.
            for i in range(1, dataobj.ROInum+1):
                bgids = ROIinfo[i-1]['Background Spot']
                bgsignal = get_background_value(dataobj, bgids)
                newdata[i] = data[i] - bgsignal
            # Update the status of the data object.
            dataobj.updateStatus(BackgroundType='Default in Gal')

        else:
            # The average of the manually input background spots
            # is subtracted.
            for i in range(1, dataobj.ROInum+1):
                bgsignal = get_background_value(dataobj, bgspot)
                newdata[i] = data[i] - bgsignal
            dataobj.updateStatus(BackgroundType='Manually chosen')
        dataobj.data = newdata
        dataobj.updateStatus(background_subtraction=True)
        return dataobj

    else:
        print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
        return

def curve_split(obj, t_before, t_after, *t_inj):
    """Function: Split the whole curve that contains several injections
    into pieces.
    The sample information is checked during the split; if the injection
    number in sampleinfo is not consistent with the input injection
    parameters, it will help you update the sample information.
    Input besides obj:
        t_before: time duration before the injection time point
        t_after: time duration after the injection time point
        t_inj: the exact start time of each injection
    Return: the data object with split data and updated sample information.
    """
    dataobj = copy.deepcopy(obj)
    ROInum = dataobj.ROInum
    t = dataobj.data[0]
    ind = []
    # Find how many lines of data belong to each injection.
    for i in range(len(t_inj)):
        ind.append(np.shape(plt.find((t > (t_inj[i]-t_before)) & (t < (t_inj[i]+t_after))))[0])
    m, j = max(ind), max(ind)-min(ind)
    # The new dataset must be an m*((ROInum+1)*num_inj) matrix:
    # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
    tmpdata = np.zeros(((ROInum+1)*len(t_inj), m))

    for i in range(len(t_inj)):
        TmpInd = plt.find((t > (t_inj[i]-t_before)) & (t < (t_inj[i]+t_after)))
        Tmp1, Tmp2 = i*(ROInum+1), (i+1)*(ROInum+1)
        tmpdata[Tmp1:Tmp2, 0:len(TmpInd)] = dataobj.data[:, TmpInd]
    if j != 0:
        Split_data = tmpdata[:, :-j]
    else:
        Split_data = tmpdata[:]
    # Zero and align the split data.
    for i in range(np.shape(Split_data)[0]):
        Split_data[i] = Split_data[i] - Split_data[i][0]

    # check_sample_info returns None when the check fails, so test before unpacking.
    splitinfo = check_sample_info(dataobj, t_before, t_after, t_inj)
    if splitinfo != None:
        newsampleinfo, injectinfo = splitinfo
        dataobj.data = Split_data
        dataobj.updateStatus(DataSplit=True)
        dataobj.updateSampleinfo(newsampleinfo)
        dataobj.dataformat = 2
        dataobj.injectinfo = injectinfo
        return dataobj
    else:
        dataobj.updateStatus(DataSplit=False)
        print 'Curve Splitting not successful!'

        return

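## Layout of the split data produced above (illustration only): with
## ROInum=2 ROIs and two injections, Split_data has (2+1)*2 = 6 rows,
## ordered as [T1, ROI1_inj1, ROI2_inj1, T2, ROI1_inj2, ROI2_inj2],
## and each row is zeroed to its own first point.
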
def check_sample_info(dataobj, t_before, t_after, t_inj):
    """Check whether the sample information is consistent with the injection
    parameters input. If not, help the user update the sample information
    for the split curve.
    """
    injectinfo = []
    if len(dataobj.sampleinfo) == len(t_inj):
        for i, j in enumerate(t_inj):
            injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
        return dataobj.sampleinfo, injectinfo

    elif len(dataobj.sampleinfo) > len(t_inj):
        print """ The number of injections you just input does not match that in the sample information.
 If you just want to split part of the curve, please follow the directions below to update the sampleinfo in the new object."""
        choose = str.upper(raw_input('Continue? (y/n)'))
        if choose == 'Y':
            newsampleinfo, injectinfo = [], []
            print ' Your input indicates %s injections out of the %s in the experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
            for n, i in enumerate(dataobj.sampleinfo):
                print '--', n, '.', i['Name'], ':', i['Concentration']
            print ' Enter the ID numbers of the injections you selected, in the form of "2-10" or "4,5,6".'
            tmpstr = raw_input(':')
            if ',' in tmpstr:
                injID = map(int, tmpstr.split(','))
            elif '-' in tmpstr:
                injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
            else:
                print ' Illegal input!'
                return

            if len(injID) != len(t_inj):
                print ' Please input %s injection ID numbers instead of the %s you just input.' % (len(t_inj), len(injID))
                return

            for i in injID:
                newsampleinfo.append(dataobj.sampleinfo[i])
                injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
            return newsampleinfo, injectinfo
        else:
            return

    elif len(dataobj.sampleinfo) < len(t_inj):
        print ' The number of injections you just input does not match that in the sample information.'
        print ' Your input indicates %s injections, while there are only %s in the experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
        print ' Please check.'

        return

def write_data(Head, data_out, ROI_out, DataDir, output_format):
    """Write the data into a txt file."""
    # Modified 100405 CGL
    # Build the output file name.
    if output_format == 'B':
        ftitle = 'biosensor'
        Tmpstr = str(ROI_out).replace(', ', '_')
        Tmpstr = Tmpstr.replace('(', '-')
        foutname = ftitle+Tmpstr[:-1]+'.txt'
    elif output_format == 'C':
        foutname = 'clamp-'+str(ROI_out)+'.txt'
    else:
        ftitle = ''

    # Change directories, if requested.
    if (DataDir != ""):
        os.chdir('..')
        try:
            os.makedirs(DataDir)
        ##except WindowsError:
        except OSError:
            pass
        os.chdir(DataDir)

    np.savetxt('temp.txt', np.transpose(data_out), fmt='%f')
    fdt = file('temp.txt', 'r')

    ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
    fout = file(foutname, 'w+')
    # Write the head of the data first.
    fout.write(Head+'\n')
    count = len(fdt.readlines())
    fdt.seek(0)
    for i in range(count):
        temp = fdt.readline()
        fout.write(temp.replace(' ', '\t'))
    fdt.close()
    fout.close()
    print '-------The path of the exported data is "', os.getcwd(), '"-------'

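## Worked example of the file naming above: for output_format='B' and
## ROI_out=(1, 6), str(ROI_out) is '(1, 6)', which becomes '-1_6' after the
## replacements and slicing, so the file is named 'biosensor-1_6.txt'.
## For output_format='C' and ROI_out=6, the file is named 'clamp-6.txt'.
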
def data_output_biosensor(obj, DataDir, *ROI_out):
    """Export the data into a biosensor format txt file,
    which can be processed in Scrubber.
    Input: the data object, the output path, and the ROIs selected for export.
    """
    # Initialize the data output matrix, which will contain (len(ROI_out)*2)*n_inj columns.
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum

    data_out = np.zeros(((len(ROI_out)*2)*n_inj, np.shape(data)[1]))
    head1 = ""
    new_ROIinfo = []
    for n, i in enumerate(ROI_out):
        new_ROIinfo.append(ROIinfo[i-1])  ## the new ROI info
        if i != ROIinfo[i-1]['IDcal']:
            # Check whether the ID number matches the position.
            print n, i, ROIinfo[i-1]
            return
        else:
            headcomp = ROIinfo[i-1]['ID']+': '+ROIinfo[i-1]['Name']
            try:
                headcomp = headcomp + ', '+ROIinfo[i-1]['Conc']
            except KeyError:
                pass
            headcomp = headcomp+' Fc='+str(n+1)+' - '
            for j in range(n_inj):
                headtmp = headcomp+str(j+1)+'_'
                head1 = head1+headtmp+'X\t'+headtmp+'Y\t'
                data_out[2*n*n_inj+2*j] = data[j*(ROInum+1)]
                data_out[2*n*n_inj+2*j+1] = data[j*(ROInum+1)+i]
    head1 = head1[:-1]

    dataobj.data = data_out
    dataobj.dataformat = 3  # biosensor dataformat
    dataobj.updateStatus(DataType='Biosensor data')
    dataobj.updateStatus(Head=head1)
    dataobj.updateROIinfo(new_ROIinfo)

    write_data(head1, data_out, ROI_out, DataDir, 'B')

    return dataobj

def data_output_clamp(obj, DataDir, ROI_out):
    """Export the data into a Clamp format txt file,
    which can be processed in Clamp.
    Input: the data object, the output path, and the selected ROI.
    """
    # Initialize the data output matrix, which will contain 2*n_inj columns.
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    n_inj = max(n_inj, 1)  # Must have at least one injection.
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum
    sampleinfo = dataobj.sampleinfo

    data_out = np.zeros((2*n_inj, np.shape(data)[1]))
    ## if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
    if (ROI_out != int(ROIinfo[ROI_out-1]['ID'])):
        print 'Please check the ROI information.'
        print ROI_out, ROIinfo[ROI_out-1]
        return
    else:
        tmp = ROIinfo[ROI_out-1]['ID']+': '+ROIinfo[ROI_out-1]['Name']
        try:
            tmp = tmp + ', Conc:'+ROIinfo[ROI_out-1]['Conc']
        except KeyError:
            pass
        Clamphead = 'Vers 3.41 '+tmp
        conc = ''
        t_start, t_stop = '', ''
        head1 = 'Time\tData\t'*n_inj
        head1 = head1[:-1]

        for i in range(n_inj):
            data_out[2*i] = data[i*(ROInum+1)]
            data_out[2*i+1] = data[i*(ROInum+1)+ROI_out]
            name = sampleinfo[i]['Name']
            conc = conc+'\t'+str(sampleinfo[i]['Concentration'])
            t_start = t_start+'\t'+str(dataobj.injectinfo[i][0])
            t_stop = t_stop+'\t'+str(dataobj.injectinfo[i][1])

        hconc, h_start, h_stop = '\nConc1'+conc, '\nStart1'+t_start, '\nStop1'+t_stop
        for i in (hconc, h_start, h_stop):
            if i[-1] != '':
                Clamphead = Clamphead+i

        Clamphead = Clamphead+'\n'+head1

        dataobj.data = data_out
        dataobj.dataformat = 4  # clamp dataformat
        dataobj.updateStatus(DataType='Clamp data')
        dataobj.updateStatus(Head=Clamphead)
        dataobj.updateROIinfo(ROIinfo[ROI_out-1])

        write_data(Clamphead, data_out, ROI_out, DataDir, 'C')

        return dataobj
        ## return data_out,Clamphead

if (__name__ == '__main__'):
    # Print a short message if this module is run like an app.
    print "This is the OSPRAI file type conversion tool."