root/osprai/osprai/trunk/converter.py
Revision: 1
Committed: Wed Mar 17 05:34:43 2010 UTC by clausted
File size: 32567 byte(s)
Log Message:
Initial import of Osprai project
'''
Converter: Perform the datafile reading, pre-processing and writing.

Yuhang Wan, Mar 1, 2010
__version__ = "1.3"

Typical Pipeline:
    DatafileRead (GalfileRead, MethodRead)
    OutliarRemove
    Calibrate
    BackgroundSubtract
    CurveSplit
    DataOutputClamp or DataOutputBiosensor

'''
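## A minimal usage sketch of the pipeline above (added for illustration; the
## filename, injection times and ROI number are placeholders that must be
## adapted to the actual experiment):
##
##     dataobj = DatafileRead('experiment.xls')            # read raw data (dataformat 0)
##     dataobj = OutliarRemove(dataobj)                    # optional noise removal
##     dataobj = Calibrate(dataobj)                        # optional two-point calibration
##     dataobj = BackgroundSubtract(dataobj)               # default background from the Gal file
##     dataobj = CurveSplit(dataobj, 50, 250, 300, 1800)   # two injections at t=300 s and t=1800 s
##     DataOutputClamp(dataobj, 'exported', 1)             # export ROI 1 in Clamp format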

import numpy as np
import pylab as plt
import sys
import xlrd
import time
import os
import copy

## import matplotlib.pyplot as plt
## import matplotlib.mlab as mlb
## import packageClamp_100210 as Pack
import SPRdataclass_100301 as SPR


def checkformatinput(fname):
    '''Examine the format of an input .txt data file.'''
    # open the file; if it doesn't exist, exit
    try:
        fp = open(fname, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fname
        sys.exit(0)
    # examine the first line of the input file
    # for SPRit output: "Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
    # for Plexera ICM output: each line starts with a date/time stamp (year field starts with '20')
    head_input1 = "Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\n"

    Format_input = 0
    # check the file format
    Tmpstr = fp.readline()
    if Tmpstr == head_input1:
        Format_input = 1
    elif Tmpstr[-1] == '\n' and Tmpstr[-2] == '\r' and Tmpstr.split(' ')[0].split('/')[2].startswith('20'):
        Format_input = 2
    elif Tmpstr.startswith('Vers'):
        Format_input = 3

    fp.close()
    return Format_input, Tmpstr

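# Format codes returned by checkformatinput (inferred from the checks above):
#   1 - SPRit export       (first line is the "Elapsed Time ... Intensity" header)
#   2 - Plexera ICM export (first field is a date whose year starts with '20', CRLF line endings)
#   3 - Clamp data file    (first line starts with "Vers")
#   0 - unrecognized format
# A minimal usage sketch (the filename is a placeholder):
## fmt, firstline = checkformatinput('spotdata.txt')
## if fmt == 0:
##     print 'Unknown file format:', firstline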

def SPRitformat(fname):
    '''Read the SPRit formatted data and reshape it.
    Return the data packed into an SPR.DataPass object.
    '''
    # open the file; if it doesn't exist, exit
    try:
        fp = open(fname, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fname
        sys.exit(0)
    status = {'DatainputType':'SPRit data'}
    # skip the first line of the input file
    Tmpstr = fp.readline()
    # check whether the second line is BEGIN
    TmpStr = fp.readline()
    if TmpStr != "BEGIN\n":
        print "Second line is not BEGIN. Break!"
        #sys.exit(0)
    # count the lines of each data spot
    TmpStr = fp.readline()  # skip the first "0.000000e+000"
    num_line = 1            # so we start to count from 1
    # count until the next "0.000000e+000" is read
    while True:
        TmpStr = fp.readline()
        if TmpStr.split('\t')[0] == "0.000000e+000":
            break
        else:
            num_line = num_line + 1
    # count the spots in the file
    num_spot = 0
    # skip the first two lines
    fp.seek(0)
    for i in range(0, 2):
        TmpStr = fp.readline()
    text = fp.readlines()
    for i in text:
        num_spot += i.count('0.000000e+000')
    fp.close()
    #---------------------- Reshaping the data --------------------
    # reshape the data in string form: num_spot * num_line
    Shaped_str = np.reshape(text, (num_spot, -1))
    tmpshape = np.shape(Shaped_str)
    # reshape the data in numerical form: (num_spot+1) * num_line
    # only the first row is time: time spot1 spot2 ... spotn
    time = np.zeros(num_line)
    Tmpdata = Shaped_str[0, :]
    for n, i in enumerate(Tmpdata):
        time[n] = float(i.split('\t')[0])
    Shaped_data_1 = np.zeros((tmpshape[0]+1, tmpshape[1]))
    Shaped_data_1[0, :] = time
    for n1, i in enumerate(Shaped_str):
        for n2, j in enumerate(i):
            Shaped_data_1[n1+1][n2] = float(Shaped_str[n1][n2].split('\t')[1])
    # reshape the data in numerical form:
    # time1 spot1 time2 spot2 ... timen spotn
    Shaped_data_2 = np.zeros((tmpshape[0]*2, tmpshape[1]))
    for i in range(tmpshape[0]):
        Shaped_data_2[i*2] = Shaped_data_1[0]
        Shaped_data_2[i*2+1] = Shaped_data_1[i+1]

    ## status = {'Datainput Type':'SPRit data'}
    dataformat = 0
    ROInum = num_spot
    dataobj = SPR.DataPass(Shaped_data_1, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj

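# Sketch of the SPRit file layout assumed by SPRitformat (inferred from the
# parsing above; the two columns are tab-separated and every spot's block
# restarts at time 0.000000e+000; the intensity values are placeholders):
##   Elapsed Time (Seconds)    Average Intensity (Pixel Intensity)
##   BEGIN
##   0.000000e+000    52.13
##   1.000000e+000    52.20
##   ...
##   0.000000e+000    48.91    <- first line of the next spot
##   ...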

def PlexeraICMformat(fname):
    '''Read the txt data exported from the Plexera ICM software and reshape it.
    Return the data packed into an SPR.DataPass object.
    '''
    fp = file(fname, 'r')
    Tmpdata, tmptime = [], []
    status = {'DatainputType':'Plexera ICM data'}
    for eachline in fp:
        Tmpdata.append(eachline.split('\t'))
    num_line, num_spot = np.shape(Tmpdata)
    # initialize Shaped_data
    Shaped_data = np.zeros((num_spot, num_line))
    num_spot = num_spot - 1  # the real number of spots
    Tmpdata = np.transpose(Tmpdata)
    # strip the trailing '\r\n' from the last column of the datafile
    for n, i in enumerate(Tmpdata[-1]):
        Tmpdata[-1][n] = i.rstrip()
    # time conversion: convert the first column (date and time information) into seconds
    for i in Tmpdata[0]:
        tmptime_str = i.split(' ')[1]
        timelist = tmptime_str.split(':')
        now = float(timelist[0])*60*60 + float(timelist[1])*60 + float(timelist[2])
        tmptime.append(now)
    Shaped_data[0] = tmptime
    Shaped_data[0] = Shaped_data[0] - Shaped_data[0][0]
    # assign the signal data
    for i in range(num_spot):
        Shaped_data[i+1] = Tmpdata[i+1]

    dataformat = 0
    ROInum = num_spot
    dataobj = SPR.DataPass(Shaped_data, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
    return dataobj

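# Worked example of the time conversion above: an ICM first-column entry such
# as "03/01/2010 14:05:30.5" (date, a space, then hh:mm:ss) gives
#   14*3600 + 5*60 + 30.5 = 50730.5 seconds,
# and subtracting the first entry makes the time axis start at zero.  The exact
# date string is an assumption; only the "hh:mm:ss after a space" structure is
# used by the code.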


def DAMsinglesheet(sh, DataList, ROIinfo, start, end, colsize):
    '''Retrieve and shape the data from one single sheet of the spreadsheet.
    Return the data and the ROI information in the form of lists.
    '''
    ## and return the minimal spot Id (start_spot)
    ## retrieve the head of the spreadsheet; SpotList is used to store the spot Id of each sheet

    Head = sh.row_values(1)
    SpotList = []
    num_spot = sh.row_values(0).count('Relative Time')

    for i in range(num_spot):
        TmpHead = Head[colsize*i]
        # there are 5 columns per ROI for Raw Intensity
        # and 4 columns per ROI for Satellite subtracted Intensity
        DataList.append(sh.col_values(2+colsize*i, start, end))

        key, val = [], []
        tmpinfo = TmpHead.split('\n')
        for j in tmpinfo:
            key.append(j.split(': ')[0])
            val.append(j.split(': ')[1])
        tmpdic = dict(zip(key, val))
        tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
        ROIinfo.append(tmpdic)

    return DataList, ROIinfo

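# The per-ROI column header parsed by DAMsinglesheet is assumed to be a
# newline-separated list of "key: value" pairs containing at least the Block,
# Row, Column and ID fields, for example (values are placeholders):
##   Block: 0
##   Row: 0
##   Column: 1
##   ID: spot2
# The pairs are collected into a dict and 'Position' is stored as the 1-based
# (block, row, column) tuple.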
def DAMspreadsheetformat(book):
    '''Read the spreadsheet exported from the Plexera DAM software and reshape it.
    Return the shaped data and ROI information.
    '''
    ROIinfo, newROIinfo, n_sheet = [], [], 0
    DataList = []
    status = {'DatainputType':'Plexera DAM data'}
    sh0 = book.sheet_by_index(0)
    if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
        print 'This is the satellite subtracted data.'
        colsize = 4
        status['DAM spreadsheet'] = 'Satellite subtracted Intensity'
    elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
        print 'This is the raw intensity data.'
        colsize = 5
        status['DAM spreadsheet'] = 'Raw Intensity'

    for i in range(2, sh0.nrows):
        if sum(sh0.col_values(2, 1, i)) > 0:
            break
    start, end = i-1, sh0.nrows
    num_line = end - start
    ## print 'start: ', start
    # time axis initialization and assignment
    Time = sh0.col_values(1, start, end)
    # in case we lose the time axis during the DAM processing
    if sum(Time) == 0: Time = np.arange(num_line)
    DataList.append(Time)

    # list DataList stores the Shaped_data of each ROI in each sheet,
    # list ROIinfo stores the spot id and the ligand name of each ROI in each sheet
    for i in range(book.nsheets):
        sh = book.sheet_by_index(i)
        if sh.ncols != 0:
            DataList, ROIinfo = DAMsinglesheet(sh, DataList, ROIinfo, start, end, colsize)
            n_sheet = n_sheet + 1
        else:
            break
    # calculate the dimensions of Shaped_data and initialize the matrix
    num_spot = len(DataList) - 1
    num_line = end - start
    Shaped_data = np.zeros((num_spot+1, num_line))
    Shaped_data[0] = DataList[0]

    # in case the ROIs do not count from spot1
    newROIinfo = copy.deepcopy(ROIinfo)
    StartspotList = np.array([i['ID'] for i in ROIinfo])
    start_spot = min(StartspotList)
    for i in range(num_spot):
        tmp = ROIinfo[i]['ID']
        try:
            j = int(ROIinfo[i]['ID']) - (start_spot-1)
        except ValueError:
            ##return tmp
            if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
                j = int(ROIinfo[i]['ID'][4:]) - (start_spot-1)
            else:
                print 'Illegal galfile format.\n', 'ID:', tmp
                ##return tmp

        Shaped_data[j] = DataList[i+1]
        newROIinfo[j] = ROIinfo[i]

    # pack the data and related information into an SPRdata object
    dataformat = 0
    ROInum = num_spot
    dataobj = SPR.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj

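# A minimal usage sketch (the filename is a placeholder); DatafileRead below
# does the same thing automatically for .xls inputs:
## book = xlrd.open_workbook('dam_export.xls')
## dataobj = DAMspreadsheetformat(book)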

def ReadClampData(fname):
    '''Read the Clamp format data.
    Return the data packed into an SPR.DataPass object.
    '''
    fp = open(fname, 'r')
    Tmpstr = fp.readline()
    # examine the file head
    # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
    ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
    try:
        name = Tmpstr.split(' ')[2].strip()
        ROIdic['ROI name'] = name
        stmp = Tmpstr.split(' ')[3]
        rest = Tmpstr[Tmpstr.find(stmp):].strip()
        ROIdic['concentration'] = rest.split(':')
    except IndexError:
        pass

    for Tmpstr in fp:
        try:
            float(Tmpstr.split('\t')[0])
            n_conc = (Tmpstr.count('\t')+1)/2
            ##print Tmpstr, 'the beginning of data'
            break
        except ValueError:
            if Tmpstr.startswith('Conc1'):
                n_conc = Tmpstr.count('\t')
                Conc = Tmpstr.strip().split('\t')[1:]
            elif Tmpstr.startswith('Start1'):
                Start = Tmpstr.strip().split('\t')[1:]
                n_conc = Tmpstr.count('\t')
            elif Tmpstr.startswith('Stop1'):
                Stop = Tmpstr.strip().split('\t')[1:]
                n_conc = Tmpstr.count('\t')

    # write the data into a matrix
    Tmpdata = []
    for eachline in fp:
        a = eachline.strip().split('\t')
        Tmpdata.append(np.array(a, dtype='f'))
    Data = np.transpose(Tmpdata)

    # write the information
    ROIinfo = [ROIdic]
    status = {'DatainputType':'Clamp data'}
    sampleinfo, injinfo = [], []
    injpara = [map(float, i) for i in [Start, Stop]]

    if len(Conc) >= 1:
        for i, c in enumerate(Conc):
            sampledic = {}
            name = 'Sample ' + str(i+1)
            sampledic = dict(zip(['Name', 'Concentration'], [name, c]))
            ## sampledic['Name'] = name
            ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
            sampleinfo.append(sampledic)
            injdic = dict(zip(['ton', 'toff'], [float(Start[i]), float(Stop[i])]))
            injinfo.append(injdic)
    dataobj = SPR.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    return dataobj


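# Sketch of the Clamp file head expected by ReadClampData (values are
# illustrative only; fields are tab-separated):
##   Vers 3.41 cTBA-1 Conc:250 nM
##   Conc1    2.5e-7    5e-7
##   Start1   60        60
##   Stop1    360       360
##   Time     Data      Time      Data
##   0.0      0.0       0.0       0.0
# The parser only relies on the 'Conc1', 'Start1' and 'Stop1' prefixes and on
# the first tab-separated field of each data line being numeric.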

def KeyfileRead(fkey):
    # The key file contains the spot id, the immobilized protein name and,
    # optionally, its concentration and a background spot id.
    '''Function: Read the key file for the old SPR instrument.
    Input: the filename.
    Return: the ROI information.
    '''
    try:
        fp = open(fkey, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fkey
        sys.exit(0)
    ROIinfo = []
    firstline = fp.readline()  # skip the first line
    # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        ROIdic['ID'] = tmplist[0]    # spot id
        ROIdic['Name'] = tmplist[1]  # name of the protein immobilized
        try:
            ROIdic['Conc'] = tmplist[2]
            ROIdic['Background Spot'] = int(tmplist[3])
        except IndexError:
            pass
        ROIinfo.append(ROIdic)

    fp.close()
    print 'There are %d ROIs in the Key file.' % len(ROIinfo)
    return ROIinfo

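# Sketch of the tab-separated key file consumed by KeyfileRead: the first line
# is skipped, then each line carries the spot id, the immobilized protein name
# and, optionally, a concentration and a background spot id.  An illustrative
# data line (values are placeholders):
##   1    anti-IgG    100 nM    6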

def GalfileRead(fname):
    '''Function: Read the GAL file.
    Input: the filename.
    Return: the ROI information.
    '''
    fp = file(fname, 'r')
    ROIinfo = []
    Tmpstr = fp.readline()  # skip the first line
    Tmpstr = fp.readline()
    Tmpn = int(Tmpstr.split('\t')[0])
    for i in range(Tmpn):
        labelline = fp.readline()

        if labelline.startswith('"Block1'):
            a = labelline.strip().split(',')
            columnsize, rowsize = int(a[3]), int(a[5])
            ## print labelline

    def IDcal(p):
        # convert a (block, row, column) position into a sequential spot id
        id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
        return id
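    # Worked example: with columnsize=10 and rowsize=10 (placeholder values
    # taken from the "Block1" label line), position (1, 2, 3) gives
    #   (1-1)*10*10 + (2-1)*10 + 3 = 13,
    # i.e. the spots are numbered across columns, then rows, then blocks.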

    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        pos = tuple(map(int, tmplist[:3]))
        ROIdic['Position'] = pos
        ROIdic['IDcal'] = IDcal(pos)
        ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]

        ROIdic['Name'] = tmplist[3]
        ROIdic['ID'] = tmplist[4]
        try:
            ROIdic['Conc'] = tmplist[5]
            ROIdic['Family'] = tmplist[6]
            blist = tmplist[7][1:-1].split(';')  # always something like '"1,1,2; 1,2,2"'
            Backn = len(blist)
            Bid = []

            for i in blist:
                if '' in i.split(','):
                    break
                Bpos = tuple(map(int, i.split(',')))
                Bid.append(IDcal(Bpos))

            ROIdic['Background Spot'] = tuple(Bid)
        except IndexError:
            pass

        if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
            print 'The spot ID should be consistent with the position. Please check the ID.'
            break
        else:
            ROIdic['ID'] = ROIdic['IDcal']
            ROIinfo.append(ROIdic)
    fp.close()
    print 'There are %d ROIs in the Gal file.' % len(ROIinfo)

    return ROIinfo



def MethodRead(fname):
    '''Function: Read the analyte table.
    Input: the filename
    Return: the sample information
    '''
    # create a concentration dictionary for each sample injected into the flowcell
    # export a table containing the concentration of the injected sample and the duration
    fp = file(fname, 'r')
    sampleinfo = []
    ##TmpTable,TmpTable2={},{}
    labelline = fp.readline()
    for n, eachline in enumerate(fp):
        sampledic = {}
        sampledic['Location'] = eachline.split('\t')[0]
        sampledic['Name'] = eachline.split('\t')[1]
        sampledic['Concentration'] = float(eachline.split('\t')[2])
        sampledic['Duration'] = float(eachline.split('\t')[4])
        sampledic['Flow Rate'] = float(eachline.split('\t')[3])
        sampledic['Analyte Series'] = eachline.split('\t')[6]
        sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
        sampleinfo.append(sampledic)

    print 'Name', '\t', 'Concentration'
    for i in sampleinfo:
        print i['Name'], '\t', i['Concentration']

    return sampleinfo

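# Column layout assumed by MethodRead for the tab-separated analyte table (one
# header line, then one line per injection); columns 0-7 are
#   Location, Name, Concentration, Flow Rate, Duration, <unused>,
#   Analyte Series, Buffer Blank Series.
# An illustrative data line (values are placeholders):
##   A1    IgG    2.5e-7    2.0    300    -    1    0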


def DatafileRead(fname):
    '''Function: Read the raw data from the Plexera instrument,
    and pack the raw data (Shaped_data) into a data object.
    Therefore, the initial dataformat = 0
    Input: the filename
    Return: the packed SPR.DataPass type data object
    '''
    # =================== check the format of the input files ===================
    if fname.split('.')[-1] == 'txt':

        Format_input, Firstline = checkformatinput(fname)
        if Format_input == 1:    # SPRit file format
            dataobj = SPRitformat(fname)
            print '-'*10, ' This is a SPRit data file. ', '-'*10

        elif Format_input == 2:  # Plexera ICM file format
            dataobj = PlexeraICMformat(fname)
            print '-'*10, ' This is a Plexera ICM exported data file. ', '-'*10

        elif Format_input == 3:  # Clamp datafile format
            dataobj = ReadClampData(fname)
            print '-'*10, ' This is a Clamp file. ', '-'*10

        else:
            print '-'*10, ' unreadable file input! ', '-'*10
            sys.exit(0)

        if Format_input == 1 or Format_input == 2:
            flag = str.upper(raw_input('Load the key file? (y/n): '))
            if flag == 'Y':
                fkey = raw_input('Input the path of the key file: ')
                ROIinfo = KeyfileRead(fkey)
                dataobj.updateROIinfo(ROIinfo)

    elif fname.split('.')[-1] == 'xls':
        # the DAM spreadsheet file format
        print '-'*10, ' This is a Plexera DAM exported data file. ', '-'*10
        book = xlrd.open_workbook(fname)
        # shape the data in the spreadsheet, whether single sheet or multiple sheets
        dataobj = DAMspreadsheetformat(book)
        flag = str.upper(raw_input('Load the gal file? (y/n): '))
        if flag == 'Y':
            fgal = raw_input('Input the path of the gal file: ')
            ROIinfo = GalfileRead(fgal)
            dataobj.updateROIinfo(ROIinfo)
        flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
        if flag == 'Y':
            fprotocol = raw_input('Input the path of the analyte table: ')
            sampleinfo = MethodRead(fprotocol)
            dataobj.updateSampleinfo(sampleinfo)

    return dataobj



def OutliarRemove(obj):
    ##dataobj.viewAll()
    '''Function: Remove the noisy area that you want to get rid of.
    Return: the data object with the clean data
    '''
    dataobj = copy.deepcopy(obj)
    print ('Select the time area that you want to remove.')
    tmpstr = raw_input('in the format of "200:240, 500:510" :')
    areatodel = tmpstr.split(',')
    data = dataobj.data
    for i in areatodel:
        t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
        t = data[0]  # for dataformat=0 Shaped_data
        tmpind = plt.find((t > t1) & (t < t2))  # find the outlier indices
        ## data[1:,tmpind] = 0
        tmplist = data.tolist()
        tmplist2 = []
        for j in tmplist:
            del j[min(tmpind):max(tmpind)]
            tmplist2.append(j)
        data = np.array(tmplist2)
    dataobj.data = data
    # dataobj.status = 'Outliar removed'
    dataobj.updateStatus(OutliarRemove=True)
    return dataobj

def Calibrate(obj):
    '''Function: Calibrate the intensity response.
    Return: the data object with the calibrated data
    Note: at present, this function is valid only when
          the whole procedure includes a calibration procedure.
    '''
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    caldata = copy.deepcopy(data)
    t = data[0]
    if dataobj.dataformat != 0:
        print 'We only calibrate the raw data.'
        return
    else:
        t1 = input('Enter the time position of the TOP response during calibration:')
        s1 = input('The refractive index of the TOP response is: ')
        t2 = input('Enter the time position of the BOTTOM response during calibration:')
        s2 = input('The refractive index of the BOTTOM response is: ')
        ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
        ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]

        for i, y in enumerate(data[1:]):
            y1, y2 = y[ind1], y[ind2]
            slope = (s1-s2)/(y1-y2)
            offset = s1 - slope*y1
            caldata[i+1] = slope*y + offset
        dataobj.data = caldata
        dataobj.updateStatus(Calibrate=True)
        return dataobj

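# The two-point calibration above maps the raw intensity y linearly onto the
# refractive index s using the two reference points (y1, s1) and (y2, s2):
#   slope  = (s1 - s2) / (y1 - y2)
#   offset = s1 - slope * y1
#   s      = slope * y + offset
# so the TOP and BOTTOM calibration responses are reproduced exactly.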
def getbackgroundvalue(obj, bgids):
    '''Get the averaged value of the background spots for each ROI.
    '''
    bgsignal = obj.data[0]*0
    for j in bgids:
        if j == obj.ROIinfo[j-1]['ID']:
            bgsignal = bgsignal + obj.data[j]

        elif j != obj.ROIinfo[j-1]['ID']:
            for n, eachspot in enumerate(obj.ROIinfo):
                if j == eachspot['ID']:
                    bgsignal = bgsignal + obj.data[n+1]
    bgsignal = bgsignal/len(bgids)
    return bgsignal


def BackgroundSubtract(obj, *bgspot):
    '''Function: Perform the background subtraction for the UNSPLIT curve.
    Input besides "obj" is the id of the spot(s) taken as background.
    The following inputs are acceptable:
    1. BackgroundSubtract(obj): the default background in the Gal file
       will be subtracted.
    2. BackgroundSubtract(obj, 1, 6): the average value of spot1
       and spot6 will be subtracted.
    '''
    dataobj = copy.deepcopy(obj)
    ROIinfo = obj.ROIinfo
    data = dataobj.data

    if dataobj.dataformat == 0:
        newdata = np.zeros(np.shape(data))
        newdata[0] = data[0]  # the time scale
        if bgspot == ():
            # the average of the default background spots listed in the galfile
            # is subtracted
            for i in range(1, dataobj.ROInum+1):
                bgids = ROIinfo[i-1]['Background Spot']
                bgsignal = getbackgroundvalue(dataobj, bgids)
                newdata[i] = data[i] - bgsignal
            # update the status of the data object
            dataobj.updateStatus(BackgroundType='Default in Gal')

        else:
            # the average of the manually input background spots
            # is subtracted
            for i in range(1, dataobj.ROInum+1):
                bgsignal = getbackgroundvalue(dataobj, bgspot)
                newdata[i] = data[i] - bgsignal
            dataobj.updateStatus(BackgroundType='Manually chosen')
        dataobj.data = newdata
        dataobj.updateStatus(BackgroundSubtraction=True)
        return dataobj

    else:
        print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
        return

def CurveSplit(obj, t_before, t_after, *t_inj):
    '''Function: Split the whole curve that contains several injections
    into pieces.
    The sample information will be checked during the split;
    if the injection number in sampleinfo is not consistent
    with the input injection parameters, it will help you to
    update the sample information.
    Input besides obj:
        t_before: time duration before the injection time point
        t_after: time duration after the injection time point
        t_inj: the exact start time of each injection
    Return: the data object with the split data and updated sample
    information
    '''
    dataobj = copy.deepcopy(obj)
    ROInum = dataobj.ROInum
    t = dataobj.data[0]
    ind = []
    # find how many lines of data belong to each injection
    for i in range(len(t_inj)):
        ind.append(np.shape(plt.find((t > (t_inj[i]-t_before)) & (t < (t_inj[i]+t_after))))[0])
    m, j = max(ind), max(ind)-min(ind)
    # the new dataset must be an m*((ROInum+1)*num_inj) matrix:
    # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
    tmpdata = np.zeros(((ROInum+1)*len(t_inj), m))

    for i in range(len(t_inj)):
        TmpInd = plt.find((t > (t_inj[i]-t_before)) & (t < (t_inj[i]+t_after)))
        Tmp1, Tmp2 = i*(ROInum+1), (i+1)*(ROInum+1)
        tmpdata[Tmp1:Tmp2, 0:len(TmpInd)] = dataobj.data[:, TmpInd]
    if j != 0:
        Split_data = tmpdata[:, :-j]
    else:
        Split_data = tmpdata[:]
    # zero and align the split data
    for i in range(np.shape(Split_data)[0]):
        Split_data[i] = Split_data[i] - Split_data[i][0]

    # checksampleinfo returns None when the input is inconsistent, so guard
    # against unpacking None
    result = checksampleinfo(dataobj, t_before, t_after, t_inj)
    if result != None:
        newsampleinfo, injectinfo = result
        dataobj.data = Split_data
        dataobj.updateStatus(DataSplit=True)
        dataobj.updateSampleinfo(newsampleinfo)
        dataobj.dataformat = 2
        dataobj.injectinfo = injectinfo
        return dataobj
    else:
        dataobj.updateStatus(DataSplit=False)
        print 'Curve Splitting not successful!'

        return

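# A minimal usage sketch (all times in seconds are placeholders): keep 50 s
# before and 250 s after each of two injections starting at t=300 s and
# t=1800 s.
## splitobj = CurveSplit(dataobj, 50, 250, 300, 1800)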

def checksampleinfo(dataobj, t_before, t_after, t_inj):
    '''Check whether the sample information is consistent with the injection
    parameters input. If not, help the user update the sample information
    for the split curve.
    '''
    injectinfo = []
    if len(dataobj.sampleinfo) == len(t_inj):
        for i, j in enumerate(t_inj):
            injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
        return dataobj.sampleinfo, injectinfo

    elif len(dataobj.sampleinfo) > len(t_inj):
        print ''' The number of injections you just input does not match that in the sample information.
        If you only want to split part of the curve, please follow the directions below to update the sampleinfo in the new object.'''
        choose = str.upper(raw_input('Continue? (y/n)'))
        if choose == 'Y':
            newsampleinfo, injectinfo = [], []
            print ' Your input indicates %s injections out of %s in the experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
            for n, i in enumerate(dataobj.sampleinfo):
                print '--', n, '.', i['Name'], ':', i['Concentration']
            print ' Enter the ID numbers of the injections you selected, in the form of "2-10" or "4,5,6".'
            tmpstr = raw_input(':')
            if ',' in tmpstr:
                injID = map(int, tmpstr.split(','))
            elif '-' in tmpstr:
                injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
            else:
                print ' Illegal input!'
                return

            if len(injID) != len(t_inj):
                print ' Please input %s injection ID numbers instead of the %s you just input.' % (len(t_inj), len(injID))
                return

            for i in injID:
                newsampleinfo.append(dataobj.sampleinfo[i])
                injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
            return newsampleinfo, injectinfo
        else:
            return

    elif len(dataobj.sampleinfo) < len(t_inj):
        print ' The number of injections you just input does not match that in the sample information.'
        print ' Your input indicates %s injections, but there are only %s in the experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
        print ' Please check.'

        return


def writedata(Head, data_out, ROI_out, DataDir, output_format):
    '''Write the data into a txt file.
    '''
    # build the output file name from the export format and the selected ROI(s)
    if output_format == 'B':
        ftitle = 'biosensor'
        Tmpstr = str(ROI_out).replace(', ', '_')
        Tmpstr = Tmpstr.replace('(', '-')
        foutname = ftitle + Tmpstr[:-1] + '.txt'
    elif output_format == 'C':
        foutname = 'clamp-' + str(ROI_out) + '.txt'
    else:
        ftitle = ''
    # change to the root dir and create the output folder if needed
    os.chdir('..')
    try:
        os.makedirs(DataDir)
    except WindowsError:
        pass
    os.chdir(DataDir)

    np.savetxt('temp.txt', np.transpose(data_out), fmt='%f')
    fdt = file('temp.txt', 'r')

    ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
    fout = file(foutname, 'w+')
    # write the head of the data first
    fout.write(Head + '\n')
    count = len(fdt.readlines())
    fdt.seek(0)
    for i in range(count):
        temp = fdt.readline()
        fout.write(temp.replace(' ', '\t'))
    fdt.close()
    fout.close()
    print '-------The path of the exported data is "', os.getcwd(), '"-------'

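# Worked example of the file naming above: for output_format 'B' and
# ROI_out == (1, 6), str(ROI_out) is '(1, 6)', which becomes '-1_6)' and then
# 'biosensor-1_6.txt'; for output_format 'C' and ROI_out == 3 the file name is
# 'clamp-3.txt'.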

def DataOutputBiosensor(obj, DataDir, *ROI_out):
    '''Export the data into a biosensor format txt file,
    which can be opened in Scrubber.
    Input: the data object, the output path, the ROIs selected for export
    '''
    # initialize the data output matrix, which will contain (len(ROI_out)*2)*n_inj columns
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum

    data_out = np.zeros(((len(ROI_out)*2)*n_inj, np.shape(data)[1]))
    head1 = ""
    new_ROIinfo = []
    for n, i in enumerate(ROI_out):
        new_ROIinfo.append(ROIinfo[i-1])  # the new ROI info
        if i != ROIinfo[i-1]['IDcal']:
            # check whether the ID number matches the position
            print n, i, ROIinfo[i-1]
            return
        else:
            headcomp = ROIinfo[i-1]['ID'] + ': ' + ROIinfo[i-1]['Name']
            try:
                headcomp = headcomp + ', ' + ROIinfo[i-1]['Conc']
            except KeyError:
                pass
            headcomp = headcomp + ' Fc=' + str(n+1) + ' - '
            for j in range(n_inj):
                headtmp = headcomp + str(j+1) + '_'
                head1 = head1 + headtmp + 'X\t' + headtmp + 'Y\t'
                data_out[2*n*n_inj+2*j] = data[j*(ROInum+1)]
                data_out[2*n*n_inj+2*j+1] = data[j*(ROInum+1)+i]
    head1 = head1[:-1]

    dataobj.data = data_out
    dataobj.dataformat = 3  # biosensor dataformat
    dataobj.updateStatus(DataType='Biosensor data')
    dataobj.updateStatus(Head=head1)
    dataobj.updateROIinfo(new_ROIinfo)

    writedata(head1, data_out, ROI_out, DataDir, 'B')

    return dataobj

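# A minimal usage sketch (directory name and ROI ids are placeholders): export
# ROIs 1 and 6 of a split data object into a Scrubber-readable text file.
## bioobj = DataOutputBiosensor(splitobj, 'exported', 1, 6)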
def DataOutputClamp(obj, DataDir, ROI_out):
    '''Export the data into a Clamp format txt file,
    which can be opened in Clamp.
    Input: the data object, the output path, the selected ROI
    '''
    # initialize the data output matrix, which will contain 2*n_inj columns
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum
    sampleinfo = dataobj.sampleinfo

    data_out = np.zeros((2*n_inj, np.shape(data)[1]))
    if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
        print 'Please check the ROI information.'
        print ROI_out, ROIinfo[ROI_out-1]
        return
    else:
        tmp = ROIinfo[ROI_out-1]['ID'] + ': ' + ROIinfo[ROI_out-1]['Name']
        try:
            tmp = tmp + ', Conc:' + ROIinfo[ROI_out-1]['Conc']
        except KeyError:
            pass
        Clamphead = 'Vers 3.41 ' + tmp
        conc = ''
        t_start, t_stop = '', ''
        head1 = 'Time\tData\t'*n_inj
        head1 = head1[:-1]

        for i in range(n_inj):
            data_out[2*i] = data[i*(ROInum+1)]
            data_out[2*i+1] = data[i*(ROInum+1)+ROI_out]
            name = sampleinfo[i]['Name']
            conc = conc + '\t' + str(sampleinfo[i]['Concentration'])
            t_start = t_start + '\t' + str(dataobj.injectinfo[i][0])
            t_stop = t_stop + '\t' + str(dataobj.injectinfo[i][1])

        hconc, h_start, h_stop = '\nConc1'+conc, '\nStart1'+t_start, '\nStop1'+t_stop
        for i in (hconc, h_start, h_stop):
            if i[-1] != '':
                Clamphead = Clamphead + i

        Clamphead = Clamphead + '\n' + head1

        dataobj.data = data_out
        dataobj.dataformat = 4  # Clamp dataformat
        dataobj.updateStatus(DataType='Clamp data')
        dataobj.updateStatus(Head=Clamphead)
        dataobj.updateROIinfo(ROIinfo[ROI_out-1])

        writedata(Clamphead, data_out, ROI_out, DataDir, 'C')

        return dataobj
        ## return data_out,Clamphead

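# A minimal usage sketch (directory name and ROI id are placeholders): export
# ROI 3 of a split data object into a Clamp-readable text file.
## clampobj = DataOutputClamp(splitobj, 'exported', 3)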