root/osprai/osprai/trunk/converter.py
Revision: 3
Committed: Tue Apr 6 05:40:47 2010 UTC (9 years, 6 months ago) by clausted
File size: 33042 byte(s)
Log Message:
Functions in converter are renamed to conform to Python PEP 8 style. Some uppercase names have been made lowercase.
1 """
2 Converter: Perform the datafile reading, pre-processing and writing.
3 Yuhang Wan, Christopher Lausted
4 Last modified on 100405 (yymmdd)
5
6 Typical Pipeline:
7 >import converter as cv
8 >dat = cv.datafile_read("example.txt")
9 >dat = cv.outlier_remove(dat)
10 >dat = cv.calibrate(dat)
11 >dat = cv.background_subtract(dat)
12 >dat = cv.curve_split(dat, 100, 800, 200)
13 >dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
14 >dat = cv.data_output_biosensor(dat, "/dir", 0)
15 """
__version__ = "100405"

import numpy as np
import pylab as plt
import sys
import xlrd
import time
import os
import copy

## import matplotlib.pyplot as plt
## import matplotlib.mlab as mlb
## import packageClamp_100210 as Pack
import SPRdataclass as spr

def check_format_input(fname):
    """Examine the format of an input .txt data file."""
    # Open the file; if it does not exist, exit.
    try:
        fp = open(fname, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fname
        sys.exit(0)
    # Examine the first line of the input file.
    # SPRit output starts with:
    #   "Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
    # Plexera ICM output starts with a date field (e.g. mm/dd/20xx) and the
    # line ends with "\r\n"; Clamp output starts with "Vers".
    head_input1 = "Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\n"

    Format_input = 0
    # Check the file format.
    Tmpstr = fp.readline()
    if Tmpstr == head_input1:
        Format_input = 1
    elif Tmpstr[-1] == '\n' and Tmpstr[-2] == '\r' and Tmpstr.split(' ')[0].split('/')[2].startswith('20'):
        Format_input = 2
    elif Tmpstr.startswith('Vers'):
        Format_input = 3

    fp.close()
    return Format_input, Tmpstr

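## Illustrative note (not part of the original module): assuming a SPRit-style
## text export, the call would return something like
##   >>> check_format_input("example.txt")
##   (1, "Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\n")
## where 1 = SPRit, 2 = Plexera ICM, 3 = Clamp, and 0 = unrecognized format.
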
def sprit_format(fname):
    """Read the SPRit formatted data and reshape it.
    Return the data in the form of a list.
    """
    # Open the file; if it does not exist, exit.
    try:
        fp = open(fname, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fname
        sys.exit(0)
    status = {'DatainputType': 'SPRit data'}
    # Skip the first line of the input file.
    Tmpstr = fp.readline()
    # Check whether the second line is "BEGIN".
    TmpStr = fp.readline()
    if TmpStr != "BEGIN\n":
        print "Second line is not BEGIN. Break!"
        #sys.exit(0)
    # Count the lines of each data spot.
    TmpStr = fp.readline()  # skip the first "0.000000e+000"
    num_line = 1  # so we start to count from 1
    # Count until the next "0.000000e+000" is read.
    while True:
        TmpStr = fp.readline()
        if TmpStr.split('\t')[0] == "0.000000e+000":
            break
        else:
            num_line = num_line + 1
    # Count the spots in each file.
    num_spot = 0
    # Skip the first two lines.
    fp.seek(0)
    for i in range(0, 2):
        TmpStr = fp.readline()
    text = fp.readlines()
    for i in text:
        num_spot += i.count('0.000000e+000')
    fp.close()
    #---------------------- Reshaping the data --------------------
    # Reshape the data in string form: num_spot*num_line.
    Shaped_str = np.reshape(text, (num_spot, -1))
    tmpshape = np.shape(Shaped_str)
    # Reshape the data in number form: (num_spot+1)*num_line,
    # where only the first row is time: time spot1 spot2 ... spotn
    time = np.zeros(num_line)
    Tmpdata = Shaped_str[0, :]
    for n, i in enumerate(Tmpdata):
        time[n] = float(i.split('\t')[0])
    Shaped_data_1 = np.zeros((tmpshape[0]+1, tmpshape[1]))
    Shaped_data_1[0, :] = time
    for n1, i in enumerate(Shaped_str):
        for n2, j in enumerate(i):
            Shaped_data_1[n1+1][n2] = float(Shaped_str[n1][n2].split('\t')[1])
    # Reshape the data in number form:
    # time1 spot1 time2 spot2 ... timen spotn
    Shaped_data_2 = np.zeros((tmpshape[0]*2, tmpshape[1]))
    for i in range(tmpshape[0]):
        Shaped_data_2[i*2] = Shaped_data_1[0]
        Shaped_data_2[i*2+1] = Shaped_data_1[i+1]

    ## status = {'Datainput Type':'SPRit data'}
    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data_1, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj

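## Illustrative note (not part of the original module): sprit_format() packs
## the data so that Shaped_data_1[0] holds the time axis and Shaped_data_1[k]
## holds the intensity trace of spot k (k = 1..num_spot); the DataPass object
## built from it therefore carries the time axis in row 0 of its data matrix.
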
def plexera_icm_format(fname):
    """Read the txt data exported from the Plexera ICM software and reshape it.
    Return the data in the form of a list.
    """
    fp = open(fname, 'r')
    Tmpdata, tmptime = [], []
    status = {'DatainputType': 'Plexera ICM data'}
    for eachline in fp:
        Tmpdata.append(eachline.split('\t'))
    num_line, num_spot = np.shape(Tmpdata)
    # Initialize Shaped_data.
    Shaped_data = np.zeros((num_spot, num_line))
    num_spot = num_spot - 1  # the real number of spots
    Tmpdata = np.transpose(Tmpdata)
    # Strip the trailing '\r\n' from the last column of the data file.
    for n, i in enumerate(Tmpdata[-1]):
        Tmpdata[-1][n] = i.rstrip()
    # Time conversion: convert the first column of the data file
    # (date and time information) into numerical time in seconds.
    for i in Tmpdata[0]:
        tmptime_str = i.split(' ')[1]
        timelist = tmptime_str.split(':')
        now = float(timelist[0])*60*60 + float(timelist[1])*60 + float(timelist[2])
        tmptime.append(now)
    Shaped_data[0] = tmptime
    Shaped_data[0] = Shaped_data[0] - Shaped_data[0][0]
    # Assign the signal data.
    for i in range(num_spot):
        Shaped_data[i+1] = Tmpdata[i+1]

    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
    return dataobj

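## Illustrative note (not part of the original module): the time conversion
## above maps a hypothetical stamp such as "04/06/2010 12:01:30" to
## 12*3600 + 1*60 + 30 = 43290 seconds, and the axis is then shifted so that
## the first sample starts at t = 0.
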
def dam_single_sheet(sh, DataList, ROIinfo, start, end, colsize):
    """Retrieve and shape data from one single sheet of the spreadsheet.
    Return the data and ROI information in the form of lists.
    """
    ## and return the minimal spot Id (start_spot)
    ## Retrieve the head of the spreadsheet; SpotList stores the spot Id of each sheet.

    Head = sh.row_values(1)
    SpotList = []
    num_spot = sh.row_values(0).count('Relative Time')

    for i in range(num_spot):
        TmpHead = Head[colsize*i]
        # There are now 5 columns per ROI for Raw Intensity
        # and 4 columns per ROI for Satellite subtracted Intensity.
        DataList.append(sh.col_values(2+colsize*i, start, end))

        key, val = [], []
        tmpinfo = TmpHead.split('\n')
        for j in tmpinfo:
            key.append(j.split(': ')[0])
            val.append(j.split(': ')[1])
        tmpdic = dict(zip(key, val))
        tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
        ROIinfo.append(tmpdic)

    return DataList, ROIinfo

def dam_spreadsheet_format(book):
    """Read the spreadsheet exported from the Plexera DAM software and reshape it.
    Return the shaped data and ROI information.
    """
    ROIinfo, newROIinfo, n_sheet = [], [], 0
    DataList = []
    status = {'DatainputType': 'Plexera DAM data'}
    sh0 = book.sheet_by_index(0)
    if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
        print 'This is the satellite subtracted data.'
        colsize = 4
        status['DAM spreadsheet'] = 'Satellite subtracted Intensity'
    elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
        print 'This is the raw intensity data.'
        colsize = 5
        status['DAM spreadsheet'] = 'Raw Intensity'

    for i in range(2, sh0.nrows):
        if sum(sh0.col_values(2, 1, i)) > 0:
            break
    start, end = i-1, sh0.nrows
    num_line = end - start
    ## print 'start: ', start
    # Time axis initialization and assignment.
    Time = sh0.col_values(1, start, end)
    # In case we lose the time axis during the DAM processing.
    if sum(Time) == 0: Time = np.arange(num_line)
    DataList.append(Time)

    # The list DataList stores the Shaped_data of each ROI in each sheet;
    # the list ROIinfo stores the spot id and the ligand name of each ROI in each sheet.
    for i in range(book.nsheets):
        sh = book.sheet_by_index(i)
        if sh.ncols != 0:
            DataList, ROIinfo = dam_single_sheet(sh, DataList, ROIinfo, start, end, colsize)
            n_sheet = n_sheet + 1
        else:
            break
    # Calculate the dimensions of Shaped_data and initialize the matrix.
    num_spot = len(DataList) - 1
    num_line = end - start
    Shaped_data = np.zeros((num_spot+1, num_line))
    Shaped_data[0] = DataList[0]

    # In case the ROIs do not count from spot 1.
    newROIinfo = copy.deepcopy(ROIinfo)
    StartspotList = np.array([i['ID'] for i in ROIinfo])
    start_spot = min(StartspotList)
    for i in range(num_spot):
        tmp = ROIinfo[i]['ID']
        try:
            j = int(ROIinfo[i]['ID']) - (start_spot-1)
        except ValueError:
            ##return tmp
            if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
                j = int(ROIinfo[i]['ID'][4:]) - (start_spot-1)
            else:
                print 'Illegal galfile format.\n', 'ID:', tmp
                ##return tmp

        Shaped_data[j] = DataList[i+1]
        newROIinfo[j] = ROIinfo[i]

    # Pack the data and the related information into an SPRdata object.
    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj

def read_clamp_data(fname):
    """Read the Clamp format data.
    Return the data in the form of a list.
    """
    fp = open(fname, 'r')
    Tmpstr = fp.readline()
    # Examine the file head,
    # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
    ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
    try:
        name = Tmpstr.split(' ')[2].strip()
        ROIdic['ROI name'] = name
        stmp = Tmpstr.split(' ')[3]
        rest = Tmpstr[Tmpstr.find(stmp):].strip()
        ROIdic['concentration'] = rest.split(':')
    except IndexError:
        pass

    for Tmpstr in fp:
        try:
            float(Tmpstr.split('\t')[0])
            n_conc = (Tmpstr.count('\t')+1)/2
            ##print Tmpstr, 'the beginning of data'
            break
        except ValueError:
            if Tmpstr.startswith('Conc1'):
                n_conc = Tmpstr.count('\t')
                Conc = Tmpstr.strip().split('\t')[1:]
            elif Tmpstr.startswith('Start1'):
                Start = Tmpstr.strip().split('\t')[1:]
                n_conc = Tmpstr.count('\t')
            elif Tmpstr.startswith('Stop1'):
                Stop = Tmpstr.strip().split('\t')[1:]
                n_conc = Tmpstr.count('\t')

    # Write the data into a matrix.
    Tmpdata = []
    for eachline in fp:
        a = eachline.strip().split('\t')
        Tmpdata.append(np.array(a, dtype='f'))
    Data = np.transpose(Tmpdata)

    # Write the information.
    ROIinfo = [ROIdic]
    status = {'DatainputType': 'Clamp data'}
    sampleinfo, injinfo = [], []
    injpara = [map(float, i) for i in [Start, Stop]]

    if len(Conc) >= 1:
        for i, c in enumerate(Conc):
            sampledic = {}
            name = 'Sample ' + str(i+1)
            sampledic = dict(zip(['Name', 'Concentration'], [name, c]))
            ## sampledic['Name'] = name
            ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
            sampleinfo.append(sampledic)
            injdic = dict(zip(['ton', 'toff'], [float(Start[i]), float(Stop[i])]))
            injinfo.append(injdic)
    dataobj = spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    return dataobj

def keyfile_read(fkey):
    # The Key File contains
    """Function: Read the key file for the old SPR instrument.
    Input: the filename.
    Return: the ROI information.
    """
    try:
        fp = open(fkey, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fkey
        sys.exit(0)
    ROIinfo = []
    firstline = fp.readline()  # skip the first line
    # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        ROIdic['ID'] = tmplist[0]    # spot id
        ROIdic['Name'] = tmplist[1]  # name of the immobilized protein
        try:
            ROIdic['Conc'] = tmplist[2]
            ROIdic['Background Spot'] = int(tmplist[3])
        except IndexError:
            pass
        ROIinfo.append(ROIdic)

    fp.close()
    print 'There are %d ROIs in the Key file.' % len(ROIinfo)
    return ROIinfo

def galfile_read(fname):
    """Function: Read the GAL file.
    Input: the filename.
    Return: the ROI information.
    """
    fp = open(fname, 'r')
    ROIinfo = []
    Tmpstr = fp.readline()  # skip the first line
    Tmpstr = fp.readline()
    Tmpn = int(Tmpstr.split('\t')[0])
    for i in range(Tmpn):
        labelline = fp.readline()

        if labelline.startswith('"Block1'):
            a = labelline.strip().split(',')
            columnsize, rowsize = int(a[3]), int(a[5])
            ## print labelline

    def IDcal(p):
        id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
        return id

    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        pos = tuple(map(int, tmplist[:3]))
        ROIdic['Position'] = pos
        ROIdic['IDcal'] = IDcal(pos)
        ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]

        ROIdic['Name'] = tmplist[3]
        ROIdic['ID'] = tmplist[4]
        try:
            ROIdic['Conc'] = tmplist[5]
            ROIdic['Family'] = tmplist[6]
            blist = tmplist[7][1:-1].split(';')  # always something like '"1,1,2; 1,2,2"'
            Backn = len(blist)
            Bid = []

            for i in blist:
                if '' in i.split(','):
                    break
                Bpos = tuple(map(int, i.split(',')))
                Bid.append(IDcal(Bpos))

            ROIdic['Background Spot'] = tuple(Bid)
        except IndexError:
            pass

        if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
            print 'The spot ID should be consistent with the position. Please check the ID.'
            break
        else:
            ROIdic['ID'] = ROIdic['IDcal']
            ROIinfo.append(ROIdic)
    fp.close()
    print 'There are %d ROIs in the Gal file.' % len(ROIinfo)

    return ROIinfo

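## Illustrative note (not part of the original module): assuming a GAL block
## with columnsize = 12 and rowsize = 4, the nested IDcal() helper maps a
## (block, row, column) position of (1, 2, 3) to (1-1)*12*4 + (2-1)*12 + 3 = 15,
## i.e. spots are numbered along each row, then row by row within the block.
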
def method_read(fname):
    """Function: Read the analyte table.
    Input: the filename.
    Return: the sample information.
    """
    # Create a concentration dictionary for the sample injected into the flowcell.
    # Export a table containing the concentration of the sample injected, and the duration.
    fp = open(fname, 'r')
    sampleinfo = []
    ##TmpTable,TmpTable2={},{}
    labelline = fp.readline()
    for n, eachline in enumerate(fp):
        sampledic = {}
        sampledic['Location'] = eachline.split('\t')[0]
        sampledic['Name'] = eachline.split('\t')[1]
        sampledic['Concentration'] = float(eachline.split('\t')[2])
        sampledic['Duration'] = float(eachline.split('\t')[4])
        sampledic['Flow Rate'] = float(eachline.split('\t')[3])
        sampledic['Analyte Series'] = eachline.split('\t')[6]
        sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
        sampleinfo.append(sampledic)

    print 'Name', '\t', 'Concentration'
    for i in sampleinfo:
        print i['Name'], '\t', i['Concentration']

    return sampleinfo

def datafile_read(fname):
    """Function: Read the raw data from the Plexera instrument,
    and pack the raw data (Shaped_data) into a data object.
    Therefore, the initial dataformat = 0.
    Input: the filename.
    Return: the packed SPR.DataPass type data object.
    """
    # =================== Check the format of the input files ===================
    if fname.split('.')[-1] == 'txt':

        Format_input, Firstline = check_format_input(fname)
        if Format_input == 1:  # for the SPRit file format
            dataobj = sprit_format(fname)
            print '-'*10, ' This is a SPRit data file. ', '-'*10

        elif Format_input == 2:  # for the Plexera ICM file format
            dataobj = plexera_icm_format(fname)
            print '-'*10, ' This is a Plexera ICM exported data file. ', '-'*10

        elif Format_input == 3:  # for the Clamp datafile format
            dataobj = read_clamp_data(fname)
            print '-'*10, ' This is a Clamp file. ', '-'*10

        else:
            print '-'*10, ' unreadable file input! ', '-'*10
            sys.exit(0)

        if Format_input == 1 or Format_input == 2:
            flag = str.upper(raw_input('Load the key file? (y/n): '))
            if flag == 'Y':
                fkey = raw_input('Input the path of the key file: ')
                ROIinfo = keyfile_read(fkey)
                dataobj.updateROIinfo(ROIinfo)

    elif fname.split('.')[-1] == 'xls':
        # For the DAM spreadsheet file format.
        print '-'*10, ' This is a Plexera DAM exported data file. ', '-'*10
        book = xlrd.open_workbook(fname)
        # Shape the data in the spreadsheet, whether it has a single sheet or multiple sheets.
        dataobj = dam_spreadsheet_format(book)
        flag = str.upper(raw_input('Load the gal file? (y/n): '))
        if flag == 'Y':
            fgal = raw_input('Input the path of the gal file: ')
            ROIinfo = galfile_read(fgal)
            dataobj.updateROIinfo(ROIinfo)
        flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
        if flag == 'Y':
            fprotocol = raw_input('Input the path of the analyte table: ')
            sampleinfo = method_read(fprotocol)
            dataobj.updateSampleinfo(sampleinfo)

    return dataobj

def outlier_remove(obj):
    ##dataobj.viewAll()
    """Function: Remove unwanted data points from noisy periods.
    Return: the data object with clean data.
    """
    dataobj = copy.deepcopy(obj)
    print ('Select the time area that you want to remove.')
    tmpstr = raw_input('in the format of "200:240, 500:510" :')
    areatodel = tmpstr.split(',')
    data = dataobj.data
    for i in areatodel:
        ##if
        t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
        t = data[0]  # for dataformat=0 Shaped_data
        tmpind = plt.find((t > t1) & (t < t2))  # find the outlier indices
        ## data[1:,tmpind] = 0
        tmplist = data.tolist()
        tmplist2 = []
        for j in tmplist:
            del j[min(tmpind):max(tmpind)]
            tmplist2.append(j)
        data = np.array(tmplist2)
        ## tmplist = tmplist
    dataobj.data = data
    # dataobj.status = 'Outlier removed'
    dataobj.updateStatus(outlier_remove=True)
    return dataobj

def calibrate(obj):
    """Function: Calibrate the intensity response.
    Return: the data object with calibrated data.
    Note: at present, this function is valid only when
    the whole procedure includes a calibration procedure.
    """
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    caldata = copy.deepcopy(data)
    t = data[0]
    if dataobj.dataformat != 0:
        print 'We only calibrate the raw data.'
        return
    else:
        t1 = input('Enter the time position of the TOP response during calibration:')
        s1 = input('The refractive index of the TOP response is: ')
        t2 = input('Enter the time position of the BOTTOM response during calibration:')
        s2 = input('The refractive index of the BOTTOM response is: ')
        ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
        ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]

        for i, y in enumerate(data[1:]):
            y1, y2 = y[ind1], y[ind2]
            slope = (s1-s2)/(y1-y2)
            offset = s1 - slope*y1
            caldata[i+1] = slope*y + offset
        dataobj.data = caldata
        dataobj.updateStatus(calibrate=True)
        return dataobj

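## Illustrative note (not part of the original module): with hypothetical
## answers s1 = 1.345 at intensity y1 = 20000 and s2 = 1.333 at y2 = 15000,
## slope = (1.345-1.333)/(20000-15000) = 2.4e-6 and offset = 1.345 - slope*20000,
## so each raw trace y is mapped to refractive index units via slope*y + offset.
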
def get_background_value(obj, bgids):
    """Get the averaged value of the background spots for each ROI."""
    bgsignal = obj.data[0]*0
    for j in bgids:
        if j == obj.ROIinfo[j-1]['ID']:
            bgsignal = bgsignal + obj.data[j]

        elif j != obj.ROIinfo[j-1]['ID']:
            for n, eachspot in enumerate(obj.ROIinfo):
                if j == eachspot['ID']:
                    bgsignal = bgsignal + obj.data[n+1]
    bgsignal = bgsignal/len(bgids)
    return bgsignal

def background_subtract(obj, *bgspot):
    """Function: Perform the background subtraction for the UNSPLIT curve.
    Input besides "obj" is the id of the spot(s) taken as background.
    The following inputs are acceptable:
    1. background_subtract(obj): the default background in the Gal file
       will be subtracted.
    2. background_subtract(obj, 1, 6): the average value of spot 1
       and spot 6 will be subtracted.
    """
    dataobj = copy.deepcopy(obj)
    ROIinfo = obj.ROIinfo
    data = dataobj.data

    if dataobj.dataformat == 0:
        newdata = np.zeros(np.shape(data))
        newdata[0] = data[0]  # the time scale
        if bgspot == ():
            # The average of the default background spots listed in the galfile
            # is subtracted.
            for i in range(1, dataobj.ROInum+1):
                bgids = ROIinfo[i-1]['Background Spot']
                bgsignal = get_background_value(dataobj, bgids)
                newdata[i] = data[i] - bgsignal
            # Update the status of the data object.
            dataobj.updateStatus(BackgroundType='Default in Gal')

        else:
            # The average of the manually input background spots
            # is subtracted.
            for i in range(1, dataobj.ROInum+1):
                bgsignal = get_background_value(dataobj, bgspot)
                newdata[i] = data[i] - bgsignal
            dataobj.updateStatus(BackgroundType='Manually chosen')
        dataobj.data = newdata
        dataobj.updateStatus(background_subtraction=True)
        return dataobj

    else:
        print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
        return

def curve_split(obj, t_before, t_after, *t_inj):
    """Function: Split the whole curve that contains several injections
    into pieces.
    The sample information will be checked during the split;
    if the injection number in sampleinfo is not consistent
    with the input injection parameters, it will help you
    update the sample information.
    Input besides obj:
        t_before: time duration before the injection time point
        t_after: time duration after the injection time point
        t_inj: the exact start time of each injection
    Return: the data object with split data and updated sample
        information
    """
    dataobj = copy.deepcopy(obj)
    ROInum = dataobj.ROInum
    t = dataobj.data[0]
    ind = []
    # Find how many lines of data each injection has.
    for i in range(len(t_inj)):
        ind.append(np.shape(plt.find((t > (t_inj[i]-t_before)) & (t < (t_inj[i]+t_after))))[0])
    m, j = max(ind), max(ind)-min(ind)
    # The new dataset must be an m*((ROInum+1)*num_inj) matrix:
    # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
    tmpdata = np.zeros(((ROInum+1)*len(t_inj), m))

    for i in range(len(t_inj)):
        TmpInd = plt.find((t > (t_inj[i]-t_before)) & (t < (t_inj[i]+t_after)))
        Tmp1, Tmp2 = i*(ROInum+1), (i+1)*(ROInum+1)
        tmpdata[Tmp1:Tmp2, 0:len(TmpInd)] = dataobj.data[:, TmpInd]
    if j != 0:
        Split_data = tmpdata[:, :-j]
    else:
        Split_data = tmpdata[:]
    # Zero and align the split data.
    for i in range(np.shape(Split_data)[0]):
        Split_data[i] = Split_data[i] - Split_data[i][0]

    # check_sample_info() returns None when the injection parameters do not
    # match the sample information, so only unpack its result on success.
    splitinfo = check_sample_info(dataobj, t_before, t_after, t_inj)
    if splitinfo is not None:
        newsampleinfo, injectinfo = splitinfo
        dataobj.data = Split_data
        dataobj.updateStatus(DataSplit=True)
        dataobj.updateSampleinfo(newsampleinfo)
        dataobj.dataformat = 2
        dataobj.injectinfo = injectinfo
        return dataobj
    else:
        dataobj.updateStatus(DataSplit=False)
        print 'Curve Splitting not successful!'

        return

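## Illustrative note (not part of the original module): a call such as
##   dat = curve_split(dat, 100, 800, 1000, 2500)
## cuts out two injections, each window running from 100 s before to 800 s
## after the injection start times t = 1000 s and t = 2500 s.
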
def check_sample_info(dataobj, t_before, t_after, t_inj):
    """Check whether the sample information is consistent with the injection
    parameters input. If not, help the user update the sample information
    for the split curve.
    """
    injectinfo = []
    if len(dataobj.sampleinfo) == len(t_inj):
        for i, j in enumerate(t_inj):
            injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
        return dataobj.sampleinfo, injectinfo

    elif len(dataobj.sampleinfo) > len(t_inj):
        print """ The number of injections you just input does not match that in the sample information.
        If you only want to split part of the curve, please follow the directions below to update the sampleinfo in the new object."""
        choose = str.upper(raw_input('Continue? (y/n)'))
        if choose == 'Y':
            newsampleinfo, injectinfo = [], []
            print ' Your input indicates %s injections out of %s times in the experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
            for n, i in enumerate(dataobj.sampleinfo):
                print '--', n, '.', i['Name'], ':', i['Concentration']
            print ' Enter the ID numbers of the injections you selected, in the form of "2-10" or "4,5,6",'
            tmpstr = raw_input(':')
            if ',' in tmpstr:
                injID = map(int, tmpstr.split(','))
            elif '-' in tmpstr:
                injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
            else:
                print ' Illegal input!'
                return

            if len(injID) != len(t_inj):
                print ' Please input %s injection ID numbers instead of the %s you just input.' % (len(t_inj), len(injID))
                return

            for i in injID:
                newsampleinfo.append(dataobj.sampleinfo[i])
                injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
            return newsampleinfo, injectinfo
        else:
            return

    elif len(dataobj.sampleinfo) < len(t_inj):
        print ' The number of injections you just input does not match that in the sample information.'
        print ' Your input indicates %s injections, but there are only %s in the experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
        print ' Please check.'

        return

def write_data(Head, data_out, ROI_out, DataDir, output_format):
    """Write the data into a txt file.
    """
    # Modified 100405 CGL
    # Create a new folder to save the data.
    if output_format == 'B':
        ftitle = 'biosensor'
        Tmpstr = str(ROI_out).replace(', ', '_')
        Tmpstr = Tmpstr.replace('(', '-')
        foutname = ftitle + Tmpstr[:-1] + '.txt'
    elif output_format == 'C':
        foutname = 'clamp-' + str(ROI_out) + '.txt'
    else:
        ftitle = ''

    # Change directories, if requested.
    if (DataDir != ""):
        os.chdir('..')
        try:
            os.makedirs(DataDir)
        except WindowsError:
            pass
        os.chdir(DataDir)

    np.savetxt('temp.txt', np.transpose(data_out), fmt='%f')
    fdt = open('temp.txt', 'r')

    ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
    fout = open(foutname, 'w+')
    # Write the head of the data first.
    fout.write(Head + '\n')
    count = len(fdt.readlines())
    fdt.seek(0)
    for i in range(count):
        temp = fdt.readline()
        fout.write(temp.replace(' ', '\t'))
    fdt.close()
    fout.close()
    print '-------The path of the exported data is "', os.getcwd(), '"-------'

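## Illustrative note (not part of the original module): for output_format='B'
## and ROI_out=(1, 6) the file name works out to 'biosensor-1_6.txt', while
## output_format='C' with ROI_out=3 gives 'clamp-3.txt'.
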
def data_output_biosensor(obj, DataDir, *ROI_out):
    """Export the data into a biosensor format txt file,
    which can be processed in Scrubber.
    Input: the data object, the output path, the ROIs selected to export.
    """
    # Initialize the data output matrix, which will contain len(ROI_out)*2*n_inj columns.
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum

    data_out = np.zeros(((len(ROI_out)*2)*n_inj, np.shape(data)[1]))
    head1 = ""
    new_ROIinfo = []
    for n, i in enumerate(ROI_out):
        new_ROIinfo.append(ROIinfo[i-1])  ## the new ROI info
        if i != ROIinfo[i-1]['IDcal']:
            # Check whether the ID number matches the position.
            print n, i, ROIinfo[i-1]
            return
        else:
            headcomp = ROIinfo[i-1]['ID'] + ': ' + ROIinfo[i-1]['Name']
            try:
                headcomp = headcomp + ', ' + ROIinfo[i-1]['Conc']
            except KeyError:
                pass
            headcomp = headcomp + ' Fc=' + str(n+1) + ' - '
            for j in range(n_inj):
                headtmp = headcomp + str(j+1) + '_'
                head1 = head1 + headtmp + 'X\t' + headtmp + 'Y\t'
                data_out[2*n*n_inj+2*j] = data[j*(ROInum+1)]
                data_out[2*n*n_inj+2*j+1] = data[j*(ROInum+1)+i]
    head1 = head1[:-1]

    dataobj.data = data_out
    dataobj.dataformat = 3  # biosensor dataformat
    dataobj.updateStatus(DataType='Biosensor data')
    dataobj.updateStatus(Head=head1)
    dataobj.updateROIinfo(new_ROIinfo)

    write_data(head1, data_out, ROI_out, DataDir, 'B')

    return dataobj

def data_output_clamp(obj, DataDir, ROI_out):
    """Export the data into a Clamp format txt file,
    which can be processed in Clamp.
    Input: the data object, the output path, the selected ROI.
    """
    # Initialize the data output matrix, which will contain 2*n_inj columns.
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum
    sampleinfo = dataobj.sampleinfo

    data_out = np.zeros((2*n_inj, np.shape(data)[1]))
    if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
        print 'Please check the ROI information.'
        print ROI_out, ROIinfo[ROI_out-1]
        return
    else:
        tmp = ROIinfo[ROI_out-1]['ID'] + ': ' + ROIinfo[ROI_out-1]['Name']
        try:
            tmp = tmp + ', Conc:' + ROIinfo[ROI_out-1]['Conc']
        except KeyError:
            pass
    Clamphead = 'Vers 3.41 ' + tmp
    conc = ''
    t_start, t_stop = '', ''
    head1 = 'Time\tData\t'*n_inj
    head1 = head1[:-1]

    for i in range(n_inj):
        data_out[2*i] = data[i*(ROInum+1)]
        data_out[2*i+1] = data[i*(ROInum+1)+ROI_out]
        name = sampleinfo[i]['Name']
        conc = conc + '\t' + str(sampleinfo[i]['Concentration'])
        t_start = t_start + '\t' + str(dataobj.injectinfo[i][0])
        t_stop = t_stop + '\t' + str(dataobj.injectinfo[i][1])

    hconc, h_start, h_stop = '\nConc1'+conc, '\nStart1'+t_start, '\nStop1'+t_stop
    for i in (hconc, h_start, h_stop):
        if i[-1] != '':
            Clamphead = Clamphead + i

    Clamphead = Clamphead + '\n' + head1

    dataobj.data = data_out
    dataobj.dataformat = 4  # Clamp dataformat
    dataobj.updateStatus(DataType='Clamp data')
    dataobj.updateStatus(Head=Clamphead)
    dataobj.updateROIinfo(ROIinfo[ROI_out-1])

    write_data(Clamphead, data_out, ROI_out, DataDir, 'C')

    return dataobj
    ## return data_out,Clamphead

if (__name__ == '__main__'):
    # Print a short message if this module is run like an app.
    print "This is the OSPRAI file type conversion tool."