ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
Revision: 5
Committed: Wed Apr 7 06:46:26 2010 UTC (9 years, 6 months ago) by clausted
File size: 33468 byte(s)
Log Message:
Changes made to allow use with Linux.  Can now read SPRit and output Clamp formats.  Replace == and != with "in" and "not in" string comparisons.  Change IDcal to ID in line 834.  Add function "method_read_fake" to provide default information.  Change WindowsError exception to OSError exception.  Allow user to specify default pathway for saving files by entering null string "".
Line File contents
1 """
2 Converter: Perform the datafile reading, pre-processing and writing.
3 Yuhang Wan, Christopher Lausted
4 Last modified on 100405 (yymmdd)
5
6 Typical Pipeline:
7 >import converter as cv
8 >dat = cv.datafile_read("example.txt")
9 >dat = cv.outlier_remove(dat)
10 >dat = cv.calibrate(dat)
11 >dat = cv.background_subtract(dat)
12 >dat = cv.curve_split(dat, 100, 800, 200)
13 >dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
14 >dat = cv.data_output_biosensor(dat, "/dir", 0)
15 """
16 __version__ = "100405"
17
18 import numpy as np
19 import pylab as plt
20 import sys
21 import xlrd
22 import time
23 import os
24 import copy
25
26 ## import matplotlib.pyplot as plt
27 ## import matplotlib.mlab as mlb
28 ## import packageClamp_100210 as Pack
29 import SPRdataclass as spr
30
31
32 def check_format_input(fname):
33 """Examine the format of a input .txt data file."""
34 # open a file, if file doesn't exit, then exit
35 try:
36 fp = open(fname, "r")
37 except IOError:
38 print 'Cannot open file %s for reading' % fname
39 sys.exit(0)
40 # examine the first line of the input file
41 # for SPRit output:"Elapsed Time (Seconds) Average Intensity (Pixel Intensity)"
42 # for Plexera ICM output:
43 head_input1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
44
45 Format_input = 0
46 # check file format
47 Tmpstr = fp.readline()
48 if (head_input1 in Tmpstr):
49 Format_input=1
50 elif Tmpstr[-1]=='\n' and Tmpstr[-2]=='\r' and Tmpstr.split(' ')[0].split('/')[2].startswith('20'):
51 Format_input=2
52 elif Tmpstr.startswith('Vers'):
53 Format_input=3
54
55 fp.close()
56 return Format_input, Tmpstr
57
58
def sprit_format(fname):
    """Read the SPRit formatted data and reshape it.
    Return the data in the form of list.

    fname -- path of a SPRit-exported .txt file.  The file holds one
    time/intensity series per spot, each series starting over at
    "0.000000e+000".
    Returns an spr.DataPass object whose data row 0 is the shared time
    axis and rows 1..N are one intensity trace per ROI.
    Exits the program if the file cannot be opened.
    """
    # open a file, if file doesn't exit, then exit
    try:
        fp = open(fname, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fname
        sys.exit(0)
    status = {'DatainputType':'SPRit data'}
    # skip the first line of the input file
    Tmpstr = fp.readline()
    # check if the second line is BEGIN
    TmpStr = fp.readline()
    if ("BEGIN" not in TmpStr):
        print "Warning: Second line of data file is not 'BEGIN'"
        ##sys.exit(0)
    # count the lines of each data spot
    TmpStr = fp.readline() # skip first "0.000000e+000"
    num_line = 1 # so we start to count from 1
    # count until read the "0.000000e+000"
    # (each spot's series restarts at time zero, so the next
    # "0.000000e+000" marks the start of the second spot)
    while True:
        TmpStr = fp.readline()
        if TmpStr.split('\t')[0] == "0.000000e+000":
            break
        else:
            num_line = num_line + 1
    # count the spots in each file
    num_spot = 0
    # skip the first two lines
    fp.seek(0)
    for i in range(0,2):
        TmpStr = fp.readline()
    text = fp.readlines()
    for i in text:
        num_spot += i.count('0.000000e+000')
    fp.close()
    #---------------------- Reshaping the data --------------------
    # reshape the data in the string form num_line*num_spot
    Shaped_str = np.reshape(text,(num_spot,-1))
    tmpshape=np.shape(Shaped_str)
    # reshape the data in the number form: num_line*(num_spot+1)
    # only the first column is time: time spotl spot2 ... spot n
    # NOTE(review): this local 'time' shadows the imported time module
    # within this function.
    time = np.zeros(num_line)
    Tmpdata = Shaped_str[0,:]
    for n,i in enumerate(Tmpdata):
        time[n]=float(i.split('\t')[0])
    Shaped_data_1 = np.zeros((tmpshape[0]+1,tmpshape[1]))
    Shaped_data_1[0,:]=time
    for n1,i in enumerate(Shaped_str):
        for n2,j in enumerate(i):
            Shaped_data_1[n1+1][n2]=float(Shaped_str[n1][n2].split('\t')[1])
    # reshape the data in the number form:
    # time1 spotl time2 spot2 ... timen spot n
    # NOTE(review): Shaped_data_2 is computed but never used below;
    # only Shaped_data_1 is packed into the returned object.
    Shaped_data_2 = np.zeros((tmpshape[0]*2,tmpshape[1]))
    for i in range(tmpshape[0]):
        Shaped_data_2[i*2]=Shaped_data_1[0]
        Shaped_data_2[i*2+1] = Shaped_data_1[i+1]

    ## status = {'Datainput Type':'SPRit data'}
    dataformat = 0  # 0 = raw (unsplit) data layout
    ROInum = num_spot
    dataobj=spr.DataPass(Shaped_data_1, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj
128
129
def plexera_icm_format(fname):
    """Read the txt data exported from Plexera ICM software and reshape it.
    Return the data in the form of list.

    fname -- path of an ICM-exported, tab-separated .txt file whose
    first column holds "date time" stamps and whose remaining columns
    hold one ROI trace each.
    Returns an spr.DataPass object (row 0 = seconds elapsed since the
    first sample, rows 1..N = ROI traces).
    """
    fp=file(fname,'r')
    Tmpdata,tmptime=[],[]
    status = {'DatainputType':'Plexera ICM data'}
    for eachline in fp:
        Tmpdata.append(eachline.split('\t'))
    num_line,num_spot=np.shape(Tmpdata)
    # initialize the Shaped_data
    Shaped_data = np.zeros((num_spot,num_line))
    num_spot=num_spot-1 # the real number of spot
    Tmpdata=np.transpose(Tmpdata)
    # delete the last '\r\n' of the data in last column of the datafile
    for n,i in enumerate(Tmpdata[-1]):
        Tmpdata[-1][n]=i.rstrip()
    # time converting: convert the first column of the datafile(date and time information) into numerical time
    # "HH:MM:SS" -> seconds since midnight
    for i in Tmpdata[0]:
        tmptime_str=i.split(' ')[1]
        timelist=tmptime_str.split(':')
        now=float(timelist[0])*60*60+float(timelist[1])*60+float(timelist[2])
        tmptime.append(now)
    Shaped_data[0]=tmptime
    # re-zero the time axis at the first sample
    Shaped_data[0]=Shaped_data[0]-Shaped_data[0][0]
    # assign the signal data
    for i in range(num_spot):
        Shaped_data[i+1]=Tmpdata[i+1]

    dataformat = 0  # 0 = raw (unsplit) data layout
    ROInum = num_spot
    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
    return dataobj
167
168
169
def dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize):
    """Retrieve and shape data form one single sheet from the Spreadsheet.
    Return the data, ROI infomation in the form of list.

    sh       -- one xlrd sheet of the DAM workbook
    DataList -- list of per-ROI value columns; extended in place
    ROIinfo  -- list of per-ROI info dicts; extended in place
    start, end -- row window (inclusive start, exclusive end) to read
    colsize  -- columns per ROI: 5 for Raw Intensity exports, 4 for
                satellite-subtracted exports
    """
    ## and return the minimal spot Id (start_spot)
    ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet

    Head = sh.row_values(1)
    SpotList = []
    # one 'Relative Time' header per ROI on this sheet
    num_spot = sh.row_values(0).count('Relative Time')

    for i in range(num_spot):
        TmpHead=Head[colsize*i]
        # now there are 5 columns for each ROI for Raw Intensity
        # and 4 columns for each ROI for Satellite subtracted Intensity
        DataList.append(sh.col_values(2+colsize*i,start,end))

        # Parse the per-ROI header cell: one "key: value" pair per line.
        key, val = [], []
        tmpinfo = TmpHead.split('\n')
        for j in tmpinfo:
            key.append(j.split(': ')[0])
            val.append(j.split(': ')[1])
        tmpdic = dict(zip(key,val))
        # convert the 0-based Block/Row/Column fields to a 1-based tuple
        tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
        ROIinfo.append(tmpdic)

    return DataList,ROIinfo
197
def dam_spreadsheet_format(book):
    """Read the spreadsheet exported from Plexera DAM software and reshape it.
    Return the shaped data and ROI information.

    book -- xlrd workbook (one or more sheets of DAM export).
    Returns an spr.DataPass object (row 0 = time axis, rows 1..N = ROI
    traces, ordered by spot ID).
    """
    ROIinfo, newROIinfo, n_sheet = [], [], 0
    DataList=[]
    status = {'DatainputType':'Plexera DAM data'}
    sh0=book.sheet_by_index(0)
    # Decide the per-ROI column stride from the export flavor.
    # NOTE(review): if the header contains neither 'Raw Intensity' nor
    # 'Subtracted Intensity', colsize stays undefined and the
    # dam_single_sheet call below raises NameError -- confirm whether
    # other export flavors exist.
    if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
        print 'This is the satellite subtracted data.'
        colsize = 4
        status ['DAM spreadsheet'] = 'Satellite subtracted Intensity'
    elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
        print 'This is the raw intensity data.'
        colsize = 5
        status ['DAM spreadsheet'] = 'Raw Intensity'

    # Find the first row that carries real signal (first non-zero
    # cumulative sum of column 2).
    for i in range(2,sh0.nrows):
        if sum(sh0.col_values(2,1,i))>0:
            break
    start,end = i-1,sh0.nrows
    num_line = end - start
    ## print 'start: ', start
    # the time axis initialization and assignment
    Time=sh0.col_values(1,start,end)
    # in case we lose the time axis during the DAM processing
    if sum(Time)==0: Time=np.arange(num_line)
    DataList.append(Time)

    # list DataList to store the Shaped_data of each ROI in each sheet,
    # list ROIinfo to store the Spot id and the ligand name of each ROI in each sheet
    for i in range(book.nsheets):
        sh=book.sheet_by_index(i)
        if sh.ncols!=0:
            DataList,ROIinfo = dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize)
            n_sheet=n_sheet+1
        else:
            # an empty sheet marks the end of the exported data
            break
    # calculate the dim of the Shaped_data, and initialize the matrix
    num_spot=len(DataList)-1
    num_line=end-start
    Shaped_data=np.zeros((num_spot+1,num_line))
    Shaped_data[0]=DataList[0]

    # in case that the ROIs doesn't count from spot1
    newROIinfo = copy.deepcopy(ROIinfo)
    StartspotList=np.array([i['ID'] for i in ROIinfo])
    start_spot=min(StartspotList)
    for i in range(num_spot):
        tmp = ROIinfo[i]['ID']
        try:
            # numeric IDs: shift so the smallest ID maps to data row 1
            j=int(ROIinfo[i]['ID'])-(start_spot-1)
        except ValueError:
            ##return tmp
            # IDs of the form "spotNN" (case-insensitive prefix)
            if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
                j=int(ROIinfo[i]['ID'][4:])-(start_spot-1)
            else:
                print 'Illegal galfile format.\n', 'ID:', tmp
                ##return tmp

        # NOTE(review): j is a 1-based row index while newROIinfo is a
        # 0-based list of length num_spot; verify the largest ID cannot
        # push j to num_spot and overrun newROIinfo.
        Shaped_data[j]=DataList[i+1]
        newROIinfo[j]=ROIinfo[i]

    # pack the date and relative information into SPRdata obj
    dataformat = 0  # 0 = raw (unsplit) data layout
    ROInum = num_spot
    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj
270
271
def read_clamp_data(fname):
    """Read the Clamp format data
    Return the data in the form of list.

    fname -- path of a Clamp .txt file: a 'Vers ...' header line,
    optional Conc1/Start1/Stop1 lines, a column-header line, then
    tab-separated numeric Time/Data column pairs.
    Returns an spr.DataPass object with dataformat 4 (Clamp layout).
    """
    fp=open(fname,'r')
    Tmpstr=fp.readline()
    # examine the file head
    # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
    ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
    try:
        name = Tmpstr.split(' ')[2].strip()
        ROIdic['ROI name'] = name
        stmp = Tmpstr.split(' ')[3]
        rest = Tmpstr[Tmpstr.find(stmp):].strip()
        # NOTE(review): split(':') yields a list, so 'concentration'
        # holds e.g. ['Conc', '250 nM'] rather than a scalar -- confirm
        # downstream consumers expect a list here.
        ROIdic['concentration'] = rest.split(':')
    except IndexError:
        # header without name/concentration fields -- leave ROIdic sparse
        pass

    # Scan header lines (Conc1/Start1/Stop1) until the first line whose
    # leading field parses as a float, i.e. the start of the data block.
    for Tmpstr in fp:
        try:
            float(Tmpstr.split('\t')[0])
            n_conc=(Tmpstr.count('\t')+1)/2
            ##print Tmpstr, 'the beginning of data'
            break
        except ValueError:
            if Tmpstr.startswith('Conc1'):
                n_conc=Tmpstr.count('\t')
                Conc=Tmpstr.strip().split('\t')[1:]
            elif Tmpstr.startswith('Start1'):
                Start=Tmpstr.strip().split('\t')[1:]
                n_conc=Tmpstr.count('\t')
            elif Tmpstr.startswith('Stop1'):
                Stop=Tmpstr.strip().split('\t')[1:]
                n_conc=Tmpstr.count('\t')

    # NOTE(review): if the header lacks Start1/Stop1 lines, Start/Stop
    # are unbound and the injpara line below raises NameError.

    # write the data into matrix
    Tmpdata=[]
    for eachline in fp:
        a=eachline.strip().split('\t')
        Tmpdata.append(np.array(a,dtype='f'))
    Data=np.transpose(Tmpdata)

    # write the infomation
    ROIinfo = [ROIdic]
    status = {'DatainputType':'Clamp data'}
    sampleinfo, injinfo = [], []
    injpara = [map(float,i) for i in [Start,Stop]]

    if len(Conc) >= 1:
        # one sample entry and one injection window per concentration
        for i,c in enumerate(Conc):
            sampledic = {}
            name = 'Sample '+str(i+1)
            sampledic = dict(zip(['Name','Concentration'],[name,c]))
            ## sampledic['Name'] = name
            ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
            sampleinfo.append(sampledic)
            injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
            injinfo.append(injdic)
    dataobj=spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    return dataobj
336
337
338
339 def keyfile_read(fkey):
340 # The Key File contains
341 """Function: Read the key file for old SPR instrument.
342 Input: the filename.
343 Return: the ROI information.
344 """
345 try:
346 fp = open(fkey, "r")
347 except IOError:
348 print 'Cannot open file %s for reading' % fname
349 sys.exit(0)
350 ROIinfo = []
351 firstline=fp.readline() # skip the first line
352 # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
353 for eachline in fp:
354 ROIdic = {}
355 tmplist = eachline.strip().split('\t')
356 ROIdic['ID'] = tmplist[0] # spot id
357 ROIdic['Name'] = tmplist[1] # name of the protein immobilized
358 try:
359 ROIdic['Conc'] = tmplist[2]
360 ROIdic['Background Spot'] = int(tmplist[3])
361 except IndexError:
362 pass
363 ROIinfo.append(ROIdic)
364
365 fp.close()
366 print 'There are %d ROIs in the Key file.' % len(ROIinfo)
367 return ROIinfo
368
369
def galfile_read(fname):
    """Function: Read the GAL file.
    Input: the filename.
    Return: the ROI information (list of dicts keyed by Position, ID,
    Name and the optional Conc/Family/Background Spot columns).
    """
    fp=file(fname,'r')
    ROIinfo = []
    Tmpstr = fp.readline() # skip the first line
    Tmpstr = fp.readline()
    # second line starts with the number of label lines to skip
    Tmpn = int(Tmpstr.split('\t')[0])
    for i in range(Tmpn):
        labelline = fp.readline()

        if labelline.startswith('"Block1'):
            # the Block1 label carries the array geometry
            a = labelline.strip().split(',')
            columnsize, rowsize = int(a[3]), int(a[5])
            ## print labelline

    def IDcal(p):
        # map a (block, row, column) position to a linear 1-based spot id
        id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
        return id

    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        pos = tuple(map(int,tmplist[:3]))
        ROIdic['Position'] = pos
        ROIdic['IDcal'] =IDcal(pos)
        ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]

        ##
        ROIdic['Name'] = tmplist[3]
        ROIdic['ID'] = tmplist[4]
        try:
            # optional columns beyond the mandatory five
            ROIdic['Conc'] = tmplist[5]
            ROIdic['Family'] = tmplist[6]
            blist = tmplist[7][1:-1].split(';') # always something like '"1,1,2; 1,2,2"'
            Backn = len(blist)
            Bid = []

            for i in blist:
                if '' in i.split(','):
                    # incomplete position triple: stop collecting
                    break
                Bpos = tuple(map(int, i.split(',')))
                Bid.append(IDcal(Bpos))
            ##
            ROIdic['Background Spot'] = tuple(Bid)
        except IndexError:
            pass

        # cross-check the "spotNN" ID against the position-derived id
        if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
            print 'The spot ID should be consistent with the position. Please check the ID.'
            break
        else:
            # keep the numeric id as the canonical ID
            ROIdic['ID'] = ROIdic['IDcal']
            ROIinfo.append(ROIdic)
    fp.close()
    print 'There are %d ROIs in the Gal file.' % len(ROIinfo)

    return ROIinfo
429
430
431
432 def method_read(fname):
433 """Function: Read the analyte table.
434 Input: the filename
435 Return: the sample information
436 """
437 # create a concentration dictionary for the sample injected into the flowcell
438 # export a table containing the concentration of the sample injected, and the duration
439 fp=file(fname,'r')
440 sampleinfo = []
441 ##TmpTable,TmpTable2={},{}
442 labelline=fp.readline()
443 for n,eachline in enumerate(fp):
444 sampledic = {}
445 sampledic['Location'] = eachline.split('\t')[0]
446 sampledic['Name'] = eachline.split('\t')[1]
447 sampledic['Concentration'] = float(eachline.split('\t')[2])
448 sampledic['Duration'] = float(eachline.split('\t')[4])
449 sampledic['Flow Rate'] = float(eachline.split('\t')[3])
450 sampledic['Analyte Series'] = eachline.split('\t')[6]
451 sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
452 sampleinfo.append(sampledic)
453
454 print 'Name','\t','Concentration'
455 for i in sampleinfo:
456 print i['Name'],'\t', i['Concentration']
457
458 return sampleinfo
459
def method_read_fake():
    """Return a one-entry placeholder sample-information list.

    Used when no analyte table is supplied: every field of the single
    sample record is zero, matching the keys produced by method_read().
    """
    placeholder = dict.fromkeys(
        ['Location', 'Name', 'Concentration', 'Duration',
         'Flow Rate', 'Analyte Series', 'Buffer Blank Series'], 0)
    return [placeholder]
464
465
def datafile_read(fname):
    """Function: Read the raw data from the Plexera instrument,
    and pack the raw data (Shaped_data) into dataobject.
    Therefore, the initial dataformat = 0
    Input: the filename (.txt for SPRit/ICM/Clamp, .xls for DAM export)
    Return: the packed SPR.DataPass type data object

    Interactive: prompts on stdin for optional key/gal/analyte files.
    NOTE(review): a filename with any other extension falls through both
    branches and the final return raises NameError on 'dataobj'.
    """
    #===================check the format of the input files-------
    if fname.split('.')[-1] == 'txt':

        Format_input, Firstline = check_format_input(fname)
        if Format_input == 1: # for SPRit file format
            dataobj = sprit_format(fname)
            print '-'*10,' This is a SPRit data file. ','-'*10

        elif Format_input == 2: # for Plexera ICM file format
            dataobj = plexera_icm_format(fname)
            print '-'*10,' This is a Plexera ICM exported data file. ','-'*10

        elif Format_input == 3: # for Clamp datafile format
            dataobj = read_clamp_data(fname)
            print '-'*10,' This is a Clamp file. ','-'*10

        else:
            print '-'*10,' unreadable file input! ','-'*10
            sys.exit(0)

        # SPRit/ICM data carry no ROI annotations: offer the key file
        if Format_input == 1 or Format_input == 2:
            flag = str.upper(raw_input('Load the key file? (y/n): '))
            if flag == 'Y':
                fkey = raw_input('Input the path of the key file: ')
                ROIinfo = keyfile_read(fkey)
                dataobj.updateROIinfo(ROIinfo)

    elif fname.split('.')[-1] == 'xls':
        # for the DAM Spreadsheet file format
        print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
        book = xlrd.open_workbook(fname)
        #shape the data in the Spreadsheet, whether single sheet or multiple sheets
        dataobj=dam_spreadsheet_format(book)
        flag = str.upper(raw_input('Load the gal file? (y/n): '))
        if flag == 'Y':
            fgal = raw_input('Input the path of the gal file: ')
            ROIinfo = galfile_read(fgal)
            dataobj.updateROIinfo(ROIinfo)
        flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
        if flag == 'Y':
            fprotocol = raw_input('Input the path of the analyte table: ')
            sampleinfo = method_read(fprotocol)
            dataobj.updateSampleinfo(sampleinfo)
        else:
            # fall back to an all-zero placeholder sample record
            dataobj.updateSampleinfo(method_read_fake())

    return dataobj
520
521
522
def outlier_remove(obj):
    ##dataobj.viewAll()
    """Function: Remove unwanted data points from noisy periods.
    Return: the data object with clean data

    Interactive: prompts for one or more "t1:t2" time windows; every
    sample whose time falls strictly inside a window is deleted from
    all rows (time axis and every ROI trace alike).
    The input object is not modified; a deep copy is returned.
    """
    dataobj = copy.deepcopy(obj)
    print ('Select the time area that you want to remove.')
    tmpstr = raw_input('in the format of "200:240, 500:510" :')
    areatodel = tmpstr.split(',')
    data = dataobj.data
    for i in areatodel:
        ##if
        t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
        t = data[0] # for dataformat=0 Shaped_data
        tmpind = plt.find((t>t1) & (t<t2)) # find the outliar indice
        ## data[1:,tmpind] = 0
        # delete the indexed span from every row via plain lists, then
        # rebuild the array (numpy arrays cannot delete in place)
        tmplist = data.tolist()
        tmplist2 = []
        for j in tmplist:
            del j[min(tmpind):max(tmpind)]
            tmplist2.append(j)
        data = np.array(tmplist2)
        ## tmplist = tmplist
    dataobj.data = data
    # dataobj.status = 'Outliar removed'
    dataobj.updateStatus(outlier_remove=True)
    return dataobj
550
def calibrate(obj):
    """Function: calibrate the Intensity response.
    Return: the data object with calibrated data
    Note: at present, this function is valid only when
    the whole procedure includes calibration precedure.

    Interactive: prompts for the times and refractive indices of the
    TOP and BOTTOM calibration responses, then maps each ROI trace
    through the two-point linear fit defined by those samples.
    The input object is not modified; a deep copy is returned
    (or None if the data is not in the raw format).
    """
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    caldata = copy.deepcopy(data)
    t = data[0]
    if dataobj.dataformat != 0:
        print 'We only calibrate the raw data.'
        return
    else:
        t1 = input('Enter the time position of the TOP response during calibration:')
        s1 = input('The refractive index of the TOP response is: ')
        t2 = input('Enter the time position of the BOTTOM response during calibration:')
        s2 = input('The refractive index of the BOTTOM response is: ')
        # nearest sample indices to the two requested times
        ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
        ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]

        # per-ROI linear map: intensity -> refractive index
        for i,y in enumerate(data[1:]):
            y1, y2 = y[ind1], y[ind2]
            slope = (s1-s2)/(y1-y2)
            offset = s1- slope*y1
            caldata[i+1] = slope*y+offset
        dataobj.data = caldata
        dataobj.updateStatus(calibrate=True)
        return dataobj
580
def get_background_value(obj, bgids):
    """Return the mean signal trace of the background spots in *bgids*.

    obj   -- data object with .data (row 0 = time axis, row k = spot k)
             and .ROIinfo (list of dicts carrying each spot's 'ID')
    bgids -- iterable of background spot IDs to average
    """
    total = obj.data[0] * 0  # zero trace, same length as the time axis
    for spot_id in bgids:
        if spot_id == obj.ROIinfo[spot_id - 1]['ID']:
            # Fast path: the ROI table is ordered, spot N sits at N-1.
            total = total + obj.data[spot_id]
        else:
            # Otherwise scan the ROI table for the matching ID.
            for row, roi in enumerate(obj.ROIinfo):
                if spot_id == roi['ID']:
                    total = total + obj.data[row + 1]
    return total / len(bgids)
595
596
597 def background_subtract(obj, *bgspot):
598 """Function: Perform the Background subtraction for the UNSPLIT curve.
599 Input besides "obj" is the id of the spot taken as background.
600 The following inputs are acceptable:
601 1. background_subtract(obj): the default background in Galfile
602 will be subtracted.
603 2. background_subtract(obj, 1, 6): the average value of spot1
604 and spot6 will be subtracted.
605 """
606 dataobj = copy.deepcopy(obj)
607 ROIinfo = obj.ROIinfo
608 data = dataobj.data
609
610 if dataobj.dataformat == 0:
611 newdata = np.zeros(np.shape(data))
612 newdata[0] = data[0] # the time scale
613 if bgspot == ():
614 # The average of the default background spots listed in the galfile
615 #are to be subtracted.
616 for i in range(1,dataobj.ROInum+1):
617 bgids = ROIinfo[i-1]['Background Spot']
618 bgsignal = get_background_value(dataobj,bgids)
619 newdata[i] = data[i]-bgsignal
620 # update the status of the data object.
621 dataobj.updateStatus(BackgroundType='Default in Gal')
622
623 else:
624 # The average of the manually input background spots
625 #are to be subtracted.
626 for i in range(1,dataobj.ROInum+1):
627 bgsignal = get_background_value(dataobj,bgspot)
628 newdata[i] = data[i]-bgsignal
629 dataobj.updateStatus(BackgroundType='Manually choosen')
630 dataobj.data = newdata
631 dataobj.updateStatus(background_subtraction=True)
632 return dataobj
633
634 else:
635 print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
636 return
637
def curve_split(obj, t_before, t_after, *t_inj):
    """Function: Split the whole curve that contains several injections
    into pieces.
    The sample information will be checked during the split,
    if the injection number in sampleinfo is not consistant
    with the input injection parameter, it will help you to
    update the sample information.
    Input besides obj:
    t_before: time duration before the injection time point
    t_after: time duration after the injection time point
    t_inj: the exact start time for each injection
    Return: the data object with splitted data, and updated sample
    information (or None when the split is abandoned)
    """
    dataobj = copy.deepcopy(obj)
    ROInum = dataobj.ROInum
    t=dataobj.data[0]
    ind=[]
    # find how many lines of data each injection
    for i in range(len(t_inj)):
        ind.append(np.shape(plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after))))[0])
    # m = longest window; j = spread between longest and shortest window
    m,j = max(ind),max(ind)-min(ind)
    # the new dataset must be a m*((ROInum+1)*num_inj) matrix:
    # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
    tmpdata = np.zeros(((ROInum+1)*len(t_inj),m))

    for i in range(len(t_inj)):
        TmpInd = plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after)))
        Tmp1,Tmp2 = i*(ROInum+1),(i+1)*(ROInum+1)
        tmpdata[Tmp1:Tmp2,0:len(TmpInd)] = dataobj.data[:,TmpInd]
    # trim the trailing columns so every injection has equal length
    if j!=0:
        Split_data = tmpdata[:,:-j]
    else:
        Split_data = tmpdata[:]
    # zero and align the split data
    for i in range(np.shape(Split_data)[0]):
        Split_data[i]=Split_data[i]-Split_data[i][0]

    newsampleinfo, injectinfo = check_sample_info(dataobj,t_before, t_after, t_inj)
    if newsampleinfo != None :
        dataobj.data = Split_data
        dataobj.updateStatus(DataSplit=True)
        dataobj.updateSampleinfo(newsampleinfo)
        dataobj.dataformat = 2  # 2 = split-curve layout
        dataobj.injectinfo = injectinfo
        return dataobj
    else:
        dataobj.updateStatus(DataSplit=False)
        print 'Curve Splitting not successful!'

        return
689
690
def check_sample_info(dataobj, t_before, t_after, t_inj):
    """Check if the sample information consistent with the injection
    parameter input. If not, help the user to update sample information
    for the splitted curve.

    Returns (sampleinfo, injectinfo) on success, where injectinfo holds
    one [t_on, t_off, t_end] triple (relative to the window start) per
    injection; returns None when the mismatch cannot be resolved.
    Interactive when fewer injections were given than the sampleinfo has.
    """
    injectinfo = []
    if len(dataobj.sampleinfo) == len(t_inj):
        # counts agree: keep sampleinfo as-is, derive the windows
        for i,j in enumerate(t_inj):
            injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
        return dataobj.sampleinfo, injectinfo

    elif len(dataobj.sampleinfo) > len(t_inj):
        # user split only a subset: ask which injections were selected
        print """ The number of injection you just input does not match that in sample infomation.
 If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object."""
        choose = str.upper(raw_input('Continue? (y/n)'))
        if choose == 'Y':
            newsampleinfo, injectinfo = [], []
            print ' Your input indicates %s injections out of %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
            for n,i in enumerate(dataobj.sampleinfo):
                print '--',n,'.',i['Name'],':',i['Concentration']
            print ' Enter the ID number of the injections you just select, in the form of "2-10" or "4,5,6",'
            tmpstr = raw_input(':')
            if ',' in tmpstr:
                injID = map(int, tmpstr.split(','))
            elif '-' in tmpstr:
                injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
            else:
                print ' Illegal input!'
                return

            if len(injID) != len(t_inj):
                print ' Please input %s injection ID numbers instead of %s You just input.' %(len(t_inj),len(injID))
                return

            for i in injID:
                newsampleinfo.append(dataobj.sampleinfo[i])
                injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
            return newsampleinfo, injectinfo
        else:
            return

    elif len(dataobj.sampleinfo) < len(t_inj):
        # more injections requested than the experiment recorded: abort
        print ' The number of injection you just input does not match that in sample infomation.'
        print ' Your input indicates %s injections, where there is only %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
        print ' Please check.'

        return
738
739
740 def write_data(Head,data_out,ROI_out,DataDir,output_format):
741 """Write the data into a txt file.
742 """
743 # Modified 100405 CGL
744 # create a newfolder with time stamp to save the data
745 if output_format=='B':
746 ftitle='biosensor'
747 Tmpstr = str(ROI_out).replace(', ','_')
748 Tmpstr = Tmpstr.replace('(','-')
749 foutname = ftitle+Tmpstr[:-1]+'.txt'
750 elif output_format=='C':
751 foutname='clamp-'+str(ROI_out)+'.txt'
752 else:
753 ftitle=''
754
755 # Change directories, if requested.
756 if (DataDir != ""):
757 os.chdir('..')
758 try:
759 os.makedirs(DataDir)
760 ##except WindowsError:
761 except OSError:
762 pass
763 os.chdir(DataDir)
764
765 np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
766 fdt=file('temp.txt','r')
767
768 ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
769 fout=file(foutname,'w+')
770 # write the head of the data first
771 fout.write(Head+'\n')
772 count = len(fdt.readlines())
773 fdt.seek(0)
774 for i in range(count):
775 temp=fdt.readline()
776 fout.write(temp.replace(' ','\t'))
777 fdt.close()
778 fout.close()
779 print '-------The path of the exported data is "', os.getcwd(),'"-------'
780
781
def data_output_biosensor(obj, DataDir, *ROI_out):
    """Export the data into a biosensor format txt file,
    which can be operated in Scrubber.
    Input: the data object, the output path, the ROIs selected to export
    Return: a deep copy of obj carrying the exported matrix
    (or None when an ROI id does not match its position-derived id).
    """
    # initialize the data output matrix, which will contain 'len(spot_out)*2)*n_inj' columns
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum

    data_out = np.zeros(((len(ROI_out)*2)*n_inj,np.shape(data)[1]))
    head1 = ""
    new_ROIinfo = []
    for n,i in enumerate(ROI_out):
        new_ROIinfo.append(ROIinfo[i-1]) ##the new ROI info
        if i != ROIinfo[i-1]['IDcal']:
            # check if the ID number matches position
            print n,i, ROIinfo[i-1]
            return
        else:
            # column header: "<id>: <name>[, <conc>] Fc=<n> - <inj>_X/Y"
            headcomp = ROIinfo[i-1]['ID']+': '+ROIinfo[i-1]['Name']
            try:
                headcomp = headcomp + ', '+ROIinfo[i-1]['Conc']
            except KeyError:
                pass  # concentration column is optional
            headcomp = headcomp+' Fc='+str(n+1)+' - '
            # interleave time/signal column pairs, one pair per injection
            for j in range(n_inj):
                headtmp = headcomp+str(j+1)+'_'
                head1 = head1+headtmp+'X\t'+headtmp+'Y\t'
                data_out[2*n*n_inj+2*j]=data[j*(ROInum+1)]
                data_out[2*n*n_inj+2*j+1]=data[j*(ROInum+1)+i]
    head1=head1[:-1]  # drop the trailing tab

    dataobj.data = data_out
    dataobj.dataformat = 3 # biosensor dataformat
    dataobj.updateStatus(DataType='Biosensor data')
    dataobj.updateStatus(Head=head1)
    dataobj.updateROIinfo(new_ROIinfo)

    write_data(head1,data_out,ROI_out,DataDir,'B')

    return dataobj
826
def data_output_clamp(obj, DataDir, ROI_out):
    """Export the data into a Clamp format txt file,
    which can be operated in Clamp.
    Input: the data object, the output path, the selected ROI
    Return: a deep copy of obj carrying the exported matrix
    (or None when the ROI id does not match the ROI table).
    """
    # initialize the data output matrix, which will contain '2*n_inj' columns
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    n_inj = max(n_inj, 1) # Must have at least one injection.
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum
    sampleinfo = dataobj.sampleinfo

    data_out = np.zeros((2*n_inj,np.shape(data)[1]))
    ## if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
    if (ROI_out != int(ROIinfo[ROI_out-1]['ID'])):
        print 'Please check the ROI information.'
        print ROI_out, ROIinfo[ROI_out-1]
        return
    else:
        # header line, e.g. 'Vers 3.41 <id>: <name>, Conc:<conc>'
        tmp = ROIinfo[ROI_out-1]['ID']+': '+ROIinfo[ROI_out-1]['Name']
        try:
            tmp = tmp +', Conc:'+ROIinfo[ROI_out-1]['Conc']
        except KeyError:
            pass  # concentration column is optional
    Clamphead = 'Vers 3.41 '+tmp
    conc=''
    t_start,t_stop='',''
    head1='Time\tData\t'*n_inj
    head1=head1[:-1]  # drop the trailing tab

    # one Time/Data column pair per injection, plus the Conc1/Start1/
    # Stop1 header fields accumulated as tab-joined strings
    for i in range(n_inj):
        data_out[2*i]=data[i*(ROInum+1)]
        data_out[2*i+1]=data[i*(ROInum+1)+ROI_out]
        name = sampleinfo[i]['Name']
        conc=conc+'\t'+str(sampleinfo[i]['Concentration'])
        t_start=t_start+'\t'+str(dataobj.injectinfo[i][0])
        t_stop=t_stop+'\t'+str(dataobj.injectinfo[i][1])

    hconc,h_start,h_stop='\nConc1'+conc,'\nStart1'+t_start,'\nStop1'+t_stop
    for i in (hconc,h_start,h_stop):
        # NOTE(review): i[-1] is a single character, so this comparison
        # is always True -- the guard never filters anything.
        if i[-1]!='':
            Clamphead=Clamphead+i

    Clamphead=Clamphead+'\n'+head1

    dataobj.data = data_out
    dataobj.dataformat = 4 # clamp dataformat
    dataobj.updateStatus(DataType='Clamp data')
    dataobj.updateStatus(Head=Clamphead)
    dataobj.updateROIinfo(ROIinfo[ROI_out-1])

    write_data(Clamphead,data_out,ROI_out,DataDir,'C')

    return dataobj
    ## return data_out,Clamphead
883 ## return data_out,Clamphead
884
885
886 if (__name__ == '__main__'):
887 # Print a short message if this module is run like an app.
888 print "This is the OSPRAI file type conversion tool."
889