ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
Revision: 25
Committed: Wed Apr 28 20:22:24 2010 UTC (9 years, 1 month ago) by rjaynes
File size: 35996 byte(s)
Log Message:
Add py and obj files to allow modeling of more SPR experiments with converter and curvefitting modules.  This is the work of Yuhang Wan and Rui Hou.

1. In "converter.py": 
      Add the saving and reading function for the sprclass data object.
      Also add function "keyfile_read_fake" to provide default information for the SPRit and ICM formats, in case of the bug that occurs when running background_subtract.
      Fix the bugs in "background_subtract".
      Tested with DAM and ICM format files.
2. In model modules:
      "modelclass.py" is the parent class for all the other model classes that performs the theoretical simulating, loading and saving of the parameter or simulated data. Rui and I also add some other model modules like competing model, twostate model, parallel model, and the time variable concentrated models, where the simulated result is compared with Clamp's simulation to make sure the equations are correct. 
       The basicmodel and basicmodel_varyC class are tested. 
3. In "curvefitting.py":
      Add typical pipeline for operation. The examples are packed with the file. 
      Add function to show the Elapsed time for each fitting.
Line File contents
1 """
2 Converter: Perform the datafile reading, pre-processing and writing.
3 Yuhang Wan, Christopher Lausted
4 Last modified on 100427 (yymmdd) by YW
5
6 Typical Pipeline:
7 >import converter as cv
8 >dat = cv.datafile_read("example.txt")
9 >dat = cv.outlier_remove(dat)
10 >dat = cv.calibrate(dat)
11 >dat = cv.background_subtract(dat)
12 >dat = cv.curve_split(dat, 100, 800, 200)
13 >dat = cv.data_output_clamp(dat, "/dir", 1)
14 >dat = cv.data_output_biosensor(dat, "/dir", 1,2,3,4)
15 >dat = cv.obj_save(dat, "saved_dataobj")
16 """
17 __version__ = "100427"
18
19 ## Import libraries.
20 import numpy as np
21 import pylab as plt
22 ##import sys
23 import xlrd
24 import time
25 import os
26 import copy
27 import pickle
28 import SPRdataclass as spr
29
## Global constants and variables.
## Input-file format codes: returned by check_format_input() and used by
## datafile_read() to dispatch to the matching reader function.
FMT_UNKNOWN = 0
FMT_SPRIT = 1 ## Old Plexera SPRit format.
FMT_ICMTXT = 2 ## Plexera Instrument Control Module text.
FMT_CLAMP34 = 3 ## Morton & Myszka CLAMP Version 3.4+
FMT_DAMXLS = 4 ## Plexera Data Analysis Module Excel export.
FMT_DATAOBJ = 5 ## Packed spr Data Object
37
38 def check_format_input(fname):
39 """Examine the format of an input .txt data file."""
40 ## Modified 100407 CGL.
41
42 ## Open text file. Exit if file not found.
43 try:
44 fp = open(fname, "r")
45 except IOError:
46 print "Error: Unable to open file %s for reading." % fname
47 return
48
49 ## Examine first line of text file to guess format.
50 fmt = FMT_UNKNOWN
51 sprit_line1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
52 clamp_line1="Vers 3.4"
53 line1 = fp.readline()
54 if (sprit_line1 in line1):
55 fmt = FMT_SPRIT
56 elif (line1[2]=="/" and line1[5]=="/"):
57 ## This format has date mm/dd/yyy in first column.
58 fmt=FMT_ICMTXT
59 elif (clamp_line1 in line1):
60 fmt = FMT_CLAMP34
61
62 fp.close()
63 return fmt, line1
64
65
66 def sprit_format(fname):
67 """Read the SPRit formatted data and reshape it.
68 Return the data in the form of list.
69 """
70 # open a file, if file doesn't exit, then exit
71 try:
72 fp = open(fname, "r")
73 except IOError:
74 print 'Cannot open file %s for reading' % fname
75 return
76 status = {'DatainputType':'SPRit data'}
77 # skip the first line of the input file
78 Tmpstr = fp.readline()
79 # check if the second line is BEGIN
80 TmpStr = fp.readline()
81 if ("BEGIN" not in TmpStr):
82 print "Warning: Second line of data file is not 'BEGIN'"
83 ##sys.exit(0)
84 # count the lines of each data spot
85 TmpStr = fp.readline() # skip first "0.000000e+000"
86 num_line = 1 # so we start to count from 1
87 # count until read the "0.000000e+000"
88 while True:
89 TmpStr = fp.readline()
90 if TmpStr.split('\t')[0] == "0.000000e+000":
91 break
92 else:
93 num_line = num_line + 1
94 # count the spots in each file
95 num_spot = 0
96 # skip the first two lines
97 fp.seek(0)
98 for i in range(0,2):
99 TmpStr = fp.readline()
100 text = fp.readlines()
101 for i in text:
102 num_spot += i.count('0.000000e+000')
103 fp.close()
104 #---------------------- Reshaping the data --------------------
105 # reshape the data in the string form num_line*num_spot
106 Shaped_str = np.reshape(text,(num_spot,-1))
107 tmpshape=np.shape(Shaped_str)
108 # reshape the data in the number form: num_line*(num_spot+1)
109 # only the first column is time: time spotl spot2 ... spot n
110 time = np.zeros(num_line)
111 Tmpdata = Shaped_str[0,:]
112 for n,i in enumerate(Tmpdata):
113 time[n]=float(i.split('\t')[0])
114 Shaped_data_1 = np.zeros((tmpshape[0]+1,tmpshape[1]))
115 Shaped_data_1[0,:]=time
116 for n1,i in enumerate(Shaped_str):
117 for n2,j in enumerate(i):
118 Shaped_data_1[n1+1][n2]=float(Shaped_str[n1][n2].split('\t')[1])
119 # reshape the data in the number form:
120 # time1 spotl time2 spot2 ... timen spot n
121 Shaped_data_2 = np.zeros((tmpshape[0]*2,tmpshape[1]))
122 for i in range(tmpshape[0]):
123 Shaped_data_2[i*2]=Shaped_data_1[0]
124 Shaped_data_2[i*2+1] = Shaped_data_1[i+1]
125
126 ## status = {'Datainput Type':'SPRit data'}
127 dataformat = 0
128 ROInum = num_spot
129 dataobj=spr.DataPass(Shaped_data_1, ROInum, dataformat)
130 dataobj.status = {}
131 dataobj.updateStatus(**status)
132 print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
133
134 return dataobj
135
136
def plexera_icm_format(fname):
    """Read the txt data exported from Plexera ICM software and reshape it.
    Return the data in the form of list.

    Input: fname -- tab-separated text file; column 0 holds "date time"
           stamps, the remaining columns hold one signal per spot.
    Return: spr.DataPass object; .data row 0 is elapsed time in seconds
            (zeroed at the first sample), rows 1..n are the spot signals.
    """
    fp=file(fname,'r')   # NOTE(review): handle is never closed
    Tmpdata,tmptime=[],[]
    status = {'DatainputType':'Plexera ICM data'}
    # one list entry per line, split on tabs
    for eachline in fp:
        Tmpdata.append(eachline.split('\t'))
    num_line,num_spot=np.shape(Tmpdata)
    # initialize the Shaped_data
    Shaped_data = np.zeros((num_spot,num_line))
    num_spot=num_spot-1 # the real number of spot (first column is time)
    Tmpdata=np.transpose(Tmpdata)
    # delete the last '\r\n' of the data in last column of the datafile
    for n,i in enumerate(Tmpdata[-1]):
        Tmpdata[-1][n]=i.rstrip()
    # time converting: convert the first column of the datafile (date and
    # time information) into numerical time.
    # Assumes stamps look like "<date> hh:mm:ss" -- TODO confirm; a missing
    # space or time field would raise IndexError here.
    for i in Tmpdata[0]:
        tmptime_str=i.split(' ')[1]
        timelist=tmptime_str.split(':')
        now=float(timelist[0])*60*60+float(timelist[1])*60+float(timelist[2])
        tmptime.append(now)
    Shaped_data[0]=tmptime
    # re-zero the time axis at the first sample
    Shaped_data[0]=Shaped_data[0]-Shaped_data[0][0]
    # assign the signal data
    for i in range(num_spot):
        Shaped_data[i+1]=Tmpdata[i+1]

    dataformat = 0
    ROInum = num_spot
    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
    return dataobj
174
175
176
177 def dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize):
178 """Retrieve and shape data form one single sheet from the Spreadsheet.
179 Return the data, ROI infomation in the form of list.
180 """
181 ## and return the minimal spot Id (start_spot)
182 ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet
183
184 Head = sh.row_values(1)
185 SpotList = []
186 num_spot = sh.row_values(0).count('Relative Time')
187
188 for i in range(num_spot):
189 TmpHead=unicode.encode(Head[colsize*i])
190 # now there are 5 columns for each ROI for Raw Intensity
191 # and 4 columns for each ROI for Satellite subtracted Intensity
192 DataList.append(sh.col_values(2+colsize*i,start,end))
193
194 key, val = [], []
195 tmpinfo = TmpHead.split('\n')
196 for j in tmpinfo:
197 key.append(j.split(': ')[0])
198 val.append(j.split(': ')[1])
199 tmpdic = dict(zip(key,val))
200 try: # spot id
201 tmpdic['ID'] = int(tmpdic['ID']) # either in form of "spot1" or "1"
202 except ValueError:
203 if str.upper(tmpdic['ID']).startswith('SPOT'):
204 tmpdic['ID'] = int(tmpdic['ID'][4:])
205 else:
206 print "Warning: Illegal galfile format."
207 print "The ID is neither 'spot1' form nor '1' form. \n", "'ID:'", tmp
208
209 tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
210 ROIinfo.append(tmpdic)
211
212 return DataList,ROIinfo
213
def dam_spreadsheet_format(book):
    """Read the spreadsheet exported from Plexera DAM software and reshape it.
    Return the shaped data and ROI information.

    Input: book -- xlrd workbook; every non-empty sheet holds blocks of
           ROI columns (see dam_single_sheet).
    Return: spr.DataPass object (dataformat 0); .data row 0 is time and
            row k holds the spot whose ID is k counted from the smallest
            spot ID present.
    """
    ROIinfo, newROIinfo, n_sheet = [], [], 0
    DataList=[]
    status = {'DatainputType':'Plexera DAM data'}
    sh0=book.sheet_by_index(0)
    # decide the per-ROI column stride from the header style.
    # NOTE(review): if row 0 matches neither pattern, colsize stays unbound
    # and dam_single_sheet below raises NameError -- confirm these are the
    # only two header styles DAM produces.
    if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
        print 'This is the satellite subtracted data.'
        colsize = 4
        status ['DAM spreadsheet'] = 'Satellite subtracted Intensity'
    elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
        print 'This is the raw intensity data.'
        colsize = 5
        status ['DAM spreadsheet'] = 'Raw Intensity'

    # find the first row where the signal column becomes non-zero; data is
    # taken from one row before that through the end of the sheet
    for i in range(2,sh0.nrows):
        if sum(sh0.col_values(2,1,i))>0:
            break
    start,end = i-1,sh0.nrows
    num_line = end - start
    ## print 'start: ', start
    # the time axis initialization and assignment
    Time=sh0.col_values(1,start,end)
    # in case we lose the time axis during the DAM processing
    if sum(Time)==0: Time=np.arange(num_line)
    DataList.append(Time)

    # list DataList to store the Shaped_data of each ROI in each sheet,
    # list ROIinfo to store the Spot id and the ligand name of each ROI in each sheet
    for i in range(book.nsheets):
        sh=book.sheet_by_index(i)
        if sh.ncols!=0:
            DataList,ROIinfo = dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize)
            n_sheet=n_sheet+1
        else:
            break
    # calculate the dim of the Shaped_data, and initialize the matrix
    num_spot=len(DataList)-1
    num_line=end-start
    Shaped_data=np.zeros((num_spot+1,num_line))
    Shaped_data[0]=DataList[0]

    # in case that the ROIs doesn't count from spot1
    newROIinfo = copy.deepcopy(ROIinfo)
    StartspotList=np.array([i['ID'] for i in ROIinfo])
    start_spot=min(StartspotList)

    # place each ROI's signal at the row given by its (re-based) spot ID
    for i in range(num_spot):
        tmp = ROIinfo[i]['ID']
        j=tmp-(start_spot-1)
        Shaped_data[j]=DataList[i+1]
        newROIinfo[j-1]=ROIinfo[i]

    # pack the date and relative information into SPRdata obj
    dataformat = 0
    ROInum = num_spot
    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj
278
279
def read_clamp_data(fname):
    """Read the Clamp format data
    Return the data in the form of list.

    Input: fname -- CLAMP 3.4x text file. Line 1 looks like
           'Vers 3.41 cTBA-1 Conc:250 nM'; optional Conc1/Start1/Stop1
           header lines precede the tab-separated numeric block.
    Return: spr.DataPass object with dataformat 4 (clamp layout).
    """
    fp=open(fname,'r')
    Tmpstr=fp.readline()
    # examine the file head
    # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
    ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
    try:
        name = Tmpstr.split(' ')[2].strip()
        ROIdic['ROI name'] = name
        stmp = Tmpstr.split(' ')[3]
        rest = Tmpstr[Tmpstr.find(stmp):].strip()
        # NOTE(review): split(':') stores a list like ['Conc', '250 nM'];
        # confirm whether only the value part was intended.
        ROIdic['concentration'] = rest.split(':')
    except IndexError:
        pass

    # scan the header lines until the first line whose first field parses
    # as a float -- that line marks the beginning of the numeric data
    for Tmpstr in fp:
        try:
            float(Tmpstr.split('\t')[0])
            n_conc=(Tmpstr.count('\t')+1)/2
            ##print Tmpstr, 'the beginning of data'
            break
        except ValueError:
            if Tmpstr.startswith('Conc1'):
                n_conc=Tmpstr.count('\t')
                Conc=Tmpstr.strip().split('\t')[1:]
            elif Tmpstr.startswith('Start1'):
                Start=Tmpstr.strip().split('\t')[1:]
                n_conc=Tmpstr.count('\t')
            elif Tmpstr.startswith('Stop1'):
                Stop=Tmpstr.strip().split('\t')[1:]
                n_conc=Tmpstr.count('\t')


    # write the data into matrix
    Tmpdata=[]
    for eachline in fp:
        a=eachline.strip().split('\t')
        Tmpdata.append(np.array(a,dtype='f'))
    Data=np.transpose(Tmpdata)

    # write the infomation
    ROIinfo = [ROIdic]
    status = {'DatainputType':'Clamp data'}
    sampleinfo, injinfo = [], []
    # NOTE(review): Start/Stop are bound only if the file contained
    # Start1/Stop1 header lines; otherwise this raises NameError --
    # confirm those lines are mandatory in CLAMP exports.
    injpara = [map(float,i) for i in [Start,Stop]]

    if len(Conc) >= 1:
        for i,c in enumerate(Conc):
            sampledic = {}
            name = 'Sample '+str(i+1)
            sampledic = dict(zip(['Name','Concentration'],[name,c]))
            ## sampledic['Name'] = name
            ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
            sampleinfo.append(sampledic)
            injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
            injinfo.append(injdic)
    dataobj=spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    return dataobj
344
345
346
347 def keyfile_read(fkey):
348 # The Key File contains
349 """Function: Read the key file for old SPR instrument.
350 Input: the filename.
351 Return: the ROI information.
352 """
353 try:
354 fp = open(fkey, "r")
355 except IOError:
356 print "Cannot open file %s for reading" % fname
357 return
358 ROIinfo = []
359 firstline=fp.readline() # skip the first line
360 # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
361 for eachline in fp:
362 ROIdic = {}
363 tmplist = eachline.strip().split('\t')
364
365 try: # spot id
366 ROIdic['ID'] = int(tmplist[0]) # either in form of "spot1" or "1"
367 except ValueError:
368 if str.upper(tmplist[0]).startswith('SPOT'):
369 ROIdic['ID'] = int(tmplist[0][4:])
370 else:
371 print "Error: Unable to read Spot IDs"
372
373 ROIdic['Name'] = tmplist[1] # name of the protein immobilized
374 try:
375 ROIdic['Conc'] = tmplist[2]
376 ROIdic['Background Spot'] = map(int, tmplist[3].split(';')) # in case more than one spot set as background with ";" as SEPARATOR
377 except IndexError:
378 pass
379 ROIinfo.append(ROIdic)
380
381 fp.close()
382 print 'There are %d ROIs in the Key file.' % len(ROIinfo)
383 return ROIinfo
384
def keyfile_read_fake(obj):
    """Build placeholder ROI information when no key file is available.

    Input: obj -- data object exposing .ROInum (number of ROIs).
    Return: list with one dict per ROI: ID numbered from 1 and a
            default Name of the form "Ligand<ID>".
    """
    return [{'ID': n, 'Name': "Ligand" + str(n)}
            for n in range(1, obj.ROInum + 1)]
394
395
def galfile_read(fname):
    """Function: Read the GAL file.
    Input: the filename.
    Return: the ROI information -- a list of dicts with Position, IDcal,
            Name, ID and optionally Conc/Family/Background Spot keys.
    """
    fp=file(fname,'r')
    ROIinfo = []
    Tmpstr = fp.readline() # skip the first line
    Tmpstr = fp.readline()
    Tmpn = int(Tmpstr.split('\t')[0])   # number of header lines that follow
    for i in range(Tmpn):
        labelline = fp.readline()

        if labelline.startswith('"Block1'):
            # block geometry: columns and rows per block
            a = labelline.strip().split(',')
            columnsize, rowsize = int(a[3]), int(a[5])
            ## print labelline
            # map a (block, row, column) triple to a linear 1-based spot id.
            # NOTE(review): if no '"Block1' header exists, IDcal is never
            # defined and the loop below raises NameError -- confirm the
            # header is mandatory in GAL files.
            def IDcal(p):
                id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
                return id

    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        pos = tuple(map(int,tmplist[:3]))
        ROIdic['Position'] = pos
        ROIdic['IDcal'] =IDcal(pos)
        ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]

        ROIdic['Name'] = tmplist[3]
        ## ROIdic['ID'] = tmplist[4]
        try: # spot id
            ROIdic['ID'] = int(tmplist[4]) # either in form of "spot1" or "1"
        except ValueError:
            if str.upper(tmplist[4]).startswith('SPOT'):
                ROIdic['ID'] = int(tmplist[4][4:])
            else:
                print "Error: Unable to read Spot IDs"

        try:
            ROIdic['Conc'] = tmplist[5]
            ROIdic['Family'] = tmplist[6]
            blist = tmplist[7][1:-1].split(';') # always something like '"1,1,2; 1,2,2"'
            Backn = len(blist)   # NOTE(review): unused
            Bid = []

            # convert each background position triple to its linear id;
            # an empty component means "no background" and stops the scan
            for i in blist:
                if '' in i.split(','):
                    break
                Bpos = tuple(map(int, i.split(',')))
                Bid.append(IDcal(Bpos))

            ROIdic['Background Spot'] = Bid ##tuple(Bid)
        except IndexError:
            pass

        # the declared ID must agree with the id computed from the position
        if ROIdic['ID'] != ROIdic['IDcal']:
            print 'The spot ID should be consistent with the position. Please check the ID.'
            break
        else:
            ROIdic['ID'] = ROIdic['IDcal']
        ROIinfo.append(ROIdic)
    fp.close()
    print 'There are %d ROIs in the Gal file.' % len(ROIinfo)

    return ROIinfo
463
464
465
466 def method_read(fname):
467 """Function: Read the analyte table.
468 Input: the filename
469 Return: the sample information
470 """
471 # create a concentration dictionary for the sample injected into the flowcell
472 # export a table containing the concentration of the sample injected, and the duration
473 fp=file(fname,'r')
474 sampleinfo = []
475 ##TmpTable,TmpTable2={},{}
476 labelline=fp.readline()
477 for n,eachline in enumerate(fp):
478 sampledic = {}
479 sampledic['Location'] = eachline.split('\t')[0]
480 sampledic['Name'] = eachline.split('\t')[1]
481 sampledic['Concentration'] = float(eachline.split('\t')[2])
482 sampledic['Duration'] = float(eachline.split('\t')[4])
483 sampledic['Flow Rate'] = float(eachline.split('\t')[3])
484 sampledic['Analyte Series'] = eachline.split('\t')[6]
485 sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
486 sampleinfo.append(sampledic)
487
488 print 'Name','\t','Concentration'
489 for i in sampleinfo:
490 print i['Name'],'\t', i['Concentration']
491
492 return sampleinfo
493
def method_read_fake():
    """Return a single placeholder sample-information entry.

    Used when no experimental analyte table is loaded; every field is 0.
    """
    keys = ('Location', 'Name', 'Concentration', 'Duration',
            'Flow Rate', 'Analyte Series', 'Buffer Blank Series')
    return [dict.fromkeys(keys, 0)]
498
499
def datafile_read(fname):
    """Function: Read the raw data from the Plexera instrument,
    and pack the raw data (Shaped_data) into dataobject.
    Therefore, the initial dataformat = 0
    Input: the filename; the extension (.txt/.xls/.obj) selects the reader.
    Return: the packed SPR.DataPass type data object

    Interactive: prompts on stdin for optional key / gal / analyte files.
    NOTE(review): for any other extension, Format_input and dataobj are
    never bound and the code at the bottom raises NameError -- confirm
    only txt/xls/obj inputs are expected.
    """
    #===================check the format of the input files-------
    if fname.split('.')[-1] == 'txt':

        Format_input, Firstline = check_format_input(fname)
        if Format_input == FMT_SPRIT: # for SPRit file format
            dataobj = sprit_format(fname)
            print '-'*10,' This is a SPRit data file. ','-'*10

        elif Format_input == FMT_ICMTXT: # for Plexera ICM file format
            dataobj = plexera_icm_format(fname)
            print '-'*10,' This is a Plexera ICM exported data file. ','-'*10

        elif Format_input == FMT_CLAMP34: # for Clamp datafile format
            dataobj = read_clamp_data(fname)
            print '-'*10,' This is a Clamp file. ','-'*10

        else:
            print '-'*10,' unreadable file input! ','-'*10
            return

        # SPRit/ICM files carry no ROI info of their own: load a key file
        # or fall back to generated placeholder info
        if Format_input == FMT_SPRIT or Format_input == FMT_ICMTXT:
            flag = str.upper(raw_input('Load the key file? (y/n): '))
            if flag == 'Y':
                fkey = raw_input('Input the path of the key file: ')
                ROIinfo = keyfile_read(fkey)
                dataobj.updateROIinfo(ROIinfo)
            else:
                dataobj.updateROIinfo(keyfile_read_fake(dataobj))

    elif fname.split('.')[-1] == 'xls':
        # for the DAM Spreadsheet file format
        print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
        Format_input = FMT_DAMXLS
        book = xlrd.open_workbook(fname)
        #shape the data in the Spreadsheet, whether single sheet or multiple sheets
        dataobj=dam_spreadsheet_format(book)
        flag = str.upper(raw_input('Load the gal file? (y/n): '))
        if flag == 'Y':
            fgal = raw_input('Input the path of the gal file: ')
            ROIinfo = galfile_read(fgal)
            dataobj.updateROIinfo(ROIinfo)

    elif fname.split('.')[-1] == 'obj':
        # for the saved data object file format (see obj_save)
        print '-'*10,' This is a saved data object file. ','-'*10
        Format_input = FMT_DATAOBJ
        # NOTE(review): pickle data is read in text mode 'r+'; 'rb' looks
        # safer (matters on Windows) -- confirm against obj_save's mode.
        fp = file(fname, 'r+')
        dataobj = pickle.load(fp)
        fp.close()

    flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
    if flag == 'Y':
        fprotocol = raw_input('Input the path of the analyte table: ')
        sampleinfo = method_read(fprotocol)
        dataobj.updateSampleinfo(sampleinfo)
    elif Format_input != FMT_CLAMP34 and Format_input != FMT_DATAOBJ:
        # clamp data and obj data may contain the experimental protocol already
        dataobj.updateSampleinfo(method_read_fake())

    return dataobj
567
568
def outlier_remove(obj):
    """Function: Remove unwanted data points from noisy periods.
    Prompts interactively for time windows ("200:240, 500:510") and
    deletes the matching samples from every row of the data.
    Return: a new data object with the cleaned data (the input object is
            left unmodified).
    """
    dataobj = copy.deepcopy(obj)
    print ('Select the time area that you want to remove.')
    tmpstr = raw_input('in the format of "200:240, 500:510" :')
    areatodel = tmpstr.split(',')
    data = dataobj.data
    for i in areatodel:
        t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
        t = data[0] # for dataformat=0 Shaped_data
        tmpind = plt.find((t>t1) & (t<t2)) # find the outlier indices
        if len(tmpind) == 0:
            # Bug fix: an empty window made min()/max() below raise
            # ValueError on an empty sequence; skip such windows.
            continue
        # NOTE(review): the slice keeps the sample at max(tmpind) --
        # confirm whether the last point inside the window should be
        # deleted as well.
        tmplist = data.tolist()
        tmplist2 = []
        for j in tmplist:
            del j[min(tmpind):max(tmpind)]
            tmplist2.append(j)
        data = np.array(tmplist2)
    dataobj.data = data
    dataobj.updateStatus(outlier_remove=True)
    return dataobj
596
def calibrate(obj):
    """Function: calibrate the Intensity response.
    Return: the data object with calibrated data
    Note: at present, this function is valid only when
    the whole procedure includes calibration precedure.

    Interactive: prompts for the times and refractive indices of the TOP
    and BOTTOM calibration responses, then maps every signal row through
    the per-row linear fit defined by those two points.
    NOTE(review): py2 input() evaluates the typed text; raw_input+float
    would be safer -- confirm intended usage.
    """
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    caldata = copy.deepcopy(data)
    t = data[0]
    if dataobj.dataformat != 0:
        print 'We only calibrate the raw data.'
        return
    else:
        t1 = input('Enter the time position of the TOP response during calibration:')
        s1 = input('The refractive index of the TOP response is: ')
        t2 = input('Enter the time position of the BOTTOM response during calibration:')
        s2 = input('The refractive index of the BOTTOM response is: ')
        # indices of the samples closest to the requested times
        ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
        ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]

    # per-row linear map through (y1, s1) and (y2, s2)
    # NOTE(review): y1 == y2 would divide by zero -- confirm inputs.
    for i,y in enumerate(data[1:]):
        y1, y2 = y[ind1], y[ind2]
        slope = (s1-s2)/(y1-y2)
        offset = s1- slope*y1
        caldata[i+1] = slope*y+offset
    dataobj.data = caldata
    dataobj.updateStatus(calibrate=True)
    return dataobj
626
def get_background_value(obj, bgids):
    """Average the signals of the given background spots.

    obj   -- data object; obj.data row 0 is time, row k is spot k's signal,
             obj.ROIinfo[k-1] normally describes spot k.
    bgids -- iterable of background spot IDs (1-based).
    Return: array of the same length as one data row -- the mean of the
            selected background rows.
    """
    total = obj.data[0] * 0  # zero vector matching the row length
    for spot_id in bgids:
        # Fast path: ROIinfo entry spot_id-1 holds that spot's record.
        # (The ID may be stored as a string, hence int().)
        if spot_id == int(obj.ROIinfo[spot_id - 1]['ID']):
            total = total + obj.data[spot_id]
        else:
            # ROIinfo is out of order: scan for the matching ID.
            for row, spot in enumerate(obj.ROIinfo):
                if spot_id == int(spot['ID']):
                    total = total + obj.data[row + 1]
    return total / len(bgids)
641
642
643 def background_subtract(obj, *bgspot):
644 """Function: Perform the Background subtraction for the UNSPLIT curve.
645 Input besides "obj" is the id of the spot taken as background.
646 The following inputs are acceptable:
647 1. background_subtract(obj): the default background in Galfile
648 will be subtracted.
649 2. background_subtract(obj, 1, 6): the average value of spot1
650 and spot6 will be subtracted.
651 """
652 dataobj = copy.deepcopy(obj)
653 ROIinfo = obj.ROIinfo
654 data = dataobj.data
655
656 if dataobj.dataformat == 0:
657 newdata = np.zeros(np.shape(data))
658 newdata[0] = data[0] # the time scale
659 if bgspot == ():
660 # The average of the default background spots listed in the galfile
661 #are to be subtracted.
662 for i in range(1,dataobj.ROInum+1):
663 bgids = ROIinfo[i-1]['Background Spot']
664 bgsignal = get_background_value(dataobj,bgids)
665 newdata[i] = data[i]-bgsignal
666 # update the status of the data object.
667 dataobj.updateStatus(BackgroundType='Default in Gal')
668 print "The background spot embodied in gal is used."
669
670 else:
671 # The average of the manually input background spots
672 #are to be subtracted.
673 for i in range(1,dataobj.ROInum+1):
674 bgsignal = get_background_value(dataobj,bgspot)
675 newdata[i] = data[i]-bgsignal
676 dataobj.updateStatus(BackgroundType='Manually chosen')
677 print "The background spot is manually chosen."
678
679 dataobj.data = newdata
680 dataobj.updateStatus(background_subtraction=True)
681 return dataobj
682
683 else:
684 print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
685 return
686
def curve_split(obj, t_before, t_after, *t_inj):
    """Function: Split the whole curve that contains several injections
    into pieces.
    The sample information will be checked during the split,
    if the injection number in sampleinfo is not consistant
    with the input injection parameter, it will help you to
    update the sample information.
    Input besides obj:
    t_before: time duration before the injection time point
    t_after: time duration after the injection time point
    t_inj: the exact start time for each injection
    Return: the data object with splitted data, and updated sample
    information
    """
    dataobj = copy.deepcopy(obj)
    ROInum = dataobj.ROInum
    t=dataobj.data[0]
    ind=[]
    # find how many lines of data each injection
    for i in range(len(t_inj)):
        ind.append(np.shape(plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after))))[0])
    # m = longest window; j = spread between longest and shortest window
    m,j = max(ind),max(ind)-min(ind)
    # the new dataset must be a m*((ROInum+1)*num_inj) matrix:
    # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
    tmpdata = np.zeros(((ROInum+1)*len(t_inj),m))

    for i in range(len(t_inj)):
        TmpInd = plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after)))
        Tmp1,Tmp2 = i*(ROInum+1),(i+1)*(ROInum+1)
        tmpdata[Tmp1:Tmp2,0:len(TmpInd)] = dataobj.data[:,TmpInd]
    if j!=0:
        # windows matched different sample counts: trim all blocks to the
        # shortest so the matrix stays rectangular
        Split_data = tmpdata[:,:-j]
    else:
        Split_data = tmpdata[:]
    # zero and align the split data
    for i in range(np.shape(Split_data)[0]):
        Split_data[i]=Split_data[i]-Split_data[i][0]

    # NOTE(review): this unpack expects a 2-tuple; check_sample_info's
    # failure paths appear to return a bare None, which would raise
    # TypeError here before the None check below runs -- confirm.
    newsampleinfo, injectinfo = check_sample_info(dataobj,t_before, t_after, t_inj)
    if newsampleinfo != None :
        dataobj.data = Split_data
        dataobj.updateStatus(DataSplit=True)
        dataobj.updateSampleinfo(newsampleinfo)
        dataobj.dataformat = 2
        dataobj.injectinfo = injectinfo
        return dataobj
    else:
        dataobj.updateStatus(DataSplit=False)
        print 'Curve Splitting not successful!'

    return
738
739
740 def check_sample_info(dataobj, t_before, t_after, t_inj):
741 """Check if the sample information consistent with the injection
742 parameter input. If not, help the user to update sample information
743 for the splitted curve.
744 """
745 injectinfo = []
746 if len(dataobj.sampleinfo) == len(t_inj):
747 for i,j in enumerate(t_inj):
748 injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
749 return dataobj.sampleinfo, injectinfo
750
751 elif len(dataobj.sampleinfo) > len(t_inj):
752 print """ The number of injection you just input does not match that in sample infomation.
753 If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object."""
754 choose = str.upper(raw_input('Continue? (y/n)'))
755 if choose == 'Y':
756 newsampleinfo, injectinfo = [], []
757 print ' Your input indicates %s injections out of %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
758 for n,i in enumerate(dataobj.sampleinfo):
759 print '--',n,'.',i['Name'],':',i['Concentration']
760 print ' Enter the ID number of the injections you just select, in the form of "2-10" or "4,5,6",'
761 tmpstr = raw_input(':')
762 if ',' in tmpstr:
763 injID = map(int, tmpstr.split(','))
764 elif '-' in tmpstr:
765 injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
766 else:
767 print ' Illegal input!'
768 return
769
770 if len(injID) != len(t_inj):
771 print ' Please input %s injection ID numbers instead of %s You just input.' %(len(t_inj),len(injID))
772 return
773
774 for i in injID:
775 newsampleinfo.append(dataobj.sampleinfo[i])
776 injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
777 return newsampleinfo, injectinfo
778 else:
779 return
780
781 elif len(dataobj.sampleinfo) < len(t_inj):
782 print ' The number of injection you just input does not match that in sample infomation.'
783 print ' Your input indicates %s injections, where there is only %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
784 print ' Please check.'
785
786 return
787
788
789 def write_data(Head,data_out,ROI_out,DataDir,output_format):
790 """Write the data into a txt file.
791 """
792 # Modified 100405 CGL
793 # create a newfolder with time stamp to save the data
794 if output_format=='B':
795 ftitle='biosensor'
796 Tmpstr = str(ROI_out).replace(', ','_')
797 Tmpstr = Tmpstr.replace('(','-')
798 foutname = ftitle+Tmpstr[:-1]+'.txt'
799 elif output_format=='C':
800 foutname='clamp-'+str(ROI_out)+'.txt'
801 else:
802 ftitle=''
803
804 # Change directories, if requested.
805 if (DataDir != ""):
806 os.chdir('..')
807 try:
808 os.makedirs(DataDir)
809 ##except WindowsError:
810 except OSError:
811 pass
812 os.chdir(DataDir)
813
814 np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
815 fdt=file('temp.txt','r')
816
817 ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
818 fout=file(foutname,'w+')
819 # write the head of the data first
820 fout.write(Head+'\n')
821 count = len(fdt.readlines())
822 fdt.seek(0)
823 for i in range(count):
824 temp=fdt.readline()
825 fout.write(temp.replace(' ','\t'))
826 fdt.close()
827 fout.close()
828 print '-------The path of the exported data is "', os.getcwd(),'"-------'
829 return
830
831
def data_output_biosensor(obj, DataDir, *ROI_out):
    """Export the data into a biosensor format txt file,
    which can be operated in Scrubber.
    Input: the data object, the output path, the ROIs selected to export
    Return: a new data object with dataformat 3 holding the exported
            matrix, or None if an ROI fails the ID/position check.
    """
    # initialize the data output matrix, which will contain 'len(spot_out)*2)*n_inj' columns
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum

    # one (time, signal) row pair per selected ROI per injection
    data_out = np.zeros(((len(ROI_out)*2)*n_inj,np.shape(data)[1]))
    head1 = ""
    new_ROIinfo = []
    for n,i in enumerate(ROI_out):
        new_ROIinfo.append(ROIinfo[i-1]) ##the new ROI info
        if i != ROIinfo[i-1]['IDcal']:
            # check if the ID number matches position
            # NOTE(review): 'IDcal' only exists when ROIinfo came from a
            # gal file -- confirm behavior for key-file/placeholder info.
            print n,i, ROIinfo[i-1]
            return
        else:
            # NOTE(review): 'ID' is an int after galfile_read, so this
            # string concatenation looks like it would raise TypeError --
            # verify the expected type of ROIinfo[...]['ID'].
            headcomp = ROIinfo[i-1]['ID']+': '+ROIinfo[i-1]['Name']
            try:
                headcomp = headcomp + ', '+ROIinfo[i-1]['Conc']
            except KeyError:
                pass
            headcomp = headcomp+' Fc='+str(n+1)+' - '
            for j in range(n_inj):
                headtmp = headcomp+str(j+1)+'_'
                head1 = head1+headtmp+'X\t'+headtmp+'Y\t'
                # split layout: injection j's block starts at row
                # j*(ROInum+1); row 0 of each block is the time axis
                data_out[2*n*n_inj+2*j]=data[j*(ROInum+1)]
                data_out[2*n*n_inj+2*j+1]=data[j*(ROInum+1)+i]
    head1=head1[:-1]   # drop the trailing tab

    dataobj.data = data_out
    dataobj.dataformat = 3 # biosensor dataformat
    dataobj.updateStatus(DataType='Biosensor data')
    dataobj.updateStatus(Head=head1)
    dataobj.updateROIinfo(new_ROIinfo)

    write_data(head1,data_out,ROI_out,DataDir,'B')

    return dataobj
876
def data_output_clamp(obj, DataDir, ROI_out):
    """Export the data into a Clamp format txt file,
    which can be operated in Clamp.
    Input: the data object, the output path, the selected ROI (single id)
    Return: a new data object with dataformat 4, or None if the ROI id
            does not match the recorded ROI information.
    """
    # initialize the data output matrix, which will contain '2*n_inj' columns
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    n_inj = max(n_inj, 1) # Must have at least one injection.
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum
    sampleinfo = dataobj.sampleinfo

    data_out = np.zeros((2*n_inj,np.shape(data)[1]))
    ## if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
    if (ROI_out != int(ROIinfo[ROI_out-1]['ID'])):
        print 'Please check the ROI information.'
        print ROI_out, ROIinfo[ROI_out-1]
        return
    else:
        # NOTE(review): 'ID' is an int after galfile_read, so this string
        # concatenation looks like it would raise TypeError -- verify the
        # expected type of ROIinfo[...]['ID'].
        tmp = ROIinfo[ROI_out-1]['ID']+': '+ROIinfo[ROI_out-1]['Name']
        try:
            tmp = tmp +', Conc:'+ROIinfo[ROI_out-1]['Conc']
        except KeyError:
            pass
    Clamphead = 'Vers 3.41 '+tmp
    conc=''
    t_start,t_stop='',''
    head1='Time\tData\t'*n_inj
    head1=head1[:-1]   # drop the trailing tab

    # one (time, signal) row pair per injection; accumulate the
    # Conc/Start/Stop header fields as tab-joined strings
    for i in range(n_inj):
        data_out[2*i]=data[i*(ROInum+1)]
        data_out[2*i+1]=data[i*(ROInum+1)+ROI_out]
        name = sampleinfo[i]['Name']
        conc=conc+'\t'+str(sampleinfo[i]['Concentration'])
        t_start=t_start+'\t'+str(dataobj.injectinfo[i][0])
        t_stop=t_stop+'\t'+str(dataobj.injectinfo[i][1])

    hconc,h_start,h_stop='\nConc1'+conc,'\nStart1'+t_start,'\nStop1'+t_stop
    for i in (hconc,h_start,h_stop):
        # NOTE(review): i[-1] != '' is always true for these non-empty
        # strings; the intent was probably to skip empty field lists --
        # confirm.
        if i[-1]!='':
            Clamphead=Clamphead+i

    Clamphead=Clamphead+'\n'+head1

    dataobj.data = data_out
    dataobj.dataformat = 4 # clamp dataformat
    dataobj.updateStatus(DataType='Clamp data')
    dataobj.updateStatus(Head=Clamphead)
    dataobj.updateROIinfo(ROIinfo[ROI_out-1])

    write_data(Clamphead,data_out,ROI_out,DataDir,'C')

    return dataobj
    ## return data_out,Clamphead
934
935 def obj_save(obj, foutname):
936 foutname = foutname + '.obj' # the file extension of the saved data object is .obj
937 fout = file(foutname, 'w+')
938 pickle.dump(obj, fout)
939 fout.close()
940 print '-------The path of the exported obj is "', os.getcwd(),'"-------'
941 return
942
if (__name__ == '__main__'):
    # Print a short message if this module is run like an app; the module
    # is otherwise meant to be imported (see the pipeline example in the
    # module docstring).
    print "This is the OSPRAI file type conversion tool."
946