ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
Revision: 6
Committed: Fri Apr 9 16:52:52 2010 UTC (9 years, 3 months ago) by clausted
File size: 33635 byte(s)
Log Message:
Added CONSTANTS to represent data file type identification integers.  These should be used throughout the code in the future to make it more readable.  Also added some default injection data to make it possible to convert a SPRit file to a CLAMP file without having an Analyte/injection file.  Also note a changed dictionary key IDcal->ID in one instance. --CGL
Line User Rev File contents
1 clausted 3 """
2 clausted 1 Converter: Perform the datafile reading, pre-processing and writing.
3 clausted 3 Yuhang Wan, Christopher Lausted
4 clausted 6 Last modified on 100407 (yymmdd)
5 clausted 1
6 clausted 3 Typical Pipeline:
7     >import converter as cv
8     >dat = cv.datafile_read("example.txt")
9     >dat = cv.outlier_remove(dat)
10     >dat = cv.calibrate(dat)
11     >dat = cv.background_subtract(dat)
12     >dat = cv.curve_split(dat, 100, 800, 200)
13     >dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
14     >dat = cv.data_output_biosensor(dat, "/dir", 0)
15     """
16 clausted 6 __version__ = "100407"
17 clausted 1
18 clausted 6 ## Import libraries.
19 clausted 1 import numpy as np
20     import pylab as plt
21     import sys
22     import xlrd
23     import time
24     import os
25     import copy
26 clausted 6 #import matplotlib.pyplot as plt
27 clausted 3 import SPRdataclass as spr
28 clausted 1
29 clausted 6 ## Global constants and variables.
30     FMT_UNKNOWN = 0
31     FMT_SPRIT = 1 ## Old Plexera SPRit format.
32     FMT_ICMTXT = 2 ## Plexera Instrument Control Module text.
33     FMT_CLAMP34 = 3 ## Morton & Myszka CLAMP Version 3.4+
34     FMT_DAMXLS = 4 ## Plexera Data Analysis Module Excel export.
35 clausted 1
36 clausted 6
37 clausted 3 def check_format_input(fname):
38 clausted 6 """Examine the format of an input .txt data file."""
39     ## Modified 100407 CGL.
40    
41     ## Open text file. Exit if file not found.
42 clausted 1 try:
43     fp = open(fname, "r")
44     except IOError:
45 clausted 6 print "Error: Unable to open file %s for reading." % fname
46 clausted 1 sys.exit(0)
47    
48 clausted 6 ## Examine first line of text file to guess format.
49     fmt = FMT_UNKNOWN
50     sprit_line1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
51     clamp_line1="Vers 3.4"
52     line1 = fp.readline()
53     if (sprit_line1 in line1):
54     fmt = FMT_SPRIT
55     elif (line1[2]=="/" and line1[5]=="/"):
56     ## This format has date mm/dd/yyy in first column.
57     fmt=FMT_ICMTXT
58     elif (clamp_line1 in line1):
59     fmt = FMT_CLAMP34
60    
61 clausted 1 fp.close()
62 clausted 6 return fmt, line1
63 clausted 1
64    
65 clausted 3 def sprit_format(fname):
66     """Read the SPRit formatted data and reshape it.
67 clausted 1 Return the data in the form of list.
68 clausted 3 """
69 clausted 1 # open a file, if file doesn't exit, then exit
70     try:
71     fp = open(fname, "r")
72     except IOError:
73     print 'Cannot open file %s for reading' % fname
74     sys.exit(0)
75     status = {'DatainputType':'SPRit data'}
76     # skip the first line of the input file
77     Tmpstr = fp.readline()
78     # check if the second line is BEGIN
79     TmpStr = fp.readline()
80 clausted 5 if ("BEGIN" not in TmpStr):
81     print "Warning: Second line of data file is not 'BEGIN'"
82     ##sys.exit(0)
83 clausted 1 # count the lines of each data spot
84     TmpStr = fp.readline() # skip first "0.000000e+000"
85     num_line = 1 # so we start to count from 1
86     # count until read the "0.000000e+000"
87     while True:
88     TmpStr = fp.readline()
89     if TmpStr.split('\t')[0] == "0.000000e+000":
90     break
91     else:
92     num_line = num_line + 1
93     # count the spots in each file
94     num_spot = 0
95     # skip the first two lines
96     fp.seek(0)
97     for i in range(0,2):
98     TmpStr = fp.readline()
99     text = fp.readlines()
100     for i in text:
101     num_spot += i.count('0.000000e+000')
102     fp.close()
103     #---------------------- Reshaping the data --------------------
104     # reshape the data in the string form num_line*num_spot
105     Shaped_str = np.reshape(text,(num_spot,-1))
106     tmpshape=np.shape(Shaped_str)
107     # reshape the data in the number form: num_line*(num_spot+1)
108     # only the first column is time: time spotl spot2 ... spot n
109     time = np.zeros(num_line)
110     Tmpdata = Shaped_str[0,:]
111     for n,i in enumerate(Tmpdata):
112     time[n]=float(i.split('\t')[0])
113     Shaped_data_1 = np.zeros((tmpshape[0]+1,tmpshape[1]))
114     Shaped_data_1[0,:]=time
115     for n1,i in enumerate(Shaped_str):
116     for n2,j in enumerate(i):
117     Shaped_data_1[n1+1][n2]=float(Shaped_str[n1][n2].split('\t')[1])
118     # reshape the data in the number form:
119     # time1 spotl time2 spot2 ... timen spot n
120     Shaped_data_2 = np.zeros((tmpshape[0]*2,tmpshape[1]))
121     for i in range(tmpshape[0]):
122     Shaped_data_2[i*2]=Shaped_data_1[0]
123     Shaped_data_2[i*2+1] = Shaped_data_1[i+1]
124    
125     ## status = {'Datainput Type':'SPRit data'}
126     dataformat = 0
127     ROInum = num_spot
128 clausted 3 dataobj=spr.DataPass(Shaped_data_1, ROInum, dataformat)
129 clausted 1 dataobj.status = {}
130     dataobj.updateStatus(**status)
131     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
132    
133     return dataobj
134    
135    
136 clausted 3 def plexera_icm_format(fname):
137     """Read the txt data exported from Plexera ICM software and reshape it.
138 clausted 1 Return the data in the form of list.
139 clausted 3 """
140 clausted 1 fp=file(fname,'r')
141     Tmpdata,tmptime=[],[]
142     status = {'DatainputType':'Plexera ICM data'}
143     for eachline in fp:
144     Tmpdata.append(eachline.split('\t'))
145     num_line,num_spot=np.shape(Tmpdata)
146     # initialize the Shaped_data
147     Shaped_data = np.zeros((num_spot,num_line))
148     num_spot=num_spot-1 # the real number of spot
149     Tmpdata=np.transpose(Tmpdata)
150     # delete the last '\r\n' of the data in last column of the datafile
151     for n,i in enumerate(Tmpdata[-1]):
152     Tmpdata[-1][n]=i.rstrip()
153     # time converting: convert the first column of the datafile(date and time information) into numerical time
154     for i in Tmpdata[0]:
155     tmptime_str=i.split(' ')[1]
156     timelist=tmptime_str.split(':')
157     now=float(timelist[0])*60*60+float(timelist[1])*60+float(timelist[2])
158     tmptime.append(now)
159     Shaped_data[0]=tmptime
160     Shaped_data[0]=Shaped_data[0]-Shaped_data[0][0]
161     # assign the signal data
162     for i in range(num_spot):
163     Shaped_data[i+1]=Tmpdata[i+1]
164    
165     dataformat = 0
166     ROInum = num_spot
167 clausted 3 dataobj=spr.DataPass(Shaped_data, ROInum, dataformat)
168 clausted 1 dataobj.status = {}
169     dataobj.updateStatus(**status)
170    
171     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
172     return dataobj
173    
174    
175    
176 clausted 3 def dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize):
177     """Retrieve and shape data form one single sheet from the Spreadsheet.
178 clausted 1 Return the data, ROI infomation in the form of list.
179 clausted 3 """
180 clausted 1 ## and return the minimal spot Id (start_spot)
181     ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet
182    
183     Head = sh.row_values(1)
184     SpotList = []
185     num_spot = sh.row_values(0).count('Relative Time')
186    
187     for i in range(num_spot):
188     TmpHead=Head[colsize*i]
189     # now there are 5 columns for each ROI for Raw Intensity
190     # and 4 columns for each ROI for Satellite subtracted Intensity
191     DataList.append(sh.col_values(2+colsize*i,start,end))
192    
193     key, val = [], []
194     tmpinfo = TmpHead.split('\n')
195     for j in tmpinfo:
196     key.append(j.split(': ')[0])
197     val.append(j.split(': ')[1])
198     tmpdic = dict(zip(key,val))
199     tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
200     ROIinfo.append(tmpdic)
201    
202     return DataList,ROIinfo
203    
204 clausted 3 def dam_spreadsheet_format(book):
205     """Read the spreadsheet exported from Plexera DAM software and reshape it.
206 clausted 1 Return the shaped data and ROI information.
207 clausted 3 """
208 clausted 1 ROIinfo, newROIinfo, n_sheet = [], [], 0
209     DataList=[]
210     status = {'DatainputType':'Plexera DAM data'}
211     sh0=book.sheet_by_index(0)
212     if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
213     print 'This is the satellite subtracted data.'
214     colsize = 4
215     status ['DAM spreadsheet'] = 'Satellite subtracted Intensity'
216     elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
217     print 'This is the raw intensity data.'
218     colsize = 5
219     status ['DAM spreadsheet'] = 'Raw Intensity'
220    
221     for i in range(2,sh0.nrows):
222     if sum(sh0.col_values(2,1,i))>0:
223     break
224     start,end = i-1,sh0.nrows
225     num_line = end - start
226     ## print 'start: ', start
227     # the time axis initialization and assignment
228     Time=sh0.col_values(1,start,end)
229     # in case we lose the time axis during the DAM processing
230     if sum(Time)==0: Time=np.arange(num_line)
231     DataList.append(Time)
232    
233     # list DataList to store the Shaped_data of each ROI in each sheet,
234     # list ROIinfo to store the Spot id and the ligand name of each ROI in each sheet
235     for i in range(book.nsheets):
236     sh=book.sheet_by_index(i)
237     if sh.ncols!=0:
238 clausted 3 DataList,ROIinfo = dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize)
239 clausted 1 n_sheet=n_sheet+1
240     else:
241     break
242     # calculate the dim of the Shaped_data, and initialize the matrix
243     num_spot=len(DataList)-1
244     num_line=end-start
245     Shaped_data=np.zeros((num_spot+1,num_line))
246     Shaped_data[0]=DataList[0]
247    
248     # in case that the ROIs doesn't count from spot1
249     newROIinfo = copy.deepcopy(ROIinfo)
250     StartspotList=np.array([i['ID'] for i in ROIinfo])
251     start_spot=min(StartspotList)
252     for i in range(num_spot):
253     tmp = ROIinfo[i]['ID']
254     try:
255     j=int(ROIinfo[i]['ID'])-(start_spot-1)
256     except ValueError:
257     ##return tmp
258     if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
259     j=int(ROIinfo[i]['ID'][4:])-(start_spot-1)
260     else:
261     print 'Illegal galfile format.\n', 'ID:', tmp
262     ##return tmp
263    
264     Shaped_data[j]=DataList[i+1]
265     newROIinfo[j]=ROIinfo[i]
266    
267     # pack the date and relative information into SPRdata obj
268     dataformat = 0
269     ROInum = num_spot
270 clausted 3 dataobj=spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
271 clausted 1 dataobj.status = {}
272     dataobj.updateStatus(**status)
273     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
274    
275     return dataobj
276    
277    
278 clausted 3 def read_clamp_data(fname):
279     """Read the Clamp format data
280 clausted 1 Return the data in the form of list.
281 clausted 3 """
282 clausted 1 fp=open(fname,'r')
283     Tmpstr=fp.readline()
284     # examine the file head
285     # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
286     ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
287     try:
288     name = Tmpstr.split(' ')[2].strip()
289     ROIdic['ROI name'] = name
290     stmp = Tmpstr.split(' ')[3]
291     rest = Tmpstr[Tmpstr.find(stmp):].strip()
292     ROIdic['concentration'] = rest.split(':')
293     except IndexError:
294     pass
295    
296     for Tmpstr in fp:
297     try:
298     float(Tmpstr.split('\t')[0])
299     n_conc=(Tmpstr.count('\t')+1)/2
300     ##print Tmpstr, 'the beginning of data'
301     break
302     except ValueError:
303     if Tmpstr.startswith('Conc1'):
304     n_conc=Tmpstr.count('\t')
305     Conc=Tmpstr.strip().split('\t')[1:]
306     elif Tmpstr.startswith('Start1'):
307     Start=Tmpstr.strip().split('\t')[1:]
308     n_conc=Tmpstr.count('\t')
309     elif Tmpstr.startswith('Stop1'):
310     Stop=Tmpstr.strip().split('\t')[1:]
311     n_conc=Tmpstr.count('\t')
312    
313    
314     # write the data into matrix
315     Tmpdata=[]
316     for eachline in fp:
317     a=eachline.strip().split('\t')
318     Tmpdata.append(np.array(a,dtype='f'))
319     Data=np.transpose(Tmpdata)
320    
321     # write the infomation
322     ROIinfo = [ROIdic]
323     status = {'DatainputType':'Clamp data'}
324     sampleinfo, injinfo = [], []
325     injpara = [map(float,i) for i in [Start,Stop]]
326    
327     if len(Conc) >= 1:
328     for i,c in enumerate(Conc):
329     sampledic = {}
330     name = 'Sample '+str(i+1)
331     sampledic = dict(zip(['Name','Concentration'],[name,c]))
332     ## sampledic['Name'] = name
333     ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
334     sampleinfo.append(sampledic)
335     injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
336     injinfo.append(injdic)
337 clausted 3 dataobj=spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
338 clausted 1 dataobj.status = {}
339     dataobj.updateStatus(**status)
340    
341     return dataobj
342    
343    
344    
345 clausted 3 def keyfile_read(fkey):
346 clausted 1 # The Key File contains
347 clausted 3 """Function: Read the key file for old SPR instrument.
348 clausted 1 Input: the filename.
349     Return: the ROI information.
350 clausted 3 """
351 clausted 1 try:
352     fp = open(fkey, "r")
353     except IOError:
354     print 'Cannot open file %s for reading' % fname
355     sys.exit(0)
356     ROIinfo = []
357     firstline=fp.readline() # skip the first line
358     # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
359     for eachline in fp:
360     ROIdic = {}
361     tmplist = eachline.strip().split('\t')
362     ROIdic['ID'] = tmplist[0] # spot id
363     ROIdic['Name'] = tmplist[1] # name of the protein immobilized
364     try:
365     ROIdic['Conc'] = tmplist[2]
366     ROIdic['Background Spot'] = int(tmplist[3])
367     except IndexError:
368     pass
369     ROIinfo.append(ROIdic)
370    
371     fp.close()
372     print 'There are %d ROIs in the Key file.' % len(ROIinfo)
373     return ROIinfo
374    
375    
376 clausted 3 def galfile_read(fname):
377     """Function: Read the GAL file.
378 clausted 1 Input: the filename.
379     Return: the ROI information.
380 clausted 3 """
381 clausted 1 fp=file(fname,'r')
382     ROIinfo = []
383     Tmpstr = fp.readline() # skip the first line
384     Tmpstr = fp.readline()
385     Tmpn = int(Tmpstr.split('\t')[0])
386     for i in range(Tmpn):
387     labelline = fp.readline()
388    
389     if labelline.startswith('"Block1'):
390     a = labelline.strip().split(',')
391     columnsize, rowsize = int(a[3]), int(a[5])
392     ## print labelline
393     def IDcal(p):
394     id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
395     return id
396    
397     for eachline in fp:
398     ROIdic = {}
399     tmplist = eachline.strip().split('\t')
400     pos = tuple(map(int,tmplist[:3]))
401     ROIdic['Position'] = pos
402     ROIdic['IDcal'] =IDcal(pos)
403     ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]
404    
405     ##
406     ROIdic['Name'] = tmplist[3]
407     ROIdic['ID'] = tmplist[4]
408     try:
409     ROIdic['Conc'] = tmplist[5]
410     ROIdic['Family'] = tmplist[6]
411     blist = tmplist[7][1:-1].split(';') # always something like '"1,1,2; 1,2,2"'
412     Backn = len(blist)
413     Bid = []
414    
415     for i in blist:
416     if '' in i.split(','):
417     break
418     Bpos = tuple(map(int, i.split(',')))
419     Bid.append(IDcal(Bpos))
420     ##
421     ROIdic['Background Spot'] = tuple(Bid)
422     except IndexError:
423     pass
424    
425     if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
426     print 'The spot ID should be consistent with the position. Please check the ID.'
427     break
428     else:
429     ROIdic['ID'] = ROIdic['IDcal']
430     ROIinfo.append(ROIdic)
431     fp.close()
432     print 'There are %d ROIs in the Gal file.' % len(ROIinfo)
433    
434     return ROIinfo
435    
436    
437    
438 clausted 3 def method_read(fname):
439     """Function: Read the analyte table.
440 clausted 1 Input: the filename
441     Return: the sample information
442 clausted 3 """
443 clausted 1 # create a concentration dictionary for the sample injected into the flowcell
444     # export a table containing the concentration of the sample injected, and the duration
445     fp=file(fname,'r')
446     sampleinfo = []
447     ##TmpTable,TmpTable2={},{}
448     labelline=fp.readline()
449     for n,eachline in enumerate(fp):
450     sampledic = {}
451     sampledic['Location'] = eachline.split('\t')[0]
452     sampledic['Name'] = eachline.split('\t')[1]
453     sampledic['Concentration'] = float(eachline.split('\t')[2])
454     sampledic['Duration'] = float(eachline.split('\t')[4])
455     sampledic['Flow Rate'] = float(eachline.split('\t')[3])
456     sampledic['Analyte Series'] = eachline.split('\t')[6]
457     sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
458     sampleinfo.append(sampledic)
459    
460     print 'Name','\t','Concentration'
461     for i in sampleinfo:
462     print i['Name'],'\t', i['Concentration']
463    
464     return sampleinfo
465    
466 clausted 5 def method_read_fake():
467     sampleinfo = []
468     sampleinfo.append({'Location':0, 'Name':0, 'Concentration':0, 'Duration':0, 'Flow Rate':0, 'Analyte Series':0, 'Buffer Blank Series':0})
469     return sampleinfo
470 clausted 1
471 clausted 5
472 clausted 3 def datafile_read(fname):
473     """Function: Read the raw data from the Plexera instrument,
474 clausted 1 and pack the raw data (Shaped_data) into dataobject.
475     Therefore, the initial dataformat = 0
476     Input: the filename
477     Return: the packed SPR.DataPass type data object
478 clausted 3 """
479 clausted 1 #===================check the format of the input files-------
480     if fname.split('.')[-1] == 'txt':
481    
482 clausted 3 Format_input, Firstline = check_format_input(fname)
483 clausted 1 if Format_input == 1: # for SPRit file format
484 clausted 3 dataobj = sprit_format(fname)
485 clausted 1 print '-'*10,' This is a SPRit data file. ','-'*10
486    
487     elif Format_input == 2: # for Plexera ICM file format
488 clausted 3 dataobj = plexera_icm_format(fname)
489 clausted 1 print '-'*10,' This is a Plexera ICM exported data file. ','-'*10
490    
491     elif Format_input == 3: # for Clamp datafile format
492 clausted 3 dataobj = read_clamp_data(fname)
493 clausted 1 print '-'*10,' This is a Clamp file. ','-'*10
494    
495     else:
496     print '-'*10,' unreadable file input! ','-'*10
497     sys.exit(0)
498    
499     if Format_input == 1 or Format_input == 2:
500     flag = str.upper(raw_input('Load the key file? (y/n): '))
501     if flag == 'Y':
502     fkey = raw_input('Input the path of the key file: ')
503 clausted 3 ROIinfo = keyfile_read(fkey)
504 clausted 1 dataobj.updateROIinfo(ROIinfo)
505    
506     elif fname.split('.')[-1] == 'xls':
507     # for the DAM Spreadsheet file format
508     print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
509     book = xlrd.open_workbook(fname)
510     #shape the data in the Spreadsheet, whether single sheet or multiple sheets
511 clausted 3 dataobj=dam_spreadsheet_format(book)
512 clausted 1 flag = str.upper(raw_input('Load the gal file? (y/n): '))
513     if flag == 'Y':
514     fgal = raw_input('Input the path of the gal file: ')
515 clausted 3 ROIinfo = galfile_read(fgal)
516 clausted 1 dataobj.updateROIinfo(ROIinfo)
517     flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
518     if flag == 'Y':
519     fprotocol = raw_input('Input the path of the analyte table: ')
520 clausted 3 sampleinfo = method_read(fprotocol)
521 clausted 1 dataobj.updateSampleinfo(sampleinfo)
522 clausted 5 else:
523     dataobj.updateSampleinfo(method_read_fake())
524    
525 clausted 1 return dataobj
526    
527    
528    
529 clausted 3 def outlier_remove(obj):
530 clausted 1 ##dataobj.viewAll()
531 clausted 3 """Function: Remove unwanted data points from noisy periods.
532 clausted 1 Return: the data object with clean data
533 clausted 3 """
534 clausted 1 dataobj = copy.deepcopy(obj)
535     print ('Select the time area that you want to remove.')
536     tmpstr = raw_input('in the format of "200:240, 500:510" :')
537     areatodel = tmpstr.split(',')
538     data = dataobj.data
539     for i in areatodel:
540     ##if
541     t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
542     t = data[0] # for dataformat=0 Shaped_data
543     tmpind = plt.find((t>t1) & (t<t2)) # find the outliar indice
544     ## data[1:,tmpind] = 0
545     tmplist = data.tolist()
546     tmplist2 = []
547     for j in tmplist:
548     del j[min(tmpind):max(tmpind)]
549     tmplist2.append(j)
550     data = np.array(tmplist2)
551     ## tmplist = tmplist
552     dataobj.data = data
553     # dataobj.status = 'Outliar removed'
554 clausted 3 dataobj.updateStatus(outlier_remove=True)
555 clausted 1 return dataobj
556    
557 clausted 3 def calibrate(obj):
558     """Function: calibrate the Intensity response.
559 clausted 1 Return: the data object with calibrated data
560     Note: at present, this function is valid only when
561     the whole procedure includes calibration precedure.
562 clausted 3 """
563 clausted 1 dataobj = copy.deepcopy(obj)
564     data = dataobj.data
565     caldata = copy.deepcopy(data)
566     t = data[0]
567     if dataobj.dataformat != 0:
568     print 'We only calibrate the raw data.'
569     return
570     else:
571     t1 = input('Enter the time position of the TOP response during calibration:')
572     s1 = input('The refractive index of the TOP response is: ')
573     t2 = input('Enter the time position of the BOTTOM response during calibration:')
574     s2 = input('The refractive index of the BOTTOM response is: ')
575     ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
576     ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]
577    
578     for i,y in enumerate(data[1:]):
579     y1, y2 = y[ind1], y[ind2]
580     slope = (s1-s2)/(y1-y2)
581     offset = s1- slope*y1
582     caldata[i+1] = slope*y+offset
583     dataobj.data = caldata
584 clausted 3 dataobj.updateStatus(calibrate=True)
585 clausted 1 return dataobj
586    
587 clausted 3 def get_background_value(obj,bgids):
588     """Get the averaged value of background spots for each ROI."""
589 clausted 1 bgsignal = obj.data[0]*0
590     for j in bgids:
591     if j == obj.ROIinfo[j-1]['ID']:
592     bgsignal = bgsignal + obj.data[j]
593    
594    
595     elif j != obj.ROIinfo[j-1]['ID']:
596     for n,eachspot in enumerate(obj.ROIinfo):
597     if j == eachspot['ID']:
598     bgsignal = bgsignal + obj.data[n+1]
599     bgsignal = bgsignal/len(bgids)
600     return bgsignal
601    
602    
603 clausted 3 def background_subtract(obj, *bgspot):
604     """Function: Perform the Background subtraction for the UNSPLIT curve.
605 clausted 1 Input besides "obj" is the id of the spot taken as background.
606     The following inputs are acceptable:
607 clausted 3 1. background_subtract(obj): the default background in Galfile
608 clausted 1 will be subtracted.
609 clausted 3 2. background_subtract(obj, 1, 6): the average value of spot1
610 clausted 1 and spot6 will be subtracted.
611 clausted 3 """
612 clausted 1 dataobj = copy.deepcopy(obj)
613     ROIinfo = obj.ROIinfo
614     data = dataobj.data
615    
616     if dataobj.dataformat == 0:
617     newdata = np.zeros(np.shape(data))
618     newdata[0] = data[0] # the time scale
619     if bgspot == ():
620     # The average of the default background spots listed in the galfile
621     #are to be subtracted.
622     for i in range(1,dataobj.ROInum+1):
623     bgids = ROIinfo[i-1]['Background Spot']
624 clausted 3 bgsignal = get_background_value(dataobj,bgids)
625 clausted 1 newdata[i] = data[i]-bgsignal
626     # update the status of the data object.
627     dataobj.updateStatus(BackgroundType='Default in Gal')
628    
629     else:
630     # The average of the manually input background spots
631     #are to be subtracted.
632     for i in range(1,dataobj.ROInum+1):
633 clausted 3 bgsignal = get_background_value(dataobj,bgspot)
634 clausted 1 newdata[i] = data[i]-bgsignal
635     dataobj.updateStatus(BackgroundType='Manually choosen')
636     dataobj.data = newdata
637 clausted 3 dataobj.updateStatus(background_subtraction=True)
638 clausted 1 return dataobj
639    
640     else:
641     print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
642     return
643    
644 clausted 3 def curve_split(obj, t_before, t_after, *t_inj):
645     """Function: Split the whole curve that contains several injections
646 clausted 1 into pieces.
647     The sample information will be checked during the split,
648     if the injection number in sampleinfo is not consistant
649     with the input injection parameter, it will help you to
650     update the sample information.
651     Input besides obj:
652     t_before: time duration before the injection time point
653     t_after: time duration after the injection time point
654     t_inj: the exact start time for each injection
655     Return: the data object with splitted data, and updated sample
656     information
657 clausted 3 """
658 clausted 1 dataobj = copy.deepcopy(obj)
659     ROInum = dataobj.ROInum
660     t=dataobj.data[0]
661     ind=[]
662     # find how many lines of data each injection
663     for i in range(len(t_inj)):
664     ind.append(np.shape(plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after))))[0])
665     m,j = max(ind),max(ind)-min(ind)
666     # the new dataset must be a m*((ROInum+1)*num_inj) matrix:
667     # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
668     tmpdata = np.zeros(((ROInum+1)*len(t_inj),m))
669    
670     for i in range(len(t_inj)):
671     TmpInd = plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after)))
672     Tmp1,Tmp2 = i*(ROInum+1),(i+1)*(ROInum+1)
673     tmpdata[Tmp1:Tmp2,0:len(TmpInd)] = dataobj.data[:,TmpInd]
674     if j!=0:
675     Split_data = tmpdata[:,:-j]
676     else:
677     Split_data = tmpdata[:]
678     # zero and align the split data
679     for i in range(np.shape(Split_data)[0]):
680     Split_data[i]=Split_data[i]-Split_data[i][0]
681    
682 clausted 3 newsampleinfo, injectinfo = check_sample_info(dataobj,t_before, t_after, t_inj)
683 clausted 1 if newsampleinfo != None :
684     dataobj.data = Split_data
685     dataobj.updateStatus(DataSplit=True)
686     dataobj.updateSampleinfo(newsampleinfo)
687     dataobj.dataformat = 2
688     dataobj.injectinfo = injectinfo
689     return dataobj
690     else:
691     dataobj.updateStatus(DataSplit=False)
692     print 'Curve Splitting not successful!'
693    
694     return
695    
696    
697 clausted 3 def check_sample_info(dataobj, t_before, t_after, t_inj):
698     """Check if the sample information consistent with the injection
699 clausted 1 parameter input. If not, help the user to update sample information
700     for the splitted curve.
701 clausted 3 """
702 clausted 1 injectinfo = []
703     if len(dataobj.sampleinfo) == len(t_inj):
704     for i,j in enumerate(t_inj):
705     injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
706     return dataobj.sampleinfo, injectinfo
707    
708     elif len(dataobj.sampleinfo) > len(t_inj):
709 clausted 3 print """ The number of injection you just input does not match that in sample infomation.
710     If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object."""
711 clausted 1 choose = str.upper(raw_input('Continue? (y/n)'))
712     if choose == 'Y':
713     newsampleinfo, injectinfo = [], []
714     print ' Your input indicates %s injections out of %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
715     for n,i in enumerate(dataobj.sampleinfo):
716     print '--',n,'.',i['Name'],':',i['Concentration']
717     print ' Enter the ID number of the injections you just select, in the form of "2-10" or "4,5,6",'
718     tmpstr = raw_input(':')
719     if ',' in tmpstr:
720     injID = map(int, tmpstr.split(','))
721     elif '-' in tmpstr:
722     injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
723     else:
724     print ' Illegal input!'
725     return
726    
727     if len(injID) != len(t_inj):
728     print ' Please input %s injection ID numbers instead of %s You just input.' %(len(t_inj),len(injID))
729     return
730    
731     for i in injID:
732     newsampleinfo.append(dataobj.sampleinfo[i])
733     injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
734     return newsampleinfo, injectinfo
735     else:
736     return
737    
738     elif len(dataobj.sampleinfo) < len(t_inj):
739     print ' The number of injection you just input does not match that in sample infomation.'
740     print ' Your input indicates %s injections, where there is only %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
741     print ' Please check.'
742    
743     return
744    
745    
746 clausted 3 def write_data(Head,data_out,ROI_out,DataDir,output_format):
747     """Write the data into a txt file.
748     """
749     # Modified 100405 CGL
750     # create a newfolder with time stamp to save the data
751 clausted 1 if output_format=='B':
752     ftitle='biosensor'
753     Tmpstr = str(ROI_out).replace(', ','_')
754     Tmpstr = Tmpstr.replace('(','-')
755     foutname = ftitle+Tmpstr[:-1]+'.txt'
756     elif output_format=='C':
757     foutname='clamp-'+str(ROI_out)+'.txt'
758     else:
759     ftitle=''
760 clausted 3
761     # Change directories, if requested.
762     if (DataDir != ""):
763     os.chdir('..')
764     try:
765     os.makedirs(DataDir)
766 clausted 5 ##except WindowsError:
767     except OSError:
768 clausted 3 pass
769     os.chdir(DataDir)
770 clausted 1
771     np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
772     fdt=file('temp.txt','r')
773    
774     ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
775     fout=file(foutname,'w+')
776     # write the head of the data first
777     fout.write(Head+'\n')
778     count = len(fdt.readlines())
779     fdt.seek(0)
780     for i in range(count):
781     temp=fdt.readline()
782     fout.write(temp.replace(' ','\t'))
783     fdt.close()
784     fout.close()
785     print '-------The path of the exported data is "', os.getcwd(),'"-------'
786    
787    
788 clausted 3 def data_output_biosensor(obj, DataDir, *ROI_out):
789     """Export the data into a biosensor format txt file,
790 clausted 1 which can be operated in Scrubber.
791     Input: the data object, the output path, the ROIs selected to export
792 clausted 3 """
793 clausted 1 # initialize the data output matrix, which will contain 'len(spot_out)*2)*n_inj' columns
794     dataobj = copy.deepcopy(obj)
795     data = dataobj.data
796     n_inj = len(dataobj.sampleinfo)
797     ROIinfo = dataobj.ROIinfo
798     ROInum = dataobj.ROInum
799    
800     data_out = np.zeros(((len(ROI_out)*2)*n_inj,np.shape(data)[1]))
801     head1 = ""
802     new_ROIinfo = []
803     for n,i in enumerate(ROI_out):
804     new_ROIinfo.append(ROIinfo[i-1]) ##the new ROI info
805     if i != ROIinfo[i-1]['IDcal']:
806 clausted 3 # check if the ID number matches position
807 clausted 1 print n,i, ROIinfo[i-1]
808     return
809     else:
810     headcomp = ROIinfo[i-1]['ID']+': '+ROIinfo[i-1]['Name']
811     try:
812     headcomp = headcomp + ', '+ROIinfo[i-1]['Conc']
813     except KeyError:
814     pass
815     headcomp = headcomp+' Fc='+str(n+1)+' - '
816     for j in range(n_inj):
817     headtmp = headcomp+str(j+1)+'_'
818     head1 = head1+headtmp+'X\t'+headtmp+'Y\t'
819     data_out[2*n*n_inj+2*j]=data[j*(ROInum+1)]
820     data_out[2*n*n_inj+2*j+1]=data[j*(ROInum+1)+i]
821     head1=head1[:-1]
822    
823     dataobj.data = data_out
824     dataobj.dataformat = 3 # biosensor dataformat
825     dataobj.updateStatus(DataType='Biosensor data')
826     dataobj.updateStatus(Head=head1)
827     dataobj.updateROIinfo(new_ROIinfo)
828    
829 clausted 3 write_data(head1,data_out,ROI_out,DataDir,'B')
830 clausted 1
831     return dataobj
832    
833 clausted 3 def data_output_clamp(obj, DataDir, ROI_out):
834     """Export the data into a Clamp format txt file,
835 clausted 1 which can be operated in Clamp.
836     Input: the data object, the output path, the selected ROI
837 clausted 3 """
838 clausted 1 # initialize the data output matrix, which will contain '2*n_inj' columns
839     dataobj = copy.deepcopy(obj)
840     data = dataobj.data
841     n_inj = len(dataobj.sampleinfo)
842 clausted 5 n_inj = max(n_inj, 1) # Must have at least one injection.
843 clausted 1 ROIinfo = dataobj.ROIinfo
844     ROInum = dataobj.ROInum
845     sampleinfo = dataobj.sampleinfo
846    
847     data_out = np.zeros((2*n_inj,np.shape(data)[1]))
848 clausted 5 ## if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
849     if (ROI_out != int(ROIinfo[ROI_out-1]['ID'])):
850 clausted 1 print 'Please check the ROI information.'
851     print ROI_out, ROIinfo[ROI_out-1]
852     return
853     else:
854     tmp = ROIinfo[ROI_out-1]['ID']+': '+ROIinfo[ROI_out-1]['Name']
855     try:
856     tmp = tmp +', Conc:'+ROIinfo[ROI_out-1]['Conc']
857     except KeyError:
858     pass
859     Clamphead = 'Vers 3.41 '+tmp
860     conc=''
861     t_start,t_stop='',''
862     head1='Time\tData\t'*n_inj
863     head1=head1[:-1]
864    
865     for i in range(n_inj):
866     data_out[2*i]=data[i*(ROInum+1)]
867     data_out[2*i+1]=data[i*(ROInum+1)+ROI_out]
868     name = sampleinfo[i]['Name']
869     conc=conc+'\t'+str(sampleinfo[i]['Concentration'])
870     t_start=t_start+'\t'+str(dataobj.injectinfo[i][0])
871     t_stop=t_stop+'\t'+str(dataobj.injectinfo[i][1])
872    
873     hconc,h_start,h_stop='\nConc1'+conc,'\nStart1'+t_start,'\nStop1'+t_stop
874     for i in (hconc,h_start,h_stop):
875     if i[-1]!='':
876     Clamphead=Clamphead+i
877    
878     Clamphead=Clamphead+'\n'+head1
879    
880     dataobj.data = data_out
881     dataobj.dataformat = 4 # biosensor dataformat
882     dataobj.updateStatus(DataType='Clamp data')
883     dataobj.updateStatus(Head=Clamphead)
884     dataobj.updateROIinfo(ROIinfo[ROI_out-1])
885    
886 clausted 3 write_data(Clamphead,data_out,ROI_out,DataDir,'C')
887 clausted 1
888     return dataobj
889     ## return data_out,Clamphead
890    
891    
892 clausted 3 if (__name__ == '__main__'):
893     # Print a short message if this module is run like an app.
894     print "This is the OSPRAI file type conversion tool."
895