ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
Revision: 1
Committed: Wed Mar 17 05:34:43 2010 UTC (9 years ago) by clausted
File size: 32567 byte(s)
Log Message:
Initial import of Osprai project
Line User Rev File contents
1 clausted 1 '''
2     Converter: Perform the datafile reading, pre-processing and writing.
3    
4     Yuhang Wan, Mar 1,2010
5     __version__ = "1.3"
6    
7     Typical Pipeline:
8     DatafileRead (GalfileRead, MethodRead)
9     OutliarRemove
10     Calibrate
11     BackgroundSubtract
12     CurveSplit
13     DataOutputClamp or DataOutputBiosensor
14    
15     '''
16    
17     import numpy as np
18     import pylab as plt
19     import sys
20     import xlrd
21     import time
22     import os
23     import copy
24    
25     ## import matplotlib.pyplot as plt
26     ## import matplotlib.mlab as mlb
27     ## import packageClamp_100210 as Pack
28     import SPRdataclass_100301 as SPR
29    
30    
31     def checkformatinput(fname):
32     '''Examine the format of a input .txt data file.'''
33     # open a file, if file doesn't exit, then exit
34     try:
35     fp = open(fname, "r")
36     except IOError:
37     print 'Cannot open file %s for reading' % fname
38     sys.exit(0)
39     # examine the first line of the input file
40     # for SPRit output:"Elapsed Time (Seconds) Average Intensity (Pixel Intensity)"
41     # for Plexera ICM output:
42     head_input1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\n"
43    
44     Format_input = 0
45     # check file format
46     Tmpstr = fp.readline()
47     if Tmpstr==head_input1:
48     Format_input=1
49     elif Tmpstr[-1]=='\n' and Tmpstr[-2]=='\r' and Tmpstr.split(' ')[0].split('/')[2].startswith('20'):
50     Format_input=2
51     elif Tmpstr.startswith('Vers'):
52     Format_input=3
53    
54     fp.close()
55     return Format_input, Tmpstr
56    
57    
58     def SPRitformat(fname):
59     '''Read the SPRit formatted data and reshape it.
60     Return the data in the form of list.
61     '''
62     # open a file, if file doesn't exit, then exit
63     try:
64     fp = open(fname, "r")
65     except IOError:
66     print 'Cannot open file %s for reading' % fname
67     sys.exit(0)
68     status = {'DatainputType':'SPRit data'}
69     # skip the first line of the input file
70     Tmpstr = fp.readline()
71     # check if the second line is BEGIN
72     TmpStr = fp.readline()
73     if TmpStr != "BEGIN\n":
74     print "Second line is not Begin. Break!"
75     #sys.exit(0)
76     # count the lines of each data spot
77     TmpStr = fp.readline() # skip first "0.000000e+000"
78     num_line = 1 # so we start to count from 1
79     # count until read the "0.000000e+000"
80     while True:
81     TmpStr = fp.readline()
82     if TmpStr.split('\t')[0] == "0.000000e+000":
83     break
84     else:
85     num_line = num_line + 1
86     # count the spots in each file
87     num_spot = 0
88     # skip the first two lines
89     fp.seek(0)
90     for i in range(0,2):
91     TmpStr = fp.readline()
92     text = fp.readlines()
93     for i in text:
94     num_spot += i.count('0.000000e+000')
95     fp.close()
96     #---------------------- Reshaping the data --------------------
97     # reshape the data in the string form num_line*num_spot
98     Shaped_str = np.reshape(text,(num_spot,-1))
99     tmpshape=np.shape(Shaped_str)
100     # reshape the data in the number form: num_line*(num_spot+1)
101     # only the first column is time: time spotl spot2 ... spot n
102     time = np.zeros(num_line)
103     Tmpdata = Shaped_str[0,:]
104     for n,i in enumerate(Tmpdata):
105     time[n]=float(i.split('\t')[0])
106     Shaped_data_1 = np.zeros((tmpshape[0]+1,tmpshape[1]))
107     Shaped_data_1[0,:]=time
108     for n1,i in enumerate(Shaped_str):
109     for n2,j in enumerate(i):
110     Shaped_data_1[n1+1][n2]=float(Shaped_str[n1][n2].split('\t')[1])
111     # reshape the data in the number form:
112     # time1 spotl time2 spot2 ... timen spot n
113     Shaped_data_2 = np.zeros((tmpshape[0]*2,tmpshape[1]))
114     for i in range(tmpshape[0]):
115     Shaped_data_2[i*2]=Shaped_data_1[0]
116     Shaped_data_2[i*2+1] = Shaped_data_1[i+1]
117    
118     ## status = {'Datainput Type':'SPRit data'}
119     dataformat = 0
120     ROInum = num_spot
121     dataobj=SPR.DataPass(Shaped_data_1, ROInum, dataformat)
122     dataobj.status = {}
123     dataobj.updateStatus(**status)
124     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
125    
126     return dataobj
127    
128    
129     def PlexeraICMformat(fname):
130     '''Read the txt data exported from Plexera ICM software and reshape it.
131     Return the data in the form of list.
132     '''
133     fp=file(fname,'r')
134     Tmpdata,tmptime=[],[]
135     status = {'DatainputType':'Plexera ICM data'}
136     for eachline in fp:
137     Tmpdata.append(eachline.split('\t'))
138     num_line,num_spot=np.shape(Tmpdata)
139     # initialize the Shaped_data
140     Shaped_data = np.zeros((num_spot,num_line))
141     num_spot=num_spot-1 # the real number of spot
142     Tmpdata=np.transpose(Tmpdata)
143     # delete the last '\r\n' of the data in last column of the datafile
144     for n,i in enumerate(Tmpdata[-1]):
145     Tmpdata[-1][n]=i.rstrip()
146     # time converting: convert the first column of the datafile(date and time information) into numerical time
147     for i in Tmpdata[0]:
148     tmptime_str=i.split(' ')[1]
149     timelist=tmptime_str.split(':')
150     now=float(timelist[0])*60*60+float(timelist[1])*60+float(timelist[2])
151     tmptime.append(now)
152     Shaped_data[0]=tmptime
153     Shaped_data[0]=Shaped_data[0]-Shaped_data[0][0]
154     # assign the signal data
155     for i in range(num_spot):
156     Shaped_data[i+1]=Tmpdata[i+1]
157    
158     dataformat = 0
159     ROInum = num_spot
160     dataobj=SPR.DataPass(Shaped_data, ROInum, dataformat)
161     dataobj.status = {}
162     dataobj.updateStatus(**status)
163    
164     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
165     return dataobj
166    
167    
168    
169     def DAMsinglesheet(sh,DataList,ROIinfo,start,end, colsize):
170     '''Retrieve and shape data form one single sheet from the Spreadsheet.
171     Return the data, ROI infomation in the form of list.
172     '''
173     ## and return the minimal spot Id (start_spot)
174     ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet
175    
176     Head = sh.row_values(1)
177     SpotList = []
178     num_spot = sh.row_values(0).count('Relative Time')
179    
180     for i in range(num_spot):
181     TmpHead=Head[colsize*i]
182     # now there are 5 columns for each ROI for Raw Intensity
183     # and 4 columns for each ROI for Satellite subtracted Intensity
184     DataList.append(sh.col_values(2+colsize*i,start,end))
185    
186     key, val = [], []
187     tmpinfo = TmpHead.split('\n')
188     for j in tmpinfo:
189     key.append(j.split(': ')[0])
190     val.append(j.split(': ')[1])
191     tmpdic = dict(zip(key,val))
192     tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
193     ROIinfo.append(tmpdic)
194    
195     return DataList,ROIinfo
196    
197     def DAMspreadsheetformat(book):
198     '''Read the spreadsheet exported from Plexera DAM software and reshape it.
199     Return the shaped data and ROI information.
200     '''
201     ROIinfo, newROIinfo, n_sheet = [], [], 0
202     DataList=[]
203     status = {'DatainputType':'Plexera DAM data'}
204     sh0=book.sheet_by_index(0)
205     if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
206     print 'This is the satellite subtracted data.'
207     colsize = 4
208     status ['DAM spreadsheet'] = 'Satellite subtracted Intensity'
209     elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
210     print 'This is the raw intensity data.'
211     colsize = 5
212     status ['DAM spreadsheet'] = 'Raw Intensity'
213    
214     for i in range(2,sh0.nrows):
215     if sum(sh0.col_values(2,1,i))>0:
216     break
217     start,end = i-1,sh0.nrows
218     num_line = end - start
219     ## print 'start: ', start
220     # the time axis initialization and assignment
221     Time=sh0.col_values(1,start,end)
222     # in case we lose the time axis during the DAM processing
223     if sum(Time)==0: Time=np.arange(num_line)
224     DataList.append(Time)
225    
226     # list DataList to store the Shaped_data of each ROI in each sheet,
227     # list ROIinfo to store the Spot id and the ligand name of each ROI in each sheet
228     for i in range(book.nsheets):
229     sh=book.sheet_by_index(i)
230     if sh.ncols!=0:
231     DataList,ROIinfo = DAMsinglesheet(sh,DataList,ROIinfo,start,end, colsize)
232     n_sheet=n_sheet+1
233     else:
234     break
235     # calculate the dim of the Shaped_data, and initialize the matrix
236     num_spot=len(DataList)-1
237     num_line=end-start
238     Shaped_data=np.zeros((num_spot+1,num_line))
239     Shaped_data[0]=DataList[0]
240    
241     # in case that the ROIs doesn't count from spot1
242     newROIinfo = copy.deepcopy(ROIinfo)
243     StartspotList=np.array([i['ID'] for i in ROIinfo])
244     start_spot=min(StartspotList)
245     for i in range(num_spot):
246     tmp = ROIinfo[i]['ID']
247     try:
248     j=int(ROIinfo[i]['ID'])-(start_spot-1)
249     except ValueError:
250     ##return tmp
251     if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
252     j=int(ROIinfo[i]['ID'][4:])-(start_spot-1)
253     else:
254     print 'Illegal galfile format.\n', 'ID:', tmp
255     ##return tmp
256    
257     Shaped_data[j]=DataList[i+1]
258     newROIinfo[j]=ROIinfo[i]
259    
260     # pack the date and relative information into SPRdata obj
261     dataformat = 0
262     ROInum = num_spot
263     dataobj=SPR.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
264     dataobj.status = {}
265     dataobj.updateStatus(**status)
266     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
267    
268     return dataobj
269    
270    
271     def ReadClampData(fname):
272     '''Read the Clamp format data
273     Return the data in the form of list.
274     '''
275     fp=open(fname,'r')
276     Tmpstr=fp.readline()
277     # examine the file head
278     # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
279     ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
280     try:
281     name = Tmpstr.split(' ')[2].strip()
282     ROIdic['ROI name'] = name
283     stmp = Tmpstr.split(' ')[3]
284     rest = Tmpstr[Tmpstr.find(stmp):].strip()
285     ROIdic['concentration'] = rest.split(':')
286     except IndexError:
287     pass
288    
289     for Tmpstr in fp:
290     try:
291     float(Tmpstr.split('\t')[0])
292     n_conc=(Tmpstr.count('\t')+1)/2
293     ##print Tmpstr, 'the beginning of data'
294     break
295     except ValueError:
296     if Tmpstr.startswith('Conc1'):
297     n_conc=Tmpstr.count('\t')
298     Conc=Tmpstr.strip().split('\t')[1:]
299     elif Tmpstr.startswith('Start1'):
300     Start=Tmpstr.strip().split('\t')[1:]
301     n_conc=Tmpstr.count('\t')
302     elif Tmpstr.startswith('Stop1'):
303     Stop=Tmpstr.strip().split('\t')[1:]
304     n_conc=Tmpstr.count('\t')
305    
306    
307     # write the data into matrix
308     Tmpdata=[]
309     for eachline in fp:
310     a=eachline.strip().split('\t')
311     Tmpdata.append(np.array(a,dtype='f'))
312     Data=np.transpose(Tmpdata)
313    
314     # write the infomation
315     ROIinfo = [ROIdic]
316     status = {'DatainputType':'Clamp data'}
317     sampleinfo, injinfo = [], []
318     injpara = [map(float,i) for i in [Start,Stop]]
319    
320     if len(Conc) >= 1:
321     for i,c in enumerate(Conc):
322     sampledic = {}
323     name = 'Sample '+str(i+1)
324     sampledic = dict(zip(['Name','Concentration'],[name,c]))
325     ## sampledic['Name'] = name
326     ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
327     sampleinfo.append(sampledic)
328     injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
329     injinfo.append(injdic)
330     dataobj=SPR.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
331     dataobj.status = {}
332     dataobj.updateStatus(**status)
333    
334     return dataobj
335    
336    
337    
338     def KeyfileRead(fkey):
339     # The Key File contains
340     '''Function: Read the key file for old SPR instrument.
341     Input: the filename.
342     Return: the ROI information.
343     '''
344     try:
345     fp = open(fkey, "r")
346     except IOError:
347     print 'Cannot open file %s for reading' % fname
348     sys.exit(0)
349     ROIinfo = []
350     firstline=fp.readline() # skip the first line
351     # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
352     for eachline in fp:
353     ROIdic = {}
354     tmplist = eachline.strip().split('\t')
355     ROIdic['ID'] = tmplist[0] # spot id
356     ROIdic['Name'] = tmplist[1] # name of the protein immobilized
357     try:
358     ROIdic['Conc'] = tmplist[2]
359     ROIdic['Background Spot'] = int(tmplist[3])
360     except IndexError:
361     pass
362     ROIinfo.append(ROIdic)
363    
364     fp.close()
365     print 'There are %d ROIs in the Key file.' % len(ROIinfo)
366     return ROIinfo
367    
368    
369     def GalfileRead(fname):
370     '''Function: Read the GAL file.
371     Input: the filename.
372     Return: the ROI information.
373     '''
374     fp=file(fname,'r')
375     ROIinfo = []
376     Tmpstr = fp.readline() # skip the first line
377     Tmpstr = fp.readline()
378     Tmpn = int(Tmpstr.split('\t')[0])
379     for i in range(Tmpn):
380     labelline = fp.readline()
381    
382     if labelline.startswith('"Block1'):
383     a = labelline.strip().split(',')
384     columnsize, rowsize = int(a[3]), int(a[5])
385     ## print labelline
386     def IDcal(p):
387     id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
388     return id
389    
390     for eachline in fp:
391     ROIdic = {}
392     tmplist = eachline.strip().split('\t')
393     pos = tuple(map(int,tmplist[:3]))
394     ROIdic['Position'] = pos
395     ROIdic['IDcal'] =IDcal(pos)
396     ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]
397    
398     ##
399     ROIdic['Name'] = tmplist[3]
400     ROIdic['ID'] = tmplist[4]
401     try:
402     ROIdic['Conc'] = tmplist[5]
403     ROIdic['Family'] = tmplist[6]
404     blist = tmplist[7][1:-1].split(';') # always something like '"1,1,2; 1,2,2"'
405     Backn = len(blist)
406     Bid = []
407    
408     for i in blist:
409     if '' in i.split(','):
410     break
411     Bpos = tuple(map(int, i.split(',')))
412     Bid.append(IDcal(Bpos))
413     ##
414     ROIdic['Background Spot'] = tuple(Bid)
415     except IndexError:
416     pass
417    
418     if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
419     print 'The spot ID should be consistent with the position. Please check the ID.'
420     break
421     else:
422     ROIdic['ID'] = ROIdic['IDcal']
423     ROIinfo.append(ROIdic)
424     fp.close()
425     print 'There are %d ROIs in the Gal file.' % len(ROIinfo)
426    
427     return ROIinfo
428    
429    
430    
431     def MethodRead(fname):
432     '''Function: Read the analyte table.
433     Input: the filename
434     Return: the sample information
435     '''
436     # create a concentration dictionary for the sample injected into the flowcell
437     # export a table containing the concentration of the sample injected, and the duration
438     fp=file(fname,'r')
439     sampleinfo = []
440     ##TmpTable,TmpTable2={},{}
441     labelline=fp.readline()
442     for n,eachline in enumerate(fp):
443     sampledic = {}
444     sampledic['Location'] = eachline.split('\t')[0]
445     sampledic['Name'] = eachline.split('\t')[1]
446     sampledic['Concentration'] = float(eachline.split('\t')[2])
447     sampledic['Duration'] = float(eachline.split('\t')[4])
448     sampledic['Flow Rate'] = float(eachline.split('\t')[3])
449     sampledic['Analyte Series'] = eachline.split('\t')[6]
450     sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
451     sampleinfo.append(sampledic)
452    
453     print 'Name','\t','Concentration'
454     for i in sampleinfo:
455     print i['Name'],'\t', i['Concentration']
456    
457     return sampleinfo
458    
459    
460    
461     def DatafileRead(fname):
462     '''Function: Read the raw data from the Plexera instrument,
463     and pack the raw data (Shaped_data) into dataobject.
464     Therefore, the initial dataformat = 0
465     Input: the filename
466     Return: the packed SPR.DataPass type data object
467     '''
468     #===================check the format of the input files-------
469     if fname.split('.')[-1] == 'txt':
470    
471     Format_input, Firstline = checkformatinput(fname)
472     if Format_input == 1: # for SPRit file format
473     dataobj = SPRitformat(fname)
474     print '-'*10,' This is a SPRit data file. ','-'*10
475    
476     elif Format_input == 2: # for Plexera ICM file format
477     dataobj = PlexeraICMformat(fname)
478     print '-'*10,' This is a Plexera ICM exported data file. ','-'*10
479    
480     elif Format_input == 3: # for Clamp datafile format
481     dataobj = ReadClampData(fname)
482     print '-'*10,' This is a Clamp file. ','-'*10
483    
484     else:
485     print '-'*10,' unreadable file input! ','-'*10
486     sys.exit(0)
487    
488     if Format_input == 1 or Format_input == 2:
489     flag = str.upper(raw_input('Load the key file? (y/n): '))
490     if flag == 'Y':
491     fkey = raw_input('Input the path of the key file: ')
492     ROIinfo = KeyfileRead(fkey)
493     dataobj.updateROIinfo(ROIinfo)
494    
495     elif fname.split('.')[-1] == 'xls':
496     # for the DAM Spreadsheet file format
497     print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
498     book = xlrd.open_workbook(fname)
499     #shape the data in the Spreadsheet, whether single sheet or multiple sheets
500     dataobj=DAMspreadsheetformat(book)
501     flag = str.upper(raw_input('Load the gal file? (y/n): '))
502     if flag == 'Y':
503     fgal = raw_input('Input the path of the gal file: ')
504     ROIinfo = GalfileRead(fgal)
505     dataobj.updateROIinfo(ROIinfo)
506     flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
507     if flag == 'Y':
508     fprotocol = raw_input('Input the path of the analyte table: ')
509     sampleinfo = MethodRead(fprotocol)
510     dataobj.updateSampleinfo(sampleinfo)
511    
512     return dataobj
513    
514    
515    
516     def OutliarRemove(obj):
517     ##dataobj.viewAll()
518     '''Function: Remove the noisy area that you want to get rid of.
519     Return: the data object with clean data
520     '''
521     dataobj = copy.deepcopy(obj)
522     print ('Select the time area that you want to remove.')
523     tmpstr = raw_input('in the format of "200:240, 500:510" :')
524     areatodel = tmpstr.split(',')
525     data = dataobj.data
526     for i in areatodel:
527     ##if
528     t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
529     t = data[0] # for dataformat=0 Shaped_data
530     tmpind = plt.find((t>t1) & (t<t2)) # find the outliar indice
531     ## data[1:,tmpind] = 0
532     tmplist = data.tolist()
533     tmplist2 = []
534     for j in tmplist:
535     del j[min(tmpind):max(tmpind)]
536     tmplist2.append(j)
537     data = np.array(tmplist2)
538     ## tmplist = tmplist
539     dataobj.data = data
540     # dataobj.status = 'Outliar removed'
541     dataobj.updateStatus(OutliarRemove=True)
542     return dataobj
543    
544     def Calibrate(obj):
545     '''Function: Calibrate the Intensity response.
546     Return: the data object with calibrated data
547     Note: at present, this function is valid only when
548     the whole procedure includes calibration precedure.
549     '''
550     dataobj = copy.deepcopy(obj)
551     data = dataobj.data
552     caldata = copy.deepcopy(data)
553     t = data[0]
554     if dataobj.dataformat != 0:
555     print 'We only calibrate the raw data.'
556     return
557     else:
558     t1 = input('Enter the time position of the TOP response during calibration:')
559     s1 = input('The refractive index of the TOP response is: ')
560     t2 = input('Enter the time position of the BOTTOM response during calibration:')
561     s2 = input('The refractive index of the BOTTOM response is: ')
562     ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
563     ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]
564    
565     for i,y in enumerate(data[1:]):
566     y1, y2 = y[ind1], y[ind2]
567     slope = (s1-s2)/(y1-y2)
568     offset = s1- slope*y1
569     caldata[i+1] = slope*y+offset
570     dataobj.data = caldata
571     dataobj.updateStatus(Calibrate=True)
572     return dataobj
573    
574     def getbackgroundvalue(obj,bgids):
575     '''Get the averaged value of background spots for each ROI.
576     '''
577     bgsignal = obj.data[0]*0
578     for j in bgids:
579     if j == obj.ROIinfo[j-1]['ID']:
580     bgsignal = bgsignal + obj.data[j]
581    
582    
583     elif j != obj.ROIinfo[j-1]['ID']:
584     for n,eachspot in enumerate(obj.ROIinfo):
585     if j == eachspot['ID']:
586     bgsignal = bgsignal + obj.data[n+1]
587     bgsignal = bgsignal/len(bgids)
588     return bgsignal
589    
590    
591     def BackgroundSubtract(obj, *bgspot):
592     '''Function: Perform the Background subtraction for the UNSPLIT curve.
593     Input besides "obj" is the id of the spot taken as background.
594     The following inputs are acceptable:
595     1. BackgroundSubtract(obj): the default background in Galfile
596     will be subtracted.
597     2. BackgroundSubtract(obj, 1, 6): the average value of spot1
598     and spot6 will be subtracted.
599     '''
600     dataobj = copy.deepcopy(obj)
601     ROIinfo = obj.ROIinfo
602     data = dataobj.data
603    
604     if dataobj.dataformat == 0:
605     newdata = np.zeros(np.shape(data))
606     newdata[0] = data[0] # the time scale
607     if bgspot == ():
608     # The average of the default background spots listed in the galfile
609     #are to be subtracted.
610     for i in range(1,dataobj.ROInum+1):
611     bgids = ROIinfo[i-1]['Background Spot']
612     bgsignal = getbackgroundvalue(dataobj,bgids)
613     newdata[i] = data[i]-bgsignal
614     # update the status of the data object.
615     dataobj.updateStatus(BackgroundType='Default in Gal')
616    
617     else:
618     # The average of the manually input background spots
619     #are to be subtracted.
620     for i in range(1,dataobj.ROInum+1):
621     bgsignal = getbackgroundvalue(dataobj,bgspot)
622     newdata[i] = data[i]-bgsignal
623     dataobj.updateStatus(BackgroundType='Manually choosen')
624     dataobj.data = newdata
625     dataobj.updateStatus(BackgroundSubtraction=True)
626     return dataobj
627    
628     else:
629     print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
630     return
631    
632     def CurveSplit(obj, t_before, t_after, *t_inj):
633     '''Function: Split the whole curve that contains several injections
634     into pieces.
635     The sample information will be checked during the split,
636     if the injection number in sampleinfo is not consistant
637     with the input injection parameter, it will help you to
638     update the sample information.
639     Input besides obj:
640     t_before: time duration before the injection time point
641     t_after: time duration after the injection time point
642     t_inj: the exact start time for each injection
643     Return: the data object with splitted data, and updated sample
644     information
645     '''
646     dataobj = copy.deepcopy(obj)
647     ROInum = dataobj.ROInum
648     t=dataobj.data[0]
649     ind=[]
650     # find how many lines of data each injection
651     for i in range(len(t_inj)):
652     ind.append(np.shape(plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after))))[0])
653     m,j = max(ind),max(ind)-min(ind)
654     # the new dataset must be a m*((ROInum+1)*num_inj) matrix:
655     # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
656     tmpdata = np.zeros(((ROInum+1)*len(t_inj),m))
657    
658     for i in range(len(t_inj)):
659     TmpInd = plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after)))
660     Tmp1,Tmp2 = i*(ROInum+1),(i+1)*(ROInum+1)
661     tmpdata[Tmp1:Tmp2,0:len(TmpInd)] = dataobj.data[:,TmpInd]
662     if j!=0:
663     Split_data = tmpdata[:,:-j]
664     else:
665     Split_data = tmpdata[:]
666     # zero and align the split data
667     for i in range(np.shape(Split_data)[0]):
668     Split_data[i]=Split_data[i]-Split_data[i][0]
669    
670     newsampleinfo, injectinfo = checksampleinfo(dataobj,t_before, t_after, t_inj)
671     if newsampleinfo != None :
672     dataobj.data = Split_data
673     dataobj.updateStatus(DataSplit=True)
674     dataobj.updateSampleinfo(newsampleinfo)
675     dataobj.dataformat = 2
676     dataobj.injectinfo = injectinfo
677     return dataobj
678     else:
679     dataobj.updateStatus(DataSplit=False)
680     print 'Curve Splitting not successful!'
681    
682     return
683    
684    
685     def checksampleinfo(dataobj, t_before, t_after, t_inj):
686     '''Check if the sample information consistent with the injection
687     parameter input. If not, help the user to update sample information
688     for the splitted curve.
689     '''
690     injectinfo = []
691     if len(dataobj.sampleinfo) == len(t_inj):
692     for i,j in enumerate(t_inj):
693     injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
694     return dataobj.sampleinfo, injectinfo
695    
696     elif len(dataobj.sampleinfo) > len(t_inj):
697     print ''' The number of injection you just input does not match that in sample infomation.
698     If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object.'''
699     choose = str.upper(raw_input('Continue? (y/n)'))
700     if choose == 'Y':
701     newsampleinfo, injectinfo = [], []
702     print ' Your input indicates %s injections out of %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
703     for n,i in enumerate(dataobj.sampleinfo):
704     print '--',n,'.',i['Name'],':',i['Concentration']
705     print ' Enter the ID number of the injections you just select, in the form of "2-10" or "4,5,6",'
706     tmpstr = raw_input(':')
707     if ',' in tmpstr:
708     injID = map(int, tmpstr.split(','))
709     elif '-' in tmpstr:
710     injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
711     else:
712     print ' Illegal input!'
713     return
714    
715     if len(injID) != len(t_inj):
716     print ' Please input %s injection ID numbers instead of %s You just input.' %(len(t_inj),len(injID))
717     return
718    
719     for i in injID:
720     newsampleinfo.append(dataobj.sampleinfo[i])
721     injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
722     return newsampleinfo, injectinfo
723     else:
724     return
725    
726     elif len(dataobj.sampleinfo) < len(t_inj):
727     print ' The number of injection you just input does not match that in sample infomation.'
728     print ' Your input indicates %s injections, where there is only %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
729     print ' Please check.'
730    
731     return
732    
733    
734     def writedata(Head,data_out,ROI_out,DataDir,output_format):
735     '''Write the data into a txt file.
736     '''
737     # create a nowfolder with time stamp to save the data
738     if output_format=='B':
739     ftitle='biosensor'
740     Tmpstr = str(ROI_out).replace(', ','_')
741     Tmpstr = Tmpstr.replace('(','-')
742     foutname = ftitle+Tmpstr[:-1]+'.txt'
743     elif output_format=='C':
744     foutname='clamp-'+str(ROI_out)+'.txt'
745     else:
746     ftitle=''
747     # change to the Root dir
748     os.chdir('..')
749     try:
750     os.makedirs(DataDir)
751     except WindowsError:
752     pass
753     os.chdir(DataDir)
754    
755     np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
756     fdt=file('temp.txt','r')
757    
758     ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
759     fout=file(foutname,'w+')
760     # write the head of the data first
761     fout.write(Head+'\n')
762     count = len(fdt.readlines())
763     fdt.seek(0)
764     for i in range(count):
765     temp=fdt.readline()
766     fout.write(temp.replace(' ','\t'))
767     fdt.close()
768     fout.close()
769     print '-------The path of the exported data is "', os.getcwd(),'"-------'
770    
771    
772     def DataOutputBiosensor(obj, DataDir, *ROI_out):
773     '''Export the data into a biosensor format txt file,
774     which can be operated in Scrubber.
775     Input: the data object, the output path, the ROIs selected to export
776     '''
777     # initialize the data output matrix, which will contain 'len(spot_out)*2)*n_inj' columns
778     dataobj = copy.deepcopy(obj)
779     data = dataobj.data
780     n_inj = len(dataobj.sampleinfo)
781     ROIinfo = dataobj.ROIinfo
782     ROInum = dataobj.ROInum
783    
784     data_out = np.zeros(((len(ROI_out)*2)*n_inj,np.shape(data)[1]))
785     head1 = ""
786     new_ROIinfo = []
787     for n,i in enumerate(ROI_out):
788     new_ROIinfo.append(ROIinfo[i-1]) ##the new ROI info
789     if i != ROIinfo[i-1]['IDcal']:
790     # check if the ID number matchs position
791     print n,i, ROIinfo[i-1]
792     return
793     else:
794     headcomp = ROIinfo[i-1]['ID']+': '+ROIinfo[i-1]['Name']
795     try:
796     headcomp = headcomp + ', '+ROIinfo[i-1]['Conc']
797     except KeyError:
798     pass
799     headcomp = headcomp+' Fc='+str(n+1)+' - '
800     for j in range(n_inj):
801     headtmp = headcomp+str(j+1)+'_'
802     head1 = head1+headtmp+'X\t'+headtmp+'Y\t'
803     data_out[2*n*n_inj+2*j]=data[j*(ROInum+1)]
804     data_out[2*n*n_inj+2*j+1]=data[j*(ROInum+1)+i]
805     head1=head1[:-1]
806    
807     dataobj.data = data_out
808     dataobj.dataformat = 3 # biosensor dataformat
809     dataobj.updateStatus(DataType='Biosensor data')
810     dataobj.updateStatus(Head=head1)
811     dataobj.updateROIinfo(new_ROIinfo)
812    
813     writedata(head1,data_out,ROI_out,DataDir,'B')
814    
815     return dataobj
816    
817     def DataOutputClamp(obj, DataDir, ROI_out):
818     '''Export the data into a Clamp format txt file,
819     which can be operated in Clamp.
820     Input: the data object, the output path, the selected ROI
821     '''
822     # initialize the data output matrix, which will contain '2*n_inj' columns
823     dataobj = copy.deepcopy(obj)
824     data = dataobj.data
825     n_inj = len(dataobj.sampleinfo)
826     ROIinfo = dataobj.ROIinfo
827     ROInum = dataobj.ROInum
828     sampleinfo = dataobj.sampleinfo
829    
830     data_out = np.zeros((2*n_inj,np.shape(data)[1]))
831     if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
832     print 'Please check the ROI information.'
833     print ROI_out, ROIinfo[ROI_out-1]
834     return
835     else:
836     tmp = ROIinfo[ROI_out-1]['ID']+': '+ROIinfo[ROI_out-1]['Name']
837     try:
838     tmp = tmp +', Conc:'+ROIinfo[ROI_out-1]['Conc']
839     except KeyError:
840     pass
841     Clamphead = 'Vers 3.41 '+tmp
842     conc=''
843     t_start,t_stop='',''
844     head1='Time\tData\t'*n_inj
845     head1=head1[:-1]
846    
847     for i in range(n_inj):
848     data_out[2*i]=data[i*(ROInum+1)]
849     data_out[2*i+1]=data[i*(ROInum+1)+ROI_out]
850     name = sampleinfo[i]['Name']
851     conc=conc+'\t'+str(sampleinfo[i]['Concentration'])
852     t_start=t_start+'\t'+str(dataobj.injectinfo[i][0])
853     t_stop=t_stop+'\t'+str(dataobj.injectinfo[i][1])
854    
855     hconc,h_start,h_stop='\nConc1'+conc,'\nStart1'+t_start,'\nStop1'+t_stop
856     for i in (hconc,h_start,h_stop):
857     if i[-1]!='':
858     Clamphead=Clamphead+i
859    
860     Clamphead=Clamphead+'\n'+head1
861    
862     dataobj.data = data_out
863     dataobj.dataformat = 4 # biosensor dataformat
864     dataobj.updateStatus(DataType='Clamp data')
865     dataobj.updateStatus(Head=Clamphead)
866     dataobj.updateROIinfo(ROIinfo[ROI_out-1])
867    
868     writedata(Clamphead,data_out,ROI_out,DataDir,'C')
869    
870     return dataobj
871     ## return data_out,Clamphead
872    
873