root/osprai/osprai/trunk/converter.py
Revision: 3
Committed: Tue Apr 6 05:40:47 2010 UTC by clausted
File size: 33042 byte(s)
Log Message:
Functions in converter are renamed to conform to python PEP8 style.  Some uppercase names have been made lowercase.
"""
Converter: Perform the datafile reading, pre-processing and writing.
Yuhang Wan, Christopher Lausted
Last modified on 100405 (yymmdd)

Typical Pipeline:
>import converter as cv
>dat = cv.datafile_read("example.txt")
>dat = cv.outlier_remove(dat)
>dat = cv.calibrate(dat)
>dat = cv.background_subtract(dat)
>dat = cv.curve_split(dat, 100, 800, 200)
>dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
>dat = cv.data_output_biosensor(dat, "/dir", 0)
"""
__version__ = "100405"

import numpy as np
import pylab as plt
import sys
import xlrd
import time
import os
import copy

## import matplotlib.pyplot as plt
## import matplotlib.mlab as mlb
## import packageClamp_100210 as Pack
import SPRdataclass as spr


def check_format_input(fname):
    """Examine the format of an input .txt data file."""
    # Open the file; exit if it does not exist.
    try:
        fp = open(fname, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fname
        sys.exit(0)
    # Examine the first line of the input file.
    # SPRit output starts with: "Elapsed Time (Seconds)<tab>Average Intensity (Pixel Intensity)"
    # Plexera ICM output starts with a CRLF-terminated date/time stamp (e.g. "04/06/2010 ...").
    head_input1 = "Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\n"

    Format_input = 0
    # Check the file format.
    Tmpstr = fp.readline()
    if Tmpstr == head_input1:
        Format_input = 1
    elif Tmpstr[-1] == '\n' and Tmpstr[-2] == '\r' and Tmpstr.split(' ')[0].split('/')[2].startswith('20'):
        Format_input = 2
    elif Tmpstr.startswith('Vers'):
        Format_input = 3

    fp.close()
    return Format_input, Tmpstr

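# A minimal usage sketch (hypothetical filename) showing how callers branch on the
# returned code; the mapping follows the checks in check_format_input above:
#   fmt, firstline = check_format_input("run1.txt")
#   # fmt == 1: SPRit export, 2: Plexera ICM export, 3: Clamp file, 0: unrecognized
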
def sprit_format(fname):
    """Read the SPRit formatted data and reshape it.
    Return the data in the form of a list.
    """
    # Open the file; exit if it does not exist.
    try:
        fp = open(fname, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fname
        sys.exit(0)
    status = {'DatainputType': 'SPRit data'}
    # Skip the first line of the input file.
    Tmpstr = fp.readline()
    # Check whether the second line is BEGIN.
    TmpStr = fp.readline()
    if TmpStr != "BEGIN\n":
        print "Second line is not BEGIN. Break!"
        #sys.exit(0)
    # Count the lines of each data spot.
    TmpStr = fp.readline()  # skip the first "0.000000e+000"
    num_line = 1  # so we start to count from 1
    # Count until the next "0.000000e+000" is read.
    while True:
        TmpStr = fp.readline()
        if TmpStr.split('\t')[0] == "0.000000e+000":
            break
        else:
            num_line = num_line + 1
    # Count the spots in the file.
    num_spot = 0
    # Skip the first two lines.
    fp.seek(0)
    for i in range(0, 2):
        TmpStr = fp.readline()
    text = fp.readlines()
    for i in text:
        num_spot += i.count('0.000000e+000')
    fp.close()
    #---------------------- Reshaping the data --------------------
    # Reshape the data in string form, num_line*num_spot.
    Shaped_str = np.reshape(text, (num_spot, -1))
    tmpshape = np.shape(Shaped_str)
    # Reshape the data in numeric form: num_line*(num_spot+1).
    # Only the first column is time: time spot1 spot2 ... spotn
    time = np.zeros(num_line)
    Tmpdata = Shaped_str[0, :]
    for n, i in enumerate(Tmpdata):
        time[n] = float(i.split('\t')[0])
    Shaped_data_1 = np.zeros((tmpshape[0]+1, tmpshape[1]))
    Shaped_data_1[0, :] = time
    for n1, i in enumerate(Shaped_str):
        for n2, j in enumerate(i):
            Shaped_data_1[n1+1][n2] = float(Shaped_str[n1][n2].split('\t')[1])
    # Reshape the data in numeric form:
    # time1 spot1 time2 spot2 ... timen spotn
    Shaped_data_2 = np.zeros((tmpshape[0]*2, tmpshape[1]))
    for i in range(tmpshape[0]):
        Shaped_data_2[i*2] = Shaped_data_1[0]
        Shaped_data_2[i*2+1] = Shaped_data_1[i+1]

    ## status = {'Datainput Type':'SPRit data'}
    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data_1, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj

def plexera_icm_format(fname):
    """Read the txt data exported from the Plexera ICM software and reshape it.
    Return the data in the form of a list.
    """
    fp = file(fname, 'r')
    Tmpdata, tmptime = [], []
    status = {'DatainputType': 'Plexera ICM data'}
    for eachline in fp:
        Tmpdata.append(eachline.split('\t'))
    num_line, num_spot = np.shape(Tmpdata)
    # Initialize Shaped_data.
    Shaped_data = np.zeros((num_spot, num_line))
    num_spot = num_spot - 1  # the real number of spots
    Tmpdata = np.transpose(Tmpdata)
    # Strip the trailing '\r\n' from the data in the last column of the datafile.
    for n, i in enumerate(Tmpdata[-1]):
        Tmpdata[-1][n] = i.rstrip()
    # Time conversion: convert the first column of the datafile
    # (date and time information) into numerical time.
    for i in Tmpdata[0]:
        tmptime_str = i.split(' ')[1]
        timelist = tmptime_str.split(':')
        now = float(timelist[0])*60*60 + float(timelist[1])*60 + float(timelist[2])
        tmptime.append(now)
    Shaped_data[0] = tmptime
    Shaped_data[0] = Shaped_data[0] - Shaped_data[0][0]
    # Assign the signal data.
    for i in range(num_spot):
        Shaped_data[i+1] = Tmpdata[i+1]

    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data, ROInum, dataformat)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
    return dataobj

def dam_single_sheet(sh, DataList, ROIinfo, start, end, colsize):
    """Retrieve and shape the data from one single sheet of the spreadsheet.
    Return the data and ROI information in the form of lists.
    """
    ## and return the minimal spot Id (start_spot)
    ## Retrieve the head of the spreadsheet; SpotList stores the spot Id of each sheet.

    Head = sh.row_values(1)
    SpotList = []
    num_spot = sh.row_values(0).count('Relative Time')

    for i in range(num_spot):
        TmpHead = Head[colsize*i]
        # There are 5 columns per ROI for Raw Intensity
        # and 4 columns per ROI for Satellite subtracted Intensity.
        DataList.append(sh.col_values(2+colsize*i, start, end))

        key, val = [], []
        tmpinfo = TmpHead.split('\n')
        for j in tmpinfo:
            key.append(j.split(': ')[0])
            val.append(j.split(': ')[1])
        tmpdic = dict(zip(key, val))
        tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
        ROIinfo.append(tmpdic)

    return DataList, ROIinfo

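# Sketch of the per-ROI header cell parsed above: newline-separated "key: value"
# pairs. The field names shown are the keys this module actually uses
# (Block/Row/Column/ID/Name); a real DAM export may carry additional fields and
# the values here are illustrative only:
#   "Block: 0\nRow: 0\nColumn: 1\nID: spot2\nName: anti-IgG"
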
def dam_spreadsheet_format(book):
    """Read the spreadsheet exported from the Plexera DAM software and reshape it.
    Return the shaped data and ROI information.
    """
    ROIinfo, newROIinfo, n_sheet = [], [], 0
    DataList = []
    status = {'DatainputType': 'Plexera DAM data'}
    sh0 = book.sheet_by_index(0)
    if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
        print 'This is the satellite subtracted data.'
        colsize = 4
        status['DAM spreadsheet'] = 'Satellite subtracted Intensity'
    elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
        print 'This is the raw intensity data.'
        colsize = 5
        status['DAM spreadsheet'] = 'Raw Intensity'

    for i in range(2, sh0.nrows):
        if sum(sh0.col_values(2, 1, i)) > 0:
            break
    start, end = i-1, sh0.nrows
    num_line = end - start
    ## print 'start: ', start
    # Initialize and assign the time axis.
    Time = sh0.col_values(1, start, end)
    # In case we lose the time axis during the DAM processing.
    if sum(Time) == 0: Time = np.arange(num_line)
    DataList.append(Time)

    # List DataList stores the Shaped_data of each ROI in each sheet;
    # list ROIinfo stores the spot id and the ligand name of each ROI in each sheet.
    for i in range(book.nsheets):
        sh = book.sheet_by_index(i)
        if sh.ncols != 0:
            DataList, ROIinfo = dam_single_sheet(sh, DataList, ROIinfo, start, end, colsize)
            n_sheet = n_sheet + 1
        else:
            break
    # Calculate the dimensions of Shaped_data and initialize the matrix.
    num_spot = len(DataList) - 1
    num_line = end - start
    Shaped_data = np.zeros((num_spot+1, num_line))
    Shaped_data[0] = DataList[0]

    # In case the ROIs do not count from spot1.
    newROIinfo = copy.deepcopy(ROIinfo)
    StartspotList = np.array([i['ID'] for i in ROIinfo])
    start_spot = min(StartspotList)
    for i in range(num_spot):
        tmp = ROIinfo[i]['ID']
        try:
            j = int(ROIinfo[i]['ID']) - (start_spot-1)
        except ValueError:
            ##return tmp
            if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
                j = int(ROIinfo[i]['ID'][4:]) - (start_spot-1)
            else:
                print 'Illegal galfile format.\n', 'ID:', tmp
                ##return tmp

        Shaped_data[j] = DataList[i+1]
        newROIinfo[j] = ROIinfo[i]

    # Pack the data and related information into an SPRdata object.
    dataformat = 0
    ROInum = num_spot
    dataobj = spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)
    print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)

    return dataobj

def read_clamp_data(fname):
    """Read the Clamp format data.
    Return the data in the form of a list.
    """
    fp = open(fname, 'r')
    Tmpstr = fp.readline()
    # Examine the file head,
    # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
    ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
    try:
        name = Tmpstr.split(' ')[2].strip()
        ROIdic['ROI name'] = name
        stmp = Tmpstr.split(' ')[3]
        rest = Tmpstr[Tmpstr.find(stmp):].strip()
        ROIdic['concentration'] = rest.split(':')
    except IndexError:
        pass

    for Tmpstr in fp:
        try:
            float(Tmpstr.split('\t')[0])
            n_conc = (Tmpstr.count('\t')+1)/2
            ##print Tmpstr, 'the beginning of data'
            break
        except ValueError:
            if Tmpstr.startswith('Conc1'):
                n_conc = Tmpstr.count('\t')
                Conc = Tmpstr.strip().split('\t')[1:]
            elif Tmpstr.startswith('Start1'):
                Start = Tmpstr.strip().split('\t')[1:]
                n_conc = Tmpstr.count('\t')
            elif Tmpstr.startswith('Stop1'):
                Stop = Tmpstr.strip().split('\t')[1:]
                n_conc = Tmpstr.count('\t')

    # Write the data into a matrix.
    Tmpdata = []
    for eachline in fp:
        a = eachline.strip().split('\t')
        Tmpdata.append(np.array(a, dtype='f'))
    Data = np.transpose(Tmpdata)

    # Write the information.
    ROIinfo = [ROIdic]
    status = {'DatainputType': 'Clamp data'}
    sampleinfo, injinfo = [], []
    injpara = [map(float, i) for i in [Start, Stop]]

    if len(Conc) >= 1:
        for i, c in enumerate(Conc):
            sampledic = {}
            name = 'Sample ' + str(i+1)
            sampledic = dict(zip(['Name', 'Concentration'], [name, c]))
            ## sampledic['Name'] = name
            ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
            sampleinfo.append(sampledic)
            injdic = dict(zip(['ton', 'toff'], [float(Start[i]), float(Stop[i])]))
            injinfo.append(injdic)
    dataobj = spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
    dataobj.status = {}
    dataobj.updateStatus(**status)

    return dataobj

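# Sketch of the Clamp file head that read_clamp_data parses (and that
# data_output_clamp writes further below); the numbers are illustrative only and
# <TAB> marks a tab character:
#   Vers 3.41 cTBA-1 Conc:250 nM
#   Conc1<TAB>250<TAB>500
#   Start1<TAB>100<TAB>1100
#   Stop1<TAB>400<TAB>1400
#   Time<TAB>Data<TAB>Time<TAB>Data
# followed by tab-separated numeric data rows.
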
def keyfile_read(fkey):
    # The key file contains the spot ID, the name of the immobilized protein,
    # and optionally the concentration and the background spot for each ROI.
    """Function: Read the key file for the old SPR instrument.
    Input: the filename.
    Return: the ROI information.
    """
    try:
        fp = open(fkey, "r")
    except IOError:
        print 'Cannot open file %s for reading' % fkey
        sys.exit(0)
    ROIinfo = []
    firstline = fp.readline()  # skip the first line
    # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        ROIdic['ID'] = tmplist[0]  # spot id
        ROIdic['Name'] = tmplist[1]  # name of the protein immobilized
        try:
            ROIdic['Conc'] = tmplist[2]
            ROIdic['Background Spot'] = int(tmplist[3])
        except IndexError:
            pass
        ROIinfo.append(ROIdic)

    fp.close()
    print 'There are %d ROIs in the Key file.' % len(ROIinfo)
    return ROIinfo

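# Sketch of one tab-separated key-file row as read above (columns: spot ID, name
# of the immobilized protein, then optionally concentration and background spot
# id). The values are illustrative, not taken from a real export:
#   spot1<TAB>anti-IgG<TAB>100nM<TAB>5
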
def galfile_read(fname):
    """Function: Read the GAL file.
    Input: the filename.
    Return: the ROI information.
    """
    fp = file(fname, 'r')
    ROIinfo = []
    Tmpstr = fp.readline()  # skip the first line
    Tmpstr = fp.readline()
    Tmpn = int(Tmpstr.split('\t')[0])
    for i in range(Tmpn):
        labelline = fp.readline()

        if labelline.startswith('"Block1'):
            a = labelline.strip().split(',')
            columnsize, rowsize = int(a[3]), int(a[5])
            ## print labelline

    def IDcal(p):
        id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
        return id

    for eachline in fp:
        ROIdic = {}
        tmplist = eachline.strip().split('\t')
        pos = tuple(map(int, tmplist[:3]))
        ROIdic['Position'] = pos
        ROIdic['IDcal'] = IDcal(pos)
        ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]

        ROIdic['Name'] = tmplist[3]
        ROIdic['ID'] = tmplist[4]
        try:
            ROIdic['Conc'] = tmplist[5]
            ROIdic['Family'] = tmplist[6]
            blist = tmplist[7][1:-1].split(';')  # always something like '"1,1,2; 1,2,2"'
            Backn = len(blist)
            Bid = []

            for i in blist:
                if '' in i.split(','):
                    break
                Bpos = tuple(map(int, i.split(',')))
                Bid.append(IDcal(Bpos))

            ROIdic['Background Spot'] = tuple(Bid)
        except IndexError:
            pass

        if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
            print 'The spot ID should be consistent with the position. Please check the ID.'
            break
        else:
            ROIdic['ID'] = ROIdic['IDcal']
            ROIinfo.append(ROIdic)
    fp.close()
    print 'There are %d ROIs in the Gal file.' % len(ROIinfo)

    return ROIinfo

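# Worked example for IDcal, assuming the '"Block1...' header line gives
# columnsize=10 and rowsize=12 (illustrative values): position
# (block, row, column) = (1, 2, 3) maps to
#   ID = (1-1)*10*12 + (2-1)*10 + 3 = 13,
# which is the value the consistency check against the 'spotN' ID relies on.
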
def method_read(fname):
    """Function: Read the analyte table.
    Input: the filename
    Return: the sample information
    """
    # Create a concentration dictionary for each sample injected into the flowcell.
    # Export a table containing the concentration of the injected sample and the duration.
    fp = file(fname, 'r')
    sampleinfo = []
    ##TmpTable,TmpTable2={},{}
    labelline = fp.readline()
    for n, eachline in enumerate(fp):
        sampledic = {}
        sampledic['Location'] = eachline.split('\t')[0]
        sampledic['Name'] = eachline.split('\t')[1]
        sampledic['Concentration'] = float(eachline.split('\t')[2])
        sampledic['Duration'] = float(eachline.split('\t')[4])
        sampledic['Flow Rate'] = float(eachline.split('\t')[3])
        sampledic['Analyte Series'] = eachline.split('\t')[6]
        sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
        sampleinfo.append(sampledic)

    print 'Name', '\t', 'Concentration'
    for i in sampleinfo:
        print i['Name'], '\t', i['Concentration']

    return sampleinfo

def datafile_read(fname):
    """Function: Read the raw data from the Plexera instrument
    and pack the raw data (Shaped_data) into a data object.
    Therefore, the initial dataformat = 0.
    Input: the filename
    Return: the packed SPR.DataPass type data object
    """
    # =================== Check the format of the input files ===================
    if fname.split('.')[-1] == 'txt':

        Format_input, Firstline = check_format_input(fname)
        if Format_input == 1:  # for SPRit file format
            dataobj = sprit_format(fname)
            print '-'*10, ' This is a SPRit data file. ', '-'*10

        elif Format_input == 2:  # for Plexera ICM file format
            dataobj = plexera_icm_format(fname)
            print '-'*10, ' This is a Plexera ICM exported data file. ', '-'*10

        elif Format_input == 3:  # for Clamp datafile format
            dataobj = read_clamp_data(fname)
            print '-'*10, ' This is a Clamp file. ', '-'*10

        else:
            print '-'*10, ' unreadable file input! ', '-'*10
            sys.exit(0)

        if Format_input == 1 or Format_input == 2:
            flag = str.upper(raw_input('Load the key file? (y/n): '))
            if flag == 'Y':
                fkey = raw_input('Input the path of the key file: ')
                ROIinfo = keyfile_read(fkey)
                dataobj.updateROIinfo(ROIinfo)

    elif fname.split('.')[-1] == 'xls':
        # For the DAM spreadsheet file format.
        print '-'*10, ' This is a Plexera DAM exported data file. ', '-'*10
        book = xlrd.open_workbook(fname)
        # Shape the data in the spreadsheet, whether single sheet or multiple sheets.
        dataobj = dam_spreadsheet_format(book)
        flag = str.upper(raw_input('Load the gal file? (y/n): '))
        if flag == 'Y':
            fgal = raw_input('Input the path of the gal file: ')
            ROIinfo = galfile_read(fgal)
            dataobj.updateROIinfo(ROIinfo)
        flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
        if flag == 'Y':
            fprotocol = raw_input('Input the path of the analyte table: ')
            sampleinfo = method_read(fprotocol)
            dataobj.updateSampleinfo(sampleinfo)

    return dataobj

def outlier_remove(obj):
    ##dataobj.viewAll()
    """Function: Remove unwanted data points from noisy periods.
    Return: the data object with clean data.
    """
    dataobj = copy.deepcopy(obj)
    print ('Select the time area that you want to remove.')
    tmpstr = raw_input('in the format of "200:240, 500:510" :')
    areatodel = tmpstr.split(',')
    data = dataobj.data
    for i in areatodel:
        ##if
        t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
        t = data[0]  # for dataformat=0 Shaped_data
        tmpind = plt.find((t > t1) & (t < t2))  # find the outlier indices
        ## data[1:,tmpind] = 0
        tmplist = data.tolist()
        tmplist2 = []
        for j in tmplist:
            del j[min(tmpind):max(tmpind)]
            tmplist2.append(j)
        data = np.array(tmplist2)
        ## tmplist = tmplist
    dataobj.data = data
    # dataobj.status = 'Outlier removed'
    dataobj.updateStatus(outlier_remove=True)
    return dataobj

def calibrate(obj):
    """Function: Calibrate the intensity response.
    Return: the data object with calibrated data.
    Note: at present, this function is valid only when
    the whole procedure includes a calibration procedure.
    """
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    caldata = copy.deepcopy(data)
    t = data[0]
    if dataobj.dataformat != 0:
        print 'We only calibrate the raw data.'
        return
    else:
        t1 = input('Enter the time position of the TOP response during calibration:')
        s1 = input('The refractive index of the TOP response is: ')
        t2 = input('Enter the time position of the BOTTOM response during calibration:')
        s2 = input('The refractive index of the BOTTOM response is: ')
        ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
        ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]

        for i, y in enumerate(data[1:]):
            y1, y2 = y[ind1], y[ind2]
            slope = (s1-s2)/(y1-y2)
            offset = s1 - slope*y1
            caldata[i+1] = slope*y + offset
        dataobj.data = caldata
        dataobj.updateStatus(calibrate=True)
        return dataobj

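# Two-point calibration sketch with illustrative numbers: if the TOP response is
# y1=30000 counts at refractive index s1=1.3340 and the BOTTOM response is
# y2=20000 counts at s2=1.3330, then slope = (1.3340-1.3330)/(30000-20000) = 1e-7
# and offset = 1.3340 - 1e-7*30000 = 1.3310, so every trace is mapped as
# calibrated = slope*raw + offset.
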
def get_background_value(obj, bgids):
    """Get the averaged value of the background spots for each ROI."""
    bgsignal = obj.data[0]*0
    for j in bgids:
        if j == obj.ROIinfo[j-1]['ID']:
            bgsignal = bgsignal + obj.data[j]
        elif j != obj.ROIinfo[j-1]['ID']:
            for n, eachspot in enumerate(obj.ROIinfo):
                if j == eachspot['ID']:
                    bgsignal = bgsignal + obj.data[n+1]
    bgsignal = bgsignal/len(bgids)
    return bgsignal

def background_subtract(obj, *bgspot):
    """Function: Perform the background subtraction for the UNSPLIT curve.
    Input besides "obj" is the id of the spot taken as background.
    The following inputs are acceptable:
    1. background_subtract(obj): the default background in the Galfile
       will be subtracted.
    2. background_subtract(obj, 1, 6): the average value of spot1
       and spot6 will be subtracted.
    """
    dataobj = copy.deepcopy(obj)
    ROIinfo = obj.ROIinfo
    data = dataobj.data

    if dataobj.dataformat == 0:
        newdata = np.zeros(np.shape(data))
        newdata[0] = data[0]  # the time scale
        if bgspot == ():
            # The average of the default background spots listed in the galfile
            # is subtracted.
            for i in range(1, dataobj.ROInum+1):
                bgids = ROIinfo[i-1]['Background Spot']
                bgsignal = get_background_value(dataobj, bgids)
                newdata[i] = data[i] - bgsignal
            # Update the status of the data object.
            dataobj.updateStatus(BackgroundType='Default in Gal')

        else:
            # The average of the manually entered background spots
            # is subtracted.
            for i in range(1, dataobj.ROInum+1):
                bgsignal = get_background_value(dataobj, bgspot)
                newdata[i] = data[i] - bgsignal
            dataobj.updateStatus(BackgroundType='Manually chosen')
        dataobj.data = newdata
        dataobj.updateStatus(background_subtraction=True)
        return dataobj

    else:
        print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
        return

def curve_split(obj, t_before, t_after, *t_inj):
    """Function: Split the whole curve that contains several injections
    into pieces.
    The sample information is checked during the split; if the injection
    number in sampleinfo is not consistent with the input injection
    parameters, it will help you update the sample information.
    Input besides obj:
        t_before: time duration before the injection time point
        t_after: time duration after the injection time point
        t_inj: the exact start time of each injection
    Return: the data object with split data and updated sample
    information.
    """
    dataobj = copy.deepcopy(obj)
    ROInum = dataobj.ROInum
    t = dataobj.data[0]
    ind = []
    # Find how many lines of data each injection has.
    for i in range(len(t_inj)):
        ind.append(np.shape(plt.find((t > (t_inj[i]-t_before)) & (t < (t_inj[i]+t_after))))[0])
    m, j = max(ind), max(ind)-min(ind)
    # The new dataset must be an m*((ROInum+1)*num_inj) matrix:
    # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
    tmpdata = np.zeros(((ROInum+1)*len(t_inj), m))

    for i in range(len(t_inj)):
        TmpInd = plt.find((t > (t_inj[i]-t_before)) & (t < (t_inj[i]+t_after)))
        Tmp1, Tmp2 = i*(ROInum+1), (i+1)*(ROInum+1)
        tmpdata[Tmp1:Tmp2, 0:len(TmpInd)] = dataobj.data[:, TmpInd]
    if j != 0:
        Split_data = tmpdata[:, :-j]
    else:
        Split_data = tmpdata[:]
    # Zero and align the split data.
    for i in range(np.shape(Split_data)[0]):
        Split_data[i] = Split_data[i] - Split_data[i][0]

    # check_sample_info may return None when the injection list cannot be
    # reconciled, so unpack only after checking the result.
    result = check_sample_info(dataobj, t_before, t_after, t_inj)
    if result is not None:
        newsampleinfo, injectinfo = result
        dataobj.data = Split_data
        dataobj.updateStatus(DataSplit=True)
        dataobj.updateSampleinfo(newsampleinfo)
        dataobj.dataformat = 2
        dataobj.injectinfo = injectinfo
        return dataobj
    else:
        dataobj.updateStatus(DataSplit=False)
        print 'Curve splitting not successful!'

        return

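# Row layout sketch of the split data (dataformat 2), as built above: for each of
# the n_inj injections there is one time row followed by ROInum response rows,
#   [T_1, ROI1_1, ..., ROIn_1, T_2, ROI1_2, ..., ROIn_2, ...]
# and every row is zeroed to its first sample.
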
def check_sample_info(dataobj, t_before, t_after, t_inj):
    """Check whether the sample information is consistent with the injection
    parameters input. If not, help the user update the sample information
    for the split curve.
    """
    injectinfo = []
    if len(dataobj.sampleinfo) == len(t_inj):
        for i, j in enumerate(t_inj):
            injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
        return dataobj.sampleinfo, injectinfo

    elif len(dataobj.sampleinfo) > len(t_inj):
        print """ The number of injections you just input does not match that in the sample information.
        If you only want to split part of the curve, please follow the directions below to update the sampleinfo in the new object."""
        choose = str.upper(raw_input('Continue? (y/n)'))
        if choose == 'Y':
            newsampleinfo, injectinfo = [], []
            print ' Your input indicates %s injections out of %s in the experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
            for n, i in enumerate(dataobj.sampleinfo):
                print '--', n, '.', i['Name'], ':', i['Concentration']
            print ' Enter the ID numbers of the injections you selected, in the form of "2-10" or "4,5,6".'
            tmpstr = raw_input(':')
            if ',' in tmpstr:
                injID = map(int, tmpstr.split(','))
            elif '-' in tmpstr:
                injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
            else:
                print ' Illegal input!'
                return

            if len(injID) != len(t_inj):
                print ' Please input %s injection ID numbers instead of the %s you just input.' % (len(t_inj), len(injID))
                return

            for i in injID:
                newsampleinfo.append(dataobj.sampleinfo[i])
                injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
            return newsampleinfo, injectinfo
        else:
            return

    elif len(dataobj.sampleinfo) < len(t_inj):
        print ' The number of injections you just input does not match that in the sample information.'
        print ' Your input indicates %s injections, but there are only %s in the experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
        print ' Please check.'

        return

def write_data(Head, data_out, ROI_out, DataDir, output_format):
    """Write the data into a txt file."""
    # Modified 100405 CGL
    # Create a new folder with a time stamp to save the data.
    if output_format == 'B':
        ftitle = 'biosensor'
        Tmpstr = str(ROI_out).replace(', ', '_')
        Tmpstr = Tmpstr.replace('(', '-')
        foutname = ftitle+Tmpstr[:-1]+'.txt'
    elif output_format == 'C':
        foutname = 'clamp-'+str(ROI_out)+'.txt'
    else:
        ftitle = ''

    # Change directories, if requested.
    if (DataDir != ""):
        os.chdir('..')
        try:
            os.makedirs(DataDir)
        except WindowsError:
            pass
        os.chdir(DataDir)

    np.savetxt('temp.txt', np.transpose(data_out), fmt='%f')
    fdt = file('temp.txt', 'r')

    ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
    fout = file(foutname, 'w+')
    # Write the head of the data first.
    fout.write(Head+'\n')
    count = len(fdt.readlines())
    fdt.seek(0)
    for i in range(count):
        temp = fdt.readline()
        fout.write(temp.replace(' ', '\t'))
    fdt.close()
    fout.close()
    print '-------The path of the exported data is "', os.getcwd(), '"-------'

def data_output_biosensor(obj, DataDir, *ROI_out):
    """Export the data into a biosensor format txt file,
    which can be processed in Scrubber.
    Input: the data object, the output path, the ROIs selected for export.
    """
    # Initialize the data output matrix, which will contain (len(ROI_out)*2)*n_inj columns.
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum

    data_out = np.zeros(((len(ROI_out)*2)*n_inj, np.shape(data)[1]))
    head1 = ""
    new_ROIinfo = []
    for n, i in enumerate(ROI_out):
        new_ROIinfo.append(ROIinfo[i-1])  ## the new ROI info
        if i != ROIinfo[i-1]['IDcal']:
            # Check whether the ID number matches the position.
            print n, i, ROIinfo[i-1]
            return
        else:
            headcomp = ROIinfo[i-1]['ID']+': '+ROIinfo[i-1]['Name']
            try:
                headcomp = headcomp + ', '+ROIinfo[i-1]['Conc']
            except KeyError:
                pass
            headcomp = headcomp+' Fc='+str(n+1)+' - '
            for j in range(n_inj):
                headtmp = headcomp+str(j+1)+'_'
                head1 = head1+headtmp+'X\t'+headtmp+'Y\t'
                data_out[2*n*n_inj+2*j] = data[j*(ROInum+1)]
                data_out[2*n*n_inj+2*j+1] = data[j*(ROInum+1)+i]
    head1 = head1[:-1]

    dataobj.data = data_out
    dataobj.dataformat = 3  # biosensor dataformat
    dataobj.updateStatus(DataType='Biosensor data')
    dataobj.updateStatus(Head=head1)
    dataobj.updateROIinfo(new_ROIinfo)

    write_data(head1, data_out, ROI_out, DataDir, 'B')

    return dataobj

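# Sketch of the biosensor column header built above for one ROI and two
# injections (values illustrative; Fc numbering follows the order of ROI_out,
# <TAB> marks a tab character):
#   "5: anti-IgG, 100nM Fc=1 - 1_X<TAB>5: anti-IgG, 100nM Fc=1 - 1_Y<TAB>... - 2_X<TAB>... - 2_Y"
# i.e. one X (time) column and one Y (response) column per injection per ROI.
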
def data_output_clamp(obj, DataDir, ROI_out):
    """Export the data into a Clamp format txt file,
    which can be processed in Clamp.
    Input: the data object, the output path, the selected ROI.
    """
    # Initialize the data output matrix, which will contain 2*n_inj columns.
    dataobj = copy.deepcopy(obj)
    data = dataobj.data
    n_inj = len(dataobj.sampleinfo)
    ROIinfo = dataobj.ROIinfo
    ROInum = dataobj.ROInum
    sampleinfo = dataobj.sampleinfo

    data_out = np.zeros((2*n_inj, np.shape(data)[1]))
    if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
        print 'Please check the ROI information.'
        print ROI_out, ROIinfo[ROI_out-1]
        return
    else:
        tmp = ROIinfo[ROI_out-1]['ID']+': '+ROIinfo[ROI_out-1]['Name']
        try:
            tmp = tmp + ', Conc:'+ROIinfo[ROI_out-1]['Conc']
        except KeyError:
            pass
        Clamphead = 'Vers 3.41 '+tmp
        conc = ''
        t_start, t_stop = '', ''
        head1 = 'Time\tData\t'*n_inj
        head1 = head1[:-1]

        for i in range(n_inj):
            data_out[2*i] = data[i*(ROInum+1)]
            data_out[2*i+1] = data[i*(ROInum+1)+ROI_out]
            name = sampleinfo[i]['Name']
            conc = conc+'\t'+str(sampleinfo[i]['Concentration'])
            t_start = t_start+'\t'+str(dataobj.injectinfo[i][0])
            t_stop = t_stop+'\t'+str(dataobj.injectinfo[i][1])

        hconc, h_start, h_stop = '\nConc1'+conc, '\nStart1'+t_start, '\nStop1'+t_stop
        for i in (hconc, h_start, h_stop):
            if i[-1] != '':
                Clamphead = Clamphead+i

        Clamphead = Clamphead+'\n'+head1

        dataobj.data = data_out
        dataobj.dataformat = 4  # Clamp dataformat
        dataobj.updateStatus(DataType='Clamp data')
        dataobj.updateStatus(Head=Clamphead)
        dataobj.updateROIinfo(ROIinfo[ROI_out-1])

        write_data(Clamphead, data_out, ROI_out, DataDir, 'C')

        return dataobj
        ## return data_out,Clamphead

if (__name__ == '__main__'):
    # Print a short message if this module is run like an app.
    print "This is the OSPRAI file type conversion tool."