ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
Revision: 5
Committed: Wed Apr 7 06:46:26 2010 UTC (9 years, 2 months ago) by clausted
File size: 33468 byte(s)
Log Message:
Changes made to allow use with Linux.  Can now read SPRit and output Clamp formats.  Replace == and != with "in" and "not in" string comparisons.  Change IDcal to ID in line 834.  Add function "method_read_fake" to provide default information.  Change WindowsError exception to OSError exception.  Allow user to specify default pathway for saving files by entering null string "".
Line User Rev File contents
1 clausted 3 """
2 clausted 1 Converter: Perform the datafile reading, pre-processing and writing.
3 clausted 3 Yuhang Wan, Christopher Lausted
4     Last modified on 100405 (yymmdd)
5 clausted 1
6 clausted 3 Typical Pipeline:
7     >import converter as cv
8     >dat = cv.datafile_read("example.txt")
9     >dat = cv.outlier_remove(dat)
10     >dat = cv.calibrate(dat)
11     >dat = cv.background_subtract(dat)
12     >dat = cv.curve_split(dat, 100, 800, 200)
13     >dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
14     >dat = cv.data_output_biosensor(dat, "/dir", 0)
15     """
16     __version__ = "100405"
17 clausted 1
18     import numpy as np
19     import pylab as plt
20     import sys
21     import xlrd
22     import time
23     import os
24     import copy
25    
26     ## import matplotlib.pyplot as plt
27     ## import matplotlib.mlab as mlb
28     ## import packageClamp_100210 as Pack
29 clausted 3 import SPRdataclass as spr
30 clausted 1
31    
32 clausted 3 def check_format_input(fname):
33     """Examine the format of a input .txt data file."""
34 clausted 1 # open a file, if file doesn't exit, then exit
35     try:
36     fp = open(fname, "r")
37     except IOError:
38     print 'Cannot open file %s for reading' % fname
39     sys.exit(0)
40     # examine the first line of the input file
41     # for SPRit output:"Elapsed Time (Seconds) Average Intensity (Pixel Intensity)"
42     # for Plexera ICM output:
43 clausted 5 head_input1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
44 clausted 1
45     Format_input = 0
46     # check file format
47     Tmpstr = fp.readline()
48 clausted 5 if (head_input1 in Tmpstr):
49 clausted 1 Format_input=1
50     elif Tmpstr[-1]=='\n' and Tmpstr[-2]=='\r' and Tmpstr.split(' ')[0].split('/')[2].startswith('20'):
51     Format_input=2
52     elif Tmpstr.startswith('Vers'):
53     Format_input=3
54    
55     fp.close()
56     return Format_input, Tmpstr
57    
58    
59 clausted 3 def sprit_format(fname):
60     """Read the SPRit formatted data and reshape it.
61 clausted 1 Return the data in the form of list.
62 clausted 3 """
63 clausted 1 # open a file, if file doesn't exit, then exit
64     try:
65     fp = open(fname, "r")
66     except IOError:
67     print 'Cannot open file %s for reading' % fname
68     sys.exit(0)
69     status = {'DatainputType':'SPRit data'}
70     # skip the first line of the input file
71     Tmpstr = fp.readline()
72     # check if the second line is BEGIN
73     TmpStr = fp.readline()
74 clausted 5 if ("BEGIN" not in TmpStr):
75     print "Warning: Second line of data file is not 'BEGIN'"
76     ##sys.exit(0)
77 clausted 1 # count the lines of each data spot
78     TmpStr = fp.readline() # skip first "0.000000e+000"
79     num_line = 1 # so we start to count from 1
80     # count until read the "0.000000e+000"
81     while True:
82     TmpStr = fp.readline()
83     if TmpStr.split('\t')[0] == "0.000000e+000":
84     break
85     else:
86     num_line = num_line + 1
87     # count the spots in each file
88     num_spot = 0
89     # skip the first two lines
90     fp.seek(0)
91     for i in range(0,2):
92     TmpStr = fp.readline()
93     text = fp.readlines()
94     for i in text:
95     num_spot += i.count('0.000000e+000')
96     fp.close()
97     #---------------------- Reshaping the data --------------------
98     # reshape the data in the string form num_line*num_spot
99     Shaped_str = np.reshape(text,(num_spot,-1))
100     tmpshape=np.shape(Shaped_str)
101     # reshape the data in the number form: num_line*(num_spot+1)
102     # only the first column is time: time spotl spot2 ... spot n
103     time = np.zeros(num_line)
104     Tmpdata = Shaped_str[0,:]
105     for n,i in enumerate(Tmpdata):
106     time[n]=float(i.split('\t')[0])
107     Shaped_data_1 = np.zeros((tmpshape[0]+1,tmpshape[1]))
108     Shaped_data_1[0,:]=time
109     for n1,i in enumerate(Shaped_str):
110     for n2,j in enumerate(i):
111     Shaped_data_1[n1+1][n2]=float(Shaped_str[n1][n2].split('\t')[1])
112     # reshape the data in the number form:
113     # time1 spotl time2 spot2 ... timen spot n
114     Shaped_data_2 = np.zeros((tmpshape[0]*2,tmpshape[1]))
115     for i in range(tmpshape[0]):
116     Shaped_data_2[i*2]=Shaped_data_1[0]
117     Shaped_data_2[i*2+1] = Shaped_data_1[i+1]
118    
119     ## status = {'Datainput Type':'SPRit data'}
120     dataformat = 0
121     ROInum = num_spot
122 clausted 3 dataobj=spr.DataPass(Shaped_data_1, ROInum, dataformat)
123 clausted 1 dataobj.status = {}
124     dataobj.updateStatus(**status)
125     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
126    
127     return dataobj
128    
129    
130 clausted 3 def plexera_icm_format(fname):
131     """Read the txt data exported from Plexera ICM software and reshape it.
132 clausted 1 Return the data in the form of list.
133 clausted 3 """
134 clausted 1 fp=file(fname,'r')
135     Tmpdata,tmptime=[],[]
136     status = {'DatainputType':'Plexera ICM data'}
137     for eachline in fp:
138     Tmpdata.append(eachline.split('\t'))
139     num_line,num_spot=np.shape(Tmpdata)
140     # initialize the Shaped_data
141     Shaped_data = np.zeros((num_spot,num_line))
142     num_spot=num_spot-1 # the real number of spot
143     Tmpdata=np.transpose(Tmpdata)
144     # delete the last '\r\n' of the data in last column of the datafile
145     for n,i in enumerate(Tmpdata[-1]):
146     Tmpdata[-1][n]=i.rstrip()
147     # time converting: convert the first column of the datafile(date and time information) into numerical time
148     for i in Tmpdata[0]:
149     tmptime_str=i.split(' ')[1]
150     timelist=tmptime_str.split(':')
151     now=float(timelist[0])*60*60+float(timelist[1])*60+float(timelist[2])
152     tmptime.append(now)
153     Shaped_data[0]=tmptime
154     Shaped_data[0]=Shaped_data[0]-Shaped_data[0][0]
155     # assign the signal data
156     for i in range(num_spot):
157     Shaped_data[i+1]=Tmpdata[i+1]
158    
159     dataformat = 0
160     ROInum = num_spot
161 clausted 3 dataobj=spr.DataPass(Shaped_data, ROInum, dataformat)
162 clausted 1 dataobj.status = {}
163     dataobj.updateStatus(**status)
164    
165     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
166     return dataobj
167    
168    
169    
170 clausted 3 def dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize):
171     """Retrieve and shape data form one single sheet from the Spreadsheet.
172 clausted 1 Return the data, ROI infomation in the form of list.
173 clausted 3 """
174 clausted 1 ## and return the minimal spot Id (start_spot)
175     ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet
176    
177     Head = sh.row_values(1)
178     SpotList = []
179     num_spot = sh.row_values(0).count('Relative Time')
180    
181     for i in range(num_spot):
182     TmpHead=Head[colsize*i]
183     # now there are 5 columns for each ROI for Raw Intensity
184     # and 4 columns for each ROI for Satellite subtracted Intensity
185     DataList.append(sh.col_values(2+colsize*i,start,end))
186    
187     key, val = [], []
188     tmpinfo = TmpHead.split('\n')
189     for j in tmpinfo:
190     key.append(j.split(': ')[0])
191     val.append(j.split(': ')[1])
192     tmpdic = dict(zip(key,val))
193     tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
194     ROIinfo.append(tmpdic)
195    
196     return DataList,ROIinfo
197    
198 clausted 3 def dam_spreadsheet_format(book):
199     """Read the spreadsheet exported from Plexera DAM software and reshape it.
200 clausted 1 Return the shaped data and ROI information.
201 clausted 3 """
202 clausted 1 ROIinfo, newROIinfo, n_sheet = [], [], 0
203     DataList=[]
204     status = {'DatainputType':'Plexera DAM data'}
205     sh0=book.sheet_by_index(0)
206     if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
207     print 'This is the satellite subtracted data.'
208     colsize = 4
209     status ['DAM spreadsheet'] = 'Satellite subtracted Intensity'
210     elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
211     print 'This is the raw intensity data.'
212     colsize = 5
213     status ['DAM spreadsheet'] = 'Raw Intensity'
214    
215     for i in range(2,sh0.nrows):
216     if sum(sh0.col_values(2,1,i))>0:
217     break
218     start,end = i-1,sh0.nrows
219     num_line = end - start
220     ## print 'start: ', start
221     # the time axis initialization and assignment
222     Time=sh0.col_values(1,start,end)
223     # in case we lose the time axis during the DAM processing
224     if sum(Time)==0: Time=np.arange(num_line)
225     DataList.append(Time)
226    
227     # list DataList to store the Shaped_data of each ROI in each sheet,
228     # list ROIinfo to store the Spot id and the ligand name of each ROI in each sheet
229     for i in range(book.nsheets):
230     sh=book.sheet_by_index(i)
231     if sh.ncols!=0:
232 clausted 3 DataList,ROIinfo = dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize)
233 clausted 1 n_sheet=n_sheet+1
234     else:
235     break
236     # calculate the dim of the Shaped_data, and initialize the matrix
237     num_spot=len(DataList)-1
238     num_line=end-start
239     Shaped_data=np.zeros((num_spot+1,num_line))
240     Shaped_data[0]=DataList[0]
241    
242     # in case that the ROIs doesn't count from spot1
243     newROIinfo = copy.deepcopy(ROIinfo)
244     StartspotList=np.array([i['ID'] for i in ROIinfo])
245     start_spot=min(StartspotList)
246     for i in range(num_spot):
247     tmp = ROIinfo[i]['ID']
248     try:
249     j=int(ROIinfo[i]['ID'])-(start_spot-1)
250     except ValueError:
251     ##return tmp
252     if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
253     j=int(ROIinfo[i]['ID'][4:])-(start_spot-1)
254     else:
255     print 'Illegal galfile format.\n', 'ID:', tmp
256     ##return tmp
257    
258     Shaped_data[j]=DataList[i+1]
259     newROIinfo[j]=ROIinfo[i]
260    
261     # pack the date and relative information into SPRdata obj
262     dataformat = 0
263     ROInum = num_spot
264 clausted 3 dataobj=spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
265 clausted 1 dataobj.status = {}
266     dataobj.updateStatus(**status)
267     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
268    
269     return dataobj
270    
271    
272 clausted 3 def read_clamp_data(fname):
273     """Read the Clamp format data
274 clausted 1 Return the data in the form of list.
275 clausted 3 """
276 clausted 1 fp=open(fname,'r')
277     Tmpstr=fp.readline()
278     # examine the file head
279     # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
280     ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
281     try:
282     name = Tmpstr.split(' ')[2].strip()
283     ROIdic['ROI name'] = name
284     stmp = Tmpstr.split(' ')[3]
285     rest = Tmpstr[Tmpstr.find(stmp):].strip()
286     ROIdic['concentration'] = rest.split(':')
287     except IndexError:
288     pass
289    
290     for Tmpstr in fp:
291     try:
292     float(Tmpstr.split('\t')[0])
293     n_conc=(Tmpstr.count('\t')+1)/2
294     ##print Tmpstr, 'the beginning of data'
295     break
296     except ValueError:
297     if Tmpstr.startswith('Conc1'):
298     n_conc=Tmpstr.count('\t')
299     Conc=Tmpstr.strip().split('\t')[1:]
300     elif Tmpstr.startswith('Start1'):
301     Start=Tmpstr.strip().split('\t')[1:]
302     n_conc=Tmpstr.count('\t')
303     elif Tmpstr.startswith('Stop1'):
304     Stop=Tmpstr.strip().split('\t')[1:]
305     n_conc=Tmpstr.count('\t')
306    
307    
308     # write the data into matrix
309     Tmpdata=[]
310     for eachline in fp:
311     a=eachline.strip().split('\t')
312     Tmpdata.append(np.array(a,dtype='f'))
313     Data=np.transpose(Tmpdata)
314    
315     # write the infomation
316     ROIinfo = [ROIdic]
317     status = {'DatainputType':'Clamp data'}
318     sampleinfo, injinfo = [], []
319     injpara = [map(float,i) for i in [Start,Stop]]
320    
321     if len(Conc) >= 1:
322     for i,c in enumerate(Conc):
323     sampledic = {}
324     name = 'Sample '+str(i+1)
325     sampledic = dict(zip(['Name','Concentration'],[name,c]))
326     ## sampledic['Name'] = name
327     ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
328     sampleinfo.append(sampledic)
329     injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
330     injinfo.append(injdic)
331 clausted 3 dataobj=spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
332 clausted 1 dataobj.status = {}
333     dataobj.updateStatus(**status)
334    
335     return dataobj
336    
337    
338    
339 clausted 3 def keyfile_read(fkey):
340 clausted 1 # The Key File contains
341 clausted 3 """Function: Read the key file for old SPR instrument.
342 clausted 1 Input: the filename.
343     Return: the ROI information.
344 clausted 3 """
345 clausted 1 try:
346     fp = open(fkey, "r")
347     except IOError:
348     print 'Cannot open file %s for reading' % fname
349     sys.exit(0)
350     ROIinfo = []
351     firstline=fp.readline() # skip the first line
352     # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
353     for eachline in fp:
354     ROIdic = {}
355     tmplist = eachline.strip().split('\t')
356     ROIdic['ID'] = tmplist[0] # spot id
357     ROIdic['Name'] = tmplist[1] # name of the protein immobilized
358     try:
359     ROIdic['Conc'] = tmplist[2]
360     ROIdic['Background Spot'] = int(tmplist[3])
361     except IndexError:
362     pass
363     ROIinfo.append(ROIdic)
364    
365     fp.close()
366     print 'There are %d ROIs in the Key file.' % len(ROIinfo)
367     return ROIinfo
368    
369    
370 clausted 3 def galfile_read(fname):
371     """Function: Read the GAL file.
372 clausted 1 Input: the filename.
373     Return: the ROI information.
374 clausted 3 """
375 clausted 1 fp=file(fname,'r')
376     ROIinfo = []
377     Tmpstr = fp.readline() # skip the first line
378     Tmpstr = fp.readline()
379     Tmpn = int(Tmpstr.split('\t')[0])
380     for i in range(Tmpn):
381     labelline = fp.readline()
382    
383     if labelline.startswith('"Block1'):
384     a = labelline.strip().split(',')
385     columnsize, rowsize = int(a[3]), int(a[5])
386     ## print labelline
387     def IDcal(p):
388     id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
389     return id
390    
391     for eachline in fp:
392     ROIdic = {}
393     tmplist = eachline.strip().split('\t')
394     pos = tuple(map(int,tmplist[:3]))
395     ROIdic['Position'] = pos
396     ROIdic['IDcal'] =IDcal(pos)
397     ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]
398    
399     ##
400     ROIdic['Name'] = tmplist[3]
401     ROIdic['ID'] = tmplist[4]
402     try:
403     ROIdic['Conc'] = tmplist[5]
404     ROIdic['Family'] = tmplist[6]
405     blist = tmplist[7][1:-1].split(';') # always something like '"1,1,2; 1,2,2"'
406     Backn = len(blist)
407     Bid = []
408    
409     for i in blist:
410     if '' in i.split(','):
411     break
412     Bpos = tuple(map(int, i.split(',')))
413     Bid.append(IDcal(Bpos))
414     ##
415     ROIdic['Background Spot'] = tuple(Bid)
416     except IndexError:
417     pass
418    
419     if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
420     print 'The spot ID should be consistent with the position. Please check the ID.'
421     break
422     else:
423     ROIdic['ID'] = ROIdic['IDcal']
424     ROIinfo.append(ROIdic)
425     fp.close()
426     print 'There are %d ROIs in the Gal file.' % len(ROIinfo)
427    
428     return ROIinfo
429    
430    
431    
432 clausted 3 def method_read(fname):
433     """Function: Read the analyte table.
434 clausted 1 Input: the filename
435     Return: the sample information
436 clausted 3 """
437 clausted 1 # create a concentration dictionary for the sample injected into the flowcell
438     # export a table containing the concentration of the sample injected, and the duration
439     fp=file(fname,'r')
440     sampleinfo = []
441     ##TmpTable,TmpTable2={},{}
442     labelline=fp.readline()
443     for n,eachline in enumerate(fp):
444     sampledic = {}
445     sampledic['Location'] = eachline.split('\t')[0]
446     sampledic['Name'] = eachline.split('\t')[1]
447     sampledic['Concentration'] = float(eachline.split('\t')[2])
448     sampledic['Duration'] = float(eachline.split('\t')[4])
449     sampledic['Flow Rate'] = float(eachline.split('\t')[3])
450     sampledic['Analyte Series'] = eachline.split('\t')[6]
451     sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
452     sampleinfo.append(sampledic)
453    
454     print 'Name','\t','Concentration'
455     for i in sampleinfo:
456     print i['Name'],'\t', i['Concentration']
457    
458     return sampleinfo
459    
460 clausted 5 def method_read_fake():
461     sampleinfo = []
462     sampleinfo.append({'Location':0, 'Name':0, 'Concentration':0, 'Duration':0, 'Flow Rate':0, 'Analyte Series':0, 'Buffer Blank Series':0})
463     return sampleinfo
464 clausted 1
465 clausted 5
466 clausted 3 def datafile_read(fname):
467     """Function: Read the raw data from the Plexera instrument,
468 clausted 1 and pack the raw data (Shaped_data) into dataobject.
469     Therefore, the initial dataformat = 0
470     Input: the filename
471     Return: the packed SPR.DataPass type data object
472 clausted 3 """
473 clausted 1 #===================check the format of the input files-------
474     if fname.split('.')[-1] == 'txt':
475    
476 clausted 3 Format_input, Firstline = check_format_input(fname)
477 clausted 1 if Format_input == 1: # for SPRit file format
478 clausted 3 dataobj = sprit_format(fname)
479 clausted 1 print '-'*10,' This is a SPRit data file. ','-'*10
480    
481     elif Format_input == 2: # for Plexera ICM file format
482 clausted 3 dataobj = plexera_icm_format(fname)
483 clausted 1 print '-'*10,' This is a Plexera ICM exported data file. ','-'*10
484    
485     elif Format_input == 3: # for Clamp datafile format
486 clausted 3 dataobj = read_clamp_data(fname)
487 clausted 1 print '-'*10,' This is a Clamp file. ','-'*10
488    
489     else:
490     print '-'*10,' unreadable file input! ','-'*10
491     sys.exit(0)
492    
493     if Format_input == 1 or Format_input == 2:
494     flag = str.upper(raw_input('Load the key file? (y/n): '))
495     if flag == 'Y':
496     fkey = raw_input('Input the path of the key file: ')
497 clausted 3 ROIinfo = keyfile_read(fkey)
498 clausted 1 dataobj.updateROIinfo(ROIinfo)
499    
500     elif fname.split('.')[-1] == 'xls':
501     # for the DAM Spreadsheet file format
502     print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
503     book = xlrd.open_workbook(fname)
504     #shape the data in the Spreadsheet, whether single sheet or multiple sheets
505 clausted 3 dataobj=dam_spreadsheet_format(book)
506 clausted 1 flag = str.upper(raw_input('Load the gal file? (y/n): '))
507     if flag == 'Y':
508     fgal = raw_input('Input the path of the gal file: ')
509 clausted 3 ROIinfo = galfile_read(fgal)
510 clausted 1 dataobj.updateROIinfo(ROIinfo)
511     flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
512     if flag == 'Y':
513     fprotocol = raw_input('Input the path of the analyte table: ')
514 clausted 3 sampleinfo = method_read(fprotocol)
515 clausted 1 dataobj.updateSampleinfo(sampleinfo)
516 clausted 5 else:
517     dataobj.updateSampleinfo(method_read_fake())
518    
519 clausted 1 return dataobj
520    
521    
522    
523 clausted 3 def outlier_remove(obj):
524 clausted 1 ##dataobj.viewAll()
525 clausted 3 """Function: Remove unwanted data points from noisy periods.
526 clausted 1 Return: the data object with clean data
527 clausted 3 """
528 clausted 1 dataobj = copy.deepcopy(obj)
529     print ('Select the time area that you want to remove.')
530     tmpstr = raw_input('in the format of "200:240, 500:510" :')
531     areatodel = tmpstr.split(',')
532     data = dataobj.data
533     for i in areatodel:
534     ##if
535     t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
536     t = data[0] # for dataformat=0 Shaped_data
537     tmpind = plt.find((t>t1) & (t<t2)) # find the outliar indice
538     ## data[1:,tmpind] = 0
539     tmplist = data.tolist()
540     tmplist2 = []
541     for j in tmplist:
542     del j[min(tmpind):max(tmpind)]
543     tmplist2.append(j)
544     data = np.array(tmplist2)
545     ## tmplist = tmplist
546     dataobj.data = data
547     # dataobj.status = 'Outliar removed'
548 clausted 3 dataobj.updateStatus(outlier_remove=True)
549 clausted 1 return dataobj
550    
551 clausted 3 def calibrate(obj):
552     """Function: calibrate the Intensity response.
553 clausted 1 Return: the data object with calibrated data
554     Note: at present, this function is valid only when
555     the whole procedure includes calibration precedure.
556 clausted 3 """
557 clausted 1 dataobj = copy.deepcopy(obj)
558     data = dataobj.data
559     caldata = copy.deepcopy(data)
560     t = data[0]
561     if dataobj.dataformat != 0:
562     print 'We only calibrate the raw data.'
563     return
564     else:
565     t1 = input('Enter the time position of the TOP response during calibration:')
566     s1 = input('The refractive index of the TOP response is: ')
567     t2 = input('Enter the time position of the BOTTOM response during calibration:')
568     s2 = input('The refractive index of the BOTTOM response is: ')
569     ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
570     ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]
571    
572     for i,y in enumerate(data[1:]):
573     y1, y2 = y[ind1], y[ind2]
574     slope = (s1-s2)/(y1-y2)
575     offset = s1- slope*y1
576     caldata[i+1] = slope*y+offset
577     dataobj.data = caldata
578 clausted 3 dataobj.updateStatus(calibrate=True)
579 clausted 1 return dataobj
580    
581 clausted 3 def get_background_value(obj,bgids):
582     """Get the averaged value of background spots for each ROI."""
583 clausted 1 bgsignal = obj.data[0]*0
584     for j in bgids:
585     if j == obj.ROIinfo[j-1]['ID']:
586     bgsignal = bgsignal + obj.data[j]
587    
588    
589     elif j != obj.ROIinfo[j-1]['ID']:
590     for n,eachspot in enumerate(obj.ROIinfo):
591     if j == eachspot['ID']:
592     bgsignal = bgsignal + obj.data[n+1]
593     bgsignal = bgsignal/len(bgids)
594     return bgsignal
595    
596    
597 clausted 3 def background_subtract(obj, *bgspot):
598     """Function: Perform the Background subtraction for the UNSPLIT curve.
599 clausted 1 Input besides "obj" is the id of the spot taken as background.
600     The following inputs are acceptable:
601 clausted 3 1. background_subtract(obj): the default background in Galfile
602 clausted 1 will be subtracted.
603 clausted 3 2. background_subtract(obj, 1, 6): the average value of spot1
604 clausted 1 and spot6 will be subtracted.
605 clausted 3 """
606 clausted 1 dataobj = copy.deepcopy(obj)
607     ROIinfo = obj.ROIinfo
608     data = dataobj.data
609    
610     if dataobj.dataformat == 0:
611     newdata = np.zeros(np.shape(data))
612     newdata[0] = data[0] # the time scale
613     if bgspot == ():
614     # The average of the default background spots listed in the galfile
615     #are to be subtracted.
616     for i in range(1,dataobj.ROInum+1):
617     bgids = ROIinfo[i-1]['Background Spot']
618 clausted 3 bgsignal = get_background_value(dataobj,bgids)
619 clausted 1 newdata[i] = data[i]-bgsignal
620     # update the status of the data object.
621     dataobj.updateStatus(BackgroundType='Default in Gal')
622    
623     else:
624     # The average of the manually input background spots
625     #are to be subtracted.
626     for i in range(1,dataobj.ROInum+1):
627 clausted 3 bgsignal = get_background_value(dataobj,bgspot)
628 clausted 1 newdata[i] = data[i]-bgsignal
629     dataobj.updateStatus(BackgroundType='Manually choosen')
630     dataobj.data = newdata
631 clausted 3 dataobj.updateStatus(background_subtraction=True)
632 clausted 1 return dataobj
633    
634     else:
635     print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
636     return
637    
638 clausted 3 def curve_split(obj, t_before, t_after, *t_inj):
639     """Function: Split the whole curve that contains several injections
640 clausted 1 into pieces.
641     The sample information will be checked during the split,
642     if the injection number in sampleinfo is not consistant
643     with the input injection parameter, it will help you to
644     update the sample information.
645     Input besides obj:
646     t_before: time duration before the injection time point
647     t_after: time duration after the injection time point
648     t_inj: the exact start time for each injection
649     Return: the data object with splitted data, and updated sample
650     information
651 clausted 3 """
652 clausted 1 dataobj = copy.deepcopy(obj)
653     ROInum = dataobj.ROInum
654     t=dataobj.data[0]
655     ind=[]
656     # find how many lines of data each injection
657     for i in range(len(t_inj)):
658     ind.append(np.shape(plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after))))[0])
659     m,j = max(ind),max(ind)-min(ind)
660     # the new dataset must be a m*((ROInum+1)*num_inj) matrix:
661     # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
662     tmpdata = np.zeros(((ROInum+1)*len(t_inj),m))
663    
664     for i in range(len(t_inj)):
665     TmpInd = plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after)))
666     Tmp1,Tmp2 = i*(ROInum+1),(i+1)*(ROInum+1)
667     tmpdata[Tmp1:Tmp2,0:len(TmpInd)] = dataobj.data[:,TmpInd]
668     if j!=0:
669     Split_data = tmpdata[:,:-j]
670     else:
671     Split_data = tmpdata[:]
672     # zero and align the split data
673     for i in range(np.shape(Split_data)[0]):
674     Split_data[i]=Split_data[i]-Split_data[i][0]
675    
676 clausted 3 newsampleinfo, injectinfo = check_sample_info(dataobj,t_before, t_after, t_inj)
677 clausted 1 if newsampleinfo != None :
678     dataobj.data = Split_data
679     dataobj.updateStatus(DataSplit=True)
680     dataobj.updateSampleinfo(newsampleinfo)
681     dataobj.dataformat = 2
682     dataobj.injectinfo = injectinfo
683     return dataobj
684     else:
685     dataobj.updateStatus(DataSplit=False)
686     print 'Curve Splitting not successful!'
687    
688     return
689    
690    
691 clausted 3 def check_sample_info(dataobj, t_before, t_after, t_inj):
692     """Check if the sample information consistent with the injection
693 clausted 1 parameter input. If not, help the user to update sample information
694     for the splitted curve.
695 clausted 3 """
696 clausted 1 injectinfo = []
697     if len(dataobj.sampleinfo) == len(t_inj):
698     for i,j in enumerate(t_inj):
699     injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
700     return dataobj.sampleinfo, injectinfo
701    
702     elif len(dataobj.sampleinfo) > len(t_inj):
703 clausted 3 print """ The number of injection you just input does not match that in sample infomation.
704     If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object."""
705 clausted 1 choose = str.upper(raw_input('Continue? (y/n)'))
706     if choose == 'Y':
707     newsampleinfo, injectinfo = [], []
708     print ' Your input indicates %s injections out of %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
709     for n,i in enumerate(dataobj.sampleinfo):
710     print '--',n,'.',i['Name'],':',i['Concentration']
711     print ' Enter the ID number of the injections you just select, in the form of "2-10" or "4,5,6",'
712     tmpstr = raw_input(':')
713     if ',' in tmpstr:
714     injID = map(int, tmpstr.split(','))
715     elif '-' in tmpstr:
716     injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
717     else:
718     print ' Illegal input!'
719     return
720    
721     if len(injID) != len(t_inj):
722     print ' Please input %s injection ID numbers instead of %s You just input.' %(len(t_inj),len(injID))
723     return
724    
725     for i in injID:
726     newsampleinfo.append(dataobj.sampleinfo[i])
727     injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
728     return newsampleinfo, injectinfo
729     else:
730     return
731    
732     elif len(dataobj.sampleinfo) < len(t_inj):
733     print ' The number of injection you just input does not match that in sample infomation.'
734     print ' Your input indicates %s injections, where there is only %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
735     print ' Please check.'
736    
737     return
738    
739    
740 clausted 3 def write_data(Head,data_out,ROI_out,DataDir,output_format):
741     """Write the data into a txt file.
742     """
743     # Modified 100405 CGL
744     # create a newfolder with time stamp to save the data
745 clausted 1 if output_format=='B':
746     ftitle='biosensor'
747     Tmpstr = str(ROI_out).replace(', ','_')
748     Tmpstr = Tmpstr.replace('(','-')
749     foutname = ftitle+Tmpstr[:-1]+'.txt'
750     elif output_format=='C':
751     foutname='clamp-'+str(ROI_out)+'.txt'
752     else:
753     ftitle=''
754 clausted 3
755     # Change directories, if requested.
756     if (DataDir != ""):
757     os.chdir('..')
758     try:
759     os.makedirs(DataDir)
760 clausted 5 ##except WindowsError:
761     except OSError:
762 clausted 3 pass
763     os.chdir(DataDir)
764 clausted 1
765     np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
766     fdt=file('temp.txt','r')
767    
768     ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
769     fout=file(foutname,'w+')
770     # write the head of the data first
771     fout.write(Head+'\n')
772     count = len(fdt.readlines())
773     fdt.seek(0)
774     for i in range(count):
775     temp=fdt.readline()
776     fout.write(temp.replace(' ','\t'))
777     fdt.close()
778     fout.close()
779     print '-------The path of the exported data is "', os.getcwd(),'"-------'
780    
781    
782 clausted 3 def data_output_biosensor(obj, DataDir, *ROI_out):
783     """Export the data into a biosensor format txt file,
784 clausted 1 which can be operated in Scrubber.
785     Input: the data object, the output path, the ROIs selected to export
786 clausted 3 """
787 clausted 1 # initialize the data output matrix, which will contain 'len(spot_out)*2)*n_inj' columns
788     dataobj = copy.deepcopy(obj)
789     data = dataobj.data
790     n_inj = len(dataobj.sampleinfo)
791     ROIinfo = dataobj.ROIinfo
792     ROInum = dataobj.ROInum
793    
794     data_out = np.zeros(((len(ROI_out)*2)*n_inj,np.shape(data)[1]))
795     head1 = ""
796     new_ROIinfo = []
797     for n,i in enumerate(ROI_out):
798     new_ROIinfo.append(ROIinfo[i-1]) ##the new ROI info
799     if i != ROIinfo[i-1]['IDcal']:
800 clausted 3 # check if the ID number matches position
801 clausted 1 print n,i, ROIinfo[i-1]
802     return
803     else:
804     headcomp = ROIinfo[i-1]['ID']+': '+ROIinfo[i-1]['Name']
805     try:
806     headcomp = headcomp + ', '+ROIinfo[i-1]['Conc']
807     except KeyError:
808     pass
809     headcomp = headcomp+' Fc='+str(n+1)+' - '
810     for j in range(n_inj):
811     headtmp = headcomp+str(j+1)+'_'
812     head1 = head1+headtmp+'X\t'+headtmp+'Y\t'
813     data_out[2*n*n_inj+2*j]=data[j*(ROInum+1)]
814     data_out[2*n*n_inj+2*j+1]=data[j*(ROInum+1)+i]
815     head1=head1[:-1]
816    
817     dataobj.data = data_out
818     dataobj.dataformat = 3 # biosensor dataformat
819     dataobj.updateStatus(DataType='Biosensor data')
820     dataobj.updateStatus(Head=head1)
821     dataobj.updateROIinfo(new_ROIinfo)
822    
823 clausted 3 write_data(head1,data_out,ROI_out,DataDir,'B')
824 clausted 1
825     return dataobj
826    
827 clausted 3 def data_output_clamp(obj, DataDir, ROI_out):
828     """Export the data into a Clamp format txt file,
829 clausted 1 which can be operated in Clamp.
830     Input: the data object, the output path, the selected ROI
831 clausted 3 """
832 clausted 1 # initialize the data output matrix, which will contain '2*n_inj' columns
833     dataobj = copy.deepcopy(obj)
834     data = dataobj.data
835     n_inj = len(dataobj.sampleinfo)
836 clausted 5 n_inj = max(n_inj, 1) # Must have at least one injection.
837 clausted 1 ROIinfo = dataobj.ROIinfo
838     ROInum = dataobj.ROInum
839     sampleinfo = dataobj.sampleinfo
840    
841     data_out = np.zeros((2*n_inj,np.shape(data)[1]))
842 clausted 5 ## if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
843     if (ROI_out != int(ROIinfo[ROI_out-1]['ID'])):
844 clausted 1 print 'Please check the ROI information.'
845     print ROI_out, ROIinfo[ROI_out-1]
846     return
847     else:
848     tmp = ROIinfo[ROI_out-1]['ID']+': '+ROIinfo[ROI_out-1]['Name']
849     try:
850     tmp = tmp +', Conc:'+ROIinfo[ROI_out-1]['Conc']
851     except KeyError:
852     pass
853     Clamphead = 'Vers 3.41 '+tmp
854     conc=''
855     t_start,t_stop='',''
856     head1='Time\tData\t'*n_inj
857     head1=head1[:-1]
858    
859     for i in range(n_inj):
860     data_out[2*i]=data[i*(ROInum+1)]
861     data_out[2*i+1]=data[i*(ROInum+1)+ROI_out]
862     name = sampleinfo[i]['Name']
863     conc=conc+'\t'+str(sampleinfo[i]['Concentration'])
864     t_start=t_start+'\t'+str(dataobj.injectinfo[i][0])
865     t_stop=t_stop+'\t'+str(dataobj.injectinfo[i][1])
866    
867     hconc,h_start,h_stop='\nConc1'+conc,'\nStart1'+t_start,'\nStop1'+t_stop
868     for i in (hconc,h_start,h_stop):
869     if i[-1]!='':
870     Clamphead=Clamphead+i
871    
872     Clamphead=Clamphead+'\n'+head1
873    
874     dataobj.data = data_out
875     dataobj.dataformat = 4 # biosensor dataformat
876     dataobj.updateStatus(DataType='Clamp data')
877     dataobj.updateStatus(Head=Clamphead)
878     dataobj.updateROIinfo(ROIinfo[ROI_out-1])
879    
880 clausted 3 write_data(Clamphead,data_out,ROI_out,DataDir,'C')
881 clausted 1
882     return dataobj
883     ## return data_out,Clamphead
884    
885    
886 clausted 3 if (__name__ == '__main__'):
887     # Print a short message if this module is run like an app.
888     print "This is the OSPRAI file type conversion tool."
889