ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
Revision: 25
Committed: Wed Apr 28 20:22:24 2010 UTC (9 years, 4 months ago) by rjaynes
File size: 35996 byte(s)
Log Message:
Add py and obj files to allow modeling of more SPR experiments with converter and curvefitting modules.  This is the work of Yuhang Wan and Rui Hou.

1. In "converter.py": 
      Add the saving and reading function for the sprclass data object.
      Also add function "keyfile_read_fake" to provide default information for SPRit and ICM formats in case of the bug when do background_subtract.
      Fix the bugs in "background_subtract".
      Tested by DAM and ICM formats.
2. In model modules:
      "modelclass.py" is the parent class for all the other model classes that performs the theoretical simulating, loading and saving of the parameter or simulated data. Rui and I also add some other model modules like competing model, twostate model, parallel model, and the time variable concentrated models, where the simulated result is compared with Clamp's simulation to make sure the equations are correct. 
       The basicmodel and basicmodel_varyC class are tested. 
3. In "curvefitting.py":
      Add typical pipeline for operation. The examples are packed with the file. 
      Add function to show the Elapsed time for each fitting.
Line User Rev File contents
1 clausted 3 """
2 clausted 1 Converter: Perform the datafile reading, pre-processing and writing.
3 clausted 3 Yuhang Wan, Christopher Lausted
4 rjaynes 25 Last modified on 100427 (yymmdd) by YW
5 clausted 1
6 clausted 3 Typical Pipeline:
7     >import converter as cv
8     >dat = cv.datafile_read("example.txt")
9     >dat = cv.outlier_remove(dat)
10     >dat = cv.calibrate(dat)
11     >dat = cv.background_subtract(dat)
12     >dat = cv.curve_split(dat, 100, 800, 200)
13 rjaynes 25 >dat = cv.data_output_clamp(dat, "/dir", 1)
14     >dat = cv.data_output_biosensor(dat, "/dir", 1,2,3,4)
15     >dat = cv.obj_save(dat, "saved_dataobj")
16 clausted 3 """
17 rjaynes 25 __version__ = "100427"
18 clausted 1
19 clausted 6 ## Import libraries.
20 clausted 1 import numpy as np
21     import pylab as plt
22 rjaynes 25 ##import sys
23 clausted 1 import xlrd
24     import time
25     import os
26     import copy
27 rjaynes 25 import pickle
28 clausted 3 import SPRdataclass as spr
29 clausted 1
30 clausted 6 ## Global constants and variables.
31     FMT_UNKNOWN = 0
32     FMT_SPRIT = 1 ## Old Plexera SPRit format.
33     FMT_ICMTXT = 2 ## Plexera Instrument Control Module text.
34     FMT_CLAMP34 = 3 ## Morton & Myszka CLAMP Version 3.4+
35     FMT_DAMXLS = 4 ## Plexera Data Analysis Module Excel export.
36 rjaynes 25 FMT_DATAOBJ = 5 ## Packed spr Data Object
37 clausted 1
38 clausted 3 def check_format_input(fname):
39 clausted 6 """Examine the format of an input .txt data file."""
40     ## Modified 100407 CGL.
41    
42     ## Open text file. Exit if file not found.
43 clausted 1 try:
44     fp = open(fname, "r")
45     except IOError:
46 clausted 6 print "Error: Unable to open file %s for reading." % fname
47 rjaynes 25 return
48 clausted 1
49 clausted 6 ## Examine first line of text file to guess format.
50     fmt = FMT_UNKNOWN
51     sprit_line1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
52     clamp_line1="Vers 3.4"
53     line1 = fp.readline()
54     if (sprit_line1 in line1):
55     fmt = FMT_SPRIT
56     elif (line1[2]=="/" and line1[5]=="/"):
57     ## This format has date mm/dd/yyy in first column.
58     fmt=FMT_ICMTXT
59     elif (clamp_line1 in line1):
60     fmt = FMT_CLAMP34
61    
62 clausted 1 fp.close()
63 clausted 6 return fmt, line1
64 clausted 1
65    
66 clausted 3 def sprit_format(fname):
67     """Read the SPRit formatted data and reshape it.
68 clausted 1 Return the data in the form of list.
69 clausted 3 """
70 clausted 1 # open a file, if file doesn't exit, then exit
71     try:
72     fp = open(fname, "r")
73     except IOError:
74     print 'Cannot open file %s for reading' % fname
75 rjaynes 25 return
76 clausted 1 status = {'DatainputType':'SPRit data'}
77     # skip the first line of the input file
78     Tmpstr = fp.readline()
79     # check if the second line is BEGIN
80     TmpStr = fp.readline()
81 clausted 5 if ("BEGIN" not in TmpStr):
82     print "Warning: Second line of data file is not 'BEGIN'"
83     ##sys.exit(0)
84 clausted 1 # count the lines of each data spot
85     TmpStr = fp.readline() # skip first "0.000000e+000"
86     num_line = 1 # so we start to count from 1
87     # count until read the "0.000000e+000"
88     while True:
89     TmpStr = fp.readline()
90     if TmpStr.split('\t')[0] == "0.000000e+000":
91     break
92     else:
93     num_line = num_line + 1
94     # count the spots in each file
95     num_spot = 0
96     # skip the first two lines
97     fp.seek(0)
98     for i in range(0,2):
99     TmpStr = fp.readline()
100     text = fp.readlines()
101     for i in text:
102     num_spot += i.count('0.000000e+000')
103     fp.close()
104     #---------------------- Reshaping the data --------------------
105     # reshape the data in the string form num_line*num_spot
106     Shaped_str = np.reshape(text,(num_spot,-1))
107     tmpshape=np.shape(Shaped_str)
108     # reshape the data in the number form: num_line*(num_spot+1)
109     # only the first column is time: time spotl spot2 ... spot n
110     time = np.zeros(num_line)
111     Tmpdata = Shaped_str[0,:]
112     for n,i in enumerate(Tmpdata):
113     time[n]=float(i.split('\t')[0])
114     Shaped_data_1 = np.zeros((tmpshape[0]+1,tmpshape[1]))
115     Shaped_data_1[0,:]=time
116     for n1,i in enumerate(Shaped_str):
117     for n2,j in enumerate(i):
118     Shaped_data_1[n1+1][n2]=float(Shaped_str[n1][n2].split('\t')[1])
119     # reshape the data in the number form:
120     # time1 spotl time2 spot2 ... timen spot n
121     Shaped_data_2 = np.zeros((tmpshape[0]*2,tmpshape[1]))
122     for i in range(tmpshape[0]):
123     Shaped_data_2[i*2]=Shaped_data_1[0]
124     Shaped_data_2[i*2+1] = Shaped_data_1[i+1]
125    
126     ## status = {'Datainput Type':'SPRit data'}
127     dataformat = 0
128     ROInum = num_spot
129 clausted 3 dataobj=spr.DataPass(Shaped_data_1, ROInum, dataformat)
130 clausted 1 dataobj.status = {}
131     dataobj.updateStatus(**status)
132     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
133    
134     return dataobj
135    
136    
137 clausted 3 def plexera_icm_format(fname):
138     """Read the txt data exported from Plexera ICM software and reshape it.
139 clausted 1 Return the data in the form of list.
140 clausted 3 """
141 clausted 1 fp=file(fname,'r')
142     Tmpdata,tmptime=[],[]
143     status = {'DatainputType':'Plexera ICM data'}
144     for eachline in fp:
145     Tmpdata.append(eachline.split('\t'))
146     num_line,num_spot=np.shape(Tmpdata)
147     # initialize the Shaped_data
148     Shaped_data = np.zeros((num_spot,num_line))
149     num_spot=num_spot-1 # the real number of spot
150     Tmpdata=np.transpose(Tmpdata)
151     # delete the last '\r\n' of the data in last column of the datafile
152     for n,i in enumerate(Tmpdata[-1]):
153     Tmpdata[-1][n]=i.rstrip()
154     # time converting: convert the first column of the datafile(date and time information) into numerical time
155     for i in Tmpdata[0]:
156     tmptime_str=i.split(' ')[1]
157     timelist=tmptime_str.split(':')
158     now=float(timelist[0])*60*60+float(timelist[1])*60+float(timelist[2])
159     tmptime.append(now)
160     Shaped_data[0]=tmptime
161     Shaped_data[0]=Shaped_data[0]-Shaped_data[0][0]
162     # assign the signal data
163     for i in range(num_spot):
164     Shaped_data[i+1]=Tmpdata[i+1]
165    
166     dataformat = 0
167     ROInum = num_spot
168 clausted 3 dataobj=spr.DataPass(Shaped_data, ROInum, dataformat)
169 clausted 1 dataobj.status = {}
170     dataobj.updateStatus(**status)
171    
172     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
173     return dataobj
174    
175    
176    
177 clausted 3 def dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize):
178     """Retrieve and shape data form one single sheet from the Spreadsheet.
179 clausted 1 Return the data, ROI infomation in the form of list.
180 clausted 3 """
181 clausted 1 ## and return the minimal spot Id (start_spot)
182     ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet
183    
184     Head = sh.row_values(1)
185     SpotList = []
186     num_spot = sh.row_values(0).count('Relative Time')
187    
188     for i in range(num_spot):
189 rjaynes 25 TmpHead=unicode.encode(Head[colsize*i])
190 clausted 1 # now there are 5 columns for each ROI for Raw Intensity
191     # and 4 columns for each ROI for Satellite subtracted Intensity
192     DataList.append(sh.col_values(2+colsize*i,start,end))
193    
194     key, val = [], []
195     tmpinfo = TmpHead.split('\n')
196     for j in tmpinfo:
197     key.append(j.split(': ')[0])
198     val.append(j.split(': ')[1])
199     tmpdic = dict(zip(key,val))
200 rjaynes 25 try: # spot id
201     tmpdic['ID'] = int(tmpdic['ID']) # either in form of "spot1" or "1"
202     except ValueError:
203     if str.upper(tmpdic['ID']).startswith('SPOT'):
204     tmpdic['ID'] = int(tmpdic['ID'][4:])
205     else:
206     print "Warning: Illegal galfile format."
207     print "The ID is neither 'spot1' form nor '1' form. \n", "'ID:'", tmp
208    
209 clausted 1 tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
210     ROIinfo.append(tmpdic)
211    
212     return DataList,ROIinfo
213    
214 clausted 3 def dam_spreadsheet_format(book):
215     """Read the spreadsheet exported from Plexera DAM software and reshape it.
216 clausted 1 Return the shaped data and ROI information.
217 clausted 3 """
218 clausted 1 ROIinfo, newROIinfo, n_sheet = [], [], 0
219     DataList=[]
220     status = {'DatainputType':'Plexera DAM data'}
221     sh0=book.sheet_by_index(0)
222     if sh0.row_values(0).count('Raw Intensity') == 0 and sh0.row_values(0).count('Subtracted Intensity') > 0:
223     print 'This is the satellite subtracted data.'
224     colsize = 4
225     status ['DAM spreadsheet'] = 'Satellite subtracted Intensity'
226     elif sh0.row_values(0).count('Raw Intensity') > 0 and sh0.row_values(0).count('Subtracted Intensity') == 0:
227     print 'This is the raw intensity data.'
228     colsize = 5
229     status ['DAM spreadsheet'] = 'Raw Intensity'
230    
231     for i in range(2,sh0.nrows):
232     if sum(sh0.col_values(2,1,i))>0:
233     break
234     start,end = i-1,sh0.nrows
235     num_line = end - start
236     ## print 'start: ', start
237     # the time axis initialization and assignment
238     Time=sh0.col_values(1,start,end)
239     # in case we lose the time axis during the DAM processing
240     if sum(Time)==0: Time=np.arange(num_line)
241     DataList.append(Time)
242    
243     # list DataList to store the Shaped_data of each ROI in each sheet,
244     # list ROIinfo to store the Spot id and the ligand name of each ROI in each sheet
245     for i in range(book.nsheets):
246     sh=book.sheet_by_index(i)
247     if sh.ncols!=0:
248 clausted 3 DataList,ROIinfo = dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize)
249 clausted 1 n_sheet=n_sheet+1
250     else:
251     break
252     # calculate the dim of the Shaped_data, and initialize the matrix
253     num_spot=len(DataList)-1
254     num_line=end-start
255     Shaped_data=np.zeros((num_spot+1,num_line))
256     Shaped_data[0]=DataList[0]
257    
258     # in case that the ROIs doesn't count from spot1
259     newROIinfo = copy.deepcopy(ROIinfo)
260     StartspotList=np.array([i['ID'] for i in ROIinfo])
261     start_spot=min(StartspotList)
262 rjaynes 25
263 clausted 1 for i in range(num_spot):
264     tmp = ROIinfo[i]['ID']
265 rjaynes 25 j=tmp-(start_spot-1)
266 clausted 1 Shaped_data[j]=DataList[i+1]
267 rjaynes 25 newROIinfo[j-1]=ROIinfo[i]
268 clausted 1
269     # pack the date and relative information into SPRdata obj
270     dataformat = 0
271     ROInum = num_spot
272 clausted 3 dataobj=spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
273 clausted 1 dataobj.status = {}
274     dataobj.updateStatus(**status)
275     print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)
276    
277     return dataobj
278    
279    
280 clausted 3 def read_clamp_data(fname):
281     """Read the Clamp format data
282 clausted 1 Return the data in the form of list.
283 clausted 3 """
284 clausted 1 fp=open(fname,'r')
285     Tmpstr=fp.readline()
286     # examine the file head
287     # e.g. 'Vers 3.41 cTBA-1 Conc:250 nM\n'
288     ROInum, dataformat, Conc, ROIdic = 1, 4, [], {}
289     try:
290     name = Tmpstr.split(' ')[2].strip()
291     ROIdic['ROI name'] = name
292     stmp = Tmpstr.split(' ')[3]
293     rest = Tmpstr[Tmpstr.find(stmp):].strip()
294     ROIdic['concentration'] = rest.split(':')
295     except IndexError:
296     pass
297    
298     for Tmpstr in fp:
299     try:
300     float(Tmpstr.split('\t')[0])
301     n_conc=(Tmpstr.count('\t')+1)/2
302     ##print Tmpstr, 'the beginning of data'
303     break
304     except ValueError:
305     if Tmpstr.startswith('Conc1'):
306     n_conc=Tmpstr.count('\t')
307     Conc=Tmpstr.strip().split('\t')[1:]
308     elif Tmpstr.startswith('Start1'):
309     Start=Tmpstr.strip().split('\t')[1:]
310     n_conc=Tmpstr.count('\t')
311     elif Tmpstr.startswith('Stop1'):
312     Stop=Tmpstr.strip().split('\t')[1:]
313     n_conc=Tmpstr.count('\t')
314    
315    
316     # write the data into matrix
317     Tmpdata=[]
318     for eachline in fp:
319     a=eachline.strip().split('\t')
320     Tmpdata.append(np.array(a,dtype='f'))
321     Data=np.transpose(Tmpdata)
322    
323     # write the infomation
324     ROIinfo = [ROIdic]
325     status = {'DatainputType':'Clamp data'}
326     sampleinfo, injinfo = [], []
327     injpara = [map(float,i) for i in [Start,Stop]]
328    
329     if len(Conc) >= 1:
330     for i,c in enumerate(Conc):
331     sampledic = {}
332     name = 'Sample '+str(i+1)
333     sampledic = dict(zip(['Name','Concentration'],[name,c]))
334     ## sampledic['Name'] = name
335     ## sampledic['Concentration'] = c[injpara[0][i],injpara[1][i]]
336     sampleinfo.append(sampledic)
337     injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
338     injinfo.append(injdic)
339 clausted 3 dataobj=spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
340 clausted 1 dataobj.status = {}
341     dataobj.updateStatus(**status)
342    
343     return dataobj
344    
345    
346    
347 clausted 3 def keyfile_read(fkey):
348 clausted 1 # The Key File contains
349 clausted 3 """Function: Read the key file for old SPR instrument.
350 clausted 1 Input: the filename.
351     Return: the ROI information.
352 clausted 3 """
353 clausted 1 try:
354     fp = open(fkey, "r")
355     except IOError:
356 rjaynes 25 print "Cannot open file %s for reading" % fname
357     return
358 clausted 1 ROIinfo = []
359     firstline=fp.readline() # skip the first line
360     # Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
361     for eachline in fp:
362     ROIdic = {}
363     tmplist = eachline.strip().split('\t')
364 rjaynes 25
365     try: # spot id
366     ROIdic['ID'] = int(tmplist[0]) # either in form of "spot1" or "1"
367     except ValueError:
368     if str.upper(tmplist[0]).startswith('SPOT'):
369     ROIdic['ID'] = int(tmplist[0][4:])
370     else:
371     print "Error: Unable to read Spot IDs"
372    
373 clausted 1 ROIdic['Name'] = tmplist[1] # name of the protein immobilized
374     try:
375     ROIdic['Conc'] = tmplist[2]
376 rjaynes 25 ROIdic['Background Spot'] = map(int, tmplist[3].split(';')) # in case more than one spot set as background with ";" as SEPARATOR
377 clausted 1 except IndexError:
378     pass
379     ROIinfo.append(ROIdic)
380    
381     fp.close()
382     print 'There are %d ROIs in the Key file.' % len(ROIinfo)
383     return ROIinfo
384    
385 rjaynes 25 def keyfile_read_fake(obj):
386     ROIinfo = []
387     spot_num = obj.ROInum
388     for i in range(spot_num):
389     ROIdic = {}
390     ROIdic['ID'] = i+1
391     ROIdic['Name'] = "Ligand"+str(i+1)
392     ROIinfo.append(ROIdic)
393     return ROIinfo
394    
395 clausted 1
396 clausted 3 def galfile_read(fname):
397     """Function: Read the GAL file.
398 clausted 1 Input: the filename.
399     Return: the ROI information.
400 clausted 3 """
401 clausted 1 fp=file(fname,'r')
402     ROIinfo = []
403     Tmpstr = fp.readline() # skip the first line
404     Tmpstr = fp.readline()
405     Tmpn = int(Tmpstr.split('\t')[0])
406     for i in range(Tmpn):
407     labelline = fp.readline()
408    
409     if labelline.startswith('"Block1'):
410     a = labelline.strip().split(',')
411     columnsize, rowsize = int(a[3]), int(a[5])
412     ## print labelline
413     def IDcal(p):
414     id = (p[0]-1)*columnsize*rowsize + (p[1]-1)*columnsize + p[2]
415     return id
416    
417     for eachline in fp:
418     ROIdic = {}
419     tmplist = eachline.strip().split('\t')
420     pos = tuple(map(int,tmplist[:3]))
421     ROIdic['Position'] = pos
422     ROIdic['IDcal'] =IDcal(pos)
423     ##(pos[0]-1)*columnsize*rowsize + (pos[1]-1)*columnsize + pos[2]
424    
425     ##
426     ROIdic['Name'] = tmplist[3]
427 rjaynes 25 ## ROIdic['ID'] = tmplist[4]
428     try: # spot id
429     ROIdic['ID'] = int(tmplist[4]) # either in form of "spot1" or "1"
430     except ValueError:
431     if str.upper(tmplist[4]).startswith('SPOT'):
432     ROIdic['ID'] = int(tmplist[4][4:])
433     else:
434     print "Error: Unable to read Spot IDs"
435    
436 clausted 1 try:
437     ROIdic['Conc'] = tmplist[5]
438     ROIdic['Family'] = tmplist[6]
439     blist = tmplist[7][1:-1].split(';') # always something like '"1,1,2; 1,2,2"'
440     Backn = len(blist)
441     Bid = []
442    
443     for i in blist:
444     if '' in i.split(','):
445     break
446     Bpos = tuple(map(int, i.split(',')))
447     Bid.append(IDcal(Bpos))
448     ##
449 rjaynes 25 ROIdic['Background Spot'] = Bid ##tuple(Bid)
450 clausted 1 except IndexError:
451     pass
452    
453 rjaynes 25 if ROIdic['ID'] != ROIdic['IDcal']:
454 clausted 1 print 'The spot ID should be consistent with the position. Please check the ID.'
455     break
456     else:
457     ROIdic['ID'] = ROIdic['IDcal']
458     ROIinfo.append(ROIdic)
459     fp.close()
460     print 'There are %d ROIs in the Gal file.' % len(ROIinfo)
461    
462     return ROIinfo
463    
464    
465    
466 clausted 3 def method_read(fname):
467     """Function: Read the analyte table.
468 clausted 1 Input: the filename
469     Return: the sample information
470 clausted 3 """
471 clausted 1 # create a concentration dictionary for the sample injected into the flowcell
472     # export a table containing the concentration of the sample injected, and the duration
473     fp=file(fname,'r')
474     sampleinfo = []
475     ##TmpTable,TmpTable2={},{}
476     labelline=fp.readline()
477     for n,eachline in enumerate(fp):
478     sampledic = {}
479     sampledic['Location'] = eachline.split('\t')[0]
480     sampledic['Name'] = eachline.split('\t')[1]
481     sampledic['Concentration'] = float(eachline.split('\t')[2])
482     sampledic['Duration'] = float(eachline.split('\t')[4])
483     sampledic['Flow Rate'] = float(eachline.split('\t')[3])
484     sampledic['Analyte Series'] = eachline.split('\t')[6]
485     sampledic['Buffer Blank Series'] = eachline.split('\t')[7]
486     sampleinfo.append(sampledic)
487    
488     print 'Name','\t','Concentration'
489     for i in sampleinfo:
490     print i['Name'],'\t', i['Concentration']
491    
492     return sampleinfo
493    
494 clausted 5 def method_read_fake():
495     sampleinfo = []
496     sampleinfo.append({'Location':0, 'Name':0, 'Concentration':0, 'Duration':0, 'Flow Rate':0, 'Analyte Series':0, 'Buffer Blank Series':0})
497     return sampleinfo
498 clausted 1
499 clausted 5
500 clausted 3 def datafile_read(fname):
501     """Function: Read the raw data from the Plexera instrument,
502 clausted 1 and pack the raw data (Shaped_data) into dataobject.
503     Therefore, the initial dataformat = 0
504     Input: the filename
505     Return: the packed SPR.DataPass type data object
506 clausted 3 """
507 clausted 1 #===================check the format of the input files-------
508     if fname.split('.')[-1] == 'txt':
509    
510 clausted 3 Format_input, Firstline = check_format_input(fname)
511 rjaynes 25 if Format_input == FMT_SPRIT: # for SPRit file format
512 clausted 3 dataobj = sprit_format(fname)
513 clausted 1 print '-'*10,' This is a SPRit data file. ','-'*10
514    
515 rjaynes 25 elif Format_input == FMT_ICMTXT: # for Plexera ICM file format
516 clausted 3 dataobj = plexera_icm_format(fname)
517 clausted 1 print '-'*10,' This is a Plexera ICM exported data file. ','-'*10
518    
519 rjaynes 25 elif Format_input == FMT_CLAMP34: # for Clamp datafile format
520 clausted 3 dataobj = read_clamp_data(fname)
521 clausted 1 print '-'*10,' This is a Clamp file. ','-'*10
522    
523     else:
524     print '-'*10,' unreadable file input! ','-'*10
525 rjaynes 25 return
526 clausted 1
527 rjaynes 25 if Format_input == FMT_SPRIT or Format_input == FMT_ICMTXT:
528 clausted 1 flag = str.upper(raw_input('Load the key file? (y/n): '))
529     if flag == 'Y':
530     fkey = raw_input('Input the path of the key file: ')
531 clausted 3 ROIinfo = keyfile_read(fkey)
532 clausted 1 dataobj.updateROIinfo(ROIinfo)
533 rjaynes 25 else:
534     dataobj.updateROIinfo(keyfile_read_fake(dataobj))
535 clausted 1
536     elif fname.split('.')[-1] == 'xls':
537     # for the DAM Spreadsheet file format
538     print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
539 rjaynes 25 Format_input = FMT_DAMXLS
540 clausted 1 book = xlrd.open_workbook(fname)
541     #shape the data in the Spreadsheet, whether single sheet or multiple sheets
542 clausted 3 dataobj=dam_spreadsheet_format(book)
543 clausted 1 flag = str.upper(raw_input('Load the gal file? (y/n): '))
544     if flag == 'Y':
545     fgal = raw_input('Input the path of the gal file: ')
546 clausted 3 ROIinfo = galfile_read(fgal)
547 clausted 1 dataobj.updateROIinfo(ROIinfo)
548 rjaynes 25
549     elif fname.split('.')[-1] == 'obj':
550     # for the saved data object file format
551     print '-'*10,' This is a saved data object file. ','-'*10
552     Format_input = FMT_DATAOBJ
553     fp = file(fname, 'r+')
554     dataobj = pickle.load(fp)
555     fp.close()
556    
557 clausted 1 flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
558     if flag == 'Y':
559     fprotocol = raw_input('Input the path of the analyte table: ')
560 clausted 3 sampleinfo = method_read(fprotocol)
561 clausted 1 dataobj.updateSampleinfo(sampleinfo)
562 rjaynes 25 elif Format_input != FMT_CLAMP34 and Format_input != FMT_DATAOBJ:
563     # clamp data and obj data may contain the experimental protocol already
564 clausted 5 dataobj.updateSampleinfo(method_read_fake())
565    
566 clausted 1 return dataobj
567    
568    
569 clausted 3 def outlier_remove(obj):
570 clausted 1 ##dataobj.viewAll()
571 clausted 3 """Function: Remove unwanted data points from noisy periods.
572 clausted 1 Return: the data object with clean data
573 clausted 3 """
574 clausted 1 dataobj = copy.deepcopy(obj)
575     print ('Select the time area that you want to remove.')
576     tmpstr = raw_input('in the format of "200:240, 500:510" :')
577     areatodel = tmpstr.split(',')
578     data = dataobj.data
579     for i in areatodel:
580     ##if
581     t1, t2 = float(i.split(':')[0]), float(i.split(':')[1])
582     t = data[0] # for dataformat=0 Shaped_data
583     tmpind = plt.find((t>t1) & (t<t2)) # find the outliar indice
584     ## data[1:,tmpind] = 0
585     tmplist = data.tolist()
586     tmplist2 = []
587     for j in tmplist:
588     del j[min(tmpind):max(tmpind)]
589     tmplist2.append(j)
590     data = np.array(tmplist2)
591     ## tmplist = tmplist
592     dataobj.data = data
593     # dataobj.status = 'Outliar removed'
594 clausted 3 dataobj.updateStatus(outlier_remove=True)
595 clausted 1 return dataobj
596    
597 clausted 3 def calibrate(obj):
598     """Function: calibrate the Intensity response.
599 clausted 1 Return: the data object with calibrated data
600     Note: at present, this function is valid only when
601     the whole procedure includes calibration precedure.
602 clausted 3 """
603 clausted 1 dataobj = copy.deepcopy(obj)
604     data = dataobj.data
605     caldata = copy.deepcopy(data)
606     t = data[0]
607     if dataobj.dataformat != 0:
608     print 'We only calibrate the raw data.'
609     return
610     else:
611     t1 = input('Enter the time position of the TOP response during calibration:')
612     s1 = input('The refractive index of the TOP response is: ')
613     t2 = input('Enter the time position of the BOTTOM response during calibration:')
614     s2 = input('The refractive index of the BOTTOM response is: ')
615     ind1 = plt.find(abs(t-t1) == min(abs(t-t1)))[0]
616     ind2 = plt.find(abs(t-t2) == min(abs(t-t2)))[0]
617    
618     for i,y in enumerate(data[1:]):
619     y1, y2 = y[ind1], y[ind2]
620     slope = (s1-s2)/(y1-y2)
621     offset = s1- slope*y1
622     caldata[i+1] = slope*y+offset
623     dataobj.data = caldata
624 clausted 3 dataobj.updateStatus(calibrate=True)
625 clausted 1 return dataobj
626    
627 clausted 3 def get_background_value(obj,bgids):
628     """Get the averaged value of background spots for each ROI."""
629 clausted 1 bgsignal = obj.data[0]*0
630     for j in bgids:
631 rjaynes 25 if j == int(obj.ROIinfo[j-1]['ID']): # in case the ID in ROIinfo is string type
632 clausted 1 bgsignal = bgsignal + obj.data[j]
633    
634 rjaynes 25 elif j != int(obj.ROIinfo[j-1]['ID']):
635 clausted 1 for n,eachspot in enumerate(obj.ROIinfo):
636 rjaynes 25 if j == int(eachspot['ID']):
637 clausted 1 bgsignal = bgsignal + obj.data[n+1]
638 rjaynes 25
639 clausted 1 bgsignal = bgsignal/len(bgids)
640     return bgsignal
641    
642    
643 clausted 3 def background_subtract(obj, *bgspot):
644     """Function: Perform the Background subtraction for the UNSPLIT curve.
645 clausted 1 Input besides "obj" is the id of the spot taken as background.
646     The following inputs are acceptable:
647 clausted 3 1. background_subtract(obj): the default background in Galfile
648 clausted 1 will be subtracted.
649 clausted 3 2. background_subtract(obj, 1, 6): the average value of spot1
650 clausted 1 and spot6 will be subtracted.
651 clausted 3 """
652 clausted 1 dataobj = copy.deepcopy(obj)
653     ROIinfo = obj.ROIinfo
654     data = dataobj.data
655    
656     if dataobj.dataformat == 0:
657     newdata = np.zeros(np.shape(data))
658     newdata[0] = data[0] # the time scale
659     if bgspot == ():
660     # The average of the default background spots listed in the galfile
661     #are to be subtracted.
662     for i in range(1,dataobj.ROInum+1):
663     bgids = ROIinfo[i-1]['Background Spot']
664 clausted 3 bgsignal = get_background_value(dataobj,bgids)
665 clausted 1 newdata[i] = data[i]-bgsignal
666     # update the status of the data object.
667     dataobj.updateStatus(BackgroundType='Default in Gal')
668 rjaynes 25 print "The background spot embodied in gal is used."
669 clausted 1
670     else:
671     # The average of the manually input background spots
672     #are to be subtracted.
673     for i in range(1,dataobj.ROInum+1):
674 clausted 3 bgsignal = get_background_value(dataobj,bgspot)
675 clausted 1 newdata[i] = data[i]-bgsignal
676 rjaynes 25 dataobj.updateStatus(BackgroundType='Manually chosen')
677     print "The background spot is manually chosen."
678    
679 clausted 1 dataobj.data = newdata
680 clausted 3 dataobj.updateStatus(background_subtraction=True)
681 clausted 1 return dataobj
682    
683     else:
684     print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'
685     return
686    
687 clausted 3 def curve_split(obj, t_before, t_after, *t_inj):
688     """Function: Split the whole curve that contains several injections
689 clausted 1 into pieces.
690     The sample information will be checked during the split,
691     if the injection number in sampleinfo is not consistant
692     with the input injection parameter, it will help you to
693     update the sample information.
694     Input besides obj:
695     t_before: time duration before the injection time point
696     t_after: time duration after the injection time point
697     t_inj: the exact start time for each injection
698     Return: the data object with splitted data, and updated sample
699     information
700 clausted 3 """
701 clausted 1 dataobj = copy.deepcopy(obj)
702     ROInum = dataobj.ROInum
703     t=dataobj.data[0]
704     ind=[]
705     # find how many lines of data each injection
706     for i in range(len(t_inj)):
707     ind.append(np.shape(plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after))))[0])
708     m,j = max(ind),max(ind)-min(ind)
709     # the new dataset must be a m*((ROInum+1)*num_inj) matrix:
710     # T1,S_ROI1_c1,S_ROI2_c1,S_ROI3_c1...T2,S_ROI1_c2,S_ROI2_c2...
711     tmpdata = np.zeros(((ROInum+1)*len(t_inj),m))
712    
713     for i in range(len(t_inj)):
714     TmpInd = plt.find((t>(t_inj[i]-t_before))&(t<(t_inj[i]+t_after)))
715     Tmp1,Tmp2 = i*(ROInum+1),(i+1)*(ROInum+1)
716     tmpdata[Tmp1:Tmp2,0:len(TmpInd)] = dataobj.data[:,TmpInd]
717     if j!=0:
718     Split_data = tmpdata[:,:-j]
719     else:
720     Split_data = tmpdata[:]
721     # zero and align the split data
722     for i in range(np.shape(Split_data)[0]):
723     Split_data[i]=Split_data[i]-Split_data[i][0]
724    
725 clausted 3 newsampleinfo, injectinfo = check_sample_info(dataobj,t_before, t_after, t_inj)
726 clausted 1 if newsampleinfo != None :
727     dataobj.data = Split_data
728     dataobj.updateStatus(DataSplit=True)
729     dataobj.updateSampleinfo(newsampleinfo)
730     dataobj.dataformat = 2
731     dataobj.injectinfo = injectinfo
732     return dataobj
733     else:
734     dataobj.updateStatus(DataSplit=False)
735     print 'Curve Splitting not successful!'
736    
737     return
738    
739    
740 clausted 3 def check_sample_info(dataobj, t_before, t_after, t_inj):
741     """Check if the sample information consistent with the injection
742 clausted 1 parameter input. If not, help the user to update sample information
743     for the splitted curve.
744 clausted 3 """
745 clausted 1 injectinfo = []
746     if len(dataobj.sampleinfo) == len(t_inj):
747     for i,j in enumerate(t_inj):
748     injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
749     return dataobj.sampleinfo, injectinfo
750    
751     elif len(dataobj.sampleinfo) > len(t_inj):
752 clausted 3 print """ The number of injection you just input does not match that in sample infomation.
753     If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object."""
754 clausted 1 choose = str.upper(raw_input('Continue? (y/n)'))
755     if choose == 'Y':
756     newsampleinfo, injectinfo = [], []
757     print ' Your input indicates %s injections out of %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
758     for n,i in enumerate(dataobj.sampleinfo):
759     print '--',n,'.',i['Name'],':',i['Concentration']
760     print ' Enter the ID number of the injections you just select, in the form of "2-10" or "4,5,6",'
761     tmpstr = raw_input(':')
762     if ',' in tmpstr:
763     injID = map(int, tmpstr.split(','))
764     elif '-' in tmpstr:
765     injID = range(int(tmpstr.split('-')[0]), int(tmpstr.split('-')[1])+1)
766     else:
767     print ' Illegal input!'
768     return
769    
770     if len(injID) != len(t_inj):
771     print ' Please input %s injection ID numbers instead of %s You just input.' %(len(t_inj),len(injID))
772     return
773    
774     for i in injID:
775     newsampleinfo.append(dataobj.sampleinfo[i])
776     injectinfo.append([t_before, t_before+dataobj.sampleinfo[i]['Duration'], t_before+t_after])
777     return newsampleinfo, injectinfo
778     else:
779     return
780    
781     elif len(dataobj.sampleinfo) < len(t_inj):
782     print ' The number of injection you just input does not match that in sample infomation.'
783     print ' Your input indicates %s injections, where there is only %s times in experiment. ' % (len(t_inj), len(dataobj.sampleinfo))
784     print ' Please check.'
785    
786     return
787    
788    
789 clausted 3 def write_data(Head,data_out,ROI_out,DataDir,output_format):
790     """Write the data into a txt file.
791     """
792     # Modified 100405 CGL
793     # create a newfolder with time stamp to save the data
794 clausted 1 if output_format=='B':
795     ftitle='biosensor'
796     Tmpstr = str(ROI_out).replace(', ','_')
797     Tmpstr = Tmpstr.replace('(','-')
798     foutname = ftitle+Tmpstr[:-1]+'.txt'
799     elif output_format=='C':
800     foutname='clamp-'+str(ROI_out)+'.txt'
801     else:
802     ftitle=''
803 clausted 3
804     # Change directories, if requested.
805     if (DataDir != ""):
806     os.chdir('..')
807     try:
808     os.makedirs(DataDir)
809 clausted 5 ##except WindowsError:
810     except OSError:
811 clausted 3 pass
812     os.chdir(DataDir)
813 clausted 1
814     np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
815     fdt=file('temp.txt','r')
816    
817     ## foutname=ftitle+Tmpstr[:-1]+'.txt' ##raw_input('Enter the file name of the output: ')
818     fout=file(foutname,'w+')
819     # write the head of the data first
820     fout.write(Head+'\n')
821     count = len(fdt.readlines())
822     fdt.seek(0)
823     for i in range(count):
824     temp=fdt.readline()
825     fout.write(temp.replace(' ','\t'))
826     fdt.close()
827     fout.close()
828     print '-------The path of the exported data is "', os.getcwd(),'"-------'
829 rjaynes 25 return
830 clausted 1
831    
832 clausted 3 def data_output_biosensor(obj, DataDir, *ROI_out):
833     """Export the data into a biosensor format txt file,
834 clausted 1 which can be operated in Scrubber.
835     Input: the data object, the output path, the ROIs selected to export
836 clausted 3 """
837 clausted 1 # initialize the data output matrix, which will contain 'len(spot_out)*2)*n_inj' columns
838     dataobj = copy.deepcopy(obj)
839     data = dataobj.data
840     n_inj = len(dataobj.sampleinfo)
841     ROIinfo = dataobj.ROIinfo
842     ROInum = dataobj.ROInum
843    
844     data_out = np.zeros(((len(ROI_out)*2)*n_inj,np.shape(data)[1]))
845     head1 = ""
846     new_ROIinfo = []
847     for n,i in enumerate(ROI_out):
848     new_ROIinfo.append(ROIinfo[i-1]) ##the new ROI info
849     if i != ROIinfo[i-1]['IDcal']:
850 clausted 3 # check if the ID number matches position
851 clausted 1 print n,i, ROIinfo[i-1]
852     return
853     else:
854     headcomp = ROIinfo[i-1]['ID']+': '+ROIinfo[i-1]['Name']
855     try:
856     headcomp = headcomp + ', '+ROIinfo[i-1]['Conc']
857     except KeyError:
858     pass
859     headcomp = headcomp+' Fc='+str(n+1)+' - '
860     for j in range(n_inj):
861     headtmp = headcomp+str(j+1)+'_'
862     head1 = head1+headtmp+'X\t'+headtmp+'Y\t'
863     data_out[2*n*n_inj+2*j]=data[j*(ROInum+1)]
864     data_out[2*n*n_inj+2*j+1]=data[j*(ROInum+1)+i]
865     head1=head1[:-1]
866    
867     dataobj.data = data_out
868     dataobj.dataformat = 3 # biosensor dataformat
869     dataobj.updateStatus(DataType='Biosensor data')
870     dataobj.updateStatus(Head=head1)
871     dataobj.updateROIinfo(new_ROIinfo)
872    
873 clausted 3 write_data(head1,data_out,ROI_out,DataDir,'B')
874 clausted 1
875     return dataobj
876    
877 clausted 3 def data_output_clamp(obj, DataDir, ROI_out):
878     """Export the data into a Clamp format txt file,
879 clausted 1 which can be operated in Clamp.
880     Input: the data object, the output path, the selected ROI
881 clausted 3 """
882 clausted 1 # initialize the data output matrix, which will contain '2*n_inj' columns
883     dataobj = copy.deepcopy(obj)
884     data = dataobj.data
885     n_inj = len(dataobj.sampleinfo)
886 clausted 5 n_inj = max(n_inj, 1) # Must have at least one injection.
887 clausted 1 ROIinfo = dataobj.ROIinfo
888     ROInum = dataobj.ROInum
889     sampleinfo = dataobj.sampleinfo
890    
891     data_out = np.zeros((2*n_inj,np.shape(data)[1]))
892 clausted 5 ## if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
893     if (ROI_out != int(ROIinfo[ROI_out-1]['ID'])):
894 clausted 1 print 'Please check the ROI information.'
895     print ROI_out, ROIinfo[ROI_out-1]
896     return
897     else:
898     tmp = ROIinfo[ROI_out-1]['ID']+': '+ROIinfo[ROI_out-1]['Name']
899     try:
900     tmp = tmp +', Conc:'+ROIinfo[ROI_out-1]['Conc']
901     except KeyError:
902     pass
903     Clamphead = 'Vers 3.41 '+tmp
904     conc=''
905     t_start,t_stop='',''
906     head1='Time\tData\t'*n_inj
907     head1=head1[:-1]
908    
909     for i in range(n_inj):
910     data_out[2*i]=data[i*(ROInum+1)]
911     data_out[2*i+1]=data[i*(ROInum+1)+ROI_out]
912     name = sampleinfo[i]['Name']
913     conc=conc+'\t'+str(sampleinfo[i]['Concentration'])
914     t_start=t_start+'\t'+str(dataobj.injectinfo[i][0])
915     t_stop=t_stop+'\t'+str(dataobj.injectinfo[i][1])
916    
917     hconc,h_start,h_stop='\nConc1'+conc,'\nStart1'+t_start,'\nStop1'+t_stop
918     for i in (hconc,h_start,h_stop):
919     if i[-1]!='':
920     Clamphead=Clamphead+i
921    
922     Clamphead=Clamphead+'\n'+head1
923    
924     dataobj.data = data_out
925 rjaynes 25 dataobj.dataformat = 4 # clamp dataformat
926 clausted 1 dataobj.updateStatus(DataType='Clamp data')
927     dataobj.updateStatus(Head=Clamphead)
928     dataobj.updateROIinfo(ROIinfo[ROI_out-1])
929    
930 clausted 3 write_data(Clamphead,data_out,ROI_out,DataDir,'C')
931 clausted 1
932     return dataobj
933     ## return data_out,Clamphead
934    
935 rjaynes 25 def obj_save(obj, foutname):
936     foutname = foutname + '.obj' # the file extension of the saved data object is .obj
937     fout = file(foutname, 'w+')
938     pickle.dump(obj, fout)
939     fout.close()
940     print '-------The path of the exported obj is "', os.getcwd(),'"-------'
941     return
942 clausted 1
943 clausted 3 if (__name__ == '__main__'):
944     # Print a short message if this module is run like an app.
945     print "This is the OSPRAI file type conversion tool."
946