ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
(Generate patch)
# Line 1 | Line 1
1 < '''
1 > """
2   Converter: Perform the datafile reading, pre-processing and writing.
3 + Yuhang Wan, Christopher Lausted
4 + Last modified on 100405 (yymmdd)
5  
6 < Yuhang Wan, Mar 1,2010
7 < __version__ = "1.3"
8 <
9 <                    Typical Pipeline:
10 < DatafileRead (GalfileRead, MethodRead)
11 < OutliarRemove
12 < Calibrate
13 < BackgroundSubtract
14 < CurveSplit
15 < DataOutputClamp or DataOutputBiosensor
16 <
15 < '''
6 > Typical Pipeline:
7 > >import converter as cv
8 > >dat = cv.datafile_read("example.txt")
9 > >dat = cv.outlier_remove(dat)
10 > >dat = cv.calibrate(dat)
11 > >dat = cv.background_subtract(dat)
12 > >dat = cv.curve_split(dat, 100, 800, 200)
13 > >dat = cv.data_output_clamp(dat, "/dir", 0)
14 > >dat = cv.data_output_biosensor(dat, "/dir", 0,1,2,3)
15 > """
16 > __version__ = "100405"
17  
18   import numpy as np
19   import pylab as plt
# Line 25 | Line 26
26   ## import matplotlib.pyplot as plt
27   ## import matplotlib.mlab as mlb
28   ## import packageClamp_100210 as Pack
29 < import SPRdataclass_100301 as SPR
29 > import SPRdataclass as spr
30  
31  
32 < def checkformatinput(fname):
33 <    '''Examine the format of a input .txt data file.'''
32 > def check_format_input(fname):
33 >    """Examine the format of an input .txt data file."""
34      # open the file; if the file doesn't exist, then exit
35      try:
36          fp = open(fname, "r")
# Line 55 | Line 56
56      return Format_input, Tmpstr
57  
58  
59 < def SPRitformat(fname):
60 <    '''Read the SPRit formatted data and reshape it.
59 > def sprit_format(fname):
60 >    """Read the SPRit formatted data and reshape it.
61      Return the data in the form of list.
62 <    '''
62 >    """
63      # open the file; if the file doesn't exist, then exit
64      try:
65          fp = open(fname, "r")
# Line 118 | Line 119
119   ##    status = {'Datainput Type':'SPRit data'}
120      dataformat = 0
121      ROInum = num_spot
122 <    dataobj=SPR.DataPass(Shaped_data_1, ROInum, dataformat)
122 >    dataobj=spr.DataPass(Shaped_data_1, ROInum, dataformat)
123      dataobj.status = {}
124      dataobj.updateStatus(**status)
125      print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)  
# Line 126 | Line 127
127      return dataobj
128  
129  
130 < def PlexeraICMformat(fname):
131 <    '''Read the txt data exported from Plexera ICM software and reshape it.
130 > def plexera_icm_format(fname):
131 >    """Read the txt data exported from Plexera ICM software and reshape it.
132      Return the data in the form of list.
133 <    '''
133 >    """
134      fp=file(fname,'r')
135      Tmpdata,tmptime=[],[]
136      status = {'DatainputType':'Plexera ICM data'}
# Line 157 | Line 158
158  
159      dataformat = 0
160      ROInum = num_spot
161 <    dataobj=SPR.DataPass(Shaped_data, ROInum, dataformat)
161 >    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat)
162      dataobj.status = {}
163      dataobj.updateStatus(**status)  
164      
# Line 166 | Line 167
167  
168  
169  
170 < def DAMsinglesheet(sh,DataList,ROIinfo,start,end, colsize):
171 <    '''Retrieve and shape data form one single sheet from the Spreadsheet.
170 > def dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize):
171 >    """Retrieve and shape data from one single sheet of the Spreadsheet.
172      Return the data and ROI information in the form of lists.
173 <    '''
173 >    """
174      ## and return the minimal spot Id (start_spot)
175      ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet
176      
# Line 194 | Line 195
195  
196      return DataList,ROIinfo
197      
198 < def DAMspreadsheetformat(book):
199 <    '''Read the spreadsheet exported from Plexera DAM software and reshape it.
198 > def dam_spreadsheet_format(book):
199 >    """Read the spreadsheet exported from Plexera DAM software and reshape it.
200      Return the shaped data and ROI information.
201 <    '''
201 >    """
202      ROIinfo, newROIinfo, n_sheet = [], [], 0
203      DataList=[]
204      status = {'DatainputType':'Plexera DAM data'}
# Line 228 | Line 229
229      for i in range(book.nsheets):
230          sh=book.sheet_by_index(i)      
231          if sh.ncols!=0:        
232 <            DataList,ROIinfo = DAMsinglesheet(sh,DataList,ROIinfo,start,end, colsize)
232 >            DataList,ROIinfo = dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize)
233              n_sheet=n_sheet+1
234          else:
235              break
# Line 260 | Line 261
261      # pack the data and related information into an SPRdata obj
262      dataformat = 0
263      ROInum = num_spot
264 <    dataobj=SPR.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
264 >    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
265      dataobj.status = {}
266      dataobj.updateStatus(**status)
267      print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)  
# Line 268 | Line 269
269      return dataobj
270  
271  
272 < def ReadClampData(fname):
273 <    '''Read the Clamp format data
272 > def read_clamp_data(fname):
273 >    """Read the Clamp format data
274      Return the data in the form of list.
275 <    '''
275 >    """
276      fp=open(fname,'r')
277      Tmpstr=fp.readline()
278      # examine the file head
# Line 327 | Line 328
328              sampleinfo.append(sampledic)
329              injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
330              injinfo.append(injdic)
331 <    dataobj=SPR.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
331 >    dataobj=spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
332      dataobj.status = {}
333      dataobj.updateStatus(**status)
334      
# Line 335 | Line 336
336      
337  
338  
339 < def KeyfileRead(fkey):
339 > def keyfile_read(fkey):
340      # The Key File contains
341 <    '''Function: Read the key file for old SPR instrument.
341 >    """Function: Read the key file for old SPR instrument.
342      Input: the filename.
343      Return: the ROI information.    
344 <    '''
344 >    """
345      try:
346          fp = open(fkey, "r")
347      except IOError:
# Line 366 | Line 367
367      return ROIinfo
368  
369  
370 < def GalfileRead(fname):
371 <    '''Function: Read the GAL file.
370 > def galfile_read(fname):
371 >    """Function: Read the GAL file.
372      Input: the filename.
373      Return: the ROI information.    
374 <    '''
374 >    """
375      fp=file(fname,'r')
376      ROIinfo = []
377      Tmpstr = fp.readline()      # skip the first line
# Line 428 | Line 429
429      
430      
431  
432 < def MethodRead(fname):
433 <    '''Function: Read the analyte table.
432 > def method_read(fname):
433 >    """Function: Read the analyte table.
434      Input: the filename
435      Return: the sample information
436 <    '''
436 >    """
437      # create a concentration dictionary for the sample injected into the flowcell  
438      # export a table containing the concentration of the sample injected, and the duration
439      fp=file(fname,'r')
# Line 458 | Line 459
459      
460  
461  
462 < def DatafileRead(fname):
463 <    '''Function: Read the raw data from the Plexera instrument,
462 > def datafile_read(fname):
463 >    """Function: Read the raw data from the Plexera instrument,
464          and pack the raw data (Shaped_data) into dataobject.
465          Therefore, the initial dataformat = 0
466      Input: the filename
467      Return: the packed spr.DataPass type data object
468 <    '''
468 >    """
469      #===================check the format of the input files-------
470      if fname.split('.')[-1] == 'txt':
471      
472 <        Format_input, Firstline = checkformatinput(fname)    
472 >        Format_input, Firstline = check_format_input(fname)    
473          if Format_input == 1:   # for SPRit file format
474 <            dataobj = SPRitformat(fname)  
474 >            dataobj = sprit_format(fname)  
475              print '-'*10,' This is a SPRit data file. ','-'*10        
476              
477          elif Format_input == 2: # for Plexera ICM file format
478 <            dataobj = PlexeraICMformat(fname)
478 >            dataobj = plexera_icm_format(fname)
479              print '-'*10,' This is a Plexera ICM exported data file. ','-'*10
480  
481          elif Format_input == 3:         # for Clamp datafile format
482 <            dataobj = ReadClampData(fname)
482 >            dataobj = read_clamp_data(fname)
483              print '-'*10,' This is a Clamp file. ','-'*10
484          
485          else:
# Line 489 | Line 490
490              flag = str.upper(raw_input('Load the key file? (y/n): '))
491              if flag == 'Y':
492                  fkey = raw_input('Input the path of the key file: ')
493 <                ROIinfo = KeyfileRead(fkey)
493 >                ROIinfo = keyfile_read(fkey)
494                  dataobj.updateROIinfo(ROIinfo)        
495      
496      elif fname.split('.')[-1] == 'xls':
# Line 497 | Line 498
498          print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
499          book = xlrd.open_workbook(fname)
500          #shape the data in the Spreadsheet, whether single sheet or multiple sheets
501 <        dataobj=DAMspreadsheetformat(book)
501 >        dataobj=dam_spreadsheet_format(book)
502          flag = str.upper(raw_input('Load the gal file? (y/n): '))
503          if flag == 'Y':
504              fgal = raw_input('Input the path of the gal file: ')
505 <            ROIinfo = GalfileRead(fgal)
505 >            ROIinfo = galfile_read(fgal)
506              dataobj.updateROIinfo(ROIinfo)
507      flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
508      if flag == 'Y':
509          fprotocol = raw_input('Input the path of the analyte table: ')
510 <        sampleinfo = MethodRead(fprotocol)
510 >        sampleinfo = method_read(fprotocol)
511          dataobj.updateSampleinfo(sampleinfo)
512              
513      return dataobj
514      
515  
516  
517 < def OutliarRemove(obj):
517 > def outlier_remove(obj):
518      ##dataobj.viewAll()
519 <    '''Function: Remove the noisy area that you want to get rid of.
519 >    """Function: Remove unwanted data points from noisy periods.
520      Return: the data object with clean data
521 <    '''
521 >    """
522      dataobj = copy.deepcopy(obj)
523      print ('Select the time area that you want to remove.')
524      tmpstr = raw_input('in the format of "200:240, 500:510" :')
# Line 538 | Line 539
539   ##        tmplist = tmplist
540      dataobj.data = data
541   #    dataobj.status = 'Outliar removed'
542 <    dataobj.updateStatus(OutliarRemove=True)
542 >    dataobj.updateStatus(outlier_remove=True)
543      return dataobj
544  
545 < def Calibrate(obj):
546 <    '''Function: Calibrate the Intensity response.
545 > def calibrate(obj):
546 >    """Function: Calibrate the intensity response.
547      Return: the data object with calibrated data
548      Note: at present, this function is valid only when
549          the whole procedure includes a calibration procedure.
550 <    '''
550 >    """
551      dataobj = copy.deepcopy(obj)
552      data = dataobj.data
553      caldata = copy.deepcopy(data)
# Line 568 | Line 569
569              offset = s1- slope*y1
570              caldata[i+1] = slope*y+offset
571      dataobj.data = caldata    
572 <    dataobj.updateStatus(Calibrate=True)
572 >    dataobj.updateStatus(calibrate=True)
573      return dataobj
574  
575 < def getbackgroundvalue(obj,bgids):
576 <    '''Get the averaged value of background spots for each ROI.
576 <    '''
575 > def get_background_value(obj,bgids):
576 >    """Get the averaged value of background spots for each ROI."""
577      bgsignal = obj.data[0]*0
578      for j in bgids:
579          if j == obj.ROIinfo[j-1]['ID']:
# Line 588 | Line 588
588      return bgsignal
589      
590  
591 < def BackgroundSubtract(obj, *bgspot):
592 <    '''Function: Perform the Background subtraction for the UNSPLIT curve.
591 > def background_subtract(obj, *bgspot):
592 >    """Function: Perform the Background subtraction for the UNSPLIT curve.
593      Input besides "obj" is the id of the spot taken as background.
594          The following inputs are acceptable:
595 <        1. BackgroundSubtract(obj): the default background in Galfile
595 >        1. background_subtract(obj): the default background in Galfile
596              will be subtracted.
597 <        2. BackgroundSubtract(obj, 1, 6): the average value of spot1
597 >        2. background_subtract(obj, 1, 6): the average value of spot1
598              and spot6 will be subtracted.
599 <    '''
599 >    """
600      dataobj = copy.deepcopy(obj)
601      ROIinfo = obj.ROIinfo
602      data = dataobj.data
# Line 609 | Line 609
609          #are to be subtracted.
610              for i in range(1,dataobj.ROInum+1):
611                  bgids = ROIinfo[i-1]['Background Spot']
612 <                bgsignal = getbackgroundvalue(dataobj,bgids)
612 >                bgsignal = get_background_value(dataobj,bgids)
613                  newdata[i] = data[i]-bgsignal
614              # update the status of the data object.
615              dataobj.updateStatus(BackgroundType='Default in Gal')
# Line 618 | Line 618
618          # The average of the manually input background spots
619          #are to be subtracted.
620              for i in range(1,dataobj.ROInum+1):
621 <                bgsignal = getbackgroundvalue(dataobj,bgspot)
621 >                bgsignal = get_background_value(dataobj,bgspot)
622                  newdata[i] = data[i]-bgsignal
623              dataobj.updateStatus(BackgroundType='Manually choosen')
624          dataobj.data = newdata
625 <        dataobj.updateStatus(BackgroundSubtraction=True)
625 >        dataobj.updateStatus(background_subtraction=True)
626          return dataobj
627          
628      else:
629          print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'    
630      return
631  
632 < def CurveSplit(obj, t_before, t_after, *t_inj):
633 <    '''Function: Split the whole curve that contains several injections
632 > def curve_split(obj, t_before, t_after, *t_inj):
633 >    """Function: Split the whole curve that contains several injections
634          into pieces.
635          The sample information will be checked during the split,
636              if the injection number in sampleinfo is not consistent
# Line 642 | Line 642
642          t_inj: the exact start time for each injection
643      Return: the data object with split data, and updated sample
644          information
645 <    '''
645 >    """
646      dataobj = copy.deepcopy(obj)
647      ROInum = dataobj.ROInum
648      t=dataobj.data[0]
# Line 667 | Line 667
667      for i in range(np.shape(Split_data)[0]):
668          Split_data[i]=Split_data[i]-Split_data[i][0]
669      
670 <    newsampleinfo, injectinfo = checksampleinfo(dataobj,t_before, t_after, t_inj)
670 >    newsampleinfo, injectinfo = check_sample_info(dataobj,t_before, t_after, t_inj)
671      if newsampleinfo != None :        
672          dataobj.data = Split_data
673          dataobj.updateStatus(DataSplit=True)
# Line 682 | Line 682
682      return
683      
684      
685 < def checksampleinfo(dataobj, t_before, t_after, t_inj):
686 <    '''Check if the sample information consistent with the injection
685 > def check_sample_info(dataobj, t_before, t_after, t_inj):
686 >    """Check if the sample information is consistent with the injection
687          parameter input. If not, help the user to update sample information
688          for the split curve.
689 <    '''
689 >    """
690      injectinfo = []
691      if len(dataobj.sampleinfo) == len(t_inj):
692          for i,j in enumerate(t_inj):                
# Line 694 | Line 694
694          return dataobj.sampleinfo, injectinfo
695              
696      elif len(dataobj.sampleinfo) > len(t_inj):
697 <        print '''    The number of injection you just input does not match that in sample infomation.
698 <    If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object.'''
697 >        print """    The number of injection you just input does not match that in sample infomation.
698 >    If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object."""
699          choose = str.upper(raw_input('Continue? (y/n)'))
700          if choose == 'Y':
701              newsampleinfo, injectinfo = [], []
# Line 731 | Line 731
731          return
732      
733      
734 < def writedata(Head,data_out,ROI_out,DataDir,output_format):
735 <    '''Write the data into a txt file.
736 <    '''
737 <    # create a nowfolder with time stamp to save the data
734 > def write_data(Head,data_out,ROI_out,DataDir,output_format):
735 >    """Write the data into a txt file.
736 >    """
737 >    # Modified 100405 CGL
738 >    # create a newfolder with time stamp to save the data
739      if output_format=='B':
740          ftitle='biosensor'
741          Tmpstr = str(ROI_out).replace(', ','_')
# Line 744 | Line 745
745          foutname='clamp-'+str(ROI_out)+'.txt'
746      else:
747          ftitle=''
748 <    #  change to the Root dir
749 <    os.chdir('..')        
750 <    try:
751 <        os.makedirs(DataDir)
752 <    except WindowsError:
753 <        pass
754 <    os.chdir(DataDir)
748 >        
749 >    # Change directories, if requested.
750 >    if (DataDir != ""):
751 >        os.chdir('..')        
752 >        try:
753 >            os.makedirs(DataDir)
754 >        except WindowsError:
755 >            pass
756 >        os.chdir(DataDir)
757      
758      np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
759      fdt=file('temp.txt','r')
# Line 769 | Line 772
772      print '-------The path of the exported data is "', os.getcwd(),'"-------'
773      
774      
775 < def DataOutputBiosensor(obj, DataDir, *ROI_out):
776 <    '''Export the data into a biosensor format txt file,
775 > def data_output_biosensor(obj, DataDir, *ROI_out):
776 >    """Export the data into a biosensor format txt file,
777          which can be operated in Scrubber.
778      Input: the data object, the output path, the ROIs selected to export
779 <    '''
779 >    """
780      # initialize the data output matrix, which will contain '(len(spot_out)*2)*n_inj' columns
781      dataobj = copy.deepcopy(obj)
782      data = dataobj.data
# Line 787 | Line 790
790      for n,i in enumerate(ROI_out):
791          new_ROIinfo.append(ROIinfo[i-1])        ##the new ROI info
792          if i != ROIinfo[i-1]['IDcal']:
793 <            # check if the ID number matchs position
793 >            # check if the ID number matches position
794              print n,i, ROIinfo[i-1]
795              return
796          else:
# Line 810 | Line 813
813      dataobj.updateStatus(Head=head1)    
814      dataobj.updateROIinfo(new_ROIinfo)
815      
816 <    writedata(head1,data_out,ROI_out,DataDir,'B')
816 >    write_data(head1,data_out,ROI_out,DataDir,'B')
817      
818      return dataobj
819      
820 < def DataOutputClamp(obj, DataDir, ROI_out):
821 <    '''Export the data into a Clamp format txt file,
820 > def data_output_clamp(obj, DataDir, ROI_out):
821 >    """Export the data into a Clamp format txt file,
822          which can be operated in Clamp.
823      Input: the data object, the output path, the selected ROI
824 <    '''
824 >    """
825      # initialize the data output matrix, which will contain '2*n_inj' columns
826      dataobj = copy.deepcopy(obj)
827      data = dataobj.data
# Line 865 | Line 868
868      dataobj.updateStatus(Head=Clamphead)    
869      dataobj.updateROIinfo(ROIinfo[ROI_out-1])
870      
871 <    writedata(Clamphead,data_out,ROI_out,DataDir,'C')
871 >    write_data(Clamphead,data_out,ROI_out,DataDir,'C')
872      
873      return dataobj
874   ##    return data_out,Clamphead
875  
876  
877 + if (__name__ == '__main__'):
878 +    # Print a short message if this module is run like an app.
879 +    print "This is the OSPRAI file type conversion tool."
880 +

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines