ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
(Generate patch)
# Line 1 | Line 1
1 < '''
1 > """
2   Converter: Perform the datafile reading, pre-processing and writing.
3 + Yuhang Wan, Christopher Lausted
4 + Last modified on 100407 (yymmdd)
5  
6 < Yuhang Wan, Mar 1,2010
7 < __version__ = "1.3"
8 <
9 <                    Typical Pipeline:
10 < DatafileRead (GalfileRead, MethodRead)
11 < OutliarRemove
12 < Calibrate
13 < BackgroundSubtract
14 < CurveSplit
15 < DataOutputClamp or DataOutputBiosensor
16 <
15 < '''
6 > Typical Pipeline:
7 > >import converter as cv
8 > >dat = cv.datafile_read("example.txt")
9 > >dat = cv.outlier_remove(dat)
10 > >dat = cv.calibrate(dat)
11 > >dat = cv.background_subtract(dat)
12 > >dat = cv.curve_split(dat, 100, 800, 200)
13 > >dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
14 > >dat = cv.data_output_biosensor(dat, "/dir", 0)
15 > """
16 > __version__ = "100407"
17  
18 + ## Import libraries.
19   import numpy as np
20   import pylab as plt
21   import sys
# Line 21 | Line 23
23   import time
24   import os
25   import copy
26 + #import matplotlib.pyplot as plt
27 + import SPRdataclass as spr
28  
29 < ## import matplotlib.pyplot as plt
30 < ## import matplotlib.mlab as mlb
31 < ## import packageClamp_100210 as Pack
32 < import SPRdataclass_100301 as SPR
33 <
34 <
35 < def checkformatinput(fname):
36 <    '''Examine the format of a input .txt data file.'''
37 <    # open a file, if file doesn't exit, then exit
29 > ## Global constants and variables.
30 > FMT_UNKNOWN = 0
31 > FMT_SPRIT = 1        ## Old Plexera SPRit format.
32 > FMT_ICMTXT = 2       ## Plexera Instrument Control Module text.
33 > FMT_CLAMP34 = 3      ## Morton & Myszka CLAMP Version 3.4+
34 > FMT_DAMXLS = 4       ## Plexera Data Analysis Module Excel export.
35 >
36 >
37 > def check_format_input(fname):
38 >    """Examine the format of an input .txt data file."""
39 >    ## Modified 100407 CGL.
40 >    
41 >    ## Open text file. Exit if file not found.
42      try:
43          fp = open(fname, "r")
44      except IOError:
45 <        print 'Cannot open file %s for reading' % fname
45 >        print "Error: Unable to open file %s for reading." % fname
46          sys.exit(0)
39    # examine the first line of the input file
40    # for SPRit output:"Elapsed Time (Seconds)  Average Intensity (Pixel Intensity)"
41    # for Plexera ICM output:
42    head_input1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\n"
47      
48 <    Format_input = 0
49 <    # check file format  
50 <    Tmpstr = fp.readline()
51 <    if Tmpstr==head_input1:
52 <        Format_input=1      
53 <    elif Tmpstr[-1]=='\n' and Tmpstr[-2]=='\r' and Tmpstr.split(' ')[0].split('/')[2].startswith('20'):
54 <        Format_input=2
55 <    elif Tmpstr.startswith('Vers'):
56 <        Format_input=3
57 <        
48 >    ## Examine first line of text file to guess format.
49 >    fmt = FMT_UNKNOWN  
50 >    sprit_line1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
51 >    clamp_line1="Vers 3.4"
52 >    line1 = fp.readline()
53 >    if (sprit_line1 in line1):
54 >        fmt = FMT_SPRIT      
55 >    elif (line1[2]=="/" and line1[5]=="/"):
56 >        ## This format has date mm/dd/yyyy in first column.
57 >        fmt=FMT_ICMTXT
58 >    elif (clamp_line1 in line1):
59 >        fmt = FMT_CLAMP34
60 >    
61      fp.close()
62 <    return Format_input, Tmpstr
62 >    return fmt, line1
63  
64  
65 < def SPRitformat(fname):
66 <    '''Read the SPRit formatted data and reshape it.
65 > def sprit_format(fname):
66 >    """Read the SPRit formatted data and reshape it.
67      Return the data in the form of list.
68 <    '''
68 >    """
69      # open a file, if file doesn't exist, then exit
70      try:
71          fp = open(fname, "r")
# Line 70 | Line 77
77      Tmpstr = fp.readline()
78      # check if the second line is BEGIN
79      TmpStr = fp.readline()
80 <    if TmpStr != "BEGIN\n":
81 <        print "Second line is not Begin. Break!"
82 <        #sys.exit(0)
80 >    if ("BEGIN" not in TmpStr):
81 >        print "Warning: Second line of data file is not 'BEGIN'"
82 >        ##sys.exit(0)
83      # count the lines of each data spot
84      TmpStr = fp.readline() # skip first "0.000000e+000"        
85      num_line = 1 # so we start to count from 1
# Line 118 | Line 125
125   ##    status = {'Datainput Type':'SPRit data'}
126      dataformat = 0
127      ROInum = num_spot
128 <    dataobj=SPR.DataPass(Shaped_data_1, ROInum, dataformat)
128 >    dataobj=spr.DataPass(Shaped_data_1, ROInum, dataformat)
129      dataobj.status = {}
130      dataobj.updateStatus(**status)
131      print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)  
# Line 126 | Line 133
133      return dataobj
134  
135  
136 < def PlexeraICMformat(fname):
137 <    '''Read the txt data exported from Plexera ICM software and reshape it.
136 > def plexera_icm_format(fname):
137 >    """Read the txt data exported from Plexera ICM software and reshape it.
138      Return the data in the form of list.
139 <    '''
139 >    """
140      fp=file(fname,'r')
141      Tmpdata,tmptime=[],[]
142      status = {'DatainputType':'Plexera ICM data'}
# Line 157 | Line 164
164  
165      dataformat = 0
166      ROInum = num_spot
167 <    dataobj=SPR.DataPass(Shaped_data, ROInum, dataformat)
167 >    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat)
168      dataobj.status = {}
169      dataobj.updateStatus(**status)  
170      
# Line 166 | Line 173
173  
174  
175  
176 < def DAMsinglesheet(sh,DataList,ROIinfo,start,end, colsize):
177 <    '''Retrieve and shape data form one single sheet from the Spreadsheet.
176 > def dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize):
177 >    """Retrieve and shape data from one single sheet of the Spreadsheet.
178      Return the data and ROI information in the form of lists.
179 <    '''
179 >    """
180      ## and return the minimal spot Id (start_spot)
181      ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet
182      
# Line 194 | Line 201
201  
202      return DataList,ROIinfo
203      
204 < def DAMspreadsheetformat(book):
205 <    '''Read the spreadsheet exported from Plexera DAM software and reshape it.
204 > def dam_spreadsheet_format(book):
205 >    """Read the spreadsheet exported from Plexera DAM software and reshape it.
206      Return the shaped data and ROI information.
207 <    '''
207 >    """
208      ROIinfo, newROIinfo, n_sheet = [], [], 0
209      DataList=[]
210      status = {'DatainputType':'Plexera DAM data'}
# Line 228 | Line 235
235      for i in range(book.nsheets):
236          sh=book.sheet_by_index(i)      
237          if sh.ncols!=0:        
238 <            DataList,ROIinfo = DAMsinglesheet(sh,DataList,ROIinfo,start,end, colsize)
238 >            DataList,ROIinfo = dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize)
239              n_sheet=n_sheet+1
240          else:
241              break
# Line 260 | Line 267
267      # pack the data and related information into SPRdata obj
268      dataformat = 0
269      ROInum = num_spot
270 <    dataobj=SPR.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
270 >    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
271      dataobj.status = {}
272      dataobj.updateStatus(**status)
273      print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)  
# Line 268 | Line 275
275      return dataobj
276  
277  
278 < def ReadClampData(fname):
279 <    '''Read the Clamp format data
278 > def read_clamp_data(fname):
279 >    """Read the Clamp format data
280      Return the data in the form of list.
281 <    '''
281 >    """
282      fp=open(fname,'r')
283      Tmpstr=fp.readline()
284      # examine the file head
# Line 327 | Line 334
334              sampleinfo.append(sampledic)
335              injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
336              injinfo.append(injdic)
337 <    dataobj=SPR.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
337 >    dataobj=spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
338      dataobj.status = {}
339      dataobj.updateStatus(**status)
340      
# Line 335 | Line 342
342      
343  
344  
345 < def KeyfileRead(fkey):
345 > def keyfile_read(fkey):
346      # The Key File contains
347 <    '''Function: Read the key file for old SPR instrument.
347 >    """Function: Read the key file for old SPR instrument.
348      Input: the filename.
349      Return: the ROI information.    
350 <    '''
350 >    """
351      try:
352          fp = open(fkey, "r")
353      except IOError:
# Line 366 | Line 373
373      return ROIinfo
374  
375  
376 < def GalfileRead(fname):
377 <    '''Function: Read the GAL file.
376 > def galfile_read(fname):
377 >    """Function: Read the GAL file.
378      Input: the filename.
379      Return: the ROI information.    
380 <    '''
380 >    """
381      fp=file(fname,'r')
382      ROIinfo = []
383      Tmpstr = fp.readline()      # skip the first line
# Line 428 | Line 435
435      
436      
437  
438 < def MethodRead(fname):
439 <    '''Function: Read the analyte table.
438 > def method_read(fname):
439 >    """Function: Read the analyte table.
440      Input: the filename
441      Return: the sample information
442 <    '''
442 >    """
443      # create a concentration dictionary for the sample injected into the flowcell  
444      # export a table containing the concentration of the sample injected, and the duration
445      fp=file(fname,'r')
# Line 455 | Line 462
462          print i['Name'],'\t', i['Concentration']
463      
464      return sampleinfo
465 <    
465 >
466 > def method_read_fake():
467 >    sampleinfo = []
468 >    sampleinfo.append({'Location':0, 'Name':0, 'Concentration':0, 'Duration':0, 'Flow Rate':0, 'Analyte Series':0, 'Buffer Blank Series':0})
469 >    return sampleinfo
470  
471  
472 < def DatafileRead(fname):
473 <    '''Function: Read the raw data from the Plexera instrument,
472 > def datafile_read(fname):
473 >    """Function: Read the raw data from the Plexera instrument,
474          and pack the raw data (Shaped_data) into dataobject.
475          Therefore, the initial dataformat = 0
476      Input: the filename
477      Return: the packed spr.DataPass type data object
478 <    '''
478 >    """
479      #===================check the format of the input files-------
480      if fname.split('.')[-1] == 'txt':
481      
482 <        Format_input, Firstline = checkformatinput(fname)    
482 >        Format_input, Firstline = check_format_input(fname)    
483          if Format_input == 1:   # for SPRit file format
484 <            dataobj = SPRitformat(fname)  
484 >            dataobj = sprit_format(fname)  
485              print '-'*10,' This is a SPRit data file. ','-'*10        
486              
487          elif Format_input == 2: # for Plexera ICM file format
488 <            dataobj = PlexeraICMformat(fname)
488 >            dataobj = plexera_icm_format(fname)
489              print '-'*10,' This is a Plexera ICM exported data file. ','-'*10
490  
491          elif Format_input == 3:         # for Clamp datafile format
492 <            dataobj = ReadClampData(fname)
492 >            dataobj = read_clamp_data(fname)
493              print '-'*10,' This is a Clamp file. ','-'*10
494          
495          else:
# Line 489 | Line 500
500              flag = str.upper(raw_input('Load the key file? (y/n): '))
501              if flag == 'Y':
502                  fkey = raw_input('Input the path of the key file: ')
503 <                ROIinfo = KeyfileRead(fkey)
503 >                ROIinfo = keyfile_read(fkey)
504                  dataobj.updateROIinfo(ROIinfo)        
505      
506      elif fname.split('.')[-1] == 'xls':
# Line 497 | Line 508
508          print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
509          book = xlrd.open_workbook(fname)
510          #shape the data in the Spreadsheet, whether single sheet or multiple sheets
511 <        dataobj=DAMspreadsheetformat(book)
511 >        dataobj=dam_spreadsheet_format(book)
512          flag = str.upper(raw_input('Load the gal file? (y/n): '))
513          if flag == 'Y':
514              fgal = raw_input('Input the path of the gal file: ')
515 <            ROIinfo = GalfileRead(fgal)
515 >            ROIinfo = galfile_read(fgal)
516              dataobj.updateROIinfo(ROIinfo)
517      flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
518      if flag == 'Y':
519          fprotocol = raw_input('Input the path of the analyte table: ')
520 <        sampleinfo = MethodRead(fprotocol)
520 >        sampleinfo = method_read(fprotocol)
521          dataobj.updateSampleinfo(sampleinfo)
522 <            
522 >    else:
523 >        dataobj.updateSampleinfo(method_read_fake())
524 >
525      return dataobj
526      
527  
528  
529 < def OutliarRemove(obj):
529 > def outlier_remove(obj):
530      ##dataobj.viewAll()
531 <    '''Function: Remove the noisy area that you want to get rid of.
531 >    """Function: Remove unwanted data points from noisy periods.
532      Return: the data object with clean data
533 <    '''
533 >    """
534      dataobj = copy.deepcopy(obj)
535      print ('Select the time area that you want to remove.')
536      tmpstr = raw_input('in the format of "200:240, 500:510" :')
# Line 538 | Line 551
551   ##        tmplist = tmplist
552      dataobj.data = data
553   #    dataobj.status = 'Outliar removed'
554 <    dataobj.updateStatus(OutliarRemove=True)
554 >    dataobj.updateStatus(outlier_remove=True)
555      return dataobj
556  
557 < def Calibrate(obj):
558 <    '''Function: Calibrate the Intensity response.
557 > def calibrate(obj):
558 >    """Function: calibrate the Intensity response.
559      Return: the data object with calibrated data
560      Note: at present, this function is valid only when
561          the whole procedure includes a calibration procedure.
562 <    '''
562 >    """
563      dataobj = copy.deepcopy(obj)
564      data = dataobj.data
565      caldata = copy.deepcopy(data)
# Line 568 | Line 581
581              offset = s1- slope*y1
582              caldata[i+1] = slope*y+offset
583      dataobj.data = caldata    
584 <    dataobj.updateStatus(Calibrate=True)
584 >    dataobj.updateStatus(calibrate=True)
585      return dataobj
586  
587 < def getbackgroundvalue(obj,bgids):
588 <    '''Get the averaged value of background spots for each ROI.
576 <    '''
587 > def get_background_value(obj,bgids):
588 >    """Get the averaged value of background spots for each ROI."""
589      bgsignal = obj.data[0]*0
590      for j in bgids:
591          if j == obj.ROIinfo[j-1]['ID']:
# Line 588 | Line 600
600      return bgsignal
601      
602  
603 < def BackgroundSubtract(obj, *bgspot):
604 <    '''Function: Perform the Background subtraction for the UNSPLIT curve.
603 > def background_subtract(obj, *bgspot):
604 >    """Function: Perform the Background subtraction for the UNSPLIT curve.
605      Input besides "obj" is the id of the spot taken as background.
606          The following inputs are acceptable:
607 <        1. BackgroundSubtract(obj): the default background in Galfile
607 >        1. background_subtract(obj): the default background in Galfile
608              will be subtracted.
609 <        2. BackgroundSubtract(obj, 1, 6): the average value of spot1
609 >        2. background_subtract(obj, 1, 6): the average value of spot1
610              and spot6 will be subtracted.
611 <    '''
611 >    """
612      dataobj = copy.deepcopy(obj)
613      ROIinfo = obj.ROIinfo
614      data = dataobj.data
# Line 609 | Line 621
621          #are to be subtracted.
622              for i in range(1,dataobj.ROInum+1):
623                  bgids = ROIinfo[i-1]['Background Spot']
624 <                bgsignal = getbackgroundvalue(dataobj,bgids)
624 >                bgsignal = get_background_value(dataobj,bgids)
625                  newdata[i] = data[i]-bgsignal
626              # update the status of the data object.
627              dataobj.updateStatus(BackgroundType='Default in Gal')
# Line 618 | Line 630
630          # The average of the manually input background spots
631          #are to be subtracted.
632              for i in range(1,dataobj.ROInum+1):
633 <                bgsignal = getbackgroundvalue(dataobj,bgspot)
633 >                bgsignal = get_background_value(dataobj,bgspot)
634                  newdata[i] = data[i]-bgsignal
635              dataobj.updateStatus(BackgroundType='Manually choosen')
636          dataobj.data = newdata
637 <        dataobj.updateStatus(BackgroundSubtraction=True)
637 >        dataobj.updateStatus(background_subtraction=True)
638          return dataobj
639          
640      else:
641          print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'    
642      return
643  
644 < def CurveSplit(obj, t_before, t_after, *t_inj):
645 <    '''Function: Split the whole curve that contains several injections
644 > def curve_split(obj, t_before, t_after, *t_inj):
645 >    """Function: Split the whole curve that contains several injections
646          into pieces.
647          The sample information will be checked during the split,
648              if the injection number in sampleinfo is not consistent
# Line 642 | Line 654
654          t_inj: the exact start time for each injection
655      Return: the data object with split data, and updated sample
656          information
657 <    '''
657 >    """
658      dataobj = copy.deepcopy(obj)
659      ROInum = dataobj.ROInum
660      t=dataobj.data[0]
# Line 667 | Line 679
679      for i in range(np.shape(Split_data)[0]):
680          Split_data[i]=Split_data[i]-Split_data[i][0]
681      
682 <    newsampleinfo, injectinfo = checksampleinfo(dataobj,t_before, t_after, t_inj)
682 >    newsampleinfo, injectinfo = check_sample_info(dataobj,t_before, t_after, t_inj)
683      if newsampleinfo != None :        
684          dataobj.data = Split_data
685          dataobj.updateStatus(DataSplit=True)
# Line 682 | Line 694
694      return
695      
696      
697 < def checksampleinfo(dataobj, t_before, t_after, t_inj):
698 <    '''Check if the sample information consistent with the injection
697 > def check_sample_info(dataobj, t_before, t_after, t_inj):
698 >    """Check if the sample information is consistent with the injection
699          parameter input. If not, help the user to update sample information
700          for the split curve.
701 <    '''
701 >    """
702      injectinfo = []
703      if len(dataobj.sampleinfo) == len(t_inj):
704          for i,j in enumerate(t_inj):                
# Line 694 | Line 706
706          return dataobj.sampleinfo, injectinfo
707              
708      elif len(dataobj.sampleinfo) > len(t_inj):
709 <        print '''    The number of injection you just input does not match that in sample infomation.
710 <    If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object.'''
709 >        print """    The number of injection you just input does not match that in sample infomation.
710 >    If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object."""
711          choose = str.upper(raw_input('Continue? (y/n)'))
712          if choose == 'Y':
713              newsampleinfo, injectinfo = [], []
# Line 731 | Line 743
743          return
744      
745      
746 < def writedata(Head,data_out,ROI_out,DataDir,output_format):
747 <    '''Write the data into a txt file.
748 <    '''
749 <    # create a nowfolder with time stamp to save the data
746 > def write_data(Head,data_out,ROI_out,DataDir,output_format):
747 >    """Write the data into a txt file.
748 >    """
749 >    # Modified 100405 CGL
750 >    # create a newfolder with time stamp to save the data
751      if output_format=='B':
752          ftitle='biosensor'
753          Tmpstr = str(ROI_out).replace(', ','_')
# Line 744 | Line 757
757          foutname='clamp-'+str(ROI_out)+'.txt'
758      else:
759          ftitle=''
760 <    #  change to the Root dir
761 <    os.chdir('..')        
762 <    try:
763 <        os.makedirs(DataDir)
764 <    except WindowsError:
765 <        pass
766 <    os.chdir(DataDir)
760 >        
761 >    # Change directories, if requested.
762 >    if (DataDir != ""):
763 >        os.chdir('..')        
764 >        try:
765 >            os.makedirs(DataDir)
766 >        ##except WindowsError:
767 >        except OSError:
768 >            pass
769 >        os.chdir(DataDir)
770      
771      np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
772      fdt=file('temp.txt','r')
# Line 769 | Line 785
785      print '-------The path of the exported data is "', os.getcwd(),'"-------'
786      
787      
788 < def DataOutputBiosensor(obj, DataDir, *ROI_out):
789 <    '''Export the data into a biosensor format txt file,
788 > def data_output_biosensor(obj, DataDir, *ROI_out):
789 >    """Export the data into a biosensor format txt file,
790          which can be operated in Scrubber.
791      Input: the data object, the output path, the ROIs selected to export
792 <    '''
792 >    """
793      # initialize the data output matrix, which will contain 'len(spot_out)*2)*n_inj' columns
794      dataobj = copy.deepcopy(obj)
795      data = dataobj.data
# Line 787 | Line 803
803      for n,i in enumerate(ROI_out):
804          new_ROIinfo.append(ROIinfo[i-1])        ##the new ROI info
805          if i != ROIinfo[i-1]['IDcal']:
806 <            # check if the ID number matchs position
806 >            # check if the ID number matches position
807              print n,i, ROIinfo[i-1]
808              return
809          else:
# Line 810 | Line 826
826      dataobj.updateStatus(Head=head1)    
827      dataobj.updateROIinfo(new_ROIinfo)
828      
829 <    writedata(head1,data_out,ROI_out,DataDir,'B')
829 >    write_data(head1,data_out,ROI_out,DataDir,'B')
830      
831      return dataobj
832      
833 < def DataOutputClamp(obj, DataDir, ROI_out):
834 <    '''Export the data into a Clamp format txt file,
833 > def data_output_clamp(obj, DataDir, ROI_out):
834 >    """Export the data into a Clamp format txt file,
835          which can be operated in Clamp.
836      Input: the data object, the output path, the selected ROI
837 <    '''
837 >    """
838      # initialize the data output matrix, which will contain '2*n_inj' columns
839      dataobj = copy.deepcopy(obj)
840      data = dataobj.data
841      n_inj = len(dataobj.sampleinfo)
842 +    n_inj = max(n_inj, 1)  # Must have at least one injection.
843      ROIinfo = dataobj.ROIinfo
844      ROInum = dataobj.ROInum
845      sampleinfo = dataobj.sampleinfo
846      
847      data_out = np.zeros((2*n_inj,np.shape(data)[1]))
848 <    if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
848 > ##  if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
849 >    if (ROI_out != int(ROIinfo[ROI_out-1]['ID'])):
850          print 'Please check the ROI information.'
851          print ROI_out, ROIinfo[ROI_out-1]
852          return
# Line 865 | Line 883
883      dataobj.updateStatus(Head=Clamphead)    
884      dataobj.updateROIinfo(ROIinfo[ROI_out-1])
885      
886 <    writedata(Clamphead,data_out,ROI_out,DataDir,'C')
886 >    write_data(Clamphead,data_out,ROI_out,DataDir,'C')
887      
888      return dataobj
889   ##    return data_out,Clamphead
890  
891  
892 + if (__name__ == '__main__'):
893 +    # Print a short message if this module is run like an app.
894 +    print "This is the OSPRAI file type conversion tool."
895 +

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines