ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
(Generate patch)
# Line 1 | Line 1
1 < '''
1 > """
2   Converter: Perform the datafile reading, pre-processing and writing.
3 + Yuhang Wan, Christopher Lausted
4 + Last modified on 100405 (yymmdd)
5  
6 < Yuhang Wan, Mar 1,2010
7 < __version__ = "1.3"
8 <
9 <                    Typical Pipeline:
10 < DatafileRead (GalfileRead, MethodRead)
11 < OutliarRemove
12 < Calibrate
13 < BackgroundSubtract
14 < CurveSplit
15 < DataOutputClamp or DataOutputBiosensor
16 <
15 < '''
6 > Typical Pipeline:
7 > >import converter as cv
8 > >dat = cv.datafile_read("example.txt")
9 > >dat = cv.outlier_remove(dat)
10 > >dat = cv.calibrate(dat)
11 > >dat = cv.background_subtract(dat)
12 > >dat = cv.curve_split(dat, 100, 800, 200)
13 > >dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
14 > >dat = cv.data_output_biosensor(dat, "/dir", 0)
15 > """
16 > __version__ = "100405"
17  
18   import numpy as np
19   import pylab as plt
# Line 25 | Line 26
26   ## import matplotlib.pyplot as plt
27   ## import matplotlib.mlab as mlb
28   ## import packageClamp_100210 as Pack
29 < import SPRdataclass_100301 as SPR
29 > import SPRdataclass as spr
30  
31  
32 < def checkformatinput(fname):
33 <    '''Examine the format of a input .txt data file.'''
32 > def check_format_input(fname):
33 >    """Examine the format of a input .txt data file."""
34      # open a file, if file doesn't exist, then exit
35      try:
36          fp = open(fname, "r")
# Line 39 | Line 40
40      # examine the first line of the input file
41      # for SPRit output:"Elapsed Time (Seconds)  Average Intensity (Pixel Intensity)"
42      # for Plexera ICM output:
43 <    head_input1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)\n"
43 >    head_input1="Elapsed Time (Seconds)\tAverage Intensity (Pixel Intensity)"
44      
45      Format_input = 0
46      # check file format  
47      Tmpstr = fp.readline()
48 <    if Tmpstr==head_input1:
48 >    if (head_input1 in Tmpstr):
49          Format_input=1      
50      elif Tmpstr[-1]=='\n' and Tmpstr[-2]=='\r' and Tmpstr.split(' ')[0].split('/')[2].startswith('20'):
51          Format_input=2
# Line 55 | Line 56
56      return Format_input, Tmpstr
57  
58  
59 < def SPRitformat(fname):
60 <    '''Read the SPRit formatted data and reshape it.
59 > def sprit_format(fname):
60 >    """Read the SPRit formatted data and reshape it.
61      Return the data in the form of list.
62 <    '''
62 >    """
63      # open a file, if file doesn't exist, then exit
64      try:
65          fp = open(fname, "r")
# Line 70 | Line 71
71      Tmpstr = fp.readline()
72      # check if the second line is BEGIN
73      TmpStr = fp.readline()
74 <    if TmpStr != "BEGIN\n":
75 <        print "Second line is not Begin. Break!"
76 <        #sys.exit(0)
74 >    if ("BEGIN" not in TmpStr):
75 >        print "Warning: Second line of data file is not 'BEGIN'"
76 >        ##sys.exit(0)
77      # count the lines of each data spot
78      TmpStr = fp.readline() # skip first "0.000000e+000"        
79      num_line = 1 # so we start to count from 1
# Line 118 | Line 119
119   ##    status = {'Datainput Type':'SPRit data'}
120      dataformat = 0
121      ROInum = num_spot
122 <    dataobj=SPR.DataPass(Shaped_data_1, ROInum, dataformat)
122 >    dataobj=spr.DataPass(Shaped_data_1, ROInum, dataformat)
123      dataobj.status = {}
124      dataobj.updateStatus(**status)
125      print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)  
# Line 126 | Line 127
127      return dataobj
128  
129  
130 < def PlexeraICMformat(fname):
131 <    '''Read the txt data exported from Plexera ICM software and reshape it.
130 > def plexera_icm_format(fname):
131 >    """Read the txt data exported from Plexera ICM software and reshape it.
132      Return the data in the form of list.
133 <    '''
133 >    """
134      fp=file(fname,'r')
135      Tmpdata,tmptime=[],[]
136      status = {'DatainputType':'Plexera ICM data'}
# Line 157 | Line 158
158  
159      dataformat = 0
160      ROInum = num_spot
161 <    dataobj=SPR.DataPass(Shaped_data, ROInum, dataformat)
161 >    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat)
162      dataobj.status = {}
163      dataobj.updateStatus(**status)  
164      
# Line 166 | Line 167
167  
168  
169  
170 < def DAMsinglesheet(sh,DataList,ROIinfo,start,end, colsize):
171 <    '''Retrieve and shape data form one single sheet from the Spreadsheet.
170 > def dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize):
171 >    """Retrieve and shape data form one single sheet from the Spreadsheet.
172      Return the data, ROI information in the form of list.
173 <    '''
173 >    """
174      ## and return the minimal spot Id (start_spot)
175      ## retrieve the head of the Spreadsheet, SpotList is used to store the spot Id of each sheet
176      
# Line 194 | Line 195
195  
196      return DataList,ROIinfo
197      
198 < def DAMspreadsheetformat(book):
199 <    '''Read the spreadsheet exported from Plexera DAM software and reshape it.
198 > def dam_spreadsheet_format(book):
199 >    """Read the spreadsheet exported from Plexera DAM software and reshape it.
200      Return the shaped data and ROI information.
201 <    '''
201 >    """
202      ROIinfo, newROIinfo, n_sheet = [], [], 0
203      DataList=[]
204      status = {'DatainputType':'Plexera DAM data'}
# Line 228 | Line 229
229      for i in range(book.nsheets):
230          sh=book.sheet_by_index(i)      
231          if sh.ncols!=0:        
232 <            DataList,ROIinfo = DAMsinglesheet(sh,DataList,ROIinfo,start,end, colsize)
232 >            DataList,ROIinfo = dam_single_sheet(sh,DataList,ROIinfo,start,end, colsize)
233              n_sheet=n_sheet+1
234          else:
235              break
# Line 260 | Line 261
261      # pack the data and related information into SPRdata obj
262      dataformat = 0
263      ROInum = num_spot
264 <    dataobj=SPR.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
264 >    dataobj=spr.DataPass(Shaped_data, ROInum, dataformat, newROIinfo)
265      dataobj.status = {}
266      dataobj.updateStatus(**status)
267      print "There are %d ROIs, and %d lines of data" % (ROInum, num_line)  
# Line 268 | Line 269
269      return dataobj
270  
271  
272 < def ReadClampData(fname):
273 <    '''Read the Clamp format data
272 > def read_clamp_data(fname):
273 >    """Read the Clamp format data
274      Return the data in the form of list.
275 <    '''
275 >    """
276      fp=open(fname,'r')
277      Tmpstr=fp.readline()
278      # examine the file head
# Line 327 | Line 328
328              sampleinfo.append(sampledic)
329              injdic = dict(zip(['ton','toff'],[float(Start[i]),float(Stop[i])]))
330              injinfo.append(injdic)
331 <    dataobj=SPR.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
331 >    dataobj=spr.DataPass(Data, ROInum, dataformat, ROIinfo, sampleinfo, injinfo)
332      dataobj.status = {}
333      dataobj.updateStatus(**status)
334      
# Line 335 | Line 336
336      
337  
338  
339 < def KeyfileRead(fkey):
339 > def keyfile_read(fkey):
340      # The Key File contains
341 <    '''Function: Read the key file for old SPR instrument.
341 >    """Function: Read the key file for old SPR instrument.
342      Input: the filename.
343      Return: the ROI information.    
344 <    '''
344 >    """
345      try:
346          fp = open(fkey, "r")
347      except IOError:
# Line 366 | Line 367
367      return ROIinfo
368  
369  
370 < def GalfileRead(fname):
371 <    '''Function: Read the GAL file.
370 > def galfile_read(fname):
371 >    """Function: Read the GAL file.
372      Input: the filename.
373      Return: the ROI information.    
374 <    '''
374 >    """
375      fp=file(fname,'r')
376      ROIinfo = []
377      Tmpstr = fp.readline()      # skip the first line
# Line 428 | Line 429
429      
430      
431  
432 < def MethodRead(fname):
433 <    '''Function: Read the analyte table.
432 > def method_read(fname):
433 >    """Function: Read the analyte table.
434      Input: the filename
435      Return: the sample information
436 <    '''
436 >    """
437      # create a concentration dictionary for the sample injected into the flowcell  
438      # export a table containing the concentration of the sample injected, and the duration
439      fp=file(fname,'r')
# Line 455 | Line 456
456          print i['Name'],'\t', i['Concentration']
457      
458      return sampleinfo
459 <    
459 >
460 > def method_read_fake():
461 >    sampleinfo = []
462 >    sampleinfo.append({'Location':0, 'Name':0, 'Concentration':0, 'Duration':0, 'Flow Rate':0, 'Analyte Series':0, 'Buffer Blank Series':0})
463 >    return sampleinfo
464  
465  
466 < def DatafileRead(fname):
467 <    '''Function: Read the raw data from the Plexera instrument,
466 > def datafile_read(fname):
467 >    """Function: Read the raw data from the Plexera instrument,
468          and pack the raw data (Shaped_data) into dataobject.
469          Therefore, the initial dataformat = 0
470      Input: the filename
471      Return: the packed SPR.DataPass type data object
472 <    '''
472 >    """
473      #===================check the format of the input files-------
474      if fname.split('.')[-1] == 'txt':
475      
476 <        Format_input, Firstline = checkformatinput(fname)    
476 >        Format_input, Firstline = check_format_input(fname)    
477          if Format_input == 1:   # for SPRit file format
478 <            dataobj = SPRitformat(fname)  
478 >            dataobj = sprit_format(fname)  
479              print '-'*10,' This is a SPRit data file. ','-'*10        
480              
481          elif Format_input == 2: # for Plexera ICM file format
482 <            dataobj = PlexeraICMformat(fname)
482 >            dataobj = plexera_icm_format(fname)
483              print '-'*10,' This is a Plexera ICM exported data file. ','-'*10
484  
485          elif Format_input == 3:         # for Clamp datafile format
486 <            dataobj = ReadClampData(fname)
486 >            dataobj = read_clamp_data(fname)
487              print '-'*10,' This is a Clamp file. ','-'*10
488          
489          else:
# Line 489 | Line 494
494              flag = str.upper(raw_input('Load the key file? (y/n): '))
495              if flag == 'Y':
496                  fkey = raw_input('Input the path of the key file: ')
497 <                ROIinfo = KeyfileRead(fkey)
497 >                ROIinfo = keyfile_read(fkey)
498                  dataobj.updateROIinfo(ROIinfo)        
499      
500      elif fname.split('.')[-1] == 'xls':
# Line 497 | Line 502
502          print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
503          book = xlrd.open_workbook(fname)
504          #shape the data in the Spreadsheet, whether single sheet or multiple sheets
505 <        dataobj=DAMspreadsheetformat(book)
505 >        dataobj=dam_spreadsheet_format(book)
506          flag = str.upper(raw_input('Load the gal file? (y/n): '))
507          if flag == 'Y':
508              fgal = raw_input('Input the path of the gal file: ')
509 <            ROIinfo = GalfileRead(fgal)
509 >            ROIinfo = galfile_read(fgal)
510              dataobj.updateROIinfo(ROIinfo)
511      flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
512      if flag == 'Y':
513          fprotocol = raw_input('Input the path of the analyte table: ')
514 <        sampleinfo = MethodRead(fprotocol)
514 >        sampleinfo = method_read(fprotocol)
515          dataobj.updateSampleinfo(sampleinfo)
516 <            
516 >    else:
517 >        dataobj.updateSampleinfo(method_read_fake())
518 >
519      return dataobj
520      
521  
522  
523 < def OutliarRemove(obj):
523 > def outlier_remove(obj):
524      ##dataobj.viewAll()
525 <    '''Function: Remove the noisy area that you want to get rid of.
525 >    """Function: Remove unwanted data points from noisy periods.
526      Return: the data object with clean data
527 <    '''
527 >    """
528      dataobj = copy.deepcopy(obj)
529      print ('Select the time area that you want to remove.')
530      tmpstr = raw_input('in the format of "200:240, 500:510" :')
# Line 538 | Line 545
545   ##        tmplist = tmplist
546      dataobj.data = data
547   #    dataobj.status = 'Outliar removed'
548 <    dataobj.updateStatus(OutliarRemove=True)
548 >    dataobj.updateStatus(outlier_remove=True)
549      return dataobj
550  
551 < def Calibrate(obj):
552 <    '''Function: Calibrate the Intensity response.
551 > def calibrate(obj):
552 >    """Function: calibrate the Intensity response.
553      Return: the data object with calibrated data
554      Note: at present, this function is valid only when
555          the whole procedure includes a calibration procedure.
556 <    '''
556 >    """
557      dataobj = copy.deepcopy(obj)
558      data = dataobj.data
559      caldata = copy.deepcopy(data)
# Line 568 | Line 575
575              offset = s1- slope*y1
576              caldata[i+1] = slope*y+offset
577      dataobj.data = caldata    
578 <    dataobj.updateStatus(Calibrate=True)
578 >    dataobj.updateStatus(calibrate=True)
579      return dataobj
580  
581 < def getbackgroundvalue(obj,bgids):
582 <    '''Get the averaged value of background spots for each ROI.
576 <    '''
581 > def get_background_value(obj,bgids):
582 >    """Get the averaged value of background spots for each ROI."""
583      bgsignal = obj.data[0]*0
584      for j in bgids:
585          if j == obj.ROIinfo[j-1]['ID']:
# Line 588 | Line 594
594      return bgsignal
595      
596  
597 < def BackgroundSubtract(obj, *bgspot):
598 <    '''Function: Perform the Background subtraction for the UNSPLIT curve.
597 > def background_subtract(obj, *bgspot):
598 >    """Function: Perform the Background subtraction for the UNSPLIT curve.
599      Input besides "obj" is the id of the spot taken as background.
600          The following inputs are acceptable:
601 <        1. BackgroundSubtract(obj): the default background in Galfile
601 >        1. background_subtract(obj): the default background in Galfile
602              will be subtracted.
603 <        2. BackgroundSubtract(obj, 1, 6): the average value of spot1
603 >        2. background_subtract(obj, 1, 6): the average value of spot1
604              and spot6 will be subtracted.
605 <    '''
605 >    """
606      dataobj = copy.deepcopy(obj)
607      ROIinfo = obj.ROIinfo
608      data = dataobj.data
# Line 609 | Line 615
615          #are to be subtracted.
616              for i in range(1,dataobj.ROInum+1):
617                  bgids = ROIinfo[i-1]['Background Spot']
618 <                bgsignal = getbackgroundvalue(dataobj,bgids)
618 >                bgsignal = get_background_value(dataobj,bgids)
619                  newdata[i] = data[i]-bgsignal
620              # update the status of the data object.
621              dataobj.updateStatus(BackgroundType='Default in Gal')
# Line 618 | Line 624
624          # The average of the manually input background spots
625          #are to be subtracted.
626              for i in range(1,dataobj.ROInum+1):
627 <                bgsignal = getbackgroundvalue(dataobj,bgspot)
627 >                bgsignal = get_background_value(dataobj,bgspot)
628                  newdata[i] = data[i]-bgsignal
629              dataobj.updateStatus(BackgroundType='Manually choosen')
630          dataobj.data = newdata
631 <        dataobj.updateStatus(BackgroundSubtraction=True)
631 >        dataobj.updateStatus(background_subtraction=True)
632          return dataobj
633          
634      else:
635          print 'The Background Subtraction should be run at the beginning, with the UNsplit curve.'    
636      return
637  
638 < def CurveSplit(obj, t_before, t_after, *t_inj):
639 <    '''Function: Split the whole curve that contains several injections
638 > def curve_split(obj, t_before, t_after, *t_inj):
639 >    """Function: Split the whole curve that contains several injections
640          into pieces.
641          The sample information will be checked during the split,
642              if the injection number in sampleinfo is not consistent
# Line 642 | Line 648
648          t_inj: the exact start time for each injection
649      Return: the data object with split data, and updated sample
650          information
651 <    '''
651 >    """
652      dataobj = copy.deepcopy(obj)
653      ROInum = dataobj.ROInum
654      t=dataobj.data[0]
# Line 667 | Line 673
673      for i in range(np.shape(Split_data)[0]):
674          Split_data[i]=Split_data[i]-Split_data[i][0]
675      
676 <    newsampleinfo, injectinfo = checksampleinfo(dataobj,t_before, t_after, t_inj)
676 >    newsampleinfo, injectinfo = check_sample_info(dataobj,t_before, t_after, t_inj)
677      if newsampleinfo != None :        
678          dataobj.data = Split_data
679          dataobj.updateStatus(DataSplit=True)
# Line 682 | Line 688
688      return
689      
690      
691 < def checksampleinfo(dataobj, t_before, t_after, t_inj):
692 <    '''Check if the sample information consistent with the injection
691 > def check_sample_info(dataobj, t_before, t_after, t_inj):
692 >    """Check if the sample information consistent with the injection
693          parameter input. If not, help the user to update sample information
694          for the split curve.
695 <    '''
695 >    """
696      injectinfo = []
697      if len(dataobj.sampleinfo) == len(t_inj):
698          for i,j in enumerate(t_inj):                
# Line 694 | Line 700
700          return dataobj.sampleinfo, injectinfo
701              
702      elif len(dataobj.sampleinfo) > len(t_inj):
703 <        print '''    The number of injection you just input does not match that in sample infomation.
704 <    If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object.'''
703 >        print """    The number of injection you just input does not match that in sample infomation.
704 >    If you just want to split part of the curve, please follow the following direction to update the sampleinfo in the new object."""
705          choose = str.upper(raw_input('Continue? (y/n)'))
706          if choose == 'Y':
707              newsampleinfo, injectinfo = [], []
# Line 731 | Line 737
737          return
738      
739      
740 < def writedata(Head,data_out,ROI_out,DataDir,output_format):
741 <    '''Write the data into a txt file.
742 <    '''
743 <    # create a nowfolder with time stamp to save the data
740 > def write_data(Head,data_out,ROI_out,DataDir,output_format):
741 >    """Write the data into a txt file.
742 >    """
743 >    # Modified 100405 CGL
744 >    # create a newfolder with time stamp to save the data
745      if output_format=='B':
746          ftitle='biosensor'
747          Tmpstr = str(ROI_out).replace(', ','_')
# Line 744 | Line 751
751          foutname='clamp-'+str(ROI_out)+'.txt'
752      else:
753          ftitle=''
754 <    #  change to the Root dir
755 <    os.chdir('..')        
756 <    try:
757 <        os.makedirs(DataDir)
758 <    except WindowsError:
759 <        pass
760 <    os.chdir(DataDir)
754 >        
755 >    # Change directories, if requested.
756 >    if (DataDir != ""):
757 >        os.chdir('..')        
758 >        try:
759 >            os.makedirs(DataDir)
760 >        ##except WindowsError:
761 >        except OSError:
762 >            pass
763 >        os.chdir(DataDir)
764      
765      np.savetxt('temp.txt',np.transpose(data_out),fmt='%f')
766      fdt=file('temp.txt','r')
# Line 769 | Line 779
779      print '-------The path of the exported data is "', os.getcwd(),'"-------'
780      
781      
782 < def DataOutputBiosensor(obj, DataDir, *ROI_out):
783 <    '''Export the data into a biosensor format txt file,
782 > def data_output_biosensor(obj, DataDir, *ROI_out):
783 >    """Export the data into a biosensor format txt file,
784          which can be operated in Scrubber.
785      Input: the data object, the output path, the ROIs selected to export
786 <    '''
786 >    """
787      # initialize the data output matrix, which will contain '(len(spot_out)*2)*n_inj' columns
788      dataobj = copy.deepcopy(obj)
789      data = dataobj.data
# Line 787 | Line 797
797      for n,i in enumerate(ROI_out):
798          new_ROIinfo.append(ROIinfo[i-1])        ##the new ROI info
799          if i != ROIinfo[i-1]['IDcal']:
800 <            # check if the ID number matchs position
800 >            # check if the ID number matches position
801              print n,i, ROIinfo[i-1]
802              return
803          else:
# Line 810 | Line 820
820      dataobj.updateStatus(Head=head1)    
821      dataobj.updateROIinfo(new_ROIinfo)
822      
823 <    writedata(head1,data_out,ROI_out,DataDir,'B')
823 >    write_data(head1,data_out,ROI_out,DataDir,'B')
824      
825      return dataobj
826      
827 < def DataOutputClamp(obj, DataDir, ROI_out):
828 <    '''Export the data into a Clamp format txt file,
827 > def data_output_clamp(obj, DataDir, ROI_out):
828 >    """Export the data into a Clamp format txt file,
829          which can be operated in Clamp.
830      Input: the data object, the output path, the selected ROI
831 <    '''
831 >    """
832      # initialize the data output matrix, which will contain '2*n_inj' columns
833      dataobj = copy.deepcopy(obj)
834      data = dataobj.data
835      n_inj = len(dataobj.sampleinfo)
836 +    n_inj = max(n_inj, 1)  # Must have at least one injection.
837      ROIinfo = dataobj.ROIinfo
838      ROInum = dataobj.ROInum
839      sampleinfo = dataobj.sampleinfo
840      
841      data_out = np.zeros((2*n_inj,np.shape(data)[1]))
842 <    if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
842 > ##  if ROI_out != ROIinfo[ROI_out-1]['IDcal']:
843 >    if (ROI_out != int(ROIinfo[ROI_out-1]['ID'])):
844          print 'Please check the ROI information.'
845          print ROI_out, ROIinfo[ROI_out-1]
846          return
# Line 865 | Line 877
877      dataobj.updateStatus(Head=Clamphead)    
878      dataobj.updateROIinfo(ROIinfo[ROI_out-1])
879      
880 <    writedata(Clamphead,data_out,ROI_out,DataDir,'C')
880 >    write_data(Clamphead,data_out,ROI_out,DataDir,'C')
881      
882      return dataobj
883   ##    return data_out,Clamphead
884  
885  
886 + if (__name__ == '__main__'):
887 +    # Print a short message if this module is run like an app.
888 +    print "This is the OSPRAI file type conversion tool."
889 +

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines