ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/osprai/osprai/trunk/converter.py
(Generate patch)
# Line 1 | Line 1
1   """
2   Converter: Perform the datafile reading, pre-processing and writing.
3   Yuhang Wan, Christopher Lausted
4 < Last modified on 100407 (yymmdd)
4 > Last modified on 100427 (yymmdd) by YW
5  
6   Typical Pipeline:
7   >import converter as cv
# Line 10 | Line 10
10   >dat = cv.calibrate(dat)
11   >dat = cv.background_subtract(dat)
12   >dat = cv.curve_split(dat, 100, 800, 200)
13 < >dat = cv.data_output_clamp(dat, "/dir", 0,1,2,3)
14 < >dat = cv.data_output_biosensor(dat, "/dir", 0)
13 > >dat = cv.data_output_clamp(dat, "/dir", 1)
14 > >dat = cv.data_output_biosensor(dat, "/dir", 1,2,3,4)
15 > >dat = cv.obj_save(dat, "saved_dataobj")
16   """
17 < __version__ = "100407"
17 > __version__ = "100427"
18  
19   ## Import libraries.
20   import numpy as np
21   import pylab as plt
22 < import sys
22 > ##import sys
23   import xlrd
24   import time
25   import os
26   import copy
27 < #import matplotlib.pyplot as plt
27 > import pickle
28   import SPRdataclass as spr
29  
30   ## Global constants and variables.
# Line 32 | Line 33
33   FMT_ICMTXT = 2       ## Plexera Instrument Control Module text.
34   FMT_CLAMP34 = 3      ## Morton & Myszka CLAMP Version 3.4+
35   FMT_DAMXLS = 4       ## Plexera Data Analysis Module Excel export.
36 <
36 > FMT_DATAOBJ = 5      ## Packed spr Data Object
37  
38   def check_format_input(fname):
39      """Examine the format of an input .txt data file."""
# Line 43 | Line 44
44          fp = open(fname, "r")
45      except IOError:
46          print "Error: Unable to open file %s for reading." % fname
47 <        sys.exit(0)
47 >        return
48      
49      ## Examine first line of text file to guess format.
50      fmt = FMT_UNKNOWN  
# Line 71 | Line 72
72          fp = open(fname, "r")
73      except IOError:
74          print 'Cannot open file %s for reading' % fname
75 <        sys.exit(0)
75 >        return
76      status = {'DatainputType':'SPRit data'}
77      # skip the first line of the input file
78      Tmpstr = fp.readline()
# Line 185 | Line 186
186      num_spot = sh.row_values(0).count('Relative Time')  
187      
188      for i in range(num_spot):
189 <        TmpHead=Head[colsize*i]      
189 >        TmpHead=unicode.encode(Head[colsize*i])
190          # now there are 5 columns for each ROI for Raw Intensity
191          # and 4 columns for each ROI for Satellite subtracted Intensity
192          DataList.append(sh.col_values(2+colsize*i,start,end))
# Line 196 | Line 197
197              key.append(j.split(': ')[0])
198              val.append(j.split(': ')[1])
199          tmpdic = dict(zip(key,val))
200 +        try:                                    # spot id
201 +            tmpdic['ID'] = int(tmpdic['ID'])    # either in form of "spot1" or "1"
202 +        except ValueError:
203 +            if str.upper(tmpdic['ID']).startswith('SPOT'):
204 +                tmpdic['ID'] = int(tmpdic['ID'][4:])
205 +            else:
206 +                print "Warning: Illegal galfile format."
207 +                print "The ID is neither 'spot1' form nor '1' form. \n", "'ID:'", tmp
208 +        
209          tmpdic['Position'] = (int(tmpdic['Block'])+1, int(tmpdic['Row'])+1, int(tmpdic['Column'])+1)
210          ROIinfo.append(tmpdic)
211  
# Line 249 | Line 259
259      newROIinfo = copy.deepcopy(ROIinfo)
260      StartspotList=np.array([i['ID'] for i in ROIinfo])
261      start_spot=min(StartspotList)
262 +
263      for i in range(num_spot):
264          tmp = ROIinfo[i]['ID']
265 <        try:
255 <            j=int(ROIinfo[i]['ID'])-(start_spot-1)
256 <        except ValueError:
257 <            ##return tmp
258 <            if str.upper(unicode.encode(tmp)).startswith(str.upper('spot')):
259 <                j=int(ROIinfo[i]['ID'][4:])-(start_spot-1)    
260 <            else:
261 <                print 'Illegal galfile format.\n', 'ID:', tmp
262 <                ##return tmp
263 <            
265 >        j=tmp-(start_spot-1)            
266          Shaped_data[j]=DataList[i+1]
267 <        newROIinfo[j]=ROIinfo[i]  
267 >        newROIinfo[j-1]=ROIinfo[i]  
268          
269      # pack the data and related information into SPRdata obj
270      dataformat = 0
# Line 351 | Line 353
353      try:
354          fp = open(fkey, "r")
355      except IOError:
356 <        print 'Cannot open file %s for reading' % fname
357 <        sys.exit(0)
356 >        print "Cannot open file %s for reading" % fname
357 >        return
358      ROIinfo = []
359      firstline=fp.readline()     # skip the first line
360   #    Table.append([firstline.split('\t')[0],firstline.split('\t')[1],firstline.split('\t')[2],firstline.split('\t')[3]])
361      for eachline in fp:        
362          ROIdic = {}
363          tmplist = eachline.strip().split('\t')
364 <        ROIdic['ID'] = tmplist[0]       # spot id
364 >        
365 >        try:                                    # spot id
366 >            ROIdic['ID'] = int(tmplist[0])      # either in form of "spot1" or "1"
367 >        except ValueError:
368 >            if str.upper(tmplist[0]).startswith('SPOT'):
369 >                ROIdic['ID'] = int(tmplist[0][4:])
370 >            else:
371 >                print "Error: Unable to read Spot IDs"
372 >                
373          ROIdic['Name'] = tmplist[1]     # name of the protein immobilized
374          try:
375              ROIdic['Conc'] = tmplist[2]
376 <            ROIdic['Background Spot'] = int(tmplist[3])
376 >            ROIdic['Background Spot'] = map(int, tmplist[3].split(';')) # in case more than one spot set as background with ";" as SEPARATOR
377          except IndexError:
378              pass
379          ROIinfo.append(ROIdic)
# Line 372 | Line 382
382      print 'There are %d ROIs in the Key file.' % len(ROIinfo)
383      return ROIinfo
384  
385 + def keyfile_read_fake(obj):
386 +    ROIinfo = []
387 +    spot_num = obj.ROInum
388 +    for i in range(spot_num):
389 +        ROIdic = {}
390 +        ROIdic['ID'] = i+1
391 +        ROIdic['Name'] = "Ligand"+str(i+1)
392 +        ROIinfo.append(ROIdic)
393 +    return ROIinfo
394 +        
395  
396   def galfile_read(fname):
397      """Function: Read the GAL file.
# Line 404 | Line 424
424  
425          ##
426          ROIdic['Name'] = tmplist[3]
427 <        ROIdic['ID'] = tmplist[4]
427 > ##        ROIdic['ID'] = tmplist[4]
428 >        try:                                    # spot id
429 >            ROIdic['ID'] = int(tmplist[4])      # either in form of "spot1" or "1"
430 >        except ValueError:
431 >            if str.upper(tmplist[4]).startswith('SPOT'):
432 >                ROIdic['ID'] = int(tmplist[4][4:])
433 >            else:
434 >                print "Error: Unable to read Spot IDs"
435 >        
436          try:
437              ROIdic['Conc'] = tmplist[5]
438              ROIdic['Family'] = tmplist[6]
# Line 418 | Line 446
446                  Bpos = tuple(map(int, i.split(',')))
447                  Bid.append(IDcal(Bpos))
448                  ##
449 <            ROIdic['Background Spot'] = tuple(Bid)
449 >            ROIdic['Background Spot'] = Bid     ##tuple(Bid)
450          except IndexError:
451              pass
452          
453 <        if int(ROIdic['ID'][4:]) != ROIdic['IDcal']:
453 >        if ROIdic['ID'] != ROIdic['IDcal']:
454              print 'The spot ID should be consistent with the position. Please check the ID.'
455              break
456          else:
# Line 480 | Line 508
508      if fname.split('.')[-1] == 'txt':
509      
510          Format_input, Firstline = check_format_input(fname)    
511 <        if Format_input == 1:   # for SPRit file format
511 >        if Format_input == FMT_SPRIT:   # for SPRit file format
512              dataobj = sprit_format(fname)  
513              print '-'*10,' This is a SPRit data file. ','-'*10        
514              
515 <        elif Format_input == 2: # for Plexera ICM file format
515 >        elif Format_input == FMT_ICMTXT: # for Plexera ICM file format
516              dataobj = plexera_icm_format(fname)
517              print '-'*10,' This is a Plexera ICM exported data file. ','-'*10
518  
519 <        elif Format_input == 3:         # for Clamp datafile format
519 >        elif Format_input == FMT_CLAMP34:         # for Clamp datafile format
520              dataobj = read_clamp_data(fname)
521              print '-'*10,' This is a Clamp file. ','-'*10
522          
523          else:
524              print '-'*10,' unreadable file input! ','-'*10
525 <            sys.exit(0)
525 >            return
526          
527 <        if Format_input == 1 or Format_input == 2:
527 >        if Format_input == FMT_SPRIT or Format_input == FMT_ICMTXT:
528              flag = str.upper(raw_input('Load the key file? (y/n): '))
529              if flag == 'Y':
530                  fkey = raw_input('Input the path of the key file: ')
531                  ROIinfo = keyfile_read(fkey)
532                  dataobj.updateROIinfo(ROIinfo)        
533 +            else:
534 +                dataobj.updateROIinfo(keyfile_read_fake(dataobj))  
535      
536      elif fname.split('.')[-1] == 'xls':
537          # for the DAM Spreadsheet file format
538          print '-'*10,' This is a Plexera DAM exported data file. ','-'*10
539 +        Format_input = FMT_DAMXLS
540          book = xlrd.open_workbook(fname)
541          #shape the data in the Spreadsheet, whether single sheet or multiple sheets
542          dataobj=dam_spreadsheet_format(book)
# Line 514 | Line 545
545              fgal = raw_input('Input the path of the gal file: ')
546              ROIinfo = galfile_read(fgal)
547              dataobj.updateROIinfo(ROIinfo)
548 +            
549 +    elif fname.split('.')[-1] == 'obj':
550 +        # for the saved data object file format
551 +        print '-'*10,' This is a saved data object file. ','-'*10
552 +        Format_input = FMT_DATAOBJ
553 +        fp = file(fname, 'r+')
554 +        dataobj = pickle.load(fp)  
555 +        fp.close()        
556 +    
557      flag = str.upper(raw_input('Load the experimental analyte file? (y/n): '))
558      if flag == 'Y':
559          fprotocol = raw_input('Input the path of the analyte table: ')
560          sampleinfo = method_read(fprotocol)
561          dataobj.updateSampleinfo(sampleinfo)
562 <    else:
562 >    elif Format_input != FMT_CLAMP34 and Format_input != FMT_DATAOBJ:    
563 >        # clamp data and obj data may contain the experimental protocol already
564          dataobj.updateSampleinfo(method_read_fake())
565  
566      return dataobj
567      
568  
528
569   def outlier_remove(obj):
570      ##dataobj.viewAll()
571      """Function: Remove unwanted data points from noisy periods.
# Line 588 | Line 628
628      """Get the averaged value of background spots for each ROI."""
629      bgsignal = obj.data[0]*0
630      for j in bgids:
631 <        if j == obj.ROIinfo[j-1]['ID']:
631 >        if j == int(obj.ROIinfo[j-1]['ID']):    # in case the ID in ROIinfo is string type
632              bgsignal = bgsignal + obj.data[j]
633                  
634 <                
595 <        elif j != obj.ROIinfo[j-1]['ID']:
634 >        elif j != int(obj.ROIinfo[j-1]['ID']):
635              for n,eachspot in enumerate(obj.ROIinfo):
636 <                if j == eachspot['ID']:
636 >                if j == int(eachspot['ID']):
637                      bgsignal = bgsignal + obj.data[n+1]
638 +                    
639      bgsignal = bgsignal/len(bgids)
640      return bgsignal
641      
# Line 625 | Line 665
665                  newdata[i] = data[i]-bgsignal
666              # update the status of the data object.
667              dataobj.updateStatus(BackgroundType='Default in Gal')
668 +            print "The background spot embodied in gal is used."
669                  
670          else:
671          # The average of the manually input background spots
# Line 632 | Line 673
673              for i in range(1,dataobj.ROInum+1):
674                  bgsignal = get_background_value(dataobj,bgspot)
675                  newdata[i] = data[i]-bgsignal
676 <            dataobj.updateStatus(BackgroundType='Manually choosen')
676 >            dataobj.updateStatus(BackgroundType='Manually chosen')
677 >            print "The background spot is manually chosen."
678 >            
679          dataobj.data = newdata
680          dataobj.updateStatus(background_subtraction=True)
681          return dataobj
# Line 783 | Line 826
826      fdt.close()
827      fout.close()
828      print '-------The path of the exported data is "', os.getcwd(),'"-------'
829 +    return
830      
831      
832   def data_output_biosensor(obj, DataDir, *ROI_out):
# Line 878 | Line 922
922      Clamphead=Clamphead+'\n'+head1
923      
924      dataobj.data = data_out
925 <    dataobj.dataformat = 4      # biosensor dataformat
925 >    dataobj.dataformat = 4      # clamp dataformat
926      dataobj.updateStatus(DataType='Clamp data')
927      dataobj.updateStatus(Head=Clamphead)    
928      dataobj.updateROIinfo(ROIinfo[ROI_out-1])
# Line 888 | Line 932
932      return dataobj
933   ##    return data_out,Clamphead
934  
935 + def obj_save(obj, foutname):
936 +    foutname = foutname + '.obj'        # the file extension of the saved data object is .obj
937 +    fout = file(foutname, 'w+')
938 +    pickle.dump(obj, fout)
939 +    fout.close()
940 +    print '-------The path of the exported obj is "', os.getcwd(),'"-------'
941 +    return
942  
943   if (__name__ == '__main__'):
944      # Print a short message if this module is run like an app.

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines