#!/usr/bin/env python

import sys,os,os.path
import re
import sha
import math
import tempfile
import csv
import gzip
import zlib
from array import array
from base64 import b64encode,b64decode
from urllib import quote,unquote
from xml.sax import *

class NameVersion:
    """Name and three-part version number of this conversion program."""

    def __init__(self):
        # Program name as reported in the generated files.
        self.name_ = "PyMsXML"
        # major.minor.revision
        self.majorVer, self.minorVer, self.revVer = 0, 5, 0

    def name(self):
        """Return the program name."""
        return self.name_

    def version(self):
        """Return the version as a dotted "major.minor.revision" string."""
        parts = (self.majorVer, self.minorVer, self.revVer)
        return ".".join(["%d" % (p,) for p in parts])

def fileSHA(filename):
    """Return the lower-case hexadecimal SHA-1 digest of a file's contents.

    The file is read in binary mode in fixed-size chunks, so arbitrarily
    large files can be hashed without loading them into memory.

    filename -- path of the file to hash.

    Raises IOError if the file cannot be opened or read.
    """
    # hashlib supersedes the deprecated sha module; fall back to sha only on
    # interpreters that predate hashlib.
    try:
        import hashlib
        digest = hashlib.sha1()
    except ImportError:
        digest = sha.new()
    h = open(filename,'rb')
    try:
        while True:
            chunk = h.read(65536)
            # An empty read means end of file (works for str and bytes alike).
            if not chunk:
                break
            digest.update(chunk)
    finally:
        # Release the handle even if a read fails part-way through.
        h.close()
    return digest.hexdigest().lower()

class ToMzXML:
    """Write the spectra produced by a reader object as an mzXML document.

    The scans are first rendered into a temporary file, which yields the
    scan count and the byte offset of every scan; the final file is then
    assembled as header + scans + scan index + SHA-1 trailer.

    Per-spectrum metadata is a dictionary.  Keys shaped
    "<group>.<name>:<format>" (groups used here: "scan.", "scanOrigin.",
    "precursorMz.", "maldi.", "nameValue.") are rendered with the %-format
    that follows the colon.  Keys of those groups that carry no ":<format>"
    part are skipped by the generic attribute writers.
    """

    def __init__(self,reader,filename,cmpfmt=None,filt=None,peaksCompress=False,version="3.0"):
        """Set up a writer.

        reader        -- source of spectra and instrument metadata.
        filename      -- output path; a ".gz" suffix selects gzip output.
        cmpfmt        -- None, or 'gz' to gzip the output.
        filt          -- optional object whose test(d) decides whether a
                         spectrum with metadata d is written.
        peaksCompress -- zlib-compress the peak lists (version 3.0 only).
        version       -- mzXML schema version: '2.1', '2.2' or '3.0'.

        Raises ValueError for an unknown version, or when peak compression
        is requested for a version below 3.0.
        """
        self.filename = filename
        self.compressfmt = cmpfmt
        self.reader = reader
        self.initSHA()
        self.writeByteCounter = 0
        self.actualScanCount = 0
        self.scanIndices = {}
        self.filter = filt
        # String exceptions are rejected by Python 2.6+, so raise a real one.
        if version not in ('2.1','2.2','3.0'):
            raise ValueError("Bad mzXML version \"%s\""%(version,))
        self.version = version
        if peaksCompress and float(self.version) < 3.0:
            raise ValueError("Cannot compress peaks until mzXML version 3.0")
        self.peakcompress = peaksCompress

    def __del__(self):
        # Best-effort removal of a temporary scan file left by a failed write().
        tmpName = getattr(self,'tmpFileName','')
        if tmpName:
            try:
                os.unlink(tmpName)
            except OSError:
                # Already gone or not removable; nothing useful can be done
                # from a destructor.
                pass

    def getFilename(self):
        """Return the output filename given to the constructor."""
        return self.filename

    def initSHA(self):
        """Start a fresh SHA-1 digest."""
        # hashlib supersedes the deprecated sha module; fall back to sha only
        # on interpreters that predate hashlib.
        try:
            import hashlib
            self.sha1 = hashlib.sha1()
        except ImportError:
            self.sha1 = sha.new()

    def updateSHA(self,s):
        """Feed s into the running SHA-1 digest."""
        self.sha1.update(s)

    def getSHA(self):
        """Return the running digest as a lower-case hex string."""
        return self.sha1.hexdigest().lower()

    def writeString(self,fh,data):
        """Write data to fh, updating the byte counter and the digest."""
        self.writeByteCounter += len(data)
        self.sha1.update(data)
        fh.write(data)

    def _prefixed(self,d,prefix):
        """Yield (name, format, value) for keys of d shaped '<prefix><name>:<format>'."""
        for (k,v) in d.items():
            if not k.startswith(prefix):
                continue
            # Split on the first colon only, so formats may contain colons.
            parts = k.split(':',1)
            if len(parts) != 2:
                # No format part: nothing to render this value with.
                continue
            yield (parts[0][len(prefix):],parts[1],v)

    def _attr(self,name,fmt,value):
        """Render ' name="..."' with value formatted by the %-format fmt."""
        return (" %%s=\"%s\""%(fmt,))%(name,value)

    def _nameValue(self,name,fmt,value):
        """Render a <nameValue> element with value formatted by fmt."""
        outStr = '<nameValue'
        outStr += ' name="%s"'%(name,)
        outStr += (' value="%s"'%(fmt,))%(value,)
        outStr += '/>\n'
        return outStr

    def write(self,debug=False):
        """Write the complete mzXML file.

        debug -- passed on to write_scans(), which then limits the output.

        Exits the process with status 1 if the compression format is not
        recognised.
        """
        # Make a temporary file for the scan data to count the number of spectra.
        (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
        tmpFile = os.fdopen(tmpFileFD,'wb')
        try:
            # Side effects: fills in self.scanIndices and sets self.actualScanCount.
            self.write_scans(tmpFile,debug)
        finally:
            tmpFile.close()

        # Reset: from here on the digest and byte counter describe the final file.
        self.initSHA()
        self.writeByteCounter = 0

        gzName = self.filename.endswith('.gz')
        if self.compressfmt is None and not gzName:
            xmlFile = open(self.filename,'wb')
        elif gzName:
            xmlFile = gzip.open(self.filename,'wb')
        elif self.compressfmt == 'gz':
            xmlFile = gzip.open(self.filename+'.gz','wb')
        else:
            sys.stderr.write("Bad compressfmt specification\n")
            sys.exit(1)

        try:
            self._writeHeader(xmlFile)

            # Scan offsets were recorded relative to the start of the scan
            # data; this is where that data starts in the final file.
            scanOffset = self.writeByteCounter

            tmpFile = open(self.tmpFileName,'rb')
            try:
                while True:
                    tmpStr = tmpFile.read(1024)
                    if not tmpStr:
                        break
                    self.writeString(xmlFile,tmpStr)
            finally:
                tmpFile.close()
            os.unlink(self.tmpFileName)
            self.tmpFileName = ''

            self.writeString(xmlFile,"</msRun>\n")

            indexOffset = self.writeByteCounter

            outStr = "<index "
            outStr += "name=\"scan\" "
            outStr += ">\n"
            self.writeString(xmlFile,outStr)

            for i in range(1,self.actualScanCount+1):
                outStr = "<offset id=\"%d\">" % (i,)
                outStr += "%d</offset>\n" % (self.scanIndices[i] + scanOffset,)
                self.writeString(xmlFile,outStr)

            self.writeString(xmlFile,"</index>\n")
            self.writeString(xmlFile,"<indexOffset>%d</indexOffset>\n" % (indexOffset,))

            # The digest written covers everything up to and including the
            # opening <sha1> tag.
            self.writeString(xmlFile,"<sha1>")
            self.writeString(xmlFile,self.getSHA())
            self.writeString(xmlFile,"</sha1>\n")

            self.writeString(xmlFile,"</mzXML>\n")
        finally:
            # Close (and, for gzip, finish) the output even if writing failed.
            xmlFile.close()

    def _writeHeader(self,xmlFile):
        """Write everything that precedes the first <scan> element."""
        self.writeString(xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')

        outStr = "<mzXML "
        outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s\" "%(self.version,)
        outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
        outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s http://sashimi.sourceforge.net/schema_revision/mzXML_%s/mzXML_idx_%s.xsd\""%(self.version,self.version,self.version)
        outStr += ">\n"
        self.writeString(xmlFile,outStr)

        outStr = "<msRun"
        outStr += " scanCount=\"%d\"" % (self.actualScanCount,)
        for (name,fmt,v) in self._prefixed(self.reader.getMsRunMetaData(),''):
            # For version 2.2 the startTime and endTime attributes are left out.
            if self.version not in ('2.2',) or (name != 'startTime' and name != 'endTime'):
                outStr += self._attr(name,fmt,v)
        outStr += ">\n"
        self.writeString(xmlFile,outStr)

        for f in self.reader.getFilenames():
            if len(f) == 2:
                # (name, sha1): written as a RAWData file with a URL-quoted name.
                outStr = "<parentFile "
                outStr += "fileName=\"%s\" " % (quote(f[0]),)
                outStr += "fileType=\"RAWData\" "
                outStr += "fileSha1=\"%s\"" % (f[1],)
                outStr += "/>\n"
            else:
                # (name, type, sha1): written as given.
                outStr = "<parentFile "
                outStr += "fileName=\"%s\" " % (f[0],)
                outStr += "fileType=\"%s\" " % (f[1],)
                outStr += "fileSha1=\"%s\""  % (f[2],)
                outStr += "/>\n"
            self.writeString(xmlFile,outStr)

        if float(self.version) >= 3.0:
            outStr = "<msInstrument msInstrumentID=\"%s\">\n" % (self.reader.msInstrumentID(),)
        else:
            outStr = "<msInstrument>\n"

        outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),)
        outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),)
        if self.reader.msIonisation():
            outStr += "<msIonisation category=\"msIonisation\" value=\"%s\"/>\n" % (self.reader.msIonisation(),)
        if self.reader.msMassAnalyzer():
            outStr += "<msMassAnalyzer category=\"msMassAnalyzer\" value=\"%s\"/>\n" % (self.reader.msMassAnalyzer(),)
        if self.reader.msDetector():
            outStr += "<msDetector category=\"msDetector\" value=\"%s\"/>\n" % (self.reader.msDetector(),)
        self.writeString(xmlFile,outStr)

        outStr = "<software "
        outStr += "type=\"acquisition\" "
        outStr += "name=\"%s\" "%(self.reader.acquisitionSoftware(),)
        outStr += "version=\"%s\""%(self.reader.acquisitionSoftwareVersion(),)
        outStr += "/>\n"
        self.writeString(xmlFile,outStr)

        self.writeString(xmlFile,"</msInstrument>\n")

        self.writeString(xmlFile,"<dataProcessing>\n")

        nv = NameVersion()

        outStr = "<software "
        outStr += "type=\"conversion\" "
        outStr += "name=\"%s\" "%(nv.name(),)
        outStr += "version=\"%s\"" % (nv.version(),)
        outStr += "/>\n"
        self.writeString(xmlFile,outStr)

        # One group of three processingOperation elements per MS level for
        # which the reader reports peak detection.
        for level in (1,2):
            if self.reader.peakDetect(level):
                outStr = "<processingOperation "
                outStr += "name=\"peak_detection_level_%d\" "%(level,)
                outStr += "value=\"true\""
                outStr += "/>\n"
                outStr += "<processingOperation "
                outStr += "name=\"peak_detection_level_%d_threshold\" "%(level,)
                outStr += "value=\"1%\""
                outStr += "/>\n"
                outStr += "<processingOperation "
                outStr += "name=\"peak_detection_level_%d_software\" "%(level,)
                outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),)
                outStr += "/>\n"
                self.writeString(xmlFile,outStr)

        outStr = "<processingOperation "
        outStr += "name=\"min_peaks_per_spectra\" "
        outStr += "value=\"1\""
        outStr += "/>\n"
        self.writeString(xmlFile,outStr)

        self.writeString(xmlFile,"</dataProcessing>\n")

        if self.reader.maldi():
            outStr = '<spotting>\n'
            for d in self.reader.plateData():
                outStr += '<plate plateID="%s" spotXCount="%s" spotYCount="%s">\n' % \
                          (d['plateID'],d['spotXCount'],d['spotYCount'])
                outStr += '<plateManufacturer category="plateManufacturer" value="%s"/>\n' % \
                          (d['plateManufacturer'],)
                outStr += '<plateModel category="plateModel" value="%s"/>\n' % \
                          (d['plateModel'],)
                for s in self.reader.spotData(d['plateID']):
                    outStr += '<spot spotID="%s" spotXPosition="%s" spotYPosition="%s">\n' % \
                              (s['spotID'],s['spotXPosition'],s['spotYPosition'])
                    outStr += '<maldiMatrix category="maldiMatrix" value="%s"/>\n' % \
                              (s['maldiMatrix'],)
                    outStr += '</spot>\n'
                outStr += '</plate>\n'
            outStr += '</spotting>\n'
            self.writeString(xmlFile,outStr)

        if self.reader.lc():
            # Nothing is written for LC runs.
            pass

    def write_scans(self,xmlFile,debug=False):
        """Write one (possibly nested) <scan> element per spectrum to xmlFile.

        Each spectrum from self.reader.spectra() is a pair (s, d): s is a
        flat array of alternating m/z and intensity values, d the metadata
        dictionary.  d must contain 'msLevel', and 'precursorMz' when
        msLevel is above 1; otherwise the process exits with status 1.

        Side effects: resets self.writeByteCounter, records the byte offset
        of every scan in self.scanIndices (keyed by scan number, starting at
        1) and sets self.actualScanCount.

        debug -- stop after 10 scans and write at most 20 values per scan.
        """
        msLevel = 0
        scanNumber = 0
        ancestors = []      # (scan number, spectrum) of each enclosing scan
        prevSpec = None
        self.writeByteCounter = 0

        for (s,d) in self.reader.spectra():

            if self.filter is not None and not self.filter.test(d):
                continue

            if debug and scanNumber >= 10:
                break

            if 'msLevel' not in d:
                sys.stderr.write("Required scan attributes missing.\n")
                sys.exit(1)

            prevLevel = msLevel
            msLevel = d['msLevel']

            if prevLevel < msLevel and prevLevel > 0:
                # We went "in" a level, push scan number of parent
                ancestors.append((scanNumber,prevSpec))
            elif prevLevel > msLevel and msLevel > 0:
                # We came "out": drop one ancestor per level climbed (a no-op
                # if there are none), so no stale parent spectra are kept.
                del ancestors[-(prevLevel-msLevel):]

            # Close the previous scan, plus one enclosing scan per level climbed.
            if prevLevel > msLevel:
                outStr = "</scan>\n" * (prevLevel-msLevel+1)
            elif prevLevel > 0 and prevLevel == msLevel:
                outStr = "</scan>\n"
            else:
                outStr = ''
            self.writeString(xmlFile,outStr)

            scanNumber = scanNumber + 1

            self.scanIndices[scanNumber] = self.writeByteCounter

            # s holds m/z at the even and intensity at the odd positions.
            peaksCount = len(s)//2
            mzs = s[0:peaksCount*2:2]
            intensities = s[1:peaksCount*2:2]
            totIonCurrent = sum(intensities)

            outStr = "<scan num=\"%d\" msLevel=\"%d\"" % (scanNumber,msLevel)
            outStr += " peaksCount=\"%d\"" % (peaksCount,)
            if peaksCount > 0:
                # The base peak is the first peak of maximal intensity.
                basePeakIntensity = max(intensities)
                basePeakMz = mzs[intensities.index(basePeakIntensity)]
                outStr += " lowMz=\"%f\"" % (min(mzs),)
                outStr += " highMz=\"%f\"" % (max(mzs),)
                outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,)
                outStr += " basePeakMz=\"%f\"" % (basePeakMz,)
                outStr += " basePeakIntensity=\"%f\"" % (basePeakIntensity,)
            else:
                # A spectrum without peaks has no m/z range or base peak to
                # report, so those attributes are left out.
                outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,)

            for (name,fmt,v) in self._prefixed(d,'scan.'):
                outStr += self._attr(name,fmt,v)
            outStr += ">\n"
            self.writeString(xmlFile,outStr)

            if self.version in ('2.2',):
                originAttrs = ''
                for (name,fmt,v) in self._prefixed(d,'scanOrigin.'):
                    originAttrs += self._attr(name,fmt,v)
                if originAttrs:
                    self.writeString(xmlFile,"<scanOrigin"+originAttrs+"/>\n")

            if msLevel > 1:

                if 'precursorMz' not in d:
                    sys.stderr.write("Required precursorMz attribute missing.\n")
                    sys.exit(1)

                # We scan the parent spectrum in the region of the
                # precursorMz to establish the intensity. Optionally,
                # we tune the precursorMz itself, on the basis of this

                pMz = d['precursorMz']
                tol = self.reader.precursorPrecision(pMz)
                pMzLB = pMz-tol
                pMzUB = pMz+tol
                pMzIntensity = 0
                pMzTune = None      # m/z of the strongest parent peak in the window

                if len(ancestors) > 0:
                    parent = ancestors[-1][1]
                    for i in range(0,len(parent),2):
                        x = parent[i]; y = parent[i+1]
                        if x < pMzLB:
                            continue
                        if x > pMzUB:
                            # Stops at the first peak above the window
                            # (presumably the peaks are sorted by m/z).
                            break
                        if pMzIntensity < y:
                            pMzIntensity = y
                            pMzTune = x

                    # Only tune when a parent peak was actually found.
                    if self.reader.precursorTune() and pMzTune is not None:
                        pMz = pMzTune

                outStr = "<precursorMz"
                for (name,fmt,v) in self._prefixed(d,'precursorMz.'):
                    outStr += self._attr(name,fmt,v)
                if len(ancestors) > 0:
                    outStr += " precursorScanNum=\"%d\""%(ancestors[-1][0],)
                    outStr += " precursorIntensity=\"%f\">"%(pMzIntensity,)
                else:
                    outStr += ">"
                outStr += "%f</precursorMz>\n" % (pMz,)
                self.writeString(xmlFile,outStr)

            if self.reader.maldi() and self.version in ('2.2',):
                outStr = '<maldi'
                for (name,fmt,v) in self._prefixed(d,'maldi.'):
                    outStr += self._attr(name,fmt,v)
                outStr += " plateID=\"%s\""%(d['plateID'],)
                outStr += " spotID=\"%s\""%(d['spotID'],)
                outStr += "/>\n"
                self.writeString(xmlFile,outStr)

            # Truncate before swapping, so that the in-place byte swaps
            # below are always undone on the same array.
            if debug:
                s = s[:20]

            # Spec says the byte order shall be
            # network, which is the same as big endian.
            if sys.byteorder != 'big':
                s.byteswap()

            specstr = s.tostring()

            # Restore the native byte order; s is used as the parent
            # spectrum of following scans.
            if sys.byteorder != 'big':
                s.byteswap()

            if self.peakcompress:
                specstr = zlib.compress(specstr,9)
                outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"zlib\" compressedLen=\"%d\">"%(len(specstr),)
            elif float(self.version) >= 3.0:
                outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"none\" compressedLen=\"%d\">"%(len(specstr),)
            else:
                outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">"
            self.writeString(xmlFile,outStr)

            self.writeString(xmlFile,b64encode(specstr))

            self.writeString(xmlFile,"</peaks>\n")

            if self.reader.maldi() and self.version not in ('2.2',):
                outStr = ''
                for (name,fmt,v) in self._prefixed(d,'maldi.'):
                    outStr += self._nameValue("maldi.%s"%(name,),fmt,v)
                outStr += '<nameValue'
                outStr += ' name="maldi.plateID"'
                outStr += ' value="%s"'%(d['plateID'],)
                outStr += '/>\n'
                outStr += '<nameValue'
                outStr += ' name="maldi.spotID"'
                outStr += ' value="%s"'%(d['spotID'],)
                outStr += '/>\n'
                self.writeString(xmlFile,outStr)

            if self.version not in ('2.2',):
                outStr = ''
                for (name,fmt,v) in self._prefixed(d,'scanOrigin.'):
                    outStr += self._nameValue("scanOrigin.%s"%(name,),fmt,v)
                self.writeString(xmlFile,outStr)

            outStr = ''
            for (name,fmt,v) in self._prefixed(d,'nameValue.'):
                outStr += self._nameValue(name,fmt,v)
            if len(outStr) > 0:
                self.writeString(xmlFile,outStr)

            xmlFile.flush()

            prevSpec = s

        # Close the last scan and all scans enclosing it.
        self.writeString(xmlFile,"</scan>\n" * msLevel)
        self.actualScanCount = scanNumber

class ToMzData:
    """Write the spectra produced by a reader object as an mzData document.

    The spectra are first rendered into a temporary file, which yields the
    spectrum count and a SHA-1 digest of the spectrum data (used in the
    accessionNumber attribute); the final file is then assembled as
    header + spectra + closing tags.

    Per-spectrum metadata is a dictionary.  Keys shaped
    "<group>.<name>:<format>" (groups used here: "scan.", "precursorMz.",
    "maldi.") are rendered with the %-format that follows the colon.  Keys
    of those groups that carry no ":<format>" part are skipped by the
    generic parameter writers.
    """

    def __init__(self,reader,filename,cmpfmt=None,filt=None):
        """Set up a writer.

        reader   -- source of spectra and software metadata.
        filename -- output path; a ".gz" suffix selects gzip output.
        cmpfmt   -- None, or 'gz' to gzip the output.
        filt     -- optional object whose test(d) decides whether a
                    spectrum with metadata d is written.
        """
        self.filename = filename
        self.compressfmt = cmpfmt
        self.reader = reader
        self.actualScanCount = 0
        self.initSHA()
        self.filter = filt

    def writeString(self,fh,data):
        """Write data to fh and feed it into the running digest."""
        self.sha1.update(data)
        fh.write(data)

    def initSHA(self):
        """Start a fresh SHA-1 digest."""
        # hashlib supersedes the deprecated sha module; fall back to sha only
        # on interpreters that predate hashlib.
        try:
            import hashlib
            self.sha1 = hashlib.sha1()
        except ImportError:
            self.sha1 = sha.new()

    def updateSHA(self,s):
        """Feed s into the running SHA-1 digest."""
        self.sha1.update(s)

    def getSHA(self):
        """Return the running digest as a lower-case hex string."""
        return self.sha1.hexdigest().lower()

    def _prefixed(self,d,prefix):
        """Yield (name, format, value) for keys of d shaped '<prefix><name>:<format>'."""
        for (k,v) in d.items():
            if not k.startswith(prefix):
                continue
            # Split on the first colon only, so formats may contain colons.
            parts = k.split(':',1)
            if len(parts) != 2:
                # No format part: nothing to render this value with.
                continue
            yield (parts[0][len(prefix):],parts[1],v)

    def _removeTmpFile(self):
        """Delete the temporary spectrum file, if there is one."""
        tmpName = getattr(self,'tmpFileName','')
        if tmpName:
            try:
                os.unlink(tmpName)
            except OSError:
                # Already gone or not removable; cleanup is best-effort.
                pass
        self.tmpFileName = ''

    def write(self,debug=False):
        """Write the complete mzData file.

        debug -- passed on to write_scans(), which then limits the output.

        Exits the process with status 1 if the compression format is not
        recognised.  The temporary file is removed in every case.
        """
        # Make a temporary file for the scan data to count the number of spectra.
        (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
        try:
            tmpFile = os.fdopen(tmpFileFD,'wb')
            try:
                # Side effect: sets self.actualScanCount.
                self.write_scans(tmpFile,debug)
            finally:
                tmpFile.close()

            # Digest of the spectrum data alone; it identifies the document.
            scanhash = self.getSHA()

            self.initSHA()

            gzName = self.filename.endswith('.gz')
            if self.compressfmt is None and not gzName:
                xmlFile = open(self.filename,'wb')
            elif gzName:
                xmlFile = gzip.open(self.filename,'wb')
            elif self.compressfmt == 'gz':
                xmlFile = gzip.open(self.filename+'.gz','wb')
            else:
                sys.stderr.write("Bad compressfmt specification\n")
                sys.exit(1)

            try:
                self._writeDocument(xmlFile,scanhash)
            finally:
                # Close (and, for gzip, finish) the output; without this a
                # compressed file can end up truncated.
                xmlFile.close()
        finally:
            self._removeTmpFile()

    def _writeDocument(self,xmlFile,scanhash):
        """Write header, the spectra from the temporary file, and closing tags."""
        self.writeString(xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')

        outStr = "<mzData "
        outStr += "version=\"1.05\" "
        outStr += "accessionNumber=\"pymsxml:%s\" "%(scanhash,)
        outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
        outStr += "xsi:noNamespaceSchemaLocation=\"http://psidev.sourceforge.net/ms/xml/mzdata/mzdata.xsd\""
        outStr += ">\n"
        self.writeString(xmlFile,outStr)

        outStr = "<description>\n"
        outStr += "<admin>\n"
        outStr += "<sampleName></sampleName>\n"
        outStr += "<contact>\n"
        outStr += "<name></name>\n"
        outStr += "<institution></institution>\n"
        outStr += "</contact>\n"
        outStr += "</admin>\n"
        outStr += "<instrument>\n"
        outStr += "<instrumentName></instrumentName>\n"
        outStr += "<source>\n"
        outStr += "</source>\n"
        outStr += "<analyzerList count=\"1\">\n"
        outStr += "<analyzer>\n"
        outStr += "</analyzer>\n"
        outStr += "</analyzerList>\n"
        outStr += "<detector>\n"
        outStr += "</detector>\n"
        outStr += "</instrument>\n"
        outStr += "<dataProcessing>\n"
        outStr += "<software>\n"
        outStr += "<name>%s</name>\n"%(self.reader.acquisitionSoftware(),)
        outStr += "<version>%s</version>\n"%(self.reader.acquisitionSoftwareVersion(),)
        outStr += "</software>\n"
        outStr += "<software>\n"
        nv = NameVersion()
        outStr += "<name>%s</name>\n"%(nv.name(),)
        outStr += "<version>%s</version>\n"%(nv.version(),)
        outStr += "</software>\n"
        outStr += "</dataProcessing>\n"
        outStr += "</description>\n"
        self.writeString(xmlFile,outStr)

        outStr = "<spectrumList"
        outStr += " count=\"%d\">\n"%(self.actualScanCount,)
        self.writeString(xmlFile,outStr)

        tmpFile = open(self.tmpFileName,'rb')
        try:
            while True:
                tmpStr = tmpFile.read(1024)
                if not tmpStr:
                    break
                self.writeString(xmlFile,tmpStr)
        finally:
            tmpFile.close()

        self.writeString(xmlFile,"</spectrumList>\n</mzData>\n")

    def write_scans(self,xmlFile,debug=False):
        """Write one <spectrum> element per spectrum to xmlFile.

        Each spectrum from self.reader.spectra() is a pair (s, d): s is a
        flat array of alternating m/z and intensity values, d the metadata
        dictionary.  d must contain 'msLevel', and 'precursorMz' when
        msLevel is above 1; otherwise the process exits with status 1.

        Side effects: restarts the SHA-1 digest and sets
        self.actualScanCount.

        debug -- stop after 10 spectra and write at most 100 characters of
                 each encoded array.
        """
        msLevel = 0
        scanNumber = 0
        ancestors = []      # (scan number, spectrum) of each enclosing scan
        prevSpec = None

        self.initSHA()

        for (s,d) in self.reader.spectra():

            if self.filter is not None and not self.filter.test(d):
                continue

            if debug and scanNumber >= 10:
                break

            if 'msLevel' not in d:
                sys.stderr.write("Required scan attributes missing.\n")
                sys.exit(1)

            prevLevel = msLevel
            msLevel = d['msLevel']

            if prevLevel < msLevel and prevLevel > 0:
                # We went "in" a level, push scan number of parent
                ancestors.append((scanNumber,prevSpec))
            elif prevLevel > msLevel and msLevel > 0:
                # We came "out": drop one ancestor per level climbed; this is
                # a no-op (not an error) when there are none.
                del ancestors[-(prevLevel-msLevel):]

            scanNumber = scanNumber + 1

            # s holds m/z at the even and intensity at the odd positions.
            peaksCount = len(s)//2
            mzs = s[0:peaksCount*2:2]
            intensities = s[1:peaksCount*2:2]
            totIonCurrent = sum(intensities)

            outStr = "<spectrum id=\"%d\">\n" % (scanNumber,)
            outStr += "<spectrumDesc>\n"
            outStr += "<spectrumSettings>\n"
            self.writeString(xmlFile,outStr)

            outStr = "<spectrumInstrument"
            outStr += " msLevel=\"%d\""%(msLevel,)
            if 'scan.startMz' in d and 'scan.endMz' in d:
                outStr += " mzRangeStart=\"%f\""%(d['scan.startMz'],)
                outStr += " mzRangeStop=\"%f\""%(d['scan.endMz'],)
            outStr += ">\n"
            for (name,fmt,v) in self._prefixed(d,'scan.'):
                if name == "polarity":
                    if v == '+':
                        outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Positive",)
                    elif v == '-':
                        outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Negative",)
                elif name == 'retentionTime':
                    outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"TimeInSeconds\" value=\"%f\"/>\n"%(v,)
                else:
                    outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(fmt,))%(name,v)
            if peaksCount > 0:
                # The base peak is the first peak of maximal intensity.
                basePeakIntensity = max(intensities)
                basePeakMz = mzs[intensities.index(basePeakIntensity)]
                outStr += "<userParam name=\"lowMz\" value=\"%f\"/>\n"%(min(mzs),)
                outStr += "<userParam name=\"highMz\" value=\"%f\"/>\n"%(max(mzs),)
                outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,)
                outStr += "<userParam name=\"basePeakMz\" value=\"%f\"/>\n"%(basePeakMz,)
                outStr += "<userParam name=\"basePeakIntensity\" value=\"%f\"/>\n"%(basePeakIntensity,)
            else:
                # A spectrum without peaks has no m/z range or base peak to
                # report, so those parameters are left out.
                outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,)
            outStr += "</spectrumInstrument>\n"
            outStr += "</spectrumSettings>\n"
            self.writeString(xmlFile,outStr)

            if msLevel > 1:

                if 'precursorMz' not in d:
                    sys.stderr.write("Required precursorMz attribute missing.\n")
                    sys.exit(1)

                # We scan the parent spectrum in the region of the
                # precursorMz to establish the intensity. Optionally,
                # we tune the precursorMz itself, on the basis of this

                pMz = d['precursorMz']
                tol = self.reader.precursorPrecision(pMz)
                pMzLB = pMz-tol
                pMzUB = pMz+tol
                pMzIntensity = 0
                pMzTune = None      # m/z of the strongest parent peak in the window
                # NOTE(review): 0 is written as spectrumRef when no parent
                # scan is known -- confirm this placeholder against the
                # mzData schema.
                parentScan = 0

                if len(ancestors) > 0:
                    parentScan,parent = ancestors[-1]
                    for i in range(0,len(parent),2):
                        x = parent[i]; y = parent[i+1]
                        if x < pMzLB:
                            continue
                        if x > pMzUB:
                            # Stops at the first peak above the window
                            # (presumably the peaks are sorted by m/z).
                            break
                        if pMzIntensity < y:
                            pMzIntensity = y
                            pMzTune = x

                # Only tune when a parent peak was actually found.
                if self.reader.precursorTune() and pMzTune is not None:
                    pMz = pMzTune

                outStr = "<precursorList count=\"1\">\n"
                outStr += "<precursor"
                outStr += " msLevel=\"%d\""%(msLevel,)
                outStr += " spectrumRef=\"%d\""%(parentScan,)
                outStr += ">\n"
                self.writeString(xmlFile,outStr)

                outStr = "<ionSelection>\n"
                outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"MassToChargeRatio\" value=\"%f\"/>\n"%(pMz,)
                outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Intensity\" value=\"%f\"/>\n"%(pMzIntensity,)
                for (name,fmt,v) in self._prefixed(d,'precursorMz.'):
                    outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(fmt,))%(name,v)
                outStr += "</ionSelection>\n"
                self.writeString(xmlFile,outStr)

                outStr = "<activation>\n"
                outStr += "</activation>\n"
                outStr += "</precursor>\n"
                outStr += "</precursorList>\n"
                self.writeString(xmlFile,outStr)

            if self.reader.maldi():
                outStr = '<maldi'
                for (name,fmt,v) in self._prefixed(d,'maldi.'):
                    outStr += (" %%s=\"%s\""%(fmt,))%(name,v)
                outStr += " plateID=\"%s\""%(d['plateID'],)
                outStr += " spotID=\"%s\""%(d['spotID'],)
                outStr += "/>\n"
                self.writeString(xmlFile,outStr)

            self.writeString(xmlFile,"</spectrumDesc>\n")

            # The two arrays are written in the machine's native byte order,
            # which is declared in the endian attribute.
            outStr = "<mzArrayBinary>\n"
            outStr += "<data"
            outStr += " precision=\"32\""
            outStr += " endian=\"%s\""%(sys.byteorder,)
            outStr += " length=\"%d\""%(peaksCount,)
            outStr += ">"
            self.writeString(xmlFile,outStr)

            outStr = b64encode(mzs.tostring())
            if not debug:
                self.writeString(xmlFile,outStr)
            else:
                self.writeString(xmlFile,outStr[0:100])

            self.writeString(xmlFile,"</data>\n</mzArrayBinary>\n")

            outStr = "<intenArrayBinary>\n"
            outStr += "<data"
            outStr += " precision=\"32\""
            outStr += " endian=\"%s\""%(sys.byteorder,)
            outStr += " length=\"%d\""%(peaksCount,)
            outStr += ">"
            self.writeString(xmlFile,outStr)

            outStr = b64encode(intensities.tostring())
            if not debug:
                self.writeString(xmlFile,outStr)
            else:
                self.writeString(xmlFile,outStr[0:100])

            self.writeString(xmlFile,"</data>\n</intenArrayBinary>\n")

            if self.reader.maldi():
                # plateID and spotID are not repeated here; they are written
                # as attributes of the <maldi> element above.
                outStr = ''
                for (name,fmt,v) in self._prefixed(d,'maldi.'):
                    outStr += '<nameValue'
                    outStr += ' name="%s"'%(name,)
                    # fmt is itself a %-format, so it must be inserted with
                    # %s before the value is applied to it.
                    outStr += (' value="%s"'%(fmt,))%(v,)
                    outStr += '/>\n'
                self.writeString(xmlFile,outStr)

            self.writeString(xmlFile,"</spectrum>\n")
            xmlFile.flush()

            prevSpec = s

        self.actualScanCount = scanNumber
        
class QStarWiff:
    """Spectrum reader for a ``.wiff`` file, driven through the ``Analyst.*``
    COM objects (``win32com`` is imported lazily by open()).

    Exposes the reader methods used when writing mzXML: ``spectra()``,
    ``getMsRunMetaData()``, ``getFilenames()``, the ``ms*`` / ``acquisition*``
    descriptors and ``lc()`` / ``maldi()``.
    """

    def __init__(self,filename,peaks=None):
        """Record the input file and the MS levels to peak-detect.

        filename -- path of the .wiff file; the process exits with status 1
                    if it does not exist.
        peaks    -- optional comma separated list of MS levels (e.g. "2")
                    for which peak detection is requested.
        """
        # Set every attribute that close()/__del__ touches before anything
        # can fail, so a failed constructor leaves a destructible object.
        self.filename = filename
        self.filehash = None
        self.theTIC = None
        self.theWF = None
        self.theFMANSpecData = None
        self.theSPL = None
        self.theExperiment = None
        self.startTime = None
        self.stopTime = None
        self.applyPeakDetect = []
        if peaks:
            self.applyPeakDetect = [int(level) for level in peaks.split(',')]

        # Check for the file *before* hashing it; hashing a missing file
        # would raise IOError and the message below would never be shown.
        if not os.path.exists(self.filename):
            sys.stderr.write("Filename %s does not exist.\n"%(self.filename,))
            sys.exit(1)

        self.filehash = fileSHA(self.filename)
        self.filename = os.path.abspath(self.filename)

    def __del__(self):
        # Probably unnecessary
        self.close()

    def peakDetect(self,msLevel):
        """Return True if peak detection applies to msLevel.

        MS level 1 is never peak-detected, even when requested.
        """
        return ((msLevel in self.applyPeakDetect) and (msLevel != 1))

    def open(self):
        """Create the COM objects and read the run start/stop times.

        Does nothing if the objects already exist.
        """
        if self.theTIC is None:

            from win32com.client import Dispatch

            self.theFMANSpecData = Dispatch('Analyst.FMANSpecData')
            self.theTIC = Dispatch('Analyst.FMANChromData')
            self.theSPL = Dispatch('Analyst.SpectralPeakList')

            self.theFMANSpecData.WiffFileName = self.filename
            self.theTIC.WiffFileName = self.filename
            self.theWF = self.theFMANSpecData.GetWiffFileObject()

            self.theTIC.SetToTIC(1,0,0)

            # First and last data point of the TIC selected above.
            self.startTime = self.theTIC.GetDataPointXValue(1)
            self.stopTime = self.theTIC.GetDataPointXValue(self.theTIC.GetNumberOfDataPoints())

    def close(self):
        """Close the wiff file and drop all COM object references."""
        if self.theTIC is not None:

            if self.theWF is not None:
                self.theWF.CloseWiffFile()
            self.theTIC = None
            self.theFMANSpecData = None
            self.theWF = None
            self.theSPL = None
            self.theExperiment = None

    def spectra(self):
        """Generate ``(dataArr, d)`` for every scan that has signal.

        dataArr -- array('f') of alternating x (m/z) and y (intensity)
                   values; points with y <= 0 are dropped.
        d       -- dict of scan attributes.  Keys of the form
                   ``name:format`` carry the format used to print the value.

        Scans are visited per sample, period, TIC data point and experiment.
        Scans whose TIC value is not positive, or that end up with no data
        points, are skipped but still counted in ``scanOrigin.num``.

        Raises ValueError if an MS/MS scan's data title does not contain the
        parenthesised precursor mass.
        """
        self.open()
        spectrumCount = 0

        numberOfSamples = self.theWF.GetActualNumberOfSamples()
        for i in xrange(1,numberOfSamples+1):

            sampleName = self.theWF.GetSampleName(i)

            numberOfPeriods = self.theWF.GetActualNumberOfPeriods(i)
            for j in xrange(0,numberOfPeriods):

                numberOfExperiments = self.theWF.GetNumberOfExperiments(i,j)
                self.theTIC.SetToTIC(i, j, 0)

                numberOfTICDataPoints = self.theTIC.GetNumberOfDataPoints()
                for l in xrange(1,numberOfTICDataPoints+1):

                    for k in xrange(0,numberOfExperiments):

                        self.theExperiment = self.theWF.GetExperimentObject(i, j, k)
                        self.theTIC.SetToTIC(i, j, k)

                        # Convert to seconds?
                        xValue = self.theTIC.GetDataPointXValue(l) * 60

                        spectrumCount += 1

                        # Only scans with ion current at time "l"
                        if not self.theTIC.GetDataPointYValue(l) > 0:
                            continue

                        self.theFMANSpecData.SetSpectrum(i, j, k, xValue, xValue)

                        # ScanType 9 is treated as a tandem MS scan whose
                        # precursor mass appears in parentheses in the title.
                        if self.theExperiment.ScanType == 9:
                            msLevel = 2
                            title = self.theFMANSpecData.DataTitle
                            found = re.search(r'[(]([0-9.]+)[)]',title)
                            if found is None:
                                raise ValueError("Cannot find precursor m/z in data title %r"%(title,))
                            fixedMass = float(found.group(1))
                        else:
                            msLevel = 1

                        startMass = self.theFMANSpecData.GetStartMass()
                        stopMass = self.theFMANSpecData.GetStopMass()
                        dataArr = array('f')
                        if self.peakDetect(msLevel):
                            minY,maxY = self.theFMANSpecData.GetYValueRange()
                            # Threshold at 1% of the largest y value, then
                            # pick peaks from the thresholded spectrum.
                            self.theFMANSpecData.Threshold(maxY*0.01)
                            self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0)
                            numPeaks = self.theSPL.GetNumberOfPeaks()
                            for p in xrange(1,numPeaks+1):
                                (x,w,y,yp) = self.theSPL.GetPeak(p)
                                if y <= 0:
                                    continue
                                dataArr.append(x)
                                dataArr.append(y)
                        else:
                            numPoints = self.theFMANSpecData.GetNumberOfDataPoints()
                            for p in xrange(1,numPoints+1):
                                # One COM call per value; y is fetched once.
                                y = self.theFMANSpecData.GetDataPointYValue(p)
                                if y <= 0:
                                    continue
                                dataArr.append(self.theFMANSpecData.GetDataPointXValue(p))
                                dataArr.append(y)
                        polarity = self.theExperiment.Polarity

                        if len(dataArr) == 0:
                            continue

                        if polarity == 0:
                            polarity_val = "+"
                        else:
                            polarity_val = "-"

                        # Scan level attributes
                        # Required by mzXML (no format spec. needed)
                        d = {'msLevel':msLevel}
                        # Optional ('scan.' and prefixformat spec. needed)
                        d.update({
                            'scan.retentionTime:PT%fS':xValue,
                            'scan.polarity:%s':polarity_val,
                            'scan.startMz:%f':startMass,
                            'scan.endMz:%f':stopMass,
                            })
                        d.update({'scanOrigin.parentFileID:%s': self.filehash,
                                  'scanOrigin.num:%d': spectrumCount})
                        d.update({'nameValue.sample:%d':i,
                                  'nameValue.period:%d':j,
                                  'nameValue.experiment:%d':k,
                                  'nameValue.sampleName:%s':sampleName})

                        if msLevel != 1:
                            # tandem MS scans add the following
                            # Required by mzXML
                            d.update({'precursorMz':fixedMass})
                            # Optional
                            # d.update({'precursorMz.precursorMzIntensity:f':0.0})
                        yield (dataArr,d)

    def getMsRunMetaData(self):
        """Return the run start and end times read by open()."""
        self.open()
        d = {'startTime:PT%fS':self.startTime,
             'endTime:PT%fS':self.stopTime,
             }
        return d

    def getFilenames(self):
        """Return ``[(absolute filename, SHA-1 hex digest)]``."""
        return [ (self.filename,self.filehash) ]

    def msInstrumentID(self):
        return "1"

    def msManufacturer(self):
        return "ABI / SCIEX"

    def msModel(self):
        return "QSTAR"

    def msIonisation(self):
        return "ESI"

    def msMassAnalyzer(self):
        return "Quadrupole"

    def msDetector(self):
        return "LE"

    def acquisitionSoftware(self):
        return "Analyst"

    def acquisitionSoftwareVersion(self):
        return "1.0"

    def precursorPrecision(self,mz):
        """Return the fixed precursor precision (0.1) regardless of mz."""
        return .1

    def precursorTune(self):
        return True

    def lc(self):
        return True

    def maldi(self):
        return not self.lc()

class mzXML_md_sax(handler.ContentHandler):
    """SAX handler that collects run-level metadata from an mzXML file.

    The results are stored in the ``md`` dict.  ``context`` holds the path
    of the currently open elements as ``:outer:inner``.  Parsing is aborted
    with a SAXException as soon as the first ``scan`` element starts.
    """

    def __init__(self):
        self.context = ''
        self.md = {}

    def startElement(self, name, attrs):
        """Push name onto the context and harvest any metadata it carries."""
        self.context = self.context + ':%s'%(name,)
        path = self.context
        md = self.md

        if path.endswith(':msRun'):
            # Times are written as "PT<seconds>S"; strip the wrapper.
            for key in ('startTime','endTime'):
                md[key] = float(attrs[key][2:-1])
            return

        if path.endswith(':parentFile'):
            md['fileName'] = unquote(attrs['fileName'])
            for key in ('fileType','fileSha1'):
                md[key] = attrs[key]
            return

        if path.endswith(':instrument'):
            # Instrument description given as attributes of one element.
            for (mdkey,attrname) in (('msManufacturer','manufacturer'),
                                     ('msModel','model'),
                                     ('msIonisation','ionisation'),
                                     ('msMassAnalyzer','msType')):
                md[mdkey] = attrs[attrname]
            return

        # Instrument description given as one child element per property,
        # each carrying its setting in a "value" attribute.
        for leaf in ('msManufacturer','msModel','msIonisation',
                     'msMassAnalyzer','msDetector'):
            if path.endswith(':msInstrument:' + leaf):
                md[leaf] = attrs['value']
                return

        if path.endswith(':msInstrument:software') or \
               path.endswith(':instrument:software'):
            if attrs['type'] == 'acquisition':
                md['acquisitionSoftware'] = attrs['name']
                md['acquisitionSoftwareVersion'] = attrs['version']
            return

        if path.endswith(':scan'):
            # All metadata precedes the scans; stop reading here.
            raise SAXException("Early termination")

    def endElement(self, name):
        """Pop name (and its leading colon) off the context."""
        self.context = self.context[:-(len(name)+1)]


class mzXML_spec_sax(handler.ContentHandler):
    """SAX handler that collects the spectra of an mzXML file in batches.

    Each completed ``peaks`` element appends ``(spec, d)`` to ``spectra``,
    where ``spec`` is an array('f') of the decoded peak list and ``d`` is
    the scan attribute dict.  After ``scanmax`` spectra the handler sets
    ``scanstart`` to the next scan and aborts the parse with a SAXException;
    the caller re-parses the file to obtain the next batch (``spectra`` and
    ``scancount`` are reset when ``msRun`` starts).  ``done`` becomes True
    once the end of ``msRun`` is reached.
    """

    def __init__(self):
        self.context = ''
        self.content = ''
        self.spectra = []
        self.done = False
        self.scancount = 0
        self.scanmax = 1000
        self.scanstart = 0
        # Encoding of the <peaks> element currently being read.
        self.peaksPrecision = '32'
        self.peaksCompression = 'none'

    def _optionalFloat(self,attrs,key):
        """Return attrs[key] as a float, or None if the attribute is absent."""
        value = attrs.get(key,None)
        if value is None:
            return None
        return float(value)

    def startElement(self, name, attrs):
        """Track the element path and capture scan / peaks attributes."""
        self.context += ':%s'%(name,)
        self.content = ''
        if name == 'scan':
            self.scanmd = {}
            self.scanlevel = int(attrs['msLevel'])
            self.scannum = int(attrs['num'])
            # retentionTime, startMz and endMz may be absent; reset them for
            # every scan so a value never leaks over from the previous scan.
            self.rt = None
            rt = attrs.get('retentionTime',None)
            if rt is not None:
                # Written as "PT<seconds>S".
                self.rt = float(rt[2:-1])
            self.endMz = self._optionalFloat(attrs,'endMz')
            self.startMz = self._optionalFloat(attrs,'startMz')
            self.polarity = attrs.get('polarity',None)
            for (k,v) in attrs.items():
                self.scanmd[k] = v
            self.precursorMz = None
            self.scancount += 1
        elif name == 'msRun':
            self.scancount = 0
            self.spectra = []
            self.context = ':msRun'
        elif name == 'scanOrigin':
            self.scanmd['scanOrigin'] = attrs
        elif name == 'peaks':
            self.peaksPrecision = attrs.get('precision','32')
            self.peaksCompression = attrs.get('compressionType','none')

    def characters(self, content):
        """Accumulate text of ``peaks`` and ``precursorMz`` elements."""
        if self.scancount >= self.scanstart:
            if self.context.endswith(':peaks') or \
                   self.context.endswith(':precursorMz'):
                self.content += content

    def _decodePeaks(self):
        """Decode the accumulated ``peaks`` text into an array('f').

        The text is base64; it is zlib-inflated when the element declared
        compressionType="zlib", read as 64-bit floats when it declared
        precision="64" (32-bit otherwise), and byte-swapped from big-endian
        on little-endian hosts.
        """
        raw = b64decode(self.content)
        if self.peaksCompression == 'zlib':
            raw = zlib.decompress(raw)
        if self.peaksPrecision == '64':
            spec = array('d')
        else:
            spec = array('f')
        # array.fromstring() is called frombytes() on newer interpreters.
        load = getattr(spec,'frombytes',None)
        if load is None:
            load = spec.fromstring
        load(raw)
        if sys.byteorder != 'big':
            spec.byteswap()
        if spec.typecode != 'f':
            # Callers expect single precision arrays.
            spec = array('f',spec)
        return spec

    def endElement(self, name):
        """Finish ``peaks`` / ``precursorMz`` elements and pop the context.

        Raises SAXException("Early termination") once ``scanmax`` spectra
        have been collected.
        """
        if self.scancount >= self.scanstart:
            if self.context.endswith('scan:peaks'):

                spec = self._decodePeaks()
                d = {'msLevel':self.scanlevel}
                # Optional ('scan.' and prefixformat spec. needed)
                if self.rt is not None:
                    d['scan.retentionTime:PT%fS'] = self.rt
                if self.startMz is not None:
                    d['scan.startMz:%f'] = self.startMz
                if self.endMz is not None:
                    d['scan.endMz:%f'] = self.endMz
                if self.polarity is not None:
                    d['scan.polarity:%s'] = self.polarity
                if self.scanlevel == 2:
                    d['precursorMz'] = self.precursorMz
                if 'scanOrigin' in self.scanmd:
                    origin = self.scanmd['scanOrigin']
                    d['scanOrigin.parentFileID:%s'] = origin['parentFileID']
                    # The key's format is %d, so hand over an integer.
                    d['scanOrigin.num:%d'] = int(origin['num'])
                # Raw scan attributes fill in anything not set above.
                for (k,v) in self.scanmd.items():
                    if k not in d:
                        d[k] = v
                self.spectra.append((spec,d))
                if len(self.spectra) >= self.scanmax:
                    self.scanstart = self.scancount+1
                    raise SAXException("Early termination")
            elif self.context.endswith('scan:precursorMz'):
                self.precursorMz = float(self.content)
        if self.context.endswith('msRun'):
            self.done = True
        self.context = self.context[0:-(len(name)+1)]
            
class FromMzXML:
    """Spectrum reader for an existing mzXML file (optionally gzipped).

    Metadata is read with mzXML_md_sax and spectra with mzXML_spec_sax; the
    file is parsed repeatedly, one batch of spectra per pass.
    """

    def __init__(self,filename):
        """Record the input file; exits with status 1 if it does not exist.

        A filename ending in ``.gz`` is read through gzip.
        """
        self.filename = filename
        self.parser = None
        self.handler = None

        if not os.path.exists(self.filename):
            sys.stderr.write("Filename %s does not exist.\n"%(self.filename,))
            sys.exit(1)

        if self.filename.endswith('.gz'):
            self.compressfmt = 'gz'
        else:
            self.compressfmt = None

    def __del__(self):
        self.close()

    def _parse(self,parser):
        """Run parser once over the input file.

        Returns True if the whole document was read and False if a handler
        stopped the parse early by raising SAXException.  Genuine XML errors
        (SAXParseException) are propagated, and the gzip stream, if any, is
        always closed.
        """
        stream = None
        if self.compressfmt is None:
            source = self.filename
        elif self.compressfmt == 'gz':
            stream = gzip.open(self.filename,'r')
            source = stream
        else:
            sys.stderr.write("Bad compressfmt specification: %s\n"%(self.compressfmt,))
            sys.exit(1)
        try:
            try:
                parser.parse(source)
            except SAXParseException:
                # Malformed input must not be mistaken for the handlers'
                # deliberate early termination.
                raise
            except SAXException:
                return False
        finally:
            if stream is not None:
                stream.close()
        return True

    def open(self):
        """Read the run metadata into ``md`` and set up the spectrum parser.

        Does nothing if the spectrum parser already exists.  Reader state is
        only updated once the metadata pass has succeeded.
        """
        if self.parser is None:
            mdparser = make_parser()
            mdhandler = mzXML_md_sax()
            mdparser.setContentHandler(mdhandler)
            self._parse(mdparser)

            self.md = mdhandler.md

            self.handler = mzXML_spec_sax()
            self.parser = make_parser()
            self.parser.setContentHandler(self.handler)

    def close(self):
        pass

    def spectra(self):
        """Generate the ``(spectrum, attributes)`` pairs of the file.

        The file is re-parsed once per batch collected by the handler.
        Iteration ends when the handler reports the end of the run, or when
        a pass reads the whole document without stopping early, so input
        lacking an ``msRun`` element cannot loop forever.
        """
        self.open()
        while True:
            finished = self._parse(self.parser)
            for (s,d) in self.handler.spectra:
                yield (s,d)
            if finished or self.handler.done:
                break

    def getMsRunMetaData(self):
        """Return the run start and end times found in the file."""
        self.open()
        d = {'startTime:PT%fS':self.md['startTime'],
             'endTime:PT%fS':self.md['endTime']
             }
        return d

    def getFilenames(self):
        """Return ``[(fileName, fileType, fileSha1)]`` of the parent file."""
        # md only exists once the metadata pass has run.
        self.open()
        return [ (self.md['fileName'],self.md['fileType'],self.md['fileSha1']) ]

    def msManufacturer(self):
        return self.md.get('msManufacturer','')

    def msModel(self):
        return self.md.get('msModel','')

    def msIonisation(self):
        return self.md.get('msIonisation','')

    def msMassAnalyzer(self):
        return self.md.get('msMassAnalyzer','')

    def msDetector(self):
        return self.md.get('msDetector','')

    def acquisitionSoftware(self):
        return self.md.get('acquisitionSoftware','')

    def acquisitionSoftwareVersion(self):
        return self.md.get('acquisitionSoftwareVersion','')

    def precursorPrecision(self,mz):
        return 0

    def precursorTune(self):
        return False

    def lc(self):
        return True

    def maldi(self):
        return not self.lc()

class AB4700T2D:
    """Spectrum reader for ``.t2d`` files listed in a tab separated metadata
    file, driven through the ``DataExplorer.Application`` COM object
    (``win32com`` is imported lazily by open()).

    The metadata file holds PLATEDEF, PLATE, SPOT and SCAN rows; each row is
    a keyword followed by alternating key / value cells.  SCAN rows name the
    data file (relative to the metadata file) and spectrum index to read.
    """

    # Keys accepted on each kind of metadata row.
    _rowKeys = {
        'PLATE': ('plateID','spotXCount','spotYCount','plateManufacturer','plateModel'),
        'PLATEDEF': ('plateID','plateManufacturer','plateModel','spotNaming','maldiMatrix'),
        'SPOT': ('plateID','spotID','spotXPosition','spotYPosition','maldiMatrix'),
        'SCAN': ('plateID','spotID','filename','index'),
        }

    def __init__(self,filename,peaks=None):
        """Record the metadata file and the MS levels to peak-detect.

        filename -- path of the metadata file; the process exits with
                    status 1 if it does not exist.
        peaks    -- optional comma separated list of MS levels for which
                    peak detection is requested.
        """
        self.filename = filename
        self.datafiles = None
        self.deapp = None
        self.metadata = {}
        self.platespots = {}
        self.applyPeakDetect = []
        if peaks:
            self.applyPeakDetect = [int(level) for level in peaks.split(',')]

        if not os.path.exists(self.filename):
            sys.stderr.write("Filename %s does not exist.\n"%(self.filename,))
            sys.exit(1)

        self.filename = os.path.abspath(self.filename)

        self.dir = os.path.split(self.filename)[0]

    def __del__(self):
        # Probably unnecessary
        self.close()

    def _readMetadata(self):
        """Load the PLATEDEF / PLATE / SPOT / SCAN rows of the metadata file.

        Rows with any other keyword are ignored.  An empty key ends a row;
        an unknown key prints a message and exits with status 1.
        """
        for kw in ('PLATEDEF','PLATE','SPOT','SCAN'):
            self.metadata[kw] = []

        h = open(self.filename,'rb')
        try:
            for l in csv.reader(h,'excel-tab'):
                if len(l) == 0:
                    continue
                kw = l.pop(0).upper()
                if kw not in self._rowKeys:
                    continue
                row = {}
                self.metadata[kw].append(row)
                while len(l) >= 2:
                    k = l.pop(0)
                    v = l.pop(0)
                    if k in self._rowKeys[kw]:
                        row[k] = v
                    elif k == '':
                        break
                    else:
                        sys.stderr.write("Bad key %s for %s row\n"%(k,kw))
                        sys.exit(1)
        finally:
            h.close()

    def _expandPlateDefs(self):
        """Turn each known PLATEDEF row into a PLATE row plus its SPOT rows.

        Only the 'ABI / SCIEX' / '01-192+06-BB' plate with 'alpha' spot
        naming is implemented; other plate models are left untouched.
        """
        for pd in self.metadata['PLATEDEF']:
            # insert all the gory details needed for a particular plate model etc...
            if pd.get('plateManufacturer') == 'ABI / SCIEX' and pd.get('plateModel') == '01-192+06-BB':
                if 'spotNaming' not in pd or 'maldiMatrix' not in pd or \
                   pd['spotNaming'] not in ('alpha','parallel','antiparallel'):
                    sys.stderr.write("Bad plate definition, missing maldiMatrix or spotNaming, or bad spotNaming value.\n")
                    sys.exit(1)
                self.metadata['PLATE'].append({
                    'plateID':pd['plateID'],
                    'plateManufacturer':pd['plateManufacturer'],
                    'plateModel':pd['plateModel'],
                    'spotXCount':24,
                    'spotYCount':8,
                    })
                if pd['spotNaming'] == 'alpha':
                    # Spots are named A1..H24: row letter, column number.
                    for y in xrange(0,8):
                        for x in xrange(0,24):
                            self.metadata['SPOT'].append({
                                'plateID':pd['plateID'],
                                'spotID':"%c%d"%(y+ord('A'),x+1),
                                'spotXPosition':x,
                                'spotYPosition':2*y+(x%2),
                                'maldiMatrix':pd['maldiMatrix']
                                })
                else:
                    sys.stderr.write("Valid spotNaming value, not yet implemented.\n")
                    sys.exit(1)

    def _indexScans(self):
        """Build the per-plate spot lists and the scan / data file tables."""
        for pl in self.metadata['PLATE']:
            self.platespots[pl['plateID']] = []

        for sp in self.metadata['SPOT']:
            self.platespots[sp['plateID']].append(sp)

        # (absolute data file, spectrum index) for every SCAN row, in order.
        self.datafiles = [ (os.path.join(self.dir,md['filename']),int(md['index'])) for md in self.metadata['SCAN'] ]
        # data file -> SHA-1 hex digest
        self.distinct_datafiles = dict([(os.path.join(self.dir,md['filename']),
                                        fileSHA(os.path.join(self.dir,md['filename']))) for md in self.metadata['SCAN']])

        # (data file, spectrum index) -> position of its SCAN row
        self.maptoscan = {}
        index = 0
        for key in self.datafiles:
            self.maptoscan[key] = index
            index += 1

    def open(self):
        """Read the metadata file and start DataExplorer.

        Does nothing if DataExplorer has already been started.
        """
        if self.deapp is None:

            self._readMetadata()
            self._expandPlateDefs()
            self._indexScans()

            from win32com.client import Dispatch, gencache
            self.deapp = Dispatch('DataExplorer.Application',
                                  resultCLSID='{3FED40F1-D409-11D1-8B56-0060971CB54B}')
            self.delib = gencache.EnsureModule('{06972F50-13F6-11D3-A5CB-0060971CB54B}',0,4,2)
            self.deapp.AutomatedProcessing = 1
            self.deapp.Visible             = 0

    def close(self):
        """Quit DataExplorer if it was started."""
        if self.deapp is not None:
            self.deapp.Quit()
            self.deapp = None

    def peakDetect(self,msLevel):
        """Return True if peak detection was requested for msLevel."""
        return msLevel in self.applyPeakDetect

#   remove .set and .cts files to prevent DataExplorer crashes - DT 11-1-2006
    def remsettingsfile(self,fn):
        """Delete the ``.cts`` and ``.set`` files that sit next to a
        ``.t2d`` file; any other filename is ignored."""
        if ((len(fn) > 3) and (fn[-4:].lower() == '.t2d')):
            for ext in ('.cts','.set'):
                path2del = fn[:-4] + ext
                if os.path.exists(path2del):
                    os.unlink(path2del)

    def spectra(self):
        """Generate ``(dataArr, d)`` for every SCAN row that yields data.

        dataArr -- array('f') of alternating x and y values: centroid mass
                   and peak area when peak detection applies (peaks with
                   area <= 0 dropped), the raw data points otherwise.
        d       -- dict of scan attributes.  Keys of the form
                   ``name:format`` carry the format used to print the value.

        A spectrum is MS level 2 when its precursor ion setting is > 0.
        Exits with status 1 if a listed data file does not exist.
        """
        self.open()

        prevfile = ''
        for (f,i) in self.datafiles:

            if f != prevfile:
                self.deapp.Documents.Close()
                self.remsettingsfile(prevfile)

                if not os.path.exists(f):
                    sys.stderr.write("Filename %s does not exist.\n"%(f,))
                    sys.exit(1)

                self.remsettingsfile(f)
                self.deapp.Documents.Open(f)
                # Remember the open document right away, so that skipping a
                # spectrum below does not force the same file to be reopened.
                prevfile = f

            doc = self.deapp.Documents.Item(0)
            sv = doc.SpecView

            sv.SetSpectrumAt(i-1)

            (tf,fixedMass) = doc.InstrumentSettings.GetSetting(self.delib.constants.dePreCursorIon,i-1,None)

            if fixedMass > 0:
                msLevel = 2
            else:
                msLevel = 1

            dataArr = array('f')
            if self.peakDetect(msLevel):
                sv.CorrectBaseline()
                doc.SpecSetup.NoiseReductionStdev = 2.0
                sv.FilterNoise(self.delib.constants.deNoiseRemoval)
                sv.PeakSettings.PeakThresholdPercent = 1
                st,sp = sv.GetPeakData(self.delib.constants.deSpecPeakAll,
                                       self.delib.constants.deSpecPeakSortMass, 0, 0)
                for j in xrange(len(sp)):
                    x = sp[j][self.delib.constants.deSpecPeakCentroidMass]
                    y = sp[j][self.delib.constants.deSpecPeakArea]
                    if y <= 0:
                        continue
                    dataArr.append(x)
                    dataArr.append(y)
            else:
                n,s = sv.GetRawData(0,0)
                for j in xrange(n):
                    dataArr.append(s[j][0])
                    dataArr.append(s[j][1])

            if len(dataArr) == 0:
                continue

            polarity_val = "+"

            # Scan level attributes
            # Required by mzXML (no format spec. needed)
            d = {'msLevel':msLevel}
            # Optional ('scan.' and prefixformat spec. needed)
            d.update({
                'scan.polarity:%s':polarity_val,
                })

            scanindex = self.maptoscan[(f,i)]
            d.update({
                'plateID':self.metadata['SCAN'][scanindex]['plateID'],
                'spotID':self.metadata['SCAN'][scanindex]['spotID'],
                })
            d.update({'scanOrigin.parentFileID:%s': self.distinct_datafiles[f],
                      'scanOrigin.num:%d': i})
            if msLevel != 1:
                # tandem MS scans add the following
                # Required by mzXML
                d.update({'precursorMz':fixedMass})
                # Optional
                d.update({'precursorMz.precursorCharge:%d':1})
            yield (dataArr,d)

        # prevfile is the last document opened ('' if there were no scans).
        self.deapp.Documents.Close()
        self.remsettingsfile(prevfile)

    def getMsRunMetaData(self):
        """Return the (empty) run-level metadata dict."""
        self.open()
        return {}

    def getFilenames(self):
        """Return ``[(data file, SHA-1 hex digest)]`` for each distinct file."""
        # distinct_datafiles only exists once the metadata has been read.
        self.open()
        return [ t for t in self.distinct_datafiles.items() ]

    def msInstrumentID(self):
        return "1"

    def msManufacturer(self):
        return "ABI / SCIEX"

    def msModel(self):
        return "4700 Proteomic Analyzer"

    def msIonisation(self):
        return "MALDI"

    def msMassAnalyzer(self):
        return "TOF"

    def msDetector(self):
        return "LE"

    def acquisitionSoftware(self):
        return "DataExplorer"

    def acquisitionSoftwareVersion(self):
        return "1.0"

    def precursorPrecision(self,mz):
        """Return the fixed precursor precision (0.1) regardless of mz."""
        return .1

    def precursorTune(self):
        return False

    def lc(self):
        return False

    def maldi(self):
        return not self.lc()

    def plateData(self):
        """Generate the PLATE row dicts."""
        self.open()
        for d in self.metadata['PLATE']:
            yield d

    def spotData(self,plate):
        """Generate the SPOT row dicts belonging to plate (a plateID)."""
        self.open()
        for s in self.platespots[plate]:
            yield s

class scanfilter:
    """Filter on scan meta-data dictionaries.

    A filter is a comma separated list of clauses, each of the form
    field.op.value, where op is one of eq, ne, lt, le, gt, ge.  A scan
    passes the filter only if every clause holds.

    Clause values, and string valued meta-data, are coerced before
    comparison: PT<seconds>S becomes a float number of seconds, positive
    integers become ints, other unsigned decimals become floats, and
    anything else is kept as a whitespace-stripped string.
    """

    # Patterns used to coerce values, see _coerce().
    rtre = re.compile(r'^PT([0-9]+(\.[0-9]+)?)S$')
    intre = re.compile(r'^[1-9][0-9]*$')
    floatre = re.compile(r'^[0-9]+(\.[0-9]+)?$')

    # String types that get coerced (unicode and str on Python 2).
    _strtypes = (type(u''),type(''))

    # Supported operators, applied as op(scan value, clause value).
    _ops = {
        'eq': lambda v1,v2: v1 == v2,
        'ne': lambda v1,v2: v1 != v2,
        'lt': lambda v1,v2: v1 < v2,
        'le': lambda v1,v2: v1 <= v2,
        'gt': lambda v1,v2: v1 > v2,
        'ge': lambda v1,v2: v1 >= v2,
        }

    def __init__(self,str):
        """Parse the filter specification str into self.cl.

        self.cl is a list of (field, op, value) tuples, one per clause.
        Raises ValueError if a clause does not have the form
        field.op.value or uses an unknown operator.
        """
        self.cl = []
        for clause in str.split(','):
            # Split on the first two dots only: the value may contain a
            # dot of its own (e.g. 12.5 or PT12.5S).
            parts = clause.split('.',2)
            if len(parts) != 3:
                raise ValueError("Bad filter clause \"%s\", expected field.op.value"%(clause,))
            field = parts[0].strip()
            op = parts[1].strip()
            if op not in self._ops:
                raise ValueError("Bad filter operator \"%s\" in clause \"%s\""%(op,clause))
            self.cl.append((field,op,self._coerce(parts[2])))

    def _coerce(self,text):
        """Convert the string text to a float, int or stripped string."""
        text = text.strip()
        m = self.rtre.match(text)
        if m is not None:
            return float(m.group(1))
        # Integers are tested first, floatre matches them too.
        if self.intre.match(text):
            return int(text)
        if self.floatre.match(text):
            return float(text)
        return text

    def test(self,d):
        """Return True if the scan meta-data d satisfies every clause.

        Keys of d may carry a dotted prefix and a ':' separated format
        suffix (e.g. 'scan.polarity:%s'); only the last dotted component
        before the ':' is used as the field name.  Raises KeyError if a
        clause names a field that is not present in d.
        """
        a = {}
        for (k,v) in d.items():
            a[k.split(':')[0].split('.')[-1]] = v
        for (field,op,v2) in self.cl:
            v1 = a[field]
            if isinstance(v1,self._strtypes):
                v1 = self._coerce(v1)
            if not self._ops[op](v1,v2):
                return False
        return True
            
if __name__ == '__main__':

    # Command line driver: convert each raw spectrum data file named on
    # the command line (glob patterns are expanded) to mzXML or mzData.

    from optparse import OptionParser
    import sys
    import glob

    parser = OptionParser()
    parser.add_option("-R", "--rawdata", type="string", dest="rawdata", default="",
                      help="Format of raw data. Optional if raw spectra file ends in .wiff or .t2m.")
    parser.add_option("-X", "--xmlformat", type="string", dest="xmlformat", default="",
                      help="XML format to output. Optional if output file ends in .mzxml or .mzdata.")
    parser.add_option("-o", "--output", type="string", dest="output", default="",
                      help="Output file.")
    parser.add_option("-p", "--peaks", type="string", dest="peaks", default="2",
                      help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.")
    parser.add_option("-f", "--filter", type="string", dest="filter", default=None,
                      help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.")
    parser.add_option("-V", "--version", type='string', dest="version", default="3.0",
                      help="XML version. mzXML only. Valid options '2.1','2.2','3.0'. Default: '3.0'.")
    parser.add_option("-z", "--compress_peaks", action="store_true", dest="compress_peaks", default=None,
                      help="Compress mzXML peaks data using zlib. Default: False")
    parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,
                      help="Compress output file. Valid options 'gz'. Default: None.")
    parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,
                      help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.")

    ( opts, args ) = parser.parse_args()

    # parser.error() prints the usage message and exits with status 2,
    # so nothing after a call to it is executed.
    if opts.output == "" and opts.xmlformat == "":
        parser.error("Either -o or -X must be supplied.")

    if opts.filter is not None:
        filt = scanfilter(opts.filter)
    else:
        filt = None

    argsglob = []
    for a in args:
        argsglob.extend(glob.glob(a))

    # Without this check the script would exit silently having done nothing.
    if len(argsglob) == 0:
        parser.error("No raw spectrum data files found.")

    if opts.output != "" and len(argsglob) > 1:
        parser.error("At most one raw spectrum data file is permitted, if -o option is used.")

    rawformat = opts.rawdata.lower()
    xmlformat = opts.xmlformat.lower()

    for a in argsglob:

        if opts.output != "":
            o = opts.output
        else:
            # Replace the extension of the input file, if it has one.
            # splitext only looks at the final path component and leaves
            # names without an extension intact.
            o = os.path.splitext(a)[0]+'.'+opts.xmlformat

        if o == a:
            sys.stderr.write("Input file %s and output file %s are the same!\n"%(a,o))
            sys.exit(1)

        sys.stderr.write("Processing %s to %s\n"%(a,o))

        # Choose the reader: an explicit -R wins, otherwise go by the
        # extension of the input file.
        alower = a.lower()
        if rawformat in ('wiff','qstar'):
            r = QStarWiff(a,opts.peaks)
        elif rawformat in ('t2d','dat','ab4700','ab4800','mariner','voyager'):
            r = AB4700T2D(a,opts.peaks)
        elif rawformat in ('mzxml',):
            r = FromMzXML(a)
        elif alower.endswith('.wiff'):
            r = QStarWiff(a,opts.peaks)
        elif alower.endswith('.t2m'):
            r = AB4700T2D(a,opts.peaks)
        elif alower.endswith('.mzxml'):
            r = FromMzXML(a)
        else:
            parser.error("Bad rawdata format specification.")

        # Choose the writer: an explicit -X wins, otherwise go by the
        # extension of the -o output file.  In the extension branches o is
        # opts.output whenever they can match: with no -o given,
        # opts.output is empty and neither test succeeds.
        outlower = opts.output.lower()
        if xmlformat == 'mzxml':
            x = ToMzXML(r,o,opts.compress,filt,opts.compress_peaks,opts.version)
        elif xmlformat == 'mzdata':
            x = ToMzData(r,o,opts.compress,filt)
        elif outlower.endswith('.mzxml'):
            x = ToMzXML(r,o,opts.compress,filt,opts.compress_peaks,opts.version)
        elif outlower.endswith('.mzdata'):
            x = ToMzData(r,o,opts.compress,filt)
        else:
            parser.error("Bad xml format specification.")

        x.write(debug=opts.debug)
