ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/pymsxml/pymsxml.py
Revision: 1.1.1.1 (vendor branch)
Committed: Sat Nov 4 14:01:59 2006 UTC (15 years, 9 months ago) by nje01
CVS Tags: HEAD
Changes since 1.1: +0 -0 lines
Log Message:

Line User Rev File contents
1 nje01 1.1 #!/usr/bin/env python
2    
3     import sys,os,os.path
4     import re
5     import sha
6     import math
7     import tempfile
8     import csv
9     import gzip
10     import zipfile
11     from array import array
12     from base64 import b64encode,b64decode
13     from urllib import quote,unquote
14     from xml.sax import *
15    
16     class NameVersion:
17     def __init__(self):
18     self.name_ = "PyMsXML"
19     self.majorVer = 0
20     self.minorVer = 4
21     self.revVer = 0
22     def version(self):
23     return "%d.%d.%d"%(self.majorVer,self.minorVer,self.revVer)
24     def name(self):
25     return self.name_
26    
27     def fileSHA(filename):
28     s = sha.new()
29     h = open(filename,'rb')
30     while True:
31     buffer = h.read(1024)
32     if buffer == '':
33     break
34     s.update(buffer)
35     h.close()
36     return s.hexdigest().lower()
37    
38     class ToMzXML:
39     def __init__(self,reader,filename,cmpfmt=None,filt=None):
40     self.filename = filename
41     self.compressfmt = cmpfmt
42     self.reader = reader
43     self.initSHA()
44     self.writeByteCounter = 0
45     self.actualScanCount = 0
46     self.scanIndices = {}
47     self.filter = filt
48    
49     def __del__(self):
50     if hasattr(self,'tmpFileName') and self.tmpFileName and self.tmpFileName != '':
51     os.unlink(self.tmpFileName)
52    
53     def getFilename(self):
54     return self.filename
55    
56     def initSHA(self):
57     self.sha1 = sha.new()
58    
59     def updateSHA(self,s):
60     self.sha1.update(s)
61    
62     def getSHA(self):
63     return self.sha1.hexdigest().lower()
64    
65     def writeString(self,fh,data):
66     self.writeByteCounter += len(data)
67     self.sha1.update(data)
68     fh.write(data)
69    
70     def write(self,debug=False):
71    
72     # Make a temporary file for the scan data to count the number of spectra.
73     (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
74     tmpFile = os.fdopen(tmpFileFD,'wb')
75    
76     # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
77     self.write_scans(tmpFile,debug)
78    
79     tmpFile.close()
80    
81     # Reset!
82    
83     self.initSHA()
84     self.writeByteCounter = 0
85    
86     if self.compressfmt == None and not self.filename.endswith('.gz'):
87     xmlFile = open(self.filename,'wb')
88     elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
89     if self.filename.endswith('.gz'):
90     xmlFile = gzip.open(self.filename,'wb')
91     else:
92     xmlFile = gzip.open(self.filename+'.gz','wb')
93     else:
94     print >>sys.stderr, "Bad compressfmt specification"
95     sys.exit(1)
96    
97     self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
98    
99     outStr = "<mzXML "
100     outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1\" "
101     outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
102     outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd\""
103     outStr += ">\n"
104    
105     self.writeString (xmlFile,outStr)
106    
107     outStr = "<msRun"
108     outStr += " scanCount=\"%d\"" % (self.actualScanCount,)
109     for (k,v) in self.reader.getMsRunMetaData().items():
110     k,f = k.split(':')
111     outStr += (" %%s=\"%s\""%(f,))%(k,v)
112     outStr += ">\n"
113    
114     self.writeString (xmlFile,outStr)
115    
116     filehashes = {}
117    
118     for f in self.reader.getFilenames():
119     if len(f) == 2:
120     outStr = "<parentFile "
121     outStr += "fileName=\"%s\" " % (quote(f[0]),)
122     outStr += "fileType=\"RAWData\" "
123     outStr += "fileSha1=\"%s\"" % (f[1],)
124     outStr += "/>\n"
125     else:
126     outStr = "<parentFile "
127     outStr += "fileName=\"%s\" " % (f[0],)
128     outStr += "fileType=\"%s\" " % (f[1],)
129     outStr += "fileSha1=\"%s\"" % (f[2],)
130     outStr += "/>\n"
131     self.writeString (xmlFile,outStr)
132    
133     outStr = "<msInstrument>\n"
134     outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),)
135     outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),)
136     if self.reader.msIonisation():
137     outStr += "<msIonisation category=\"msIonisation\" value=\"%s\"/>\n" % (self.reader.msIonisation(),)
138     if self.reader.msMassAnalyzer():
139     outStr += "<msMassAnalyzer category=\"msMassAnalyzer\" value=\"%s\"/>\n" % (self.reader.msMassAnalyzer(),)
140     if self.reader.msDetector():
141     outStr += "<msDetector category=\"msDetector\" value=\"%s\"/>\n" % (self.reader.msDetector(),)
142    
143     self.writeString (xmlFile,outStr)
144    
145     outStr = "<software "
146     outStr += "type=\"acquisition\" "
147     outStr += "name=\"%s\" "%(self.reader.acquisitionSoftware(),)
148     outStr += "version=\"%s\""%(self.reader.acquisitionSoftwareVersion(),)
149     outStr += "/>\n"
150    
151     self.writeString (xmlFile,outStr)
152    
153     outStr = "</msInstrument>\n"
154    
155     self.writeString (xmlFile,outStr)
156    
157     outStr = "<dataProcessing>\n"
158    
159     self.writeString (xmlFile,outStr)
160    
161     nv = NameVersion()
162    
163     outStr = "<software "
164     outStr += "type=\"conversion\" "
165     outStr += "name=\"%s\" "%(nv.name(),)
166     outStr += "version=\"%s\"" % (nv.version(),)
167     outStr += "/>\n"
168    
169     self.writeString (xmlFile,outStr)
170    
171     if (self.reader.peakDetect(1)):
172     outStr = "<processingOperation "
173     outStr += "name=\"peak_detection_level_1\" "
174     outStr += "value=\"true\""
175     outStr += "/>\n"
176     outStr += "<processingOperation "
177     outStr += "name=\"peak_detection_level_1_threshold\" "
178     outStr += "value=\"1%\""
179     outStr += "/>\n"
180     outStr += "<processingOperation "
181     outStr += "name=\"peak_detection_level_1_software\" "
182     outStr += "value=\"Analyst\""
183     outStr += "/>\n"
184     self.writeString (xmlFile,outStr)
185     if (self.reader.peakDetect(2)):
186     outStr = "<processingOperation "
187     outStr += "name=\"peak_detection_level_2\" "
188     outStr += "value=\"true\""
189     outStr += "/>\n"
190     outStr += "<processingOperation "
191     outStr += "name=\"peak_detection_level_2_threshold\" "
192     outStr += "value=\"1%\""
193     outStr += "/>\n"
194     outStr += "<processingOperation "
195     outStr += "name=\"peak_detection_level_2_software\" "
196     outStr += "value=\"Analyst\""
197     outStr += "/>\n"
198     self.writeString (xmlFile,outStr)
199    
200     outStr = "<processingOperation "
201     outStr += "name=\"min_peaks_per_spectra\" "
202     outStr += "value=\"1\""
203     outStr += "/>\n"
204    
205     self.writeString (xmlFile,outStr)
206    
207     outStr = "</dataProcessing>\n"
208    
209     self.writeString (xmlFile,outStr)
210    
211     if self.reader.maldi():
212     outStr = '<spotting>\n'
213     for d in self.reader.plateData():
214     outStr += '<plate plateID="%s" spotXCount="%s" spotYCount="%s">\n' % \
215     (d['plateID'],d['spotXCount'],d['spotYCount'])
216     outStr += '<plateManufacturer category="plateManufacturer" value="%s"/>\n' % \
217     (d['plateManufacturer'],)
218     outStr += '<plateModel category="plateModel" value="%s"/>\n' % \
219     (d['plateModel'],)
220     for s in self.reader.spotData(d['plateID']):
221     outStr += '<spot spotID="%s" spotXPosition="%s" spotYPosition="%s">\n' % \
222     (s['spotID'],s['spotXPosition'],s['spotYPosition'])
223     outStr += '<maldiMatrix category="maldiMatrix" value="%s"/>\n' % \
224     (s['maldiMatrix'],)
225     outStr += '</spot>\n'
226     outStr += '</plate>\n'
227     outStr += '</spotting>\n'
228     self.writeString(xmlFile,outStr)
229    
230     if self.reader.lc():
231     pass
232    
233     scanOffset = self.writeByteCounter
234    
235     tmpFile = open(self.tmpFileName,'rb')
236     while True:
237     tmpStr = tmpFile.read(1024)
238     if tmpStr == '':
239     break
240     self.writeString(xmlFile,tmpStr)
241     tmpFile.close()
242     os.unlink(self.tmpFileName)
243     self.tmpFileName = ''
244    
245     outStr = "</msRun>\n"
246     self.writeString (xmlFile,outStr)
247    
248     indexOffset = self.writeByteCounter
249    
250     outStr = "<index "
251     outStr += "name=\"scan\" "
252     outStr += ">\n"
253    
254     self.writeString (xmlFile,outStr)
255    
256     for i in xrange(1,self.actualScanCount+1):
257     outStr = "<offset id=\"%d\">" % (i,)
258     outStr += "%d</offset>\n" % (self.scanIndices[i] + scanOffset,)
259     self.writeString(xmlFile,outStr)
260    
261     outStr = "</index>\n"
262     self.writeString (xmlFile,outStr)
263    
264     outStr = "<indexOffset>%d</indexOffset>\n" % (indexOffset,)
265    
266     self.writeString (xmlFile,outStr)
267    
268     self.writeString (xmlFile,"<sha1>")
269    
270     outStr = self.getSHA()
271    
272     self.writeString (xmlFile,outStr)
273    
274     self.writeString (xmlFile,"</sha1>\n")
275    
276     outStr = "</mzXML>\n"
277     self.writeString (xmlFile,outStr)
278    
279     xmlFile.close()
280    
281     def write_scans(self,xmlFile,debug=False):
282    
283     msLevel = 0
284     scanNumber = 0
285     ancestors = []
286     self.writeByteCounter = 0
287    
288     for (s,d) in self.reader.spectra():
289    
290     if self.filter != None and not self.filter.test(d):
291     continue
292    
293     if debug and scanNumber >= 10:
294     break
295    
296     if not d.has_key('msLevel'):
297     print >>sys.stderr, "Required scan attributes missing."
298     sys.exit(1)
299    
300     prevLevel = msLevel
301     msLevel = d['msLevel']
302    
303     if prevLevel < msLevel and prevLevel > 0:
304     # We went "in" a level, push scan number of parent
305     ancestors.append((scanNumber,prevSpec))
306     elif prevLevel > msLevel and msLevel > 0:
307     if len(ancestors) == 0:
308     pass #print >>sys.stderr, "No ancestor for scan %s at level %s"%(scanNumber,msLevel)
309     else:
310     ancestors.pop()
311    
312     outStr = ''
313     if prevLevel > msLevel:
314     for m in xrange(0,prevLevel-msLevel+1):
315     outStr += "</scan>\n"
316     else:
317     if prevLevel > 0 and prevLevel == msLevel:
318     outStr += "</scan>\n"
319    
320     self.writeString(xmlFile,outStr)
321    
322     scanNumber = scanNumber + 1
323    
324     self.scanIndices[scanNumber] = self.writeByteCounter
325    
326     totIonCurrent = 0
327     maxmz = None
328     minmz = None
329     basePeakMz = None
330     basePeakIntensity = None
331     peaksCount = 0
332     for i in xrange(0,len(s),2):
333     x = s[i]; y = s[i+1]
334     if minmz is None or minmz > x:
335     minmz = x
336     if maxmz is None or maxmz < x:
337     maxmz = x
338     totIonCurrent += y
339     if basePeakIntensity is None or basePeakIntensity < y:
340     basePeakIntensity = y
341     basePeakMz = x
342     peaksCount += 1
343    
344     outStr = "<scan num=\"%d\" msLevel=\"%d\"" % (scanNumber,msLevel)
345     outStr += " peaksCount=\"%d\"" % (peaksCount,)
346     outStr += " lowMz=\"%f\"" % (minmz,)
347     outStr += " highMz=\"%f\"" % (maxmz,)
348     outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,)
349     outStr += " basePeakMz=\"%f\"" % (basePeakMz,)
350     outStr += " basePeakIntensity=\"%f\"" % (basePeakIntensity,)
351    
352     for (k,v) in d.items():
353     if k.startswith('scan.'):
354     k,f = k.split(':')
355     k = k[5:]
356     outStr += (" %%s=\"%s\""%(f,))%(k,v)
357     outStr += ">\n"
358     self.writeString(xmlFile,outStr)
359    
360     if msLevel > 1:
361    
362     if not d.has_key('precursorMz') :
363     print >>sys.stderr, "Required precursorMz attribute missing."
364     sys.exit(1)
365    
366     # We scan the parent spectrum in the region of the
367     # precursorMz to establish the intensity. Optionally,
368     # we tune the precursorMz itself, on the basis of this
369    
370     pMz = d['precursorMz']
371     tol = self.reader.precursorPrecision(pMz)
372     pMzLB = pMz-tol
373     pMzUB = pMz+tol
374     pMzIntensity = 0
375    
376     if len(ancestors) > 0:
377     for i in xrange(0,len(ancestors[-1][1]),2):
378     x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
379     if x < pMzLB:
380     continue
381     if x > pMzUB:
382     break
383     if pMzIntensity < y:
384     pMzIntensity = y
385     pMzTune = x
386    
387     if self.reader.precursorTune():
388     pMz = pMzTune
389    
390     outStr = "<precursorMz"
391     for (k,v) in d.items():
392     if k.startswith('precursorMz.'):
393     k,f = k.split(':')
394     k = k[12:]
395     outStr += (" %%s=\"%s\""%(f,))%(k,v)
396     if len(ancestors)>0:
397     outStr += " precursorScanNum=\"%d\""%(ancestors[-1][0],)
398     outStr += " precursorIntensity=\"%f\">"%(pMzIntensity,)
399     else:
400     outStr += ">"
401     outStr += "%f</precursorMz>\n" % (pMz,)
402     self.writeString(xmlFile,outStr)
403    
404     if self.reader.maldi():
405     outStr = '<maldi'
406     for (k,v) in d.items():
407     if k.startswith('maldi.'):
408     k,f = k.split(':')
409     k = k[6:]
410     outStr += (" %%s=\"%s\""%(f,))%(k,v)
411     outStr += " plateID=\"%s\""%(d['plateID'],)
412     outStr += " spotID=\"%s\""%(d['spotID'],)
413     outStr += "/>\n"
414     self.writeString(xmlFile,outStr)
415    
416     outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">"
417     self.writeString(xmlFile,outStr)
418    
419     # Spec says the byte order shall be
420     # network, which is the same as big endian.
421    
422     if sys.byteorder != 'big':
423     s.byteswap()
424    
425     outStr = b64encode(s.tostring())
426     if not debug:
427     self.writeString(xmlFile,outStr)
428     else:
429     self.writeString(xmlFile,outStr[0:100])
430    
431     if sys.byteorder != 'big':
432     s.byteswap()
433    
434     outStr = "</peaks>\n"
435     self.writeString(xmlFile,outStr)
436    
437     if self.reader.maldi():
438     outStr = ''
439     for (k,v) in d.items():
440     if k.startswith('maldi.'):
441     k,f = k.split(':')
442     k = k[6:]
443     outStr += '<nameValue'
444     outStr += ' name="%s"'%(k,)
445     outStr += (' value="%f"'%(f,))%(v,)
446     outStr += '/>\n'
447     ## outStr += '<nameValue'
448     ## outStr += ' name="plateID"'
449     ## outStr += ' value="%s"'%(d['plateID'],)
450     ## outStr += '/>\n'
451     ## outStr += '<nameValue'
452     ## outStr += ' name="spotID"'
453     ## outStr += ' value="%s"'%(d['spotID'],)
454     ## outStr += '/>\n'
455     self.writeString(xmlFile,outStr)
456    
457     xmlFile.flush()
458    
459     prevSpec = s
460    
461     outStr = ""
462     for m in xrange(0,msLevel):
463     outStr += "</scan>\n"
464    
465     self.writeString(xmlFile,outStr)
466     self.actualScanCount = scanNumber
467    
468     class ToMzData:
469     def __init__(self,reader,filename,cmpfmt=None,filt=None):
470     self.filename = filename
471     self.compressfmt = cmpfmt
472     self.reader = reader
473     self.actualScanCount = 0
474     self.initSHA()
475     self.filter = filt
476    
477     def writeString(self,fh,data):
478     # self.writeByteCounter += len(data)
479     self.sha1.update(data)
480     fh.write(data)
481    
482     def initSHA(self):
483     self.sha1 = sha.new()
484    
485     def updateSHA(self,s):
486     self.sha1.update(s)
487    
488     def getSHA(self):
489     return self.sha1.hexdigest().lower()
490    
491     def write(self,debug=False):
492    
493     # Make a temporary file for the scan data to count the number of spectra.
494     (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
495     tmpFile = os.fdopen(tmpFileFD,'wb')
496    
497     # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
498     self.write_scans(tmpFile,debug)
499    
500     tmpFile.close()
501    
502     scanhash = self.getSHA()
503    
504     self.initSHA()
505    
506     if self.compressfmt == None and not self.filename.endswith('.gz'):
507     xmlFile = open(self.filename,'wb')
508     elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
509     if self.filename.endswith('.gz'):
510     xmlFile = gzip.open(self.filename,'wb')
511     else:
512     xmlFile = gzip.open(self.filename+'.gz','wb')
513     else:
514     print >>sys.stderr, "Bad compressfmt specification"
515     sys.exit(1)
516    
517     self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
518    
519     outStr = "<mzData "
520     outStr += "version=\"1.05\" "
521     outStr += "accessionNumber=\"pymsxml:%s\" "%(scanhash,)
522     outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
523     # outStr += "xmlns=\"http://psidev.sourceforge.net/ms/xml/mzdata\" "
524     outStr += "xsi:noNamespaceSchemaLocation=\"http://psidev.sourceforge.net/ms/xml/mzdata/mzdata.xsd\""
525     outStr += ">\n"
526    
527     self.writeString (xmlFile,outStr)
528    
529     outStr = "<description>\n"
530     outStr += "<admin>\n"
531     outStr += "<sampleName></sampleName>\n"
532     outStr += "<contact>\n"
533     outStr += "<name></name>\n"
534     outStr += "<institution></institution>\n"
535     outStr += "</contact>\n"
536     outStr += "</admin>\n"
537     outStr += "<instrument>\n"
538     outStr += "<instrumentName></instrumentName>\n"
539     outStr += "<source>\n"
540     outStr += "</source>\n"
541     outStr += "<analyzerList count=\"1\">\n"
542     outStr += "<analyzer>\n"
543     outStr += "</analyzer>\n"
544     outStr += "</analyzerList>\n"
545     outStr += "<detector>\n"
546     outStr += "</detector>\n"
547     outStr += "</instrument>\n"
548     outStr += "<dataProcessing>\n"
549     outStr += "<software>\n"
550     outStr += "<name>%s</name>\n"%(self.reader.acquisitionSoftware(),)
551     outStr += "<version>%s</version>\n"%(self.reader.acquisitionSoftwareVersion(),)
552     outStr += "</software>\n"
553     outStr += "<software>\n"
554     nv = NameVersion()
555     outStr += "<name>%s</name>\n"%(nv.name(),)
556     outStr += "<version>%s</version>\n"%(nv.version(),)
557     outStr += "</software>\n"
558     outStr += "</dataProcessing>\n"
559     outStr += "</description>\n"
560    
561     self.writeString (xmlFile,outStr)
562    
563     outStr = "<spectrumList"
564     outStr += " count=\"%d\">\n"%(self.actualScanCount,)
565    
566     self.writeString (xmlFile,outStr)
567    
568     tmpFile = open(self.tmpFileName,'rb')
569     while True:
570     tmpStr = tmpFile.read(1024)
571     if tmpStr == '':
572     break
573     self.writeString(xmlFile,tmpStr)
574     tmpFile.close()
575     os.unlink(self.tmpFileName)
576     self.tmpFileName = ''
577    
578     outStr = "</spectrumList>\n</mzData>\n"
579    
580     self.writeString (xmlFile,outStr)
581    
582     def write_scans(self,xmlFile,debug=False):
583    
584     msLevel = 0
585     scanNumber = 0
586     ancestors = []
587    
588     self.initSHA()
589     # self.writeByteCounter = 0
590    
591     for (s,d) in self.reader.spectra():
592    
593     if self.filter != None and not self.filter.test(d):
594     continue
595    
596     if debug and scanNumber >= 10:
597     break
598    
599     if not d.has_key('msLevel'):
600     print >>sys.stderr, "Required scan attributes missing."
601     sys.exit(1)
602    
603     prevLevel = msLevel
604     msLevel = d['msLevel']
605    
606     if prevLevel < msLevel and prevLevel > 0:
607     # We went "in" a level, push scan number of parent
608     ancestors.append((scanNumber,prevSpec))
609     elif prevLevel > msLevel and msLevel > 0:
610     ancestors.pop()
611    
612     scanNumber = scanNumber + 1
613    
614     totIonCurrent = 0
615     maxmz = None
616     minmz = None
617     basePeakMz = None
618     basePeakIntensity = None
619     peaksCount = 0
620     for i in xrange(0,len(s),2):
621     x = s[i]; y = s[i+1]
622     if minmz is None or minmz > x:
623     minmz = x
624     if maxmz is None or maxmz < x:
625     maxmz = x
626     totIonCurrent += y
627     if basePeakIntensity is None or basePeakIntensity < y:
628     basePeakIntensity = y
629     basePeakMz = x
630     peaksCount += 1
631    
632     outStr = "<spectrum id=\"%d\">\n" % (scanNumber,)
633     outStr += "<spectrumDesc>\n"
634     outStr += "<spectrumSettings>\n"
635    
636     self.writeString (xmlFile,outStr)
637    
638     # outStr += "<acqSpecification>\n"
639     # outStr += "</acqSpecification>\n"
640    
641     outStr = "<spectrumInstrument"
642     outStr += " msLevel=\"%d\""%(msLevel,)
643     if d.has_key('scan.startMz') and \
644     d.has_key('scan.endMz'):
645     outStr += " mzRangeStart=\"%f\""%(d['scan.startMz'],)
646     outStr += " mzRangeStop=\"%f\""%(d['scan.endMz'],)
647     outStr += ">\n"
648     for (k,v) in d.items():
649     if k.startswith('scan.'):
650     k,f = k.split(':')
651     k = k[5:]
652    
653     if k == "polarity":
654     if v == '+':
655     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Positive",)
656     elif v == '-':
657     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Negative",)
658     elif k == 'retentionTime':
659     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"TimeInSeconds\" value=\"%f\"/>\n"%(v,)
660     else:
661     outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
662     outStr += "<userParam name=\"lowMz\" value=\"%f\"/>\n"%(minmz,)
663     outStr += "<userParam name=\"highMz\" value=\"%f\"/>\n"%(maxmz,)
664     outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,)
665     outStr += "<userParam name=\"basePeakMz\" value=\"%f\"/>\n"%(basePeakMz,)
666     outStr += "<userParam name=\"basePeakIntensity\" value=\"%f\"/>\n"%(basePeakIntensity,)
667     outStr += "</spectrumInstrument>\n"
668     outStr += "</spectrumSettings>\n"
669    
670     self.writeString (xmlFile,outStr)
671    
672     if msLevel > 1:
673    
674     if not d.has_key('precursorMz') :
675     print >>sys.stderr, "Required precursorMz attribute missing."
676     sys.exit(1)
677    
678     # We scan the parent spectrum in the region of the
679     # precursorMz to establish the intensity. Optionally,
680     # we tune the precursorMz itself, on the basis of this
681    
682     pMz = d['precursorMz']
683     tol = self.reader.precursorPrecision(pMz)
684     pMzLB = pMz-tol
685     pMzUB = pMz+tol
686     pMzIntensity = 0
687    
688     for i in xrange(0,len(ancestors[-1][1]),2):
689     x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
690     if x < pMzLB:
691     continue
692     if x > pMzUB:
693     break
694     if pMzIntensity < y:
695     pMzIntensity = y
696     pMzTune = x
697    
698     if self.reader.precursorTune():
699     pMz = pMzTune
700    
701     outStr = "<precursorList count=\"1\">\n"
702    
703     outStr += "<precursor"
704     outStr += " msLevel=\"%d\""%(msLevel,)
705     outStr += " spectrumRef=\"%d\""%(ancestors[-1][0],)
706     outStr += ">\n"
707    
708     self.writeString(xmlFile,outStr)
709    
710     outStr = "<ionSelection>\n"
711     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"MassToChargeRatio\" value=\"%f\"/>\n"%(pMz,)
712     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Intensity\" value=\"%f\"/>\n"%(pMzIntensity,)
713     for (k,v) in d.items():
714     if k.startswith('precursorMz.'):
715     k,f = k.split(':')
716     k = k[12:]
717     outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
718     outStr += "</ionSelection>\n"
719    
720     self.writeString(xmlFile,outStr)
721    
722     outStr = "<activation>\n"
723     outStr += "</activation>\n"
724     outStr += "</precursor>\n"
725     outStr += "</precursorList>\n"
726    
727     self.writeString(xmlFile,outStr)
728    
729     if self.reader.maldi():
730     outStr = '<maldi'
731     for (k,v) in d.items():
732     if k.startswith('maldi.'):
733     k,f = k.split(':')
734     k = k[6:]
735     outStr += (" %%s=\"%s\""%(f,))%(k,v)
736     outStr += " plateID=\"%s\""%(d['plateID'],)
737     outStr += " spotID=\"%s\""%(d['spotID'],)
738     outStr += "/>\n"
739     self.writeString(xmlFile,outStr)
740    
741     outStr = "</spectrumDesc>\n"
742     self.writeString(xmlFile,outStr)
743    
744     outStr = "<mzArrayBinary>\n"
745     outStr += "<data"
746     outStr += " precision=\"32\""
747     outStr += " endian=\"%s\""%(sys.byteorder,)
748     outStr += " length=\"%d\""%(peaksCount,)
749     outStr += ">"
750     self.writeString(xmlFile,outStr)
751    
752     outStr = b64encode(s[0:peaksCount*2:2].tostring())
753     if not debug:
754     self.writeString(xmlFile,outStr)
755     else:
756     self.writeString(xmlFile,outStr[0:100])
757    
758     outStr = "</data>\n</mzArrayBinary>\n"
759     self.writeString(xmlFile,outStr)
760    
761     outStr = "<intenArrayBinary>\n"
762     outStr += "<data"
763     outStr += " precision=\"32\""
764     outStr += " endian=\"%s\""%(sys.byteorder,)
765     outStr += " length=\"%d\""%(peaksCount,)
766     outStr += ">"
767     self.writeString(xmlFile,outStr)
768    
769     outStr = b64encode(s[1:peaksCount*2:2].tostring())
770     if not debug:
771     self.writeString(xmlFile,outStr)
772     else:
773     self.writeString(xmlFile,outStr[0:100])
774    
775     outStr = "</data>\n</intenArrayBinary>\n"
776     self.writeString(xmlFile,outStr)
777    
778     if self.reader.maldi():
779     outStr = ''
780     for (k,v) in d.items():
781     if k.startswith('maldi.'):
782     k,f = k.split(':')
783     k = k[6:]
784     outStr += '<nameValue'
785     outStr += ' name="%s"'%(k,)
786     outStr += (' value="%f"'%(f,))%(v,)
787     outStr += '/>\n'
788     ## outStr += '<nameValue'
789     ## outStr += ' name="plateID"'
790     ## outStr += ' value="%s"'%(d['plateID'],)
791     ## outStr += '/>\n'
792     ## outStr += '<nameValue'
793     ## outStr += ' name="spotID"'
794     ## outStr += ' value="%s"'%(d['spotID'],)
795     ## outStr += '/>\n'
796     self.writeString(xmlFile,outStr)
797    
798     outStr = "</spectrum>\n"
799     self.writeString(xmlFile,outStr)
800     xmlFile.flush()
801    
802     prevSpec = s
803    
804     self.actualScanCount = scanNumber
805    
806     class QStarWiff:
807     def __init__(self,filename,peaks=None):
808     self.filename = filename
809     self.filehash = fileSHA(filename)
810     self.theTIC = None
811     self.theWF = None
812     self.theFMANSpecData = None
813     self.startTime = None
814     self.stopTime = None
815     if peaks:
816     self.applyPeakDetect = map(int,peaks.split(','))
817    
818     if not os.path.exists(self.filename):
819     print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
820     sys.exit(1)
821    
822     self.filename = os.path.abspath(self.filename)
823    
824     def __del__(self):
825     # Probably unnecessary
826     self.close()
827    
828     def peakDetect(self,msLevel):
829     return ((msLevel in self.applyPeakDetect) and (msLevel != 1))
830    
831     def open(self):
832    
833     if self.theTIC is None:
834    
835     from win32com.client import Dispatch
836    
837     self.theFMANSpecData = Dispatch('Analyst.FMANSpecData')
838     self.theTIC = Dispatch('Analyst.FMANChromData')
839     self.theSPL = Dispatch('Analyst.SpectralPeakList')
840    
841     self.theFMANSpecData.WiffFileName = self.filename
842     self.theTIC.WiffFileName = self.filename
843     self.theWF = self.theFMANSpecData.GetWiffFileObject()
844    
845     self.theTIC.SetToTIC(1,0,0)
846    
847     self.startTime = self.theTIC.GetDataPointXValue(1)
848     self.stopTime = self.theTIC.GetDataPointXValue(self.theTIC.GetNumberOfDataPoints())
849    
850     def close(self):
851    
852     if not self.theTIC is None:
853    
854     if not self.theWF is None:
855     self.theWF.CloseWiffFile()
856     self.theTIC = None
857     self.theFMANSpecData = None
858     self.theWF = None
859    
860     def spectra(self):
861     self.open()
862     spectrumCount=0;
863    
864     numberOfSamples = self.theWF.GetActualNumberOfSamples()
865     for i in xrange(1,numberOfSamples+1):
866    
867     # print >>sys.stderr, "Sample %d"%i
868     # print >>sys.stderr, "Sample Name: %s"%(self.theWF.GetSampleName(i),)
869    
870     sampleName = self.theWF.GetSampleName(i)
871    
872     numberOfPeriods = self.theWF.GetActualNumberOfPeriods(i)
873     for j in xrange(0,numberOfPeriods):
874    
875     numberOfExperiments = self.theWF.GetNumberOfExperiments(i,j)
876     self.theTIC.SetToTIC(i, j, 0)
877    
878     numberOfTICDataPoints = self.theTIC.GetNumberOfDataPoints()
879     for l in xrange(1,numberOfTICDataPoints+1):
880    
881     for k in xrange(0,numberOfExperiments):
882    
883     self.theExperiment = self.theWF.GetExperimentObject(i, j, k)
884     self.theTIC.SetToTIC(i, j, k)
885    
886     # Convert to seconds?
887     xValue = self.theTIC.GetDataPointXValue(l) * 60
888    
889     spectrumCount += 1;
890    
891     # If any Ion current at time "l"
892     if self.theTIC.GetDataPointYValue(l) > 0:
893    
894     self.theFMANSpecData.SetSpectrum(i, j, k, xValue, xValue)
895    
896     if self.theExperiment.ScanType == 9:
897     msLevel = 2
898     m = re.search(r'[(]([0-9.]+)[)]',self.theFMANSpecData.DataTitle)
899     fixedMass = float(m.group(1))
900     else:
901     msLevel = 1
902    
903     startMass = self.theFMANSpecData.GetStartMass()
904     stopMass = self.theFMANSpecData.GetStopMass()
905     dataArr = array('f');
906     peakDetect = 'False'
907     if self.peakDetect(msLevel):
908     peakDetect = 'True'
909     minY,maxY = self.theFMANSpecData.GetYValueRange()
910     print minY,maxY
911     self.theFMANSpecData.Threshold(maxY*0.01)
912     self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0)
913     numPeaks = self.theSPL.GetNumberOfPeaks()
914     print numPeaks
915     for m in xrange(1,numPeaks+1):
916     (x,w,y,yp) = self.theSPL.GetPeak(m)
917     if y <= 0:
918     continue
919     dataArr.append(x)
920     dataArr.append(y)
921     else:
922     numPoints = self.theFMANSpecData.GetNumberOfDataPoints()
923     for m in xrange(1,numPoints+1):
924     if self.theFMANSpecData.GetDataPointYValue(m) <= 0:
925     continue
926     x = self.theFMANSpecData.GetDataPointXValue(m)
927     dataArr.append(x)
928     y = self.theFMANSpecData.GetDataPointYValue(m)
929     dataArr.append(y)
930     polarity = self.theExperiment.Polarity
931    
932     if len(dataArr) == 0:
933     continue
934    
935     if polarity == 0:
936     polarity_val = "+"
937     else:
938     polarity_val = "-"
939    
940     rt = "PT%fS"%(xValue,)
941    
942     # Scan level attributes
943     # Required by mzXML (no format spec. needed)
944     d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect}
945     # Optional ('scan.' and prefixformat spec. needed)
946     d.update({
947     'scan.retentionTime:PT%fS':xValue,
948     'scan.polarity:%s':polarity_val,
949     'scan.startMz:%f':startMass,
950     'scan.endMz:%f':stopMass,
951     'scan.sample:%d':i,
952     'scan.period:%d':j,
953     'scan.experiment:%d':k,
954     'scan.sampleName:%s':sampleName
955     })
956     d.update({'scanOrigin.parentID:%s': self.filehash[f],
957     'scanOrigin.num:%d': spectrumCount})
958     if msLevel == 1:
959     yield (dataArr,d)
960     else:
961     # tandem MS scans add the following
962     # Required by mzXML
963     d.update({'precursorMz':fixedMass})
964     # Optional
965     # d.update({'precursorMz.precursorMzIntensity:f':0.0})
966     yield (dataArr,d)
967    
968     def getMsRunMetaData(self):
969     self.open()
970     d = {'startTime:PT%fS':self.startTime,
971     'endTime:PT%fS':self.stopTime,
972     }
973     return d
974    
975     def getFilenames(self):
976     return [ (self.filename,self.filehash) ]
977    
978     def msManufacturer(self):
979     return "ABI / SCIEX"
980    
981     def msModel(self):
982     return "QSTAR"
983    
984     def msIonisation(self):
985     return "ESI"
986    
987     def msMassAnalyzer(self):
988     return "Quadrupole"
989    
990     def msDetector(self):
991     return None
992    
993     def acquisitionSoftware(self):
994     return "Analyst"
995    
996     def acquisitionSoftwareVersion(self):
997     return "1.0"
998    
999     def precursorPrecision(self,mz):
1000     return .1
1001    
1002     def precursorTune(self):
1003     return True
1004    
1005     def lc(self):
1006     return True
1007    
1008     def maldi(self):
1009     return not self.lc()
1010    
1011     class mzXML_md_sax(handler.ContentHandler):
1012    
1013     def __init__(self):
1014     self.md = {}
1015     self.context = ''
1016    
1017     def startElement(self, name, attrs):
1018     self.context += ':%s'%(name,)
1019     # print >>sys.stderr, ">>",self.context
1020     # sys.stderr.flush()
1021     if self.context.endswith(':msRun'):
1022     self.md['startTime'] = float(attrs['startTime'][2:-1])
1023     self.md['endTime'] = float(attrs['endTime'][2:-1])
1024     elif self.context.endswith(':parentFile'):
1025     self.md['fileName'] = unquote(attrs['fileName'])
1026     self.md['fileType'] = attrs['fileType']
1027     self.md['fileSha1'] = attrs['fileSha1']
1028     elif self.context.endswith(':instrument'):
1029     self.md['msManufacturer'] = attrs['manufacturer']
1030     self.md['msModel'] = attrs['model']
1031     self.md['msIonisation'] = attrs['ionisation']
1032     self.md['msMassAnalyzer'] = attrs['msType']
1033     elif self.context.endswith(':msInstrument:msManufacturer'):
1034     self.md['msManufacturer'] = attrs['value']
1035     elif self.context.endswith(':msInstrument:msModel'):
1036     self.md['msModel'] = attrs['value']
1037     elif self.context.endswith(':msInstrument:msIonisation'):
1038     self.md['msIonisation'] = attrs['value']
1039     elif self.context.endswith(':msInstrument:msMassAnalyzer'):
1040     self.md['msMassAnalyzer'] = attrs['value']
1041     elif self.context.endswith(':msInstrument:msDetector'):
1042     self.md['msDetector'] = attrs['value']
1043     elif self.context.endswith(':msInstrument:software'):
1044     if attrs['type'] == 'acquisition':
1045     self.md['acquisitionSoftware'] = attrs['name']
1046     self.md['acquisitionSoftwareVersion'] = attrs['version']
1047     elif self.context.endswith(':instrument:software'):
1048     if attrs['type'] == 'acquisition':
1049     self.md['acquisitionSoftware'] = attrs['name']
1050     self.md['acquisitionSoftwareVersion'] = attrs['version']
1051     elif self.context.endswith(':scan'):
1052     raise SAXException("Early termination")
1053    
1054     def endElement(self, name):
1055     # print >>sys.stderr, "<<",self.context
1056     # sys.stderr.flush()
1057     self.context = self.context[0:-(len(name)+1)]
1058    
1059    
1060     class mzXML_spec_sax(handler.ContentHandler):
1061    
1062     def __init__(self):
1063     self.context = ''
1064     self.content = ''
1065     self.spectra = []
1066     self.done = False
1067     self.scancount = 0
1068     self.scanmax = 1000
1069     self.scanstart = 0
1070    
1071     def startElement(self, name, attrs):
1072     self.context += ':%s'%(name,)
1073     # print >>sys.stderr, ">>",self.context
1074     # sys.stderr.flush()
1075     self.content = ''
1076     if name == 'scan':
1077     self.scanmd = {}
1078     self.scanlevel = int(attrs['msLevel'])
1079     self.scannum = int(attrs['num'])
1080     if attrs.has_key('retentionTime'):
1081     self.rt = float(attrs['retentionTime'][2:-1])
1082     self.endMz = float(attrs['endMz'])
1083     self.startMz = float(attrs['startMz'])
1084     self.polarity = attrs.get('polarity',None)
1085     for (k,v) in attrs.items():
1086     self.scanmd[k] = v
1087     self.precursorMz = None
1088     self.scancount += 1
1089     elif name == 'msRun':
1090     self.scancount = 0
1091     self.spectra = []
1092     self.context = ':msRun'
1093     elif name == 'scanOrigin':
1094     self.scanmd['scanOrigin'] = attrs
1095    
1096     def characters(self, content):
1097     if self.scancount >= self.scanstart:
1098     if self.context.endswith(':peaks') or \
1099     self.context.endswith(':precursorMz'):
1100     self.content += content
1101    
1102     def endElement(self, name):
1103     if self.scancount >= self.scanstart:
1104     if self.context.endswith('scan:peaks'):
1105    
1106     spec = array('f')
1107     # print >>sys.stderr, len(self.content), len(b64decode(self.content)), self.content
1108     spec.fromstring(b64decode(self.content))
1109     if sys.byteorder != 'big':
1110     spec.byteswap()
1111     d = {'msLevel':self.scanlevel}
1112     # Optional ('scan.' and prefixformat spec. needed)
1113     d.update({
1114     'scan.retentionTime:PT%fS':self.rt,
1115     'scan.startMz:%f':self.startMz,
1116     'scan.endMz:%f':self.endMz
1117     })
1118     if self.polarity != None:
1119     d.update({'scan.polarity:%s':self.polarity})
1120     if self.scanlevel == 2:
1121     d.update({'precursorMz':self.precursorMz})
1122     if self.scanmd.has_key('scanOrigin'):
1123     d.update({'scanOrigin.parentID:%s': self.scanmd['scanOrigin']['parentID'],
1124     'scanOrigin.num:%d': self.scanmd['scanOrigin']['num']})
1125     for (k,v) in self.scanmd.items():
1126     if not d.has_key(k):
1127     d[k] = v
1128     self.spectra.append((spec,d))
1129     if len(self.spectra) >= self.scanmax:
1130     self.scanstart = self.scancount+1
1131     raise SAXException("Early termination")
1132     elif self.context.endswith('scan:precursorMz'):
1133     self.precursorMz = float(self.content)
1134     if self.context.endswith('msRun'):
1135     self.done = True
1136     # print >>sys.stderr, "<<",self.context
1137     # sys.stderr.flush()
1138     self.context = self.context[0:-(len(name)+1)]
1139    
1140     class FromMzXML:
1141     def __init__(self,filename):
1142     self.filename = filename
1143     self.parser = None
1144    
1145     if not os.path.exists(self.filename):
1146     print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1147     sys.exit(1)
1148    
1149     if self.filename.endswith('.gz'):
1150     self.compressfmt = 'gz'
1151     else:
1152     self.compressfmt = None
1153    
1154     def __del__(self):
1155     self.close()
1156    
1157     def open(self):
1158    
1159     if self.parser == None:
1160     # print >>sys.stderr, "Parsing out meta-data..."
1161     # sys.stderr.flush()
1162     self.parser = make_parser()
1163     self.handler = mzXML_md_sax()
1164     self.parser.setContentHandler(self.handler)
1165     try:
1166     if self.compressfmt == None:
1167     self.parser.parse(self.filename)
1168     elif self.compressfmt == 'gz':
1169     self.parser.parse(gzip.open(self.filename,'r'))
1170     else:
1171     print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1172     sys.exit(1)
1173     except SAXException:
1174     pass
1175    
1176     self.md = self.handler.md
1177    
1178     # print >>sys.stderr, self.md
1179     # sys.stderr.flush()
1180    
1181     # print >>sys.stderr, "Set up spectrum parser..."
1182     # sys.stderr.flush()
1183     self.parser = make_parser()
1184     self.handler = mzXML_spec_sax()
1185     self.parser.setContentHandler(self.handler)
1186    
1187     def close(self):
1188     pass
1189    
1190     def spectra(self):
1191     self.open()
1192     while True:
1193     # print >>sys.stderr, "Spectrum parser to read spectra from",self.handler.scanstart
1194     # sys.stderr.flush()
1195     try:
1196     if self.compressfmt == None:
1197     self.parser.parse(self.filename)
1198     elif self.compressfmt == 'gz':
1199     self.parser.parse(gzip.open(self.filename,'r'))
1200     else:
1201     print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1202     sys.exit(1)
1203     except SAXException:
1204     pass
1205     for (s,d) in self.handler.spectra:
1206     # print >>sys.stderr, "Yeild spectrum number:",d['num']
1207     yield (s,d)
1208     if self.handler.done:
1209     break
1210    
1211     def getMsRunMetaData(self):
1212     self.open()
1213     d = {'startTime:PT%fS':self.md['startTime'],
1214     'endTime:PT%fS':self.md['endTime']
1215     }
1216     return d
1217    
1218     def getFilenames(self):
1219     return [ (self.md['fileName'],self.md['fileType'],self.md['fileSha1']) ]
1220    
1221     def msManufacturer(self):
1222     return self.md.get('msManufacturer','')
1223    
1224     def msModel(self):
1225     return self.md.get('msModel','')
1226    
1227     def msIonisation(self):
1228     return self.md.get('msIonisation','')
1229    
1230     def msMassAnalyzer(self):
1231     return self.md.get('msMassAnalyzer','')
1232    
1233     def msDetector(self):
1234     return self.md.get('msDetector','')
1235    
1236     def acquisitionSoftware(self):
1237     return self.md.get('acquisitionSoftware','')
1238    
1239     def acquisitionSoftwareVersion(self):
1240     return self.md.get('acquisitionSoftwareVersion','')
1241    
1242     def precursorPrecision(self,mz):
1243     return 0
1244    
1245     def precursorTune(self):
1246     return False
1247    
1248     def lc(self):
1249     return True
1250    
1251     def maldi(self):
1252     return not self.lc()
1253    
1254     class AB4700T2D:
1255     def __init__(self,filename,peaks=None):
1256     self.filename = filename
1257     self.datafiles = None
1258     self.deapp = None
1259     self.metadata = {}
1260     self.platespots = {}
1261     self.applyPeakDetect = []
1262     if peaks:
1263     self.applyPeakDetect = map(int,peaks.split(','))
1264    
1265     if not os.path.exists(self.filename):
1266     print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1267     sys.exit(1)
1268    
1269     self.filename = os.path.abspath(self.filename)
1270    
1271     self.dir = os.path.split(self.filename)[0]
1272    
1273     def __del__(self):
1274     # Probably unnecessary
1275     self.close()
1276    
1277     def open(self):
1278    
1279     if self.deapp is None:
1280    
1281     self.metadata['PLATEDEF'] = []
1282     self.metadata['PLATE'] = []
1283     self.metadata['SPOT'] = []
1284     self.metadata['SCAN'] = []
1285    
1286     h = open(self.filename,'rb')
1287     r = csv.reader(h,'excel-tab')
1288     for l in r:
1289     if len(l) == 0:
1290     continue
1291     kw = l.pop(0).upper()
1292     if kw in ('PLATEDEF','PLATE','SPOT','SCAN'):
1293     self.metadata[kw].append({})
1294     while len(l)>=2:
1295     k = l.pop(0);v = l.pop(0);
1296     if (kw == 'PLATE' and \
1297     (k in ('plateID','spotXCount','spotYCount','plateManufacturer','plateModel'))) or \
1298     (kw == 'PLATEDEF' and \
1299     (k in ('plateID','plateManufacturer','plateModel','spotNaming','maldiMatrix'))) or \
1300     (kw == 'SPOT' and \
1301     (k in ('plateID','spotID','spotXPosition','spotYPosition','maldiMatrix'))) or \
1302     (kw == 'SCAN' and \
1303     (k in ('plateID','spotID','filename','index'))):
1304     self.metadata[kw][-1][k] = v
1305     elif k == '':
1306     break
1307     else:
1308     print >>sys.stderr,"Bad key %s for %s row"%(k,kw)
1309     sys.exit(1)
1310     h.close()
1311    
1312     for pd in self.metadata['PLATEDEF']:
1313     # insert all the gory details needed for a particular plate model etc...
1314     if pd['plateManufacturer'] == 'ABI / SCIEX' and pd['plateModel'] == '01-192+06-BB':
1315     if not pd.has_key('spotNaming') or not pd.has_key('maldiMatrix') or \
1316     not pd['spotNaming'] in ('alpha','parallel','antiparallel'):
1317     print >>sys.stderr,"Bad plate definition, missing maldiMatrix or spotNaming, or bad spotNaming value."
1318     sys.exit(1)
1319     self.metadata['PLATE'].append({
1320     'plateID':pd['plateID'],
1321     'plateManufacturer':pd['plateManufacturer'],
1322     'plateModel':pd['plateModel'],
1323     'spotXCount':24,
1324     'spotYCount':8,
1325     })
1326     if pd['spotNaming'] == 'alpha':
1327     for y in xrange(0,8):
1328     for x in xrange(0,24):
1329     self.metadata['SPOT'].append({
1330     'plateID':pd['plateID'],
1331     'spotID':"%c%d"%(y+ord('A'),x+1),
1332     'spotXPosition':x,
1333     'spotYPosition':2*y+(x%2),
1334     'maldiMatrix':pd['maldiMatrix']
1335     })
1336     else:
1337     print >>sys.stderr,"Valid spotNaming value, not yet implemented."
1338     sys.exit(1)
1339    
1340     for pl in self.metadata['PLATE']:
1341     self.platespots[pl['plateID']] = []
1342    
1343     for sp in self.metadata['SPOT']:
1344     self.platespots[sp['plateID']].append(sp)
1345    
1346     self.datafiles = [ (os.path.join(self.dir,md['filename']),int(md['index'])) for md in self.metadata['SCAN'] ]
1347     self.distinct_datafiles = dict([(os.path.join(self.dir,md['filename']),
1348     fileSHA(os.path.join(self.dir,md['filename']))) for md in self.metadata['SCAN']])
1349    
1350     self.maptoscan = {}
1351     index = 0
1352     for md in self.metadata['SCAN']:
1353     self.maptoscan[(os.path.join(self.dir,md['filename']),int(md['index']))] = index
1354     index += 1
1355    
1356     from win32com.client import Dispatch, gencache
1357     self.deapp = Dispatch('DataExplorer.Application',
1358     resultCLSID='{3FED40F1-D409-11D1-8B56-0060971CB54B}')
1359     self.delib = gencache.EnsureModule('{06972F50-13F6-11D3-A5CB-0060971CB54B}',0,4,2)
1360     self.deapp.AutomatedProcessing = 1
1361     self.deapp.Visible = 0
1362    
1363     def close(self):
1364    
1365     if not self.deapp is None:
1366     self.deapp.Quit()
1367     self.deapp = None
1368    
1369     def peakDetect(self,msLevel):
1370     return msLevel in self.applyPeakDetect
1371    
1372     # remove .set and .cts files to prevent DataExplorer crashes - DT 11-1-2006
1373     def remsettingsfile(self,fn):
1374    
1375     if ((len(fn) > 3) and (fn[-4:].lower() == '.t2d')):
1376     path2del = fn[:-4] + '.cts'
1377     if os.path.exists(path2del):
1378     os.unlink(path2del)
1379     path2del = fn[:-4] + '.set'
1380     if os.path.exists(path2del):
1381     os.unlink(path2del)
1382    
1383     def spectra(self):
1384     self.open()
1385    
1386     prevfile = ''
1387     for (f,i) in self.datafiles:
1388    
1389     if f != prevfile:
1390     self.deapp.Documents.Close()
1391    
1392     if not os.path.exists(f):
1393     print >>sys.stderr, "Filename %s does not exist."%(f,)
1394     sys.exit(1)
1395    
1396     self.remsettingsfile(f)
1397     self.remsettingsfile(prevfile)
1398    
1399     self.deapp.Documents.Open(f)
1400    
1401     doc = self.deapp.Documents.Item(0)
1402     sv = doc.SpecView
1403    
1404     sv.SetSpectrumAt(i-1)
1405    
1406     (tf,fixedMass) = doc.InstrumentSettings.GetSetting(self.delib.constants.dePreCursorIon,i-1,None)
1407    
1408     if fixedMass > 0:
1409     msLevel = 2
1410     else:
1411     msLevel = 1
1412    
1413     dataArr = array('f');
1414     peakDetect="False"
1415     if self.peakDetect(msLevel):
1416     peakDetect="True"
1417     sv.CorrectBaseline()
1418     doc.SpecSetup.NoiseReductionStdev = 2.0
1419     sv.FilterNoise(self.delib.constants.deNoiseRemoval)
1420     sv.PeakSettings.PeakThresholdPercent = 1
1421     st,sp = sv.GetPeakData(self.delib.constants.deSpecPeakAll,
1422     self.delib.constants.deSpecPeakSortMass, 0, 0)
1423     for j in xrange(len(sp)):
1424     x = sp[j][self.delib.constants.deSpecPeakCentroidMass]
1425     y = sp[j][self.delib.constants.deSpecPeakArea]
1426     if y <= 0:
1427     continue
1428     dataArr.append(x)
1429     dataArr.append(y)
1430     else:
1431     n,s = sv.GetRawData(0,0)
1432     for j in xrange(n):
1433     dataArr.append(s[j][0])
1434     dataArr.append(s[j][1])
1435    
1436     if len(dataArr) == 0:
1437     continue
1438    
1439     polarity_val = "+"
1440    
1441     # Scan level attributes
1442     # Required by mzXML (no format spec. needed)
1443     d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect}
1444     # Optional ('scan.' and prefixformat spec. needed)
1445     d.update({
1446     'scan.polarity:%s':polarity_val,
1447     })
1448    
1449     scanindex = self.maptoscan[(f,i)]
1450     d.update({
1451     'plateID':self.metadata['SCAN'][scanindex]['plateID'],
1452     'spotID':self.metadata['SCAN'][scanindex]['spotID'],
1453     })
1454     d.update({'scanOrigin.parentID:%s': self.distinct_datafiles[f],
1455     'scanOrigin.num:%d': i})
1456     if msLevel == 1:
1457     yield (dataArr,d)
1458     else:
1459     # tandem MS scans add the following
1460     # Required by mzXML
1461     d.update({'precursorMz':fixedMass})
1462     # Optional
1463     d.update({'precursorMz.precursorCharge:%d':1})
1464     yield (dataArr,d)
1465    
1466     prevfile = f
1467    
1468     self.deapp.Documents.Close()
1469    
1470     self.remsettingsfile(prevfile)
1471    
1472     def getMsRunMetaData(self):
1473     self.open()
1474     return {}
1475    
1476     def getFilenames(self):
1477     return [ t for t in self.distinct_datafiles.items() ]
1478    
1479     def msManufacturer(self):
1480     return "ABI / SCIEX"
1481    
1482     def msModel(self):
1483     return "4700 Proteomic Analyzer"
1484    
1485     def msIonisation(self):
1486     return "MALDI"
1487    
1488     def msMassAnalyzer(self):
1489     return "TOF"
1490    
1491     def msDetector(self):
1492     return None
1493    
1494     def acquisitionSoftware(self):
1495     return "DataExplorer"
1496    
1497     def acquisitionSoftwareVersion(self):
1498     return "1.0"
1499    
1500     def precursorPrecision(self,mz):
1501     return .1
1502    
1503     def precursorTune(self):
1504     return False
1505    
1506     def lc(self):
1507     return False
1508    
1509     def maldi(self):
1510     return not self.lc()
1511    
1512     def plateData(self):
1513     self.open()
1514     for d in self.metadata['PLATE']:
1515     yield d
1516    
1517     def spotData(self,plate):
1518     self.open()
1519     for s in self.platespots[plate]:
1520     yield s
1521    
1522     class scanfilter:
1523     def __init__(self,str):
1524     self.rtre = re.compile('^PT([0-9]+(\.[0-9]+)?)S$')
1525     self.intre = re.compile('^[1-9][0-9]*$')
1526     self.floatre = re.compile('^[0-9]+(\.[0-9]+)?$')
1527     self.cl = []
1528     clause = str.split(',')
1529     for cl in clause:
1530     scl = cl.split('.')
1531     m = self.rtre.match(scl[2].strip())
1532     if m != None:
1533     v = float(m.group(1))
1534     elif self.floatre.match(scl[2].strip()):
1535     v = float(scl[2])
1536     elif self.intre.match(scl[2].strip()):
1537     v = int(scl[2])
1538     else:
1539     v = scl[2]
1540     self.cl.append((scl[0],scl[1],v))
1541    
1542     def test(self,d):
1543     a = {}
1544     for (k,v) in d.iteritems():
1545     k0 = k.split(':')[0].split('.')[-1]
1546     a[k0] = v
1547     for cl in self.cl:
1548     if type(a[cl[0]]) == type(u'') or \
1549     type(a[cl[0]]) == type('') :
1550     m = self.rtre.match(a[cl[0]].strip())
1551     if m != None:
1552     v1 = float(m.group(1))
1553     elif self.floatre.match(a[cl[0]].strip()):
1554     v1 = float(a[cl[0]])
1555     elif self.intre.match(a[cl[0]].strip()):
1556     v1 = int(a[cl[0]])
1557     else:
1558     v1 = a[cl[0]].strip()
1559     else:
1560     v1 = a[cl[0]]
1561     v2 = cl[2]
1562     # print >>sys.stderr, "Spectrum test:",v1,type(v1),cl[1],v2,type(v2)
1563     # sys.stderr.flush()
1564     if cl[1] == 'eq' and v1 != v2:
1565     # print >>sys.stderr, "FALSE"
1566     # sys.stderr.flush()
1567     return False
1568     elif cl[1] == 'ne' and not v1 != v2:
1569     return False
1570     elif cl[1] == 'lt' and not v1 < v2:
1571     return False
1572     elif cl[1] == 'le' and not v1 <= v2:
1573     return False
1574     elif cl[1] == 'gt' and not v1 > v2:
1575     return False
1576     elif cl[1] == 'ge' and not v1 >= v2:
1577     return False
1578     # print >>sys.stderr, "TRUE"
1579     # sys.stderr.flush()
1580     return True
1581    
1582     if __name__ == '__main__':
1583    
1584     from optparse import OptionParser
1585     import sys
1586     import glob
1587    
1588     parser = OptionParser()
1589     parser.add_option("-R", "--rawdata", type="string", dest="rawdata", default="",\
1590     help="Format of raw data. Optional if raw spectra file ends in .wiff or .t2m.")
1591     parser.add_option("-X", "--xmlformat", type="string", dest="xmlformat", default="",\
1592     help="XML format to output. Optional if output file ends in .mzxml or .mzdata.")
1593     parser.add_option("-o", "--output", type="string", dest="output", default="",\
1594     help="Output file.")
1595     parser.add_option("-p", "--peaks", type="string", dest="peaks", default="2",\
1596     help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.")
1597     parser.add_option("-f", "--filter", type="string", dest="filter", default=None,\
1598     help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.")
1599     parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\
1600     help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.")
1601     parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,\
1602     help="Compress output file. Valid options 'gz'. Default: None.")
1603    
1604     ( opts, args ) = parser.parse_args()
1605    
1606     if opts.output == "" and opts.xmlformat == "":
1607     parser.error("Either -o or -X must be suppied.")
1608     sys.exit(1)
1609    
1610     if opts.filter != None:
1611     filt=scanfilter(opts.filter)
1612     else:
1613     filt=None
1614    
1615     argsglob = []
1616     for a in args:
1617     argsglob.extend(glob.glob(a))
1618    
1619     if opts.output != "" and len(argsglob) > 1:
1620     parser.error("At most one raw spectrum data file is permitted, if -o option is used.")
1621    
1622     for a in argsglob:
1623    
1624     if opts.output != "":
1625     o = opts.output
1626     else:
1627     p = a.rfind('.')
1628     o = a[0:p]+'.'+opts.xmlformat
1629    
1630     if o == a:
1631     print >>sys.stderr, "Input file %s and output file %s are the same!"%(a,o)
1632     sys.exit(1)
1633    
1634     print >>sys.stderr, "Processing",a,"to",o
1635    
1636     if opts.rawdata.lower() in ('wiff','qstar'):
1637     r = QStarWiff(a,opts.peaks)
1638     elif opts.rawdata.lower() in ('t2d','dat','ab4700','ab4800','mariner','voyager'):
1639     r = AB4700T2D(a,opts.peaks)
1640     elif opts.rawdata.lower() in ('mzxml',):
1641     r = FromMzXML(a)
1642     elif a[-5:].lower() in ('.wiff',):
1643     r = QStarWiff(a,opts.peaks)
1644     elif a[-4:].lower() in ('.t2m',):
1645     r = AB4700T2D(a,opts.peaks)
1646     elif a[-6:].lower() in ('.mzxml',):
1647     r = FromMzXML(a)
1648     else:
1649     parser.error("Bad rawdata format specification.")
1650     sys.exit(1)
1651    
1652     if opts.xmlformat.lower() == 'mzxml':
1653     x = ToMzXML(r,o,opts.compress,filt)
1654     elif opts.xmlformat.lower() == 'mzdata':
1655     x = ToMzData(r,o,opts.compress,filt)
1656     elif opts.output[-6:].lower() in ('.mzxml',):
1657     x = ToMzXML(r,opts.output,opts.compress,filt)
1658     elif opts.output[-7:].lower() in ('.mzdata',):
1659     x = ToMzData(r,opts.output,opts.compress,filt)
1660     else:
1661     parser.error("Bad xml format specification.")
1662     sys.exit(1)
1663    
1664     x.write(debug=opts.debug)