ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/pymsxml/pymsxml.py
Revision: 1.2
Committed: Sat Nov 4 14:04:22 2006 UTC (15 years, 9 months ago) by nje01
Branch: MAIN
Changes since 1.1: +29 -14 lines
Log Message:
*** empty log message ***

Line User Rev File contents
1 nje01 1.1 #!/usr/bin/env python
2    
3     import sys,os,os.path
4     import re
5     import sha
6     import math
7     import tempfile
8     import csv
9     import gzip
10     import zipfile
11     from array import array
12     from base64 import b64encode,b64decode
13     from urllib import quote,unquote
14     from xml.sax import *
15    
16     class NameVersion:
17     def __init__(self):
18     self.name_ = "PyMsXML"
19     self.majorVer = 0
20     self.minorVer = 4
21     self.revVer = 0
22     def version(self):
23     return "%d.%d.%d"%(self.majorVer,self.minorVer,self.revVer)
24     def name(self):
25     return self.name_
26    
27     def fileSHA(filename):
28     s = sha.new()
29     h = open(filename,'rb')
30     while True:
31     buffer = h.read(1024)
32     if buffer == '':
33     break
34     s.update(buffer)
35     h.close()
36     return s.hexdigest().lower()
37    
38     class ToMzXML:
39     def __init__(self,reader,filename,cmpfmt=None,filt=None):
40     self.filename = filename
41     self.compressfmt = cmpfmt
42     self.reader = reader
43     self.initSHA()
44     self.writeByteCounter = 0
45     self.actualScanCount = 0
46     self.scanIndices = {}
47     self.filter = filt
48    
49     def __del__(self):
50     if hasattr(self,'tmpFileName') and self.tmpFileName and self.tmpFileName != '':
51     os.unlink(self.tmpFileName)
52    
53     def getFilename(self):
54     return self.filename
55    
56     def initSHA(self):
57     self.sha1 = sha.new()
58    
59     def updateSHA(self,s):
60     self.sha1.update(s)
61    
62     def getSHA(self):
63     return self.sha1.hexdigest().lower()
64    
65     def writeString(self,fh,data):
66     self.writeByteCounter += len(data)
67     self.sha1.update(data)
68     fh.write(data)
69    
70     def write(self,debug=False):
71    
72     # Make a temporary file for the scan data to count the number of spectra.
73     (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
74     tmpFile = os.fdopen(tmpFileFD,'wb')
75    
76     # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
77     self.write_scans(tmpFile,debug)
78    
79     tmpFile.close()
80    
81     # Reset!
82    
83     self.initSHA()
84     self.writeByteCounter = 0
85    
86     if self.compressfmt == None and not self.filename.endswith('.gz'):
87     xmlFile = open(self.filename,'wb')
88     elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
89     if self.filename.endswith('.gz'):
90     xmlFile = gzip.open(self.filename,'wb')
91     else:
92     xmlFile = gzip.open(self.filename+'.gz','wb')
93     else:
94     print >>sys.stderr, "Bad compressfmt specification"
95     sys.exit(1)
96    
97     self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
98    
99     outStr = "<mzXML "
100     outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1\" "
101     outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
102     outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd\""
103     outStr += ">\n"
104    
105     self.writeString (xmlFile,outStr)
106    
107     outStr = "<msRun"
108     outStr += " scanCount=\"%d\"" % (self.actualScanCount,)
109     for (k,v) in self.reader.getMsRunMetaData().items():
110     k,f = k.split(':')
111     outStr += (" %%s=\"%s\""%(f,))%(k,v)
112     outStr += ">\n"
113    
114     self.writeString (xmlFile,outStr)
115    
116     filehashes = {}
117    
118     for f in self.reader.getFilenames():
119     if len(f) == 2:
120     outStr = "<parentFile "
121     outStr += "fileName=\"%s\" " % (quote(f[0]),)
122     outStr += "fileType=\"RAWData\" "
123     outStr += "fileSha1=\"%s\"" % (f[1],)
124     outStr += "/>\n"
125     else:
126     outStr = "<parentFile "
127     outStr += "fileName=\"%s\" " % (f[0],)
128     outStr += "fileType=\"%s\" " % (f[1],)
129     outStr += "fileSha1=\"%s\"" % (f[2],)
130     outStr += "/>\n"
131     self.writeString (xmlFile,outStr)
132    
133     outStr = "<msInstrument>\n"
134     outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),)
135     outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),)
136     if self.reader.msIonisation():
137     outStr += "<msIonisation category=\"msIonisation\" value=\"%s\"/>\n" % (self.reader.msIonisation(),)
138     if self.reader.msMassAnalyzer():
139     outStr += "<msMassAnalyzer category=\"msMassAnalyzer\" value=\"%s\"/>\n" % (self.reader.msMassAnalyzer(),)
140     if self.reader.msDetector():
141     outStr += "<msDetector category=\"msDetector\" value=\"%s\"/>\n" % (self.reader.msDetector(),)
142    
143     self.writeString (xmlFile,outStr)
144    
145     outStr = "<software "
146     outStr += "type=\"acquisition\" "
147     outStr += "name=\"%s\" "%(self.reader.acquisitionSoftware(),)
148     outStr += "version=\"%s\""%(self.reader.acquisitionSoftwareVersion(),)
149     outStr += "/>\n"
150    
151     self.writeString (xmlFile,outStr)
152    
153     outStr = "</msInstrument>\n"
154    
155     self.writeString (xmlFile,outStr)
156    
157     outStr = "<dataProcessing>\n"
158    
159     self.writeString (xmlFile,outStr)
160    
161     nv = NameVersion()
162    
163     outStr = "<software "
164     outStr += "type=\"conversion\" "
165     outStr += "name=\"%s\" "%(nv.name(),)
166     outStr += "version=\"%s\"" % (nv.version(),)
167     outStr += "/>\n"
168    
169     self.writeString (xmlFile,outStr)
170    
171     if (self.reader.peakDetect(1)):
172     outStr = "<processingOperation "
173     outStr += "name=\"peak_detection_level_1\" "
174     outStr += "value=\"true\""
175     outStr += "/>\n"
176     outStr += "<processingOperation "
177     outStr += "name=\"peak_detection_level_1_threshold\" "
178     outStr += "value=\"1%\""
179     outStr += "/>\n"
180     outStr += "<processingOperation "
181     outStr += "name=\"peak_detection_level_1_software\" "
182     outStr += "value=\"Analyst\""
183     outStr += "/>\n"
184     self.writeString (xmlFile,outStr)
185     if (self.reader.peakDetect(2)):
186     outStr = "<processingOperation "
187     outStr += "name=\"peak_detection_level_2\" "
188     outStr += "value=\"true\""
189     outStr += "/>\n"
190     outStr += "<processingOperation "
191     outStr += "name=\"peak_detection_level_2_threshold\" "
192     outStr += "value=\"1%\""
193     outStr += "/>\n"
194     outStr += "<processingOperation "
195     outStr += "name=\"peak_detection_level_2_software\" "
196     outStr += "value=\"Analyst\""
197     outStr += "/>\n"
198     self.writeString (xmlFile,outStr)
199    
200     outStr = "<processingOperation "
201     outStr += "name=\"min_peaks_per_spectra\" "
202     outStr += "value=\"1\""
203     outStr += "/>\n"
204    
205     self.writeString (xmlFile,outStr)
206    
207     outStr = "</dataProcessing>\n"
208    
209     self.writeString (xmlFile,outStr)
210    
211     if self.reader.maldi():
212     outStr = '<spotting>\n'
213     for d in self.reader.plateData():
214     outStr += '<plate plateID="%s" spotXCount="%s" spotYCount="%s">\n' % \
215     (d['plateID'],d['spotXCount'],d['spotYCount'])
216     outStr += '<plateManufacturer category="plateManufacturer" value="%s"/>\n' % \
217     (d['plateManufacturer'],)
218     outStr += '<plateModel category="plateModel" value="%s"/>\n' % \
219     (d['plateModel'],)
220     for s in self.reader.spotData(d['plateID']):
221     outStr += '<spot spotID="%s" spotXPosition="%s" spotYPosition="%s">\n' % \
222     (s['spotID'],s['spotXPosition'],s['spotYPosition'])
223     outStr += '<maldiMatrix category="maldiMatrix" value="%s"/>\n' % \
224     (s['maldiMatrix'],)
225     outStr += '</spot>\n'
226     outStr += '</plate>\n'
227     outStr += '</spotting>\n'
228     self.writeString(xmlFile,outStr)
229    
230     if self.reader.lc():
231     pass
232    
233     scanOffset = self.writeByteCounter
234    
235     tmpFile = open(self.tmpFileName,'rb')
236     while True:
237     tmpStr = tmpFile.read(1024)
238     if tmpStr == '':
239     break
240     self.writeString(xmlFile,tmpStr)
241     tmpFile.close()
242     os.unlink(self.tmpFileName)
243     self.tmpFileName = ''
244    
245     outStr = "</msRun>\n"
246     self.writeString (xmlFile,outStr)
247    
248     indexOffset = self.writeByteCounter
249    
250     outStr = "<index "
251     outStr += "name=\"scan\" "
252     outStr += ">\n"
253    
254     self.writeString (xmlFile,outStr)
255    
256     for i in xrange(1,self.actualScanCount+1):
257     outStr = "<offset id=\"%d\">" % (i,)
258     outStr += "%d</offset>\n" % (self.scanIndices[i] + scanOffset,)
259     self.writeString(xmlFile,outStr)
260    
261     outStr = "</index>\n"
262     self.writeString (xmlFile,outStr)
263    
264     outStr = "<indexOffset>%d</indexOffset>\n" % (indexOffset,)
265    
266     self.writeString (xmlFile,outStr)
267    
268     self.writeString (xmlFile,"<sha1>")
269    
270     outStr = self.getSHA()
271    
272     self.writeString (xmlFile,outStr)
273    
274     self.writeString (xmlFile,"</sha1>\n")
275    
276     outStr = "</mzXML>\n"
277     self.writeString (xmlFile,outStr)
278    
279     xmlFile.close()
280    
281     def write_scans(self,xmlFile,debug=False):
282    
283     msLevel = 0
284     scanNumber = 0
285     ancestors = []
286     self.writeByteCounter = 0
287    
288     for (s,d) in self.reader.spectra():
289    
290     if self.filter != None and not self.filter.test(d):
291     continue
292    
293     if debug and scanNumber >= 10:
294     break
295    
296     if not d.has_key('msLevel'):
297     print >>sys.stderr, "Required scan attributes missing."
298     sys.exit(1)
299    
300     prevLevel = msLevel
301     msLevel = d['msLevel']
302    
303     if prevLevel < msLevel and prevLevel > 0:
304     # We went "in" a level, push scan number of parent
305     ancestors.append((scanNumber,prevSpec))
306     elif prevLevel > msLevel and msLevel > 0:
307     if len(ancestors) == 0:
308     pass #print >>sys.stderr, "No ancestor for scan %s at level %s"%(scanNumber,msLevel)
309     else:
310     ancestors.pop()
311    
312     outStr = ''
313     if prevLevel > msLevel:
314     for m in xrange(0,prevLevel-msLevel+1):
315     outStr += "</scan>\n"
316     else:
317     if prevLevel > 0 and prevLevel == msLevel:
318     outStr += "</scan>\n"
319    
320     self.writeString(xmlFile,outStr)
321    
322     scanNumber = scanNumber + 1
323    
324     self.scanIndices[scanNumber] = self.writeByteCounter
325    
326     totIonCurrent = 0
327     maxmz = None
328     minmz = None
329     basePeakMz = None
330     basePeakIntensity = None
331     peaksCount = 0
332     for i in xrange(0,len(s),2):
333     x = s[i]; y = s[i+1]
334     if minmz is None or minmz > x:
335     minmz = x
336     if maxmz is None or maxmz < x:
337     maxmz = x
338     totIonCurrent += y
339     if basePeakIntensity is None or basePeakIntensity < y:
340     basePeakIntensity = y
341     basePeakMz = x
342     peaksCount += 1
343    
344     outStr = "<scan num=\"%d\" msLevel=\"%d\"" % (scanNumber,msLevel)
345     outStr += " peaksCount=\"%d\"" % (peaksCount,)
346     outStr += " lowMz=\"%f\"" % (minmz,)
347     outStr += " highMz=\"%f\"" % (maxmz,)
348     outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,)
349     outStr += " basePeakMz=\"%f\"" % (basePeakMz,)
350     outStr += " basePeakIntensity=\"%f\"" % (basePeakIntensity,)
351    
352     for (k,v) in d.items():
353     if k.startswith('scan.'):
354     k,f = k.split(':')
355     k = k[5:]
356     outStr += (" %%s=\"%s\""%(f,))%(k,v)
357     outStr += ">\n"
358     self.writeString(xmlFile,outStr)
359    
360 nje01 1.2 any = False
361     for (k,v) in d.items():
362     if k.startswith('scanOrigin.'):
363     if not any:
364     any = True
365     outStr = "<scanOrigin"
366     k,f = k.split(':')
367     k = k[11:]
368     outStr += (" %%s=\"%s\""%(f,))%(k,v)
369     if any:
370     outStr += "/>\n"
371     self.writeString(xmlFile,outStr)
372    
373 nje01 1.1 if msLevel > 1:
374    
375     if not d.has_key('precursorMz') :
376     print >>sys.stderr, "Required precursorMz attribute missing."
377     sys.exit(1)
378    
379     # We scan the parent spectrum in the region of the
380     # precursorMz to establish the intensity. Optionally,
381     # we tune the precursorMz itself, on the basis of this
382    
383     pMz = d['precursorMz']
384     tol = self.reader.precursorPrecision(pMz)
385     pMzLB = pMz-tol
386     pMzUB = pMz+tol
387     pMzIntensity = 0
388    
389     if len(ancestors) > 0:
390     for i in xrange(0,len(ancestors[-1][1]),2):
391     x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
392     if x < pMzLB:
393     continue
394     if x > pMzUB:
395     break
396     if pMzIntensity < y:
397     pMzIntensity = y
398     pMzTune = x
399    
400     if self.reader.precursorTune():
401     pMz = pMzTune
402    
403     outStr = "<precursorMz"
404     for (k,v) in d.items():
405     if k.startswith('precursorMz.'):
406     k,f = k.split(':')
407     k = k[12:]
408     outStr += (" %%s=\"%s\""%(f,))%(k,v)
409     if len(ancestors)>0:
410     outStr += " precursorScanNum=\"%d\""%(ancestors[-1][0],)
411     outStr += " precursorIntensity=\"%f\">"%(pMzIntensity,)
412     else:
413     outStr += ">"
414     outStr += "%f</precursorMz>\n" % (pMz,)
415     self.writeString(xmlFile,outStr)
416    
417     if self.reader.maldi():
418     outStr = '<maldi'
419     for (k,v) in d.items():
420     if k.startswith('maldi.'):
421     k,f = k.split(':')
422     k = k[6:]
423     outStr += (" %%s=\"%s\""%(f,))%(k,v)
424     outStr += " plateID=\"%s\""%(d['plateID'],)
425     outStr += " spotID=\"%s\""%(d['spotID'],)
426     outStr += "/>\n"
427     self.writeString(xmlFile,outStr)
428    
429     outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">"
430     self.writeString(xmlFile,outStr)
431    
432     # Spec says the byte order shall be
433     # network, which is the same as big endian.
434    
435     if sys.byteorder != 'big':
436     s.byteswap()
437    
438     outStr = b64encode(s.tostring())
439     if not debug:
440     self.writeString(xmlFile,outStr)
441     else:
442     self.writeString(xmlFile,outStr[0:100])
443    
444     if sys.byteorder != 'big':
445     s.byteswap()
446    
447     outStr = "</peaks>\n"
448     self.writeString(xmlFile,outStr)
449    
450     if self.reader.maldi():
451     outStr = ''
452     for (k,v) in d.items():
453     if k.startswith('maldi.'):
454     k,f = k.split(':')
455     k = k[6:]
456     outStr += '<nameValue'
457     outStr += ' name="%s"'%(k,)
458     outStr += (' value="%f"'%(f,))%(v,)
459     outStr += '/>\n'
460     ## outStr += '<nameValue'
461     ## outStr += ' name="plateID"'
462     ## outStr += ' value="%s"'%(d['plateID'],)
463     ## outStr += '/>\n'
464     ## outStr += '<nameValue'
465     ## outStr += ' name="spotID"'
466     ## outStr += ' value="%s"'%(d['spotID'],)
467     ## outStr += '/>\n'
468     self.writeString(xmlFile,outStr)
469    
470     xmlFile.flush()
471    
472     prevSpec = s
473    
474     outStr = ""
475     for m in xrange(0,msLevel):
476     outStr += "</scan>\n"
477    
478     self.writeString(xmlFile,outStr)
479     self.actualScanCount = scanNumber
480    
481     class ToMzData:
482     def __init__(self,reader,filename,cmpfmt=None,filt=None):
483     self.filename = filename
484     self.compressfmt = cmpfmt
485     self.reader = reader
486     self.actualScanCount = 0
487     self.initSHA()
488     self.filter = filt
489    
490     def writeString(self,fh,data):
491     # self.writeByteCounter += len(data)
492     self.sha1.update(data)
493     fh.write(data)
494    
495     def initSHA(self):
496     self.sha1 = sha.new()
497    
498     def updateSHA(self,s):
499     self.sha1.update(s)
500    
501     def getSHA(self):
502     return self.sha1.hexdigest().lower()
503    
504     def write(self,debug=False):
505    
506     # Make a temporary file for the scan data to count the number of spectra.
507     (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
508     tmpFile = os.fdopen(tmpFileFD,'wb')
509    
510     # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
511     self.write_scans(tmpFile,debug)
512    
513     tmpFile.close()
514    
515     scanhash = self.getSHA()
516    
517     self.initSHA()
518    
519     if self.compressfmt == None and not self.filename.endswith('.gz'):
520     xmlFile = open(self.filename,'wb')
521     elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
522     if self.filename.endswith('.gz'):
523     xmlFile = gzip.open(self.filename,'wb')
524     else:
525     xmlFile = gzip.open(self.filename+'.gz','wb')
526     else:
527     print >>sys.stderr, "Bad compressfmt specification"
528     sys.exit(1)
529    
530     self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
531    
532     outStr = "<mzData "
533     outStr += "version=\"1.05\" "
534     outStr += "accessionNumber=\"pymsxml:%s\" "%(scanhash,)
535     outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
536     # outStr += "xmlns=\"http://psidev.sourceforge.net/ms/xml/mzdata\" "
537     outStr += "xsi:noNamespaceSchemaLocation=\"http://psidev.sourceforge.net/ms/xml/mzdata/mzdata.xsd\""
538     outStr += ">\n"
539    
540     self.writeString (xmlFile,outStr)
541    
542     outStr = "<description>\n"
543     outStr += "<admin>\n"
544     outStr += "<sampleName></sampleName>\n"
545     outStr += "<contact>\n"
546     outStr += "<name></name>\n"
547     outStr += "<institution></institution>\n"
548     outStr += "</contact>\n"
549     outStr += "</admin>\n"
550     outStr += "<instrument>\n"
551     outStr += "<instrumentName></instrumentName>\n"
552     outStr += "<source>\n"
553     outStr += "</source>\n"
554     outStr += "<analyzerList count=\"1\">\n"
555     outStr += "<analyzer>\n"
556     outStr += "</analyzer>\n"
557     outStr += "</analyzerList>\n"
558     outStr += "<detector>\n"
559     outStr += "</detector>\n"
560     outStr += "</instrument>\n"
561     outStr += "<dataProcessing>\n"
562     outStr += "<software>\n"
563     outStr += "<name>%s</name>\n"%(self.reader.acquisitionSoftware(),)
564     outStr += "<version>%s</version>\n"%(self.reader.acquisitionSoftwareVersion(),)
565     outStr += "</software>\n"
566     outStr += "<software>\n"
567     nv = NameVersion()
568     outStr += "<name>%s</name>\n"%(nv.name(),)
569     outStr += "<version>%s</version>\n"%(nv.version(),)
570     outStr += "</software>\n"
571     outStr += "</dataProcessing>\n"
572     outStr += "</description>\n"
573    
574     self.writeString (xmlFile,outStr)
575    
576     outStr = "<spectrumList"
577     outStr += " count=\"%d\">\n"%(self.actualScanCount,)
578    
579     self.writeString (xmlFile,outStr)
580    
581     tmpFile = open(self.tmpFileName,'rb')
582     while True:
583     tmpStr = tmpFile.read(1024)
584     if tmpStr == '':
585     break
586     self.writeString(xmlFile,tmpStr)
587     tmpFile.close()
588     os.unlink(self.tmpFileName)
589     self.tmpFileName = ''
590    
591     outStr = "</spectrumList>\n</mzData>\n"
592    
593     self.writeString (xmlFile,outStr)
594    
595     def write_scans(self,xmlFile,debug=False):
596    
597     msLevel = 0
598     scanNumber = 0
599     ancestors = []
600    
601     self.initSHA()
602     # self.writeByteCounter = 0
603    
604     for (s,d) in self.reader.spectra():
605    
606     if self.filter != None and not self.filter.test(d):
607     continue
608    
609     if debug and scanNumber >= 10:
610     break
611    
612     if not d.has_key('msLevel'):
613     print >>sys.stderr, "Required scan attributes missing."
614     sys.exit(1)
615    
616     prevLevel = msLevel
617     msLevel = d['msLevel']
618    
619     if prevLevel < msLevel and prevLevel > 0:
620     # We went "in" a level, push scan number of parent
621     ancestors.append((scanNumber,prevSpec))
622     elif prevLevel > msLevel and msLevel > 0:
623     ancestors.pop()
624    
625     scanNumber = scanNumber + 1
626    
627     totIonCurrent = 0
628     maxmz = None
629     minmz = None
630     basePeakMz = None
631     basePeakIntensity = None
632     peaksCount = 0
633     for i in xrange(0,len(s),2):
634     x = s[i]; y = s[i+1]
635     if minmz is None or minmz > x:
636     minmz = x
637     if maxmz is None or maxmz < x:
638     maxmz = x
639     totIonCurrent += y
640     if basePeakIntensity is None or basePeakIntensity < y:
641     basePeakIntensity = y
642     basePeakMz = x
643     peaksCount += 1
644    
645     outStr = "<spectrum id=\"%d\">\n" % (scanNumber,)
646     outStr += "<spectrumDesc>\n"
647     outStr += "<spectrumSettings>\n"
648    
649     self.writeString (xmlFile,outStr)
650    
651     # outStr += "<acqSpecification>\n"
652     # outStr += "</acqSpecification>\n"
653    
654     outStr = "<spectrumInstrument"
655     outStr += " msLevel=\"%d\""%(msLevel,)
656     if d.has_key('scan.startMz') and \
657     d.has_key('scan.endMz'):
658     outStr += " mzRangeStart=\"%f\""%(d['scan.startMz'],)
659     outStr += " mzRangeStop=\"%f\""%(d['scan.endMz'],)
660     outStr += ">\n"
661     for (k,v) in d.items():
662     if k.startswith('scan.'):
663     k,f = k.split(':')
664     k = k[5:]
665    
666     if k == "polarity":
667     if v == '+':
668     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Positive",)
669     elif v == '-':
670     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Negative",)
671     elif k == 'retentionTime':
672     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"TimeInSeconds\" value=\"%f\"/>\n"%(v,)
673     else:
674     outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
675     outStr += "<userParam name=\"lowMz\" value=\"%f\"/>\n"%(minmz,)
676     outStr += "<userParam name=\"highMz\" value=\"%f\"/>\n"%(maxmz,)
677     outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,)
678     outStr += "<userParam name=\"basePeakMz\" value=\"%f\"/>\n"%(basePeakMz,)
679     outStr += "<userParam name=\"basePeakIntensity\" value=\"%f\"/>\n"%(basePeakIntensity,)
680     outStr += "</spectrumInstrument>\n"
681     outStr += "</spectrumSettings>\n"
682    
683     self.writeString (xmlFile,outStr)
684    
685     if msLevel > 1:
686    
687     if not d.has_key('precursorMz') :
688     print >>sys.stderr, "Required precursorMz attribute missing."
689     sys.exit(1)
690    
691     # We scan the parent spectrum in the region of the
692     # precursorMz to establish the intensity. Optionally,
693     # we tune the precursorMz itself, on the basis of this
694    
695     pMz = d['precursorMz']
696     tol = self.reader.precursorPrecision(pMz)
697     pMzLB = pMz-tol
698     pMzUB = pMz+tol
699     pMzIntensity = 0
700    
701     for i in xrange(0,len(ancestors[-1][1]),2):
702     x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
703     if x < pMzLB:
704     continue
705     if x > pMzUB:
706     break
707     if pMzIntensity < y:
708     pMzIntensity = y
709     pMzTune = x
710    
711     if self.reader.precursorTune():
712     pMz = pMzTune
713    
714     outStr = "<precursorList count=\"1\">\n"
715    
716     outStr += "<precursor"
717     outStr += " msLevel=\"%d\""%(msLevel,)
718     outStr += " spectrumRef=\"%d\""%(ancestors[-1][0],)
719     outStr += ">\n"
720    
721     self.writeString(xmlFile,outStr)
722    
723     outStr = "<ionSelection>\n"
724     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"MassToChargeRatio\" value=\"%f\"/>\n"%(pMz,)
725     outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Intensity\" value=\"%f\"/>\n"%(pMzIntensity,)
726     for (k,v) in d.items():
727     if k.startswith('precursorMz.'):
728     k,f = k.split(':')
729     k = k[12:]
730     outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
731     outStr += "</ionSelection>\n"
732    
733     self.writeString(xmlFile,outStr)
734    
735     outStr = "<activation>\n"
736     outStr += "</activation>\n"
737     outStr += "</precursor>\n"
738     outStr += "</precursorList>\n"
739    
740     self.writeString(xmlFile,outStr)
741    
742     if self.reader.maldi():
743     outStr = '<maldi'
744     for (k,v) in d.items():
745     if k.startswith('maldi.'):
746     k,f = k.split(':')
747     k = k[6:]
748     outStr += (" %%s=\"%s\""%(f,))%(k,v)
749     outStr += " plateID=\"%s\""%(d['plateID'],)
750     outStr += " spotID=\"%s\""%(d['spotID'],)
751     outStr += "/>\n"
752     self.writeString(xmlFile,outStr)
753    
754     outStr = "</spectrumDesc>\n"
755     self.writeString(xmlFile,outStr)
756    
757     outStr = "<mzArrayBinary>\n"
758     outStr += "<data"
759     outStr += " precision=\"32\""
760     outStr += " endian=\"%s\""%(sys.byteorder,)
761     outStr += " length=\"%d\""%(peaksCount,)
762     outStr += ">"
763     self.writeString(xmlFile,outStr)
764    
765     outStr = b64encode(s[0:peaksCount*2:2].tostring())
766     if not debug:
767     self.writeString(xmlFile,outStr)
768     else:
769     self.writeString(xmlFile,outStr[0:100])
770    
771     outStr = "</data>\n</mzArrayBinary>\n"
772     self.writeString(xmlFile,outStr)
773    
774     outStr = "<intenArrayBinary>\n"
775     outStr += "<data"
776     outStr += " precision=\"32\""
777     outStr += " endian=\"%s\""%(sys.byteorder,)
778     outStr += " length=\"%d\""%(peaksCount,)
779     outStr += ">"
780     self.writeString(xmlFile,outStr)
781    
782     outStr = b64encode(s[1:peaksCount*2:2].tostring())
783     if not debug:
784     self.writeString(xmlFile,outStr)
785     else:
786     self.writeString(xmlFile,outStr[0:100])
787    
788     outStr = "</data>\n</intenArrayBinary>\n"
789     self.writeString(xmlFile,outStr)
790    
791     if self.reader.maldi():
792     outStr = ''
793     for (k,v) in d.items():
794     if k.startswith('maldi.'):
795     k,f = k.split(':')
796     k = k[6:]
797     outStr += '<nameValue'
798     outStr += ' name="%s"'%(k,)
799     outStr += (' value="%f"'%(f,))%(v,)
800     outStr += '/>\n'
801     ## outStr += '<nameValue'
802     ## outStr += ' name="plateID"'
803     ## outStr += ' value="%s"'%(d['plateID'],)
804     ## outStr += '/>\n'
805     ## outStr += '<nameValue'
806     ## outStr += ' name="spotID"'
807     ## outStr += ' value="%s"'%(d['spotID'],)
808     ## outStr += '/>\n'
809     self.writeString(xmlFile,outStr)
810    
811     outStr = "</spectrum>\n"
812     self.writeString(xmlFile,outStr)
813     xmlFile.flush()
814    
815     prevSpec = s
816    
817     self.actualScanCount = scanNumber
818    
819     class QStarWiff:
820     def __init__(self,filename,peaks=None):
821     self.filename = filename
822     self.filehash = fileSHA(filename)
823     self.theTIC = None
824     self.theWF = None
825     self.theFMANSpecData = None
826     self.startTime = None
827     self.stopTime = None
828 nje01 1.2 self.applyPeakDetect = []
829 nje01 1.1 if peaks:
830     self.applyPeakDetect = map(int,peaks.split(','))
831    
832     if not os.path.exists(self.filename):
833     print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
834     sys.exit(1)
835    
836     self.filename = os.path.abspath(self.filename)
837    
838     def __del__(self):
839     # Probably unnecessary
840     self.close()
841    
842     def peakDetect(self,msLevel):
843     return ((msLevel in self.applyPeakDetect) and (msLevel != 1))
844    
845     def open(self):
846    
847     if self.theTIC is None:
848    
849     from win32com.client import Dispatch
850    
851     self.theFMANSpecData = Dispatch('Analyst.FMANSpecData')
852     self.theTIC = Dispatch('Analyst.FMANChromData')
853     self.theSPL = Dispatch('Analyst.SpectralPeakList')
854    
855     self.theFMANSpecData.WiffFileName = self.filename
856     self.theTIC.WiffFileName = self.filename
857     self.theWF = self.theFMANSpecData.GetWiffFileObject()
858    
859     self.theTIC.SetToTIC(1,0,0)
860    
861     self.startTime = self.theTIC.GetDataPointXValue(1)
862     self.stopTime = self.theTIC.GetDataPointXValue(self.theTIC.GetNumberOfDataPoints())
863    
864     def close(self):
865    
866     if not self.theTIC is None:
867    
868     if not self.theWF is None:
869     self.theWF.CloseWiffFile()
870     self.theTIC = None
871     self.theFMANSpecData = None
872     self.theWF = None
873    
874     def spectra(self):
875     self.open()
876     spectrumCount=0;
877    
878     numberOfSamples = self.theWF.GetActualNumberOfSamples()
879     for i in xrange(1,numberOfSamples+1):
880    
881     # print >>sys.stderr, "Sample %d"%i
882     # print >>sys.stderr, "Sample Name: %s"%(self.theWF.GetSampleName(i),)
883    
884     sampleName = self.theWF.GetSampleName(i)
885    
886     numberOfPeriods = self.theWF.GetActualNumberOfPeriods(i)
887     for j in xrange(0,numberOfPeriods):
888    
889     numberOfExperiments = self.theWF.GetNumberOfExperiments(i,j)
890     self.theTIC.SetToTIC(i, j, 0)
891    
892     numberOfTICDataPoints = self.theTIC.GetNumberOfDataPoints()
893     for l in xrange(1,numberOfTICDataPoints+1):
894    
895     for k in xrange(0,numberOfExperiments):
896    
897     self.theExperiment = self.theWF.GetExperimentObject(i, j, k)
898     self.theTIC.SetToTIC(i, j, k)
899    
900     # Convert to seconds?
901     xValue = self.theTIC.GetDataPointXValue(l) * 60
902    
903     spectrumCount += 1;
904    
905     # If any Ion current at time "l"
906     if self.theTIC.GetDataPointYValue(l) > 0:
907    
908     self.theFMANSpecData.SetSpectrum(i, j, k, xValue, xValue)
909    
910     if self.theExperiment.ScanType == 9:
911     msLevel = 2
912     m = re.search(r'[(]([0-9.]+)[)]',self.theFMANSpecData.DataTitle)
913     fixedMass = float(m.group(1))
914     else:
915     msLevel = 1
916    
917     startMass = self.theFMANSpecData.GetStartMass()
918     stopMass = self.theFMANSpecData.GetStopMass()
919     dataArr = array('f');
920     peakDetect = 'False'
921     if self.peakDetect(msLevel):
922     peakDetect = 'True'
923     minY,maxY = self.theFMANSpecData.GetYValueRange()
924     print minY,maxY
925     self.theFMANSpecData.Threshold(maxY*0.01)
926     self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0)
927     numPeaks = self.theSPL.GetNumberOfPeaks()
928     print numPeaks
929     for m in xrange(1,numPeaks+1):
930     (x,w,y,yp) = self.theSPL.GetPeak(m)
931     if y <= 0:
932     continue
933     dataArr.append(x)
934     dataArr.append(y)
935     else:
936     numPoints = self.theFMANSpecData.GetNumberOfDataPoints()
937     for m in xrange(1,numPoints+1):
938     if self.theFMANSpecData.GetDataPointYValue(m) <= 0:
939     continue
940     x = self.theFMANSpecData.GetDataPointXValue(m)
941     dataArr.append(x)
942     y = self.theFMANSpecData.GetDataPointYValue(m)
943     dataArr.append(y)
944     polarity = self.theExperiment.Polarity
945    
946     if len(dataArr) == 0:
947     continue
948    
949     if polarity == 0:
950     polarity_val = "+"
951     else:
952     polarity_val = "-"
953    
954     rt = "PT%fS"%(xValue,)
955    
956     # Scan level attributes
957     # Required by mzXML (no format spec. needed)
958     d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect}
959     # Optional ('scan.' and prefixformat spec. needed)
960     d.update({
961     'scan.retentionTime:PT%fS':xValue,
962     'scan.polarity:%s':polarity_val,
963     'scan.startMz:%f':startMass,
964     'scan.endMz:%f':stopMass,
965     'scan.sample:%d':i,
966     'scan.period:%d':j,
967     'scan.experiment:%d':k,
968     'scan.sampleName:%s':sampleName
969     })
970 nje01 1.2 d.update({'scanOrigin.parentFileID:%s': self.filehash,
971 nje01 1.1 'scanOrigin.num:%d': spectrumCount})
972     if msLevel == 1:
973     yield (dataArr,d)
974     else:
975     # tandem MS scans add the following
976     # Required by mzXML
977     d.update({'precursorMz':fixedMass})
978     # Optional
979     # d.update({'precursorMz.precursorMzIntensity:f':0.0})
980     yield (dataArr,d)
981    
982     def getMsRunMetaData(self):
983     self.open()
984     d = {'startTime:PT%fS':self.startTime,
985     'endTime:PT%fS':self.stopTime,
986     }
987     return d
988    
989     def getFilenames(self):
990     return [ (self.filename,self.filehash) ]
991    
992     def msManufacturer(self):
993     return "ABI / SCIEX"
994    
995     def msModel(self):
996     return "QSTAR"
997    
998     def msIonisation(self):
999     return "ESI"
1000    
1001     def msMassAnalyzer(self):
1002     return "Quadrupole"
1003    
1004     def msDetector(self):
1005     return None
1006    
1007     def acquisitionSoftware(self):
1008     return "Analyst"
1009    
1010     def acquisitionSoftwareVersion(self):
1011     return "1.0"
1012    
1013     def precursorPrecision(self,mz):
1014     return .1
1015    
1016     def precursorTune(self):
1017     return True
1018    
1019     def lc(self):
1020     return True
1021    
1022     def maldi(self):
1023     return not self.lc()
1024    
1025     class mzXML_md_sax(handler.ContentHandler):
1026    
1027     def __init__(self):
1028     self.md = {}
1029     self.context = ''
1030    
1031     def startElement(self, name, attrs):
1032     self.context += ':%s'%(name,)
1033     # print >>sys.stderr, ">>",self.context
1034     # sys.stderr.flush()
1035     if self.context.endswith(':msRun'):
1036     self.md['startTime'] = float(attrs['startTime'][2:-1])
1037     self.md['endTime'] = float(attrs['endTime'][2:-1])
1038     elif self.context.endswith(':parentFile'):
1039     self.md['fileName'] = unquote(attrs['fileName'])
1040     self.md['fileType'] = attrs['fileType']
1041     self.md['fileSha1'] = attrs['fileSha1']
1042     elif self.context.endswith(':instrument'):
1043     self.md['msManufacturer'] = attrs['manufacturer']
1044     self.md['msModel'] = attrs['model']
1045     self.md['msIonisation'] = attrs['ionisation']
1046     self.md['msMassAnalyzer'] = attrs['msType']
1047     elif self.context.endswith(':msInstrument:msManufacturer'):
1048     self.md['msManufacturer'] = attrs['value']
1049     elif self.context.endswith(':msInstrument:msModel'):
1050     self.md['msModel'] = attrs['value']
1051     elif self.context.endswith(':msInstrument:msIonisation'):
1052     self.md['msIonisation'] = attrs['value']
1053     elif self.context.endswith(':msInstrument:msMassAnalyzer'):
1054     self.md['msMassAnalyzer'] = attrs['value']
1055     elif self.context.endswith(':msInstrument:msDetector'):
1056     self.md['msDetector'] = attrs['value']
1057     elif self.context.endswith(':msInstrument:software'):
1058     if attrs['type'] == 'acquisition':
1059     self.md['acquisitionSoftware'] = attrs['name']
1060     self.md['acquisitionSoftwareVersion'] = attrs['version']
1061     elif self.context.endswith(':instrument:software'):
1062     if attrs['type'] == 'acquisition':
1063     self.md['acquisitionSoftware'] = attrs['name']
1064     self.md['acquisitionSoftwareVersion'] = attrs['version']
1065     elif self.context.endswith(':scan'):
1066     raise SAXException("Early termination")
1067    
1068     def endElement(self, name):
1069     # print >>sys.stderr, "<<",self.context
1070     # sys.stderr.flush()
1071     self.context = self.context[0:-(len(name)+1)]
1072    
1073    
1074     class mzXML_spec_sax(handler.ContentHandler):
1075    
1076     def __init__(self):
1077     self.context = ''
1078     self.content = ''
1079     self.spectra = []
1080     self.done = False
1081     self.scancount = 0
1082     self.scanmax = 1000
1083     self.scanstart = 0
1084    
1085     def startElement(self, name, attrs):
1086     self.context += ':%s'%(name,)
1087     # print >>sys.stderr, ">>",self.context
1088     # sys.stderr.flush()
1089     self.content = ''
1090     if name == 'scan':
1091     self.scanmd = {}
1092     self.scanlevel = int(attrs['msLevel'])
1093     self.scannum = int(attrs['num'])
1094     if attrs.has_key('retentionTime'):
1095     self.rt = float(attrs['retentionTime'][2:-1])
1096     self.endMz = float(attrs['endMz'])
1097     self.startMz = float(attrs['startMz'])
1098     self.polarity = attrs.get('polarity',None)
1099     for (k,v) in attrs.items():
1100     self.scanmd[k] = v
1101     self.precursorMz = None
1102     self.scancount += 1
1103     elif name == 'msRun':
1104     self.scancount = 0
1105     self.spectra = []
1106     self.context = ':msRun'
1107     elif name == 'scanOrigin':
1108     self.scanmd['scanOrigin'] = attrs
1109    
1110     def characters(self, content):
1111     if self.scancount >= self.scanstart:
1112     if self.context.endswith(':peaks') or \
1113     self.context.endswith(':precursorMz'):
1114     self.content += content
1115    
1116     def endElement(self, name):
1117     if self.scancount >= self.scanstart:
1118     if self.context.endswith('scan:peaks'):
1119    
1120     spec = array('f')
1121     # print >>sys.stderr, len(self.content), len(b64decode(self.content)), self.content
1122     spec.fromstring(b64decode(self.content))
1123     if sys.byteorder != 'big':
1124     spec.byteswap()
1125     d = {'msLevel':self.scanlevel}
1126     # Optional ('scan.' and prefixformat spec. needed)
1127     d.update({
1128     'scan.retentionTime:PT%fS':self.rt,
1129     'scan.startMz:%f':self.startMz,
1130     'scan.endMz:%f':self.endMz
1131     })
1132     if self.polarity != None:
1133     d.update({'scan.polarity:%s':self.polarity})
1134     if self.scanlevel == 2:
1135     d.update({'precursorMz':self.precursorMz})
1136     if self.scanmd.has_key('scanOrigin'):
1137 nje01 1.2 d.update({'scanOrigin.parentFileID:%s': self.scanmd['scanOrigin']['parentFileID'],
1138 nje01 1.1 'scanOrigin.num:%d': self.scanmd['scanOrigin']['num']})
1139     for (k,v) in self.scanmd.items():
1140     if not d.has_key(k):
1141     d[k] = v
1142     self.spectra.append((spec,d))
1143     if len(self.spectra) >= self.scanmax:
1144     self.scanstart = self.scancount+1
1145     raise SAXException("Early termination")
1146     elif self.context.endswith('scan:precursorMz'):
1147     self.precursorMz = float(self.content)
1148     if self.context.endswith('msRun'):
1149     self.done = True
1150     # print >>sys.stderr, "<<",self.context
1151     # sys.stderr.flush()
1152     self.context = self.context[0:-(len(name)+1)]
1153    
1154     class FromMzXML:
1155     def __init__(self,filename):
1156     self.filename = filename
1157     self.parser = None
1158    
1159     if not os.path.exists(self.filename):
1160     print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1161     sys.exit(1)
1162    
1163     if self.filename.endswith('.gz'):
1164     self.compressfmt = 'gz'
1165     else:
1166     self.compressfmt = None
1167    
1168     def __del__(self):
1169     self.close()
1170    
1171     def open(self):
1172    
1173     if self.parser == None:
1174     # print >>sys.stderr, "Parsing out meta-data..."
1175     # sys.stderr.flush()
1176     self.parser = make_parser()
1177     self.handler = mzXML_md_sax()
1178     self.parser.setContentHandler(self.handler)
1179     try:
1180     if self.compressfmt == None:
1181     self.parser.parse(self.filename)
1182     elif self.compressfmt == 'gz':
1183     self.parser.parse(gzip.open(self.filename,'r'))
1184     else:
1185     print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1186     sys.exit(1)
1187     except SAXException:
1188     pass
1189    
1190     self.md = self.handler.md
1191    
1192     # print >>sys.stderr, self.md
1193     # sys.stderr.flush()
1194    
1195     # print >>sys.stderr, "Set up spectrum parser..."
1196     # sys.stderr.flush()
1197     self.parser = make_parser()
1198     self.handler = mzXML_spec_sax()
1199     self.parser.setContentHandler(self.handler)
1200    
1201     def close(self):
1202     pass
1203    
1204     def spectra(self):
1205     self.open()
1206     while True:
1207     # print >>sys.stderr, "Spectrum parser to read spectra from",self.handler.scanstart
1208     # sys.stderr.flush()
1209     try:
1210     if self.compressfmt == None:
1211     self.parser.parse(self.filename)
1212     elif self.compressfmt == 'gz':
1213     self.parser.parse(gzip.open(self.filename,'r'))
1214     else:
1215     print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1216     sys.exit(1)
1217     except SAXException:
1218     pass
1219     for (s,d) in self.handler.spectra:
1220     # print >>sys.stderr, "Yeild spectrum number:",d['num']
1221     yield (s,d)
1222     if self.handler.done:
1223     break
1224    
1225     def getMsRunMetaData(self):
1226     self.open()
1227     d = {'startTime:PT%fS':self.md['startTime'],
1228     'endTime:PT%fS':self.md['endTime']
1229     }
1230     return d
1231    
1232     def getFilenames(self):
1233     return [ (self.md['fileName'],self.md['fileType'],self.md['fileSha1']) ]
1234    
1235     def msManufacturer(self):
1236     return self.md.get('msManufacturer','')
1237    
1238     def msModel(self):
1239     return self.md.get('msModel','')
1240    
1241     def msIonisation(self):
1242     return self.md.get('msIonisation','')
1243    
1244     def msMassAnalyzer(self):
1245     return self.md.get('msMassAnalyzer','')
1246    
1247     def msDetector(self):
1248     return self.md.get('msDetector','')
1249    
1250     def acquisitionSoftware(self):
1251     return self.md.get('acquisitionSoftware','')
1252    
1253     def acquisitionSoftwareVersion(self):
1254     return self.md.get('acquisitionSoftwareVersion','')
1255    
1256     def precursorPrecision(self,mz):
1257     return 0
1258    
1259     def precursorTune(self):
1260     return False
1261    
1262     def lc(self):
1263     return True
1264    
1265     def maldi(self):
1266     return not self.lc()
1267    
1268     class AB4700T2D:
1269     def __init__(self,filename,peaks=None):
1270     self.filename = filename
1271     self.datafiles = None
1272     self.deapp = None
1273     self.metadata = {}
1274     self.platespots = {}
1275     self.applyPeakDetect = []
1276     if peaks:
1277     self.applyPeakDetect = map(int,peaks.split(','))
1278    
1279     if not os.path.exists(self.filename):
1280     print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1281     sys.exit(1)
1282    
1283     self.filename = os.path.abspath(self.filename)
1284    
1285     self.dir = os.path.split(self.filename)[0]
1286    
1287     def __del__(self):
1288     # Probably unnecessary
1289     self.close()
1290    
1291     def open(self):
1292    
1293     if self.deapp is None:
1294    
1295     self.metadata['PLATEDEF'] = []
1296     self.metadata['PLATE'] = []
1297     self.metadata['SPOT'] = []
1298     self.metadata['SCAN'] = []
1299    
1300     h = open(self.filename,'rb')
1301     r = csv.reader(h,'excel-tab')
1302     for l in r:
1303     if len(l) == 0:
1304     continue
1305     kw = l.pop(0).upper()
1306     if kw in ('PLATEDEF','PLATE','SPOT','SCAN'):
1307     self.metadata[kw].append({})
1308     while len(l)>=2:
1309     k = l.pop(0);v = l.pop(0);
1310     if (kw == 'PLATE' and \
1311     (k in ('plateID','spotXCount','spotYCount','plateManufacturer','plateModel'))) or \
1312     (kw == 'PLATEDEF' and \
1313     (k in ('plateID','plateManufacturer','plateModel','spotNaming','maldiMatrix'))) or \
1314     (kw == 'SPOT' and \
1315     (k in ('plateID','spotID','spotXPosition','spotYPosition','maldiMatrix'))) or \
1316     (kw == 'SCAN' and \
1317     (k in ('plateID','spotID','filename','index'))):
1318     self.metadata[kw][-1][k] = v
1319     elif k == '':
1320     break
1321     else:
1322     print >>sys.stderr,"Bad key %s for %s row"%(k,kw)
1323     sys.exit(1)
1324     h.close()
1325    
1326     for pd in self.metadata['PLATEDEF']:
1327     # insert all the gory details needed for a particular plate model etc...
1328     if pd['plateManufacturer'] == 'ABI / SCIEX' and pd['plateModel'] == '01-192+06-BB':
1329     if not pd.has_key('spotNaming') or not pd.has_key('maldiMatrix') or \
1330     not pd['spotNaming'] in ('alpha','parallel','antiparallel'):
1331     print >>sys.stderr,"Bad plate definition, missing maldiMatrix or spotNaming, or bad spotNaming value."
1332     sys.exit(1)
1333     self.metadata['PLATE'].append({
1334     'plateID':pd['plateID'],
1335     'plateManufacturer':pd['plateManufacturer'],
1336     'plateModel':pd['plateModel'],
1337     'spotXCount':24,
1338     'spotYCount':8,
1339     })
1340     if pd['spotNaming'] == 'alpha':
1341     for y in xrange(0,8):
1342     for x in xrange(0,24):
1343     self.metadata['SPOT'].append({
1344     'plateID':pd['plateID'],
1345     'spotID':"%c%d"%(y+ord('A'),x+1),
1346     'spotXPosition':x,
1347     'spotYPosition':2*y+(x%2),
1348     'maldiMatrix':pd['maldiMatrix']
1349     })
1350     else:
1351     print >>sys.stderr,"Valid spotNaming value, not yet implemented."
1352     sys.exit(1)
1353    
1354     for pl in self.metadata['PLATE']:
1355     self.platespots[pl['plateID']] = []
1356    
1357     for sp in self.metadata['SPOT']:
1358     self.platespots[sp['plateID']].append(sp)
1359    
1360     self.datafiles = [ (os.path.join(self.dir,md['filename']),int(md['index'])) for md in self.metadata['SCAN'] ]
1361     self.distinct_datafiles = dict([(os.path.join(self.dir,md['filename']),
1362     fileSHA(os.path.join(self.dir,md['filename']))) for md in self.metadata['SCAN']])
1363    
1364     self.maptoscan = {}
1365     index = 0
1366     for md in self.metadata['SCAN']:
1367     self.maptoscan[(os.path.join(self.dir,md['filename']),int(md['index']))] = index
1368     index += 1
1369    
1370     from win32com.client import Dispatch, gencache
1371     self.deapp = Dispatch('DataExplorer.Application',
1372     resultCLSID='{3FED40F1-D409-11D1-8B56-0060971CB54B}')
1373     self.delib = gencache.EnsureModule('{06972F50-13F6-11D3-A5CB-0060971CB54B}',0,4,2)
1374     self.deapp.AutomatedProcessing = 1
1375     self.deapp.Visible = 0
1376    
1377     def close(self):
1378    
1379     if not self.deapp is None:
1380     self.deapp.Quit()
1381     self.deapp = None
1382    
1383     def peakDetect(self,msLevel):
1384     return msLevel in self.applyPeakDetect
1385    
1386     # remove .set and .cts files to prevent DataExplorer crashes - DT 11-1-2006
1387     def remsettingsfile(self,fn):
1388 nje01 1.2 # print "rmsettingsfile: %s"%fn
1389     if ((len(fn) > 3) and (fn[-4:].lower() == '.t2d')):
1390     path2del = fn[:-4] + '.cts'
1391     if os.path.exists(path2del):
1392     os.unlink(path2del)
1393     path2del = fn[:-4] + '.set'
1394     if os.path.exists(path2del):
1395     os.unlink(path2del)
1396 nje01 1.1
1397     def spectra(self):
1398     self.open()
1399    
1400     prevfile = ''
1401     for (f,i) in self.datafiles:
1402    
1403     if f != prevfile:
1404     self.deapp.Documents.Close()
1405 nje01 1.2 self.remsettingsfile(prevfile)
1406 nje01 1.1
1407     if not os.path.exists(f):
1408     print >>sys.stderr, "Filename %s does not exist."%(f,)
1409     sys.exit(1)
1410    
1411     self.remsettingsfile(f)
1412 nje01 1.2 self.deapp.Documents.Open(f)
1413     # print "open %s"%f
1414 nje01 1.1
1415    
1416     doc = self.deapp.Documents.Item(0)
1417     sv = doc.SpecView
1418    
1419     sv.SetSpectrumAt(i-1)
1420    
1421     (tf,fixedMass) = doc.InstrumentSettings.GetSetting(self.delib.constants.dePreCursorIon,i-1,None)
1422    
1423     if fixedMass > 0:
1424     msLevel = 2
1425     else:
1426     msLevel = 1
1427    
1428     dataArr = array('f');
1429     peakDetect="False"
1430     if self.peakDetect(msLevel):
1431     peakDetect="True"
1432     sv.CorrectBaseline()
1433     doc.SpecSetup.NoiseReductionStdev = 2.0
1434     sv.FilterNoise(self.delib.constants.deNoiseRemoval)
1435     sv.PeakSettings.PeakThresholdPercent = 1
1436     st,sp = sv.GetPeakData(self.delib.constants.deSpecPeakAll,
1437     self.delib.constants.deSpecPeakSortMass, 0, 0)
1438     for j in xrange(len(sp)):
1439     x = sp[j][self.delib.constants.deSpecPeakCentroidMass]
1440     y = sp[j][self.delib.constants.deSpecPeakArea]
1441     if y <= 0:
1442     continue
1443     dataArr.append(x)
1444     dataArr.append(y)
1445     else:
1446     n,s = sv.GetRawData(0,0)
1447     for j in xrange(n):
1448     dataArr.append(s[j][0])
1449     dataArr.append(s[j][1])
1450    
1451     if len(dataArr) == 0:
1452     continue
1453    
1454     polarity_val = "+"
1455    
1456     # Scan level attributes
1457     # Required by mzXML (no format spec. needed)
1458     d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect}
1459     # Optional ('scan.' and prefixformat spec. needed)
1460     d.update({
1461     'scan.polarity:%s':polarity_val,
1462     })
1463    
1464     scanindex = self.maptoscan[(f,i)]
1465     d.update({
1466     'plateID':self.metadata['SCAN'][scanindex]['plateID'],
1467     'spotID':self.metadata['SCAN'][scanindex]['spotID'],
1468     })
1469 nje01 1.2 d.update({'scanOrigin.parentFileID:%s': self.distinct_datafiles[f],
1470 nje01 1.1 'scanOrigin.num:%d': i})
1471     if msLevel == 1:
1472     yield (dataArr,d)
1473     else:
1474     # tandem MS scans add the following
1475     # Required by mzXML
1476     d.update({'precursorMz':fixedMass})
1477     # Optional
1478     d.update({'precursorMz.precursorCharge:%d':1})
1479     yield (dataArr,d)
1480    
1481     prevfile = f
1482    
1483     self.deapp.Documents.Close()
1484     self.remsettingsfile(prevfile)
1485 nje01 1.2 self.remsettingsfile(f)
1486 nje01 1.1
1487     def getMsRunMetaData(self):
1488     self.open()
1489     return {}
1490    
1491     def getFilenames(self):
1492     return [ t for t in self.distinct_datafiles.items() ]
1493    
1494     def msManufacturer(self):
1495     return "ABI / SCIEX"
1496    
1497     def msModel(self):
1498     return "4700 Proteomic Analyzer"
1499    
1500     def msIonisation(self):
1501     return "MALDI"
1502    
1503     def msMassAnalyzer(self):
1504     return "TOF"
1505    
1506     def msDetector(self):
1507     return None
1508    
1509     def acquisitionSoftware(self):
1510     return "DataExplorer"
1511    
1512     def acquisitionSoftwareVersion(self):
1513     return "1.0"
1514    
1515     def precursorPrecision(self,mz):
1516     return .1
1517    
1518     def precursorTune(self):
1519     return False
1520    
1521     def lc(self):
1522     return False
1523    
1524     def maldi(self):
1525     return not self.lc()
1526    
1527     def plateData(self):
1528     self.open()
1529     for d in self.metadata['PLATE']:
1530     yield d
1531    
1532     def spotData(self,plate):
1533     self.open()
1534     for s in self.platespots[plate]:
1535     yield s
1536    
1537     class scanfilter:
1538     def __init__(self,str):
1539     self.rtre = re.compile('^PT([0-9]+(\.[0-9]+)?)S$')
1540     self.intre = re.compile('^[1-9][0-9]*$')
1541     self.floatre = re.compile('^[0-9]+(\.[0-9]+)?$')
1542     self.cl = []
1543     clause = str.split(',')
1544     for cl in clause:
1545     scl = cl.split('.')
1546     m = self.rtre.match(scl[2].strip())
1547     if m != None:
1548     v = float(m.group(1))
1549     elif self.floatre.match(scl[2].strip()):
1550     v = float(scl[2])
1551     elif self.intre.match(scl[2].strip()):
1552     v = int(scl[2])
1553     else:
1554     v = scl[2]
1555     self.cl.append((scl[0],scl[1],v))
1556    
1557     def test(self,d):
1558     a = {}
1559     for (k,v) in d.iteritems():
1560     k0 = k.split(':')[0].split('.')[-1]
1561     a[k0] = v
1562     for cl in self.cl:
1563     if type(a[cl[0]]) == type(u'') or \
1564     type(a[cl[0]]) == type('') :
1565     m = self.rtre.match(a[cl[0]].strip())
1566     if m != None:
1567     v1 = float(m.group(1))
1568     elif self.floatre.match(a[cl[0]].strip()):
1569     v1 = float(a[cl[0]])
1570     elif self.intre.match(a[cl[0]].strip()):
1571     v1 = int(a[cl[0]])
1572     else:
1573     v1 = a[cl[0]].strip()
1574     else:
1575     v1 = a[cl[0]]
1576     v2 = cl[2]
1577     # print >>sys.stderr, "Spectrum test:",v1,type(v1),cl[1],v2,type(v2)
1578     # sys.stderr.flush()
1579     if cl[1] == 'eq' and v1 != v2:
1580     # print >>sys.stderr, "FALSE"
1581     # sys.stderr.flush()
1582     return False
1583     elif cl[1] == 'ne' and not v1 != v2:
1584     return False
1585     elif cl[1] == 'lt' and not v1 < v2:
1586     return False
1587     elif cl[1] == 'le' and not v1 <= v2:
1588     return False
1589     elif cl[1] == 'gt' and not v1 > v2:
1590     return False
1591     elif cl[1] == 'ge' and not v1 >= v2:
1592     return False
1593     # print >>sys.stderr, "TRUE"
1594     # sys.stderr.flush()
1595     return True
1596    
1597     if __name__ == '__main__':
1598    
1599     from optparse import OptionParser
1600     import sys
1601     import glob
1602    
1603     parser = OptionParser()
1604     parser.add_option("-R", "--rawdata", type="string", dest="rawdata", default="",\
1605     help="Format of raw data. Optional if raw spectra file ends in .wiff or .t2m.")
1606     parser.add_option("-X", "--xmlformat", type="string", dest="xmlformat", default="",\
1607     help="XML format to output. Optional if output file ends in .mzxml or .mzdata.")
1608     parser.add_option("-o", "--output", type="string", dest="output", default="",\
1609     help="Output file.")
1610     parser.add_option("-p", "--peaks", type="string", dest="peaks", default="2",\
1611     help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.")
1612     parser.add_option("-f", "--filter", type="string", dest="filter", default=None,\
1613     help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.")
1614     parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\
1615     help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.")
1616     parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,\
1617     help="Compress output file. Valid options 'gz'. Default: None.")
1618    
1619     ( opts, args ) = parser.parse_args()
1620    
1621     if opts.output == "" and opts.xmlformat == "":
1622     parser.error("Either -o or -X must be suppied.")
1623     sys.exit(1)
1624    
1625     if opts.filter != None:
1626     filt=scanfilter(opts.filter)
1627     else:
1628     filt=None
1629    
1630     argsglob = []
1631     for a in args:
1632     argsglob.extend(glob.glob(a))
1633    
1634     if opts.output != "" and len(argsglob) > 1:
1635     parser.error("At most one raw spectrum data file is permitted, if -o option is used.")
1636    
1637     for a in argsglob:
1638    
1639     if opts.output != "":
1640     o = opts.output
1641     else:
1642     p = a.rfind('.')
1643     o = a[0:p]+'.'+opts.xmlformat
1644    
1645     if o == a:
1646     print >>sys.stderr, "Input file %s and output file %s are the same!"%(a,o)
1647     sys.exit(1)
1648    
1649     print >>sys.stderr, "Processing",a,"to",o
1650    
1651     if opts.rawdata.lower() in ('wiff','qstar'):
1652     r = QStarWiff(a,opts.peaks)
1653     elif opts.rawdata.lower() in ('t2d','dat','ab4700','ab4800','mariner','voyager'):
1654     r = AB4700T2D(a,opts.peaks)
1655     elif opts.rawdata.lower() in ('mzxml',):
1656     r = FromMzXML(a)
1657     elif a[-5:].lower() in ('.wiff',):
1658     r = QStarWiff(a,opts.peaks)
1659     elif a[-4:].lower() in ('.t2m',):
1660     r = AB4700T2D(a,opts.peaks)
1661     elif a[-6:].lower() in ('.mzxml',):
1662     r = FromMzXML(a)
1663     else:
1664     parser.error("Bad rawdata format specification.")
1665     sys.exit(1)
1666    
1667     if opts.xmlformat.lower() == 'mzxml':
1668     x = ToMzXML(r,o,opts.compress,filt)
1669     elif opts.xmlformat.lower() == 'mzdata':
1670     x = ToMzData(r,o,opts.compress,filt)
1671     elif opts.output[-6:].lower() in ('.mzxml',):
1672     x = ToMzXML(r,opts.output,opts.compress,filt)
1673     elif opts.output[-7:].lower() in ('.mzdata',):
1674     x = ToMzData(r,opts.output,opts.compress,filt)
1675     else:
1676     parser.error("Bad xml format specification.")
1677     sys.exit(1)
1678    
1679     x.write(debug=opts.debug)