ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/pymsxml/pymsxml.py
Revision: 1.4
Committed: Mon Nov 13 22:37:42 2006 UTC (15 years, 10 months ago) by nje01
Branch: MAIN
Changes since 1.3: +31 -18 lines
Log Message:
*** empty log message ***

Line File contents
1 #!/usr/bin/env python
2
3 import sys,os,os.path
4 import re
5 import sha
6 import math
7 import tempfile
8 import csv
9 import gzip
10 import zlib
11 from array import array
12 from base64 import b64encode,b64decode
13 from urllib import quote,unquote
14 from xml.sax import *
15
16 class NameVersion:
17 def __init__(self):
18 self.name_ = "PyMsXML"
19 self.majorVer = 0
20 self.minorVer = 5
21 self.revVer = 1
22 def version(self):
23 return "%d.%d.%d"%(self.majorVer,self.minorVer,self.revVer)
24 def name(self):
25 return self.name_
26
27 def fileSHA(filename):
28 s = sha.new()
29 h = open(filename,'rb')
30 while True:
31 buffer = h.read(1024)
32 if buffer == '':
33 break
34 s.update(buffer)
35 h.close()
36 return s.hexdigest().lower()
37
38 class ToMzXML:
39 def __init__(self,reader,filename,cmpfmt=None,filt=None,peaksCompress=False,version="3.0"):
40 self.filename = filename
41 self.compressfmt = cmpfmt
42 self.reader = reader
43 self.initSHA()
44 self.writeByteCounter = 0
45 self.actualScanCount = 0
46 self.scanIndices = {}
47 self.filter = filt
48 if not version in ('2.1','2.2','3.0'):
49 raise "Bad mzXML version \"%s\""%(version,)
50 self.version = version
51 if peaksCompress and float(self.version) < 3.0:
52 raise "Cannot compress peaks until mzXML version 3.0"
53 self.peakcompress = peaksCompress
54
55 def __del__(self):
56 if hasattr(self,'tmpFileName') and self.tmpFileName and self.tmpFileName != '':
57 os.unlink(self.tmpFileName)
58
59 def getFilename(self):
60 return self.filename
61
62 def initSHA(self):
63 self.sha1 = sha.new()
64
65 def updateSHA(self,s):
66 self.sha1.update(s)
67
68 def getSHA(self):
69 return self.sha1.hexdigest().lower()
70
71 def writeString(self,fh,data):
72 self.writeByteCounter += len(data)
73 self.sha1.update(data)
74 fh.write(data)
75
76 def write(self,debug=False):
77
78 # Make a temporary file for the scan data to count the number of spectra.
79 (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
80 tmpFile = os.fdopen(tmpFileFD,'wb')
81
82 # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
83 self.write_scans(tmpFile,debug)
84
85 tmpFile.close()
86
87 # Reset!
88
89 self.initSHA()
90 self.writeByteCounter = 0
91
92 if self.compressfmt == None and not self.filename.endswith('.gz'):
93 xmlFile = open(self.filename,'wb')
94 elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
95 if self.filename.endswith('.gz'):
96 xmlFile = gzip.open(self.filename,'wb')
97 else:
98 xmlFile = gzip.open(self.filename+'.gz','wb')
99 else:
100 print >>sys.stderr, "Bad compressfmt specification"
101 sys.exit(1)
102
103 self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
104
105 outStr = "<mzXML "
106 outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s\" "%(self.version,)
107 outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
108 outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s http://sashimi.sourceforge.net/schema_revision/mzXML_%s/mzXML_idx_%s.xsd\""%(self.version,self.version,self.version)
109 outStr += ">\n"
110
111 self.writeString (xmlFile,outStr)
112
113 outStr = "<msRun"
114 outStr += " scanCount=\"%d\"" % (self.actualScanCount,)
115 md = self.reader.getMsRunMetaData()
116 for (k,v) in md.items():
117 k,f = k.split(':')
118 if self.version not in ('2.2',) or (k != 'startTime'and k != 'endTime'):
119 outStr += (" %%s=\"%s\""%(f,))%(k,v)
120 outStr += ">\n"
121
122 self.writeString (xmlFile,outStr)
123
124 filehashes = {}
125
126 for f in self.reader.getFilenames():
127 if len(f) == 2:
128 outStr = "<parentFile "
129 outStr += "fileName=\"%s\" " % (quote(f[0]),)
130 outStr += "fileType=\"RAWData\" "
131 outStr += "fileSha1=\"%s\"" % (f[1],)
132 outStr += "/>\n"
133 else:
134 outStr = "<parentFile "
135 outStr += "fileName=\"%s\" " % (f[0],)
136 outStr += "fileType=\"%s\" " % (f[1],)
137 outStr += "fileSha1=\"%s\"" % (f[2],)
138 outStr += "/>\n"
139 self.writeString (xmlFile,outStr)
140
141 if float(self.version) >= 3.0:
142 outStr = "<msInstrument msInstrumentID=\"%s\">\n" % (self.reader.msInstrumentID(),)
143 else:
144 outStr = "<msInstrument>\n"
145
146 outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),)
147 outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),)
148 if self.reader.msIonisation():
149 outStr += "<msIonisation category=\"msIonisation\" value=\"%s\"/>\n" % (self.reader.msIonisation(),)
150 if self.reader.msMassAnalyzer():
151 outStr += "<msMassAnalyzer category=\"msMassAnalyzer\" value=\"%s\"/>\n" % (self.reader.msMassAnalyzer(),)
152 if self.reader.msDetector():
153 outStr += "<msDetector category=\"msDetector\" value=\"%s\"/>\n" % (self.reader.msDetector(),)
154
155 self.writeString (xmlFile,outStr)
156
157 outStr = "<software "
158 outStr += "type=\"acquisition\" "
159 outStr += "name=\"%s\" "%(self.reader.acquisitionSoftware(),)
160 outStr += "version=\"%s\""%(self.reader.acquisitionSoftwareVersion(),)
161 outStr += "/>\n"
162
163 self.writeString (xmlFile,outStr)
164
165 outStr = "</msInstrument>\n"
166
167 self.writeString (xmlFile,outStr)
168
169 outStr = "<dataProcessing>\n"
170
171 self.writeString (xmlFile,outStr)
172
173 nv = NameVersion()
174
175 outStr = "<software "
176 outStr += "type=\"conversion\" "
177 outStr += "name=\"%s\" "%(nv.name(),)
178 outStr += "version=\"%s\"" % (nv.version(),)
179 outStr += "/>\n"
180
181 self.writeString (xmlFile,outStr)
182
183 if (self.reader.peakDetect(1)):
184 outStr = "<processingOperation "
185 outStr += "name=\"peak_detection_level_1\" "
186 outStr += "value=\"true\""
187 outStr += "/>\n"
188 outStr += "<processingOperation "
189 outStr += "name=\"peak_detection_level_1_threshold\" "
190 outStr += "value=\"1%\""
191 outStr += "/>\n"
192 outStr += "<processingOperation "
193 outStr += "name=\"peak_detection_level_1_software\" "
194 outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),)
195 outStr += "/>\n"
196 self.writeString (xmlFile,outStr)
197 if (self.reader.peakDetect(2)):
198 outStr = "<processingOperation "
199 outStr += "name=\"peak_detection_level_2\" "
200 outStr += "value=\"true\""
201 outStr += "/>\n"
202 outStr += "<processingOperation "
203 outStr += "name=\"peak_detection_level_2_threshold\" "
204 outStr += "value=\"1%\""
205 outStr += "/>\n"
206 outStr += "<processingOperation "
207 outStr += "name=\"peak_detection_level_2_software\" "
208 outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),)
209 outStr += "/>\n"
210 self.writeString (xmlFile,outStr)
211
212 outStr = "<processingOperation "
213 outStr += "name=\"min_peaks_per_spectra\" "
214 outStr += "value=\"1\""
215 outStr += "/>\n"
216
217 self.writeString (xmlFile,outStr)
218
219 outStr = "</dataProcessing>\n"
220
221 self.writeString (xmlFile,outStr)
222
223 if self.reader.maldi():
224 outStr = '<spotting>\n'
225 for d in self.reader.plateData():
226 outStr += '<plate plateID="%s" spotXCount="%s" spotYCount="%s">\n' % \
227 (d['plateID'],d['spotXCount'],d['spotYCount'])
228 outStr += '<plateManufacturer category="plateManufacturer" value="%s"/>\n' % \
229 (d['plateManufacturer'],)
230 outStr += '<plateModel category="plateModel" value="%s"/>\n' % \
231 (d['plateModel'],)
232 for s in self.reader.spotData(d['plateID']):
233 outStr += '<spot spotID="%s" spotXPosition="%s" spotYPosition="%s">\n' % \
234 (s['spotID'],s['spotXPosition'],s['spotYPosition'])
235 outStr += '<maldiMatrix category="maldiMatrix" value="%s"/>\n' % \
236 (s['maldiMatrix'],)
237 outStr += '</spot>\n'
238 outStr += '</plate>\n'
239 outStr += '</spotting>\n'
240 self.writeString(xmlFile,outStr)
241
242 if self.reader.lc():
243 pass
244
245 scanOffset = self.writeByteCounter
246
247 tmpFile = open(self.tmpFileName,'rb')
248 while True:
249 tmpStr = tmpFile.read(1024)
250 if tmpStr == '':
251 break
252 self.writeString(xmlFile,tmpStr)
253 tmpFile.close()
254 os.unlink(self.tmpFileName)
255 self.tmpFileName = ''
256
257 outStr = "</msRun>\n"
258 self.writeString (xmlFile,outStr)
259
260 indexOffset = self.writeByteCounter
261
262 outStr = "<index "
263 outStr += "name=\"scan\" "
264 outStr += ">\n"
265
266 self.writeString (xmlFile,outStr)
267
268 for i in xrange(1,self.actualScanCount+1):
269 outStr = "<offset id=\"%d\">" % (i,)
270 outStr += "%d</offset>\n" % (self.scanIndices[i] + scanOffset,)
271 self.writeString(xmlFile,outStr)
272
273 outStr = "</index>\n"
274 self.writeString (xmlFile,outStr)
275
276 outStr = "<indexOffset>%d</indexOffset>\n" % (indexOffset,)
277
278 self.writeString (xmlFile,outStr)
279
280 self.writeString (xmlFile,"<sha1>")
281
282 outStr = self.getSHA()
283
284 self.writeString (xmlFile,outStr)
285
286 self.writeString (xmlFile,"</sha1>\n")
287
288 outStr = "</mzXML>\n"
289 self.writeString (xmlFile,outStr)
290
291 xmlFile.close()
292
293 def write_scans(self,xmlFile,debug=False):
294
295 msLevel = 0
296 scanNumber = 0
297 ancestors = []
298 self.writeByteCounter = 0
299
300 for (s,d) in self.reader.spectra():
301
302 if self.filter != None and not self.filter.test(d):
303 continue
304
305 if debug and scanNumber >= 10:
306 break
307
308 if not d.has_key('msLevel'):
309 print >>sys.stderr, "Required scan attributes missing."
310 sys.exit(1)
311
312 prevLevel = msLevel
313 msLevel = d['msLevel']
314
315 if prevLevel < msLevel and prevLevel > 0:
316 # We went "in" a level, push scan number of parent
317 ancestors.append((scanNumber,prevSpec))
318 elif prevLevel > msLevel and msLevel > 0:
319 if len(ancestors) == 0:
320 pass #print >>sys.stderr, "No ancestor for scan %s at level %s"%(scanNumber,msLevel)
321 else:
322 ancestors.pop()
323
324 outStr = ''
325 if prevLevel > msLevel:
326 for m in xrange(0,prevLevel-msLevel+1):
327 outStr += "</scan>\n"
328 else:
329 if prevLevel > 0 and prevLevel == msLevel:
330 outStr += "</scan>\n"
331
332 self.writeString(xmlFile,outStr)
333
334 scanNumber = scanNumber + 1
335
336 self.scanIndices[scanNumber] = self.writeByteCounter
337
338 totIonCurrent = 0
339 maxmz = None
340 minmz = None
341 basePeakMz = None
342 basePeakIntensity = None
343 peaksCount = 0
344 for i in xrange(0,len(s),2):
345 x = s[i]; y = s[i+1]
346 if minmz is None or minmz > x:
347 minmz = x
348 if maxmz is None or maxmz < x:
349 maxmz = x
350 totIonCurrent += y
351 if basePeakIntensity is None or basePeakIntensity < y:
352 basePeakIntensity = y
353 basePeakMz = x
354 peaksCount += 1
355
356 outStr = "<scan num=\"%d\" msLevel=\"%d\"" % (scanNumber,msLevel)
357 outStr += " peaksCount=\"%d\"" % (peaksCount,)
358 outStr += " lowMz=\"%f\"" % (minmz,)
359 outStr += " highMz=\"%f\"" % (maxmz,)
360 outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,)
361 outStr += " basePeakMz=\"%f\"" % (basePeakMz,)
362 outStr += " basePeakIntensity=\"%f\"" % (basePeakIntensity,)
363
364 for (k,v) in d.items():
365 if k.startswith('scan.'):
366 k,f = k.split(':')
367 k = k[5:]
368 outStr += (" %%s=\"%s\""%(f,))%(k,v)
369 outStr += ">\n"
370 self.writeString(xmlFile,outStr)
371
372 if self.version in ('2.2',):
373 any = False
374 for (k,v) in d.items():
375 if k.startswith('scanOrigin.'):
376 if not any:
377 any = True
378 outStr = "<scanOrigin"
379 k,f = k.split(':')
380 k = k[11:]
381 outStr += (" %%s=\"%s\""%(f,))%(k,v)
382 if any:
383 outStr += "/>\n"
384 self.writeString(xmlFile,outStr)
385
386 if msLevel > 1:
387
388 if not d.has_key('precursorMz') :
389 print >>sys.stderr, "Required precursorMz attribute missing."
390 sys.exit(1)
391
392 # We scan the parent spectrum in the region of the
393 # precursorMz to establish the intensity. Optionally,
394 # we tune the precursorMz itself, on the basis of this
395
396 pMz = d['precursorMz']
397 tol = self.reader.precursorPrecision(pMz)
398 pMzLB = pMz-tol
399 pMzUB = pMz+tol
400 pMzIntensity = 0
401
402 if len(ancestors) > 0:
403 for i in xrange(0,len(ancestors[-1][1]),2):
404 x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
405 if x < pMzLB:
406 continue
407 if x > pMzUB:
408 break
409 if pMzIntensity < y:
410 pMzIntensity = y
411 pMzTune = x
412
413 if self.reader.precursorTune():
414 pMz = pMzTune
415
416 outStr = "<precursorMz"
417 for (k,v) in d.items():
418 if k.startswith('precursorMz.'):
419 k,f = k.split(':')
420 k = k[12:]
421 outStr += (" %%s=\"%s\""%(f,))%(k,v)
422 if len(ancestors)>0:
423 outStr += " precursorScanNum=\"%d\""%(ancestors[-1][0],)
424 outStr += " precursorIntensity=\"%f\">"%(pMzIntensity,)
425 else:
426 outStr += ">"
427 outStr += "%f</precursorMz>\n" % (pMz,)
428 self.writeString(xmlFile,outStr)
429
430 if self.reader.maldi() and self.version in ('2.2',):
431 outStr = '<maldi'
432 for (k,v) in d.items():
433 if k.startswith('maldi.'):
434 k,f = k.split(':')
435 k = k[6:]
436 outStr += (" %%s=\"%s\""%(f,))%(k,v)
437 outStr += " plateID=\"%s\""%(d['plateID'],)
438 outStr += " spotID=\"%s\""%(d['spotID'],)
439 outStr += "/>\n"
440 self.writeString(xmlFile,outStr)
441
442 if sys.byteorder != 'big':
443 s.byteswap()
444
445 if debug:
446 s = s[:20]
447
448 specstr = s.tostring()
449 if self.peakcompress:
450 specstr = zlib.compress(specstr,9)
451 outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"zlib\" compressedLen=\"%d\">"%(len(specstr),)
452 else:
453 if float(self.version) >= 3.0:
454 outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"none\" compressedLen=\"%d\">"%(len(specstr),)
455 else:
456 outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">"
457
458 self.writeString(xmlFile,outStr)
459
460 # Spec says the byte order shall be
461 # network, which is the same as big endian.
462
463 if sys.byteorder != 'big':
464 s.byteswap()
465
466 outStr = b64encode(specstr)
467 self.writeString(xmlFile,outStr)
468
469 outStr = "</peaks>\n"
470 self.writeString(xmlFile,outStr)
471
472 if self.reader.maldi() and self.version not in ('2.2',):
473 outStr = ''
474 for (k,v) in d.items():
475 if k.startswith('maldi.'):
476 k,f = k.split(':')
477 k = k[6:]
478 outStr += '<nameValue'
479 outStr += ' name="maldi.%s"'%(k,)
480 outStr += (' value="%s"'%(f,))%(v,)
481 outStr += '/>\n'
482 outStr += '<nameValue'
483 outStr += ' name="maldi.plateID"'
484 outStr += ' value="%s"'%(d['plateID'],)
485 outStr += '/>\n'
486 outStr += '<nameValue'
487 outStr += ' name="maldi.spotID"'
488 outStr += ' value="%s"'%(d['spotID'],)
489 outStr += '/>\n'
490 self.writeString(xmlFile,outStr)
491
492 if self.version not in ('2.2',):
493 outStr = ''
494 for (k,v) in d.items():
495 if k.startswith('scanOrigin.'):
496 k,f = k.split(':')
497 k = k[11:]
498 outStr += '<nameValue'
499 outStr += ' name="scanOrigin.%s"'%(k,)
500 outStr += (' value="%s"'%(f,))%(v,)
501 outStr += '/>\n'
502 self.writeString(xmlFile,outStr)
503
504 outStr = ''
505 for (k,v) in d.items():
506 if k.startswith('nameValue.'):
507 k,f = k.split(':')
508 k = k[10:]
509 outStr += '<nameValue'
510 outStr += ' name="%s"'%(k,)
511 outStr += (' value="%s"'%(f,))%(v,)
512 outStr += '/>\n'
513 if len(outStr) > 0:
514 self.writeString(xmlFile,outStr)
515
516 xmlFile.flush()
517
518 prevSpec = s
519
520 outStr = ""
521 for m in xrange(0,len(ancestors)+1):
522 outStr += "</scan>\n"
523
524 self.writeString(xmlFile,outStr)
525 self.actualScanCount = scanNumber
526
527 class ToMzData:
528 def __init__(self,reader,filename,cmpfmt=None,filt=None):
529 self.filename = filename
530 self.compressfmt = cmpfmt
531 self.reader = reader
532 self.actualScanCount = 0
533 self.initSHA()
534 self.filter = filt
535
536 def writeString(self,fh,data):
537 # self.writeByteCounter += len(data)
538 self.sha1.update(data)
539 fh.write(data)
540
541 def initSHA(self):
542 self.sha1 = sha.new()
543
544 def updateSHA(self,s):
545 self.sha1.update(s)
546
547 def getSHA(self):
548 return self.sha1.hexdigest().lower()
549
550 def write(self,debug=False):
551
552 # Make a temporary file for the scan data to count the number of spectra.
553 (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
554 tmpFile = os.fdopen(tmpFileFD,'wb')
555
556 # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
557 self.write_scans(tmpFile,debug)
558
559 tmpFile.close()
560
561 scanhash = self.getSHA()
562
563 self.initSHA()
564
565 if self.compressfmt == None and not self.filename.endswith('.gz'):
566 xmlFile = open(self.filename,'wb')
567 elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
568 if self.filename.endswith('.gz'):
569 xmlFile = gzip.open(self.filename,'wb')
570 else:
571 xmlFile = gzip.open(self.filename+'.gz','wb')
572 else:
573 print >>sys.stderr, "Bad compressfmt specification"
574 sys.exit(1)
575
576 self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
577
578 outStr = "<mzData "
579 outStr += "version=\"1.05\" "
580 outStr += "accessionNumber=\"pymsxml:%s\" "%(scanhash,)
581 outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
582 # outStr += "xmlns=\"http://psidev.sourceforge.net/ms/xml/mzdata\" "
583 outStr += "xsi:noNamespaceSchemaLocation=\"http://psidev.sourceforge.net/ms/xml/mzdata/mzdata.xsd\""
584 outStr += ">\n"
585
586 self.writeString (xmlFile,outStr)
587
588 outStr = "<description>\n"
589 outStr += "<admin>\n"
590 outStr += "<sampleName></sampleName>\n"
591 outStr += "<contact>\n"
592 outStr += "<name></name>\n"
593 outStr += "<institution></institution>\n"
594 outStr += "</contact>\n"
595 outStr += "</admin>\n"
596 outStr += "<instrument>\n"
597 outStr += "<instrumentName></instrumentName>\n"
598 outStr += "<source>\n"
599 outStr += "</source>\n"
600 outStr += "<analyzerList count=\"1\">\n"
601 outStr += "<analyzer>\n"
602 outStr += "</analyzer>\n"
603 outStr += "</analyzerList>\n"
604 outStr += "<detector>\n"
605 outStr += "</detector>\n"
606 outStr += "</instrument>\n"
607 outStr += "<dataProcessing>\n"
608 outStr += "<software>\n"
609 outStr += "<name>%s</name>\n"%(self.reader.acquisitionSoftware(),)
610 outStr += "<version>%s</version>\n"%(self.reader.acquisitionSoftwareVersion(),)
611 outStr += "</software>\n"
612 outStr += "<software>\n"
613 nv = NameVersion()
614 outStr += "<name>%s</name>\n"%(nv.name(),)
615 outStr += "<version>%s</version>\n"%(nv.version(),)
616 outStr += "</software>\n"
617 outStr += "</dataProcessing>\n"
618 outStr += "</description>\n"
619
620 self.writeString (xmlFile,outStr)
621
622 outStr = "<spectrumList"
623 outStr += " count=\"%d\">\n"%(self.actualScanCount,)
624
625 self.writeString (xmlFile,outStr)
626
627 tmpFile = open(self.tmpFileName,'rb')
628 while True:
629 tmpStr = tmpFile.read(1024)
630 if tmpStr == '':
631 break
632 self.writeString(xmlFile,tmpStr)
633 tmpFile.close()
634 os.unlink(self.tmpFileName)
635 self.tmpFileName = ''
636
637 outStr = "</spectrumList>\n</mzData>\n"
638
639 self.writeString (xmlFile,outStr)
640
641 def write_scans(self,xmlFile,debug=False):
642
643 msLevel = 0
644 scanNumber = 0
645 ancestors = []
646
647 self.initSHA()
648 # self.writeByteCounter = 0
649
650 for (s,d) in self.reader.spectra():
651
652 if self.filter != None and not self.filter.test(d):
653 continue
654
655 if debug and scanNumber >= 10:
656 break
657
658 if not d.has_key('msLevel'):
659 print >>sys.stderr, "Required scan attributes missing."
660 sys.exit(1)
661
662 prevLevel = msLevel
663 msLevel = d['msLevel']
664
665 if prevLevel < msLevel and prevLevel > 0:
666 # We went "in" a level, push scan number of parent
667 ancestors.append((scanNumber,prevSpec))
668 elif prevLevel > msLevel and msLevel > 0:
669 ancestors.pop()
670
671 scanNumber = scanNumber + 1
672
673 totIonCurrent = 0
674 maxmz = None
675 minmz = None
676 basePeakMz = None
677 basePeakIntensity = None
678 peaksCount = 0
679 for i in xrange(0,len(s),2):
680 x = s[i]; y = s[i+1]
681 if minmz is None or minmz > x:
682 minmz = x
683 if maxmz is None or maxmz < x:
684 maxmz = x
685 totIonCurrent += y
686 if basePeakIntensity is None or basePeakIntensity < y:
687 basePeakIntensity = y
688 basePeakMz = x
689 peaksCount += 1
690
691 outStr = "<spectrum id=\"%d\">\n" % (scanNumber,)
692 outStr += "<spectrumDesc>\n"
693 outStr += "<spectrumSettings>\n"
694
695 self.writeString (xmlFile,outStr)
696
697 # outStr += "<acqSpecification>\n"
698 # outStr += "</acqSpecification>\n"
699
700 outStr = "<spectrumInstrument"
701 outStr += " msLevel=\"%d\""%(msLevel,)
702 if d.has_key('scan.startMz') and \
703 d.has_key('scan.endMz'):
704 outStr += " mzRangeStart=\"%f\""%(d['scan.startMz'],)
705 outStr += " mzRangeStop=\"%f\""%(d['scan.endMz'],)
706 outStr += ">\n"
707 for (k,v) in d.items():
708 if k.startswith('scan.'):
709 k,f = k.split(':')
710 k = k[5:]
711
712 if k == "polarity":
713 if v == '+':
714 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Positive",)
715 elif v == '-':
716 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Negative",)
717 elif k == 'retentionTime':
718 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"TimeInSeconds\" value=\"%f\"/>\n"%(v,)
719 else:
720 outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
721 outStr += "<userParam name=\"lowMz\" value=\"%f\"/>\n"%(minmz,)
722 outStr += "<userParam name=\"highMz\" value=\"%f\"/>\n"%(maxmz,)
723 outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,)
724 outStr += "<userParam name=\"basePeakMz\" value=\"%f\"/>\n"%(basePeakMz,)
725 outStr += "<userParam name=\"basePeakIntensity\" value=\"%f\"/>\n"%(basePeakIntensity,)
726 outStr += "</spectrumInstrument>\n"
727 outStr += "</spectrumSettings>\n"
728
729 self.writeString (xmlFile,outStr)
730
731 if msLevel > 1:
732
733 if not d.has_key('precursorMz') :
734 print >>sys.stderr, "Required precursorMz attribute missing."
735 sys.exit(1)
736
737 # We scan the parent spectrum in the region of the
738 # precursorMz to establish the intensity. Optionally,
739 # we tune the precursorMz itself, on the basis of this
740
741 pMz = d['precursorMz']
742 tol = self.reader.precursorPrecision(pMz)
743 pMzLB = pMz-tol
744 pMzUB = pMz+tol
745 pMzIntensity = 0
746
747 for i in xrange(0,len(ancestors[-1][1]),2):
748 x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
749 if x < pMzLB:
750 continue
751 if x > pMzUB:
752 break
753 if pMzIntensity < y:
754 pMzIntensity = y
755 pMzTune = x
756
757 if self.reader.precursorTune():
758 pMz = pMzTune
759
760 outStr = "<precursorList count=\"1\">\n"
761
762 outStr += "<precursor"
763 outStr += " msLevel=\"%d\""%(msLevel,)
764 outStr += " spectrumRef=\"%d\""%(ancestors[-1][0],)
765 outStr += ">\n"
766
767 self.writeString(xmlFile,outStr)
768
769 outStr = "<ionSelection>\n"
770 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"MassToChargeRatio\" value=\"%f\"/>\n"%(pMz,)
771 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Intensity\" value=\"%f\"/>\n"%(pMzIntensity,)
772 for (k,v) in d.items():
773 if k.startswith('precursorMz.'):
774 k,f = k.split(':')
775 k = k[12:]
776 outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
777 outStr += "</ionSelection>\n"
778
779 self.writeString(xmlFile,outStr)
780
781 outStr = "<activation>\n"
782 outStr += "</activation>\n"
783 outStr += "</precursor>\n"
784 outStr += "</precursorList>\n"
785
786 self.writeString(xmlFile,outStr)
787
788 if self.reader.maldi():
789 outStr = '<maldi'
790 for (k,v) in d.items():
791 if k.startswith('maldi.'):
792 k,f = k.split(':')
793 k = k[6:]
794 outStr += (" %%s=\"%s\""%(f,))%(k,v)
795 outStr += " plateID=\"%s\""%(d['plateID'],)
796 outStr += " spotID=\"%s\""%(d['spotID'],)
797 outStr += "/>\n"
798 self.writeString(xmlFile,outStr)
799
800 outStr = "</spectrumDesc>\n"
801 self.writeString(xmlFile,outStr)
802
803 outStr = "<mzArrayBinary>\n"
804 outStr += "<data"
805 outStr += " precision=\"32\""
806 outStr += " endian=\"%s\""%(sys.byteorder,)
807 outStr += " length=\"%d\""%(peaksCount,)
808 outStr += ">"
809 self.writeString(xmlFile,outStr)
810
811 outStr = b64encode(s[0:peaksCount*2:2].tostring())
812 if not debug:
813 self.writeString(xmlFile,outStr)
814 else:
815 self.writeString(xmlFile,outStr[0:100])
816
817 outStr = "</data>\n</mzArrayBinary>\n"
818 self.writeString(xmlFile,outStr)
819
820 outStr = "<intenArrayBinary>\n"
821 outStr += "<data"
822 outStr += " precision=\"32\""
823 outStr += " endian=\"%s\""%(sys.byteorder,)
824 outStr += " length=\"%d\""%(peaksCount,)
825 outStr += ">"
826 self.writeString(xmlFile,outStr)
827
828 outStr = b64encode(s[1:peaksCount*2:2].tostring())
829 if not debug:
830 self.writeString(xmlFile,outStr)
831 else:
832 self.writeString(xmlFile,outStr[0:100])
833
834 outStr = "</data>\n</intenArrayBinary>\n"
835 self.writeString(xmlFile,outStr)
836
837 if self.reader.maldi():
838 outStr = ''
839 for (k,v) in d.items():
840 if k.startswith('maldi.'):
841 k,f = k.split(':')
842 k = k[6:]
843 outStr += '<nameValue'
844 outStr += ' name="%s"'%(k,)
845 outStr += (' value="%f"'%(f,))%(v,)
846 outStr += '/>\n'
847 ## outStr += '<nameValue'
848 ## outStr += ' name="plateID"'
849 ## outStr += ' value="%s"'%(d['plateID'],)
850 ## outStr += '/>\n'
851 ## outStr += '<nameValue'
852 ## outStr += ' name="spotID"'
853 ## outStr += ' value="%s"'%(d['spotID'],)
854 ## outStr += '/>\n'
855 self.writeString(xmlFile,outStr)
856
857 outStr = "</spectrum>\n"
858 self.writeString(xmlFile,outStr)
859 xmlFile.flush()
860
861 prevSpec = s
862
863 self.actualScanCount = scanNumber
864
865 class QStarWiff:
866 def __init__(self,filename,peaks=None):
867 self.filename = filename
868 self.filehash = fileSHA(filename)
869 self.theTIC = None
870 self.theWF = None
871 self.theFMANSpecData = None
872 self.startTime = None
873 self.stopTime = None
874 self.applyPeakDetect = []
875 if peaks:
876 self.applyPeakDetect = map(int,peaks.split(','))
877
878 if not os.path.exists(self.filename):
879 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
880 sys.exit(1)
881
882 self.filename = os.path.abspath(self.filename)
883
884 def __del__(self):
885 # Probably unnecessary
886 self.close()
887
888 def peakDetect(self,msLevel):
889 return ((msLevel in self.applyPeakDetect) and (msLevel != 1))
890
891 def open(self):
892
893 if self.theTIC is None:
894
895 from win32com.client import Dispatch
896
897 self.theFMANSpecData = Dispatch('Analyst.FMANSpecData')
898 self.theTIC = Dispatch('Analyst.FMANChromData')
899 self.theSPL = Dispatch('Analyst.SpectralPeakList')
900
901 self.theFMANSpecData.WiffFileName = self.filename
902 self.theTIC.WiffFileName = self.filename
903 self.theWF = self.theFMANSpecData.GetWiffFileObject()
904
905 self.theTIC.SetToTIC(1,0,0)
906
907 self.startTime = self.theTIC.GetDataPointXValue(1)
908 self.stopTime = self.theTIC.GetDataPointXValue(self.theTIC.GetNumberOfDataPoints())
909
910 def close(self):
911
912 if not self.theTIC is None:
913
914 if not self.theWF is None:
915 self.theWF.CloseWiffFile()
916 self.theTIC = None
917 self.theFMANSpecData = None
918 self.theWF = None
919
920 def spectra(self):
921 self.open()
922 spectrumCount=0;
923
924 numberOfSamples = self.theWF.GetActualNumberOfSamples()
925 for i in xrange(1,numberOfSamples+1):
926
927 # print >>sys.stderr, "Sample %d"%i
928 # print >>sys.stderr, "Sample Name: %s"%(self.theWF.GetSampleName(i),)
929
930 sampleName = self.theWF.GetSampleName(i)
931
932 numberOfPeriods = self.theWF.GetActualNumberOfPeriods(i)
933 for j in xrange(0,numberOfPeriods):
934
935 numberOfExperiments = self.theWF.GetNumberOfExperiments(i,j)
936 self.theTIC.SetToTIC(i, j, 0)
937
938 numberOfTICDataPoints = self.theTIC.GetNumberOfDataPoints()
939 for l in xrange(1,numberOfTICDataPoints+1):
940
941 for k in xrange(0,numberOfExperiments):
942
943 self.theExperiment = self.theWF.GetExperimentObject(i, j, k)
944 self.theTIC.SetToTIC(i, j, k)
945
946 # Convert to seconds?
947 xValue = self.theTIC.GetDataPointXValue(l) * 60
948
949 spectrumCount += 1;
950
951 # If any Ion current at time "l"
952 if self.theTIC.GetDataPointYValue(l) > 0:
953
954 self.theFMANSpecData.SetSpectrum(i, j, k, xValue, xValue)
955
956 if self.theExperiment.ScanType == 9:
957 msLevel = 2
958 m = re.search(r'[(]([0-9.]+)[)]',self.theFMANSpecData.DataTitle)
959 fixedMass = float(m.group(1))
960 else:
961 msLevel = 1
962
963 startMass = self.theFMANSpecData.GetStartMass()
964 stopMass = self.theFMANSpecData.GetStopMass()
965 dataArr = array('f');
966 peakDetect = 'False'
967 if self.peakDetect(msLevel):
968 peakDetect = 'True'
969 minY,maxY = self.theFMANSpecData.GetYValueRange()
970 # print minY,maxY
971 self.theFMANSpecData.Threshold(maxY*0.01)
972 self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0)
973 numPeaks = self.theSPL.GetNumberOfPeaks()
974 # print numPeaks
975 for m in xrange(1,numPeaks+1):
976 (x,w,y,yp) = self.theSPL.GetPeak(m)
977 if y <= 0:
978 continue
979 dataArr.append(x)
980 dataArr.append(y)
981 else:
982 numPoints = self.theFMANSpecData.GetNumberOfDataPoints()
983 for m in xrange(1,numPoints+1):
984 if self.theFMANSpecData.GetDataPointYValue(m) <= 0:
985 continue
986 x = self.theFMANSpecData.GetDataPointXValue(m)
987 dataArr.append(x)
988 y = self.theFMANSpecData.GetDataPointYValue(m)
989 dataArr.append(y)
990 polarity = self.theExperiment.Polarity
991
992 if len(dataArr) == 0:
993 continue
994
995 if polarity == 0:
996 polarity_val = "+"
997 else:
998 polarity_val = "-"
999
1000 rt = "PT%fS"%(xValue,)
1001
1002 # Scan level attributes
1003 # Required by mzXML (no format spec. needed)
1004 d = {'msLevel':msLevel}
1005 # Optional ('scan.' and prefixformat spec. needed)
1006 d.update({
1007 'scan.retentionTime:PT%fS':xValue,
1008 'scan.polarity:%s':polarity_val,
1009 'scan.startMz:%f':startMass,
1010 'scan.endMz:%f':stopMass,
1011 })
1012 d.update({'scanOrigin.parentFileID:%s': self.filehash,
1013 'scanOrigin.num:%d': spectrumCount})
1014 d.update({'nameValue.sample:%d':i,
1015 'nameValue.period:%d':j,
1016 'nameValue.experiment:%d':k,
1017 'nameValue.sampleName:%s':sampleName})
1018
1019 if msLevel == 1:
1020 yield (dataArr,d)
1021 else:
1022 # tandem MS scans add the following
1023 # Required by mzXML
1024 d.update({'precursorMz':fixedMass})
1025 # Optional
1026 # d.update({'precursorMz.precursorMzIntensity:f':0.0})
1027 yield (dataArr,d)
1028
1029 def getMsRunMetaData(self):
1030 self.open()
1031 d = {'startTime:PT%fS':self.startTime,
1032 'endTime:PT%fS':self.stopTime,
1033 }
1034 return d
1035
1036 def getFilenames(self):
1037 return [ (self.filename,self.filehash) ]
1038
1039 def msInstrumentID(self):
1040 return "1"
1041
1042 def msManufacturer(self):
1043 return "ABI / SCIEX"
1044
1045 def msModel(self):
1046 return "QSTAR"
1047
1048 def msIonisation(self):
1049 return "ESI"
1050
1051 def msMassAnalyzer(self):
1052 return "Quadrupole"
1053
1054 def msDetector(self):
1055 return "LE"
1056
1057 def acquisitionSoftware(self):
1058 return "Analyst"
1059
1060 def acquisitionSoftwareVersion(self):
1061 return "1.0"
1062
1063 def precursorPrecision(self,mz):
1064 return .1
1065
1066 def precursorTune(self):
1067 return True
1068
1069 def lc(self):
1070 return True
1071
1072 def maldi(self):
1073 return not self.lc()
1074
1075 class mzXML_md_sax(handler.ContentHandler):
1076
1077 def __init__(self):
1078 self.md = {}
1079 self.context = ''
1080
1081 def startElement(self, name, attrs):
1082 self.context += ':%s'%(name,)
1083 # print >>sys.stderr, ">>",self.context
1084 # sys.stderr.flush()
1085 if self.context.endswith(':msRun'):
1086 if attrs.has_key('startTime'):
1087 self.md['startTime'] = float(attrs['startTime'][2:-1])
1088 if attrs.has_key('endTime'):
1089 self.md['endTime'] = float(attrs['endTime'][2:-1])
1090 elif self.context.endswith(':parentFile'):
1091 if not self.md.has_key('parentFile'):
1092 self.md['parentFile'] = []
1093 self.md['parentFile'].append((unquote(attrs['fileName']),attrs['fileType'],attrs['fileSha1']))
1094 elif self.context.endswith(':instrument'):
1095 self.md['msManufacturer'] = attrs['manufacturer']
1096 self.md['msModel'] = attrs['model']
1097 self.md['msIonisation'] = attrs['ionisation']
1098 self.md['msMassAnalyzer'] = attrs['msType']
1099 elif self.context.endswith(':msInstrument:msManufacturer'):
1100 self.md['msManufacturer'] = attrs['value']
1101 elif self.context.endswith(':msInstrument:msModel'):
1102 self.md['msModel'] = attrs['value']
1103 elif self.context.endswith(':msInstrument:msIonisation'):
1104 self.md['msIonisation'] = attrs['value']
1105 elif self.context.endswith(':msInstrument:msMassAnalyzer'):
1106 self.md['msMassAnalyzer'] = attrs['value']
1107 elif self.context.endswith(':msInstrument:msDetector'):
1108 self.md['msDetector'] = attrs['value']
1109 elif self.context.endswith(':msInstrument:software'):
1110 if attrs['type'] == 'acquisition':
1111 self.md['acquisitionSoftware'] = attrs['name']
1112 self.md['acquisitionSoftwareVersion'] = attrs['version']
1113 elif self.context.endswith(':instrument:software'):
1114 if attrs['type'] == 'acquisition':
1115 self.md['acquisitionSoftware'] = attrs['name']
1116 self.md['acquisitionSoftwareVersion'] = attrs['version']
1117 elif self.context.endswith(':scan'):
1118 raise SAXException("Early termination")
1119
1120 def endElement(self, name):
1121 # print >>sys.stderr, "<<",self.context
1122 # sys.stderr.flush()
1123 self.context = self.context[0:-(len(name)+1)]
1124
1125
1126 class mzXML_spec_sax(handler.ContentHandler):
1127
1128 def __init__(self):
1129 self.context = ''
1130 self.content = ''
1131 self.spectra = []
1132 self.done = False
1133 self.scancount = 0
1134 self.scanmax = 1000
1135 self.scanstart = 0
1136
1137 def startElement(self, name, attrs):
1138 self.context += ':%s'%(name,)
1139 # print >>sys.stderr, ">>",self.context
1140 # sys.stderr.flush()
1141 self.content = ''
1142 if name == 'scan':
1143 self.scanmd = {}
1144 self.scanlevel = int(attrs['msLevel'])
1145 self.scannum = int(attrs['num'])
1146 if attrs.has_key('retentionTime'):
1147 self.rt = float(attrs['retentionTime'][2:-1])
1148 if attrs.has_key('endMz'):
1149 self.endMz = float(attrs['endMz'])
1150 if attrs.has_key('startMz'):
1151 self.startMz = float(attrs['startMz'])
1152 self.polarity = attrs.get('polarity',None)
1153 for (k,v) in attrs.items():
1154 self.scanmd[k] = v
1155 self.precursorMz = None
1156 self.scancount += 1
1157 elif name == 'msRun':
1158 self.scancount = 0
1159 self.spectra = []
1160 self.context = ':msRun'
1161 elif name == 'scanOrigin':
1162 self.scanmd['scanOrigin'] = attrs
1163
1164 def characters(self, content):
1165 if self.scancount >= self.scanstart:
1166 if self.context.endswith(':peaks') or \
1167 self.context.endswith(':precursorMz'):
1168 self.content += content
1169
1170 def endElement(self, name):
1171 if self.scancount >= self.scanstart:
1172 if self.context.endswith('scan:peaks'):
1173
1174 spec = array('f')
1175 # print >>sys.stderr, len(self.content), len(b64decode(self.content)), self.content
1176 spec.fromstring(b64decode(self.content))
1177 if sys.byteorder != 'big':
1178 spec.byteswap()
1179 d = {'msLevel':self.scanlevel}
1180 # Optional ('scan.' and prefixformat spec. needed)
1181 if hasattr(self,'rt'):
1182 d.update({'scan.retentionTime:PT%fS':self.rt})
1183 if hasattr(self,'startMz'):
1184 d.update({'scan.startMz:%f':self.startMz})
1185 if hasattr(self,'endMz'):
1186 d.update({'scan.endMz:%f':self.endMz})
1187 if self.polarity != None:
1188 d.update({'scan.polarity:%s':self.polarity})
1189 if self.scanlevel == 2:
1190 d.update({'precursorMz':self.precursorMz})
1191 if self.scanmd.has_key('scanOrigin'):
1192 d.update({'scanOrigin.parentFileID:%s': self.scanmd['scanOrigin']['parentFileID'],
1193 'scanOrigin.num:%d': self.scanmd['scanOrigin']['num']})
1194 for (k,v) in self.scanmd.items():
1195 if not d.has_key(k):
1196 d[k] = v
1197 self.spectra.append((spec,d))
1198 if len(self.spectra) >= self.scanmax:
1199 self.scanstart = self.scancount+1
1200 raise SAXException("Early termination")
1201 elif self.context.endswith('scan:precursorMz'):
1202 self.precursorMz = float(self.content)
1203 if self.context.endswith('msRun'):
1204 self.done = True
1205 # print >>sys.stderr, "<<",self.context
1206 # sys.stderr.flush()
1207 self.context = self.context[0:-(len(name)+1)]
1208
1209 class FromMzXML:
1210 def __init__(self,filename):
1211 self.filename = filename
1212 self.parser = None
1213
1214 if not os.path.exists(self.filename):
1215 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1216 sys.exit(1)
1217
1218 if self.filename.endswith('.gz'):
1219 self.compressfmt = 'gz'
1220 else:
1221 self.compressfmt = None
1222
1223 def __del__(self):
1224 self.close()
1225
1226 def open(self):
1227
1228 if self.parser == None:
1229 # print >>sys.stderr, "Parsing out meta-data..."
1230 # sys.stderr.flush()
1231 self.parser = make_parser()
1232 self.handler = mzXML_md_sax()
1233 self.parser.setContentHandler(self.handler)
1234 try:
1235 if self.compressfmt == None:
1236 self.parser.parse(self.filename)
1237 elif self.compressfmt == 'gz':
1238 self.parser.parse(gzip.open(self.filename,'r'))
1239 else:
1240 print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1241 sys.exit(1)
1242 except SAXException:
1243 pass
1244
1245 self.md = self.handler.md
1246
1247 # print >>sys.stderr, self.md
1248 # sys.stderr.flush()
1249
1250 # print >>sys.stderr, "Set up spectrum parser..."
1251 # sys.stderr.flush()
1252 self.parser = make_parser()
1253 self.handler = mzXML_spec_sax()
1254 self.parser.setContentHandler(self.handler)
1255
1256 def close(self):
1257 pass
1258
1259 def spectra(self):
1260 self.open()
1261 while True:
1262 # print >>sys.stderr, "Spectrum parser to read spectra from",self.handler.scanstart
1263 # sys.stderr.flush()
1264 try:
1265 if self.compressfmt == None:
1266 self.parser.parse(self.filename)
1267 elif self.compressfmt == 'gz':
1268 self.parser.parse(gzip.open(self.filename,'r'))
1269 else:
1270 print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1271 sys.exit(1)
1272 except SAXException:
1273 pass
1274 for (s,d) in self.handler.spectra:
1275 # print >>sys.stderr, "Yeild spectrum number:",d['num']
1276 yield (s,d)
1277 if self.handler.done:
1278 break
1279
1280 def getMsRunMetaData(self):
1281 self.open()
1282 d = {}
1283 if self.md.has_key('startTime'):
1284 d.update({'startTime:PT%fS':self.md['startTime']})
1285 if self.md.has_key('endTime'):
1286 d.update({'endTime:PT%fS':self.md['endTime']})
1287 return d
1288
1289 def getFilenames(self):
1290 return self.md['parentFile']
1291
1292 def msInstrumentID(self):
1293 return "1"
1294
1295 def peakDetect(self,level):
1296 return False
1297
1298 def msManufacturer(self):
1299 return self.md.get('msManufacturer','')
1300
1301 def msModel(self):
1302 return self.md.get('msModel','')
1303
1304 def msIonisation(self):
1305 return self.md.get('msIonisation','')
1306
1307 def msMassAnalyzer(self):
1308 return self.md.get('msMassAnalyzer','')
1309
1310 def msDetector(self):
1311 return self.md.get('msDetector','')
1312
1313 def acquisitionSoftware(self):
1314 return self.md.get('acquisitionSoftware','')
1315
1316 def acquisitionSoftwareVersion(self):
1317 return self.md.get('acquisitionSoftwareVersion','')
1318
1319 def precursorPrecision(self,mz):
1320 return 0
1321
1322 def precursorTune(self):
1323 return False
1324
1325 def lc(self):
1326 return True
1327
1328 def maldi(self):
1329 return not self.lc()
1330
1331 class AB4700T2D:
1332 def __init__(self,filename,peaks=None):
1333 self.filename = filename
1334 self.datafiles = None
1335 self.deapp = None
1336 self.metadata = {}
1337 self.platespots = {}
1338 self.applyPeakDetect = []
1339 if peaks:
1340 self.applyPeakDetect = map(int,peaks.split(','))
1341
1342 if not os.path.exists(self.filename):
1343 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1344 sys.exit(1)
1345
1346 self.filename = os.path.abspath(self.filename)
1347
1348 self.dir = os.path.split(self.filename)[0]
1349
1350 def __del__(self):
1351 # Probably unnecessary
1352 self.close()
1353
1354 def open(self):
1355
1356 if self.deapp is None:
1357
1358 self.metadata['PLATEDEF'] = []
1359 self.metadata['PLATE'] = []
1360 self.metadata['SPOT'] = []
1361 self.metadata['SCAN'] = []
1362
1363 h = open(self.filename,'rb')
1364 r = csv.reader(h,'excel-tab')
1365 for l in r:
1366 if len(l) == 0:
1367 continue
1368 kw = l.pop(0).upper()
1369 if kw in ('PLATEDEF','PLATE','SPOT','SCAN'):
1370 self.metadata[kw].append({})
1371 while len(l)>=2:
1372 k = l.pop(0);v = l.pop(0);
1373 if (kw == 'PLATE' and \
1374 (k in ('plateID','spotXCount','spotYCount','plateManufacturer','plateModel'))) or \
1375 (kw == 'PLATEDEF' and \
1376 (k in ('plateID','plateManufacturer','plateModel','spotNaming','maldiMatrix'))) or \
1377 (kw == 'SPOT' and \
1378 (k in ('plateID','spotID','spotXPosition','spotYPosition','maldiMatrix'))) or \
1379 (kw == 'SCAN' and \
1380 (k in ('plateID','spotID','filename','index'))):
1381 self.metadata[kw][-1][k] = v
1382 elif k == '':
1383 break
1384 else:
1385 print >>sys.stderr,"Bad key %s for %s row"%(k,kw)
1386 sys.exit(1)
1387 h.close()
1388
1389 for pd in self.metadata['PLATEDEF']:
1390 # insert all the gory details needed for a particular plate model etc...
1391 if pd['plateManufacturer'] == 'ABI / SCIEX' and pd['plateModel'] == '01-192+06-BB':
1392 if not pd.has_key('spotNaming') or not pd.has_key('maldiMatrix') or \
1393 not pd['spotNaming'] in ('alpha','parallel','antiparallel'):
1394 print >>sys.stderr,"Bad plate definition, missing maldiMatrix or spotNaming, or bad spotNaming value."
1395 sys.exit(1)
1396 self.metadata['PLATE'].append({
1397 'plateID':pd['plateID'],
1398 'plateManufacturer':pd['plateManufacturer'],
1399 'plateModel':pd['plateModel'],
1400 'spotXCount':24,
1401 'spotYCount':8,
1402 })
1403 if pd['spotNaming'] == 'alpha':
1404 for y in xrange(0,8):
1405 for x in xrange(0,24):
1406 self.metadata['SPOT'].append({
1407 'plateID':pd['plateID'],
1408 'spotID':"%c%d"%(y+ord('A'),x+1),
1409 'spotXPosition':x,
1410 'spotYPosition':2*y+(x%2),
1411 'maldiMatrix':pd['maldiMatrix']
1412 })
1413 else:
1414 print >>sys.stderr,"Valid spotNaming value, not yet implemented."
1415 sys.exit(1)
1416
1417 for pl in self.metadata['PLATE']:
1418 self.platespots[pl['plateID']] = []
1419
1420 for sp in self.metadata['SPOT']:
1421 self.platespots[sp['plateID']].append(sp)
1422
1423 self.datafiles = [ (os.path.join(self.dir,md['filename']),int(md['index'])) for md in self.metadata['SCAN'] ]
1424 self.distinct_datafiles = dict([(os.path.join(self.dir,md['filename']),
1425 fileSHA(os.path.join(self.dir,md['filename']))) for md in self.metadata['SCAN']])
1426
1427 self.maptoscan = {}
1428 index = 0
1429 for md in self.metadata['SCAN']:
1430 self.maptoscan[(os.path.join(self.dir,md['filename']),int(md['index']))] = index
1431 index += 1
1432
1433 from win32com.client import Dispatch, gencache
1434 self.deapp = Dispatch('DataExplorer.Application',
1435 resultCLSID='{3FED40F1-D409-11D1-8B56-0060971CB54B}')
1436 self.delib = gencache.EnsureModule('{06972F50-13F6-11D3-A5CB-0060971CB54B}',0,4,2)
1437 self.deapp.AutomatedProcessing = 1
1438 self.deapp.Visible = 0
1439
1440 def close(self):
1441
1442 if not self.deapp is None:
1443 self.deapp.Quit()
1444 self.deapp = None
1445
1446 def peakDetect(self,msLevel):
1447 return msLevel in self.applyPeakDetect
1448
1449 # remove .set and .cts files to prevent DataExplorer crashes - DT 11-1-2006
1450 def remsettingsfile(self,fn):
1451 # print "rmsettingsfile: %s"%fn
1452 if ((len(fn) > 3) and (fn[-4:].lower() == '.t2d')):
1453 path2del = fn[:-4] + '.cts'
1454 if os.path.exists(path2del):
1455 os.unlink(path2del)
1456 path2del = fn[:-4] + '.set'
1457 if os.path.exists(path2del):
1458 os.unlink(path2del)
1459
1460 def spectra(self):
1461 self.open()
1462
1463 prevfile = ''
1464 for (f,i) in self.datafiles:
1465
1466 if f != prevfile:
1467 self.deapp.Documents.Close()
1468 self.remsettingsfile(prevfile)
1469
1470 if not os.path.exists(f):
1471 print >>sys.stderr, "Filename %s does not exist."%(f,)
1472 sys.exit(1)
1473
1474 self.remsettingsfile(f)
1475 self.deapp.Documents.Open(f)
1476 # print "open %s"%f
1477
1478
1479 doc = self.deapp.Documents.Item(0)
1480 sv = doc.SpecView
1481
1482 sv.SetSpectrumAt(i-1)
1483
1484 (tf,fixedMass) = doc.InstrumentSettings.GetSetting(self.delib.constants.dePreCursorIon,i-1,None)
1485
1486 if fixedMass > 0:
1487 msLevel = 2
1488 else:
1489 msLevel = 1
1490
1491 dataArr = array('f');
1492 peakDetect="False"
1493 if self.peakDetect(msLevel):
1494 peakDetect="True"
1495 sv.CorrectBaseline()
1496 doc.SpecSetup.NoiseReductionStdev = 2.0
1497 sv.FilterNoise(self.delib.constants.deNoiseRemoval)
1498 sv.PeakSettings.PeakThresholdPercent = 1
1499 st,sp = sv.GetPeakData(self.delib.constants.deSpecPeakAll,
1500 self.delib.constants.deSpecPeakSortMass, 0, 0)
1501 for j in xrange(len(sp)):
1502 x = sp[j][self.delib.constants.deSpecPeakCentroidMass]
1503 y = sp[j][self.delib.constants.deSpecPeakArea]
1504 if y <= 0:
1505 continue
1506 dataArr.append(x)
1507 dataArr.append(y)
1508 else:
1509 n,s = sv.GetRawData(0,0)
1510 for j in xrange(n):
1511 dataArr.append(s[j][0])
1512 dataArr.append(s[j][1])
1513
1514 if len(dataArr) == 0:
1515 continue
1516
1517 polarity_val = "+"
1518
1519 # Scan level attributes
1520 # Required by mzXML (no format spec. needed)
1521 d = {'msLevel':msLevel}
1522 # Optional ('scan.' and prefixformat spec. needed)
1523 d.update({
1524 'scan.polarity:%s':polarity_val,
1525 })
1526
1527 scanindex = self.maptoscan[(f,i)]
1528 d.update({
1529 'plateID':self.metadata['SCAN'][scanindex]['plateID'],
1530 'spotID':self.metadata['SCAN'][scanindex]['spotID'],
1531 })
1532 d.update({'scanOrigin.parentFileID:%s': self.distinct_datafiles[f],
1533 'scanOrigin.num:%d': i})
1534 if msLevel == 1:
1535 yield (dataArr,d)
1536 else:
1537 # tandem MS scans add the following
1538 # Required by mzXML
1539 d.update({'precursorMz':fixedMass})
1540 # Optional
1541 d.update({'precursorMz.precursorCharge:%d':1})
1542 yield (dataArr,d)
1543
1544 prevfile = f
1545
1546 self.deapp.Documents.Close()
1547 self.remsettingsfile(prevfile)
1548 self.remsettingsfile(f)
1549
1550 def getMsRunMetaData(self):
1551 self.open()
1552 return {}
1553
1554 def getFilenames(self):
1555 return [ t for t in self.distinct_datafiles.items() ]
1556
1557 def msInstrumentID(self):
1558 return "1"
1559
1560 def msManufacturer(self):
1561 return "ABI / SCIEX"
1562
1563 def msModel(self):
1564 return "4700 Proteomic Analyzer"
1565
1566 def msIonisation(self):
1567 return "MALDI"
1568
1569 def msMassAnalyzer(self):
1570 return "TOF"
1571
1572 def msDetector(self):
1573 return "LE"
1574
1575 def acquisitionSoftware(self):
1576 return "DataExplorer"
1577
1578 def acquisitionSoftwareVersion(self):
1579 return "1.0"
1580
1581 def precursorPrecision(self,mz):
1582 return .1
1583
1584 def precursorTune(self):
1585 return False
1586
1587 def lc(self):
1588 return False
1589
1590 def maldi(self):
1591 return not self.lc()
1592
1593 def plateData(self):
1594 self.open()
1595 for d in self.metadata['PLATE']:
1596 yield d
1597
1598 def spotData(self,plate):
1599 self.open()
1600 for s in self.platespots[plate]:
1601 yield s
1602
1603 class scanfilter:
1604 def __init__(self,str):
1605 self.rtre = re.compile('^PT([0-9]+(\.[0-9]+)?)S$')
1606 self.intre = re.compile('^[1-9][0-9]*$')
1607 self.floatre = re.compile('^[0-9]+(\.[0-9]+)?$')
1608 self.cl = []
1609 clause = str.split(',')
1610 for cl in clause:
1611 scl = cl.split('.')
1612 m = self.rtre.match(scl[2].strip())
1613 if m != None:
1614 v = float(m.group(1))
1615 elif self.floatre.match(scl[2].strip()):
1616 v = float(scl[2])
1617 elif self.intre.match(scl[2].strip()):
1618 v = int(scl[2])
1619 else:
1620 v = scl[2]
1621 self.cl.append((scl[0],scl[1],v))
1622
1623 def test(self,d):
1624 a = {}
1625 for (k,v) in d.iteritems():
1626 k0 = k.split(':')[0].split('.')[-1]
1627 a[k0] = v
1628 for cl in self.cl:
1629 if type(a[cl[0]]) == type(u'') or \
1630 type(a[cl[0]]) == type('') :
1631 m = self.rtre.match(a[cl[0]].strip())
1632 if m != None:
1633 v1 = float(m.group(1))
1634 elif self.floatre.match(a[cl[0]].strip()):
1635 v1 = float(a[cl[0]])
1636 elif self.intre.match(a[cl[0]].strip()):
1637 v1 = int(a[cl[0]])
1638 else:
1639 v1 = a[cl[0]].strip()
1640 else:
1641 v1 = a[cl[0]]
1642 v2 = cl[2]
1643 # print >>sys.stderr, "Spectrum test:",v1,type(v1),cl[1],v2,type(v2)
1644 # sys.stderr.flush()
1645 if cl[1] == 'eq' and v1 != v2:
1646 # print >>sys.stderr, "FALSE"
1647 # sys.stderr.flush()
1648 return False
1649 elif cl[1] == 'ne' and not v1 != v2:
1650 return False
1651 elif cl[1] == 'lt' and not v1 < v2:
1652 return False
1653 elif cl[1] == 'le' and not v1 <= v2:
1654 return False
1655 elif cl[1] == 'gt' and not v1 > v2:
1656 return False
1657 elif cl[1] == 'ge' and not v1 >= v2:
1658 return False
1659 # print >>sys.stderr, "TRUE"
1660 # sys.stderr.flush()
1661 return True
1662
1663 if __name__ == '__main__':
1664
1665 from optparse import OptionParser
1666 import sys
1667 import glob
1668
1669 parser = OptionParser()
1670 parser.add_option("-R", "--rawdata", type="string", dest="rawdata", default="",\
1671 help="Format of raw data. Optional if raw spectra file ends in .wiff or .t2m.")
1672 parser.add_option("-X", "--xmlformat", type="string", dest="xmlformat", default="",\
1673 help="XML format to output. Optional if output file ends in .mzxml or .mzdata.")
1674 parser.add_option("-o", "--output", type="string", dest="output", default="",\
1675 help="Output file.")
1676 parser.add_option("-p", "--peaks", type="string", dest="peaks", default="2",\
1677 help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.")
1678 parser.add_option("-f", "--filter", type="string", dest="filter", default=None,\
1679 help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.")
1680 parser.add_option("-V", "--version", type='string', dest="version", default="3.0",
1681 help="XML version. mzXML only. Valid options '2.1','2.2','3.0'. Default: '3.0'.")
1682 parser.add_option("-z", "--compress_peaks", action="store_true", dest="compress_peaks", default=None,\
1683 help="Compress mzXML peaks data using zlib. Default: False")
1684 parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,\
1685 help="Compress output file. Valid options 'gz'. Default: None.")
1686 parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\
1687 help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.")
1688
1689 ( opts, args ) = parser.parse_args()
1690
1691 if opts.output == "" and opts.xmlformat == "":
1692 parser.error("Either -o or -X must be suppied.")
1693 sys.exit(1)
1694
1695 if opts.filter != None:
1696 filt=scanfilter(opts.filter)
1697 else:
1698 filt=None
1699
1700 argsglob = []
1701 for a in args:
1702 argsglob.extend(glob.glob(a))
1703
1704 if opts.output != "" and len(argsglob) > 1:
1705 parser.error("At most one raw spectrum data file is permitted, if -o option is used.")
1706
1707 for a in argsglob:
1708
1709 if opts.output != "":
1710 o = opts.output
1711 else:
1712 p = a.rfind('.')
1713 o = a[0:p]+'.'+opts.xmlformat
1714
1715 if o == a:
1716 print >>sys.stderr, "Input file %s and output file %s are the same!"%(a,o)
1717 sys.exit(1)
1718
1719 print >>sys.stderr, "Processing",a,"to",o
1720
1721 if opts.rawdata.lower() in ('wiff','qstar'):
1722 r = QStarWiff(a,opts.peaks)
1723 elif opts.rawdata.lower() in ('t2d','dat','ab4700','ab4800','mariner','voyager'):
1724 r = AB4700T2D(a,opts.peaks)
1725 elif opts.rawdata.lower() in ('mzxml',):
1726 r = FromMzXML(a)
1727 elif a[-5:].lower() in ('.wiff',):
1728 r = QStarWiff(a,opts.peaks)
1729 elif a[-4:].lower() in ('.t2m',):
1730 r = AB4700T2D(a,opts.peaks)
1731 elif a[-6:].lower() in ('.mzxml',):
1732 r = FromMzXML(a)
1733 else:
1734 parser.error("Bad rawdata format specification.")
1735 sys.exit(1)
1736
1737 if opts.xmlformat.lower() == 'mzxml':
1738 x = ToMzXML(r,o,opts.compress,filt,opts.compress_peaks,opts.version)
1739 elif opts.xmlformat.lower() == 'mzdata':
1740 x = ToMzData(r,o,opts.compress,filt)
1741 elif opts.output[-6:].lower() in ('.mzxml',):
1742 x = ToMzXML(r,opts.output,opts.compress,filt,opts.compress_peaks,opts.version)
1743 elif opts.output[-7:].lower() in ('.mzdata',):
1744 x = ToMzData(r,opts.output,opts.compress,filt)
1745 else:
1746 parser.error("Bad xml format specification.")
1747 sys.exit(1)
1748
1749 x.write(debug=opts.debug)