ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/pymsxml/pymsxml.py
Revision: 1.9
Committed: Tue Aug 7 14:29:44 2007 UTC (8 years, 10 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD
Changes since 1.8: +1 -1 lines
Log Message:
*** empty log message ***

Line File contents
1 #!/usr/bin/env python
2
3 import sys,os,os.path
4 import re
5 import sha
6 import math
7 import tempfile
8 import csv
9 import gzip
10 import zlib
11 from array import array
12 from base64 import b64encode,b64decode
13 from urllib import quote,unquote
14 from xml.sax import *
15
16 class NameVersion:
17 def __init__(self):
18 self.name_ = "PyMsXML"
19 self.majorVer = 0
20 self.minorVer = 5
21 self.revVer = 4
22 def version(self):
23 return "%d.%d.%d"%(self.majorVer,self.minorVer,self.revVer)
24 def name(self):
25 return self.name_
26
27 def fileSHA(filename):
28 s = sha.new()
29 h = open(filename,'rb')
30 while True:
31 buffer = h.read(1024)
32 if buffer == '':
33 break
34 s.update(buffer)
35 h.close()
36 return s.hexdigest().lower()
37
38 class ToMzXML:
39 def __init__(self,reader,filename,cmpfmt=None,filt=None,peaksCompress=False,version="3.0"):
40 self.filename = filename
41 self.compressfmt = cmpfmt
42 self.reader = reader
43 self.initSHA()
44 self.writeByteCounter = 0
45 self.actualScanCount = 0
46 self.scanIndices = {}
47 self.filter = filt
48 if not version in ('2.1','2.2','3.0'):
49 raise "Bad mzXML version \"%s\""%(version,)
50 self.version = version
51 if peaksCompress and float(self.version) < 3.0:
52 raise "Cannot compress peaks until mzXML version 3.0"
53 self.peakcompress = peaksCompress
54
55 def __del__(self):
56 if hasattr(self,'tmpFileName') and self.tmpFileName and self.tmpFileName != '':
57 os.unlink(self.tmpFileName)
58
59 def getFilename(self):
60 return self.filename
61
62 def initSHA(self):
63 self.sha1 = sha.new()
64
65 def updateSHA(self,s):
66 self.sha1.update(s)
67
68 def getSHA(self):
69 return self.sha1.hexdigest().lower()
70
71 def writeString(self,fh,data):
72 self.writeByteCounter += len(data)
73 self.sha1.update(data)
74 fh.write(data)
75
76 def write(self,debug=False):
77
78 # Make a temporary file for the scan data to count the number of spectra.
79 (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
80 tmpFile = os.fdopen(tmpFileFD,'wb')
81
82 # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
83 self.write_scans(tmpFile,debug)
84
85 tmpFile.close()
86
87 # Reset!
88
89 self.initSHA()
90 self.writeByteCounter = 0
91
92 if self.compressfmt == None and not self.filename.endswith('.gz'):
93 xmlFile = open(self.filename,'wb')
94 elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
95 if self.filename.endswith('.gz'):
96 xmlFile = gzip.open(self.filename,'wb')
97 else:
98 xmlFile = gzip.open(self.filename+'.gz','wb')
99 else:
100 print >>sys.stderr, "Bad compressfmt specification"
101 sys.exit(1)
102
103 self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
104
105 outStr = "<mzXML "
106 outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s\" "%(self.version,)
107 outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
108 outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s http://sashimi.sourceforge.net/schema_revision/mzXML_%s/mzXML_idx_%s.xsd\""%(self.version,self.version,self.version)
109 outStr += ">\n"
110
111 self.writeString (xmlFile,outStr)
112
113 outStr = "<msRun"
114 outStr += " scanCount=\"%d\"" % (self.actualScanCount,)
115 md = self.reader.getMsRunMetaData()
116 for (k,v) in md.items():
117 k,f = k.split(':')
118 if self.version not in ('2.2',) or (k != 'startTime'and k != 'endTime'):
119 outStr += (" %%s=\"%s\""%(f,))%(k,v)
120 outStr += ">\n"
121
122 self.writeString (xmlFile,outStr)
123
124 filehashes = {}
125
126 for f in self.reader.getFilenames():
127 if len(f) == 2:
128 outStr = "<parentFile "
129 outStr += "fileName=\"%s\" " % (quote(f[0]),)
130 outStr += "fileType=\"RAWData\" "
131 outStr += "fileSha1=\"%s\"" % (f[1],)
132 outStr += "/>\n"
133 else:
134 outStr = "<parentFile "
135 outStr += "fileName=\"%s\" " % (f[0],)
136 outStr += "fileType=\"%s\" " % (f[1],)
137 outStr += "fileSha1=\"%s\"" % (f[2],)
138 outStr += "/>\n"
139 self.writeString (xmlFile,outStr)
140
141 if float(self.version) >= 3.0:
142 outStr = "<msInstrument msInstrumentID=\"%s\">\n" % (self.reader.msInstrumentID(),)
143 else:
144 outStr = "<msInstrument>\n"
145
146 outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),)
147 outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),)
148 if self.reader.msIonisation():
149 outStr += "<msIonisation category=\"msIonisation\" value=\"%s\"/>\n" % (self.reader.msIonisation(),)
150 if self.reader.msMassAnalyzer():
151 outStr += "<msMassAnalyzer category=\"msMassAnalyzer\" value=\"%s\"/>\n" % (self.reader.msMassAnalyzer(),)
152 if self.reader.msDetector():
153 outStr += "<msDetector category=\"msDetector\" value=\"%s\"/>\n" % (self.reader.msDetector(),)
154
155 self.writeString (xmlFile,outStr)
156
157 outStr = "<software "
158 outStr += "type=\"acquisition\" "
159 outStr += "name=\"%s\" "%(self.reader.acquisitionSoftware(),)
160 outStr += "version=\"%s\""%(self.reader.acquisitionSoftwareVersion(),)
161 outStr += "/>\n"
162
163 self.writeString (xmlFile,outStr)
164
165 outStr = "</msInstrument>\n"
166
167 self.writeString (xmlFile,outStr)
168
169 outStr = "<dataProcessing>\n"
170
171 self.writeString (xmlFile,outStr)
172
173 nv = NameVersion()
174
175 outStr = "<software "
176 outStr += "type=\"conversion\" "
177 outStr += "name=\"%s\" "%(nv.name(),)
178 outStr += "version=\"%s\"" % (nv.version(),)
179 outStr += "/>\n"
180
181 self.writeString (xmlFile,outStr)
182
183 if (self.reader.peakDetect(1)):
184 outStr = "<processingOperation "
185 outStr += "name=\"peak_detection_level_1\" "
186 outStr += "value=\"true\""
187 outStr += "/>\n"
188 outStr += "<processingOperation "
189 outStr += "name=\"peak_detection_level_1_threshold\" "
190 outStr += "value=\"1%\""
191 outStr += "/>\n"
192 outStr += "<processingOperation "
193 outStr += "name=\"peak_detection_level_1_software\" "
194 outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),)
195 outStr += "/>\n"
196 self.writeString (xmlFile,outStr)
197 if (self.reader.peakDetect(2)):
198 outStr = "<processingOperation "
199 outStr += "name=\"peak_detection_level_2\" "
200 outStr += "value=\"true\""
201 outStr += "/>\n"
202 outStr += "<processingOperation "
203 outStr += "name=\"peak_detection_level_2_threshold\" "
204 outStr += "value=\"1%\""
205 outStr += "/>\n"
206 outStr += "<processingOperation "
207 outStr += "name=\"peak_detection_level_2_software\" "
208 outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),)
209 outStr += "/>\n"
210 self.writeString (xmlFile,outStr)
211
212 outStr = "<processingOperation "
213 outStr += "name=\"min_peaks_per_spectra\" "
214 outStr += "value=\"1\""
215 outStr += "/>\n"
216
217 self.writeString (xmlFile,outStr)
218
219 outStr = "</dataProcessing>\n"
220
221 self.writeString (xmlFile,outStr)
222
223 if self.reader.maldi():
224 outStr = '<spotting>\n'
225 for d in self.reader.plateData():
226 outStr += '<plate plateID="%s" spotXCount="%s" spotYCount="%s">\n' % \
227 (d['plateID'],d['spotXCount'],d['spotYCount'])
228 outStr += '<plateManufacturer category="plateManufacturer" value="%s"/>\n' % \
229 (d['plateManufacturer'],)
230 outStr += '<plateModel category="plateModel" value="%s"/>\n' % \
231 (d['plateModel'],)
232 for s in self.reader.spotData(d['plateID']):
233 outStr += '<spot spotID="%s" spotXPosition="%s" spotYPosition="%s">\n' % \
234 (s['spotID'],s['spotXPosition'],s['spotYPosition'])
235 outStr += '<maldiMatrix category="maldiMatrix" value="%s"/>\n' % \
236 (s['maldiMatrix'],)
237 outStr += '</spot>\n'
238 outStr += '</plate>\n'
239 outStr += '</spotting>\n'
240 self.writeString(xmlFile,outStr)
241
242 if self.reader.lc():
243 pass
244
245 scanOffset = self.writeByteCounter
246
247 tmpFile = open(self.tmpFileName,'rb')
248 while True:
249 tmpStr = tmpFile.read(1024)
250 if tmpStr == '':
251 break
252 self.writeString(xmlFile,tmpStr)
253 tmpFile.close()
254 os.unlink(self.tmpFileName)
255 self.tmpFileName = ''
256
257 outStr = "</msRun>\n"
258 self.writeString (xmlFile,outStr)
259
260 indexOffset = self.writeByteCounter
261
262 outStr = "<index "
263 outStr += "name=\"scan\" "
264 outStr += ">\n"
265
266 self.writeString (xmlFile,outStr)
267
268 for i in xrange(1,self.actualScanCount+1):
269 outStr = "<offset id=\"%d\">" % (i,)
270 outStr += "%d</offset>\n" % (self.scanIndices[i] + scanOffset,)
271 self.writeString(xmlFile,outStr)
272
273 outStr = "</index>\n"
274 self.writeString (xmlFile,outStr)
275
276 outStr = "<indexOffset>%d</indexOffset>\n" % (indexOffset,)
277
278 self.writeString (xmlFile,outStr)
279
280 self.writeString (xmlFile,"<sha1>")
281
282 outStr = self.getSHA()
283
284 self.writeString (xmlFile,outStr)
285
286 self.writeString (xmlFile,"</sha1>\n")
287
288 outStr = "</mzXML>\n"
289 self.writeString (xmlFile,outStr)
290
291 xmlFile.close()
292
293 def write_scans(self,xmlFile,debug=False):
294
295 msLevel = 0
296 scanNumber = 0
297 ancestors = []
298 self.writeByteCounter = 0
299
300 for (s,d) in self.reader.spectra():
301
302 if self.filter != None and not self.filter.test(d):
303 continue
304
305 if debug and scanNumber >= 10:
306 break
307
308 if not d.has_key('msLevel'):
309 print >>sys.stderr, "Required scan attributes missing."
310 sys.exit(1)
311
312 prevLevel = msLevel
313 msLevel = d['msLevel']
314
315 if prevLevel < msLevel and prevLevel > 0:
316 # We went "in" a level, push scan number of parent
317 ancestors.append((scanNumber,prevSpec))
318 elif prevLevel > msLevel and msLevel > 0:
319 if len(ancestors) == 0:
320 pass #print >>sys.stderr, "No ancestor for scan %s at level %s"%(scanNumber,msLevel)
321 else:
322 ancestors.pop()
323
324 outStr = ''
325 if prevLevel > msLevel:
326 for m in xrange(0,prevLevel-msLevel+1):
327 outStr += "</scan>\n"
328 else:
329 if prevLevel > 0 and prevLevel == msLevel:
330 outStr += "</scan>\n"
331
332 self.writeString(xmlFile,outStr)
333
334 scanNumber = scanNumber + 1
335
336 self.scanIndices[scanNumber] = self.writeByteCounter
337
338 totIonCurrent = 0
339 maxmz = None
340 minmz = None
341 basePeakMz = None
342 basePeakIntensity = None
343 peaksCount = 0
344 for i in xrange(0,len(s),2):
345 x = s[i]; y = s[i+1]
346 if minmz is None or minmz > x:
347 minmz = x
348 if maxmz is None or maxmz < x:
349 maxmz = x
350 totIonCurrent += y
351 if basePeakIntensity is None or basePeakIntensity < y:
352 basePeakIntensity = y
353 basePeakMz = x
354 peaksCount += 1
355
356 outStr = "<scan num=\"%d\" msLevel=\"%d\"" % (scanNumber,msLevel)
357 outStr += " peaksCount=\"%d\"" % (peaksCount,)
358 outStr += " lowMz=\"%f\"" % (minmz,)
359 outStr += " highMz=\"%f\"" % (maxmz,)
360 outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,)
361 outStr += " basePeakMz=\"%f\"" % (basePeakMz,)
362 outStr += " basePeakIntensity=\"%f\"" % (basePeakIntensity,)
363
364 for (k,v) in d.items():
365 if k.startswith('scan.'):
366 k,f = k.split(':')
367 k = k[5:]
368 outStr += (" %%s=\"%s\""%(f,))%(k,v)
369 outStr += ">\n"
370 self.writeString(xmlFile,outStr)
371
372 if self.version in ('2.2',):
373 any = False
374 for (k,v) in d.items():
375 if k.startswith('scanOrigin.'):
376 if not any:
377 any = True
378 outStr = "<scanOrigin"
379 k,f = k.split(':')
380 k = k[11:]
381 outStr += (" %%s=\"%s\""%(f,))%(k,v)
382 if any:
383 outStr += "/>\n"
384 self.writeString(xmlFile,outStr)
385
386 if msLevel > 1:
387
388 if not d.has_key('precursorMz') :
389 print >>sys.stderr, "Required precursorMz attribute missing."
390 sys.exit(1)
391
392 # We scan the parent spectrum in the region of the
393 # precursorMz to establish the intensity. Optionally,
394 # we tune the precursorMz itself, on the basis of this
395
396 pMz = d['precursorMz']
397 tol = self.reader.precursorPrecision(pMz)
398 pMzLB = pMz-tol
399 pMzUB = pMz+tol
400 pMzIntensity = 0
401
402 if len(ancestors) > 0:
403 for i in xrange(0,len(ancestors[-1][1]),2):
404 x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
405 if x < pMzLB:
406 continue
407 if x > pMzUB:
408 break
409 if pMzIntensity < y:
410 pMzIntensity = y
411 pMzTune = x
412
413 if self.reader.precursorTune():
414 pMz = pMzTune
415
416 outStr = "<precursorMz"
417 for (k,v) in d.items():
418 if k.startswith('precursorMz.'):
419 k,f = k.split(':')
420 k = k[12:]
421 outStr += (" %%s=\"%s\""%(f,))%(k,v)
422 if len(ancestors)>0:
423 outStr += " precursorScanNum=\"%d\""%(ancestors[-1][0],)
424 outStr += " precursorIntensity=\"%f\">"%(pMzIntensity,)
425 else:
426 outStr += ">"
427 outStr += "%f</precursorMz>\n" % (pMz,)
428 self.writeString(xmlFile,outStr)
429
430 if self.reader.maldi() and self.version in ('2.2',):
431 outStr = '<maldi'
432 for (k,v) in d.items():
433 if k.startswith('maldi.'):
434 k,f = k.split(':')
435 k = k[6:]
436 outStr += (" %%s=\"%s\""%(f,))%(k,v)
437 outStr += " plateID=\"%s\""%(d['plateID'],)
438 outStr += " spotID=\"%s\""%(d['spotID'],)
439 outStr += "/>\n"
440 self.writeString(xmlFile,outStr)
441
442 if sys.byteorder != 'big':
443 s.byteswap()
444
445 if debug:
446 s = s[:20]
447
448 specstr = s.tostring()
449 if self.peakcompress:
450 specstr = zlib.compress(specstr,9)
451 outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"zlib\" compressedLen=\"%d\">"%(len(specstr),)
452 else:
453 if float(self.version) >= 3.0:
454 outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"none\" compressedLen=\"%d\">"%(len(specstr),)
455 else:
456 outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">"
457
458 self.writeString(xmlFile,outStr)
459
460 # Spec says the byte order shall be
461 # network, which is the same as big endian.
462
463 if sys.byteorder != 'big':
464 s.byteswap()
465
466 outStr = b64encode(specstr)
467 self.writeString(xmlFile,outStr)
468
469 outStr = "</peaks>\n"
470 self.writeString(xmlFile,outStr)
471
472 if self.reader.maldi() and self.version not in ('2.2',):
473 outStr = ''
474 for (k,v) in d.items():
475 if k.startswith('maldi.'):
476 k,f = k.split(':')
477 k = k[6:]
478 outStr += '<nameValue'
479 outStr += ' name="maldi.%s"'%(k,)
480 outStr += (' value="%s"'%(f,))%(v,)
481 outStr += '/>\n'
482 outStr += '<nameValue'
483 outStr += ' name="maldi.plateID"'
484 outStr += ' value="%s"'%(d['plateID'],)
485 outStr += '/>\n'
486 outStr += '<nameValue'
487 outStr += ' name="maldi.spotID"'
488 outStr += ' value="%s"'%(d['spotID'],)
489 outStr += '/>\n'
490 self.writeString(xmlFile,outStr)
491
492 if self.version not in ('2.2',):
493 outStr = ''
494 for (k,v) in d.items():
495 if k.startswith('scanOrigin.'):
496 k,f = k.split(':')
497 k = k[11:]
498 outStr += '<nameValue'
499 outStr += ' name="scanOrigin.%s"'%(k,)
500 outStr += (' value="%s"'%(f,))%(v,)
501 outStr += '/>\n'
502 self.writeString(xmlFile,outStr)
503
504 outStr = ''
505 for (k,v) in d.items():
506 if k.startswith('nameValue.'):
507 k,f = k.split(':')
508 k = k[10:]
509 outStr += '<nameValue'
510 outStr += ' name="%s"'%(k,)
511 outStr += (' value="%s"'%(f,))%(v,)
512 outStr += '/>\n'
513 if len(outStr) > 0:
514 self.writeString(xmlFile,outStr)
515
516 xmlFile.flush()
517
518 prevSpec = s
519
520 outStr = ""
521 for m in xrange(0,len(ancestors)+1):
522 outStr += "</scan>\n"
523
524 self.writeString(xmlFile,outStr)
525 self.actualScanCount = scanNumber
526
527 class ToMzData:
528 def __init__(self,reader,filename,cmpfmt=None,filt=None):
529 self.filename = filename
530 self.compressfmt = cmpfmt
531 self.reader = reader
532 self.actualScanCount = 0
533 self.initSHA()
534 self.filter = filt
535
536 def writeString(self,fh,data):
537 # self.writeByteCounter += len(data)
538 self.sha1.update(data)
539 fh.write(data)
540
541 def initSHA(self):
542 self.sha1 = sha.new()
543
544 def updateSHA(self,s):
545 self.sha1.update(s)
546
547 def getSHA(self):
548 return self.sha1.hexdigest().lower()
549
550 def write(self,debug=False):
551
552 # Make a temporary file for the scan data to count the number of spectra.
553 (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
554 tmpFile = os.fdopen(tmpFileFD,'wb')
555
556 # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
557 self.write_scans(tmpFile,debug)
558
559 tmpFile.close()
560
561 scanhash = self.getSHA()
562
563 self.initSHA()
564
565 if self.compressfmt == None and not self.filename.endswith('.gz'):
566 xmlFile = open(self.filename,'wb')
567 elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
568 if self.filename.endswith('.gz'):
569 xmlFile = gzip.open(self.filename,'wb')
570 else:
571 xmlFile = gzip.open(self.filename+'.gz','wb')
572 else:
573 print >>sys.stderr, "Bad compressfmt specification"
574 sys.exit(1)
575
576 self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
577
578 outStr = "<mzData "
579 outStr += "version=\"1.05\" "
580 outStr += "accessionNumber=\"pymsxml:%s\" "%(scanhash,)
581 outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
582 # outStr += "xmlns=\"http://psidev.sourceforge.net/ms/xml/mzdata\" "
583 outStr += "xsi:noNamespaceSchemaLocation=\"http://psidev.sourceforge.net/ms/xml/mzdata/mzdata.xsd\""
584 outStr += ">\n"
585
586 self.writeString (xmlFile,outStr)
587
588 outStr = "<description>\n"
589 outStr += "<admin>\n"
590 outStr += "<sampleName></sampleName>\n"
591 outStr += "<contact>\n"
592 outStr += "<name></name>\n"
593 outStr += "<institution></institution>\n"
594 outStr += "</contact>\n"
595 outStr += "</admin>\n"
596 outStr += "<instrument>\n"
597 outStr += "<instrumentName></instrumentName>\n"
598 outStr += "<source>\n"
599 outStr += "</source>\n"
600 outStr += "<analyzerList count=\"1\">\n"
601 outStr += "<analyzer>\n"
602 outStr += "</analyzer>\n"
603 outStr += "</analyzerList>\n"
604 outStr += "<detector>\n"
605 outStr += "</detector>\n"
606 outStr += "</instrument>\n"
607 outStr += "<dataProcessing>\n"
608 outStr += "<software>\n"
609 outStr += "<name>%s</name>\n"%(self.reader.acquisitionSoftware(),)
610 outStr += "<version>%s</version>\n"%(self.reader.acquisitionSoftwareVersion(),)
611 outStr += "</software>\n"
612 outStr += "<software>\n"
613 nv = NameVersion()
614 outStr += "<name>%s</name>\n"%(nv.name(),)
615 outStr += "<version>%s</version>\n"%(nv.version(),)
616 outStr += "</software>\n"
617 outStr += "</dataProcessing>\n"
618 outStr += "</description>\n"
619
620 self.writeString (xmlFile,outStr)
621
622 outStr = "<spectrumList"
623 outStr += " count=\"%d\">\n"%(self.actualScanCount,)
624
625 self.writeString (xmlFile,outStr)
626
627 tmpFile = open(self.tmpFileName,'rb')
628 while True:
629 tmpStr = tmpFile.read(1024)
630 if tmpStr == '':
631 break
632 self.writeString(xmlFile,tmpStr)
633 tmpFile.close()
634 os.unlink(self.tmpFileName)
635 self.tmpFileName = ''
636
637 outStr = "</spectrumList>\n</mzData>\n"
638
639 self.writeString (xmlFile,outStr)
640
641 def write_scans(self,xmlFile,debug=False):
642
643 msLevel = 0
644 scanNumber = 0
645 ancestors = []
646
647 self.initSHA()
648 # self.writeByteCounter = 0
649
650 for (s,d) in self.reader.spectra():
651
652 if self.filter != None and not self.filter.test(d):
653 continue
654
655 if debug and scanNumber >= 10:
656 break
657
658 if not d.has_key('msLevel'):
659 print >>sys.stderr, "Required scan attributes missing."
660 sys.exit(1)
661
662 prevLevel = msLevel
663 msLevel = d['msLevel']
664
665 if prevLevel < msLevel and prevLevel > 0:
666 # We went "in" a level, push scan number of parent
667 ancestors.append((scanNumber,prevSpec))
668 elif prevLevel > msLevel and msLevel > 0:
669 ancestors.pop()
670
671 scanNumber = scanNumber + 1
672
673 totIonCurrent = 0
674 maxmz = None
675 minmz = None
676 basePeakMz = None
677 basePeakIntensity = None
678 peaksCount = 0
679 for i in xrange(0,len(s),2):
680 x = s[i]; y = s[i+1]
681 if minmz is None or minmz > x:
682 minmz = x
683 if maxmz is None or maxmz < x:
684 maxmz = x
685 totIonCurrent += y
686 if basePeakIntensity is None or basePeakIntensity < y:
687 basePeakIntensity = y
688 basePeakMz = x
689 peaksCount += 1
690
691 outStr = "<spectrum id=\"%d\">\n" % (scanNumber,)
692 outStr += "<spectrumDesc>\n"
693 outStr += "<spectrumSettings>\n"
694
695 self.writeString (xmlFile,outStr)
696
697 # outStr += "<acqSpecification>\n"
698 # outStr += "</acqSpecification>\n"
699
700 outStr = "<spectrumInstrument"
701 outStr += " msLevel=\"%d\""%(msLevel,)
702 if d.has_key('scan.startMz') and \
703 d.has_key('scan.endMz'):
704 outStr += " mzRangeStart=\"%f\""%(d['scan.startMz'],)
705 outStr += " mzRangeStop=\"%f\""%(d['scan.endMz'],)
706 outStr += ">\n"
707 for (k,v) in d.items():
708 if k.startswith('scan.'):
709 k,f = k.split(':')
710 k = k[5:]
711
712 if k == "polarity":
713 if v == '+':
714 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Positive",)
715 elif v == '-':
716 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Negative",)
717 elif k == 'retentionTime':
718 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"TimeInSeconds\" value=\"%f\"/>\n"%(v,)
719 else:
720 outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
721 outStr += "<userParam name=\"lowMz\" value=\"%f\"/>\n"%(minmz,)
722 outStr += "<userParam name=\"highMz\" value=\"%f\"/>\n"%(maxmz,)
723 outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,)
724 outStr += "<userParam name=\"basePeakMz\" value=\"%f\"/>\n"%(basePeakMz,)
725 outStr += "<userParam name=\"basePeakIntensity\" value=\"%f\"/>\n"%(basePeakIntensity,)
726 outStr += "</spectrumInstrument>\n"
727 outStr += "</spectrumSettings>\n"
728
729 self.writeString (xmlFile,outStr)
730
731 if msLevel > 1:
732
733 if not d.has_key('precursorMz') :
734 print >>sys.stderr, "Required precursorMz attribute missing."
735 sys.exit(1)
736
737 # We scan the parent spectrum in the region of the
738 # precursorMz to establish the intensity. Optionally,
739 # we tune the precursorMz itself, on the basis of this
740
741 pMz = d['precursorMz']
742 tol = self.reader.precursorPrecision(pMz)
743 pMzLB = pMz-tol
744 pMzUB = pMz+tol
745 pMzIntensity = 0
746
747 for i in xrange(0,len(ancestors[-1][1]),2):
748 x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
749 if x < pMzLB:
750 continue
751 if x > pMzUB:
752 break
753 if pMzIntensity < y:
754 pMzIntensity = y
755 pMzTune = x
756
757 if self.reader.precursorTune():
758 pMz = pMzTune
759
760 outStr = "<precursorList count=\"1\">\n"
761
762 outStr += "<precursor"
763 outStr += " msLevel=\"%d\""%(msLevel,)
764 outStr += " spectrumRef=\"%d\""%(ancestors[-1][0],)
765 outStr += ">\n"
766
767 self.writeString(xmlFile,outStr)
768
769 outStr = "<ionSelection>\n"
770 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"MassToChargeRatio\" value=\"%f\"/>\n"%(pMz,)
771 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Intensity\" value=\"%f\"/>\n"%(pMzIntensity,)
772 for (k,v) in d.items():
773 if k.startswith('precursorMz.'):
774 k,f = k.split(':')
775 k = k[12:]
776 outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
777 outStr += "</ionSelection>\n"
778
779 self.writeString(xmlFile,outStr)
780
781 outStr = "<activation>\n"
782 outStr += "</activation>\n"
783 outStr += "</precursor>\n"
784 outStr += "</precursorList>\n"
785
786 self.writeString(xmlFile,outStr)
787
788 if self.reader.maldi():
789 outStr = '<maldi'
790 for (k,v) in d.items():
791 if k.startswith('maldi.'):
792 k,f = k.split(':')
793 k = k[6:]
794 outStr += (" %%s=\"%s\""%(f,))%(k,v)
795 outStr += " plateID=\"%s\""%(d['plateID'],)
796 outStr += " spotID=\"%s\""%(d['spotID'],)
797 outStr += "/>\n"
798 self.writeString(xmlFile,outStr)
799
800 outStr = "</spectrumDesc>\n"
801 self.writeString(xmlFile,outStr)
802
803 outStr = "<mzArrayBinary>\n"
804 outStr += "<data"
805 outStr += " precision=\"32\""
806 outStr += " endian=\"%s\""%(sys.byteorder,)
807 outStr += " length=\"%d\""%(peaksCount,)
808 outStr += ">"
809 self.writeString(xmlFile,outStr)
810
811 outStr = b64encode(s[0:peaksCount*2:2].tostring())
812 if not debug:
813 self.writeString(xmlFile,outStr)
814 else:
815 self.writeString(xmlFile,outStr[0:100])
816
817 outStr = "</data>\n</mzArrayBinary>\n"
818 self.writeString(xmlFile,outStr)
819
820 outStr = "<intenArrayBinary>\n"
821 outStr += "<data"
822 outStr += " precision=\"32\""
823 outStr += " endian=\"%s\""%(sys.byteorder,)
824 outStr += " length=\"%d\""%(peaksCount,)
825 outStr += ">"
826 self.writeString(xmlFile,outStr)
827
828 outStr = b64encode(s[1:peaksCount*2:2].tostring())
829 if not debug:
830 self.writeString(xmlFile,outStr)
831 else:
832 self.writeString(xmlFile,outStr[0:100])
833
834 outStr = "</data>\n</intenArrayBinary>\n"
835 self.writeString(xmlFile,outStr)
836
837 if self.reader.maldi():
838 outStr = ''
839 for (k,v) in d.items():
840 if k.startswith('maldi.'):
841 k,f = k.split(':')
842 k = k[6:]
843 outStr += '<nameValue'
844 outStr += ' name="%s"'%(k,)
845 outStr += (' value="%f"'%(f,))%(v,)
846 outStr += '/>\n'
847 ## outStr += '<nameValue'
848 ## outStr += ' name="plateID"'
849 ## outStr += ' value="%s"'%(d['plateID'],)
850 ## outStr += '/>\n'
851 ## outStr += '<nameValue'
852 ## outStr += ' name="spotID"'
853 ## outStr += ' value="%s"'%(d['spotID'],)
854 ## outStr += '/>\n'
855 self.writeString(xmlFile,outStr)
856
857 outStr = "</spectrum>\n"
858 self.writeString(xmlFile,outStr)
859 xmlFile.flush()
860
861 prevSpec = s
862
863 self.actualScanCount = scanNumber
864
865 class QStarWiff:
866 def __init__(self,filename,peaks=None):
867 self.filename = filename
868 self.filehash = fileSHA(filename)
869 self.theTIC = None
870 self.theWF = None
871 self.theFMANSpecData = None
872 self.startTime = None
873 self.stopTime = None
874 self.applyPeakDetect = []
875 if peaks:
876 self.applyPeakDetect = map(int,peaks.split(','))
877
878 if not os.path.exists(self.filename):
879 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
880 sys.exit(1)
881
882 self.filename = os.path.abspath(self.filename)
883
884 def __del__(self):
885 # Probably unnecessary
886 self.close()
887
888 def peakDetect(self,msLevel):
889 return ((msLevel in self.applyPeakDetect) and (msLevel != 1))
890
891 def open(self):
892
893 if self.theTIC is None:
894
895 from win32com.client import Dispatch
896
897 self.theFMANSpecData = Dispatch('Analyst.FMANSpecData')
898 self.theTIC = Dispatch('Analyst.FMANChromData')
899 self.theSPL = Dispatch('Analyst.SpectralPeakList')
900
901 self.theFMANSpecData.WiffFileName = self.filename
902 self.theTIC.WiffFileName = self.filename
903 self.theWF = self.theFMANSpecData.GetWiffFileObject()
904
905 self.theTIC.SetToTIC(1,0,0)
906
907 self.startTime = self.theTIC.GetDataPointXValue(1)
908 self.stopTime = self.theTIC.GetDataPointXValue(self.theTIC.GetNumberOfDataPoints())
909
910 def close(self):
911
912 if not self.theTIC is None:
913
914 if not self.theWF is None:
915 self.theWF.CloseWiffFile()
916 self.theTIC = None
917 self.theFMANSpecData = None
918 self.theWF = None
919
920 def spectra(self):
921 self.open()
922 spectrumCount=0;
923
924 numberOfSamples = self.theWF.GetActualNumberOfSamples()
925 for i in xrange(1,numberOfSamples+1):
926
927 # print >>sys.stderr, "Sample %d"%i
928 # print >>sys.stderr, "Sample Name: %s"%(self.theWF.GetSampleName(i),)
929
930 sampleName = self.theWF.GetSampleName(i)
931
932 numberOfPeriods = self.theWF.GetActualNumberOfPeriods(i)
933 for j in xrange(0,numberOfPeriods):
934
935 numberOfExperiments = self.theWF.GetNumberOfExperiments(i,j)
936 self.theTIC.SetToTIC(i, j, 0)
937
938 numberOfTICDataPoints = self.theTIC.GetNumberOfDataPoints()
939 for l in xrange(1,numberOfTICDataPoints+1):
940
941 for k in xrange(0,numberOfExperiments):
942
943 self.theExperiment = self.theWF.GetExperimentObject(i, j, k)
944 self.theTIC.SetToTIC(i, j, k)
945
946 # Convert to seconds?
947 xValue = self.theTIC.GetDataPointXValue(l) * 60
948
949 spectrumCount += 1;
950
951 # If any Ion current at time "l"
952 if self.theTIC.GetDataPointYValue(l) > 0:
953
954 self.theFMANSpecData.SetSpectrum(i, j, k, xValue, xValue)
955
956 if self.theExperiment.ScanType == 9:
957 msLevel = 2
958 m = re.search(r'[(]([0-9.]+)[)]',self.theFMANSpecData.DataTitle)
959 fixedMass = float(m.group(1))
960 else:
961 msLevel = 1
962
963 startMass = self.theFMANSpecData.GetStartMass()
964 stopMass = self.theFMANSpecData.GetStopMass()
965 dataArr = array('f');
966 peakDetect = 'False'
967 if self.peakDetect(msLevel):
968 peakDetect = 'True'
969 minY,maxY = self.theFMANSpecData.GetYValueRange()
970 # print minY,maxY
971 self.theFMANSpecData.Threshold(maxY*0.01)
972 self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0)
973 numPeaks = self.theSPL.GetNumberOfPeaks()
974 # print numPeaks
975 for m in xrange(1,numPeaks+1):
976 (x,w,y,yp) = self.theSPL.GetPeak(m)
977 if y <= 0:
978 continue
979 dataArr.append(x)
980 dataArr.append(y)
981 else:
982 numPoints = self.theFMANSpecData.GetNumberOfDataPoints()
983 for m in xrange(1,numPoints+1):
984 if self.theFMANSpecData.GetDataPointYValue(m) <= 0:
985 continue
986 x = self.theFMANSpecData.GetDataPointXValue(m)
987 dataArr.append(x)
988 y = self.theFMANSpecData.GetDataPointYValue(m)
989 dataArr.append(y)
990 polarity = self.theExperiment.Polarity
991
992 if len(dataArr) == 0:
993 continue
994
995 if polarity == 0:
996 polarity_val = "+"
997 else:
998 polarity_val = "-"
999
1000 rt = "PT%fS"%(xValue,)
1001
1002 # Scan level attributes
1003 # Required by mzXML (no format spec. needed)
1004 d = {'msLevel':msLevel}
1005 # Optional ('scan.' and prefixformat spec. needed)
1006 d.update({
1007 'scan.retentionTime:PT%fS':xValue,
1008 'scan.polarity:%s':polarity_val,
1009 'scan.startMz:%f':startMass,
1010 'scan.endMz:%f':stopMass,
1011 })
1012 d.update({'scanOrigin.parentFileID:%s': self.filehash,
1013 'scanOrigin.num:%d': spectrumCount})
1014 d.update({'nameValue.sample:%d':i,
1015 'nameValue.period:%d':j,
1016 'nameValue.experiment:%d':k,
1017 'nameValue.sampleName:%s':sampleName})
1018
1019 if msLevel == 1:
1020 yield (dataArr,d)
1021 else:
1022 # tandem MS scans add the following
1023 # Required by mzXML
1024 d.update({'precursorMz':fixedMass})
1025 # Optional
1026 # d.update({'precursorMz.precursorMzIntensity:f':0.0})
1027 yield (dataArr,d)
1028
1029 def getMsRunMetaData(self):
1030 self.open()
1031 d = {'startTime:PT%fS':self.startTime,
1032 'endTime:PT%fS':self.stopTime,
1033 }
1034 return d
1035
1036 def getFilenames(self):
1037 return [ (self.filename,self.filehash) ]
1038
1039 def msInstrumentID(self):
1040 return "1"
1041
1042 def msManufacturer(self):
1043 return "ABI / SCIEX"
1044
1045 def msModel(self):
1046 return "QSTAR"
1047
1048 def msIonisation(self):
1049 return "ESI"
1050
1051 def msMassAnalyzer(self):
1052 return "Quadrupole"
1053
1054 def msDetector(self):
1055 return None
1056
1057 def acquisitionSoftware(self):
1058 return "Analyst"
1059
1060 def acquisitionSoftwareVersion(self):
1061 return "1.0"
1062
1063 def precursorPrecision(self,mz):
1064 return .1
1065
1066 def precursorTune(self):
1067 return True
1068
1069 def lc(self):
1070 return True
1071
1072 def maldi(self):
1073 return not self.lc()
1074
1075 class mzXML_md_sax(handler.ContentHandler):
1076
1077 def __init__(self):
1078 self.md = {}
1079 self.context = ''
1080
1081 def startElement(self, name, attrs):
1082 self.context += ':%s'%(name,)
1083 # print >>sys.stderr, ">>",self.context
1084 # sys.stderr.flush()
1085 if self.context.endswith(':msRun'):
1086 if attrs.has_key('startTime'):
1087 self.md['startTime'] = float(attrs['startTime'][2:-1])
1088 if attrs.has_key('endTime'):
1089 self.md['endTime'] = float(attrs['endTime'][2:-1])
1090 elif self.context.endswith(':parentFile'):
1091 if not self.md.has_key('parentFile'):
1092 self.md['parentFile'] = []
1093 self.md['parentFile'].append((unquote(attrs['fileName']),attrs['fileType'],attrs['fileSha1']))
1094 elif self.context.endswith(':instrument'):
1095 self.md['msManufacturer'] = attrs['manufacturer']
1096 self.md['msModel'] = attrs['model']
1097 self.md['msIonisation'] = attrs['ionisation']
1098 self.md['msMassAnalyzer'] = attrs['msType']
1099 elif self.context.endswith(':msInstrument:msManufacturer'):
1100 self.md['msManufacturer'] = attrs['value']
1101 elif self.context.endswith(':msInstrument:msModel'):
1102 self.md['msModel'] = attrs['value']
1103 elif self.context.endswith(':msInstrument:msIonisation'):
1104 self.md['msIonisation'] = attrs['value']
1105 elif self.context.endswith(':msInstrument:msMassAnalyzer'):
1106 self.md['msMassAnalyzer'] = attrs['value']
1107 elif self.context.endswith(':msInstrument:msDetector'):
1108 self.md['msDetector'] = attrs['value']
1109 elif self.context.endswith(':msInstrument:software'):
1110 if attrs['type'] == 'acquisition':
1111 self.md['acquisitionSoftware'] = attrs['name']
1112 self.md['acquisitionSoftwareVersion'] = attrs['version']
1113 elif self.context.endswith(':instrument:software'):
1114 if attrs['type'] == 'acquisition':
1115 self.md['acquisitionSoftware'] = attrs['name']
1116 self.md['acquisitionSoftwareVersion'] = attrs['version']
1117 elif self.context.endswith(':scan'):
1118 raise SAXException("Early termination")
1119
1120 def endElement(self, name):
1121 # print >>sys.stderr, "<<",self.context
1122 # sys.stderr.flush()
1123 self.context = self.context[0:-(len(name)+1)]
1124
1125
1126 class mzXML_spec_sax(handler.ContentHandler):
1127
1128 def __init__(self):
1129 self.context = ''
1130 self.content = ''
1131 self.spectra = []
1132 self.done = False
1133 self.scancount = 0
1134 self.scanmax = 1000
1135 self.scanstart = 0
1136
1137 def startElement(self, name, attrs):
1138 self.context += ':%s'%(name,)
1139 # print >>sys.stderr, ">>",self.context
1140 # sys.stderr.flush()
1141 self.content = ''
1142 if name == 'scan':
1143 self.scanmd = {}
1144 self.scanlevel = int(attrs['msLevel'])
1145 self.scannum = int(attrs['num'])
1146 if attrs.has_key('retentionTime'):
1147 self.rt = float(attrs['retentionTime'][2:-1])
1148 if attrs.has_key('endMz'):
1149 self.endMz = float(attrs['endMz'])
1150 if attrs.has_key('startMz'):
1151 self.startMz = float(attrs['startMz'])
1152 self.polarity = attrs.get('polarity',None)
1153 for (k,v) in attrs.items():
1154 self.scanmd[k] = v
1155 self.precursorMz = None
1156 self.scancount += 1
1157 elif name == 'msRun':
1158 self.scancount = 0
1159 self.spectra = []
1160 self.context = ':msRun'
1161 elif name == 'scanOrigin':
1162 self.scanmd['scanOrigin'] = attrs
1163 elif self.context.endswith(':scan:nameValue'):
1164 self.scanmd['nameValue.%s:%%s'%attrs['name']] = attrs['value']
1165
1166 def characters(self, content):
1167 if self.scancount >= self.scanstart:
1168 if self.context.endswith(':peaks') or \
1169 self.context.endswith(':precursorMz'):
1170 self.content += content
1171
1172 def endElement(self, name):
1173 if self.scancount >= self.scanstart:
1174 if self.context.endswith('scan:peaks'):
1175
1176 self.spec = array('f')
1177 # print >>sys.stderr, len(self.content), len(b64decode(self.content)), self.content
1178 self.spec.fromstring(b64decode(self.content))
1179 if sys.byteorder != 'big':
1180 self.spec.byteswap()
1181 elif self.context.endswith(':scan'):
1182 d = {'msLevel':self.scanlevel}
1183 # Optional ('scan.' and prefixformat spec. needed)
1184 if hasattr(self,'rt'):
1185 d.update({'scan.retentionTime:PT%fS':self.rt})
1186 if hasattr(self,'startMz'):
1187 d.update({'scan.startMz:%f':self.startMz})
1188 if hasattr(self,'endMz'):
1189 d.update({'scan.endMz:%f':self.endMz})
1190 if self.polarity != None:
1191 d.update({'scan.polarity:%s':self.polarity})
1192 if self.scanlevel == 2:
1193 d.update({'precursorMz':self.precursorMz})
1194 if self.scanmd.has_key('scanOrigin'):
1195 d.update({'scanOrigin.parentFileID:%s': self.scanmd['scanOrigin']['parentFileID'],
1196 'scanOrigin.num:%d': int(self.scanmd['scanOrigin']['num'])})
1197 for (k,v) in self.scanmd.items():
1198 if not d.has_key(k):
1199 d[k] = v
1200 self.spectra.append((self.spec,d))
1201 if len(self.spectra) >= self.scanmax:
1202 self.scanstart = self.scancount+1
1203 raise SAXException("Early termination")
1204 elif self.context.endswith('scan:precursorMz'):
1205 self.precursorMz = float(self.content)
1206 if self.context.endswith('msRun'):
1207 self.done = True
1208 # print >>sys.stderr, "<<",self.context
1209 # sys.stderr.flush()
1210 self.context = self.context[0:-(len(name)+1)]
1211
1212 class FromMzXML:
1213 def __init__(self,filename):
1214 self.filename = filename
1215 self.parser = None
1216
1217 if not os.path.exists(self.filename):
1218 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1219 sys.exit(1)
1220
1221 if self.filename.endswith('.gz'):
1222 self.compressfmt = 'gz'
1223 else:
1224 self.compressfmt = None
1225
1226 def __del__(self):
1227 self.close()
1228
1229 def open(self):
1230
1231 if self.parser == None:
1232 # print >>sys.stderr, "Parsing out meta-data..."
1233 # sys.stderr.flush()
1234 self.parser = make_parser()
1235 self.handler = mzXML_md_sax()
1236 self.parser.setContentHandler(self.handler)
1237 try:
1238 if self.compressfmt == None:
1239 self.parser.parse(self.filename)
1240 elif self.compressfmt == 'gz':
1241 self.parser.parse(gzip.open(self.filename,'r'))
1242 else:
1243 print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1244 sys.exit(1)
1245 except SAXException:
1246 pass
1247
1248 self.md = self.handler.md
1249
1250 # print >>sys.stderr, self.md
1251 # sys.stderr.flush()
1252
1253 # print >>sys.stderr, "Set up spectrum parser..."
1254 # sys.stderr.flush()
1255 self.parser = make_parser()
1256 self.handler = mzXML_spec_sax()
1257 self.parser.setContentHandler(self.handler)
1258
1259 def close(self):
1260 pass
1261
1262 def spectra(self):
1263 self.open()
1264 while True:
1265 # print >>sys.stderr, "Spectrum parser to read spectra from",self.handler.scanstart
1266 # sys.stderr.flush()
1267 try:
1268 if self.compressfmt == None:
1269 self.parser.parse(self.filename)
1270 elif self.compressfmt == 'gz':
1271 self.parser.parse(gzip.open(self.filename,'r'))
1272 else:
1273 print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1274 sys.exit(1)
1275 except SAXException:
1276 pass
1277 for (s,d) in self.handler.spectra:
1278 # print >>sys.stderr, "Yeild spectrum number:",d['num']
1279 yield (s,d)
1280 if self.handler.done:
1281 break
1282
1283 def getMsRunMetaData(self):
1284 self.open()
1285 d = {}
1286 if self.md.has_key('startTime'):
1287 d.update({'startTime:PT%fS':self.md['startTime']})
1288 if self.md.has_key('endTime'):
1289 d.update({'endTime:PT%fS':self.md['endTime']})
1290 return d
1291
1292 def getFilenames(self):
1293 return self.md['parentFile']
1294
1295 def msInstrumentID(self):
1296 return "1"
1297
1298 def peakDetect(self,level):
1299 return False
1300
1301 def msManufacturer(self):
1302 return self.md.get('msManufacturer','')
1303
1304 def msModel(self):
1305 return self.md.get('msModel','')
1306
1307 def msIonisation(self):
1308 return self.md.get('msIonisation','')
1309
1310 def msMassAnalyzer(self):
1311 return self.md.get('msMassAnalyzer','')
1312
1313 def msDetector(self):
1314 return self.md.get('msDetector','')
1315
1316 def acquisitionSoftware(self):
1317 return self.md.get('acquisitionSoftware','')
1318
1319 def acquisitionSoftwareVersion(self):
1320 return self.md.get('acquisitionSoftwareVersion','')
1321
1322 def precursorPrecision(self,mz):
1323 return 0
1324
1325 def precursorTune(self):
1326 return False
1327
1328 def lc(self):
1329 return True
1330
1331 def maldi(self):
1332 return not self.lc()
1333
1334 class AB4700T2D:
1335 def __init__(self,filename,peaks=None):
1336 self.filename = filename
1337 self.datafiles = None
1338 self.deapp = None
1339 self.metadata = {}
1340 self.platespots = {}
1341 self.applyPeakDetect = []
1342 if peaks:
1343 self.applyPeakDetect = map(int,peaks.split(','))
1344
1345 if not os.path.exists(self.filename):
1346 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1347 sys.exit(1)
1348
1349 self.filename = os.path.abspath(self.filename)
1350
1351 self.dir = os.path.split(self.filename)[0]
1352
1353 def __del__(self):
1354 # Probably unnecessary
1355 self.close()
1356
1357 def read_metadata(self):
1358
1359 h = open(self.filename,'rb')
1360 r = csv.reader(h,'excel-tab')
1361
1362 self.metadata['PLATEDEF'] = []
1363 self.metadata['PLATE'] = []
1364 self.metadata['SPOT'] = []
1365 self.metadata['SCAN'] = []
1366
1367 for l in r:
1368 if len(l) == 0:
1369 continue
1370 kw = l.pop(0).upper()
1371 if kw in ('PLATEDEF','PLATE','SPOT','SCAN'):
1372 self.metadata[kw].append({})
1373 while len(l)>=2:
1374 k = l.pop(0);v = l.pop(0);
1375 if (kw == 'PLATE' and \
1376 (k in ('plateID','spotXCount','spotYCount','plateManufacturer','plateModel'))) or \
1377 (kw == 'PLATEDEF' and \
1378 (k in ('plateID','plateManufacturer','plateModel','spotNaming','maldiMatrix'))) or \
1379 (kw == 'SPOT' and \
1380 (k in ('plateID','spotID','spotXPosition','spotYPosition','maldiMatrix'))) or \
1381 (kw == 'SCAN' and \
1382 (k in ('plateID','spotID','filename','index'))):
1383 self.metadata[kw][-1][k] = v
1384 elif k == '':
1385 break
1386 else:
1387 print >>sys.stderr,"Bad key %s for %s row"%(k,kw)
1388 sys.exit(1)
1389 h.close()
1390
1391 for pd in self.metadata['PLATEDEF']:
1392 # insert all the gory details needed for a particular plate model etc...
1393 if pd['plateManufacturer'] == 'ABI / SCIEX' and pd['plateModel'] == '01-192+06-BB':
1394 if not pd.has_key('spotNaming') or not pd.has_key('maldiMatrix') or \
1395 not pd['spotNaming'] in ('alpha','parallel','antiparallel'):
1396 print >>sys.stderr,"Bad plate definition, missing maldiMatrix or spotNaming, or bad spotNaming value."
1397 sys.exit(1)
1398 self.metadata['PLATE'].append({
1399 'plateID':pd['plateID'],
1400 'plateManufacturer':pd['plateManufacturer'],
1401 'plateModel':pd['plateModel'],
1402 'spotXCount':24,
1403 'spotYCount':8,
1404 })
1405 if pd['spotNaming'] == 'alpha':
1406 for y in xrange(0,8):
1407 for x in xrange(0,24):
1408 self.metadata['SPOT'].append({
1409 'plateID':pd['plateID'],
1410 'spotID':"%c%d"%(y+ord('A'),x+1),
1411 'spotXPosition':x,
1412 'spotYPosition':2*y+(x%2),
1413 'maldiMatrix':pd['maldiMatrix']
1414 })
1415 else:
1416 print >>sys.stderr,"Valid spotNaming value, not yet implemented."
1417 sys.exit(1)
1418
1419 for pl in self.metadata['PLATE']:
1420 self.platespots[pl['plateID']] = []
1421
1422 for sp in self.metadata['SPOT']:
1423 self.platespots[sp['plateID']].append(sp)
1424
1425 self.datafiles = [ (os.path.join(self.dir,md['filename']),int(md['index'])) for md in self.metadata['SCAN'] ]
1426 self.distinct_datafiles = dict([(os.path.join(self.dir,md['filename']),
1427 fileSHA(os.path.join(self.dir,md['filename']))) for md in self.metadata['SCAN']])
1428
1429 self.maptoscan = {}
1430 index = 0
1431 for md in self.metadata['SCAN']:
1432 self.maptoscan[(os.path.join(self.dir,md['filename']),int(md['index']))] = index
1433 index += 1
1434
1435 def make_metadata(self):
1436 self.deapp.Documents.Open(self.filename)
1437 doc = self.deapp.Documents.Item(0)
1438 sv = doc.SpecView
1439 nspectra = sv.TotalSpectrum;
1440 self.deapp.Documents.Close()
1441 self.remsettingsfile(self.filename)
1442
1443 self.datafiles = [ (self.filename,i) for i in range(1,nspectra+1) ]
1444 self.distinct_datafiles = dict([ (self.filename, fileSHA(self.filename)) ])
1445 self.maptoscan = {}
1446 for (f,i) in self.datafiles:
1447 self.maptoscan[(f,i)] = i
1448
1449 def open(self):
1450
1451 if self.deapp is None:
1452
1453 from win32com.client import Dispatch, gencache
1454 self.deapp = Dispatch('DataExplorer.Application',
1455 resultCLSID='{3FED40F1-D409-11D1-8B56-0060971CB54B}')
1456 self.delib = gencache.EnsureModule('{06972F50-13F6-11D3-A5CB-0060971CB54B}',0,4,2)
1457 try:
1458 self.deapp.AutomatedProcessing = 1
1459 except AttributeError:
1460 pass
1461 try:
1462 self.deapp.Visible = 0
1463 except AttributeError:
1464 pass
1465
1466 try:
1467 self.read_metadata()
1468 except:
1469 self.make_metadata()
1470
1471 def close(self):
1472
1473 if not self.deapp is None:
1474 self.deapp.Quit()
1475 self.deapp = None
1476
1477 def peakDetect(self,msLevel):
1478 return msLevel in self.applyPeakDetect
1479
1480 # remove .set and .cts files to prevent DataExplorer crashes - DT 11-1-2006
1481 def remsettingsfile(self,fn):
1482 # print "rmsettingsfile: %s"%fn
1483 if ((len(fn) > 3) and (fn[-4:].lower() == '.t2d')):
1484 path2del = fn[:-4] + '.cts'
1485 if os.path.exists(path2del):
1486 os.unlink(path2del)
1487 path2del = fn[:-4] + '.set'
1488 if os.path.exists(path2del):
1489 os.unlink(path2del)
1490
1491 def spectra(self):
1492 self.open()
1493
1494 prevfile = ''
1495 for (f,i) in self.datafiles:
1496
1497 if f != prevfile:
1498 self.deapp.Documents.Close()
1499 self.remsettingsfile(prevfile)
1500
1501 if not os.path.exists(f):
1502 print >>sys.stderr, "Filename %s does not exist."%(f,)
1503 sys.exit(1)
1504
1505 self.remsettingsfile(f)
1506 self.deapp.Documents.Open(f)
1507 # print "open %s"%f
1508
1509 doc = self.deapp.Documents.Item(0)
1510 sv = doc.SpecView
1511 try:
1512 cv = doc.ChroView
1513 cv.XAxisUnits = self.delib.constants.deChroXAxisTime
1514 n,xaxis = cv.GetRawData(0,0)
1515 except:
1516 xaxis = None
1517
1518 if i > sv.TotalSpectrum:
1519 prevfile = f
1520 continue
1521
1522 sv.SetSpectrumAt(i-1)
1523
1524 if xaxis != None:
1525 retentionTime = xaxis[i-1][0]*60.0;
1526 else:
1527 retentionTime = None
1528 (tf,fixedMass) = doc.InstrumentSettings.GetSetting(self.delib.constants.dePreCursorIon,i-1,None)
1529
1530 if fixedMass > 0:
1531 msLevel = 2
1532 else:
1533 msLevel = 1
1534
1535 dataArr = array('f');
1536 peakDetect="False"
1537 if self.peakDetect(msLevel):
1538 peakDetect="True"
1539 sv.CorrectBaseline()
1540 doc.SpecSetup.NoiseReductionStdev = 2.0
1541 sv.FilterNoise(self.delib.constants.deNoiseRemoval)
1542 sv.PeakSettings.PeakThresholdPercent = 1
1543 st,sp = sv.GetPeakData(self.delib.constants.deSpecPeakAll,
1544 self.delib.constants.deSpecPeakSortMass, 0, 0)
1545 for j in xrange(len(sp)):
1546 x = sp[j][self.delib.constants.deSpecPeakCentroidMass]
1547 y = sp[j][self.delib.constants.deSpecPeakArea]
1548 if y <= 0:
1549 continue
1550 dataArr.append(x)
1551 dataArr.append(y)
1552 else:
1553 n,s = sv.GetRawData(0,0)
1554 for j in xrange(n):
1555 dataArr.append(s[j][0])
1556 dataArr.append(s[j][1])
1557
1558 if len(dataArr) == 0:
1559 continue
1560
1561 polarity_val = "+"
1562
1563 # Scan level attributes
1564 # Required by mzXML (no format spec. needed)
1565 d = {'msLevel':msLevel}
1566 # Optional ('scan.' and prefixformat spec. needed)
1567 d.update({
1568 'scan.polarity:%s':polarity_val,
1569 })
1570
1571 scanindex = self.maptoscan[(f,i)]
1572 if len(self.metadata['SCAN']) > 0:
1573 d.update({
1574 'plateID':self.metadata['SCAN'][scanindex]['plateID'],
1575 'spotID':self.metadata['SCAN'][scanindex]['spotID'],
1576 })
1577 if retentionTime != None:
1578 d.update({'scan.retentionTime:PT%fS':retentionTime});
1579 d.update({'scanOrigin.parentFileID:%s': self.distinct_datafiles[f],
1580 'scanOrigin.num:%d': i})
1581 if msLevel == 1:
1582 yield (dataArr,d)
1583 else:
1584 # tandem MS scans add the following
1585 # Required by mzXML
1586 d.update({'precursorMz':fixedMass})
1587 # Optional
1588 d.update({'precursorMz.precursorCharge:%d':1})
1589 yield (dataArr,d)
1590
1591 prevfile = f
1592
1593 self.deapp.Documents.Close()
1594 self.remsettingsfile(prevfile)
1595 self.remsettingsfile(f)
1596
1597 def getMsRunMetaData(self):
1598 self.open()
1599 return {}
1600
1601 def getFilenames(self):
1602 return [ t for t in self.distinct_datafiles.items() ]
1603
1604 def msInstrumentID(self):
1605 return "1"
1606
1607 def msManufacturer(self):
1608 return "ABI / SCIEX"
1609
1610 def msModel(self):
1611 return "4700 Proteomic Analyzer"
1612
1613 def msIonisation(self):
1614 return "MALDI"
1615
1616 def msMassAnalyzer(self):
1617 return "TOF"
1618
1619 def msDetector(self):
1620 return None
1621
1622 def acquisitionSoftware(self):
1623 return "DataExplorer"
1624
1625 def acquisitionSoftwareVersion(self):
1626 return "1.0"
1627
1628 def precursorPrecision(self,mz):
1629 return .1
1630
1631 def precursorTune(self):
1632 return False
1633
1634 def lc(self):
1635 return len(self.metadata['PLATE']) == 0
1636
1637 def maldi(self):
1638 return not self.lc()
1639
1640 def plateData(self):
1641 self.open()
1642 for d in self.metadata['PLATE']:
1643 yield d
1644
1645 def spotData(self,plate):
1646 self.open()
1647 for s in self.platespots[plate]:
1648 yield s
1649
1650 class scanfilter:
1651 def __init__(self,str):
1652 self.rtre = re.compile('^PT([0-9]+(\.[0-9]+)?)S$')
1653 self.intre = re.compile('^[1-9][0-9]*$')
1654 self.floatre = re.compile('^[0-9]+(\.[0-9]+)?$')
1655 self.cl = []
1656 clause = str.split(',')
1657 for cl in clause:
1658 scl = cl.split('.')
1659 m = self.rtre.match(scl[2].strip())
1660 if m != None:
1661 v = float(m.group(1))
1662 elif self.floatre.match(scl[2].strip()):
1663 v = float(scl[2])
1664 elif self.intre.match(scl[2].strip()):
1665 v = int(scl[2])
1666 else:
1667 v = scl[2]
1668 self.cl.append((scl[0],scl[1],v))
1669
1670 def test(self,d):
1671 a = {}
1672 for (k,v) in d.iteritems():
1673 k0 = k.split(':')[0].split('.')[-1]
1674 a[k0] = v
1675 for cl in self.cl:
1676 if type(a[cl[0]]) == type(u'') or \
1677 type(a[cl[0]]) == type('') :
1678 m = self.rtre.match(a[cl[0]].strip())
1679 if m != None:
1680 v1 = float(m.group(1))
1681 elif self.floatre.match(a[cl[0]].strip()):
1682 v1 = float(a[cl[0]])
1683 elif self.intre.match(a[cl[0]].strip()):
1684 v1 = int(a[cl[0]])
1685 else:
1686 v1 = a[cl[0]].strip()
1687 else:
1688 v1 = a[cl[0]]
1689 v2 = cl[2]
1690 # print >>sys.stderr, "Spectrum test:",v1,type(v1),cl[1],v2,type(v2)
1691 # sys.stderr.flush()
1692 if cl[1] == 'eq' and v1 != v2:
1693 # print >>sys.stderr, "FALSE"
1694 # sys.stderr.flush()
1695 return False
1696 elif cl[1] == 'ne' and not v1 != v2:
1697 return False
1698 elif cl[1] == 'lt' and not v1 < v2:
1699 return False
1700 elif cl[1] == 'le' and not v1 <= v2:
1701 return False
1702 elif cl[1] == 'gt' and not v1 > v2:
1703 return False
1704 elif cl[1] == 'ge' and not v1 >= v2:
1705 return False
1706 # print >>sys.stderr, "TRUE"
1707 # sys.stderr.flush()
1708 return True
1709
1710 if __name__ == '__main__':
1711
1712 from optparse import OptionParser
1713 import sys
1714 import glob
1715
1716 parser = OptionParser()
1717 parser.add_option("-R", "--rawdata", type="string", dest="rawdata", default="",\
1718 help="Format of raw data. Optional if raw spectra file ends in .wiff or .t2m.")
1719 parser.add_option("-X", "--xmlformat", type="string", dest="xmlformat", default="",\
1720 help="XML format to output. Optional if output file ends in .mzxml or .mzdata.")
1721 parser.add_option("-o", "--output", type="string", dest="output", default="",\
1722 help="Output file.")
1723 parser.add_option("-p", "--peaks", type="string", dest="peaks", default="2",\
1724 help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.")
1725 parser.add_option("-f", "--filter", type="string", dest="filter", default=None,\
1726 help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.")
1727 parser.add_option("-V", "--version", type='string', dest="version", default="3.0",
1728 help="XML version. mzXML only. Valid options '2.1','2.2','3.0'. Default: '3.0'.")
1729 parser.add_option("-z", "--compress_peaks", action="store_true", dest="compress_peaks", default=None,\
1730 help="Compress mzXML peaks data using zlib. Default: False")
1731 parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,\
1732 help="Compress output file. Valid options 'gz'. Default: None.")
1733 parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\
1734 help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.")
1735
1736 ( opts, args ) = parser.parse_args()
1737
1738 if opts.output == "" and opts.xmlformat == "":
1739 parser.error("Either -o or -X must be suppied.")
1740 sys.exit(1)
1741
1742 if opts.filter != None:
1743 filt=scanfilter(opts.filter)
1744 else:
1745 filt=None
1746
1747 argsglob = []
1748 for a in args:
1749 argsglob.extend(glob.glob(a))
1750
1751 if opts.output != "" and len(argsglob) > 1:
1752 parser.error("At most one raw spectrum data file is permitted, if -o option is used.")
1753
1754 for a in argsglob:
1755
1756 if opts.output != "":
1757 o = opts.output
1758 else:
1759 p = a.rfind('.')
1760 o = a[0:p]+'.'+opts.xmlformat
1761
1762 if o == a:
1763 print >>sys.stderr, "Input file %s and output file %s are the same!"%(a,o)
1764 sys.exit(1)
1765
1766 print >>sys.stderr, "Processing",a,"to",o
1767
1768 if opts.rawdata.lower() in ('wiff','qstar'):
1769 r = QStarWiff(a,opts.peaks)
1770 elif opts.rawdata.lower() in ('t2d','dat','ab4700','ab4800','mariner','voyager'):
1771 r = AB4700T2D(a,opts.peaks)
1772 elif opts.rawdata.lower() in ('mzxml',):
1773 r = FromMzXML(a)
1774 elif a[-5:].lower() in ('.wiff',):
1775 r = QStarWiff(a,opts.peaks)
1776 elif a[-4:].lower() in ('.t2m',):
1777 r = AB4700T2D(a,opts.peaks)
1778 elif a[-6:].lower() in ('.mzxml',):
1779 r = FromMzXML(a)
1780 else:
1781 parser.error("Bad rawdata format specification.")
1782 sys.exit(1)
1783
1784 if opts.xmlformat.lower() == 'mzxml':
1785 x = ToMzXML(r,o,opts.compress,filt,opts.compress_peaks,opts.version)
1786 elif opts.xmlformat.lower() == 'mzdata':
1787 x = ToMzData(r,o,opts.compress,filt)
1788 elif re.search(r'\.mzxml(\.gz)?$',opts.output,re.IGNORECASE):
1789 x = ToMzXML(r,opts.output,opts.compress,filt,opts.compress_peaks,opts.version)
1790 elif re.search(r'\.mzdata(\.gz)?$',opts.output,re.IGNORECASE):
1791 x = ToMzData(r,opts.output,opts.compress,filt)
1792 else:
1793 parser.error("Bad xml format specification.")
1794 sys.exit(1)
1795
1796 x.write(debug=opts.debug)