ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/pymsxml/pymsxml.py
Revision: 1.1
Committed: Sat Nov 4 14:01:59 2006 UTC (15 years, 11 months ago) by nje01
Branch: MAIN
Log Message:
Initial revision

Line File contents
1 #!/usr/bin/env python
2
3 import sys,os,os.path
4 import re
5 import sha
6 import math
7 import tempfile
8 import csv
9 import gzip
10 import zipfile
11 from array import array
12 from base64 import b64encode,b64decode
13 from urllib import quote,unquote
14 from xml.sax import *
15
16 class NameVersion:
17 def __init__(self):
18 self.name_ = "PyMsXML"
19 self.majorVer = 0
20 self.minorVer = 4
21 self.revVer = 0
22 def version(self):
23 return "%d.%d.%d"%(self.majorVer,self.minorVer,self.revVer)
24 def name(self):
25 return self.name_
26
27 def fileSHA(filename):
28 s = sha.new()
29 h = open(filename,'rb')
30 while True:
31 buffer = h.read(1024)
32 if buffer == '':
33 break
34 s.update(buffer)
35 h.close()
36 return s.hexdigest().lower()
37
38 class ToMzXML:
39 def __init__(self,reader,filename,cmpfmt=None,filt=None):
40 self.filename = filename
41 self.compressfmt = cmpfmt
42 self.reader = reader
43 self.initSHA()
44 self.writeByteCounter = 0
45 self.actualScanCount = 0
46 self.scanIndices = {}
47 self.filter = filt
48
49 def __del__(self):
50 if hasattr(self,'tmpFileName') and self.tmpFileName and self.tmpFileName != '':
51 os.unlink(self.tmpFileName)
52
53 def getFilename(self):
54 return self.filename
55
56 def initSHA(self):
57 self.sha1 = sha.new()
58
59 def updateSHA(self,s):
60 self.sha1.update(s)
61
62 def getSHA(self):
63 return self.sha1.hexdigest().lower()
64
65 def writeString(self,fh,data):
66 self.writeByteCounter += len(data)
67 self.sha1.update(data)
68 fh.write(data)
69
70 def write(self,debug=False):
71
72 # Make a temporary file for the scan data to count the number of spectra.
73 (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
74 tmpFile = os.fdopen(tmpFileFD,'wb')
75
76 # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
77 self.write_scans(tmpFile,debug)
78
79 tmpFile.close()
80
81 # Reset!
82
83 self.initSHA()
84 self.writeByteCounter = 0
85
86 if self.compressfmt == None and not self.filename.endswith('.gz'):
87 xmlFile = open(self.filename,'wb')
88 elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
89 if self.filename.endswith('.gz'):
90 xmlFile = gzip.open(self.filename,'wb')
91 else:
92 xmlFile = gzip.open(self.filename+'.gz','wb')
93 else:
94 print >>sys.stderr, "Bad compressfmt specification"
95 sys.exit(1)
96
97 self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
98
99 outStr = "<mzXML "
100 outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1\" "
101 outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
102 outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd\""
103 outStr += ">\n"
104
105 self.writeString (xmlFile,outStr)
106
107 outStr = "<msRun"
108 outStr += " scanCount=\"%d\"" % (self.actualScanCount,)
109 for (k,v) in self.reader.getMsRunMetaData().items():
110 k,f = k.split(':')
111 outStr += (" %%s=\"%s\""%(f,))%(k,v)
112 outStr += ">\n"
113
114 self.writeString (xmlFile,outStr)
115
116 filehashes = {}
117
118 for f in self.reader.getFilenames():
119 if len(f) == 2:
120 outStr = "<parentFile "
121 outStr += "fileName=\"%s\" " % (quote(f[0]),)
122 outStr += "fileType=\"RAWData\" "
123 outStr += "fileSha1=\"%s\"" % (f[1],)
124 outStr += "/>\n"
125 else:
126 outStr = "<parentFile "
127 outStr += "fileName=\"%s\" " % (f[0],)
128 outStr += "fileType=\"%s\" " % (f[1],)
129 outStr += "fileSha1=\"%s\"" % (f[2],)
130 outStr += "/>\n"
131 self.writeString (xmlFile,outStr)
132
133 outStr = "<msInstrument>\n"
134 outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),)
135 outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),)
136 if self.reader.msIonisation():
137 outStr += "<msIonisation category=\"msIonisation\" value=\"%s\"/>\n" % (self.reader.msIonisation(),)
138 if self.reader.msMassAnalyzer():
139 outStr += "<msMassAnalyzer category=\"msMassAnalyzer\" value=\"%s\"/>\n" % (self.reader.msMassAnalyzer(),)
140 if self.reader.msDetector():
141 outStr += "<msDetector category=\"msDetector\" value=\"%s\"/>\n" % (self.reader.msDetector(),)
142
143 self.writeString (xmlFile,outStr)
144
145 outStr = "<software "
146 outStr += "type=\"acquisition\" "
147 outStr += "name=\"%s\" "%(self.reader.acquisitionSoftware(),)
148 outStr += "version=\"%s\""%(self.reader.acquisitionSoftwareVersion(),)
149 outStr += "/>\n"
150
151 self.writeString (xmlFile,outStr)
152
153 outStr = "</msInstrument>\n"
154
155 self.writeString (xmlFile,outStr)
156
157 outStr = "<dataProcessing>\n"
158
159 self.writeString (xmlFile,outStr)
160
161 nv = NameVersion()
162
163 outStr = "<software "
164 outStr += "type=\"conversion\" "
165 outStr += "name=\"%s\" "%(nv.name(),)
166 outStr += "version=\"%s\"" % (nv.version(),)
167 outStr += "/>\n"
168
169 self.writeString (xmlFile,outStr)
170
171 if (self.reader.peakDetect(1)):
172 outStr = "<processingOperation "
173 outStr += "name=\"peak_detection_level_1\" "
174 outStr += "value=\"true\""
175 outStr += "/>\n"
176 outStr += "<processingOperation "
177 outStr += "name=\"peak_detection_level_1_threshold\" "
178 outStr += "value=\"1%\""
179 outStr += "/>\n"
180 outStr += "<processingOperation "
181 outStr += "name=\"peak_detection_level_1_software\" "
182 outStr += "value=\"Analyst\""
183 outStr += "/>\n"
184 self.writeString (xmlFile,outStr)
185 if (self.reader.peakDetect(2)):
186 outStr = "<processingOperation "
187 outStr += "name=\"peak_detection_level_2\" "
188 outStr += "value=\"true\""
189 outStr += "/>\n"
190 outStr += "<processingOperation "
191 outStr += "name=\"peak_detection_level_2_threshold\" "
192 outStr += "value=\"1%\""
193 outStr += "/>\n"
194 outStr += "<processingOperation "
195 outStr += "name=\"peak_detection_level_2_software\" "
196 outStr += "value=\"Analyst\""
197 outStr += "/>\n"
198 self.writeString (xmlFile,outStr)
199
200 outStr = "<processingOperation "
201 outStr += "name=\"min_peaks_per_spectra\" "
202 outStr += "value=\"1\""
203 outStr += "/>\n"
204
205 self.writeString (xmlFile,outStr)
206
207 outStr = "</dataProcessing>\n"
208
209 self.writeString (xmlFile,outStr)
210
211 if self.reader.maldi():
212 outStr = '<spotting>\n'
213 for d in self.reader.plateData():
214 outStr += '<plate plateID="%s" spotXCount="%s" spotYCount="%s">\n' % \
215 (d['plateID'],d['spotXCount'],d['spotYCount'])
216 outStr += '<plateManufacturer category="plateManufacturer" value="%s"/>\n' % \
217 (d['plateManufacturer'],)
218 outStr += '<plateModel category="plateModel" value="%s"/>\n' % \
219 (d['plateModel'],)
220 for s in self.reader.spotData(d['plateID']):
221 outStr += '<spot spotID="%s" spotXPosition="%s" spotYPosition="%s">\n' % \
222 (s['spotID'],s['spotXPosition'],s['spotYPosition'])
223 outStr += '<maldiMatrix category="maldiMatrix" value="%s"/>\n' % \
224 (s['maldiMatrix'],)
225 outStr += '</spot>\n'
226 outStr += '</plate>\n'
227 outStr += '</spotting>\n'
228 self.writeString(xmlFile,outStr)
229
230 if self.reader.lc():
231 pass
232
233 scanOffset = self.writeByteCounter
234
235 tmpFile = open(self.tmpFileName,'rb')
236 while True:
237 tmpStr = tmpFile.read(1024)
238 if tmpStr == '':
239 break
240 self.writeString(xmlFile,tmpStr)
241 tmpFile.close()
242 os.unlink(self.tmpFileName)
243 self.tmpFileName = ''
244
245 outStr = "</msRun>\n"
246 self.writeString (xmlFile,outStr)
247
248 indexOffset = self.writeByteCounter
249
250 outStr = "<index "
251 outStr += "name=\"scan\" "
252 outStr += ">\n"
253
254 self.writeString (xmlFile,outStr)
255
256 for i in xrange(1,self.actualScanCount+1):
257 outStr = "<offset id=\"%d\">" % (i,)
258 outStr += "%d</offset>\n" % (self.scanIndices[i] + scanOffset,)
259 self.writeString(xmlFile,outStr)
260
261 outStr = "</index>\n"
262 self.writeString (xmlFile,outStr)
263
264 outStr = "<indexOffset>%d</indexOffset>\n" % (indexOffset,)
265
266 self.writeString (xmlFile,outStr)
267
268 self.writeString (xmlFile,"<sha1>")
269
270 outStr = self.getSHA()
271
272 self.writeString (xmlFile,outStr)
273
274 self.writeString (xmlFile,"</sha1>\n")
275
276 outStr = "</mzXML>\n"
277 self.writeString (xmlFile,outStr)
278
279 xmlFile.close()
280
281 def write_scans(self,xmlFile,debug=False):
282
283 msLevel = 0
284 scanNumber = 0
285 ancestors = []
286 self.writeByteCounter = 0
287
288 for (s,d) in self.reader.spectra():
289
290 if self.filter != None and not self.filter.test(d):
291 continue
292
293 if debug and scanNumber >= 10:
294 break
295
296 if not d.has_key('msLevel'):
297 print >>sys.stderr, "Required scan attributes missing."
298 sys.exit(1)
299
300 prevLevel = msLevel
301 msLevel = d['msLevel']
302
303 if prevLevel < msLevel and prevLevel > 0:
304 # We went "in" a level, push scan number of parent
305 ancestors.append((scanNumber,prevSpec))
306 elif prevLevel > msLevel and msLevel > 0:
307 if len(ancestors) == 0:
308 pass #print >>sys.stderr, "No ancestor for scan %s at level %s"%(scanNumber,msLevel)
309 else:
310 ancestors.pop()
311
312 outStr = ''
313 if prevLevel > msLevel:
314 for m in xrange(0,prevLevel-msLevel+1):
315 outStr += "</scan>\n"
316 else:
317 if prevLevel > 0 and prevLevel == msLevel:
318 outStr += "</scan>\n"
319
320 self.writeString(xmlFile,outStr)
321
322 scanNumber = scanNumber + 1
323
324 self.scanIndices[scanNumber] = self.writeByteCounter
325
326 totIonCurrent = 0
327 maxmz = None
328 minmz = None
329 basePeakMz = None
330 basePeakIntensity = None
331 peaksCount = 0
332 for i in xrange(0,len(s),2):
333 x = s[i]; y = s[i+1]
334 if minmz is None or minmz > x:
335 minmz = x
336 if maxmz is None or maxmz < x:
337 maxmz = x
338 totIonCurrent += y
339 if basePeakIntensity is None or basePeakIntensity < y:
340 basePeakIntensity = y
341 basePeakMz = x
342 peaksCount += 1
343
344 outStr = "<scan num=\"%d\" msLevel=\"%d\"" % (scanNumber,msLevel)
345 outStr += " peaksCount=\"%d\"" % (peaksCount,)
346 outStr += " lowMz=\"%f\"" % (minmz,)
347 outStr += " highMz=\"%f\"" % (maxmz,)
348 outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,)
349 outStr += " basePeakMz=\"%f\"" % (basePeakMz,)
350 outStr += " basePeakIntensity=\"%f\"" % (basePeakIntensity,)
351
352 for (k,v) in d.items():
353 if k.startswith('scan.'):
354 k,f = k.split(':')
355 k = k[5:]
356 outStr += (" %%s=\"%s\""%(f,))%(k,v)
357 outStr += ">\n"
358 self.writeString(xmlFile,outStr)
359
360 if msLevel > 1:
361
362 if not d.has_key('precursorMz') :
363 print >>sys.stderr, "Required precursorMz attribute missing."
364 sys.exit(1)
365
366 # We scan the parent spectrum in the region of the
367 # precursorMz to establish the intensity. Optionally,
368 # we tune the precursorMz itself, on the basis of this
369
370 pMz = d['precursorMz']
371 tol = self.reader.precursorPrecision(pMz)
372 pMzLB = pMz-tol
373 pMzUB = pMz+tol
374 pMzIntensity = 0
375
376 if len(ancestors) > 0:
377 for i in xrange(0,len(ancestors[-1][1]),2):
378 x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
379 if x < pMzLB:
380 continue
381 if x > pMzUB:
382 break
383 if pMzIntensity < y:
384 pMzIntensity = y
385 pMzTune = x
386
387 if self.reader.precursorTune():
388 pMz = pMzTune
389
390 outStr = "<precursorMz"
391 for (k,v) in d.items():
392 if k.startswith('precursorMz.'):
393 k,f = k.split(':')
394 k = k[12:]
395 outStr += (" %%s=\"%s\""%(f,))%(k,v)
396 if len(ancestors)>0:
397 outStr += " precursorScanNum=\"%d\""%(ancestors[-1][0],)
398 outStr += " precursorIntensity=\"%f\">"%(pMzIntensity,)
399 else:
400 outStr += ">"
401 outStr += "%f</precursorMz>\n" % (pMz,)
402 self.writeString(xmlFile,outStr)
403
404 if self.reader.maldi():
405 outStr = '<maldi'
406 for (k,v) in d.items():
407 if k.startswith('maldi.'):
408 k,f = k.split(':')
409 k = k[6:]
410 outStr += (" %%s=\"%s\""%(f,))%(k,v)
411 outStr += " plateID=\"%s\""%(d['plateID'],)
412 outStr += " spotID=\"%s\""%(d['spotID'],)
413 outStr += "/>\n"
414 self.writeString(xmlFile,outStr)
415
416 outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">"
417 self.writeString(xmlFile,outStr)
418
419 # Spec says the byte order shall be
420 # network, which is the same as big endian.
421
422 if sys.byteorder != 'big':
423 s.byteswap()
424
425 outStr = b64encode(s.tostring())
426 if not debug:
427 self.writeString(xmlFile,outStr)
428 else:
429 self.writeString(xmlFile,outStr[0:100])
430
431 if sys.byteorder != 'big':
432 s.byteswap()
433
434 outStr = "</peaks>\n"
435 self.writeString(xmlFile,outStr)
436
437 if self.reader.maldi():
438 outStr = ''
439 for (k,v) in d.items():
440 if k.startswith('maldi.'):
441 k,f = k.split(':')
442 k = k[6:]
443 outStr += '<nameValue'
444 outStr += ' name="%s"'%(k,)
445 outStr += (' value="%f"'%(f,))%(v,)
446 outStr += '/>\n'
447 ## outStr += '<nameValue'
448 ## outStr += ' name="plateID"'
449 ## outStr += ' value="%s"'%(d['plateID'],)
450 ## outStr += '/>\n'
451 ## outStr += '<nameValue'
452 ## outStr += ' name="spotID"'
453 ## outStr += ' value="%s"'%(d['spotID'],)
454 ## outStr += '/>\n'
455 self.writeString(xmlFile,outStr)
456
457 xmlFile.flush()
458
459 prevSpec = s
460
461 outStr = ""
462 for m in xrange(0,msLevel):
463 outStr += "</scan>\n"
464
465 self.writeString(xmlFile,outStr)
466 self.actualScanCount = scanNumber
467
468 class ToMzData:
469 def __init__(self,reader,filename,cmpfmt=None,filt=None):
470 self.filename = filename
471 self.compressfmt = cmpfmt
472 self.reader = reader
473 self.actualScanCount = 0
474 self.initSHA()
475 self.filter = filt
476
477 def writeString(self,fh,data):
478 # self.writeByteCounter += len(data)
479 self.sha1.update(data)
480 fh.write(data)
481
482 def initSHA(self):
483 self.sha1 = sha.new()
484
485 def updateSHA(self,s):
486 self.sha1.update(s)
487
488 def getSHA(self):
489 return self.sha1.hexdigest().lower()
490
491 def write(self,debug=False):
492
493 # Make a temporary file for the scan data to count the number of spectra.
494 (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
495 tmpFile = os.fdopen(tmpFileFD,'wb')
496
497 # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
498 self.write_scans(tmpFile,debug)
499
500 tmpFile.close()
501
502 scanhash = self.getSHA()
503
504 self.initSHA()
505
506 if self.compressfmt == None and not self.filename.endswith('.gz'):
507 xmlFile = open(self.filename,'wb')
508 elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
509 if self.filename.endswith('.gz'):
510 xmlFile = gzip.open(self.filename,'wb')
511 else:
512 xmlFile = gzip.open(self.filename+'.gz','wb')
513 else:
514 print >>sys.stderr, "Bad compressfmt specification"
515 sys.exit(1)
516
517 self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
518
519 outStr = "<mzData "
520 outStr += "version=\"1.05\" "
521 outStr += "accessionNumber=\"pymsxml:%s\" "%(scanhash,)
522 outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
523 # outStr += "xmlns=\"http://psidev.sourceforge.net/ms/xml/mzdata\" "
524 outStr += "xsi:noNamespaceSchemaLocation=\"http://psidev.sourceforge.net/ms/xml/mzdata/mzdata.xsd\""
525 outStr += ">\n"
526
527 self.writeString (xmlFile,outStr)
528
529 outStr = "<description>\n"
530 outStr += "<admin>\n"
531 outStr += "<sampleName></sampleName>\n"
532 outStr += "<contact>\n"
533 outStr += "<name></name>\n"
534 outStr += "<institution></institution>\n"
535 outStr += "</contact>\n"
536 outStr += "</admin>\n"
537 outStr += "<instrument>\n"
538 outStr += "<instrumentName></instrumentName>\n"
539 outStr += "<source>\n"
540 outStr += "</source>\n"
541 outStr += "<analyzerList count=\"1\">\n"
542 outStr += "<analyzer>\n"
543 outStr += "</analyzer>\n"
544 outStr += "</analyzerList>\n"
545 outStr += "<detector>\n"
546 outStr += "</detector>\n"
547 outStr += "</instrument>\n"
548 outStr += "<dataProcessing>\n"
549 outStr += "<software>\n"
550 outStr += "<name>%s</name>\n"%(self.reader.acquisitionSoftware(),)
551 outStr += "<version>%s</version>\n"%(self.reader.acquisitionSoftwareVersion(),)
552 outStr += "</software>\n"
553 outStr += "<software>\n"
554 nv = NameVersion()
555 outStr += "<name>%s</name>\n"%(nv.name(),)
556 outStr += "<version>%s</version>\n"%(nv.version(),)
557 outStr += "</software>\n"
558 outStr += "</dataProcessing>\n"
559 outStr += "</description>\n"
560
561 self.writeString (xmlFile,outStr)
562
563 outStr = "<spectrumList"
564 outStr += " count=\"%d\">\n"%(self.actualScanCount,)
565
566 self.writeString (xmlFile,outStr)
567
568 tmpFile = open(self.tmpFileName,'rb')
569 while True:
570 tmpStr = tmpFile.read(1024)
571 if tmpStr == '':
572 break
573 self.writeString(xmlFile,tmpStr)
574 tmpFile.close()
575 os.unlink(self.tmpFileName)
576 self.tmpFileName = ''
577
578 outStr = "</spectrumList>\n</mzData>\n"
579
580 self.writeString (xmlFile,outStr)
581
582 def write_scans(self,xmlFile,debug=False):
583
584 msLevel = 0
585 scanNumber = 0
586 ancestors = []
587
588 self.initSHA()
589 # self.writeByteCounter = 0
590
591 for (s,d) in self.reader.spectra():
592
593 if self.filter != None and not self.filter.test(d):
594 continue
595
596 if debug and scanNumber >= 10:
597 break
598
599 if not d.has_key('msLevel'):
600 print >>sys.stderr, "Required scan attributes missing."
601 sys.exit(1)
602
603 prevLevel = msLevel
604 msLevel = d['msLevel']
605
606 if prevLevel < msLevel and prevLevel > 0:
607 # We went "in" a level, push scan number of parent
608 ancestors.append((scanNumber,prevSpec))
609 elif prevLevel > msLevel and msLevel > 0:
610 ancestors.pop()
611
612 scanNumber = scanNumber + 1
613
614 totIonCurrent = 0
615 maxmz = None
616 minmz = None
617 basePeakMz = None
618 basePeakIntensity = None
619 peaksCount = 0
620 for i in xrange(0,len(s),2):
621 x = s[i]; y = s[i+1]
622 if minmz is None or minmz > x:
623 minmz = x
624 if maxmz is None or maxmz < x:
625 maxmz = x
626 totIonCurrent += y
627 if basePeakIntensity is None or basePeakIntensity < y:
628 basePeakIntensity = y
629 basePeakMz = x
630 peaksCount += 1
631
632 outStr = "<spectrum id=\"%d\">\n" % (scanNumber,)
633 outStr += "<spectrumDesc>\n"
634 outStr += "<spectrumSettings>\n"
635
636 self.writeString (xmlFile,outStr)
637
638 # outStr += "<acqSpecification>\n"
639 # outStr += "</acqSpecification>\n"
640
641 outStr = "<spectrumInstrument"
642 outStr += " msLevel=\"%d\""%(msLevel,)
643 if d.has_key('scan.startMz') and \
644 d.has_key('scan.endMz'):
645 outStr += " mzRangeStart=\"%f\""%(d['scan.startMz'],)
646 outStr += " mzRangeStop=\"%f\""%(d['scan.endMz'],)
647 outStr += ">\n"
648 for (k,v) in d.items():
649 if k.startswith('scan.'):
650 k,f = k.split(':')
651 k = k[5:]
652
653 if k == "polarity":
654 if v == '+':
655 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Positive",)
656 elif v == '-':
657 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Negative",)
658 elif k == 'retentionTime':
659 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"TimeInSeconds\" value=\"%f\"/>\n"%(v,)
660 else:
661 outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
662 outStr += "<userParam name=\"lowMz\" value=\"%f\"/>\n"%(minmz,)
663 outStr += "<userParam name=\"highMz\" value=\"%f\"/>\n"%(maxmz,)
664 outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,)
665 outStr += "<userParam name=\"basePeakMz\" value=\"%f\"/>\n"%(basePeakMz,)
666 outStr += "<userParam name=\"basePeakIntensity\" value=\"%f\"/>\n"%(basePeakIntensity,)
667 outStr += "</spectrumInstrument>\n"
668 outStr += "</spectrumSettings>\n"
669
670 self.writeString (xmlFile,outStr)
671
672 if msLevel > 1:
673
674 if not d.has_key('precursorMz') :
675 print >>sys.stderr, "Required precursorMz attribute missing."
676 sys.exit(1)
677
678 # We scan the parent spectrum in the region of the
679 # precursorMz to establish the intensity. Optionally,
680 # we tune the precursorMz itself, on the basis of this
681
682 pMz = d['precursorMz']
683 tol = self.reader.precursorPrecision(pMz)
684 pMzLB = pMz-tol
685 pMzUB = pMz+tol
686 pMzIntensity = 0
687
688 for i in xrange(0,len(ancestors[-1][1]),2):
689 x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
690 if x < pMzLB:
691 continue
692 if x > pMzUB:
693 break
694 if pMzIntensity < y:
695 pMzIntensity = y
696 pMzTune = x
697
698 if self.reader.precursorTune():
699 pMz = pMzTune
700
701 outStr = "<precursorList count=\"1\">\n"
702
703 outStr += "<precursor"
704 outStr += " msLevel=\"%d\""%(msLevel,)
705 outStr += " spectrumRef=\"%d\""%(ancestors[-1][0],)
706 outStr += ">\n"
707
708 self.writeString(xmlFile,outStr)
709
710 outStr = "<ionSelection>\n"
711 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"MassToChargeRatio\" value=\"%f\"/>\n"%(pMz,)
712 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Intensity\" value=\"%f\"/>\n"%(pMzIntensity,)
713 for (k,v) in d.items():
714 if k.startswith('precursorMz.'):
715 k,f = k.split(':')
716 k = k[12:]
717 outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
718 outStr += "</ionSelection>\n"
719
720 self.writeString(xmlFile,outStr)
721
722 outStr = "<activation>\n"
723 outStr += "</activation>\n"
724 outStr += "</precursor>\n"
725 outStr += "</precursorList>\n"
726
727 self.writeString(xmlFile,outStr)
728
729 if self.reader.maldi():
730 outStr = '<maldi'
731 for (k,v) in d.items():
732 if k.startswith('maldi.'):
733 k,f = k.split(':')
734 k = k[6:]
735 outStr += (" %%s=\"%s\""%(f,))%(k,v)
736 outStr += " plateID=\"%s\""%(d['plateID'],)
737 outStr += " spotID=\"%s\""%(d['spotID'],)
738 outStr += "/>\n"
739 self.writeString(xmlFile,outStr)
740
741 outStr = "</spectrumDesc>\n"
742 self.writeString(xmlFile,outStr)
743
744 outStr = "<mzArrayBinary>\n"
745 outStr += "<data"
746 outStr += " precision=\"32\""
747 outStr += " endian=\"%s\""%(sys.byteorder,)
748 outStr += " length=\"%d\""%(peaksCount,)
749 outStr += ">"
750 self.writeString(xmlFile,outStr)
751
752 outStr = b64encode(s[0:peaksCount*2:2].tostring())
753 if not debug:
754 self.writeString(xmlFile,outStr)
755 else:
756 self.writeString(xmlFile,outStr[0:100])
757
758 outStr = "</data>\n</mzArrayBinary>\n"
759 self.writeString(xmlFile,outStr)
760
761 outStr = "<intenArrayBinary>\n"
762 outStr += "<data"
763 outStr += " precision=\"32\""
764 outStr += " endian=\"%s\""%(sys.byteorder,)
765 outStr += " length=\"%d\""%(peaksCount,)
766 outStr += ">"
767 self.writeString(xmlFile,outStr)
768
769 outStr = b64encode(s[1:peaksCount*2:2].tostring())
770 if not debug:
771 self.writeString(xmlFile,outStr)
772 else:
773 self.writeString(xmlFile,outStr[0:100])
774
775 outStr = "</data>\n</intenArrayBinary>\n"
776 self.writeString(xmlFile,outStr)
777
778 if self.reader.maldi():
779 outStr = ''
780 for (k,v) in d.items():
781 if k.startswith('maldi.'):
782 k,f = k.split(':')
783 k = k[6:]
784 outStr += '<nameValue'
785 outStr += ' name="%s"'%(k,)
786 outStr += (' value="%f"'%(f,))%(v,)
787 outStr += '/>\n'
788 ## outStr += '<nameValue'
789 ## outStr += ' name="plateID"'
790 ## outStr += ' value="%s"'%(d['plateID'],)
791 ## outStr += '/>\n'
792 ## outStr += '<nameValue'
793 ## outStr += ' name="spotID"'
794 ## outStr += ' value="%s"'%(d['spotID'],)
795 ## outStr += '/>\n'
796 self.writeString(xmlFile,outStr)
797
798 outStr = "</spectrum>\n"
799 self.writeString(xmlFile,outStr)
800 xmlFile.flush()
801
802 prevSpec = s
803
804 self.actualScanCount = scanNumber
805
806 class QStarWiff:
807 def __init__(self,filename,peaks=None):
808 self.filename = filename
809 self.filehash = fileSHA(filename)
810 self.theTIC = None
811 self.theWF = None
812 self.theFMANSpecData = None
813 self.startTime = None
814 self.stopTime = None
815 if peaks:
816 self.applyPeakDetect = map(int,peaks.split(','))
817
818 if not os.path.exists(self.filename):
819 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
820 sys.exit(1)
821
822 self.filename = os.path.abspath(self.filename)
823
824 def __del__(self):
825 # Probably unnecessary
826 self.close()
827
828 def peakDetect(self,msLevel):
829 return ((msLevel in self.applyPeakDetect) and (msLevel != 1))
830
831 def open(self):
832
833 if self.theTIC is None:
834
835 from win32com.client import Dispatch
836
837 self.theFMANSpecData = Dispatch('Analyst.FMANSpecData')
838 self.theTIC = Dispatch('Analyst.FMANChromData')
839 self.theSPL = Dispatch('Analyst.SpectralPeakList')
840
841 self.theFMANSpecData.WiffFileName = self.filename
842 self.theTIC.WiffFileName = self.filename
843 self.theWF = self.theFMANSpecData.GetWiffFileObject()
844
845 self.theTIC.SetToTIC(1,0,0)
846
847 self.startTime = self.theTIC.GetDataPointXValue(1)
848 self.stopTime = self.theTIC.GetDataPointXValue(self.theTIC.GetNumberOfDataPoints())
849
850 def close(self):
851
852 if not self.theTIC is None:
853
854 if not self.theWF is None:
855 self.theWF.CloseWiffFile()
856 self.theTIC = None
857 self.theFMANSpecData = None
858 self.theWF = None
859
860 def spectra(self):
861 self.open()
862 spectrumCount=0;
863
864 numberOfSamples = self.theWF.GetActualNumberOfSamples()
865 for i in xrange(1,numberOfSamples+1):
866
867 # print >>sys.stderr, "Sample %d"%i
868 # print >>sys.stderr, "Sample Name: %s"%(self.theWF.GetSampleName(i),)
869
870 sampleName = self.theWF.GetSampleName(i)
871
872 numberOfPeriods = self.theWF.GetActualNumberOfPeriods(i)
873 for j in xrange(0,numberOfPeriods):
874
875 numberOfExperiments = self.theWF.GetNumberOfExperiments(i,j)
876 self.theTIC.SetToTIC(i, j, 0)
877
878 numberOfTICDataPoints = self.theTIC.GetNumberOfDataPoints()
879 for l in xrange(1,numberOfTICDataPoints+1):
880
881 for k in xrange(0,numberOfExperiments):
882
883 self.theExperiment = self.theWF.GetExperimentObject(i, j, k)
884 self.theTIC.SetToTIC(i, j, k)
885
886 # Convert to seconds?
887 xValue = self.theTIC.GetDataPointXValue(l) * 60
888
889 spectrumCount += 1;
890
891 # If any Ion current at time "l"
892 if self.theTIC.GetDataPointYValue(l) > 0:
893
894 self.theFMANSpecData.SetSpectrum(i, j, k, xValue, xValue)
895
896 if self.theExperiment.ScanType == 9:
897 msLevel = 2
898 m = re.search(r'[(]([0-9.]+)[)]',self.theFMANSpecData.DataTitle)
899 fixedMass = float(m.group(1))
900 else:
901 msLevel = 1
902
903 startMass = self.theFMANSpecData.GetStartMass()
904 stopMass = self.theFMANSpecData.GetStopMass()
905 dataArr = array('f');
906 peakDetect = 'False'
907 if self.peakDetect(msLevel):
908 peakDetect = 'True'
909 minY,maxY = self.theFMANSpecData.GetYValueRange()
910 print minY,maxY
911 self.theFMANSpecData.Threshold(maxY*0.01)
912 self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0)
913 numPeaks = self.theSPL.GetNumberOfPeaks()
914 print numPeaks
915 for m in xrange(1,numPeaks+1):
916 (x,w,y,yp) = self.theSPL.GetPeak(m)
917 if y <= 0:
918 continue
919 dataArr.append(x)
920 dataArr.append(y)
921 else:
922 numPoints = self.theFMANSpecData.GetNumberOfDataPoints()
923 for m in xrange(1,numPoints+1):
924 if self.theFMANSpecData.GetDataPointYValue(m) <= 0:
925 continue
926 x = self.theFMANSpecData.GetDataPointXValue(m)
927 dataArr.append(x)
928 y = self.theFMANSpecData.GetDataPointYValue(m)
929 dataArr.append(y)
930 polarity = self.theExperiment.Polarity
931
932 if len(dataArr) == 0:
933 continue
934
935 if polarity == 0:
936 polarity_val = "+"
937 else:
938 polarity_val = "-"
939
940 rt = "PT%fS"%(xValue,)
941
942 # Scan level attributes
943 # Required by mzXML (no format spec. needed)
944 d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect}
945 # Optional ('scan.' and prefixformat spec. needed)
946 d.update({
947 'scan.retentionTime:PT%fS':xValue,
948 'scan.polarity:%s':polarity_val,
949 'scan.startMz:%f':startMass,
950 'scan.endMz:%f':stopMass,
951 'scan.sample:%d':i,
952 'scan.period:%d':j,
953 'scan.experiment:%d':k,
954 'scan.sampleName:%s':sampleName
955 })
956 d.update({'scanOrigin.parentID:%s': self.filehash[f],
957 'scanOrigin.num:%d': spectrumCount})
958 if msLevel == 1:
959 yield (dataArr,d)
960 else:
961 # tandem MS scans add the following
962 # Required by mzXML
963 d.update({'precursorMz':fixedMass})
964 # Optional
965 # d.update({'precursorMz.precursorMzIntensity:f':0.0})
966 yield (dataArr,d)
967
968 def getMsRunMetaData(self):
969 self.open()
970 d = {'startTime:PT%fS':self.startTime,
971 'endTime:PT%fS':self.stopTime,
972 }
973 return d
974
975 def getFilenames(self):
976 return [ (self.filename,self.filehash) ]
977
978 def msManufacturer(self):
979 return "ABI / SCIEX"
980
981 def msModel(self):
982 return "QSTAR"
983
984 def msIonisation(self):
985 return "ESI"
986
987 def msMassAnalyzer(self):
988 return "Quadrupole"
989
990 def msDetector(self):
991 return None
992
993 def acquisitionSoftware(self):
994 return "Analyst"
995
996 def acquisitionSoftwareVersion(self):
997 return "1.0"
998
999 def precursorPrecision(self,mz):
1000 return .1
1001
1002 def precursorTune(self):
1003 return True
1004
1005 def lc(self):
1006 return True
1007
1008 def maldi(self):
1009 return not self.lc()
1010
1011 class mzXML_md_sax(handler.ContentHandler):
1012
1013 def __init__(self):
1014 self.md = {}
1015 self.context = ''
1016
1017 def startElement(self, name, attrs):
1018 self.context += ':%s'%(name,)
1019 # print >>sys.stderr, ">>",self.context
1020 # sys.stderr.flush()
1021 if self.context.endswith(':msRun'):
1022 self.md['startTime'] = float(attrs['startTime'][2:-1])
1023 self.md['endTime'] = float(attrs['endTime'][2:-1])
1024 elif self.context.endswith(':parentFile'):
1025 self.md['fileName'] = unquote(attrs['fileName'])
1026 self.md['fileType'] = attrs['fileType']
1027 self.md['fileSha1'] = attrs['fileSha1']
1028 elif self.context.endswith(':instrument'):
1029 self.md['msManufacturer'] = attrs['manufacturer']
1030 self.md['msModel'] = attrs['model']
1031 self.md['msIonisation'] = attrs['ionisation']
1032 self.md['msMassAnalyzer'] = attrs['msType']
1033 elif self.context.endswith(':msInstrument:msManufacturer'):
1034 self.md['msManufacturer'] = attrs['value']
1035 elif self.context.endswith(':msInstrument:msModel'):
1036 self.md['msModel'] = attrs['value']
1037 elif self.context.endswith(':msInstrument:msIonisation'):
1038 self.md['msIonisation'] = attrs['value']
1039 elif self.context.endswith(':msInstrument:msMassAnalyzer'):
1040 self.md['msMassAnalyzer'] = attrs['value']
1041 elif self.context.endswith(':msInstrument:msDetector'):
1042 self.md['msDetector'] = attrs['value']
1043 elif self.context.endswith(':msInstrument:software'):
1044 if attrs['type'] == 'acquisition':
1045 self.md['acquisitionSoftware'] = attrs['name']
1046 self.md['acquisitionSoftwareVersion'] = attrs['version']
1047 elif self.context.endswith(':instrument:software'):
1048 if attrs['type'] == 'acquisition':
1049 self.md['acquisitionSoftware'] = attrs['name']
1050 self.md['acquisitionSoftwareVersion'] = attrs['version']
1051 elif self.context.endswith(':scan'):
1052 raise SAXException("Early termination")
1053
1054 def endElement(self, name):
1055 # print >>sys.stderr, "<<",self.context
1056 # sys.stderr.flush()
1057 self.context = self.context[0:-(len(name)+1)]
1058
1059
1060 class mzXML_spec_sax(handler.ContentHandler):
1061
1062 def __init__(self):
1063 self.context = ''
1064 self.content = ''
1065 self.spectra = []
1066 self.done = False
1067 self.scancount = 0
1068 self.scanmax = 1000
1069 self.scanstart = 0
1070
1071 def startElement(self, name, attrs):
1072 self.context += ':%s'%(name,)
1073 # print >>sys.stderr, ">>",self.context
1074 # sys.stderr.flush()
1075 self.content = ''
1076 if name == 'scan':
1077 self.scanmd = {}
1078 self.scanlevel = int(attrs['msLevel'])
1079 self.scannum = int(attrs['num'])
1080 if attrs.has_key('retentionTime'):
1081 self.rt = float(attrs['retentionTime'][2:-1])
1082 self.endMz = float(attrs['endMz'])
1083 self.startMz = float(attrs['startMz'])
1084 self.polarity = attrs.get('polarity',None)
1085 for (k,v) in attrs.items():
1086 self.scanmd[k] = v
1087 self.precursorMz = None
1088 self.scancount += 1
1089 elif name == 'msRun':
1090 self.scancount = 0
1091 self.spectra = []
1092 self.context = ':msRun'
1093 elif name == 'scanOrigin':
1094 self.scanmd['scanOrigin'] = attrs
1095
1096 def characters(self, content):
1097 if self.scancount >= self.scanstart:
1098 if self.context.endswith(':peaks') or \
1099 self.context.endswith(':precursorMz'):
1100 self.content += content
1101
1102 def endElement(self, name):
1103 if self.scancount >= self.scanstart:
1104 if self.context.endswith('scan:peaks'):
1105
1106 spec = array('f')
1107 # print >>sys.stderr, len(self.content), len(b64decode(self.content)), self.content
1108 spec.fromstring(b64decode(self.content))
1109 if sys.byteorder != 'big':
1110 spec.byteswap()
1111 d = {'msLevel':self.scanlevel}
1112 # Optional ('scan.' and prefixformat spec. needed)
1113 d.update({
1114 'scan.retentionTime:PT%fS':self.rt,
1115 'scan.startMz:%f':self.startMz,
1116 'scan.endMz:%f':self.endMz
1117 })
1118 if self.polarity != None:
1119 d.update({'scan.polarity:%s':self.polarity})
1120 if self.scanlevel == 2:
1121 d.update({'precursorMz':self.precursorMz})
1122 if self.scanmd.has_key('scanOrigin'):
1123 d.update({'scanOrigin.parentID:%s': self.scanmd['scanOrigin']['parentID'],
1124 'scanOrigin.num:%d': self.scanmd['scanOrigin']['num']})
1125 for (k,v) in self.scanmd.items():
1126 if not d.has_key(k):
1127 d[k] = v
1128 self.spectra.append((spec,d))
1129 if len(self.spectra) >= self.scanmax:
1130 self.scanstart = self.scancount+1
1131 raise SAXException("Early termination")
1132 elif self.context.endswith('scan:precursorMz'):
1133 self.precursorMz = float(self.content)
1134 if self.context.endswith('msRun'):
1135 self.done = True
1136 # print >>sys.stderr, "<<",self.context
1137 # sys.stderr.flush()
1138 self.context = self.context[0:-(len(name)+1)]
1139
1140 class FromMzXML:
1141 def __init__(self,filename):
1142 self.filename = filename
1143 self.parser = None
1144
1145 if not os.path.exists(self.filename):
1146 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1147 sys.exit(1)
1148
1149 if self.filename.endswith('.gz'):
1150 self.compressfmt = 'gz'
1151 else:
1152 self.compressfmt = None
1153
1154 def __del__(self):
1155 self.close()
1156
1157 def open(self):
1158
1159 if self.parser == None:
1160 # print >>sys.stderr, "Parsing out meta-data..."
1161 # sys.stderr.flush()
1162 self.parser = make_parser()
1163 self.handler = mzXML_md_sax()
1164 self.parser.setContentHandler(self.handler)
1165 try:
1166 if self.compressfmt == None:
1167 self.parser.parse(self.filename)
1168 elif self.compressfmt == 'gz':
1169 self.parser.parse(gzip.open(self.filename,'r'))
1170 else:
1171 print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1172 sys.exit(1)
1173 except SAXException:
1174 pass
1175
1176 self.md = self.handler.md
1177
1178 # print >>sys.stderr, self.md
1179 # sys.stderr.flush()
1180
1181 # print >>sys.stderr, "Set up spectrum parser..."
1182 # sys.stderr.flush()
1183 self.parser = make_parser()
1184 self.handler = mzXML_spec_sax()
1185 self.parser.setContentHandler(self.handler)
1186
1187 def close(self):
1188 pass
1189
1190 def spectra(self):
1191 self.open()
1192 while True:
1193 # print >>sys.stderr, "Spectrum parser to read spectra from",self.handler.scanstart
1194 # sys.stderr.flush()
1195 try:
1196 if self.compressfmt == None:
1197 self.parser.parse(self.filename)
1198 elif self.compressfmt == 'gz':
1199 self.parser.parse(gzip.open(self.filename,'r'))
1200 else:
1201 print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1202 sys.exit(1)
1203 except SAXException:
1204 pass
1205 for (s,d) in self.handler.spectra:
1206 # print >>sys.stderr, "Yeild spectrum number:",d['num']
1207 yield (s,d)
1208 if self.handler.done:
1209 break
1210
1211 def getMsRunMetaData(self):
1212 self.open()
1213 d = {'startTime:PT%fS':self.md['startTime'],
1214 'endTime:PT%fS':self.md['endTime']
1215 }
1216 return d
1217
1218 def getFilenames(self):
1219 return [ (self.md['fileName'],self.md['fileType'],self.md['fileSha1']) ]
1220
1221 def msManufacturer(self):
1222 return self.md.get('msManufacturer','')
1223
1224 def msModel(self):
1225 return self.md.get('msModel','')
1226
1227 def msIonisation(self):
1228 return self.md.get('msIonisation','')
1229
1230 def msMassAnalyzer(self):
1231 return self.md.get('msMassAnalyzer','')
1232
1233 def msDetector(self):
1234 return self.md.get('msDetector','')
1235
1236 def acquisitionSoftware(self):
1237 return self.md.get('acquisitionSoftware','')
1238
1239 def acquisitionSoftwareVersion(self):
1240 return self.md.get('acquisitionSoftwareVersion','')
1241
1242 def precursorPrecision(self,mz):
1243 return 0
1244
1245 def precursorTune(self):
1246 return False
1247
1248 def lc(self):
1249 return True
1250
1251 def maldi(self):
1252 return not self.lc()
1253
1254 class AB4700T2D:
1255 def __init__(self,filename,peaks=None):
1256 self.filename = filename
1257 self.datafiles = None
1258 self.deapp = None
1259 self.metadata = {}
1260 self.platespots = {}
1261 self.applyPeakDetect = []
1262 if peaks:
1263 self.applyPeakDetect = map(int,peaks.split(','))
1264
1265 if not os.path.exists(self.filename):
1266 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1267 sys.exit(1)
1268
1269 self.filename = os.path.abspath(self.filename)
1270
1271 self.dir = os.path.split(self.filename)[0]
1272
1273 def __del__(self):
1274 # Probably unnecessary
1275 self.close()
1276
1277 def open(self):
1278
1279 if self.deapp is None:
1280
1281 self.metadata['PLATEDEF'] = []
1282 self.metadata['PLATE'] = []
1283 self.metadata['SPOT'] = []
1284 self.metadata['SCAN'] = []
1285
1286 h = open(self.filename,'rb')
1287 r = csv.reader(h,'excel-tab')
1288 for l in r:
1289 if len(l) == 0:
1290 continue
1291 kw = l.pop(0).upper()
1292 if kw in ('PLATEDEF','PLATE','SPOT','SCAN'):
1293 self.metadata[kw].append({})
1294 while len(l)>=2:
1295 k = l.pop(0);v = l.pop(0);
1296 if (kw == 'PLATE' and \
1297 (k in ('plateID','spotXCount','spotYCount','plateManufacturer','plateModel'))) or \
1298 (kw == 'PLATEDEF' and \
1299 (k in ('plateID','plateManufacturer','plateModel','spotNaming','maldiMatrix'))) or \
1300 (kw == 'SPOT' and \
1301 (k in ('plateID','spotID','spotXPosition','spotYPosition','maldiMatrix'))) or \
1302 (kw == 'SCAN' and \
1303 (k in ('plateID','spotID','filename','index'))):
1304 self.metadata[kw][-1][k] = v
1305 elif k == '':
1306 break
1307 else:
1308 print >>sys.stderr,"Bad key %s for %s row"%(k,kw)
1309 sys.exit(1)
1310 h.close()
1311
1312 for pd in self.metadata['PLATEDEF']:
1313 # insert all the gory details needed for a particular plate model etc...
1314 if pd['plateManufacturer'] == 'ABI / SCIEX' and pd['plateModel'] == '01-192+06-BB':
1315 if not pd.has_key('spotNaming') or not pd.has_key('maldiMatrix') or \
1316 not pd['spotNaming'] in ('alpha','parallel','antiparallel'):
1317 print >>sys.stderr,"Bad plate definition, missing maldiMatrix or spotNaming, or bad spotNaming value."
1318 sys.exit(1)
1319 self.metadata['PLATE'].append({
1320 'plateID':pd['plateID'],
1321 'plateManufacturer':pd['plateManufacturer'],
1322 'plateModel':pd['plateModel'],
1323 'spotXCount':24,
1324 'spotYCount':8,
1325 })
1326 if pd['spotNaming'] == 'alpha':
1327 for y in xrange(0,8):
1328 for x in xrange(0,24):
1329 self.metadata['SPOT'].append({
1330 'plateID':pd['plateID'],
1331 'spotID':"%c%d"%(y+ord('A'),x+1),
1332 'spotXPosition':x,
1333 'spotYPosition':2*y+(x%2),
1334 'maldiMatrix':pd['maldiMatrix']
1335 })
1336 else:
1337 print >>sys.stderr,"Valid spotNaming value, not yet implemented."
1338 sys.exit(1)
1339
1340 for pl in self.metadata['PLATE']:
1341 self.platespots[pl['plateID']] = []
1342
1343 for sp in self.metadata['SPOT']:
1344 self.platespots[sp['plateID']].append(sp)
1345
1346 self.datafiles = [ (os.path.join(self.dir,md['filename']),int(md['index'])) for md in self.metadata['SCAN'] ]
1347 self.distinct_datafiles = dict([(os.path.join(self.dir,md['filename']),
1348 fileSHA(os.path.join(self.dir,md['filename']))) for md in self.metadata['SCAN']])
1349
1350 self.maptoscan = {}
1351 index = 0
1352 for md in self.metadata['SCAN']:
1353 self.maptoscan[(os.path.join(self.dir,md['filename']),int(md['index']))] = index
1354 index += 1
1355
1356 from win32com.client import Dispatch, gencache
1357 self.deapp = Dispatch('DataExplorer.Application',
1358 resultCLSID='{3FED40F1-D409-11D1-8B56-0060971CB54B}')
1359 self.delib = gencache.EnsureModule('{06972F50-13F6-11D3-A5CB-0060971CB54B}',0,4,2)
1360 self.deapp.AutomatedProcessing = 1
1361 self.deapp.Visible = 0
1362
1363 def close(self):
1364
1365 if not self.deapp is None:
1366 self.deapp.Quit()
1367 self.deapp = None
1368
1369 def peakDetect(self,msLevel):
1370 return msLevel in self.applyPeakDetect
1371
1372 # remove .set and .cts files to prevent DataExplorer crashes - DT 11-1-2006
1373 def remsettingsfile(self,fn):
1374
1375 if ((len(fn) > 3) and (fn[-4:].lower() == '.t2d')):
1376 path2del = fn[:-4] + '.cts'
1377 if os.path.exists(path2del):
1378 os.unlink(path2del)
1379 path2del = fn[:-4] + '.set'
1380 if os.path.exists(path2del):
1381 os.unlink(path2del)
1382
1383 def spectra(self):
1384 self.open()
1385
1386 prevfile = ''
1387 for (f,i) in self.datafiles:
1388
1389 if f != prevfile:
1390 self.deapp.Documents.Close()
1391
1392 if not os.path.exists(f):
1393 print >>sys.stderr, "Filename %s does not exist."%(f,)
1394 sys.exit(1)
1395
1396 self.remsettingsfile(f)
1397 self.remsettingsfile(prevfile)
1398
1399 self.deapp.Documents.Open(f)
1400
1401 doc = self.deapp.Documents.Item(0)
1402 sv = doc.SpecView
1403
1404 sv.SetSpectrumAt(i-1)
1405
1406 (tf,fixedMass) = doc.InstrumentSettings.GetSetting(self.delib.constants.dePreCursorIon,i-1,None)
1407
1408 if fixedMass > 0:
1409 msLevel = 2
1410 else:
1411 msLevel = 1
1412
1413 dataArr = array('f');
1414 peakDetect="False"
1415 if self.peakDetect(msLevel):
1416 peakDetect="True"
1417 sv.CorrectBaseline()
1418 doc.SpecSetup.NoiseReductionStdev = 2.0
1419 sv.FilterNoise(self.delib.constants.deNoiseRemoval)
1420 sv.PeakSettings.PeakThresholdPercent = 1
1421 st,sp = sv.GetPeakData(self.delib.constants.deSpecPeakAll,
1422 self.delib.constants.deSpecPeakSortMass, 0, 0)
1423 for j in xrange(len(sp)):
1424 x = sp[j][self.delib.constants.deSpecPeakCentroidMass]
1425 y = sp[j][self.delib.constants.deSpecPeakArea]
1426 if y <= 0:
1427 continue
1428 dataArr.append(x)
1429 dataArr.append(y)
1430 else:
1431 n,s = sv.GetRawData(0,0)
1432 for j in xrange(n):
1433 dataArr.append(s[j][0])
1434 dataArr.append(s[j][1])
1435
1436 if len(dataArr) == 0:
1437 continue
1438
1439 polarity_val = "+"
1440
1441 # Scan level attributes
1442 # Required by mzXML (no format spec. needed)
1443 d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect}
1444 # Optional ('scan.' and prefixformat spec. needed)
1445 d.update({
1446 'scan.polarity:%s':polarity_val,
1447 })
1448
1449 scanindex = self.maptoscan[(f,i)]
1450 d.update({
1451 'plateID':self.metadata['SCAN'][scanindex]['plateID'],
1452 'spotID':self.metadata['SCAN'][scanindex]['spotID'],
1453 })
1454 d.update({'scanOrigin.parentID:%s': self.distinct_datafiles[f],
1455 'scanOrigin.num:%d': i})
1456 if msLevel == 1:
1457 yield (dataArr,d)
1458 else:
1459 # tandem MS scans add the following
1460 # Required by mzXML
1461 d.update({'precursorMz':fixedMass})
1462 # Optional
1463 d.update({'precursorMz.precursorCharge:%d':1})
1464 yield (dataArr,d)
1465
1466 prevfile = f
1467
1468 self.deapp.Documents.Close()
1469
1470 self.remsettingsfile(prevfile)
1471
1472 def getMsRunMetaData(self):
1473 self.open()
1474 return {}
1475
1476 def getFilenames(self):
1477 return [ t for t in self.distinct_datafiles.items() ]
1478
1479 def msManufacturer(self):
1480 return "ABI / SCIEX"
1481
1482 def msModel(self):
1483 return "4700 Proteomic Analyzer"
1484
1485 def msIonisation(self):
1486 return "MALDI"
1487
1488 def msMassAnalyzer(self):
1489 return "TOF"
1490
1491 def msDetector(self):
1492 return None
1493
1494 def acquisitionSoftware(self):
1495 return "DataExplorer"
1496
1497 def acquisitionSoftwareVersion(self):
1498 return "1.0"
1499
1500 def precursorPrecision(self,mz):
1501 return .1
1502
1503 def precursorTune(self):
1504 return False
1505
1506 def lc(self):
1507 return False
1508
1509 def maldi(self):
1510 return not self.lc()
1511
1512 def plateData(self):
1513 self.open()
1514 for d in self.metadata['PLATE']:
1515 yield d
1516
1517 def spotData(self,plate):
1518 self.open()
1519 for s in self.platespots[plate]:
1520 yield s
1521
1522 class scanfilter:
1523 def __init__(self,str):
1524 self.rtre = re.compile('^PT([0-9]+(\.[0-9]+)?)S$')
1525 self.intre = re.compile('^[1-9][0-9]*$')
1526 self.floatre = re.compile('^[0-9]+(\.[0-9]+)?$')
1527 self.cl = []
1528 clause = str.split(',')
1529 for cl in clause:
1530 scl = cl.split('.')
1531 m = self.rtre.match(scl[2].strip())
1532 if m != None:
1533 v = float(m.group(1))
1534 elif self.floatre.match(scl[2].strip()):
1535 v = float(scl[2])
1536 elif self.intre.match(scl[2].strip()):
1537 v = int(scl[2])
1538 else:
1539 v = scl[2]
1540 self.cl.append((scl[0],scl[1],v))
1541
1542 def test(self,d):
1543 a = {}
1544 for (k,v) in d.iteritems():
1545 k0 = k.split(':')[0].split('.')[-1]
1546 a[k0] = v
1547 for cl in self.cl:
1548 if type(a[cl[0]]) == type(u'') or \
1549 type(a[cl[0]]) == type('') :
1550 m = self.rtre.match(a[cl[0]].strip())
1551 if m != None:
1552 v1 = float(m.group(1))
1553 elif self.floatre.match(a[cl[0]].strip()):
1554 v1 = float(a[cl[0]])
1555 elif self.intre.match(a[cl[0]].strip()):
1556 v1 = int(a[cl[0]])
1557 else:
1558 v1 = a[cl[0]].strip()
1559 else:
1560 v1 = a[cl[0]]
1561 v2 = cl[2]
1562 # print >>sys.stderr, "Spectrum test:",v1,type(v1),cl[1],v2,type(v2)
1563 # sys.stderr.flush()
1564 if cl[1] == 'eq' and v1 != v2:
1565 # print >>sys.stderr, "FALSE"
1566 # sys.stderr.flush()
1567 return False
1568 elif cl[1] == 'ne' and not v1 != v2:
1569 return False
1570 elif cl[1] == 'lt' and not v1 < v2:
1571 return False
1572 elif cl[1] == 'le' and not v1 <= v2:
1573 return False
1574 elif cl[1] == 'gt' and not v1 > v2:
1575 return False
1576 elif cl[1] == 'ge' and not v1 >= v2:
1577 return False
1578 # print >>sys.stderr, "TRUE"
1579 # sys.stderr.flush()
1580 return True
1581
1582 if __name__ == '__main__':
1583
1584 from optparse import OptionParser
1585 import sys
1586 import glob
1587
1588 parser = OptionParser()
1589 parser.add_option("-R", "--rawdata", type="string", dest="rawdata", default="",\
1590 help="Format of raw data. Optional if raw spectra file ends in .wiff or .t2m.")
1591 parser.add_option("-X", "--xmlformat", type="string", dest="xmlformat", default="",\
1592 help="XML format to output. Optional if output file ends in .mzxml or .mzdata.")
1593 parser.add_option("-o", "--output", type="string", dest="output", default="",\
1594 help="Output file.")
1595 parser.add_option("-p", "--peaks", type="string", dest="peaks", default="2",\
1596 help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.")
1597 parser.add_option("-f", "--filter", type="string", dest="filter", default=None,\
1598 help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.")
1599 parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\
1600 help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.")
1601 parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,\
1602 help="Compress output file. Valid options 'gz'. Default: None.")
1603
1604 ( opts, args ) = parser.parse_args()
1605
1606 if opts.output == "" and opts.xmlformat == "":
1607 parser.error("Either -o or -X must be suppied.")
1608 sys.exit(1)
1609
1610 if opts.filter != None:
1611 filt=scanfilter(opts.filter)
1612 else:
1613 filt=None
1614
1615 argsglob = []
1616 for a in args:
1617 argsglob.extend(glob.glob(a))
1618
1619 if opts.output != "" and len(argsglob) > 1:
1620 parser.error("At most one raw spectrum data file is permitted, if -o option is used.")
1621
1622 for a in argsglob:
1623
1624 if opts.output != "":
1625 o = opts.output
1626 else:
1627 p = a.rfind('.')
1628 o = a[0:p]+'.'+opts.xmlformat
1629
1630 if o == a:
1631 print >>sys.stderr, "Input file %s and output file %s are the same!"%(a,o)
1632 sys.exit(1)
1633
1634 print >>sys.stderr, "Processing",a,"to",o
1635
1636 if opts.rawdata.lower() in ('wiff','qstar'):
1637 r = QStarWiff(a,opts.peaks)
1638 elif opts.rawdata.lower() in ('t2d','dat','ab4700','ab4800','mariner','voyager'):
1639 r = AB4700T2D(a,opts.peaks)
1640 elif opts.rawdata.lower() in ('mzxml',):
1641 r = FromMzXML(a)
1642 elif a[-5:].lower() in ('.wiff',):
1643 r = QStarWiff(a,opts.peaks)
1644 elif a[-4:].lower() in ('.t2m',):
1645 r = AB4700T2D(a,opts.peaks)
1646 elif a[-6:].lower() in ('.mzxml',):
1647 r = FromMzXML(a)
1648 else:
1649 parser.error("Bad rawdata format specification.")
1650 sys.exit(1)
1651
1652 if opts.xmlformat.lower() == 'mzxml':
1653 x = ToMzXML(r,o,opts.compress,filt)
1654 elif opts.xmlformat.lower() == 'mzdata':
1655 x = ToMzData(r,o,opts.compress,filt)
1656 elif opts.output[-6:].lower() in ('.mzxml',):
1657 x = ToMzXML(r,opts.output,opts.compress,filt)
1658 elif opts.output[-7:].lower() in ('.mzdata',):
1659 x = ToMzData(r,opts.output,opts.compress,filt)
1660 else:
1661 parser.error("Bad xml format specification.")
1662 sys.exit(1)
1663
1664 x.write(debug=opts.debug)