ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/pymsxml/pymsxml.py
Revision: 1.2
Committed: Sat Nov 4 14:04:22 2006 UTC (15 years, 11 months ago) by nje01
Branch: MAIN
Changes since 1.1: +29 -14 lines
Log Message:
*** empty log message ***

Line File contents
1 #!/usr/bin/env python
2
3 import sys,os,os.path
4 import re
5 import sha
6 import math
7 import tempfile
8 import csv
9 import gzip
10 import zipfile
11 from array import array
12 from base64 import b64encode,b64decode
13 from urllib import quote,unquote
14 from xml.sax import *
15
16 class NameVersion:
17 def __init__(self):
18 self.name_ = "PyMsXML"
19 self.majorVer = 0
20 self.minorVer = 4
21 self.revVer = 0
22 def version(self):
23 return "%d.%d.%d"%(self.majorVer,self.minorVer,self.revVer)
24 def name(self):
25 return self.name_
26
27 def fileSHA(filename):
28 s = sha.new()
29 h = open(filename,'rb')
30 while True:
31 buffer = h.read(1024)
32 if buffer == '':
33 break
34 s.update(buffer)
35 h.close()
36 return s.hexdigest().lower()
37
38 class ToMzXML:
39 def __init__(self,reader,filename,cmpfmt=None,filt=None):
40 self.filename = filename
41 self.compressfmt = cmpfmt
42 self.reader = reader
43 self.initSHA()
44 self.writeByteCounter = 0
45 self.actualScanCount = 0
46 self.scanIndices = {}
47 self.filter = filt
48
49 def __del__(self):
50 if hasattr(self,'tmpFileName') and self.tmpFileName and self.tmpFileName != '':
51 os.unlink(self.tmpFileName)
52
53 def getFilename(self):
54 return self.filename
55
56 def initSHA(self):
57 self.sha1 = sha.new()
58
59 def updateSHA(self,s):
60 self.sha1.update(s)
61
62 def getSHA(self):
63 return self.sha1.hexdigest().lower()
64
65 def writeString(self,fh,data):
66 self.writeByteCounter += len(data)
67 self.sha1.update(data)
68 fh.write(data)
69
70 def write(self,debug=False):
71
72 # Make a temporary file for the scan data to count the number of spectra.
73 (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
74 tmpFile = os.fdopen(tmpFileFD,'wb')
75
76 # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
77 self.write_scans(tmpFile,debug)
78
79 tmpFile.close()
80
81 # Reset!
82
83 self.initSHA()
84 self.writeByteCounter = 0
85
86 if self.compressfmt == None and not self.filename.endswith('.gz'):
87 xmlFile = open(self.filename,'wb')
88 elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
89 if self.filename.endswith('.gz'):
90 xmlFile = gzip.open(self.filename,'wb')
91 else:
92 xmlFile = gzip.open(self.filename+'.gz','wb')
93 else:
94 print >>sys.stderr, "Bad compressfmt specification"
95 sys.exit(1)
96
97 self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
98
99 outStr = "<mzXML "
100 outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1\" "
101 outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
102 outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd\""
103 outStr += ">\n"
104
105 self.writeString (xmlFile,outStr)
106
107 outStr = "<msRun"
108 outStr += " scanCount=\"%d\"" % (self.actualScanCount,)
109 for (k,v) in self.reader.getMsRunMetaData().items():
110 k,f = k.split(':')
111 outStr += (" %%s=\"%s\""%(f,))%(k,v)
112 outStr += ">\n"
113
114 self.writeString (xmlFile,outStr)
115
116 filehashes = {}
117
118 for f in self.reader.getFilenames():
119 if len(f) == 2:
120 outStr = "<parentFile "
121 outStr += "fileName=\"%s\" " % (quote(f[0]),)
122 outStr += "fileType=\"RAWData\" "
123 outStr += "fileSha1=\"%s\"" % (f[1],)
124 outStr += "/>\n"
125 else:
126 outStr = "<parentFile "
127 outStr += "fileName=\"%s\" " % (f[0],)
128 outStr += "fileType=\"%s\" " % (f[1],)
129 outStr += "fileSha1=\"%s\"" % (f[2],)
130 outStr += "/>\n"
131 self.writeString (xmlFile,outStr)
132
133 outStr = "<msInstrument>\n"
134 outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),)
135 outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),)
136 if self.reader.msIonisation():
137 outStr += "<msIonisation category=\"msIonisation\" value=\"%s\"/>\n" % (self.reader.msIonisation(),)
138 if self.reader.msMassAnalyzer():
139 outStr += "<msMassAnalyzer category=\"msMassAnalyzer\" value=\"%s\"/>\n" % (self.reader.msMassAnalyzer(),)
140 if self.reader.msDetector():
141 outStr += "<msDetector category=\"msDetector\" value=\"%s\"/>\n" % (self.reader.msDetector(),)
142
143 self.writeString (xmlFile,outStr)
144
145 outStr = "<software "
146 outStr += "type=\"acquisition\" "
147 outStr += "name=\"%s\" "%(self.reader.acquisitionSoftware(),)
148 outStr += "version=\"%s\""%(self.reader.acquisitionSoftwareVersion(),)
149 outStr += "/>\n"
150
151 self.writeString (xmlFile,outStr)
152
153 outStr = "</msInstrument>\n"
154
155 self.writeString (xmlFile,outStr)
156
157 outStr = "<dataProcessing>\n"
158
159 self.writeString (xmlFile,outStr)
160
161 nv = NameVersion()
162
163 outStr = "<software "
164 outStr += "type=\"conversion\" "
165 outStr += "name=\"%s\" "%(nv.name(),)
166 outStr += "version=\"%s\"" % (nv.version(),)
167 outStr += "/>\n"
168
169 self.writeString (xmlFile,outStr)
170
171 if (self.reader.peakDetect(1)):
172 outStr = "<processingOperation "
173 outStr += "name=\"peak_detection_level_1\" "
174 outStr += "value=\"true\""
175 outStr += "/>\n"
176 outStr += "<processingOperation "
177 outStr += "name=\"peak_detection_level_1_threshold\" "
178 outStr += "value=\"1%\""
179 outStr += "/>\n"
180 outStr += "<processingOperation "
181 outStr += "name=\"peak_detection_level_1_software\" "
182 outStr += "value=\"Analyst\""
183 outStr += "/>\n"
184 self.writeString (xmlFile,outStr)
185 if (self.reader.peakDetect(2)):
186 outStr = "<processingOperation "
187 outStr += "name=\"peak_detection_level_2\" "
188 outStr += "value=\"true\""
189 outStr += "/>\n"
190 outStr += "<processingOperation "
191 outStr += "name=\"peak_detection_level_2_threshold\" "
192 outStr += "value=\"1%\""
193 outStr += "/>\n"
194 outStr += "<processingOperation "
195 outStr += "name=\"peak_detection_level_2_software\" "
196 outStr += "value=\"Analyst\""
197 outStr += "/>\n"
198 self.writeString (xmlFile,outStr)
199
200 outStr = "<processingOperation "
201 outStr += "name=\"min_peaks_per_spectra\" "
202 outStr += "value=\"1\""
203 outStr += "/>\n"
204
205 self.writeString (xmlFile,outStr)
206
207 outStr = "</dataProcessing>\n"
208
209 self.writeString (xmlFile,outStr)
210
211 if self.reader.maldi():
212 outStr = '<spotting>\n'
213 for d in self.reader.plateData():
214 outStr += '<plate plateID="%s" spotXCount="%s" spotYCount="%s">\n' % \
215 (d['plateID'],d['spotXCount'],d['spotYCount'])
216 outStr += '<plateManufacturer category="plateManufacturer" value="%s"/>\n' % \
217 (d['plateManufacturer'],)
218 outStr += '<plateModel category="plateModel" value="%s"/>\n' % \
219 (d['plateModel'],)
220 for s in self.reader.spotData(d['plateID']):
221 outStr += '<spot spotID="%s" spotXPosition="%s" spotYPosition="%s">\n' % \
222 (s['spotID'],s['spotXPosition'],s['spotYPosition'])
223 outStr += '<maldiMatrix category="maldiMatrix" value="%s"/>\n' % \
224 (s['maldiMatrix'],)
225 outStr += '</spot>\n'
226 outStr += '</plate>\n'
227 outStr += '</spotting>\n'
228 self.writeString(xmlFile,outStr)
229
230 if self.reader.lc():
231 pass
232
233 scanOffset = self.writeByteCounter
234
235 tmpFile = open(self.tmpFileName,'rb')
236 while True:
237 tmpStr = tmpFile.read(1024)
238 if tmpStr == '':
239 break
240 self.writeString(xmlFile,tmpStr)
241 tmpFile.close()
242 os.unlink(self.tmpFileName)
243 self.tmpFileName = ''
244
245 outStr = "</msRun>\n"
246 self.writeString (xmlFile,outStr)
247
248 indexOffset = self.writeByteCounter
249
250 outStr = "<index "
251 outStr += "name=\"scan\" "
252 outStr += ">\n"
253
254 self.writeString (xmlFile,outStr)
255
256 for i in xrange(1,self.actualScanCount+1):
257 outStr = "<offset id=\"%d\">" % (i,)
258 outStr += "%d</offset>\n" % (self.scanIndices[i] + scanOffset,)
259 self.writeString(xmlFile,outStr)
260
261 outStr = "</index>\n"
262 self.writeString (xmlFile,outStr)
263
264 outStr = "<indexOffset>%d</indexOffset>\n" % (indexOffset,)
265
266 self.writeString (xmlFile,outStr)
267
268 self.writeString (xmlFile,"<sha1>")
269
270 outStr = self.getSHA()
271
272 self.writeString (xmlFile,outStr)
273
274 self.writeString (xmlFile,"</sha1>\n")
275
276 outStr = "</mzXML>\n"
277 self.writeString (xmlFile,outStr)
278
279 xmlFile.close()
280
281 def write_scans(self,xmlFile,debug=False):
282
283 msLevel = 0
284 scanNumber = 0
285 ancestors = []
286 self.writeByteCounter = 0
287
288 for (s,d) in self.reader.spectra():
289
290 if self.filter != None and not self.filter.test(d):
291 continue
292
293 if debug and scanNumber >= 10:
294 break
295
296 if not d.has_key('msLevel'):
297 print >>sys.stderr, "Required scan attributes missing."
298 sys.exit(1)
299
300 prevLevel = msLevel
301 msLevel = d['msLevel']
302
303 if prevLevel < msLevel and prevLevel > 0:
304 # We went "in" a level, push scan number of parent
305 ancestors.append((scanNumber,prevSpec))
306 elif prevLevel > msLevel and msLevel > 0:
307 if len(ancestors) == 0:
308 pass #print >>sys.stderr, "No ancestor for scan %s at level %s"%(scanNumber,msLevel)
309 else:
310 ancestors.pop()
311
312 outStr = ''
313 if prevLevel > msLevel:
314 for m in xrange(0,prevLevel-msLevel+1):
315 outStr += "</scan>\n"
316 else:
317 if prevLevel > 0 and prevLevel == msLevel:
318 outStr += "</scan>\n"
319
320 self.writeString(xmlFile,outStr)
321
322 scanNumber = scanNumber + 1
323
324 self.scanIndices[scanNumber] = self.writeByteCounter
325
326 totIonCurrent = 0
327 maxmz = None
328 minmz = None
329 basePeakMz = None
330 basePeakIntensity = None
331 peaksCount = 0
332 for i in xrange(0,len(s),2):
333 x = s[i]; y = s[i+1]
334 if minmz is None or minmz > x:
335 minmz = x
336 if maxmz is None or maxmz < x:
337 maxmz = x
338 totIonCurrent += y
339 if basePeakIntensity is None or basePeakIntensity < y:
340 basePeakIntensity = y
341 basePeakMz = x
342 peaksCount += 1
343
344 outStr = "<scan num=\"%d\" msLevel=\"%d\"" % (scanNumber,msLevel)
345 outStr += " peaksCount=\"%d\"" % (peaksCount,)
346 outStr += " lowMz=\"%f\"" % (minmz,)
347 outStr += " highMz=\"%f\"" % (maxmz,)
348 outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,)
349 outStr += " basePeakMz=\"%f\"" % (basePeakMz,)
350 outStr += " basePeakIntensity=\"%f\"" % (basePeakIntensity,)
351
352 for (k,v) in d.items():
353 if k.startswith('scan.'):
354 k,f = k.split(':')
355 k = k[5:]
356 outStr += (" %%s=\"%s\""%(f,))%(k,v)
357 outStr += ">\n"
358 self.writeString(xmlFile,outStr)
359
360 any = False
361 for (k,v) in d.items():
362 if k.startswith('scanOrigin.'):
363 if not any:
364 any = True
365 outStr = "<scanOrigin"
366 k,f = k.split(':')
367 k = k[11:]
368 outStr += (" %%s=\"%s\""%(f,))%(k,v)
369 if any:
370 outStr += "/>\n"
371 self.writeString(xmlFile,outStr)
372
373 if msLevel > 1:
374
375 if not d.has_key('precursorMz') :
376 print >>sys.stderr, "Required precursorMz attribute missing."
377 sys.exit(1)
378
379 # We scan the parent spectrum in the region of the
380 # precursorMz to establish the intensity. Optionally,
381 # we tune the precursorMz itself, on the basis of this
382
383 pMz = d['precursorMz']
384 tol = self.reader.precursorPrecision(pMz)
385 pMzLB = pMz-tol
386 pMzUB = pMz+tol
387 pMzIntensity = 0
388
389 if len(ancestors) > 0:
390 for i in xrange(0,len(ancestors[-1][1]),2):
391 x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
392 if x < pMzLB:
393 continue
394 if x > pMzUB:
395 break
396 if pMzIntensity < y:
397 pMzIntensity = y
398 pMzTune = x
399
400 if self.reader.precursorTune():
401 pMz = pMzTune
402
403 outStr = "<precursorMz"
404 for (k,v) in d.items():
405 if k.startswith('precursorMz.'):
406 k,f = k.split(':')
407 k = k[12:]
408 outStr += (" %%s=\"%s\""%(f,))%(k,v)
409 if len(ancestors)>0:
410 outStr += " precursorScanNum=\"%d\""%(ancestors[-1][0],)
411 outStr += " precursorIntensity=\"%f\">"%(pMzIntensity,)
412 else:
413 outStr += ">"
414 outStr += "%f</precursorMz>\n" % (pMz,)
415 self.writeString(xmlFile,outStr)
416
417 if self.reader.maldi():
418 outStr = '<maldi'
419 for (k,v) in d.items():
420 if k.startswith('maldi.'):
421 k,f = k.split(':')
422 k = k[6:]
423 outStr += (" %%s=\"%s\""%(f,))%(k,v)
424 outStr += " plateID=\"%s\""%(d['plateID'],)
425 outStr += " spotID=\"%s\""%(d['spotID'],)
426 outStr += "/>\n"
427 self.writeString(xmlFile,outStr)
428
429 outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">"
430 self.writeString(xmlFile,outStr)
431
432 # Spec says the byte order shall be
433 # network, which is the same as big endian.
434
435 if sys.byteorder != 'big':
436 s.byteswap()
437
438 outStr = b64encode(s.tostring())
439 if not debug:
440 self.writeString(xmlFile,outStr)
441 else:
442 self.writeString(xmlFile,outStr[0:100])
443
444 if sys.byteorder != 'big':
445 s.byteswap()
446
447 outStr = "</peaks>\n"
448 self.writeString(xmlFile,outStr)
449
450 if self.reader.maldi():
451 outStr = ''
452 for (k,v) in d.items():
453 if k.startswith('maldi.'):
454 k,f = k.split(':')
455 k = k[6:]
456 outStr += '<nameValue'
457 outStr += ' name="%s"'%(k,)
458 outStr += (' value="%f"'%(f,))%(v,)
459 outStr += '/>\n'
460 ## outStr += '<nameValue'
461 ## outStr += ' name="plateID"'
462 ## outStr += ' value="%s"'%(d['plateID'],)
463 ## outStr += '/>\n'
464 ## outStr += '<nameValue'
465 ## outStr += ' name="spotID"'
466 ## outStr += ' value="%s"'%(d['spotID'],)
467 ## outStr += '/>\n'
468 self.writeString(xmlFile,outStr)
469
470 xmlFile.flush()
471
472 prevSpec = s
473
474 outStr = ""
475 for m in xrange(0,msLevel):
476 outStr += "</scan>\n"
477
478 self.writeString(xmlFile,outStr)
479 self.actualScanCount = scanNumber
480
481 class ToMzData:
482 def __init__(self,reader,filename,cmpfmt=None,filt=None):
483 self.filename = filename
484 self.compressfmt = cmpfmt
485 self.reader = reader
486 self.actualScanCount = 0
487 self.initSHA()
488 self.filter = filt
489
490 def writeString(self,fh,data):
491 # self.writeByteCounter += len(data)
492 self.sha1.update(data)
493 fh.write(data)
494
495 def initSHA(self):
496 self.sha1 = sha.new()
497
498 def updateSHA(self,s):
499 self.sha1.update(s)
500
501 def getSHA(self):
502 return self.sha1.hexdigest().lower()
503
504 def write(self,debug=False):
505
506 # Make a temporary file for the scan data to count the number of spectra.
507 (tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml')
508 tmpFile = os.fdopen(tmpFileFD,'wb')
509
510 # Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount.
511 self.write_scans(tmpFile,debug)
512
513 tmpFile.close()
514
515 scanhash = self.getSHA()
516
517 self.initSHA()
518
519 if self.compressfmt == None and not self.filename.endswith('.gz'):
520 xmlFile = open(self.filename,'wb')
521 elif self.compressfmt == 'gz' or self.filename.endswith('.gz'):
522 if self.filename.endswith('.gz'):
523 xmlFile = gzip.open(self.filename,'wb')
524 else:
525 xmlFile = gzip.open(self.filename+'.gz','wb')
526 else:
527 print >>sys.stderr, "Bad compressfmt specification"
528 sys.exit(1)
529
530 self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n')
531
532 outStr = "<mzData "
533 outStr += "version=\"1.05\" "
534 outStr += "accessionNumber=\"pymsxml:%s\" "%(scanhash,)
535 outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
536 # outStr += "xmlns=\"http://psidev.sourceforge.net/ms/xml/mzdata\" "
537 outStr += "xsi:noNamespaceSchemaLocation=\"http://psidev.sourceforge.net/ms/xml/mzdata/mzdata.xsd\""
538 outStr += ">\n"
539
540 self.writeString (xmlFile,outStr)
541
542 outStr = "<description>\n"
543 outStr += "<admin>\n"
544 outStr += "<sampleName></sampleName>\n"
545 outStr += "<contact>\n"
546 outStr += "<name></name>\n"
547 outStr += "<institution></institution>\n"
548 outStr += "</contact>\n"
549 outStr += "</admin>\n"
550 outStr += "<instrument>\n"
551 outStr += "<instrumentName></instrumentName>\n"
552 outStr += "<source>\n"
553 outStr += "</source>\n"
554 outStr += "<analyzerList count=\"1\">\n"
555 outStr += "<analyzer>\n"
556 outStr += "</analyzer>\n"
557 outStr += "</analyzerList>\n"
558 outStr += "<detector>\n"
559 outStr += "</detector>\n"
560 outStr += "</instrument>\n"
561 outStr += "<dataProcessing>\n"
562 outStr += "<software>\n"
563 outStr += "<name>%s</name>\n"%(self.reader.acquisitionSoftware(),)
564 outStr += "<version>%s</version>\n"%(self.reader.acquisitionSoftwareVersion(),)
565 outStr += "</software>\n"
566 outStr += "<software>\n"
567 nv = NameVersion()
568 outStr += "<name>%s</name>\n"%(nv.name(),)
569 outStr += "<version>%s</version>\n"%(nv.version(),)
570 outStr += "</software>\n"
571 outStr += "</dataProcessing>\n"
572 outStr += "</description>\n"
573
574 self.writeString (xmlFile,outStr)
575
576 outStr = "<spectrumList"
577 outStr += " count=\"%d\">\n"%(self.actualScanCount,)
578
579 self.writeString (xmlFile,outStr)
580
581 tmpFile = open(self.tmpFileName,'rb')
582 while True:
583 tmpStr = tmpFile.read(1024)
584 if tmpStr == '':
585 break
586 self.writeString(xmlFile,tmpStr)
587 tmpFile.close()
588 os.unlink(self.tmpFileName)
589 self.tmpFileName = ''
590
591 outStr = "</spectrumList>\n</mzData>\n"
592
593 self.writeString (xmlFile,outStr)
594
595 def write_scans(self,xmlFile,debug=False):
596
597 msLevel = 0
598 scanNumber = 0
599 ancestors = []
600
601 self.initSHA()
602 # self.writeByteCounter = 0
603
604 for (s,d) in self.reader.spectra():
605
606 if self.filter != None and not self.filter.test(d):
607 continue
608
609 if debug and scanNumber >= 10:
610 break
611
612 if not d.has_key('msLevel'):
613 print >>sys.stderr, "Required scan attributes missing."
614 sys.exit(1)
615
616 prevLevel = msLevel
617 msLevel = d['msLevel']
618
619 if prevLevel < msLevel and prevLevel > 0:
620 # We went "in" a level, push scan number of parent
621 ancestors.append((scanNumber,prevSpec))
622 elif prevLevel > msLevel and msLevel > 0:
623 ancestors.pop()
624
625 scanNumber = scanNumber + 1
626
627 totIonCurrent = 0
628 maxmz = None
629 minmz = None
630 basePeakMz = None
631 basePeakIntensity = None
632 peaksCount = 0
633 for i in xrange(0,len(s),2):
634 x = s[i]; y = s[i+1]
635 if minmz is None or minmz > x:
636 minmz = x
637 if maxmz is None or maxmz < x:
638 maxmz = x
639 totIonCurrent += y
640 if basePeakIntensity is None or basePeakIntensity < y:
641 basePeakIntensity = y
642 basePeakMz = x
643 peaksCount += 1
644
645 outStr = "<spectrum id=\"%d\">\n" % (scanNumber,)
646 outStr += "<spectrumDesc>\n"
647 outStr += "<spectrumSettings>\n"
648
649 self.writeString (xmlFile,outStr)
650
651 # outStr += "<acqSpecification>\n"
652 # outStr += "</acqSpecification>\n"
653
654 outStr = "<spectrumInstrument"
655 outStr += " msLevel=\"%d\""%(msLevel,)
656 if d.has_key('scan.startMz') and \
657 d.has_key('scan.endMz'):
658 outStr += " mzRangeStart=\"%f\""%(d['scan.startMz'],)
659 outStr += " mzRangeStop=\"%f\""%(d['scan.endMz'],)
660 outStr += ">\n"
661 for (k,v) in d.items():
662 if k.startswith('scan.'):
663 k,f = k.split(':')
664 k = k[5:]
665
666 if k == "polarity":
667 if v == '+':
668 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Positive",)
669 elif v == '-':
670 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Negative",)
671 elif k == 'retentionTime':
672 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"TimeInSeconds\" value=\"%f\"/>\n"%(v,)
673 else:
674 outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
675 outStr += "<userParam name=\"lowMz\" value=\"%f\"/>\n"%(minmz,)
676 outStr += "<userParam name=\"highMz\" value=\"%f\"/>\n"%(maxmz,)
677 outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,)
678 outStr += "<userParam name=\"basePeakMz\" value=\"%f\"/>\n"%(basePeakMz,)
679 outStr += "<userParam name=\"basePeakIntensity\" value=\"%f\"/>\n"%(basePeakIntensity,)
680 outStr += "</spectrumInstrument>\n"
681 outStr += "</spectrumSettings>\n"
682
683 self.writeString (xmlFile,outStr)
684
685 if msLevel > 1:
686
687 if not d.has_key('precursorMz') :
688 print >>sys.stderr, "Required precursorMz attribute missing."
689 sys.exit(1)
690
691 # We scan the parent spectrum in the region of the
692 # precursorMz to establish the intensity. Optionally,
693 # we tune the precursorMz itself, on the basis of this
694
695 pMz = d['precursorMz']
696 tol = self.reader.precursorPrecision(pMz)
697 pMzLB = pMz-tol
698 pMzUB = pMz+tol
699 pMzIntensity = 0
700
701 for i in xrange(0,len(ancestors[-1][1]),2):
702 x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1];
703 if x < pMzLB:
704 continue
705 if x > pMzUB:
706 break
707 if pMzIntensity < y:
708 pMzIntensity = y
709 pMzTune = x
710
711 if self.reader.precursorTune():
712 pMz = pMzTune
713
714 outStr = "<precursorList count=\"1\">\n"
715
716 outStr += "<precursor"
717 outStr += " msLevel=\"%d\""%(msLevel,)
718 outStr += " spectrumRef=\"%d\""%(ancestors[-1][0],)
719 outStr += ">\n"
720
721 self.writeString(xmlFile,outStr)
722
723 outStr = "<ionSelection>\n"
724 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"MassToChargeRatio\" value=\"%f\"/>\n"%(pMz,)
725 outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Intensity\" value=\"%f\"/>\n"%(pMzIntensity,)
726 for (k,v) in d.items():
727 if k.startswith('precursorMz.'):
728 k,f = k.split(':')
729 k = k[12:]
730 outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v)
731 outStr += "</ionSelection>\n"
732
733 self.writeString(xmlFile,outStr)
734
735 outStr = "<activation>\n"
736 outStr += "</activation>\n"
737 outStr += "</precursor>\n"
738 outStr += "</precursorList>\n"
739
740 self.writeString(xmlFile,outStr)
741
742 if self.reader.maldi():
743 outStr = '<maldi'
744 for (k,v) in d.items():
745 if k.startswith('maldi.'):
746 k,f = k.split(':')
747 k = k[6:]
748 outStr += (" %%s=\"%s\""%(f,))%(k,v)
749 outStr += " plateID=\"%s\""%(d['plateID'],)
750 outStr += " spotID=\"%s\""%(d['spotID'],)
751 outStr += "/>\n"
752 self.writeString(xmlFile,outStr)
753
754 outStr = "</spectrumDesc>\n"
755 self.writeString(xmlFile,outStr)
756
757 outStr = "<mzArrayBinary>\n"
758 outStr += "<data"
759 outStr += " precision=\"32\""
760 outStr += " endian=\"%s\""%(sys.byteorder,)
761 outStr += " length=\"%d\""%(peaksCount,)
762 outStr += ">"
763 self.writeString(xmlFile,outStr)
764
765 outStr = b64encode(s[0:peaksCount*2:2].tostring())
766 if not debug:
767 self.writeString(xmlFile,outStr)
768 else:
769 self.writeString(xmlFile,outStr[0:100])
770
771 outStr = "</data>\n</mzArrayBinary>\n"
772 self.writeString(xmlFile,outStr)
773
774 outStr = "<intenArrayBinary>\n"
775 outStr += "<data"
776 outStr += " precision=\"32\""
777 outStr += " endian=\"%s\""%(sys.byteorder,)
778 outStr += " length=\"%d\""%(peaksCount,)
779 outStr += ">"
780 self.writeString(xmlFile,outStr)
781
782 outStr = b64encode(s[1:peaksCount*2:2].tostring())
783 if not debug:
784 self.writeString(xmlFile,outStr)
785 else:
786 self.writeString(xmlFile,outStr[0:100])
787
788 outStr = "</data>\n</intenArrayBinary>\n"
789 self.writeString(xmlFile,outStr)
790
791 if self.reader.maldi():
792 outStr = ''
793 for (k,v) in d.items():
794 if k.startswith('maldi.'):
795 k,f = k.split(':')
796 k = k[6:]
797 outStr += '<nameValue'
798 outStr += ' name="%s"'%(k,)
799 outStr += (' value="%f"'%(f,))%(v,)
800 outStr += '/>\n'
801 ## outStr += '<nameValue'
802 ## outStr += ' name="plateID"'
803 ## outStr += ' value="%s"'%(d['plateID'],)
804 ## outStr += '/>\n'
805 ## outStr += '<nameValue'
806 ## outStr += ' name="spotID"'
807 ## outStr += ' value="%s"'%(d['spotID'],)
808 ## outStr += '/>\n'
809 self.writeString(xmlFile,outStr)
810
811 outStr = "</spectrum>\n"
812 self.writeString(xmlFile,outStr)
813 xmlFile.flush()
814
815 prevSpec = s
816
817 self.actualScanCount = scanNumber
818
819 class QStarWiff:
820 def __init__(self,filename,peaks=None):
821 self.filename = filename
822 self.filehash = fileSHA(filename)
823 self.theTIC = None
824 self.theWF = None
825 self.theFMANSpecData = None
826 self.startTime = None
827 self.stopTime = None
828 self.applyPeakDetect = []
829 if peaks:
830 self.applyPeakDetect = map(int,peaks.split(','))
831
832 if not os.path.exists(self.filename):
833 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
834 sys.exit(1)
835
836 self.filename = os.path.abspath(self.filename)
837
838 def __del__(self):
839 # Probably unnecessary
840 self.close()
841
842 def peakDetect(self,msLevel):
843 return ((msLevel in self.applyPeakDetect) and (msLevel != 1))
844
845 def open(self):
846
847 if self.theTIC is None:
848
849 from win32com.client import Dispatch
850
851 self.theFMANSpecData = Dispatch('Analyst.FMANSpecData')
852 self.theTIC = Dispatch('Analyst.FMANChromData')
853 self.theSPL = Dispatch('Analyst.SpectralPeakList')
854
855 self.theFMANSpecData.WiffFileName = self.filename
856 self.theTIC.WiffFileName = self.filename
857 self.theWF = self.theFMANSpecData.GetWiffFileObject()
858
859 self.theTIC.SetToTIC(1,0,0)
860
861 self.startTime = self.theTIC.GetDataPointXValue(1)
862 self.stopTime = self.theTIC.GetDataPointXValue(self.theTIC.GetNumberOfDataPoints())
863
864 def close(self):
865
866 if not self.theTIC is None:
867
868 if not self.theWF is None:
869 self.theWF.CloseWiffFile()
870 self.theTIC = None
871 self.theFMANSpecData = None
872 self.theWF = None
873
874 def spectra(self):
875 self.open()
876 spectrumCount=0;
877
878 numberOfSamples = self.theWF.GetActualNumberOfSamples()
879 for i in xrange(1,numberOfSamples+1):
880
881 # print >>sys.stderr, "Sample %d"%i
882 # print >>sys.stderr, "Sample Name: %s"%(self.theWF.GetSampleName(i),)
883
884 sampleName = self.theWF.GetSampleName(i)
885
886 numberOfPeriods = self.theWF.GetActualNumberOfPeriods(i)
887 for j in xrange(0,numberOfPeriods):
888
889 numberOfExperiments = self.theWF.GetNumberOfExperiments(i,j)
890 self.theTIC.SetToTIC(i, j, 0)
891
892 numberOfTICDataPoints = self.theTIC.GetNumberOfDataPoints()
893 for l in xrange(1,numberOfTICDataPoints+1):
894
895 for k in xrange(0,numberOfExperiments):
896
897 self.theExperiment = self.theWF.GetExperimentObject(i, j, k)
898 self.theTIC.SetToTIC(i, j, k)
899
900 # Convert to seconds?
901 xValue = self.theTIC.GetDataPointXValue(l) * 60
902
903 spectrumCount += 1;
904
905 # If any Ion current at time "l"
906 if self.theTIC.GetDataPointYValue(l) > 0:
907
908 self.theFMANSpecData.SetSpectrum(i, j, k, xValue, xValue)
909
910 if self.theExperiment.ScanType == 9:
911 msLevel = 2
912 m = re.search(r'[(]([0-9.]+)[)]',self.theFMANSpecData.DataTitle)
913 fixedMass = float(m.group(1))
914 else:
915 msLevel = 1
916
917 startMass = self.theFMANSpecData.GetStartMass()
918 stopMass = self.theFMANSpecData.GetStopMass()
919 dataArr = array('f');
920 peakDetect = 'False'
921 if self.peakDetect(msLevel):
922 peakDetect = 'True'
923 minY,maxY = self.theFMANSpecData.GetYValueRange()
924 print minY,maxY
925 self.theFMANSpecData.Threshold(maxY*0.01)
926 self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0)
927 numPeaks = self.theSPL.GetNumberOfPeaks()
928 print numPeaks
929 for m in xrange(1,numPeaks+1):
930 (x,w,y,yp) = self.theSPL.GetPeak(m)
931 if y <= 0:
932 continue
933 dataArr.append(x)
934 dataArr.append(y)
935 else:
936 numPoints = self.theFMANSpecData.GetNumberOfDataPoints()
937 for m in xrange(1,numPoints+1):
938 if self.theFMANSpecData.GetDataPointYValue(m) <= 0:
939 continue
940 x = self.theFMANSpecData.GetDataPointXValue(m)
941 dataArr.append(x)
942 y = self.theFMANSpecData.GetDataPointYValue(m)
943 dataArr.append(y)
944 polarity = self.theExperiment.Polarity
945
946 if len(dataArr) == 0:
947 continue
948
949 if polarity == 0:
950 polarity_val = "+"
951 else:
952 polarity_val = "-"
953
954 rt = "PT%fS"%(xValue,)
955
956 # Scan level attributes
957 # Required by mzXML (no format spec. needed)
958 d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect}
959 # Optional ('scan.' and prefixformat spec. needed)
960 d.update({
961 'scan.retentionTime:PT%fS':xValue,
962 'scan.polarity:%s':polarity_val,
963 'scan.startMz:%f':startMass,
964 'scan.endMz:%f':stopMass,
965 'scan.sample:%d':i,
966 'scan.period:%d':j,
967 'scan.experiment:%d':k,
968 'scan.sampleName:%s':sampleName
969 })
970 d.update({'scanOrigin.parentFileID:%s': self.filehash,
971 'scanOrigin.num:%d': spectrumCount})
972 if msLevel == 1:
973 yield (dataArr,d)
974 else:
975 # tandem MS scans add the following
976 # Required by mzXML
977 d.update({'precursorMz':fixedMass})
978 # Optional
979 # d.update({'precursorMz.precursorMzIntensity:f':0.0})
980 yield (dataArr,d)
981
982 def getMsRunMetaData(self):
983 self.open()
984 d = {'startTime:PT%fS':self.startTime,
985 'endTime:PT%fS':self.stopTime,
986 }
987 return d
988
989 def getFilenames(self):
990 return [ (self.filename,self.filehash) ]
991
992 def msManufacturer(self):
993 return "ABI / SCIEX"
994
995 def msModel(self):
996 return "QSTAR"
997
998 def msIonisation(self):
999 return "ESI"
1000
1001 def msMassAnalyzer(self):
1002 return "Quadrupole"
1003
1004 def msDetector(self):
1005 return None
1006
1007 def acquisitionSoftware(self):
1008 return "Analyst"
1009
1010 def acquisitionSoftwareVersion(self):
1011 return "1.0"
1012
1013 def precursorPrecision(self,mz):
1014 return .1
1015
1016 def precursorTune(self):
1017 return True
1018
1019 def lc(self):
1020 return True
1021
1022 def maldi(self):
1023 return not self.lc()
1024
1025 class mzXML_md_sax(handler.ContentHandler):
1026
1027 def __init__(self):
1028 self.md = {}
1029 self.context = ''
1030
1031 def startElement(self, name, attrs):
1032 self.context += ':%s'%(name,)
1033 # print >>sys.stderr, ">>",self.context
1034 # sys.stderr.flush()
1035 if self.context.endswith(':msRun'):
1036 self.md['startTime'] = float(attrs['startTime'][2:-1])
1037 self.md['endTime'] = float(attrs['endTime'][2:-1])
1038 elif self.context.endswith(':parentFile'):
1039 self.md['fileName'] = unquote(attrs['fileName'])
1040 self.md['fileType'] = attrs['fileType']
1041 self.md['fileSha1'] = attrs['fileSha1']
1042 elif self.context.endswith(':instrument'):
1043 self.md['msManufacturer'] = attrs['manufacturer']
1044 self.md['msModel'] = attrs['model']
1045 self.md['msIonisation'] = attrs['ionisation']
1046 self.md['msMassAnalyzer'] = attrs['msType']
1047 elif self.context.endswith(':msInstrument:msManufacturer'):
1048 self.md['msManufacturer'] = attrs['value']
1049 elif self.context.endswith(':msInstrument:msModel'):
1050 self.md['msModel'] = attrs['value']
1051 elif self.context.endswith(':msInstrument:msIonisation'):
1052 self.md['msIonisation'] = attrs['value']
1053 elif self.context.endswith(':msInstrument:msMassAnalyzer'):
1054 self.md['msMassAnalyzer'] = attrs['value']
1055 elif self.context.endswith(':msInstrument:msDetector'):
1056 self.md['msDetector'] = attrs['value']
1057 elif self.context.endswith(':msInstrument:software'):
1058 if attrs['type'] == 'acquisition':
1059 self.md['acquisitionSoftware'] = attrs['name']
1060 self.md['acquisitionSoftwareVersion'] = attrs['version']
1061 elif self.context.endswith(':instrument:software'):
1062 if attrs['type'] == 'acquisition':
1063 self.md['acquisitionSoftware'] = attrs['name']
1064 self.md['acquisitionSoftwareVersion'] = attrs['version']
1065 elif self.context.endswith(':scan'):
1066 raise SAXException("Early termination")
1067
1068 def endElement(self, name):
1069 # print >>sys.stderr, "<<",self.context
1070 # sys.stderr.flush()
1071 self.context = self.context[0:-(len(name)+1)]
1072
1073
1074 class mzXML_spec_sax(handler.ContentHandler):
1075
1076 def __init__(self):
1077 self.context = ''
1078 self.content = ''
1079 self.spectra = []
1080 self.done = False
1081 self.scancount = 0
1082 self.scanmax = 1000
1083 self.scanstart = 0
1084
1085 def startElement(self, name, attrs):
1086 self.context += ':%s'%(name,)
1087 # print >>sys.stderr, ">>",self.context
1088 # sys.stderr.flush()
1089 self.content = ''
1090 if name == 'scan':
1091 self.scanmd = {}
1092 self.scanlevel = int(attrs['msLevel'])
1093 self.scannum = int(attrs['num'])
1094 if attrs.has_key('retentionTime'):
1095 self.rt = float(attrs['retentionTime'][2:-1])
1096 self.endMz = float(attrs['endMz'])
1097 self.startMz = float(attrs['startMz'])
1098 self.polarity = attrs.get('polarity',None)
1099 for (k,v) in attrs.items():
1100 self.scanmd[k] = v
1101 self.precursorMz = None
1102 self.scancount += 1
1103 elif name == 'msRun':
1104 self.scancount = 0
1105 self.spectra = []
1106 self.context = ':msRun'
1107 elif name == 'scanOrigin':
1108 self.scanmd['scanOrigin'] = attrs
1109
1110 def characters(self, content):
1111 if self.scancount >= self.scanstart:
1112 if self.context.endswith(':peaks') or \
1113 self.context.endswith(':precursorMz'):
1114 self.content += content
1115
1116 def endElement(self, name):
1117 if self.scancount >= self.scanstart:
1118 if self.context.endswith('scan:peaks'):
1119
1120 spec = array('f')
1121 # print >>sys.stderr, len(self.content), len(b64decode(self.content)), self.content
1122 spec.fromstring(b64decode(self.content))
1123 if sys.byteorder != 'big':
1124 spec.byteswap()
1125 d = {'msLevel':self.scanlevel}
1126 # Optional ('scan.' and prefixformat spec. needed)
1127 d.update({
1128 'scan.retentionTime:PT%fS':self.rt,
1129 'scan.startMz:%f':self.startMz,
1130 'scan.endMz:%f':self.endMz
1131 })
1132 if self.polarity != None:
1133 d.update({'scan.polarity:%s':self.polarity})
1134 if self.scanlevel == 2:
1135 d.update({'precursorMz':self.precursorMz})
1136 if self.scanmd.has_key('scanOrigin'):
1137 d.update({'scanOrigin.parentFileID:%s': self.scanmd['scanOrigin']['parentFileID'],
1138 'scanOrigin.num:%d': self.scanmd['scanOrigin']['num']})
1139 for (k,v) in self.scanmd.items():
1140 if not d.has_key(k):
1141 d[k] = v
1142 self.spectra.append((spec,d))
1143 if len(self.spectra) >= self.scanmax:
1144 self.scanstart = self.scancount+1
1145 raise SAXException("Early termination")
1146 elif self.context.endswith('scan:precursorMz'):
1147 self.precursorMz = float(self.content)
1148 if self.context.endswith('msRun'):
1149 self.done = True
1150 # print >>sys.stderr, "<<",self.context
1151 # sys.stderr.flush()
1152 self.context = self.context[0:-(len(name)+1)]
1153
1154 class FromMzXML:
1155 def __init__(self,filename):
1156 self.filename = filename
1157 self.parser = None
1158
1159 if not os.path.exists(self.filename):
1160 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1161 sys.exit(1)
1162
1163 if self.filename.endswith('.gz'):
1164 self.compressfmt = 'gz'
1165 else:
1166 self.compressfmt = None
1167
1168 def __del__(self):
1169 self.close()
1170
1171 def open(self):
1172
1173 if self.parser == None:
1174 # print >>sys.stderr, "Parsing out meta-data..."
1175 # sys.stderr.flush()
1176 self.parser = make_parser()
1177 self.handler = mzXML_md_sax()
1178 self.parser.setContentHandler(self.handler)
1179 try:
1180 if self.compressfmt == None:
1181 self.parser.parse(self.filename)
1182 elif self.compressfmt == 'gz':
1183 self.parser.parse(gzip.open(self.filename,'r'))
1184 else:
1185 print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1186 sys.exit(1)
1187 except SAXException:
1188 pass
1189
1190 self.md = self.handler.md
1191
1192 # print >>sys.stderr, self.md
1193 # sys.stderr.flush()
1194
1195 # print >>sys.stderr, "Set up spectrum parser..."
1196 # sys.stderr.flush()
1197 self.parser = make_parser()
1198 self.handler = mzXML_spec_sax()
1199 self.parser.setContentHandler(self.handler)
1200
1201 def close(self):
1202 pass
1203
1204 def spectra(self):
1205 self.open()
1206 while True:
1207 # print >>sys.stderr, "Spectrum parser to read spectra from",self.handler.scanstart
1208 # sys.stderr.flush()
1209 try:
1210 if self.compressfmt == None:
1211 self.parser.parse(self.filename)
1212 elif self.compressfmt == 'gz':
1213 self.parser.parse(gzip.open(self.filename,'r'))
1214 else:
1215 print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,)
1216 sys.exit(1)
1217 except SAXException:
1218 pass
1219 for (s,d) in self.handler.spectra:
1220 # print >>sys.stderr, "Yeild spectrum number:",d['num']
1221 yield (s,d)
1222 if self.handler.done:
1223 break
1224
1225 def getMsRunMetaData(self):
1226 self.open()
1227 d = {'startTime:PT%fS':self.md['startTime'],
1228 'endTime:PT%fS':self.md['endTime']
1229 }
1230 return d
1231
1232 def getFilenames(self):
1233 return [ (self.md['fileName'],self.md['fileType'],self.md['fileSha1']) ]
1234
1235 def msManufacturer(self):
1236 return self.md.get('msManufacturer','')
1237
1238 def msModel(self):
1239 return self.md.get('msModel','')
1240
1241 def msIonisation(self):
1242 return self.md.get('msIonisation','')
1243
1244 def msMassAnalyzer(self):
1245 return self.md.get('msMassAnalyzer','')
1246
1247 def msDetector(self):
1248 return self.md.get('msDetector','')
1249
1250 def acquisitionSoftware(self):
1251 return self.md.get('acquisitionSoftware','')
1252
1253 def acquisitionSoftwareVersion(self):
1254 return self.md.get('acquisitionSoftwareVersion','')
1255
1256 def precursorPrecision(self,mz):
1257 return 0
1258
1259 def precursorTune(self):
1260 return False
1261
1262 def lc(self):
1263 return True
1264
1265 def maldi(self):
1266 return not self.lc()
1267
1268 class AB4700T2D:
1269 def __init__(self,filename,peaks=None):
1270 self.filename = filename
1271 self.datafiles = None
1272 self.deapp = None
1273 self.metadata = {}
1274 self.platespots = {}
1275 self.applyPeakDetect = []
1276 if peaks:
1277 self.applyPeakDetect = map(int,peaks.split(','))
1278
1279 if not os.path.exists(self.filename):
1280 print >>sys.stderr, "Filename %s does not exist."%(self.filename,)
1281 sys.exit(1)
1282
1283 self.filename = os.path.abspath(self.filename)
1284
1285 self.dir = os.path.split(self.filename)[0]
1286
1287 def __del__(self):
1288 # Probably unnecessary
1289 self.close()
1290
1291 def open(self):
1292
1293 if self.deapp is None:
1294
1295 self.metadata['PLATEDEF'] = []
1296 self.metadata['PLATE'] = []
1297 self.metadata['SPOT'] = []
1298 self.metadata['SCAN'] = []
1299
1300 h = open(self.filename,'rb')
1301 r = csv.reader(h,'excel-tab')
1302 for l in r:
1303 if len(l) == 0:
1304 continue
1305 kw = l.pop(0).upper()
1306 if kw in ('PLATEDEF','PLATE','SPOT','SCAN'):
1307 self.metadata[kw].append({})
1308 while len(l)>=2:
1309 k = l.pop(0);v = l.pop(0);
1310 if (kw == 'PLATE' and \
1311 (k in ('plateID','spotXCount','spotYCount','plateManufacturer','plateModel'))) or \
1312 (kw == 'PLATEDEF' and \
1313 (k in ('plateID','plateManufacturer','plateModel','spotNaming','maldiMatrix'))) or \
1314 (kw == 'SPOT' and \
1315 (k in ('plateID','spotID','spotXPosition','spotYPosition','maldiMatrix'))) or \
1316 (kw == 'SCAN' and \
1317 (k in ('plateID','spotID','filename','index'))):
1318 self.metadata[kw][-1][k] = v
1319 elif k == '':
1320 break
1321 else:
1322 print >>sys.stderr,"Bad key %s for %s row"%(k,kw)
1323 sys.exit(1)
1324 h.close()
1325
1326 for pd in self.metadata['PLATEDEF']:
1327 # insert all the gory details needed for a particular plate model etc...
1328 if pd['plateManufacturer'] == 'ABI / SCIEX' and pd['plateModel'] == '01-192+06-BB':
1329 if not pd.has_key('spotNaming') or not pd.has_key('maldiMatrix') or \
1330 not pd['spotNaming'] in ('alpha','parallel','antiparallel'):
1331 print >>sys.stderr,"Bad plate definition, missing maldiMatrix or spotNaming, or bad spotNaming value."
1332 sys.exit(1)
1333 self.metadata['PLATE'].append({
1334 'plateID':pd['plateID'],
1335 'plateManufacturer':pd['plateManufacturer'],
1336 'plateModel':pd['plateModel'],
1337 'spotXCount':24,
1338 'spotYCount':8,
1339 })
1340 if pd['spotNaming'] == 'alpha':
1341 for y in xrange(0,8):
1342 for x in xrange(0,24):
1343 self.metadata['SPOT'].append({
1344 'plateID':pd['plateID'],
1345 'spotID':"%c%d"%(y+ord('A'),x+1),
1346 'spotXPosition':x,
1347 'spotYPosition':2*y+(x%2),
1348 'maldiMatrix':pd['maldiMatrix']
1349 })
1350 else:
1351 print >>sys.stderr,"Valid spotNaming value, not yet implemented."
1352 sys.exit(1)
1353
1354 for pl in self.metadata['PLATE']:
1355 self.platespots[pl['plateID']] = []
1356
1357 for sp in self.metadata['SPOT']:
1358 self.platespots[sp['plateID']].append(sp)
1359
1360 self.datafiles = [ (os.path.join(self.dir,md['filename']),int(md['index'])) for md in self.metadata['SCAN'] ]
1361 self.distinct_datafiles = dict([(os.path.join(self.dir,md['filename']),
1362 fileSHA(os.path.join(self.dir,md['filename']))) for md in self.metadata['SCAN']])
1363
1364 self.maptoscan = {}
1365 index = 0
1366 for md in self.metadata['SCAN']:
1367 self.maptoscan[(os.path.join(self.dir,md['filename']),int(md['index']))] = index
1368 index += 1
1369
1370 from win32com.client import Dispatch, gencache
1371 self.deapp = Dispatch('DataExplorer.Application',
1372 resultCLSID='{3FED40F1-D409-11D1-8B56-0060971CB54B}')
1373 self.delib = gencache.EnsureModule('{06972F50-13F6-11D3-A5CB-0060971CB54B}',0,4,2)
1374 self.deapp.AutomatedProcessing = 1
1375 self.deapp.Visible = 0
1376
1377 def close(self):
1378
1379 if not self.deapp is None:
1380 self.deapp.Quit()
1381 self.deapp = None
1382
1383 def peakDetect(self,msLevel):
1384 return msLevel in self.applyPeakDetect
1385
1386 # remove .set and .cts files to prevent DataExplorer crashes - DT 11-1-2006
1387 def remsettingsfile(self,fn):
1388 # print "rmsettingsfile: %s"%fn
1389 if ((len(fn) > 3) and (fn[-4:].lower() == '.t2d')):
1390 path2del = fn[:-4] + '.cts'
1391 if os.path.exists(path2del):
1392 os.unlink(path2del)
1393 path2del = fn[:-4] + '.set'
1394 if os.path.exists(path2del):
1395 os.unlink(path2del)
1396
1397 def spectra(self):
1398 self.open()
1399
1400 prevfile = ''
1401 for (f,i) in self.datafiles:
1402
1403 if f != prevfile:
1404 self.deapp.Documents.Close()
1405 self.remsettingsfile(prevfile)
1406
1407 if not os.path.exists(f):
1408 print >>sys.stderr, "Filename %s does not exist."%(f,)
1409 sys.exit(1)
1410
1411 self.remsettingsfile(f)
1412 self.deapp.Documents.Open(f)
1413 # print "open %s"%f
1414
1415
1416 doc = self.deapp.Documents.Item(0)
1417 sv = doc.SpecView
1418
1419 sv.SetSpectrumAt(i-1)
1420
1421 (tf,fixedMass) = doc.InstrumentSettings.GetSetting(self.delib.constants.dePreCursorIon,i-1,None)
1422
1423 if fixedMass > 0:
1424 msLevel = 2
1425 else:
1426 msLevel = 1
1427
1428 dataArr = array('f');
1429 peakDetect="False"
1430 if self.peakDetect(msLevel):
1431 peakDetect="True"
1432 sv.CorrectBaseline()
1433 doc.SpecSetup.NoiseReductionStdev = 2.0
1434 sv.FilterNoise(self.delib.constants.deNoiseRemoval)
1435 sv.PeakSettings.PeakThresholdPercent = 1
1436 st,sp = sv.GetPeakData(self.delib.constants.deSpecPeakAll,
1437 self.delib.constants.deSpecPeakSortMass, 0, 0)
1438 for j in xrange(len(sp)):
1439 x = sp[j][self.delib.constants.deSpecPeakCentroidMass]
1440 y = sp[j][self.delib.constants.deSpecPeakArea]
1441 if y <= 0:
1442 continue
1443 dataArr.append(x)
1444 dataArr.append(y)
1445 else:
1446 n,s = sv.GetRawData(0,0)
1447 for j in xrange(n):
1448 dataArr.append(s[j][0])
1449 dataArr.append(s[j][1])
1450
1451 if len(dataArr) == 0:
1452 continue
1453
1454 polarity_val = "+"
1455
1456 # Scan level attributes
1457 # Required by mzXML (no format spec. needed)
1458 d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect}
1459 # Optional ('scan.' and prefixformat spec. needed)
1460 d.update({
1461 'scan.polarity:%s':polarity_val,
1462 })
1463
1464 scanindex = self.maptoscan[(f,i)]
1465 d.update({
1466 'plateID':self.metadata['SCAN'][scanindex]['plateID'],
1467 'spotID':self.metadata['SCAN'][scanindex]['spotID'],
1468 })
1469 d.update({'scanOrigin.parentFileID:%s': self.distinct_datafiles[f],
1470 'scanOrigin.num:%d': i})
1471 if msLevel == 1:
1472 yield (dataArr,d)
1473 else:
1474 # tandem MS scans add the following
1475 # Required by mzXML
1476 d.update({'precursorMz':fixedMass})
1477 # Optional
1478 d.update({'precursorMz.precursorCharge:%d':1})
1479 yield (dataArr,d)
1480
1481 prevfile = f
1482
1483 self.deapp.Documents.Close()
1484 self.remsettingsfile(prevfile)
1485 self.remsettingsfile(f)
1486
1487 def getMsRunMetaData(self):
1488 self.open()
1489 return {}
1490
1491 def getFilenames(self):
1492 return [ t for t in self.distinct_datafiles.items() ]
1493
1494 def msManufacturer(self):
1495 return "ABI / SCIEX"
1496
1497 def msModel(self):
1498 return "4700 Proteomic Analyzer"
1499
1500 def msIonisation(self):
1501 return "MALDI"
1502
1503 def msMassAnalyzer(self):
1504 return "TOF"
1505
1506 def msDetector(self):
1507 return None
1508
1509 def acquisitionSoftware(self):
1510 return "DataExplorer"
1511
1512 def acquisitionSoftwareVersion(self):
1513 return "1.0"
1514
1515 def precursorPrecision(self,mz):
1516 return .1
1517
1518 def precursorTune(self):
1519 return False
1520
1521 def lc(self):
1522 return False
1523
1524 def maldi(self):
1525 return not self.lc()
1526
1527 def plateData(self):
1528 self.open()
1529 for d in self.metadata['PLATE']:
1530 yield d
1531
1532 def spotData(self,plate):
1533 self.open()
1534 for s in self.platespots[plate]:
1535 yield s
1536
1537 class scanfilter:
1538 def __init__(self,str):
1539 self.rtre = re.compile('^PT([0-9]+(\.[0-9]+)?)S$')
1540 self.intre = re.compile('^[1-9][0-9]*$')
1541 self.floatre = re.compile('^[0-9]+(\.[0-9]+)?$')
1542 self.cl = []
1543 clause = str.split(',')
1544 for cl in clause:
1545 scl = cl.split('.')
1546 m = self.rtre.match(scl[2].strip())
1547 if m != None:
1548 v = float(m.group(1))
1549 elif self.floatre.match(scl[2].strip()):
1550 v = float(scl[2])
1551 elif self.intre.match(scl[2].strip()):
1552 v = int(scl[2])
1553 else:
1554 v = scl[2]
1555 self.cl.append((scl[0],scl[1],v))
1556
1557 def test(self,d):
1558 a = {}
1559 for (k,v) in d.iteritems():
1560 k0 = k.split(':')[0].split('.')[-1]
1561 a[k0] = v
1562 for cl in self.cl:
1563 if type(a[cl[0]]) == type(u'') or \
1564 type(a[cl[0]]) == type('') :
1565 m = self.rtre.match(a[cl[0]].strip())
1566 if m != None:
1567 v1 = float(m.group(1))
1568 elif self.floatre.match(a[cl[0]].strip()):
1569 v1 = float(a[cl[0]])
1570 elif self.intre.match(a[cl[0]].strip()):
1571 v1 = int(a[cl[0]])
1572 else:
1573 v1 = a[cl[0]].strip()
1574 else:
1575 v1 = a[cl[0]]
1576 v2 = cl[2]
1577 # print >>sys.stderr, "Spectrum test:",v1,type(v1),cl[1],v2,type(v2)
1578 # sys.stderr.flush()
1579 if cl[1] == 'eq' and v1 != v2:
1580 # print >>sys.stderr, "FALSE"
1581 # sys.stderr.flush()
1582 return False
1583 elif cl[1] == 'ne' and not v1 != v2:
1584 return False
1585 elif cl[1] == 'lt' and not v1 < v2:
1586 return False
1587 elif cl[1] == 'le' and not v1 <= v2:
1588 return False
1589 elif cl[1] == 'gt' and not v1 > v2:
1590 return False
1591 elif cl[1] == 'ge' and not v1 >= v2:
1592 return False
1593 # print >>sys.stderr, "TRUE"
1594 # sys.stderr.flush()
1595 return True
1596
1597 if __name__ == '__main__':
1598
1599 from optparse import OptionParser
1600 import sys
1601 import glob
1602
1603 parser = OptionParser()
1604 parser.add_option("-R", "--rawdata", type="string", dest="rawdata", default="",\
1605 help="Format of raw data. Optional if raw spectra file ends in .wiff or .t2m.")
1606 parser.add_option("-X", "--xmlformat", type="string", dest="xmlformat", default="",\
1607 help="XML format to output. Optional if output file ends in .mzxml or .mzdata.")
1608 parser.add_option("-o", "--output", type="string", dest="output", default="",\
1609 help="Output file.")
1610 parser.add_option("-p", "--peaks", type="string", dest="peaks", default="2",\
1611 help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.")
1612 parser.add_option("-f", "--filter", type="string", dest="filter", default=None,\
1613 help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.")
1614 parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\
1615 help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.")
1616 parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,\
1617 help="Compress output file. Valid options 'gz'. Default: None.")
1618
1619 ( opts, args ) = parser.parse_args()
1620
1621 if opts.output == "" and opts.xmlformat == "":
1622 parser.error("Either -o or -X must be suppied.")
1623 sys.exit(1)
1624
1625 if opts.filter != None:
1626 filt=scanfilter(opts.filter)
1627 else:
1628 filt=None
1629
1630 argsglob = []
1631 for a in args:
1632 argsglob.extend(glob.glob(a))
1633
1634 if opts.output != "" and len(argsglob) > 1:
1635 parser.error("At most one raw spectrum data file is permitted, if -o option is used.")
1636
1637 for a in argsglob:
1638
1639 if opts.output != "":
1640 o = opts.output
1641 else:
1642 p = a.rfind('.')
1643 o = a[0:p]+'.'+opts.xmlformat
1644
1645 if o == a:
1646 print >>sys.stderr, "Input file %s and output file %s are the same!"%(a,o)
1647 sys.exit(1)
1648
1649 print >>sys.stderr, "Processing",a,"to",o
1650
1651 if opts.rawdata.lower() in ('wiff','qstar'):
1652 r = QStarWiff(a,opts.peaks)
1653 elif opts.rawdata.lower() in ('t2d','dat','ab4700','ab4800','mariner','voyager'):
1654 r = AB4700T2D(a,opts.peaks)
1655 elif opts.rawdata.lower() in ('mzxml',):
1656 r = FromMzXML(a)
1657 elif a[-5:].lower() in ('.wiff',):
1658 r = QStarWiff(a,opts.peaks)
1659 elif a[-4:].lower() in ('.t2m',):
1660 r = AB4700T2D(a,opts.peaks)
1661 elif a[-6:].lower() in ('.mzxml',):
1662 r = FromMzXML(a)
1663 else:
1664 parser.error("Bad rawdata format specification.")
1665 sys.exit(1)
1666
1667 if opts.xmlformat.lower() == 'mzxml':
1668 x = ToMzXML(r,o,opts.compress,filt)
1669 elif opts.xmlformat.lower() == 'mzdata':
1670 x = ToMzData(r,o,opts.compress,filt)
1671 elif opts.output[-6:].lower() in ('.mzxml',):
1672 x = ToMzXML(r,opts.output,opts.compress,filt)
1673 elif opts.output[-7:].lower() in ('.mzdata',):
1674 x = ToMzData(r,opts.output,opts.compress,filt)
1675 else:
1676 parser.error("Bad xml format specification.")
1677 sys.exit(1)
1678
1679 x.write(debug=opts.debug)