7 |
|
import tempfile |
8 |
|
import csv |
9 |
|
import gzip |
10 |
< |
import zipfile |
10 |
> |
import zlib |
11 |
|
from array import array |
12 |
|
from base64 import b64encode,b64decode |
13 |
|
from urllib import quote,unquote |
17 |
|
def __init__(self): |
18 |
|
self.name_ = "PyMsXML" |
19 |
|
self.majorVer = 0 |
20 |
< |
self.minorVer = 4 |
20 |
> |
self.minorVer = 5 |
21 |
|
self.revVer = 0 |
22 |
|
def version(self): |
23 |
|
return "%d.%d.%d"%(self.majorVer,self.minorVer,self.revVer) |
36 |
|
return s.hexdigest().lower() |
37 |
|
|
38 |
|
class ToMzXML: |
39 |
< |
def __init__(self,reader,filename,cmpfmt=None,filt=None): |
39 |
> |
def __init__(self,reader,filename,cmpfmt=None,filt=None,peaksCompress=False,version="3.0"): |
40 |
|
self.filename = filename |
41 |
|
self.compressfmt = cmpfmt |
42 |
|
self.reader = reader |
45 |
|
self.actualScanCount = 0 |
46 |
|
self.scanIndices = {} |
47 |
|
self.filter = filt |
48 |
+ |
if not version in ('2.1','2.2','3.0'): |
49 |
+ |
raise "Bad mzXML version \"%s\""%(version,) |
50 |
+ |
self.version = version |
51 |
+ |
if peaksCompress and float(self.version) < 3.0: |
52 |
+ |
raise "Cannot compress peaks until mzXML version 3.0" |
53 |
+ |
self.peakcompress = peaksCompress |
54 |
|
|
55 |
|
def __del__(self): |
56 |
|
if hasattr(self,'tmpFileName') and self.tmpFileName and self.tmpFileName != '': |
103 |
|
self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n') |
104 |
|
|
105 |
|
outStr = "<mzXML " |
106 |
< |
outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1\" " |
106 |
> |
outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s\" "%(self.version,) |
107 |
|
outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" " |
108 |
< |
outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd\"" |
108 |
> |
outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s http://sashimi.sourceforge.net/schema_revision/mzXML_%s/mzXML_idx_%s.xsd\""%(self.version,self.version,self.version) |
109 |
|
outStr += ">\n" |
110 |
|
|
111 |
|
self.writeString (xmlFile,outStr) |
112 |
|
|
113 |
|
outStr = "<msRun" |
114 |
|
outStr += " scanCount=\"%d\"" % (self.actualScanCount,) |
115 |
< |
for (k,v) in self.reader.getMsRunMetaData().items(): |
115 |
> |
md = self.reader.getMsRunMetaData() |
116 |
> |
for (k,v) in md.items(): |
117 |
|
k,f = k.split(':') |
118 |
< |
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
118 |
> |
if self.version not in ('2.2',) or (k != 'startTime'and k != 'endTime'): |
119 |
> |
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
120 |
|
outStr += ">\n" |
121 |
|
|
122 |
|
self.writeString (xmlFile,outStr) |
137 |
|
outStr += "fileSha1=\"%s\"" % (f[2],) |
138 |
|
outStr += "/>\n" |
139 |
|
self.writeString (xmlFile,outStr) |
140 |
< |
|
141 |
< |
outStr = "<msInstrument>\n" |
140 |
> |
|
141 |
> |
if float(self.version) >= 3.0: |
142 |
> |
outStr = "<msInstrument msInstrumentID=\"%s\">\n" % (self.reader.msInstrumentID(),) |
143 |
> |
else: |
144 |
> |
outStr = "<msInstrument>\n" |
145 |
> |
|
146 |
|
outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),) |
147 |
|
outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),) |
148 |
|
if self.reader.msIonisation(): |
191 |
|
outStr += "/>\n" |
192 |
|
outStr += "<processingOperation " |
193 |
|
outStr += "name=\"peak_detection_level_1_software\" " |
194 |
< |
outStr += "value=\"Analyst\"" |
194 |
> |
outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),) |
195 |
|
outStr += "/>\n" |
196 |
|
self.writeString (xmlFile,outStr) |
197 |
|
if (self.reader.peakDetect(2)): |
205 |
|
outStr += "/>\n" |
206 |
|
outStr += "<processingOperation " |
207 |
|
outStr += "name=\"peak_detection_level_2_software\" " |
208 |
< |
outStr += "value=\"Analyst\"" |
208 |
> |
outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),) |
209 |
|
outStr += "/>\n" |
210 |
|
self.writeString (xmlFile,outStr) |
211 |
|
|
369 |
|
outStr += ">\n" |
370 |
|
self.writeString(xmlFile,outStr) |
371 |
|
|
372 |
< |
any = False |
373 |
< |
for (k,v) in d.items(): |
374 |
< |
if k.startswith('scanOrigin.'): |
375 |
< |
if not any: |
376 |
< |
any = True |
377 |
< |
outStr = "<scanOrigin" |
378 |
< |
k,f = k.split(':') |
379 |
< |
k = k[11:] |
380 |
< |
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
381 |
< |
if any: |
382 |
< |
outStr += "/>\n" |
383 |
< |
self.writeString(xmlFile,outStr) |
372 |
> |
if self.version in ('2.2',): |
373 |
> |
any = False |
374 |
> |
for (k,v) in d.items(): |
375 |
> |
if k.startswith('scanOrigin.'): |
376 |
> |
if not any: |
377 |
> |
any = True |
378 |
> |
outStr = "<scanOrigin" |
379 |
> |
k,f = k.split(':') |
380 |
> |
k = k[11:] |
381 |
> |
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
382 |
> |
if any: |
383 |
> |
outStr += "/>\n" |
384 |
> |
self.writeString(xmlFile,outStr) |
385 |
|
|
386 |
|
if msLevel > 1: |
387 |
|
|
427 |
|
outStr += "%f</precursorMz>\n" % (pMz,) |
428 |
|
self.writeString(xmlFile,outStr) |
429 |
|
|
430 |
< |
if self.reader.maldi(): |
430 |
> |
if self.reader.maldi() and self.version in ('2.2',): |
431 |
|
outStr = '<maldi' |
432 |
|
for (k,v) in d.items(): |
433 |
|
if k.startswith('maldi.'): |
439 |
|
outStr += "/>\n" |
440 |
|
self.writeString(xmlFile,outStr) |
441 |
|
|
442 |
< |
outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">" |
442 |
> |
if sys.byteorder != 'big': |
443 |
> |
s.byteswap() |
444 |
> |
|
445 |
> |
if debug: |
446 |
> |
s = s[:20] |
447 |
> |
|
448 |
> |
specstr = s.tostring() |
449 |
> |
if self.peakcompress: |
450 |
> |
specstr = zlib.compress(specstr,9) |
451 |
> |
outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"zlib\" compressedLen=\"%d\">"%(len(specstr),) |
452 |
> |
else: |
453 |
> |
if float(self.version) >= 3.0: |
454 |
> |
outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"none\" compressedLen=\"%d\">"%(len(specstr),) |
455 |
> |
else: |
456 |
> |
outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">" |
457 |
> |
|
458 |
|
self.writeString(xmlFile,outStr) |
459 |
|
|
460 |
|
# Spec says the byte order shall be |
463 |
|
if sys.byteorder != 'big': |
464 |
|
s.byteswap() |
465 |
|
|
466 |
< |
outStr = b64encode(s.tostring()) |
467 |
< |
if not debug: |
440 |
< |
self.writeString(xmlFile,outStr) |
441 |
< |
else: |
442 |
< |
self.writeString(xmlFile,outStr[0:100]) |
443 |
< |
|
444 |
< |
if sys.byteorder != 'big': |
445 |
< |
s.byteswap() |
466 |
> |
outStr = b64encode(specstr) |
467 |
> |
self.writeString(xmlFile,outStr) |
468 |
|
|
469 |
|
outStr = "</peaks>\n" |
470 |
|
self.writeString(xmlFile,outStr) |
471 |
|
|
472 |
< |
if self.reader.maldi(): |
472 |
> |
if self.reader.maldi() and self.version not in ('2.2',): |
473 |
|
outStr = '' |
474 |
|
for (k,v) in d.items(): |
475 |
|
if k.startswith('maldi.'): |
476 |
|
k,f = k.split(':') |
477 |
|
k = k[6:] |
478 |
|
outStr += '<nameValue' |
479 |
< |
outStr += ' name="%s"'%(k,) |
480 |
< |
outStr += (' value="%f"'%(f,))%(v,) |
479 |
> |
outStr += ' name="maldi.%s"'%(k,) |
480 |
> |
outStr += (' value="%s"'%(f,))%(v,) |
481 |
|
outStr += '/>\n' |
482 |
< |
## outStr += '<nameValue' |
483 |
< |
## outStr += ' name="plateID"' |
484 |
< |
## outStr += ' value="%s"'%(d['plateID'],) |
485 |
< |
## outStr += '/>\n' |
486 |
< |
## outStr += '<nameValue' |
487 |
< |
## outStr += ' name="spotID"' |
488 |
< |
## outStr += ' value="%s"'%(d['spotID'],) |
489 |
< |
## outStr += '/>\n' |
482 |
> |
outStr += '<nameValue' |
483 |
> |
outStr += ' name="maldi.plateID"' |
484 |
> |
outStr += ' value="%s"'%(d['plateID'],) |
485 |
> |
outStr += '/>\n' |
486 |
> |
outStr += '<nameValue' |
487 |
> |
outStr += ' name="maldi.spotID"' |
488 |
> |
outStr += ' value="%s"'%(d['spotID'],) |
489 |
> |
outStr += '/>\n' |
490 |
> |
self.writeString(xmlFile,outStr) |
491 |
> |
|
492 |
> |
if self.version not in ('2.2',): |
493 |
> |
outStr = '' |
494 |
> |
for (k,v) in d.items(): |
495 |
> |
if k.startswith('scanOrigin.'): |
496 |
> |
k,f = k.split(':') |
497 |
> |
k = k[11:] |
498 |
> |
outStr += '<nameValue' |
499 |
> |
outStr += ' name="scanOrigin.%s"'%(k,) |
500 |
> |
outStr += (' value="%s"'%(f,))%(v,) |
501 |
> |
outStr += '/>\n' |
502 |
> |
self.writeString(xmlFile,outStr) |
503 |
> |
|
504 |
> |
outStr = '' |
505 |
> |
for (k,v) in d.items(): |
506 |
> |
if k.startswith('nameValue.'): |
507 |
> |
k,f = k.split(':') |
508 |
> |
k = k[10:] |
509 |
> |
outStr += '<nameValue' |
510 |
> |
outStr += ' name="%s"'%(k,) |
511 |
> |
outStr += (' value="%s"'%(f,))%(v,) |
512 |
> |
outStr += '/>\n' |
513 |
> |
if len(outStr) > 0: |
514 |
|
self.writeString(xmlFile,outStr) |
515 |
|
|
516 |
|
xmlFile.flush() |
967 |
|
if self.peakDetect(msLevel): |
968 |
|
peakDetect = 'True' |
969 |
|
minY,maxY = self.theFMANSpecData.GetYValueRange() |
970 |
< |
print minY,maxY |
970 |
> |
# print minY,maxY |
971 |
|
self.theFMANSpecData.Threshold(maxY*0.01) |
972 |
|
self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0) |
973 |
|
numPeaks = self.theSPL.GetNumberOfPeaks() |
974 |
< |
print numPeaks |
974 |
> |
# print numPeaks |
975 |
|
for m in xrange(1,numPeaks+1): |
976 |
|
(x,w,y,yp) = self.theSPL.GetPeak(m) |
977 |
|
if y <= 0: |
1001 |
|
|
1002 |
|
# Scan level attributes |
1003 |
|
# Required by mzXML (no format spec. needed) |
1004 |
< |
d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect} |
1004 |
> |
d = {'msLevel':msLevel} |
1005 |
|
# Optional ('scan.' and prefixformat spec. needed) |
1006 |
|
d.update({ |
1007 |
|
'scan.retentionTime:PT%fS':xValue, |
1008 |
|
'scan.polarity:%s':polarity_val, |
1009 |
|
'scan.startMz:%f':startMass, |
1010 |
|
'scan.endMz:%f':stopMass, |
965 |
– |
'scan.sample:%d':i, |
966 |
– |
'scan.period:%d':j, |
967 |
– |
'scan.experiment:%d':k, |
968 |
– |
'scan.sampleName:%s':sampleName |
1011 |
|
}) |
1012 |
|
d.update({'scanOrigin.parentFileID:%s': self.filehash, |
1013 |
|
'scanOrigin.num:%d': spectrumCount}) |
1014 |
+ |
d.update({'nameValue.sample:%d':i, |
1015 |
+ |
'nameValue.period:%d':j, |
1016 |
+ |
'nameValue.experiment:%d':k, |
1017 |
+ |
'nameValue.sampleName:%s':sampleName}) |
1018 |
+ |
|
1019 |
|
if msLevel == 1: |
1020 |
|
yield (dataArr,d) |
1021 |
|
else: |
1036 |
|
def getFilenames(self): |
1037 |
|
return [ (self.filename,self.filehash) ] |
1038 |
|
|
1039 |
+ |
def msInstrumentID(self): |
1040 |
+ |
return "1" |
1041 |
+ |
|
1042 |
|
def msManufacturer(self): |
1043 |
|
return "ABI / SCIEX" |
1044 |
|
|
1052 |
|
return "Quadrupole" |
1053 |
|
|
1054 |
|
def msDetector(self): |
1055 |
< |
return None |
1055 |
> |
return "LE" |
1056 |
|
|
1057 |
|
def acquisitionSoftware(self): |
1058 |
|
return "Analyst" |
1505 |
|
|
1506 |
|
# Scan level attributes |
1507 |
|
# Required by mzXML (no format spec. needed) |
1508 |
< |
d = {'msLevel':msLevel,'scan.peakDetect:%s':peakDetect} |
1508 |
> |
d = {'msLevel':msLevel} |
1509 |
|
# Optional ('scan.' and prefixformat spec. needed) |
1510 |
|
d.update({ |
1511 |
|
'scan.polarity:%s':polarity_val, |
1541 |
|
def getFilenames(self): |
1542 |
|
return [ t for t in self.distinct_datafiles.items() ] |
1543 |
|
|
1544 |
+ |
def msInstrumentID(self): |
1545 |
+ |
return "1" |
1546 |
+ |
|
1547 |
|
def msManufacturer(self): |
1548 |
|
return "ABI / SCIEX" |
1549 |
|
|
1557 |
|
return "TOF" |
1558 |
|
|
1559 |
|
def msDetector(self): |
1560 |
< |
return None |
1560 |
> |
return "LE" |
1561 |
|
|
1562 |
|
def acquisitionSoftware(self): |
1563 |
|
return "DataExplorer" |
1664 |
|
help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.") |
1665 |
|
parser.add_option("-f", "--filter", type="string", dest="filter", default=None,\ |
1666 |
|
help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.") |
1667 |
< |
parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\ |
1668 |
< |
help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.") |
1667 |
> |
parser.add_option("-V", "--version", type='string', dest="version", default="3.0", |
1668 |
> |
help="XML version. mzXML only. Valid options '2.1','2.2','3.0'. Default: '3.0'.") |
1669 |
> |
parser.add_option("-z", "--compress_peaks", action="store_true", dest="compress_peaks", default=None,\ |
1670 |
> |
help="Compress mzXML peaks data using zlib. Default: False") |
1671 |
|
parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,\ |
1672 |
|
help="Compress output file. Valid options 'gz'. Default: None.") |
1673 |
+ |
parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\ |
1674 |
+ |
help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.") |
1675 |
|
|
1676 |
|
( opts, args ) = parser.parse_args() |
1677 |
|
|
1722 |
|
sys.exit(1) |
1723 |
|
|
1724 |
|
if opts.xmlformat.lower() == 'mzxml': |
1725 |
< |
x = ToMzXML(r,o,opts.compress,filt) |
1725 |
> |
x = ToMzXML(r,o,opts.compress,filt,opts.compress_peaks,opts.version) |
1726 |
|
elif opts.xmlformat.lower() == 'mzdata': |
1727 |
|
x = ToMzData(r,o,opts.compress,filt) |
1728 |
|
elif opts.output[-6:].lower() in ('.mzxml',): |
1729 |
< |
x = ToMzXML(r,opts.output,opts.compress,filt) |
1729 |
> |
x = ToMzXML(r,opts.output,opts.compress,filt,opts.compress_peaks,opts.version) |
1730 |
|
elif opts.output[-7:].lower() in ('.mzdata',): |
1731 |
|
x = ToMzData(r,opts.output,opts.compress,filt) |
1732 |
|
else: |