1 |
nje01 |
1.1 |
#!/usr/bin/env python |
2 |
|
|
|
3 |
|
|
import sys,os,os.path |
4 |
|
|
import re |
5 |
|
|
import sha |
6 |
|
|
import math |
7 |
|
|
import tempfile |
8 |
|
|
import csv |
9 |
|
|
import gzip |
10 |
nje01 |
1.3 |
import zlib |
11 |
nje01 |
1.1 |
from array import array |
12 |
|
|
from base64 import b64encode,b64decode |
13 |
|
|
from urllib import quote,unquote |
14 |
|
|
from xml.sax import * |
15 |
|
|
|
16 |
|
|
class NameVersion: |
17 |
|
|
def __init__(self): |
18 |
|
|
self.name_ = "PyMsXML" |
19 |
|
|
self.majorVer = 0 |
20 |
nje01 |
1.3 |
self.minorVer = 5 |
21 |
nje01 |
1.5 |
self.revVer = 2 |
22 |
nje01 |
1.1 |
def version(self): |
23 |
|
|
return "%d.%d.%d"%(self.majorVer,self.minorVer,self.revVer) |
24 |
|
|
def name(self): |
25 |
|
|
return self.name_ |
26 |
|
|
|
27 |
|
|
def fileSHA(filename): |
28 |
|
|
s = sha.new() |
29 |
|
|
h = open(filename,'rb') |
30 |
|
|
while True: |
31 |
|
|
buffer = h.read(1024) |
32 |
|
|
if buffer == '': |
33 |
|
|
break |
34 |
|
|
s.update(buffer) |
35 |
|
|
h.close() |
36 |
|
|
return s.hexdigest().lower() |
37 |
|
|
|
38 |
|
|
class ToMzXML: |
39 |
nje01 |
1.3 |
def __init__(self,reader,filename,cmpfmt=None,filt=None,peaksCompress=False,version="3.0"): |
40 |
nje01 |
1.1 |
self.filename = filename |
41 |
|
|
self.compressfmt = cmpfmt |
42 |
|
|
self.reader = reader |
43 |
|
|
self.initSHA() |
44 |
|
|
self.writeByteCounter = 0 |
45 |
|
|
self.actualScanCount = 0 |
46 |
|
|
self.scanIndices = {} |
47 |
|
|
self.filter = filt |
48 |
nje01 |
1.3 |
if not version in ('2.1','2.2','3.0'): |
49 |
|
|
raise "Bad mzXML version \"%s\""%(version,) |
50 |
|
|
self.version = version |
51 |
|
|
if peaksCompress and float(self.version) < 3.0: |
52 |
|
|
raise "Cannot compress peaks until mzXML version 3.0" |
53 |
|
|
self.peakcompress = peaksCompress |
54 |
nje01 |
1.1 |
|
55 |
|
|
def __del__(self): |
56 |
|
|
if hasattr(self,'tmpFileName') and self.tmpFileName and self.tmpFileName != '': |
57 |
|
|
os.unlink(self.tmpFileName) |
58 |
|
|
|
59 |
|
|
def getFilename(self): |
60 |
|
|
return self.filename |
61 |
|
|
|
62 |
|
|
def initSHA(self): |
63 |
|
|
self.sha1 = sha.new() |
64 |
|
|
|
65 |
|
|
def updateSHA(self,s): |
66 |
|
|
self.sha1.update(s) |
67 |
|
|
|
68 |
|
|
def getSHA(self): |
69 |
|
|
return self.sha1.hexdigest().lower() |
70 |
|
|
|
71 |
|
|
def writeString(self,fh,data): |
72 |
|
|
self.writeByteCounter += len(data) |
73 |
|
|
self.sha1.update(data) |
74 |
|
|
fh.write(data) |
75 |
|
|
|
76 |
|
|
def write(self,debug=False): |
77 |
|
|
|
78 |
|
|
# Make a temporary file for the scan data to count the number of spectra. |
79 |
|
|
(tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml') |
80 |
|
|
tmpFile = os.fdopen(tmpFileFD,'wb') |
81 |
|
|
|
82 |
|
|
# Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount. |
83 |
|
|
self.write_scans(tmpFile,debug) |
84 |
|
|
|
85 |
|
|
tmpFile.close() |
86 |
|
|
|
87 |
|
|
# Reset! |
88 |
|
|
|
89 |
|
|
self.initSHA() |
90 |
|
|
self.writeByteCounter = 0 |
91 |
|
|
|
92 |
|
|
if self.compressfmt == None and not self.filename.endswith('.gz'): |
93 |
|
|
xmlFile = open(self.filename,'wb') |
94 |
|
|
elif self.compressfmt == 'gz' or self.filename.endswith('.gz'): |
95 |
|
|
if self.filename.endswith('.gz'): |
96 |
|
|
xmlFile = gzip.open(self.filename,'wb') |
97 |
|
|
else: |
98 |
|
|
xmlFile = gzip.open(self.filename+'.gz','wb') |
99 |
|
|
else: |
100 |
|
|
print >>sys.stderr, "Bad compressfmt specification" |
101 |
|
|
sys.exit(1) |
102 |
|
|
|
103 |
|
|
self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n') |
104 |
|
|
|
105 |
|
|
outStr = "<mzXML " |
106 |
nje01 |
1.3 |
outStr += "xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s\" "%(self.version,) |
107 |
nje01 |
1.1 |
outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" " |
108 |
nje01 |
1.3 |
outStr += "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_%s http://sashimi.sourceforge.net/schema_revision/mzXML_%s/mzXML_idx_%s.xsd\""%(self.version,self.version,self.version) |
109 |
nje01 |
1.1 |
outStr += ">\n" |
110 |
|
|
|
111 |
|
|
self.writeString (xmlFile,outStr) |
112 |
|
|
|
113 |
|
|
outStr = "<msRun" |
114 |
|
|
outStr += " scanCount=\"%d\"" % (self.actualScanCount,) |
115 |
nje01 |
1.3 |
md = self.reader.getMsRunMetaData() |
116 |
|
|
for (k,v) in md.items(): |
117 |
nje01 |
1.1 |
k,f = k.split(':') |
118 |
nje01 |
1.3 |
if self.version not in ('2.2',) or (k != 'startTime'and k != 'endTime'): |
119 |
|
|
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
120 |
nje01 |
1.1 |
outStr += ">\n" |
121 |
|
|
|
122 |
|
|
self.writeString (xmlFile,outStr) |
123 |
|
|
|
124 |
|
|
filehashes = {} |
125 |
|
|
|
126 |
|
|
for f in self.reader.getFilenames(): |
127 |
|
|
if len(f) == 2: |
128 |
|
|
outStr = "<parentFile " |
129 |
|
|
outStr += "fileName=\"%s\" " % (quote(f[0]),) |
130 |
|
|
outStr += "fileType=\"RAWData\" " |
131 |
|
|
outStr += "fileSha1=\"%s\"" % (f[1],) |
132 |
|
|
outStr += "/>\n" |
133 |
|
|
else: |
134 |
|
|
outStr = "<parentFile " |
135 |
|
|
outStr += "fileName=\"%s\" " % (f[0],) |
136 |
|
|
outStr += "fileType=\"%s\" " % (f[1],) |
137 |
|
|
outStr += "fileSha1=\"%s\"" % (f[2],) |
138 |
|
|
outStr += "/>\n" |
139 |
|
|
self.writeString (xmlFile,outStr) |
140 |
nje01 |
1.3 |
|
141 |
|
|
if float(self.version) >= 3.0: |
142 |
|
|
outStr = "<msInstrument msInstrumentID=\"%s\">\n" % (self.reader.msInstrumentID(),) |
143 |
|
|
else: |
144 |
|
|
outStr = "<msInstrument>\n" |
145 |
|
|
|
146 |
nje01 |
1.1 |
outStr += "<msManufacturer category=\"msManufacturer\" value=\"%s\"/>\n" % (self.reader.msManufacturer(),) |
147 |
|
|
outStr += "<msModel category=\"msModel\" value=\"%s\"/>\n" % (self.reader.msModel(),) |
148 |
|
|
if self.reader.msIonisation(): |
149 |
|
|
outStr += "<msIonisation category=\"msIonisation\" value=\"%s\"/>\n" % (self.reader.msIonisation(),) |
150 |
|
|
if self.reader.msMassAnalyzer(): |
151 |
|
|
outStr += "<msMassAnalyzer category=\"msMassAnalyzer\" value=\"%s\"/>\n" % (self.reader.msMassAnalyzer(),) |
152 |
|
|
if self.reader.msDetector(): |
153 |
|
|
outStr += "<msDetector category=\"msDetector\" value=\"%s\"/>\n" % (self.reader.msDetector(),) |
154 |
|
|
|
155 |
|
|
self.writeString (xmlFile,outStr) |
156 |
|
|
|
157 |
|
|
outStr = "<software " |
158 |
|
|
outStr += "type=\"acquisition\" " |
159 |
|
|
outStr += "name=\"%s\" "%(self.reader.acquisitionSoftware(),) |
160 |
|
|
outStr += "version=\"%s\""%(self.reader.acquisitionSoftwareVersion(),) |
161 |
|
|
outStr += "/>\n" |
162 |
|
|
|
163 |
|
|
self.writeString (xmlFile,outStr) |
164 |
|
|
|
165 |
|
|
outStr = "</msInstrument>\n" |
166 |
|
|
|
167 |
|
|
self.writeString (xmlFile,outStr) |
168 |
|
|
|
169 |
|
|
outStr = "<dataProcessing>\n" |
170 |
|
|
|
171 |
|
|
self.writeString (xmlFile,outStr) |
172 |
|
|
|
173 |
|
|
nv = NameVersion() |
174 |
|
|
|
175 |
|
|
outStr = "<software " |
176 |
|
|
outStr += "type=\"conversion\" " |
177 |
|
|
outStr += "name=\"%s\" "%(nv.name(),) |
178 |
|
|
outStr += "version=\"%s\"" % (nv.version(),) |
179 |
|
|
outStr += "/>\n" |
180 |
|
|
|
181 |
|
|
self.writeString (xmlFile,outStr) |
182 |
|
|
|
183 |
|
|
if (self.reader.peakDetect(1)): |
184 |
|
|
outStr = "<processingOperation " |
185 |
|
|
outStr += "name=\"peak_detection_level_1\" " |
186 |
|
|
outStr += "value=\"true\"" |
187 |
|
|
outStr += "/>\n" |
188 |
|
|
outStr += "<processingOperation " |
189 |
|
|
outStr += "name=\"peak_detection_level_1_threshold\" " |
190 |
|
|
outStr += "value=\"1%\"" |
191 |
|
|
outStr += "/>\n" |
192 |
|
|
outStr += "<processingOperation " |
193 |
|
|
outStr += "name=\"peak_detection_level_1_software\" " |
194 |
nje01 |
1.3 |
outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),) |
195 |
nje01 |
1.1 |
outStr += "/>\n" |
196 |
|
|
self.writeString (xmlFile,outStr) |
197 |
|
|
if (self.reader.peakDetect(2)): |
198 |
|
|
outStr = "<processingOperation " |
199 |
|
|
outStr += "name=\"peak_detection_level_2\" " |
200 |
|
|
outStr += "value=\"true\"" |
201 |
|
|
outStr += "/>\n" |
202 |
|
|
outStr += "<processingOperation " |
203 |
|
|
outStr += "name=\"peak_detection_level_2_threshold\" " |
204 |
|
|
outStr += "value=\"1%\"" |
205 |
|
|
outStr += "/>\n" |
206 |
|
|
outStr += "<processingOperation " |
207 |
|
|
outStr += "name=\"peak_detection_level_2_software\" " |
208 |
nje01 |
1.3 |
outStr += "value=\"%s\""%(self.reader.acquisitionSoftware(),) |
209 |
nje01 |
1.1 |
outStr += "/>\n" |
210 |
|
|
self.writeString (xmlFile,outStr) |
211 |
|
|
|
212 |
|
|
outStr = "<processingOperation " |
213 |
|
|
outStr += "name=\"min_peaks_per_spectra\" " |
214 |
|
|
outStr += "value=\"1\"" |
215 |
|
|
outStr += "/>\n" |
216 |
|
|
|
217 |
|
|
self.writeString (xmlFile,outStr) |
218 |
|
|
|
219 |
|
|
outStr = "</dataProcessing>\n" |
220 |
|
|
|
221 |
|
|
self.writeString (xmlFile,outStr) |
222 |
|
|
|
223 |
|
|
if self.reader.maldi(): |
224 |
|
|
outStr = '<spotting>\n' |
225 |
|
|
for d in self.reader.plateData(): |
226 |
|
|
outStr += '<plate plateID="%s" spotXCount="%s" spotYCount="%s">\n' % \ |
227 |
|
|
(d['plateID'],d['spotXCount'],d['spotYCount']) |
228 |
|
|
outStr += '<plateManufacturer category="plateManufacturer" value="%s"/>\n' % \ |
229 |
|
|
(d['plateManufacturer'],) |
230 |
|
|
outStr += '<plateModel category="plateModel" value="%s"/>\n' % \ |
231 |
|
|
(d['plateModel'],) |
232 |
|
|
for s in self.reader.spotData(d['plateID']): |
233 |
|
|
outStr += '<spot spotID="%s" spotXPosition="%s" spotYPosition="%s">\n' % \ |
234 |
|
|
(s['spotID'],s['spotXPosition'],s['spotYPosition']) |
235 |
|
|
outStr += '<maldiMatrix category="maldiMatrix" value="%s"/>\n' % \ |
236 |
|
|
(s['maldiMatrix'],) |
237 |
|
|
outStr += '</spot>\n' |
238 |
|
|
outStr += '</plate>\n' |
239 |
|
|
outStr += '</spotting>\n' |
240 |
|
|
self.writeString(xmlFile,outStr) |
241 |
|
|
|
242 |
|
|
if self.reader.lc(): |
243 |
|
|
pass |
244 |
|
|
|
245 |
|
|
scanOffset = self.writeByteCounter |
246 |
|
|
|
247 |
|
|
tmpFile = open(self.tmpFileName,'rb') |
248 |
|
|
while True: |
249 |
|
|
tmpStr = tmpFile.read(1024) |
250 |
|
|
if tmpStr == '': |
251 |
|
|
break |
252 |
|
|
self.writeString(xmlFile,tmpStr) |
253 |
|
|
tmpFile.close() |
254 |
|
|
os.unlink(self.tmpFileName) |
255 |
|
|
self.tmpFileName = '' |
256 |
|
|
|
257 |
|
|
outStr = "</msRun>\n" |
258 |
|
|
self.writeString (xmlFile,outStr) |
259 |
|
|
|
260 |
|
|
indexOffset = self.writeByteCounter |
261 |
|
|
|
262 |
|
|
outStr = "<index " |
263 |
|
|
outStr += "name=\"scan\" " |
264 |
|
|
outStr += ">\n" |
265 |
|
|
|
266 |
|
|
self.writeString (xmlFile,outStr) |
267 |
|
|
|
268 |
|
|
for i in xrange(1,self.actualScanCount+1): |
269 |
|
|
outStr = "<offset id=\"%d\">" % (i,) |
270 |
|
|
outStr += "%d</offset>\n" % (self.scanIndices[i] + scanOffset,) |
271 |
|
|
self.writeString(xmlFile,outStr) |
272 |
|
|
|
273 |
|
|
outStr = "</index>\n" |
274 |
|
|
self.writeString (xmlFile,outStr) |
275 |
|
|
|
276 |
|
|
outStr = "<indexOffset>%d</indexOffset>\n" % (indexOffset,) |
277 |
|
|
|
278 |
|
|
self.writeString (xmlFile,outStr) |
279 |
|
|
|
280 |
|
|
self.writeString (xmlFile,"<sha1>") |
281 |
|
|
|
282 |
|
|
outStr = self.getSHA() |
283 |
|
|
|
284 |
|
|
self.writeString (xmlFile,outStr) |
285 |
|
|
|
286 |
|
|
self.writeString (xmlFile,"</sha1>\n") |
287 |
|
|
|
288 |
|
|
outStr = "</mzXML>\n" |
289 |
|
|
self.writeString (xmlFile,outStr) |
290 |
|
|
|
291 |
|
|
xmlFile.close() |
292 |
|
|
|
293 |
|
|
def write_scans(self,xmlFile,debug=False): |
294 |
|
|
|
295 |
|
|
msLevel = 0 |
296 |
|
|
scanNumber = 0 |
297 |
|
|
ancestors = [] |
298 |
|
|
self.writeByteCounter = 0 |
299 |
|
|
|
300 |
|
|
for (s,d) in self.reader.spectra(): |
301 |
|
|
|
302 |
|
|
if self.filter != None and not self.filter.test(d): |
303 |
|
|
continue |
304 |
|
|
|
305 |
|
|
if debug and scanNumber >= 10: |
306 |
|
|
break |
307 |
|
|
|
308 |
|
|
if not d.has_key('msLevel'): |
309 |
|
|
print >>sys.stderr, "Required scan attributes missing." |
310 |
|
|
sys.exit(1) |
311 |
|
|
|
312 |
|
|
prevLevel = msLevel |
313 |
|
|
msLevel = d['msLevel'] |
314 |
|
|
|
315 |
|
|
if prevLevel < msLevel and prevLevel > 0: |
316 |
|
|
# We went "in" a level, push scan number of parent |
317 |
|
|
ancestors.append((scanNumber,prevSpec)) |
318 |
|
|
elif prevLevel > msLevel and msLevel > 0: |
319 |
|
|
if len(ancestors) == 0: |
320 |
|
|
pass #print >>sys.stderr, "No ancestor for scan %s at level %s"%(scanNumber,msLevel) |
321 |
|
|
else: |
322 |
|
|
ancestors.pop() |
323 |
|
|
|
324 |
|
|
outStr = '' |
325 |
|
|
if prevLevel > msLevel: |
326 |
|
|
for m in xrange(0,prevLevel-msLevel+1): |
327 |
|
|
outStr += "</scan>\n" |
328 |
|
|
else: |
329 |
|
|
if prevLevel > 0 and prevLevel == msLevel: |
330 |
|
|
outStr += "</scan>\n" |
331 |
|
|
|
332 |
|
|
self.writeString(xmlFile,outStr) |
333 |
|
|
|
334 |
|
|
scanNumber = scanNumber + 1 |
335 |
|
|
|
336 |
|
|
self.scanIndices[scanNumber] = self.writeByteCounter |
337 |
|
|
|
338 |
|
|
totIonCurrent = 0 |
339 |
|
|
maxmz = None |
340 |
|
|
minmz = None |
341 |
|
|
basePeakMz = None |
342 |
|
|
basePeakIntensity = None |
343 |
|
|
peaksCount = 0 |
344 |
|
|
for i in xrange(0,len(s),2): |
345 |
|
|
x = s[i]; y = s[i+1] |
346 |
|
|
if minmz is None or minmz > x: |
347 |
|
|
minmz = x |
348 |
|
|
if maxmz is None or maxmz < x: |
349 |
|
|
maxmz = x |
350 |
|
|
totIonCurrent += y |
351 |
|
|
if basePeakIntensity is None or basePeakIntensity < y: |
352 |
|
|
basePeakIntensity = y |
353 |
|
|
basePeakMz = x |
354 |
|
|
peaksCount += 1 |
355 |
|
|
|
356 |
|
|
outStr = "<scan num=\"%d\" msLevel=\"%d\"" % (scanNumber,msLevel) |
357 |
|
|
outStr += " peaksCount=\"%d\"" % (peaksCount,) |
358 |
|
|
outStr += " lowMz=\"%f\"" % (minmz,) |
359 |
|
|
outStr += " highMz=\"%f\"" % (maxmz,) |
360 |
|
|
outStr += " totIonCurrent=\"%f\"" % (totIonCurrent,) |
361 |
|
|
outStr += " basePeakMz=\"%f\"" % (basePeakMz,) |
362 |
|
|
outStr += " basePeakIntensity=\"%f\"" % (basePeakIntensity,) |
363 |
|
|
|
364 |
|
|
for (k,v) in d.items(): |
365 |
|
|
if k.startswith('scan.'): |
366 |
|
|
k,f = k.split(':') |
367 |
|
|
k = k[5:] |
368 |
|
|
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
369 |
|
|
outStr += ">\n" |
370 |
|
|
self.writeString(xmlFile,outStr) |
371 |
|
|
|
372 |
nje01 |
1.3 |
if self.version in ('2.2',): |
373 |
|
|
any = False |
374 |
|
|
for (k,v) in d.items(): |
375 |
|
|
if k.startswith('scanOrigin.'): |
376 |
|
|
if not any: |
377 |
|
|
any = True |
378 |
|
|
outStr = "<scanOrigin" |
379 |
|
|
k,f = k.split(':') |
380 |
|
|
k = k[11:] |
381 |
|
|
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
382 |
|
|
if any: |
383 |
|
|
outStr += "/>\n" |
384 |
|
|
self.writeString(xmlFile,outStr) |
385 |
nje01 |
1.2 |
|
386 |
nje01 |
1.1 |
if msLevel > 1: |
387 |
|
|
|
388 |
|
|
if not d.has_key('precursorMz') : |
389 |
|
|
print >>sys.stderr, "Required precursorMz attribute missing." |
390 |
|
|
sys.exit(1) |
391 |
|
|
|
392 |
|
|
# We scan the parent spectrum in the region of the |
393 |
|
|
# precursorMz to establish the intensity. Optionally, |
394 |
|
|
# we tune the precursorMz itself, on the basis of this |
395 |
|
|
|
396 |
|
|
pMz = d['precursorMz'] |
397 |
|
|
tol = self.reader.precursorPrecision(pMz) |
398 |
|
|
pMzLB = pMz-tol |
399 |
|
|
pMzUB = pMz+tol |
400 |
|
|
pMzIntensity = 0 |
401 |
|
|
|
402 |
|
|
if len(ancestors) > 0: |
403 |
|
|
for i in xrange(0,len(ancestors[-1][1]),2): |
404 |
|
|
x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1]; |
405 |
|
|
if x < pMzLB: |
406 |
|
|
continue |
407 |
|
|
if x > pMzUB: |
408 |
|
|
break |
409 |
|
|
if pMzIntensity < y: |
410 |
|
|
pMzIntensity = y |
411 |
|
|
pMzTune = x |
412 |
|
|
|
413 |
|
|
if self.reader.precursorTune(): |
414 |
|
|
pMz = pMzTune |
415 |
|
|
|
416 |
|
|
outStr = "<precursorMz" |
417 |
|
|
for (k,v) in d.items(): |
418 |
|
|
if k.startswith('precursorMz.'): |
419 |
|
|
k,f = k.split(':') |
420 |
|
|
k = k[12:] |
421 |
|
|
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
422 |
|
|
if len(ancestors)>0: |
423 |
|
|
outStr += " precursorScanNum=\"%d\""%(ancestors[-1][0],) |
424 |
|
|
outStr += " precursorIntensity=\"%f\">"%(pMzIntensity,) |
425 |
|
|
else: |
426 |
|
|
outStr += ">" |
427 |
|
|
outStr += "%f</precursorMz>\n" % (pMz,) |
428 |
|
|
self.writeString(xmlFile,outStr) |
429 |
|
|
|
430 |
nje01 |
1.3 |
if self.reader.maldi() and self.version in ('2.2',): |
431 |
nje01 |
1.1 |
outStr = '<maldi' |
432 |
|
|
for (k,v) in d.items(): |
433 |
|
|
if k.startswith('maldi.'): |
434 |
|
|
k,f = k.split(':') |
435 |
|
|
k = k[6:] |
436 |
|
|
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
437 |
|
|
outStr += " plateID=\"%s\""%(d['plateID'],) |
438 |
|
|
outStr += " spotID=\"%s\""%(d['spotID'],) |
439 |
|
|
outStr += "/>\n" |
440 |
|
|
self.writeString(xmlFile,outStr) |
441 |
|
|
|
442 |
nje01 |
1.3 |
if sys.byteorder != 'big': |
443 |
|
|
s.byteswap() |
444 |
|
|
|
445 |
|
|
if debug: |
446 |
|
|
s = s[:20] |
447 |
|
|
|
448 |
|
|
specstr = s.tostring() |
449 |
|
|
if self.peakcompress: |
450 |
|
|
specstr = zlib.compress(specstr,9) |
451 |
|
|
outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"zlib\" compressedLen=\"%d\">"%(len(specstr),) |
452 |
|
|
else: |
453 |
|
|
if float(self.version) >= 3.0: |
454 |
|
|
outStr = "<peaks precision=\"32\" byteOrder=\"network\" contentType=\"m/z-int\" compressionType=\"none\" compressedLen=\"%d\">"%(len(specstr),) |
455 |
|
|
else: |
456 |
|
|
outStr = "<peaks precision=\"32\" byteOrder=\"network\" pairOrder=\"m/z-int\">" |
457 |
|
|
|
458 |
nje01 |
1.1 |
self.writeString(xmlFile,outStr) |
459 |
|
|
|
460 |
|
|
# Spec says the byte order shall be |
461 |
|
|
# network, which is the same as big endian. |
462 |
|
|
|
463 |
|
|
if sys.byteorder != 'big': |
464 |
|
|
s.byteswap() |
465 |
|
|
|
466 |
nje01 |
1.3 |
outStr = b64encode(specstr) |
467 |
|
|
self.writeString(xmlFile,outStr) |
468 |
nje01 |
1.1 |
|
469 |
|
|
outStr = "</peaks>\n" |
470 |
|
|
self.writeString(xmlFile,outStr) |
471 |
|
|
|
472 |
nje01 |
1.3 |
if self.reader.maldi() and self.version not in ('2.2',): |
473 |
nje01 |
1.1 |
outStr = '' |
474 |
|
|
for (k,v) in d.items(): |
475 |
|
|
if k.startswith('maldi.'): |
476 |
|
|
k,f = k.split(':') |
477 |
|
|
k = k[6:] |
478 |
|
|
outStr += '<nameValue' |
479 |
nje01 |
1.3 |
outStr += ' name="maldi.%s"'%(k,) |
480 |
|
|
outStr += (' value="%s"'%(f,))%(v,) |
481 |
|
|
outStr += '/>\n' |
482 |
|
|
outStr += '<nameValue' |
483 |
|
|
outStr += ' name="maldi.plateID"' |
484 |
|
|
outStr += ' value="%s"'%(d['plateID'],) |
485 |
|
|
outStr += '/>\n' |
486 |
|
|
outStr += '<nameValue' |
487 |
|
|
outStr += ' name="maldi.spotID"' |
488 |
|
|
outStr += ' value="%s"'%(d['spotID'],) |
489 |
|
|
outStr += '/>\n' |
490 |
|
|
self.writeString(xmlFile,outStr) |
491 |
|
|
|
492 |
|
|
if self.version not in ('2.2',): |
493 |
|
|
outStr = '' |
494 |
|
|
for (k,v) in d.items(): |
495 |
|
|
if k.startswith('scanOrigin.'): |
496 |
|
|
k,f = k.split(':') |
497 |
|
|
k = k[11:] |
498 |
|
|
outStr += '<nameValue' |
499 |
|
|
outStr += ' name="scanOrigin.%s"'%(k,) |
500 |
|
|
outStr += (' value="%s"'%(f,))%(v,) |
501 |
nje01 |
1.1 |
outStr += '/>\n' |
502 |
nje01 |
1.3 |
self.writeString(xmlFile,outStr) |
503 |
|
|
|
504 |
|
|
outStr = '' |
505 |
|
|
for (k,v) in d.items(): |
506 |
|
|
if k.startswith('nameValue.'): |
507 |
|
|
k,f = k.split(':') |
508 |
|
|
k = k[10:] |
509 |
|
|
outStr += '<nameValue' |
510 |
|
|
outStr += ' name="%s"'%(k,) |
511 |
|
|
outStr += (' value="%s"'%(f,))%(v,) |
512 |
|
|
outStr += '/>\n' |
513 |
|
|
if len(outStr) > 0: |
514 |
nje01 |
1.1 |
self.writeString(xmlFile,outStr) |
515 |
|
|
|
516 |
|
|
xmlFile.flush() |
517 |
|
|
|
518 |
|
|
prevSpec = s |
519 |
|
|
|
520 |
|
|
outStr = "" |
521 |
nje01 |
1.4 |
for m in xrange(0,len(ancestors)+1): |
522 |
nje01 |
1.1 |
outStr += "</scan>\n" |
523 |
|
|
|
524 |
|
|
self.writeString(xmlFile,outStr) |
525 |
|
|
self.actualScanCount = scanNumber |
526 |
|
|
|
527 |
|
|
class ToMzData: |
528 |
|
|
def __init__(self,reader,filename,cmpfmt=None,filt=None): |
529 |
|
|
self.filename = filename |
530 |
|
|
self.compressfmt = cmpfmt |
531 |
|
|
self.reader = reader |
532 |
|
|
self.actualScanCount = 0 |
533 |
|
|
self.initSHA() |
534 |
|
|
self.filter = filt |
535 |
|
|
|
536 |
|
|
def writeString(self,fh,data): |
537 |
|
|
# self.writeByteCounter += len(data) |
538 |
|
|
self.sha1.update(data) |
539 |
|
|
fh.write(data) |
540 |
|
|
|
541 |
|
|
def initSHA(self): |
542 |
|
|
self.sha1 = sha.new() |
543 |
|
|
|
544 |
|
|
def updateSHA(self,s): |
545 |
|
|
self.sha1.update(s) |
546 |
|
|
|
547 |
|
|
def getSHA(self): |
548 |
|
|
return self.sha1.hexdigest().lower() |
549 |
|
|
|
550 |
|
|
def write(self,debug=False): |
551 |
|
|
|
552 |
|
|
# Make a temporary file for the scan data to count the number of spectra. |
553 |
|
|
(tmpFileFD,self.tmpFileName) = tempfile.mkstemp(dir='.',prefix='.pymsxml') |
554 |
|
|
tmpFile = os.fdopen(tmpFileFD,'wb') |
555 |
|
|
|
556 |
|
|
# Has a number of side effects, it fills in scanIndices, and sets self.actualScanCount. |
557 |
|
|
self.write_scans(tmpFile,debug) |
558 |
|
|
|
559 |
|
|
tmpFile.close() |
560 |
|
|
|
561 |
|
|
scanhash = self.getSHA() |
562 |
|
|
|
563 |
|
|
self.initSHA() |
564 |
|
|
|
565 |
|
|
if self.compressfmt == None and not self.filename.endswith('.gz'): |
566 |
|
|
xmlFile = open(self.filename,'wb') |
567 |
|
|
elif self.compressfmt == 'gz' or self.filename.endswith('.gz'): |
568 |
|
|
if self.filename.endswith('.gz'): |
569 |
|
|
xmlFile = gzip.open(self.filename,'wb') |
570 |
|
|
else: |
571 |
|
|
xmlFile = gzip.open(self.filename+'.gz','wb') |
572 |
|
|
else: |
573 |
|
|
print >>sys.stderr, "Bad compressfmt specification" |
574 |
|
|
sys.exit(1) |
575 |
|
|
|
576 |
|
|
self.writeString (xmlFile,'<?xml version="1.0" encoding="ISO-8859-1"?>\n') |
577 |
|
|
|
578 |
|
|
outStr = "<mzData " |
579 |
|
|
outStr += "version=\"1.05\" " |
580 |
|
|
outStr += "accessionNumber=\"pymsxml:%s\" "%(scanhash,) |
581 |
|
|
outStr += "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" " |
582 |
|
|
# outStr += "xmlns=\"http://psidev.sourceforge.net/ms/xml/mzdata\" " |
583 |
|
|
outStr += "xsi:noNamespaceSchemaLocation=\"http://psidev.sourceforge.net/ms/xml/mzdata/mzdata.xsd\"" |
584 |
|
|
outStr += ">\n" |
585 |
|
|
|
586 |
|
|
self.writeString (xmlFile,outStr) |
587 |
|
|
|
588 |
|
|
outStr = "<description>\n" |
589 |
|
|
outStr += "<admin>\n" |
590 |
|
|
outStr += "<sampleName></sampleName>\n" |
591 |
|
|
outStr += "<contact>\n" |
592 |
|
|
outStr += "<name></name>\n" |
593 |
|
|
outStr += "<institution></institution>\n" |
594 |
|
|
outStr += "</contact>\n" |
595 |
|
|
outStr += "</admin>\n" |
596 |
|
|
outStr += "<instrument>\n" |
597 |
|
|
outStr += "<instrumentName></instrumentName>\n" |
598 |
|
|
outStr += "<source>\n" |
599 |
|
|
outStr += "</source>\n" |
600 |
|
|
outStr += "<analyzerList count=\"1\">\n" |
601 |
|
|
outStr += "<analyzer>\n" |
602 |
|
|
outStr += "</analyzer>\n" |
603 |
|
|
outStr += "</analyzerList>\n" |
604 |
|
|
outStr += "<detector>\n" |
605 |
|
|
outStr += "</detector>\n" |
606 |
|
|
outStr += "</instrument>\n" |
607 |
|
|
outStr += "<dataProcessing>\n" |
608 |
|
|
outStr += "<software>\n" |
609 |
|
|
outStr += "<name>%s</name>\n"%(self.reader.acquisitionSoftware(),) |
610 |
|
|
outStr += "<version>%s</version>\n"%(self.reader.acquisitionSoftwareVersion(),) |
611 |
|
|
outStr += "</software>\n" |
612 |
|
|
outStr += "<software>\n" |
613 |
|
|
nv = NameVersion() |
614 |
|
|
outStr += "<name>%s</name>\n"%(nv.name(),) |
615 |
|
|
outStr += "<version>%s</version>\n"%(nv.version(),) |
616 |
|
|
outStr += "</software>\n" |
617 |
|
|
outStr += "</dataProcessing>\n" |
618 |
|
|
outStr += "</description>\n" |
619 |
|
|
|
620 |
|
|
self.writeString (xmlFile,outStr) |
621 |
|
|
|
622 |
|
|
outStr = "<spectrumList" |
623 |
|
|
outStr += " count=\"%d\">\n"%(self.actualScanCount,) |
624 |
|
|
|
625 |
|
|
self.writeString (xmlFile,outStr) |
626 |
|
|
|
627 |
|
|
tmpFile = open(self.tmpFileName,'rb') |
628 |
|
|
while True: |
629 |
|
|
tmpStr = tmpFile.read(1024) |
630 |
|
|
if tmpStr == '': |
631 |
|
|
break |
632 |
|
|
self.writeString(xmlFile,tmpStr) |
633 |
|
|
tmpFile.close() |
634 |
|
|
os.unlink(self.tmpFileName) |
635 |
|
|
self.tmpFileName = '' |
636 |
|
|
|
637 |
|
|
outStr = "</spectrumList>\n</mzData>\n" |
638 |
|
|
|
639 |
|
|
self.writeString (xmlFile,outStr) |
640 |
|
|
|
641 |
|
|
def write_scans(self,xmlFile,debug=False): |
642 |
|
|
|
643 |
|
|
msLevel = 0 |
644 |
|
|
scanNumber = 0 |
645 |
|
|
ancestors = [] |
646 |
|
|
|
647 |
|
|
self.initSHA() |
648 |
|
|
# self.writeByteCounter = 0 |
649 |
|
|
|
650 |
|
|
for (s,d) in self.reader.spectra(): |
651 |
|
|
|
652 |
|
|
if self.filter != None and not self.filter.test(d): |
653 |
|
|
continue |
654 |
|
|
|
655 |
|
|
if debug and scanNumber >= 10: |
656 |
|
|
break |
657 |
|
|
|
658 |
|
|
if not d.has_key('msLevel'): |
659 |
|
|
print >>sys.stderr, "Required scan attributes missing." |
660 |
|
|
sys.exit(1) |
661 |
|
|
|
662 |
|
|
prevLevel = msLevel |
663 |
|
|
msLevel = d['msLevel'] |
664 |
|
|
|
665 |
|
|
if prevLevel < msLevel and prevLevel > 0: |
666 |
|
|
# We went "in" a level, push scan number of parent |
667 |
|
|
ancestors.append((scanNumber,prevSpec)) |
668 |
|
|
elif prevLevel > msLevel and msLevel > 0: |
669 |
|
|
ancestors.pop() |
670 |
|
|
|
671 |
|
|
scanNumber = scanNumber + 1 |
672 |
|
|
|
673 |
|
|
totIonCurrent = 0 |
674 |
|
|
maxmz = None |
675 |
|
|
minmz = None |
676 |
|
|
basePeakMz = None |
677 |
|
|
basePeakIntensity = None |
678 |
|
|
peaksCount = 0 |
679 |
|
|
for i in xrange(0,len(s),2): |
680 |
|
|
x = s[i]; y = s[i+1] |
681 |
|
|
if minmz is None or minmz > x: |
682 |
|
|
minmz = x |
683 |
|
|
if maxmz is None or maxmz < x: |
684 |
|
|
maxmz = x |
685 |
|
|
totIonCurrent += y |
686 |
|
|
if basePeakIntensity is None or basePeakIntensity < y: |
687 |
|
|
basePeakIntensity = y |
688 |
|
|
basePeakMz = x |
689 |
|
|
peaksCount += 1 |
690 |
|
|
|
691 |
|
|
outStr = "<spectrum id=\"%d\">\n" % (scanNumber,) |
692 |
|
|
outStr += "<spectrumDesc>\n" |
693 |
|
|
outStr += "<spectrumSettings>\n" |
694 |
|
|
|
695 |
|
|
self.writeString (xmlFile,outStr) |
696 |
|
|
|
697 |
|
|
# outStr += "<acqSpecification>\n" |
698 |
|
|
# outStr += "</acqSpecification>\n" |
699 |
|
|
|
700 |
|
|
outStr = "<spectrumInstrument" |
701 |
|
|
outStr += " msLevel=\"%d\""%(msLevel,) |
702 |
|
|
if d.has_key('scan.startMz') and \ |
703 |
|
|
d.has_key('scan.endMz'): |
704 |
|
|
outStr += " mzRangeStart=\"%f\""%(d['scan.startMz'],) |
705 |
|
|
outStr += " mzRangeStop=\"%f\""%(d['scan.endMz'],) |
706 |
|
|
outStr += ">\n" |
707 |
|
|
for (k,v) in d.items(): |
708 |
|
|
if k.startswith('scan.'): |
709 |
|
|
k,f = k.split(':') |
710 |
|
|
k = k[5:] |
711 |
|
|
|
712 |
|
|
if k == "polarity": |
713 |
|
|
if v == '+': |
714 |
|
|
outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Positive",) |
715 |
|
|
elif v == '-': |
716 |
|
|
outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Polarity\" value=\"%s\"/>\n"%("Negative",) |
717 |
|
|
elif k == 'retentionTime': |
718 |
|
|
outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"TimeInSeconds\" value=\"%f\"/>\n"%(v,) |
719 |
|
|
else: |
720 |
|
|
outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v) |
721 |
|
|
outStr += "<userParam name=\"lowMz\" value=\"%f\"/>\n"%(minmz,) |
722 |
|
|
outStr += "<userParam name=\"highMz\" value=\"%f\"/>\n"%(maxmz,) |
723 |
|
|
outStr += "<userParam name=\"totIonCurrent\" value=\"%f\"/>\n"%(totIonCurrent,) |
724 |
|
|
outStr += "<userParam name=\"basePeakMz\" value=\"%f\"/>\n"%(basePeakMz,) |
725 |
|
|
outStr += "<userParam name=\"basePeakIntensity\" value=\"%f\"/>\n"%(basePeakIntensity,) |
726 |
|
|
outStr += "</spectrumInstrument>\n" |
727 |
|
|
outStr += "</spectrumSettings>\n" |
728 |
|
|
|
729 |
|
|
self.writeString (xmlFile,outStr) |
730 |
|
|
|
731 |
|
|
if msLevel > 1: |
732 |
|
|
|
733 |
|
|
if not d.has_key('precursorMz') : |
734 |
|
|
print >>sys.stderr, "Required precursorMz attribute missing." |
735 |
|
|
sys.exit(1) |
736 |
|
|
|
737 |
|
|
# We scan the parent spectrum in the region of the |
738 |
|
|
# precursorMz to establish the intensity. Optionally, |
739 |
|
|
# we tune the precursorMz itself, on the basis of this |
740 |
|
|
|
741 |
|
|
pMz = d['precursorMz'] |
742 |
|
|
tol = self.reader.precursorPrecision(pMz) |
743 |
|
|
pMzLB = pMz-tol |
744 |
|
|
pMzUB = pMz+tol |
745 |
|
|
pMzIntensity = 0 |
746 |
|
|
|
747 |
|
|
for i in xrange(0,len(ancestors[-1][1]),2): |
748 |
|
|
x = ancestors[-1][1][i]; y = ancestors[-1][1][i+1]; |
749 |
|
|
if x < pMzLB: |
750 |
|
|
continue |
751 |
|
|
if x > pMzUB: |
752 |
|
|
break |
753 |
|
|
if pMzIntensity < y: |
754 |
|
|
pMzIntensity = y |
755 |
|
|
pMzTune = x |
756 |
|
|
|
757 |
|
|
if self.reader.precursorTune(): |
758 |
|
|
pMz = pMzTune |
759 |
|
|
|
760 |
|
|
outStr = "<precursorList count=\"1\">\n" |
761 |
|
|
|
762 |
|
|
outStr += "<precursor" |
763 |
|
|
outStr += " msLevel=\"%d\""%(msLevel,) |
764 |
|
|
outStr += " spectrumRef=\"%d\""%(ancestors[-1][0],) |
765 |
|
|
outStr += ">\n" |
766 |
|
|
|
767 |
|
|
self.writeString(xmlFile,outStr) |
768 |
|
|
|
769 |
|
|
outStr = "<ionSelection>\n" |
770 |
|
|
outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"MassToChargeRatio\" value=\"%f\"/>\n"%(pMz,) |
771 |
|
|
outStr += "<cvParam cvLabel=\"\" accession=\"\" name=\"Intensity\" value=\"%f\"/>\n"%(pMzIntensity,) |
772 |
|
|
for (k,v) in d.items(): |
773 |
|
|
if k.startswith('precursorMz.'): |
774 |
|
|
k,f = k.split(':') |
775 |
|
|
k = k[12:] |
776 |
|
|
outStr += ("<userParam name=\"%%s\" value=\"%s\"/>\n"%(f,))%(k,v) |
777 |
|
|
outStr += "</ionSelection>\n" |
778 |
|
|
|
779 |
|
|
self.writeString(xmlFile,outStr) |
780 |
|
|
|
781 |
|
|
outStr = "<activation>\n" |
782 |
|
|
outStr += "</activation>\n" |
783 |
|
|
outStr += "</precursor>\n" |
784 |
|
|
outStr += "</precursorList>\n" |
785 |
|
|
|
786 |
|
|
self.writeString(xmlFile,outStr) |
787 |
|
|
|
788 |
|
|
if self.reader.maldi(): |
789 |
|
|
outStr = '<maldi' |
790 |
|
|
for (k,v) in d.items(): |
791 |
|
|
if k.startswith('maldi.'): |
792 |
|
|
k,f = k.split(':') |
793 |
|
|
k = k[6:] |
794 |
|
|
outStr += (" %%s=\"%s\""%(f,))%(k,v) |
795 |
|
|
outStr += " plateID=\"%s\""%(d['plateID'],) |
796 |
|
|
outStr += " spotID=\"%s\""%(d['spotID'],) |
797 |
|
|
outStr += "/>\n" |
798 |
|
|
self.writeString(xmlFile,outStr) |
799 |
|
|
|
800 |
|
|
outStr = "</spectrumDesc>\n" |
801 |
|
|
self.writeString(xmlFile,outStr) |
802 |
|
|
|
803 |
|
|
outStr = "<mzArrayBinary>\n" |
804 |
|
|
outStr += "<data" |
805 |
|
|
outStr += " precision=\"32\"" |
806 |
|
|
outStr += " endian=\"%s\""%(sys.byteorder,) |
807 |
|
|
outStr += " length=\"%d\""%(peaksCount,) |
808 |
|
|
outStr += ">" |
809 |
|
|
self.writeString(xmlFile,outStr) |
810 |
|
|
|
811 |
|
|
outStr = b64encode(s[0:peaksCount*2:2].tostring()) |
812 |
|
|
if not debug: |
813 |
|
|
self.writeString(xmlFile,outStr) |
814 |
|
|
else: |
815 |
|
|
self.writeString(xmlFile,outStr[0:100]) |
816 |
|
|
|
817 |
|
|
outStr = "</data>\n</mzArrayBinary>\n" |
818 |
|
|
self.writeString(xmlFile,outStr) |
819 |
|
|
|
820 |
|
|
outStr = "<intenArrayBinary>\n" |
821 |
|
|
outStr += "<data" |
822 |
|
|
outStr += " precision=\"32\"" |
823 |
|
|
outStr += " endian=\"%s\""%(sys.byteorder,) |
824 |
|
|
outStr += " length=\"%d\""%(peaksCount,) |
825 |
|
|
outStr += ">" |
826 |
|
|
self.writeString(xmlFile,outStr) |
827 |
|
|
|
828 |
|
|
outStr = b64encode(s[1:peaksCount*2:2].tostring()) |
829 |
|
|
if not debug: |
830 |
|
|
self.writeString(xmlFile,outStr) |
831 |
|
|
else: |
832 |
|
|
self.writeString(xmlFile,outStr[0:100]) |
833 |
|
|
|
834 |
|
|
outStr = "</data>\n</intenArrayBinary>\n" |
835 |
|
|
self.writeString(xmlFile,outStr) |
836 |
|
|
|
837 |
|
|
if self.reader.maldi(): |
838 |
|
|
outStr = '' |
839 |
|
|
for (k,v) in d.items(): |
840 |
|
|
if k.startswith('maldi.'): |
841 |
|
|
k,f = k.split(':') |
842 |
|
|
k = k[6:] |
843 |
|
|
outStr += '<nameValue' |
844 |
|
|
outStr += ' name="%s"'%(k,) |
845 |
|
|
outStr += (' value="%f"'%(f,))%(v,) |
846 |
|
|
outStr += '/>\n' |
847 |
|
|
## outStr += '<nameValue' |
848 |
|
|
## outStr += ' name="plateID"' |
849 |
|
|
## outStr += ' value="%s"'%(d['plateID'],) |
850 |
|
|
## outStr += '/>\n' |
851 |
|
|
## outStr += '<nameValue' |
852 |
|
|
## outStr += ' name="spotID"' |
853 |
|
|
## outStr += ' value="%s"'%(d['spotID'],) |
854 |
|
|
## outStr += '/>\n' |
855 |
|
|
self.writeString(xmlFile,outStr) |
856 |
|
|
|
857 |
|
|
outStr = "</spectrum>\n" |
858 |
|
|
self.writeString(xmlFile,outStr) |
859 |
|
|
xmlFile.flush() |
860 |
|
|
|
861 |
|
|
prevSpec = s |
862 |
|
|
|
863 |
|
|
self.actualScanCount = scanNumber |
864 |
|
|
|
865 |
|
|
class QStarWiff: |
866 |
|
|
def __init__(self,filename,peaks=None): |
867 |
|
|
self.filename = filename |
868 |
|
|
self.filehash = fileSHA(filename) |
869 |
|
|
self.theTIC = None |
870 |
|
|
self.theWF = None |
871 |
|
|
self.theFMANSpecData = None |
872 |
|
|
self.startTime = None |
873 |
|
|
self.stopTime = None |
874 |
nje01 |
1.2 |
self.applyPeakDetect = [] |
875 |
nje01 |
1.1 |
if peaks: |
876 |
|
|
self.applyPeakDetect = map(int,peaks.split(',')) |
877 |
|
|
|
878 |
|
|
if not os.path.exists(self.filename): |
879 |
|
|
print >>sys.stderr, "Filename %s does not exist."%(self.filename,) |
880 |
|
|
sys.exit(1) |
881 |
|
|
|
882 |
|
|
self.filename = os.path.abspath(self.filename) |
883 |
|
|
|
884 |
|
|
def __del__(self): |
885 |
|
|
# Probably unnecessary |
886 |
|
|
self.close() |
887 |
|
|
|
888 |
|
|
def peakDetect(self,msLevel): |
889 |
|
|
return ((msLevel in self.applyPeakDetect) and (msLevel != 1)) |
890 |
|
|
|
891 |
|
|
def open(self): |
892 |
|
|
|
893 |
|
|
if self.theTIC is None: |
894 |
|
|
|
895 |
|
|
from win32com.client import Dispatch |
896 |
|
|
|
897 |
|
|
self.theFMANSpecData = Dispatch('Analyst.FMANSpecData') |
898 |
|
|
self.theTIC = Dispatch('Analyst.FMANChromData') |
899 |
|
|
self.theSPL = Dispatch('Analyst.SpectralPeakList') |
900 |
|
|
|
901 |
|
|
self.theFMANSpecData.WiffFileName = self.filename |
902 |
|
|
self.theTIC.WiffFileName = self.filename |
903 |
|
|
self.theWF = self.theFMANSpecData.GetWiffFileObject() |
904 |
|
|
|
905 |
|
|
self.theTIC.SetToTIC(1,0,0) |
906 |
|
|
|
907 |
|
|
self.startTime = self.theTIC.GetDataPointXValue(1) |
908 |
|
|
self.stopTime = self.theTIC.GetDataPointXValue(self.theTIC.GetNumberOfDataPoints()) |
909 |
|
|
|
910 |
|
|
def close(self): |
911 |
|
|
|
912 |
|
|
if not self.theTIC is None: |
913 |
|
|
|
914 |
|
|
if not self.theWF is None: |
915 |
|
|
self.theWF.CloseWiffFile() |
916 |
|
|
self.theTIC = None |
917 |
|
|
self.theFMANSpecData = None |
918 |
|
|
self.theWF = None |
919 |
|
|
|
920 |
|
|
def spectra(self): |
921 |
|
|
self.open() |
922 |
|
|
spectrumCount=0; |
923 |
|
|
|
924 |
|
|
numberOfSamples = self.theWF.GetActualNumberOfSamples() |
925 |
|
|
for i in xrange(1,numberOfSamples+1): |
926 |
|
|
|
927 |
|
|
# print >>sys.stderr, "Sample %d"%i |
928 |
|
|
# print >>sys.stderr, "Sample Name: %s"%(self.theWF.GetSampleName(i),) |
929 |
|
|
|
930 |
|
|
sampleName = self.theWF.GetSampleName(i) |
931 |
|
|
|
932 |
|
|
numberOfPeriods = self.theWF.GetActualNumberOfPeriods(i) |
933 |
|
|
for j in xrange(0,numberOfPeriods): |
934 |
|
|
|
935 |
|
|
numberOfExperiments = self.theWF.GetNumberOfExperiments(i,j) |
936 |
|
|
self.theTIC.SetToTIC(i, j, 0) |
937 |
|
|
|
938 |
|
|
numberOfTICDataPoints = self.theTIC.GetNumberOfDataPoints() |
939 |
|
|
for l in xrange(1,numberOfTICDataPoints+1): |
940 |
|
|
|
941 |
|
|
for k in xrange(0,numberOfExperiments): |
942 |
|
|
|
943 |
|
|
self.theExperiment = self.theWF.GetExperimentObject(i, j, k) |
944 |
|
|
self.theTIC.SetToTIC(i, j, k) |
945 |
|
|
|
946 |
|
|
# Convert to seconds? |
947 |
|
|
xValue = self.theTIC.GetDataPointXValue(l) * 60 |
948 |
|
|
|
949 |
|
|
spectrumCount += 1; |
950 |
|
|
|
951 |
|
|
# If any Ion current at time "l" |
952 |
|
|
if self.theTIC.GetDataPointYValue(l) > 0: |
953 |
|
|
|
954 |
|
|
self.theFMANSpecData.SetSpectrum(i, j, k, xValue, xValue) |
955 |
|
|
|
956 |
|
|
if self.theExperiment.ScanType == 9: |
957 |
|
|
msLevel = 2 |
958 |
|
|
m = re.search(r'[(]([0-9.]+)[)]',self.theFMANSpecData.DataTitle) |
959 |
|
|
fixedMass = float(m.group(1)) |
960 |
|
|
else: |
961 |
|
|
msLevel = 1 |
962 |
|
|
|
963 |
|
|
startMass = self.theFMANSpecData.GetStartMass() |
964 |
|
|
stopMass = self.theFMANSpecData.GetStopMass() |
965 |
|
|
dataArr = array('f'); |
966 |
|
|
peakDetect = 'False' |
967 |
|
|
if self.peakDetect(msLevel): |
968 |
|
|
peakDetect = 'True' |
969 |
|
|
minY,maxY = self.theFMANSpecData.GetYValueRange() |
970 |
nje01 |
1.3 |
# print minY,maxY |
971 |
nje01 |
1.1 |
self.theFMANSpecData.Threshold(maxY*0.01) |
972 |
|
|
self.theSPL.FindPeaksInDataObject(self.theFMANSpecData,50.0) |
973 |
|
|
numPeaks = self.theSPL.GetNumberOfPeaks() |
974 |
nje01 |
1.3 |
# print numPeaks |
975 |
nje01 |
1.1 |
for m in xrange(1,numPeaks+1): |
976 |
|
|
(x,w,y,yp) = self.theSPL.GetPeak(m) |
977 |
|
|
if y <= 0: |
978 |
|
|
continue |
979 |
|
|
dataArr.append(x) |
980 |
|
|
dataArr.append(y) |
981 |
|
|
else: |
982 |
|
|
numPoints = self.theFMANSpecData.GetNumberOfDataPoints() |
983 |
|
|
for m in xrange(1,numPoints+1): |
984 |
|
|
if self.theFMANSpecData.GetDataPointYValue(m) <= 0: |
985 |
|
|
continue |
986 |
|
|
x = self.theFMANSpecData.GetDataPointXValue(m) |
987 |
|
|
dataArr.append(x) |
988 |
|
|
y = self.theFMANSpecData.GetDataPointYValue(m) |
989 |
|
|
dataArr.append(y) |
990 |
|
|
polarity = self.theExperiment.Polarity |
991 |
|
|
|
992 |
|
|
if len(dataArr) == 0: |
993 |
|
|
continue |
994 |
|
|
|
995 |
|
|
if polarity == 0: |
996 |
|
|
polarity_val = "+" |
997 |
|
|
else: |
998 |
|
|
polarity_val = "-" |
999 |
|
|
|
1000 |
|
|
rt = "PT%fS"%(xValue,) |
1001 |
|
|
|
1002 |
|
|
# Scan level attributes |
1003 |
|
|
# Required by mzXML (no format spec. needed) |
1004 |
nje01 |
1.3 |
d = {'msLevel':msLevel} |
1005 |
nje01 |
1.1 |
# Optional ('scan.' and prefixformat spec. needed) |
1006 |
|
|
d.update({ |
1007 |
|
|
'scan.retentionTime:PT%fS':xValue, |
1008 |
|
|
'scan.polarity:%s':polarity_val, |
1009 |
|
|
'scan.startMz:%f':startMass, |
1010 |
|
|
'scan.endMz:%f':stopMass, |
1011 |
|
|
}) |
1012 |
nje01 |
1.2 |
d.update({'scanOrigin.parentFileID:%s': self.filehash, |
1013 |
nje01 |
1.1 |
'scanOrigin.num:%d': spectrumCount}) |
1014 |
nje01 |
1.3 |
d.update({'nameValue.sample:%d':i, |
1015 |
|
|
'nameValue.period:%d':j, |
1016 |
|
|
'nameValue.experiment:%d':k, |
1017 |
|
|
'nameValue.sampleName:%s':sampleName}) |
1018 |
|
|
|
1019 |
nje01 |
1.1 |
if msLevel == 1: |
1020 |
|
|
yield (dataArr,d) |
1021 |
|
|
else: |
1022 |
|
|
# tandem MS scans add the following |
1023 |
|
|
# Required by mzXML |
1024 |
|
|
d.update({'precursorMz':fixedMass}) |
1025 |
|
|
# Optional |
1026 |
|
|
# d.update({'precursorMz.precursorMzIntensity:f':0.0}) |
1027 |
|
|
yield (dataArr,d) |
1028 |
|
|
|
1029 |
|
|
def getMsRunMetaData(self): |
1030 |
|
|
self.open() |
1031 |
|
|
d = {'startTime:PT%fS':self.startTime, |
1032 |
|
|
'endTime:PT%fS':self.stopTime, |
1033 |
|
|
} |
1034 |
|
|
return d |
1035 |
|
|
|
1036 |
|
|
def getFilenames(self): |
1037 |
|
|
return [ (self.filename,self.filehash) ] |
1038 |
|
|
|
1039 |
nje01 |
1.3 |
def msInstrumentID(self): |
1040 |
|
|
return "1" |
1041 |
|
|
|
1042 |
nje01 |
1.1 |
def msManufacturer(self): |
1043 |
|
|
return "ABI / SCIEX" |
1044 |
|
|
|
1045 |
|
|
def msModel(self): |
1046 |
|
|
return "QSTAR" |
1047 |
|
|
|
1048 |
|
|
def msIonisation(self): |
1049 |
|
|
return "ESI" |
1050 |
|
|
|
1051 |
|
|
def msMassAnalyzer(self): |
1052 |
|
|
return "Quadrupole" |
1053 |
|
|
|
1054 |
|
|
def msDetector(self): |
1055 |
nje01 |
1.3 |
return "LE" |
1056 |
nje01 |
1.1 |
|
1057 |
|
|
def acquisitionSoftware(self): |
1058 |
|
|
return "Analyst" |
1059 |
|
|
|
1060 |
|
|
def acquisitionSoftwareVersion(self): |
1061 |
|
|
return "1.0" |
1062 |
|
|
|
1063 |
|
|
def precursorPrecision(self,mz): |
1064 |
|
|
return .1 |
1065 |
|
|
|
1066 |
|
|
def precursorTune(self): |
1067 |
|
|
return True |
1068 |
|
|
|
1069 |
|
|
def lc(self): |
1070 |
|
|
return True |
1071 |
|
|
|
1072 |
|
|
def maldi(self): |
1073 |
|
|
return not self.lc() |
1074 |
|
|
|
1075 |
|
|
class mzXML_md_sax(handler.ContentHandler): |
1076 |
|
|
|
1077 |
|
|
def __init__(self): |
1078 |
|
|
self.md = {} |
1079 |
|
|
self.context = '' |
1080 |
|
|
|
1081 |
|
|
def startElement(self, name, attrs): |
1082 |
|
|
self.context += ':%s'%(name,) |
1083 |
|
|
# print >>sys.stderr, ">>",self.context |
1084 |
|
|
# sys.stderr.flush() |
1085 |
|
|
if self.context.endswith(':msRun'): |
1086 |
nje01 |
1.4 |
if attrs.has_key('startTime'): |
1087 |
|
|
self.md['startTime'] = float(attrs['startTime'][2:-1]) |
1088 |
|
|
if attrs.has_key('endTime'): |
1089 |
|
|
self.md['endTime'] = float(attrs['endTime'][2:-1]) |
1090 |
nje01 |
1.1 |
elif self.context.endswith(':parentFile'): |
1091 |
nje01 |
1.4 |
if not self.md.has_key('parentFile'): |
1092 |
|
|
self.md['parentFile'] = [] |
1093 |
|
|
self.md['parentFile'].append((unquote(attrs['fileName']),attrs['fileType'],attrs['fileSha1'])) |
1094 |
nje01 |
1.1 |
elif self.context.endswith(':instrument'): |
1095 |
|
|
self.md['msManufacturer'] = attrs['manufacturer'] |
1096 |
|
|
self.md['msModel'] = attrs['model'] |
1097 |
|
|
self.md['msIonisation'] = attrs['ionisation'] |
1098 |
|
|
self.md['msMassAnalyzer'] = attrs['msType'] |
1099 |
|
|
elif self.context.endswith(':msInstrument:msManufacturer'): |
1100 |
|
|
self.md['msManufacturer'] = attrs['value'] |
1101 |
|
|
elif self.context.endswith(':msInstrument:msModel'): |
1102 |
|
|
self.md['msModel'] = attrs['value'] |
1103 |
|
|
elif self.context.endswith(':msInstrument:msIonisation'): |
1104 |
|
|
self.md['msIonisation'] = attrs['value'] |
1105 |
|
|
elif self.context.endswith(':msInstrument:msMassAnalyzer'): |
1106 |
|
|
self.md['msMassAnalyzer'] = attrs['value'] |
1107 |
|
|
elif self.context.endswith(':msInstrument:msDetector'): |
1108 |
|
|
self.md['msDetector'] = attrs['value'] |
1109 |
|
|
elif self.context.endswith(':msInstrument:software'): |
1110 |
|
|
if attrs['type'] == 'acquisition': |
1111 |
|
|
self.md['acquisitionSoftware'] = attrs['name'] |
1112 |
|
|
self.md['acquisitionSoftwareVersion'] = attrs['version'] |
1113 |
|
|
elif self.context.endswith(':instrument:software'): |
1114 |
|
|
if attrs['type'] == 'acquisition': |
1115 |
|
|
self.md['acquisitionSoftware'] = attrs['name'] |
1116 |
|
|
self.md['acquisitionSoftwareVersion'] = attrs['version'] |
1117 |
|
|
elif self.context.endswith(':scan'): |
1118 |
|
|
raise SAXException("Early termination") |
1119 |
|
|
|
1120 |
|
|
def endElement(self, name): |
1121 |
|
|
# print >>sys.stderr, "<<",self.context |
1122 |
|
|
# sys.stderr.flush() |
1123 |
|
|
self.context = self.context[0:-(len(name)+1)] |
1124 |
|
|
|
1125 |
|
|
|
1126 |
|
|
class mzXML_spec_sax(handler.ContentHandler): |
1127 |
|
|
|
1128 |
|
|
def __init__(self): |
1129 |
|
|
self.context = '' |
1130 |
|
|
self.content = '' |
1131 |
|
|
self.spectra = [] |
1132 |
|
|
self.done = False |
1133 |
|
|
self.scancount = 0 |
1134 |
|
|
self.scanmax = 1000 |
1135 |
|
|
self.scanstart = 0 |
1136 |
|
|
|
1137 |
|
|
def startElement(self, name, attrs): |
1138 |
|
|
self.context += ':%s'%(name,) |
1139 |
|
|
# print >>sys.stderr, ">>",self.context |
1140 |
|
|
# sys.stderr.flush() |
1141 |
|
|
self.content = '' |
1142 |
|
|
if name == 'scan': |
1143 |
|
|
self.scanmd = {} |
1144 |
|
|
self.scanlevel = int(attrs['msLevel']) |
1145 |
|
|
self.scannum = int(attrs['num']) |
1146 |
|
|
if attrs.has_key('retentionTime'): |
1147 |
|
|
self.rt = float(attrs['retentionTime'][2:-1]) |
1148 |
nje01 |
1.4 |
if attrs.has_key('endMz'): |
1149 |
|
|
self.endMz = float(attrs['endMz']) |
1150 |
|
|
if attrs.has_key('startMz'): |
1151 |
|
|
self.startMz = float(attrs['startMz']) |
1152 |
nje01 |
1.1 |
self.polarity = attrs.get('polarity',None) |
1153 |
|
|
for (k,v) in attrs.items(): |
1154 |
|
|
self.scanmd[k] = v |
1155 |
|
|
self.precursorMz = None |
1156 |
|
|
self.scancount += 1 |
1157 |
|
|
elif name == 'msRun': |
1158 |
|
|
self.scancount = 0 |
1159 |
|
|
self.spectra = [] |
1160 |
|
|
self.context = ':msRun' |
1161 |
|
|
elif name == 'scanOrigin': |
1162 |
|
|
self.scanmd['scanOrigin'] = attrs |
1163 |
nje01 |
1.5 |
elif self.context.endswith(':scan:nameValue'): |
1164 |
|
|
self.scanmd['nameValue.%s:%%s'%attrs['name']] = attrs['value'] |
1165 |
nje01 |
1.1 |
|
1166 |
|
|
def characters(self, content): |
1167 |
|
|
if self.scancount >= self.scanstart: |
1168 |
|
|
if self.context.endswith(':peaks') or \ |
1169 |
|
|
self.context.endswith(':precursorMz'): |
1170 |
|
|
self.content += content |
1171 |
|
|
|
1172 |
|
|
def endElement(self, name): |
1173 |
|
|
if self.scancount >= self.scanstart: |
1174 |
|
|
if self.context.endswith('scan:peaks'): |
1175 |
|
|
|
1176 |
nje01 |
1.5 |
self.spec = array('f') |
1177 |
nje01 |
1.1 |
# print >>sys.stderr, len(self.content), len(b64decode(self.content)), self.content |
1178 |
nje01 |
1.5 |
self.spec.fromstring(b64decode(self.content)) |
1179 |
nje01 |
1.1 |
if sys.byteorder != 'big': |
1180 |
nje01 |
1.5 |
self.spec.byteswap() |
1181 |
|
|
elif self.context.endswith(':scan'): |
1182 |
nje01 |
1.1 |
d = {'msLevel':self.scanlevel} |
1183 |
|
|
# Optional ('scan.' and prefixformat spec. needed) |
1184 |
nje01 |
1.4 |
if hasattr(self,'rt'): |
1185 |
|
|
d.update({'scan.retentionTime:PT%fS':self.rt}) |
1186 |
|
|
if hasattr(self,'startMz'): |
1187 |
|
|
d.update({'scan.startMz:%f':self.startMz}) |
1188 |
|
|
if hasattr(self,'endMz'): |
1189 |
|
|
d.update({'scan.endMz:%f':self.endMz}) |
1190 |
nje01 |
1.1 |
if self.polarity != None: |
1191 |
|
|
d.update({'scan.polarity:%s':self.polarity}) |
1192 |
|
|
if self.scanlevel == 2: |
1193 |
|
|
d.update({'precursorMz':self.precursorMz}) |
1194 |
|
|
if self.scanmd.has_key('scanOrigin'): |
1195 |
nje01 |
1.2 |
d.update({'scanOrigin.parentFileID:%s': self.scanmd['scanOrigin']['parentFileID'], |
1196 |
nje01 |
1.5 |
'scanOrigin.num:%d': int(self.scanmd['scanOrigin']['num'])}) |
1197 |
nje01 |
1.1 |
for (k,v) in self.scanmd.items(): |
1198 |
|
|
if not d.has_key(k): |
1199 |
|
|
d[k] = v |
1200 |
nje01 |
1.5 |
self.spectra.append((self.spec,d)) |
1201 |
nje01 |
1.1 |
if len(self.spectra) >= self.scanmax: |
1202 |
|
|
self.scanstart = self.scancount+1 |
1203 |
|
|
raise SAXException("Early termination") |
1204 |
|
|
elif self.context.endswith('scan:precursorMz'): |
1205 |
|
|
self.precursorMz = float(self.content) |
1206 |
|
|
if self.context.endswith('msRun'): |
1207 |
|
|
self.done = True |
1208 |
|
|
# print >>sys.stderr, "<<",self.context |
1209 |
|
|
# sys.stderr.flush() |
1210 |
|
|
self.context = self.context[0:-(len(name)+1)] |
1211 |
|
|
|
1212 |
|
|
class FromMzXML: |
1213 |
|
|
def __init__(self,filename): |
1214 |
|
|
self.filename = filename |
1215 |
|
|
self.parser = None |
1216 |
|
|
|
1217 |
|
|
if not os.path.exists(self.filename): |
1218 |
|
|
print >>sys.stderr, "Filename %s does not exist."%(self.filename,) |
1219 |
|
|
sys.exit(1) |
1220 |
|
|
|
1221 |
|
|
if self.filename.endswith('.gz'): |
1222 |
|
|
self.compressfmt = 'gz' |
1223 |
|
|
else: |
1224 |
|
|
self.compressfmt = None |
1225 |
|
|
|
1226 |
|
|
def __del__(self): |
1227 |
|
|
self.close() |
1228 |
|
|
|
1229 |
|
|
def open(self): |
1230 |
|
|
|
1231 |
|
|
if self.parser == None: |
1232 |
|
|
# print >>sys.stderr, "Parsing out meta-data..." |
1233 |
|
|
# sys.stderr.flush() |
1234 |
|
|
self.parser = make_parser() |
1235 |
|
|
self.handler = mzXML_md_sax() |
1236 |
|
|
self.parser.setContentHandler(self.handler) |
1237 |
|
|
try: |
1238 |
|
|
if self.compressfmt == None: |
1239 |
|
|
self.parser.parse(self.filename) |
1240 |
|
|
elif self.compressfmt == 'gz': |
1241 |
|
|
self.parser.parse(gzip.open(self.filename,'r')) |
1242 |
|
|
else: |
1243 |
|
|
print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,) |
1244 |
|
|
sys.exit(1) |
1245 |
|
|
except SAXException: |
1246 |
|
|
pass |
1247 |
|
|
|
1248 |
|
|
self.md = self.handler.md |
1249 |
|
|
|
1250 |
|
|
# print >>sys.stderr, self.md |
1251 |
|
|
# sys.stderr.flush() |
1252 |
|
|
|
1253 |
|
|
# print >>sys.stderr, "Set up spectrum parser..." |
1254 |
|
|
# sys.stderr.flush() |
1255 |
|
|
self.parser = make_parser() |
1256 |
|
|
self.handler = mzXML_spec_sax() |
1257 |
|
|
self.parser.setContentHandler(self.handler) |
1258 |
|
|
|
1259 |
|
|
def close(self): |
1260 |
|
|
pass |
1261 |
|
|
|
1262 |
|
|
def spectra(self): |
1263 |
|
|
self.open() |
1264 |
|
|
while True: |
1265 |
|
|
# print >>sys.stderr, "Spectrum parser to read spectra from",self.handler.scanstart |
1266 |
|
|
# sys.stderr.flush() |
1267 |
|
|
try: |
1268 |
|
|
if self.compressfmt == None: |
1269 |
|
|
self.parser.parse(self.filename) |
1270 |
|
|
elif self.compressfmt == 'gz': |
1271 |
|
|
self.parser.parse(gzip.open(self.filename,'r')) |
1272 |
|
|
else: |
1273 |
|
|
print >>sys.stderr, "Bad compressfmt specification: %s"%(self.compressfmt,) |
1274 |
|
|
sys.exit(1) |
1275 |
|
|
except SAXException: |
1276 |
|
|
pass |
1277 |
|
|
for (s,d) in self.handler.spectra: |
1278 |
|
|
# print >>sys.stderr, "Yeild spectrum number:",d['num'] |
1279 |
|
|
yield (s,d) |
1280 |
|
|
if self.handler.done: |
1281 |
|
|
break |
1282 |
|
|
|
1283 |
|
|
def getMsRunMetaData(self): |
1284 |
|
|
self.open() |
1285 |
nje01 |
1.4 |
d = {} |
1286 |
|
|
if self.md.has_key('startTime'): |
1287 |
|
|
d.update({'startTime:PT%fS':self.md['startTime']}) |
1288 |
|
|
if self.md.has_key('endTime'): |
1289 |
|
|
d.update({'endTime:PT%fS':self.md['endTime']}) |
1290 |
nje01 |
1.1 |
return d |
1291 |
|
|
|
1292 |
|
|
def getFilenames(self): |
1293 |
nje01 |
1.4 |
return self.md['parentFile'] |
1294 |
|
|
|
1295 |
|
|
def msInstrumentID(self): |
1296 |
|
|
return "1" |
1297 |
|
|
|
1298 |
|
|
def peakDetect(self,level): |
1299 |
|
|
return False |
1300 |
nje01 |
1.1 |
|
1301 |
|
|
def msManufacturer(self): |
1302 |
|
|
return self.md.get('msManufacturer','') |
1303 |
|
|
|
1304 |
|
|
def msModel(self): |
1305 |
|
|
return self.md.get('msModel','') |
1306 |
|
|
|
1307 |
|
|
def msIonisation(self): |
1308 |
|
|
return self.md.get('msIonisation','') |
1309 |
|
|
|
1310 |
|
|
def msMassAnalyzer(self): |
1311 |
|
|
return self.md.get('msMassAnalyzer','') |
1312 |
|
|
|
1313 |
|
|
def msDetector(self): |
1314 |
|
|
return self.md.get('msDetector','') |
1315 |
|
|
|
1316 |
|
|
def acquisitionSoftware(self): |
1317 |
|
|
return self.md.get('acquisitionSoftware','') |
1318 |
|
|
|
1319 |
|
|
def acquisitionSoftwareVersion(self): |
1320 |
|
|
return self.md.get('acquisitionSoftwareVersion','') |
1321 |
|
|
|
1322 |
|
|
def precursorPrecision(self,mz): |
1323 |
|
|
return 0 |
1324 |
|
|
|
1325 |
|
|
def precursorTune(self): |
1326 |
|
|
return False |
1327 |
|
|
|
1328 |
|
|
def lc(self): |
1329 |
|
|
return True |
1330 |
|
|
|
1331 |
|
|
def maldi(self): |
1332 |
|
|
return not self.lc() |
1333 |
|
|
|
1334 |
|
|
class AB4700T2D: |
1335 |
|
|
def __init__(self,filename,peaks=None): |
1336 |
|
|
self.filename = filename |
1337 |
|
|
self.datafiles = None |
1338 |
|
|
self.deapp = None |
1339 |
|
|
self.metadata = {} |
1340 |
|
|
self.platespots = {} |
1341 |
|
|
self.applyPeakDetect = [] |
1342 |
|
|
if peaks: |
1343 |
|
|
self.applyPeakDetect = map(int,peaks.split(',')) |
1344 |
|
|
|
1345 |
|
|
if not os.path.exists(self.filename): |
1346 |
|
|
print >>sys.stderr, "Filename %s does not exist."%(self.filename,) |
1347 |
|
|
sys.exit(1) |
1348 |
|
|
|
1349 |
|
|
self.filename = os.path.abspath(self.filename) |
1350 |
|
|
|
1351 |
|
|
self.dir = os.path.split(self.filename)[0] |
1352 |
|
|
|
1353 |
|
|
def __del__(self): |
1354 |
|
|
# Probably unnecessary |
1355 |
|
|
self.close() |
1356 |
|
|
|
1357 |
|
|
def open(self): |
1358 |
|
|
|
1359 |
|
|
if self.deapp is None: |
1360 |
|
|
|
1361 |
|
|
self.metadata['PLATEDEF'] = [] |
1362 |
|
|
self.metadata['PLATE'] = [] |
1363 |
|
|
self.metadata['SPOT'] = [] |
1364 |
|
|
self.metadata['SCAN'] = [] |
1365 |
|
|
|
1366 |
|
|
h = open(self.filename,'rb') |
1367 |
|
|
r = csv.reader(h,'excel-tab') |
1368 |
|
|
for l in r: |
1369 |
|
|
if len(l) == 0: |
1370 |
|
|
continue |
1371 |
|
|
kw = l.pop(0).upper() |
1372 |
|
|
if kw in ('PLATEDEF','PLATE','SPOT','SCAN'): |
1373 |
|
|
self.metadata[kw].append({}) |
1374 |
|
|
while len(l)>=2: |
1375 |
|
|
k = l.pop(0);v = l.pop(0); |
1376 |
|
|
if (kw == 'PLATE' and \ |
1377 |
|
|
(k in ('plateID','spotXCount','spotYCount','plateManufacturer','plateModel'))) or \ |
1378 |
|
|
(kw == 'PLATEDEF' and \ |
1379 |
|
|
(k in ('plateID','plateManufacturer','plateModel','spotNaming','maldiMatrix'))) or \ |
1380 |
|
|
(kw == 'SPOT' and \ |
1381 |
|
|
(k in ('plateID','spotID','spotXPosition','spotYPosition','maldiMatrix'))) or \ |
1382 |
|
|
(kw == 'SCAN' and \ |
1383 |
|
|
(k in ('plateID','spotID','filename','index'))): |
1384 |
|
|
self.metadata[kw][-1][k] = v |
1385 |
|
|
elif k == '': |
1386 |
|
|
break |
1387 |
|
|
else: |
1388 |
|
|
print >>sys.stderr,"Bad key %s for %s row"%(k,kw) |
1389 |
|
|
sys.exit(1) |
1390 |
|
|
h.close() |
1391 |
|
|
|
1392 |
|
|
for pd in self.metadata['PLATEDEF']: |
1393 |
|
|
# insert all the gory details needed for a particular plate model etc... |
1394 |
|
|
if pd['plateManufacturer'] == 'ABI / SCIEX' and pd['plateModel'] == '01-192+06-BB': |
1395 |
|
|
if not pd.has_key('spotNaming') or not pd.has_key('maldiMatrix') or \ |
1396 |
|
|
not pd['spotNaming'] in ('alpha','parallel','antiparallel'): |
1397 |
|
|
print >>sys.stderr,"Bad plate definition, missing maldiMatrix or spotNaming, or bad spotNaming value." |
1398 |
|
|
sys.exit(1) |
1399 |
|
|
self.metadata['PLATE'].append({ |
1400 |
|
|
'plateID':pd['plateID'], |
1401 |
|
|
'plateManufacturer':pd['plateManufacturer'], |
1402 |
|
|
'plateModel':pd['plateModel'], |
1403 |
|
|
'spotXCount':24, |
1404 |
|
|
'spotYCount':8, |
1405 |
|
|
}) |
1406 |
|
|
if pd['spotNaming'] == 'alpha': |
1407 |
|
|
for y in xrange(0,8): |
1408 |
|
|
for x in xrange(0,24): |
1409 |
|
|
self.metadata['SPOT'].append({ |
1410 |
|
|
'plateID':pd['plateID'], |
1411 |
|
|
'spotID':"%c%d"%(y+ord('A'),x+1), |
1412 |
|
|
'spotXPosition':x, |
1413 |
|
|
'spotYPosition':2*y+(x%2), |
1414 |
|
|
'maldiMatrix':pd['maldiMatrix'] |
1415 |
|
|
}) |
1416 |
|
|
else: |
1417 |
|
|
print >>sys.stderr,"Valid spotNaming value, not yet implemented." |
1418 |
|
|
sys.exit(1) |
1419 |
|
|
|
1420 |
|
|
for pl in self.metadata['PLATE']: |
1421 |
|
|
self.platespots[pl['plateID']] = [] |
1422 |
|
|
|
1423 |
|
|
for sp in self.metadata['SPOT']: |
1424 |
|
|
self.platespots[sp['plateID']].append(sp) |
1425 |
|
|
|
1426 |
|
|
self.datafiles = [ (os.path.join(self.dir,md['filename']),int(md['index'])) for md in self.metadata['SCAN'] ] |
1427 |
|
|
self.distinct_datafiles = dict([(os.path.join(self.dir,md['filename']), |
1428 |
|
|
fileSHA(os.path.join(self.dir,md['filename']))) for md in self.metadata['SCAN']]) |
1429 |
|
|
|
1430 |
|
|
self.maptoscan = {} |
1431 |
|
|
index = 0 |
1432 |
|
|
for md in self.metadata['SCAN']: |
1433 |
|
|
self.maptoscan[(os.path.join(self.dir,md['filename']),int(md['index']))] = index |
1434 |
|
|
index += 1 |
1435 |
|
|
|
1436 |
|
|
from win32com.client import Dispatch, gencache |
1437 |
|
|
self.deapp = Dispatch('DataExplorer.Application', |
1438 |
|
|
resultCLSID='{3FED40F1-D409-11D1-8B56-0060971CB54B}') |
1439 |
|
|
self.delib = gencache.EnsureModule('{06972F50-13F6-11D3-A5CB-0060971CB54B}',0,4,2) |
1440 |
|
|
self.deapp.AutomatedProcessing = 1 |
1441 |
|
|
self.deapp.Visible = 0 |
1442 |
|
|
|
1443 |
|
|
def close(self): |
1444 |
|
|
|
1445 |
|
|
if not self.deapp is None: |
1446 |
|
|
self.deapp.Quit() |
1447 |
|
|
self.deapp = None |
1448 |
|
|
|
1449 |
|
|
def peakDetect(self,msLevel): |
1450 |
|
|
return msLevel in self.applyPeakDetect |
1451 |
|
|
|
1452 |
|
|
# remove .set and .cts files to prevent DataExplorer crashes - DT 11-1-2006 |
1453 |
|
|
def remsettingsfile(self,fn): |
1454 |
nje01 |
1.2 |
# print "rmsettingsfile: %s"%fn |
1455 |
|
|
if ((len(fn) > 3) and (fn[-4:].lower() == '.t2d')): |
1456 |
|
|
path2del = fn[:-4] + '.cts' |
1457 |
|
|
if os.path.exists(path2del): |
1458 |
|
|
os.unlink(path2del) |
1459 |
|
|
path2del = fn[:-4] + '.set' |
1460 |
|
|
if os.path.exists(path2del): |
1461 |
|
|
os.unlink(path2del) |
1462 |
nje01 |
1.1 |
|
1463 |
|
|
def spectra(self): |
1464 |
|
|
self.open() |
1465 |
|
|
|
1466 |
|
|
prevfile = '' |
1467 |
|
|
for (f,i) in self.datafiles: |
1468 |
|
|
|
1469 |
|
|
if f != prevfile: |
1470 |
|
|
self.deapp.Documents.Close() |
1471 |
nje01 |
1.2 |
self.remsettingsfile(prevfile) |
1472 |
nje01 |
1.1 |
|
1473 |
|
|
if not os.path.exists(f): |
1474 |
|
|
print >>sys.stderr, "Filename %s does not exist."%(f,) |
1475 |
|
|
sys.exit(1) |
1476 |
|
|
|
1477 |
|
|
self.remsettingsfile(f) |
1478 |
nje01 |
1.2 |
self.deapp.Documents.Open(f) |
1479 |
|
|
# print "open %s"%f |
1480 |
nje01 |
1.1 |
|
1481 |
|
|
|
1482 |
|
|
doc = self.deapp.Documents.Item(0) |
1483 |
|
|
sv = doc.SpecView |
1484 |
|
|
|
1485 |
|
|
sv.SetSpectrumAt(i-1) |
1486 |
|
|
|
1487 |
|
|
(tf,fixedMass) = doc.InstrumentSettings.GetSetting(self.delib.constants.dePreCursorIon,i-1,None) |
1488 |
|
|
|
1489 |
|
|
if fixedMass > 0: |
1490 |
|
|
msLevel = 2 |
1491 |
|
|
else: |
1492 |
|
|
msLevel = 1 |
1493 |
|
|
|
1494 |
|
|
dataArr = array('f'); |
1495 |
|
|
peakDetect="False" |
1496 |
|
|
if self.peakDetect(msLevel): |
1497 |
|
|
peakDetect="True" |
1498 |
|
|
sv.CorrectBaseline() |
1499 |
|
|
doc.SpecSetup.NoiseReductionStdev = 2.0 |
1500 |
|
|
sv.FilterNoise(self.delib.constants.deNoiseRemoval) |
1501 |
|
|
sv.PeakSettings.PeakThresholdPercent = 1 |
1502 |
|
|
st,sp = sv.GetPeakData(self.delib.constants.deSpecPeakAll, |
1503 |
|
|
self.delib.constants.deSpecPeakSortMass, 0, 0) |
1504 |
|
|
for j in xrange(len(sp)): |
1505 |
|
|
x = sp[j][self.delib.constants.deSpecPeakCentroidMass] |
1506 |
|
|
y = sp[j][self.delib.constants.deSpecPeakArea] |
1507 |
|
|
if y <= 0: |
1508 |
|
|
continue |
1509 |
|
|
dataArr.append(x) |
1510 |
|
|
dataArr.append(y) |
1511 |
|
|
else: |
1512 |
|
|
n,s = sv.GetRawData(0,0) |
1513 |
|
|
for j in xrange(n): |
1514 |
|
|
dataArr.append(s[j][0]) |
1515 |
|
|
dataArr.append(s[j][1]) |
1516 |
|
|
|
1517 |
|
|
if len(dataArr) == 0: |
1518 |
|
|
continue |
1519 |
|
|
|
1520 |
|
|
polarity_val = "+" |
1521 |
|
|
|
1522 |
|
|
# Scan level attributes |
1523 |
|
|
# Required by mzXML (no format spec. needed) |
1524 |
nje01 |
1.3 |
d = {'msLevel':msLevel} |
1525 |
nje01 |
1.1 |
# Optional ('scan.' and prefixformat spec. needed) |
1526 |
|
|
d.update({ |
1527 |
|
|
'scan.polarity:%s':polarity_val, |
1528 |
|
|
}) |
1529 |
|
|
|
1530 |
|
|
scanindex = self.maptoscan[(f,i)] |
1531 |
|
|
d.update({ |
1532 |
|
|
'plateID':self.metadata['SCAN'][scanindex]['plateID'], |
1533 |
|
|
'spotID':self.metadata['SCAN'][scanindex]['spotID'], |
1534 |
|
|
}) |
1535 |
nje01 |
1.2 |
d.update({'scanOrigin.parentFileID:%s': self.distinct_datafiles[f], |
1536 |
nje01 |
1.1 |
'scanOrigin.num:%d': i}) |
1537 |
|
|
if msLevel == 1: |
1538 |
|
|
yield (dataArr,d) |
1539 |
|
|
else: |
1540 |
|
|
# tandem MS scans add the following |
1541 |
|
|
# Required by mzXML |
1542 |
|
|
d.update({'precursorMz':fixedMass}) |
1543 |
|
|
# Optional |
1544 |
|
|
d.update({'precursorMz.precursorCharge:%d':1}) |
1545 |
|
|
yield (dataArr,d) |
1546 |
|
|
|
1547 |
|
|
prevfile = f |
1548 |
|
|
|
1549 |
|
|
self.deapp.Documents.Close() |
1550 |
|
|
self.remsettingsfile(prevfile) |
1551 |
nje01 |
1.2 |
self.remsettingsfile(f) |
1552 |
nje01 |
1.1 |
|
1553 |
|
|
def getMsRunMetaData(self): |
1554 |
|
|
self.open() |
1555 |
|
|
return {} |
1556 |
|
|
|
1557 |
|
|
def getFilenames(self): |
1558 |
|
|
return [ t for t in self.distinct_datafiles.items() ] |
1559 |
|
|
|
1560 |
nje01 |
1.3 |
def msInstrumentID(self): |
1561 |
|
|
return "1" |
1562 |
|
|
|
1563 |
nje01 |
1.1 |
def msManufacturer(self): |
1564 |
|
|
return "ABI / SCIEX" |
1565 |
|
|
|
1566 |
|
|
def msModel(self): |
1567 |
|
|
return "4700 Proteomic Analyzer" |
1568 |
|
|
|
1569 |
|
|
def msIonisation(self): |
1570 |
|
|
return "MALDI" |
1571 |
|
|
|
1572 |
|
|
def msMassAnalyzer(self): |
1573 |
|
|
return "TOF" |
1574 |
|
|
|
1575 |
|
|
def msDetector(self): |
1576 |
nje01 |
1.3 |
return "LE" |
1577 |
nje01 |
1.1 |
|
1578 |
|
|
def acquisitionSoftware(self): |
1579 |
|
|
return "DataExplorer" |
1580 |
|
|
|
1581 |
|
|
def acquisitionSoftwareVersion(self): |
1582 |
|
|
return "1.0" |
1583 |
|
|
|
1584 |
|
|
def precursorPrecision(self,mz): |
1585 |
|
|
return .1 |
1586 |
|
|
|
1587 |
|
|
def precursorTune(self): |
1588 |
|
|
return False |
1589 |
|
|
|
1590 |
|
|
def lc(self): |
1591 |
|
|
return False |
1592 |
|
|
|
1593 |
|
|
def maldi(self): |
1594 |
|
|
return not self.lc() |
1595 |
|
|
|
1596 |
|
|
def plateData(self): |
1597 |
|
|
self.open() |
1598 |
|
|
for d in self.metadata['PLATE']: |
1599 |
|
|
yield d |
1600 |
|
|
|
1601 |
|
|
def spotData(self,plate): |
1602 |
|
|
self.open() |
1603 |
|
|
for s in self.platespots[plate]: |
1604 |
|
|
yield s |
1605 |
|
|
|
1606 |
|
|
class scanfilter: |
1607 |
|
|
def __init__(self,str): |
1608 |
|
|
self.rtre = re.compile('^PT([0-9]+(\.[0-9]+)?)S$') |
1609 |
|
|
self.intre = re.compile('^[1-9][0-9]*$') |
1610 |
|
|
self.floatre = re.compile('^[0-9]+(\.[0-9]+)?$') |
1611 |
|
|
self.cl = [] |
1612 |
|
|
clause = str.split(',') |
1613 |
|
|
for cl in clause: |
1614 |
|
|
scl = cl.split('.') |
1615 |
|
|
m = self.rtre.match(scl[2].strip()) |
1616 |
|
|
if m != None: |
1617 |
|
|
v = float(m.group(1)) |
1618 |
|
|
elif self.floatre.match(scl[2].strip()): |
1619 |
|
|
v = float(scl[2]) |
1620 |
|
|
elif self.intre.match(scl[2].strip()): |
1621 |
|
|
v = int(scl[2]) |
1622 |
|
|
else: |
1623 |
|
|
v = scl[2] |
1624 |
|
|
self.cl.append((scl[0],scl[1],v)) |
1625 |
|
|
|
1626 |
|
|
def test(self,d): |
1627 |
|
|
a = {} |
1628 |
|
|
for (k,v) in d.iteritems(): |
1629 |
|
|
k0 = k.split(':')[0].split('.')[-1] |
1630 |
|
|
a[k0] = v |
1631 |
|
|
for cl in self.cl: |
1632 |
|
|
if type(a[cl[0]]) == type(u'') or \ |
1633 |
|
|
type(a[cl[0]]) == type('') : |
1634 |
|
|
m = self.rtre.match(a[cl[0]].strip()) |
1635 |
|
|
if m != None: |
1636 |
|
|
v1 = float(m.group(1)) |
1637 |
|
|
elif self.floatre.match(a[cl[0]].strip()): |
1638 |
|
|
v1 = float(a[cl[0]]) |
1639 |
|
|
elif self.intre.match(a[cl[0]].strip()): |
1640 |
|
|
v1 = int(a[cl[0]]) |
1641 |
|
|
else: |
1642 |
|
|
v1 = a[cl[0]].strip() |
1643 |
|
|
else: |
1644 |
|
|
v1 = a[cl[0]] |
1645 |
|
|
v2 = cl[2] |
1646 |
|
|
# print >>sys.stderr, "Spectrum test:",v1,type(v1),cl[1],v2,type(v2) |
1647 |
|
|
# sys.stderr.flush() |
1648 |
|
|
if cl[1] == 'eq' and v1 != v2: |
1649 |
|
|
# print >>sys.stderr, "FALSE" |
1650 |
|
|
# sys.stderr.flush() |
1651 |
|
|
return False |
1652 |
|
|
elif cl[1] == 'ne' and not v1 != v2: |
1653 |
|
|
return False |
1654 |
|
|
elif cl[1] == 'lt' and not v1 < v2: |
1655 |
|
|
return False |
1656 |
|
|
elif cl[1] == 'le' and not v1 <= v2: |
1657 |
|
|
return False |
1658 |
|
|
elif cl[1] == 'gt' and not v1 > v2: |
1659 |
|
|
return False |
1660 |
|
|
elif cl[1] == 'ge' and not v1 >= v2: |
1661 |
|
|
return False |
1662 |
|
|
# print >>sys.stderr, "TRUE" |
1663 |
|
|
# sys.stderr.flush() |
1664 |
|
|
return True |
1665 |
|
|
|
1666 |
|
|
if __name__ == '__main__': |
1667 |
|
|
|
1668 |
|
|
from optparse import OptionParser |
1669 |
|
|
import sys |
1670 |
|
|
import glob |
1671 |
|
|
|
1672 |
|
|
parser = OptionParser() |
1673 |
|
|
parser.add_option("-R", "--rawdata", type="string", dest="rawdata", default="",\ |
1674 |
|
|
help="Format of raw data. Optional if raw spectra file ends in .wiff or .t2m.") |
1675 |
|
|
parser.add_option("-X", "--xmlformat", type="string", dest="xmlformat", default="",\ |
1676 |
|
|
help="XML format to output. Optional if output file ends in .mzxml or .mzdata.") |
1677 |
|
|
parser.add_option("-o", "--output", type="string", dest="output", default="",\ |
1678 |
|
|
help="Output file.") |
1679 |
|
|
parser.add_option("-p", "--peaks", type="string", dest="peaks", default="2",\ |
1680 |
|
|
help="Level(s) of spectra to apply peak detection to (comma separated). QStar,4700 only.") |
1681 |
|
|
parser.add_option("-f", "--filter", type="string", dest="filter", default=None,\ |
1682 |
|
|
help="Filter on mzxml scan meta-data: field.op.value[,field.op.value]. Default: No filter.") |
1683 |
nje01 |
1.3 |
parser.add_option("-V", "--version", type='string', dest="version", default="3.0", |
1684 |
|
|
help="XML version. mzXML only. Valid options '2.1','2.2','3.0'. Default: '3.0'.") |
1685 |
|
|
parser.add_option("-z", "--compress_peaks", action="store_true", dest="compress_peaks", default=None,\ |
1686 |
|
|
help="Compress mzXML peaks data using zlib. Default: False") |
1687 |
|
|
parser.add_option("-Z", "--compress", type="string", dest="compress", default=None,\ |
1688 |
|
|
help="Compress output file. Valid options 'gz'. Default: None.") |
1689 |
nje01 |
1.1 |
parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False,\ |
1690 |
|
|
help="Debug. First 10 spectra only, and truncated peaks data element. Default: False.") |
1691 |
|
|
|
1692 |
|
|
( opts, args ) = parser.parse_args() |
1693 |
|
|
|
1694 |
|
|
if opts.output == "" and opts.xmlformat == "": |
1695 |
|
|
parser.error("Either -o or -X must be suppied.") |
1696 |
|
|
sys.exit(1) |
1697 |
|
|
|
1698 |
|
|
if opts.filter != None: |
1699 |
|
|
filt=scanfilter(opts.filter) |
1700 |
|
|
else: |
1701 |
|
|
filt=None |
1702 |
|
|
|
1703 |
|
|
argsglob = [] |
1704 |
|
|
for a in args: |
1705 |
|
|
argsglob.extend(glob.glob(a)) |
1706 |
|
|
|
1707 |
|
|
if opts.output != "" and len(argsglob) > 1: |
1708 |
|
|
parser.error("At most one raw spectrum data file is permitted, if -o option is used.") |
1709 |
|
|
|
1710 |
|
|
for a in argsglob: |
1711 |
|
|
|
1712 |
|
|
if opts.output != "": |
1713 |
|
|
o = opts.output |
1714 |
|
|
else: |
1715 |
|
|
p = a.rfind('.') |
1716 |
|
|
o = a[0:p]+'.'+opts.xmlformat |
1717 |
|
|
|
1718 |
|
|
if o == a: |
1719 |
|
|
print >>sys.stderr, "Input file %s and output file %s are the same!"%(a,o) |
1720 |
|
|
sys.exit(1) |
1721 |
|
|
|
1722 |
|
|
print >>sys.stderr, "Processing",a,"to",o |
1723 |
|
|
|
1724 |
|
|
if opts.rawdata.lower() in ('wiff','qstar'): |
1725 |
|
|
r = QStarWiff(a,opts.peaks) |
1726 |
|
|
elif opts.rawdata.lower() in ('t2d','dat','ab4700','ab4800','mariner','voyager'): |
1727 |
|
|
r = AB4700T2D(a,opts.peaks) |
1728 |
|
|
elif opts.rawdata.lower() in ('mzxml',): |
1729 |
|
|
r = FromMzXML(a) |
1730 |
|
|
elif a[-5:].lower() in ('.wiff',): |
1731 |
|
|
r = QStarWiff(a,opts.peaks) |
1732 |
|
|
elif a[-4:].lower() in ('.t2m',): |
1733 |
|
|
r = AB4700T2D(a,opts.peaks) |
1734 |
|
|
elif a[-6:].lower() in ('.mzxml',): |
1735 |
|
|
r = FromMzXML(a) |
1736 |
|
|
else: |
1737 |
|
|
parser.error("Bad rawdata format specification.") |
1738 |
|
|
sys.exit(1) |
1739 |
|
|
|
1740 |
|
|
if opts.xmlformat.lower() == 'mzxml': |
1741 |
nje01 |
1.3 |
x = ToMzXML(r,o,opts.compress,filt,opts.compress_peaks,opts.version) |
1742 |
nje01 |
1.1 |
elif opts.xmlformat.lower() == 'mzdata': |
1743 |
|
|
x = ToMzData(r,o,opts.compress,filt) |
1744 |
|
|
elif opts.output[-6:].lower() in ('.mzxml',): |
1745 |
nje01 |
1.3 |
x = ToMzXML(r,opts.output,opts.compress,filt,opts.compress_peaks,opts.version) |
1746 |
nje01 |
1.1 |
elif opts.output[-7:].lower() in ('.mzdata',): |
1747 |
|
|
x = ToMzData(r,opts.output,opts.compress,filt) |
1748 |
|
|
else: |
1749 |
|
|
parser.error("Bad xml format specification.") |
1750 |
|
|
sys.exit(1) |
1751 |
|
|
|
1752 |
|
|
x.write(debug=opts.debug) |