1 |
gpertea |
29 |
/* |
2 |
|
|
* common.cpp |
3 |
|
|
* TopHat |
4 |
|
|
* |
5 |
|
|
* Created by Cole Trapnell on 11/26/08. |
6 |
|
|
* Copyright 2008 Cole Trapnell. All rights reserved. |
7 |
|
|
* |
8 |
|
|
*/ |
9 |
|
|
|
10 |
|
|
#ifdef HAVE_CONFIG_H |
11 |
|
|
#include <config.h> |
12 |
|
|
#endif |
13 |
|
|
|
14 |
|
|
#include <cerrno>
#include <climits>
#include <cstdarg>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <getopt.h>

#include "common.h"
20 |
|
|
|
21 |
|
|
using namespace std; |
22 |
|
|
|
23 |
|
|
#ifdef MEM_DEBUG |
24 |
|
|
//function for debugging memory usage of current program in Linux |
25 |
|
|
|
26 |
|
|
#include <unistd.h> |
27 |
|
|
#include <ios> |
28 |
|
|
#include <fstream> |
29 |
|
|
|
30 |
|
|
////////////////////////////////////////////////////////////////////////////// |
31 |
|
|
// process_mem_usage(double &, double &) - takes two doubles by reference, |
32 |
|
|
// attempts to read the system-dependent data for a process' virtual memory |
33 |
|
|
// size and resident set size, and return the results in KB. |
34 |
|
|
// |
35 |
|
|
// On failure, returns 0.0, 0.0 |
36 |
|
|
|
37 |
|
|
// Reads /proc/self/stat and stores the process' virtual memory size and
// resident set size, both in KB, into the two output arguments.
// Both outputs stay at 0.0 when the file cannot be opened or the two
// fields cannot be parsed.
void process_mem_usage(double& vm_usage, double& resident_set) {
  using std::ios_base;
  using std::ifstream;
  using std::string;
  vm_usage = 0.0;
  resident_set = 0.0;
  // 'file' stat seems to give the most reliable results
  ifstream stat_stream("/proc/self/stat", ios_base::in);
  if (!stat_stream) return;  // e.g. no /proc on this system

  // Skip the 22 leading entries we don't care about:
  // pid comm state ppid pgrp session tty_nr tpgid flags minflt cminflt
  // majflt cmajflt utime stime cutime cstime priority nice O itrealvalue
  // starttime
  const int kLeadingFields = 22;
  string skipped;
  for (int field = 0; field < kLeadingFields; ++field)
    stat_stream >> skipped;

  // the two fields we want
  unsigned long vsize = 0;
  long rss = 0;
  stat_stream >> vsize >> rss;  // don't care about the rest
  const bool parsed = !stat_stream.fail();
  stat_stream.close();
  if (!parsed) return;  // keep the documented 0.0, 0.0 failure result

  long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024;  // in case x86-64 is configured to use 2MB pages
  vm_usage = vsize / 1024.0;
  resident_set = rss * page_size_kb;
}
67 |
|
|
|
68 |
|
|
// Writes the values reported by process_mem_usage(), each divided by 1024
// (KB -> MB), to stderr on a single line.
void print_mem_usage() {
  double vm_kb, rss_kb;
  process_mem_usage(vm_kb, rss_kb);
  const double vm_mb = vm_kb / 1024;
  const double rss_mb = rss_kb / 1024;
  fprintf(stderr, "VMSize: %6.1fMB\tRSize: %6.1fMB\n", vm_mb, rss_mb);
}
75 |
|
|
#endif |
76 |
|
|
|
77 |
|
|
|
78 |
|
|
unsigned int max_insertion_length = 3; |
79 |
|
|
unsigned int max_deletion_length = 3; |
80 |
|
|
|
81 |
|
|
|
82 |
|
|
int inner_dist_mean = 200; |
83 |
|
|
int inner_dist_std_dev = 20; |
84 |
|
|
int max_mate_inner_dist = -1; |
85 |
|
|
|
86 |
|
|
int min_anchor_len = 8; |
87 |
|
|
int min_report_intron_length = 50; |
88 |
|
|
int max_report_intron_length = 500000; |
89 |
|
|
|
90 |
|
|
int min_closure_intron_length = 50; |
91 |
|
|
int max_closure_intron_length = 5000; |
92 |
|
|
|
93 |
|
|
int min_coverage_intron_length = 50; |
94 |
|
|
int max_coverage_intron_length = 20000; |
95 |
|
|
|
96 |
|
|
int min_segment_intron_length = 50; |
97 |
|
|
int max_segment_intron_length = 500000; |
98 |
|
|
|
99 |
|
|
uint32_t min_closure_exon_length = 100; |
100 |
|
|
|
101 |
|
|
int island_extension = 25; |
102 |
|
|
int segment_length = 25; |
103 |
|
|
int segment_mismatches = 2; |
104 |
|
|
|
105 |
|
|
int max_splice_mismatches = 1; |
106 |
|
|
|
107 |
|
|
ReadFormat reads_format = FASTQ; |
108 |
|
|
|
109 |
|
|
bool verbose = false; |
110 |
|
|
|
111 |
|
|
int max_multihits = 40; |
112 |
|
|
bool no_closure_search = false; |
113 |
|
|
bool no_coverage_search = false; |
114 |
|
|
bool no_microexon_search = false; |
115 |
|
|
bool butterfly_search = false; |
116 |
|
|
int num_cpus = 1; |
117 |
|
|
float min_isoform_fraction = 0.15f; |
118 |
|
|
|
119 |
|
|
string output_dir = "tophat_out"; |
120 |
|
|
string aux_outfile = ""; //auxiliary output file name (e.g. prep_reads read stats) |
121 |
|
|
string gene_filter = ""; |
122 |
|
|
string gff_file = ""; |
123 |
|
|
string ium_reads = ""; |
124 |
|
|
string sam_header = ""; |
125 |
|
|
string sam_readgroup_id = ""; |
126 |
|
|
string zpacker = ""; |
127 |
|
|
string samtools_path = "samtools"; |
128 |
|
|
|
129 |
|
|
bool solexa_quals = false; |
130 |
|
|
bool phred64_quals = false; |
131 |
|
|
bool quals = false; |
132 |
|
|
bool integer_quals = false; |
133 |
|
|
bool color = false; |
134 |
|
|
bool color_out = false; |
135 |
|
|
|
136 |
|
|
string gtf_juncs = ""; |
137 |
|
|
|
138 |
|
|
eLIBRARY_TYPE library_type = LIBRARY_TYPE_NONE; |
139 |
|
|
|
140 |
|
|
extern void print_usage(); |
141 |
|
|
|
142 |
|
|
/**
 * Parse a base-10 int out of the getopt global 'optarg' and enforce that it
 * be at least 'lower'.
 *
 * If optarg is missing, is not entirely a valid integer (no digits, or
 * trailing characters after the number), does not fit in an int, or is less
 * than 'lower', then output the given error message, call the usage
 * callback and exit with status 1.
 *
 * @param lower        smallest accepted value
 * @param errmsg       message written to stderr on failure
 * @param print_usage  callback invoked after the message, before exit(1)
 * @return the parsed value (only returns on success)
 */
int parseIntOpt(int lower, const char *errmsg, void (*print_usage)()) {
  if (optarg != NULL) {
    char *endPtr = NULL;
    errno = 0;
    const long l = strtol(optarg, &endPtr, 10);
    // strtol() never sets endPtr to NULL: "nothing parsed" shows up as
    // endPtr == optarg, and trailing garbage as a non-terminator at endPtr.
    const bool consumed_all = (endPtr != optarg) && (*endPtr == '\0');
    const bool fits_int = (errno != ERANGE) && l >= INT_MIN && l <= INT_MAX;
    if (consumed_all && fits_int && l >= lower)
      return static_cast<int>(l);
  }
  cerr << errmsg << endl;
  print_usage();
  exit(1);
  return -1;
}
165 |
|
|
|
166 |
|
|
/**
 * Parse a float out of the getopt global 'optarg' and enforce that it lie
 * in the closed interval ['lower', 'upper'].
 *
 * If optarg is missing, is not entirely a valid number, or is outside the
 * interval, then output the given error message, call the usage callback
 * and exit with status 1.
 *
 * @param lower        smallest accepted value
 * @param upper        largest accepted value
 * @param errmsg       message written to stderr on failure
 * @param print_usage  callback invoked after the message, before exit(1)
 * @return the parsed value (only returns on success)
 */
static float parseFloatOpt(float lower, float upper, const char *errmsg, void (*print_usage)()) {
  if (optarg != NULL) {
    char *endPtr = NULL;
    const float l = (float)strtod(optarg, &endPtr);
    // endPtr == optarg means no number was found at all
    const bool consumed_all = (endPtr != optarg) && (*endPtr == '\0');
    // positive range test, so that a NaN value is rejected as well
    if (consumed_all && l >= lower && l <= upper)
      return l;
  }
  cerr << errmsg << endl;
  print_usage();
  exit(1);
  return -1;
}
195 |
|
|
|
196 |
|
|
/* |
197 |
|
|
this is from |
198 |
|
|
http://www.winehq.org/pipermail/wine-patches/2001-November/001322.html |
199 |
|
|
*/ |
200 |
|
|
// In-place tokenizer.
// Returns the token that starts at *str. The first character of the token
// that occurs in 'delims' is overwritten with '\0' and *str is moved just
// past it. When no delimiter is left, *str is set to NULL and the rest of
// the string is returned. A NULL *str yields NULL.
char* get_token(char** str, const char* delims)
{
  char* const first = *str;
  if (first == NULL)
    return NULL;

  for (char* cur = first; *cur != '\0'; ++cur) {
    if (strchr(delims, *cur) != NULL) {
      *cur = '\0';      // terminate the token
      *str = cur + 1;   // next call resumes after the delimiter
      return first;
    }
  }

  *str = NULL;          // input exhausted
  return first;
}
222 |
|
|
|
223 |
|
|
|
224 |
|
|
// getopt short-option string: -Q and -C take no argument, -p and -z do.
const char *short_options = "QCp:z:";

// Return values of getopt_long() for the long options in long_options[].
// Numbering starts at 127; the order of the enumerators fixes the values.
enum
{
  OPT_FASTA = 127,
  OPT_FASTQ,
  OPT_MIN_ANCHOR,
  OPT_SPLICE_MISMATCHES,
  OPT_VERBOSE,
  OPT_INSERT_LENGTH_MEAN,
  OPT_INSERT_LENGTH_STD_DEV,
  OPT_MIN_ISOFORM_FRACTION,
  OPT_OUTPUT_DIR,
  OPT_GENE_FILTER,
  OPT_GFF_ANNOTATIONS,
  OPT_MAX_MULTIHITS,
  OPT_NO_CLOSURE_SEARCH,
  OPT_NO_COVERAGE_SEARCH,
  OPT_NO_MICROEXON_SEARCH,
  OPT_SEGMENT_LENGTH,
  OPT_SEGMENT_MISMATCHES,
  OPT_MIN_CLOSURE_EXON,
  OPT_MAX_CLOSURE_INTRON,
  OPT_MIN_CLOSURE_INTRON,
  OPT_MAX_COVERAGE_INTRON,
  OPT_MIN_COVERAGE_INTRON,
  OPT_MIN_SEGMENT_INTRON,
  OPT_MAX_SEGMENT_INTRON,
  OPT_MIN_REPORT_INTRON,
  OPT_MAX_REPORT_INTRON,
  OPT_IUM_READS,
  OPT_BUTTERFLY_SEARCH,
  OPT_SOLEXA_QUALS,
  OPT_PHRED64_QUALS,
  OPT_SAM_HEADER,
  OPT_SAM_READGROUP_ID,
  OPT_QUALS,
  OPT_INTEGER_QUALS,
  OPT_COLOR,
  OPT_COLOR_OUT,
  OPT_LIBRARY_TYPE,
  OPT_MAX_DELETION_LENGTH,
  OPT_MAX_INSERTION_LENGTH,
  OPT_NUM_CPUS,
  OPT_ZPACKER,
  OPT_SAMTOOLS,
  OPT_AUX_OUT,
  OPT_GTF_JUNCS
};
273 |
|
|
|
274 |
|
|
static struct option long_options[] = { |
275 |
|
|
{"fasta", no_argument, 0, OPT_FASTA}, |
276 |
|
|
{"fastq", no_argument, 0, OPT_FASTQ}, |
277 |
|
|
{"min-anchor", required_argument, 0, OPT_MIN_ANCHOR}, |
278 |
|
|
{"sam-header", required_argument, 0, OPT_SAM_HEADER}, |
279 |
|
|
{"rg-id", required_argument, 0, OPT_SAM_READGROUP_ID}, |
280 |
|
|
{"splice-mismatches", required_argument, 0, OPT_SPLICE_MISMATCHES}, |
281 |
|
|
{"verbose", no_argument, 0, OPT_VERBOSE}, |
282 |
|
|
{"inner-dist-mean", required_argument, 0, OPT_INSERT_LENGTH_MEAN}, |
283 |
|
|
{"inner-dist-std-dev", required_argument, 0, OPT_INSERT_LENGTH_STD_DEV}, |
284 |
|
|
{"output-dir", required_argument, 0, OPT_OUTPUT_DIR}, |
285 |
|
|
{"gene-filter", required_argument, 0, OPT_GENE_FILTER}, |
286 |
|
|
{"gtf-annotations", required_argument, 0, OPT_GFF_ANNOTATIONS}, |
287 |
|
|
{"max-multihits", required_argument, 0, OPT_MAX_MULTIHITS}, |
288 |
|
|
{"no-closure-search", no_argument, 0, OPT_NO_CLOSURE_SEARCH}, |
289 |
|
|
{"no-coverage-search", no_argument, 0, OPT_NO_COVERAGE_SEARCH}, |
290 |
|
|
{"no-microexon-search", no_argument, 0, OPT_NO_MICROEXON_SEARCH}, |
291 |
|
|
{"segment-length", required_argument, 0, OPT_SEGMENT_LENGTH}, |
292 |
|
|
{"segment-mismatches", required_argument, 0, OPT_SEGMENT_MISMATCHES}, |
293 |
|
|
{"min-closure-exon", required_argument, 0, OPT_MIN_CLOSURE_EXON}, |
294 |
|
|
{"min-closure-intron", required_argument, 0, OPT_MIN_CLOSURE_INTRON}, |
295 |
|
|
{"max-closure-intron", required_argument, 0, OPT_MAX_CLOSURE_INTRON}, |
296 |
|
|
{"min-coverage-intron", required_argument, 0, OPT_MIN_COVERAGE_INTRON}, |
297 |
|
|
{"max-coverage-intron", required_argument, 0, OPT_MAX_COVERAGE_INTRON}, |
298 |
|
|
{"min-segment-intron", required_argument, 0, OPT_MIN_SEGMENT_INTRON}, |
299 |
|
|
{"max-segment-intron", required_argument, 0, OPT_MAX_SEGMENT_INTRON}, |
300 |
|
|
{"min-report-intron", required_argument, 0, OPT_MIN_REPORT_INTRON}, |
301 |
|
|
{"max-report-intron", required_argument, 0, OPT_MAX_REPORT_INTRON}, |
302 |
|
|
{"min-isoform-fraction",required_argument, 0, OPT_MIN_ISOFORM_FRACTION}, |
303 |
|
|
{"ium-reads", required_argument, 0, OPT_IUM_READS}, |
304 |
|
|
{"butterfly-search", no_argument, 0, OPT_BUTTERFLY_SEARCH}, |
305 |
|
|
{"solexa-quals", no_argument, 0, OPT_SOLEXA_QUALS}, |
306 |
|
|
{"phred64-quals", no_argument, 0, OPT_PHRED64_QUALS}, |
307 |
|
|
{"quals", no_argument, 0, OPT_QUALS}, |
308 |
|
|
{"integer-quals", no_argument, 0, OPT_INTEGER_QUALS}, |
309 |
|
|
{"color", no_argument, 0, OPT_COLOR}, |
310 |
|
|
{"color-out", no_argument, 0, OPT_COLOR_OUT}, |
311 |
|
|
{"library-type", required_argument, 0, OPT_LIBRARY_TYPE}, |
312 |
|
|
{"max-deletion-length", required_argument, 0, OPT_MAX_DELETION_LENGTH}, |
313 |
|
|
{"max-insertion-length", required_argument, 0, OPT_MAX_INSERTION_LENGTH}, |
314 |
|
|
{"num-threads", required_argument, 0, OPT_NUM_CPUS}, |
315 |
|
|
{"zpacker", required_argument, 0, OPT_ZPACKER}, |
316 |
|
|
{"samtools", required_argument, 0, OPT_SAMTOOLS}, |
317 |
|
|
{"aux-outfile", required_argument, 0, OPT_AUX_OUT}, |
318 |
|
|
{"gtf-juncs", required_argument, 0, OPT_GTF_JUNCS}, |
319 |
|
|
{0, 0, 0, 0} // terminator |
320 |
|
|
}; |
321 |
|
|
|
322 |
|
|
|
323 |
|
|
// Appends the decimal text of v to str.
void str_appendInt(string& str, int v) {
  ostringstream digits;
  digits << v;
  str += digits.str();
}
328 |
|
|
|
329 |
|
|
// Returns true when str ends with the C string 'suffix' (this includes the
// case where str equals suffix). A NULL suffix never matches.
//
// The earlier rfind()-based test started its backward search one position
// before the place where a suffix has to begin, so a genuine suffix such as
// "pigz" in "/usr/bin/pigz" was missed while an earlier occurrence of the
// text was reported as a match.
bool str_endsWith(string& str, const char* suffix) {
  if (suffix == NULL) return false;
  const size_t suffix_len = strlen(suffix);
  if (str.length() < suffix_len) return false;
  return str.compare(str.length() - suffix_len, suffix_len, suffix) == 0;
}
336 |
|
|
|
337 |
|
|
int parse_options(int argc, char** argv, void (*print_usage)()) |
338 |
|
|
{ |
339 |
|
|
int option_index = 0; |
340 |
|
|
int next_option; |
341 |
|
|
do { |
342 |
|
|
next_option = getopt_long(argc, argv, short_options, long_options, &option_index); |
343 |
|
|
switch (next_option) { |
344 |
|
|
case -1: |
345 |
|
|
break; |
346 |
|
|
case OPT_FASTA: |
347 |
|
|
reads_format = FASTA; |
348 |
|
|
break; |
349 |
|
|
case OPT_FASTQ: |
350 |
|
|
reads_format = FASTQ; |
351 |
|
|
break; |
352 |
|
|
case OPT_MIN_ANCHOR: |
353 |
|
|
min_anchor_len = (uint32_t)parseIntOpt(3, "--min-anchor arg must be at least 3", print_usage); |
354 |
|
|
break; |
355 |
|
|
case OPT_SPLICE_MISMATCHES: |
356 |
|
|
max_splice_mismatches = parseIntOpt(0, "--splice-mismatches arg must be at least 0", print_usage); |
357 |
|
|
break; |
358 |
|
|
case OPT_VERBOSE: |
359 |
|
|
verbose = true; |
360 |
|
|
break; |
361 |
|
|
case OPT_INSERT_LENGTH_MEAN: |
362 |
|
|
inner_dist_mean = parseIntOpt(-1024, "--inner-dist-mean arg must be at least -1024", print_usage); |
363 |
|
|
break; |
364 |
|
|
case OPT_INSERT_LENGTH_STD_DEV: |
365 |
|
|
inner_dist_std_dev = parseIntOpt(0, "--inner-dist-std-dev arg must be at least 0", print_usage); |
366 |
|
|
break; |
367 |
|
|
case OPT_OUTPUT_DIR: |
368 |
|
|
output_dir = optarg; |
369 |
|
|
break; |
370 |
|
|
case OPT_GENE_FILTER: |
371 |
|
|
gene_filter = optarg; |
372 |
|
|
break; |
373 |
|
|
case OPT_GFF_ANNOTATIONS: |
374 |
|
|
gff_file = optarg; |
375 |
|
|
break; |
376 |
|
|
case OPT_MAX_MULTIHITS: |
377 |
|
|
max_multihits = parseIntOpt(1, "--max-multihits arg must be at least 1", print_usage); |
378 |
|
|
break; |
379 |
|
|
case OPT_NO_CLOSURE_SEARCH: |
380 |
|
|
no_closure_search = true; |
381 |
|
|
break; |
382 |
|
|
case OPT_NO_COVERAGE_SEARCH: |
383 |
|
|
no_coverage_search = true; |
384 |
|
|
break; |
385 |
|
|
case OPT_NO_MICROEXON_SEARCH: |
386 |
|
|
no_microexon_search = true; |
387 |
|
|
break; |
388 |
|
|
case OPT_SEGMENT_LENGTH: |
389 |
|
|
segment_length = parseIntOpt(4, "--segment-length arg must be at least 4", print_usage); |
390 |
|
|
break; |
391 |
|
|
case OPT_SEGMENT_MISMATCHES: |
392 |
|
|
segment_mismatches = parseIntOpt(0, "--segment-mismatches arg must be at least 0", print_usage); |
393 |
|
|
break; |
394 |
|
|
case OPT_MIN_CLOSURE_EXON: |
395 |
|
|
min_closure_exon_length = parseIntOpt(1, "--min-closure-exon arg must be at least 1", print_usage); |
396 |
|
|
break; |
397 |
|
|
case OPT_MIN_CLOSURE_INTRON: |
398 |
|
|
min_closure_intron_length = parseIntOpt(1, "--min-closure-intron arg must be at least 1", print_usage); |
399 |
|
|
break; |
400 |
|
|
case OPT_MAX_CLOSURE_INTRON: |
401 |
|
|
max_closure_intron_length = parseIntOpt(1, "--max-closure-intron arg must be at least 1", print_usage); |
402 |
|
|
break; |
403 |
|
|
case OPT_MIN_COVERAGE_INTRON: |
404 |
|
|
min_coverage_intron_length = parseIntOpt(1, "--min-coverage-intron arg must be at least 1", print_usage); |
405 |
|
|
break; |
406 |
|
|
case OPT_MAX_COVERAGE_INTRON: |
407 |
|
|
max_coverage_intron_length = parseIntOpt(1, "--max-coverage-intron arg must be at least 1", print_usage); |
408 |
|
|
break; |
409 |
|
|
case OPT_MIN_SEGMENT_INTRON: |
410 |
|
|
min_segment_intron_length = parseIntOpt(1, "--min-segment-intron arg must be at least 1", print_usage); |
411 |
|
|
break; |
412 |
|
|
case OPT_MAX_SEGMENT_INTRON: |
413 |
|
|
max_segment_intron_length = parseIntOpt(1, "--max-segment-intron arg must be at least 1", print_usage); |
414 |
|
|
break; |
415 |
|
|
case OPT_MIN_REPORT_INTRON: |
416 |
|
|
min_report_intron_length = parseIntOpt(1, "--min-report-intron arg must be at least 1", print_usage); |
417 |
|
|
break; |
418 |
|
|
case OPT_MAX_REPORT_INTRON: |
419 |
|
|
max_report_intron_length = parseIntOpt(1, "--max-report-intron arg must be at least 1", print_usage); |
420 |
|
|
break; |
421 |
|
|
case OPT_MIN_ISOFORM_FRACTION: |
422 |
|
|
min_isoform_fraction = parseFloatOpt(0.0f, 1.0f, "--min-isoform-fraction arg must be [0.0,1.0]", print_usage); |
423 |
|
|
break; |
424 |
|
|
case OPT_IUM_READS: |
425 |
|
|
ium_reads = optarg; |
426 |
|
|
break; |
427 |
|
|
case OPT_SAM_HEADER: |
428 |
|
|
sam_header = optarg; |
429 |
|
|
break; |
430 |
|
|
case OPT_SAM_READGROUP_ID: |
431 |
|
|
sam_readgroup_id = optarg; |
432 |
|
|
break; |
433 |
|
|
case OPT_BUTTERFLY_SEARCH: |
434 |
|
|
butterfly_search = true; |
435 |
|
|
break; |
436 |
|
|
case OPT_SOLEXA_QUALS: |
437 |
|
|
solexa_quals = true; |
438 |
|
|
break; |
439 |
|
|
case OPT_PHRED64_QUALS: |
440 |
|
|
phred64_quals = true; |
441 |
|
|
break; |
442 |
|
|
case 'Q': |
443 |
|
|
case OPT_QUALS: |
444 |
|
|
quals = true; |
445 |
|
|
break; |
446 |
|
|
case OPT_INTEGER_QUALS: |
447 |
|
|
integer_quals = true; |
448 |
|
|
break; |
449 |
|
|
case 'C': |
450 |
|
|
case OPT_COLOR: |
451 |
|
|
color = true; |
452 |
|
|
break; |
453 |
|
|
case OPT_COLOR_OUT: |
454 |
|
|
color_out = true; |
455 |
|
|
break; |
456 |
|
|
case OPT_LIBRARY_TYPE: |
457 |
|
|
if (strcmp(optarg, "fr-unstranded") == 0) |
458 |
|
|
library_type = FR_UNSTRANDED; |
459 |
|
|
else if (strcmp(optarg, "fr-firststrand") == 0) |
460 |
|
|
library_type = FR_FIRSTSTRAND; |
461 |
|
|
else if (strcmp(optarg, "fr-secondstrand") == 0) |
462 |
|
|
library_type = FR_SECONDSTRAND; |
463 |
|
|
else if (strcmp(optarg, "ff-unstranded") == 0) |
464 |
|
|
library_type = FF_UNSTRANDED; |
465 |
|
|
else if (strcmp(optarg, "ff-firststrand") == 0) |
466 |
|
|
library_type = FF_FIRSTSTRAND; |
467 |
|
|
else if (strcmp(optarg, "ff-secondstrand") == 0) |
468 |
|
|
library_type = FF_SECONDSTRAND; |
469 |
|
|
break; |
470 |
|
|
case OPT_MAX_DELETION_LENGTH: |
471 |
|
|
max_deletion_length = parseIntOpt(0, "--max-deletion-length must be at least 0", print_usage); |
472 |
|
|
break; |
473 |
|
|
case OPT_MAX_INSERTION_LENGTH: |
474 |
|
|
max_insertion_length = parseIntOpt(0, "--max-insertion-length must be at least 0", print_usage); |
475 |
|
|
break; |
476 |
|
|
case 'z': |
477 |
|
|
case OPT_ZPACKER: |
478 |
|
|
zpacker = optarg; |
479 |
|
|
break; |
480 |
|
|
case OPT_SAMTOOLS: |
481 |
|
|
samtools_path = optarg; |
482 |
|
|
break; |
483 |
|
|
case OPT_AUX_OUT: |
484 |
|
|
aux_outfile = optarg; |
485 |
|
|
break; |
486 |
|
|
case 'p': |
487 |
|
|
case OPT_NUM_CPUS: |
488 |
|
|
num_cpus=parseIntOpt(1,"-p/--num-threads must be at least 1",print_usage); |
489 |
|
|
break; |
490 |
|
|
case OPT_GTF_JUNCS: |
491 |
|
|
gtf_juncs = optarg; |
492 |
|
|
break; |
493 |
|
|
default: |
494 |
|
|
print_usage(); |
495 |
|
|
return 1; |
496 |
|
|
} |
497 |
|
|
} while(next_option != -1); |
498 |
|
|
|
499 |
|
|
return 0; |
500 |
|
|
} |
501 |
|
|
|
502 |
|
|
|
503 |
|
|
// Error routine (prints error message and exits!) |
504 |
|
|
// printf-style error routine: writes the formatted message to stderr and
// terminates the program with exit status 1. When DEBUG is defined it calls
// abort() instead, to trigger a core dump for later inspection.
void err_exit(const char* format,...){
  va_list args;
  va_start(args, format);
  vfprintf(stderr, format, args);
  va_end(args);
#ifdef DEBUG
  abort();
#endif
  exit(1);
}
515 |
|
|
|
516 |
|
|
FILE* FZPipe::openRead(const char* fname, string& popencmd) { |
517 |
|
|
pipecmd=popencmd; |
518 |
|
|
filename=fname; |
519 |
|
|
if (pipecmd.empty()) { |
520 |
|
|
file=fopen(filename.c_str(), "r"); |
521 |
|
|
} |
522 |
|
|
else { |
523 |
|
|
string pcmd(pipecmd); |
524 |
|
|
pcmd.append(" '"); |
525 |
|
|
pcmd.append(filename); |
526 |
|
|
pcmd.append("'"); |
527 |
|
|
file=popen(pcmd.c_str(), "r"); |
528 |
|
|
} |
529 |
|
|
return file; |
530 |
|
|
} |
531 |
|
|
|
532 |
|
|
FILE* FZPipe::openRead(const char* fname) { |
533 |
|
|
string pcmd; |
534 |
|
|
return this->openRead(fname,pcmd); |
535 |
|
|
} |
536 |
|
|
|
537 |
|
|
FILE* FZPipe::openWrite(const char* fname, string& popencmd) { |
538 |
|
|
pipecmd=popencmd; |
539 |
|
|
filename=fname; |
540 |
|
|
if (pipecmd.empty()) { |
541 |
|
|
file=fopen(filename.c_str(), "w"); |
542 |
|
|
} |
543 |
|
|
else { |
544 |
|
|
string pcmd(pipecmd); |
545 |
|
|
pcmd.append(" - > '"); |
546 |
|
|
pcmd.append(filename.c_str()); |
547 |
|
|
pcmd.append("'"); |
548 |
|
|
file=popen(pcmd.c_str(), "w"); |
549 |
|
|
} |
550 |
|
|
return file; |
551 |
|
|
} |
552 |
|
|
|
553 |
|
|
|
554 |
|
|
FILE* FZPipe::openWrite(const char* fname) { |
555 |
|
|
string pcmd; |
556 |
|
|
return this->openWrite(fname,pcmd); |
557 |
|
|
} |
558 |
|
|
|
559 |
|
|
void FZPipe::rewind() { |
560 |
gpertea |
67 |
if (is_bam) return; //does nothing for BAM mappings |
561 |
gpertea |
29 |
if (pipecmd.empty()) { |
562 |
|
|
if (file!=NULL) { |
563 |
|
|
::rewind(file); |
564 |
|
|
return; |
565 |
|
|
} |
566 |
|
|
if (!filename.empty()) { |
567 |
|
|
file=fopen(filename.c_str(),"r"); |
568 |
|
|
return; |
569 |
|
|
} |
570 |
|
|
} |
571 |
|
|
if (filename.empty()) |
572 |
|
|
err_die("Error: FZStream::rewind() failed (missing filename)!\n"); |
573 |
|
|
this->close(); |
574 |
|
|
string pcmd(pipecmd); |
575 |
|
|
pcmd.append(" '"); |
576 |
|
|
pcmd.append(filename); |
577 |
|
|
pcmd.append("'"); |
578 |
|
|
file=popen(pcmd.c_str(), "r"); |
579 |
|
|
if (file==NULL) { |
580 |
|
|
err_die("Error: FZStream::rewind() popen(%s) failed!\n",pcmd.c_str()); |
581 |
|
|
} |
582 |
|
|
} |
583 |
|
|
|
584 |
|
|
|
585 |
|
|
// Returns the lower-cased file extension of s (the text after the last '.'),
// or an empty string when
//   - s is empty or "-",
//   - there is no '.' or it is the first character,
//   - the '.' is the last character,
//   - the extension is longer than 6 characters, or
//   - the last '.' comes before the last '/'.
string getFext(const string& s) {
  string ext("");
  if (s.empty() || s == "-") return ext;

  const int len = (int)s.length();
  const int dot = s.rfind('.');    // -1 when absent
  const int slash = s.rfind('/');  // -1 when absent

  const bool no_usable_dot = (dot <= 0);
  const bool dot_is_last = (dot > len - 2);
  const bool ext_too_long = (dot < len - 7);
  const bool dot_in_dirname = (dot < slash);
  if (no_usable_dot || dot_is_last || ext_too_long || dot_in_dirname)
    return ext;

  ext = s.substr(dot + 1);
  for (string::iterator ch = ext.begin(); ch != ext.end(); ++ch)
    *ch = std::tolower(*ch);
  return ext;
}
599 |
|
|
|
600 |
|
|
string guess_packer(const string& fname, bool use_all_cpus) { |
601 |
|
|
//only needed for the primary input files (given by user) |
602 |
|
|
string picmd(""); |
603 |
|
|
string fext=getFext(fname); |
604 |
gpertea |
32 |
if (fext=="bam") { |
605 |
|
|
picmd="bam2fastx"; |
606 |
|
|
return picmd; |
607 |
|
|
} |
608 |
gpertea |
29 |
if (fext=="gz" || fext=="gzip" || fext=="z") { |
609 |
|
|
if (use_all_cpus && str_endsWith(zpacker,"pigz")) { |
610 |
|
|
picmd=zpacker; |
611 |
|
|
if (num_cpus<2) picmd.append(" -p1"); |
612 |
|
|
else { |
613 |
|
|
picmd.append(" -p"); |
614 |
|
|
str_appendInt(picmd, num_cpus); |
615 |
|
|
//picmd.append(" -cd"); |
616 |
|
|
} |
617 |
|
|
} |
618 |
|
|
else picmd="gzip"; |
619 |
|
|
} |
620 |
|
|
else if (fext=="bz2" || fext=="bzip2" || fext=="bz" || fext=="bzip") { |
621 |
|
|
if (use_all_cpus && str_endsWith(zpacker,"pbzip2")) { |
622 |
|
|
picmd=zpacker; |
623 |
|
|
if (num_cpus<2) picmd.append(" -p1"); |
624 |
|
|
else { |
625 |
|
|
picmd.append(" -p"); |
626 |
|
|
str_appendInt(picmd, num_cpus); |
627 |
|
|
//picmd.append(" -cd"); |
628 |
|
|
} |
629 |
|
|
} |
630 |
|
|
else picmd="bzip2"; |
631 |
|
|
} |
632 |
|
|
return picmd; |
633 |
|
|
} |
634 |
|
|
|
635 |
|
|
/* |
636 |
|
|
string getBam2SamCmd(const string& fname) { |
637 |
|
|
string pipecmd(""); |
638 |
|
|
string fext=getFext(fname); |
639 |
|
|
if (fext=="bam") { |
640 |
|
|
pipecmd=samtools_path; |
641 |
|
|
pipecmd.append(" view"); |
642 |
|
|
} |
643 |
|
|
return pipecmd; |
644 |
|
|
} |
645 |
|
|
*/ |
646 |
|
|
|
647 |
|
|
// Error exit: writes the printf-style formatted message to stderr and
// terminates the program with exit status 1.
void err_die(const char* format,...) {
  va_list args;
  va_start(args, format);
  vfprintf(stderr, format, args);
  va_end(args);
  exit(1);
}
654 |
|
|
|
655 |
|
|
string getUnpackCmd(const string& fname, bool use_all_cpus) { |
656 |
gpertea |
32 |
//prep_reads should use guess_packer() instead |
657 |
gpertea |
29 |
string pipecmd(""); |
658 |
gpertea |
32 |
string fext=getFext(fname); |
659 |
|
|
if (fext=="bam") { |
660 |
|
|
pipecmd="bam2fastx"; |
661 |
|
|
return pipecmd; |
662 |
|
|
} |
663 |
|
|
if (zpacker.empty() || fext!="z") { |
664 |
gpertea |
29 |
return pipecmd; |
665 |
|
|
} |
666 |
|
|
pipecmd=zpacker; |
667 |
|
|
if (str_endsWith(pipecmd, "pigz") ||str_endsWith(pipecmd, "pbzip2")) { |
668 |
|
|
if (use_all_cpus==false) pipecmd.append(" -p1"); |
669 |
|
|
else if (num_cpus>1) { |
670 |
|
|
pipecmd.append(" -p"); |
671 |
|
|
str_appendInt(pipecmd,num_cpus); |
672 |
|
|
} |
673 |
|
|
} |
674 |
|
|
if (!pipecmd.empty()) pipecmd.append(" -cd"); |
675 |
|
|
return pipecmd; |
676 |
|
|
} |
677 |
|
|
|
678 |
|
|
void checkSamHeader() { |
679 |
|
|
if (sam_header.empty()) |
680 |
|
|
err_die("Error: writeSamHeader() with empty sam_header string\n"); |
681 |
|
|
//copy the SAM header |
682 |
|
|
FILE* fh=fopen(sam_header.c_str(), "r"); |
683 |
|
|
if (fh==NULL) |
684 |
|
|
err_die("Error: cannot open SAM header file %s\n",sam_header.c_str()); |
685 |
|
|
fclose(fh); |
686 |
|
|
} |
687 |
|
|
|
688 |
|
|
void writeSamHeader(FILE* fout) { |
689 |
|
|
if (fout==NULL) |
690 |
|
|
err_die("Error: writeSamHeader(NULL)\n"); |
691 |
|
|
checkSamHeader(); |
692 |
|
|
//copy the SAM header |
693 |
|
|
FILE* fh=fopen(sam_header.c_str(), "r"); |
694 |
|
|
int ch=-1; |
695 |
|
|
while ((ch=fgetc(fh))!=EOF) { |
696 |
|
|
if (fputc(ch, fout)==EOF) |
697 |
|
|
err_die("Error copying SAM header\n"); |
698 |
|
|
} |
699 |
|
|
fclose(fh); |
700 |
|
|
} |
701 |
|
|
|
702 |
|
|
//auxiliary functions for BAM record handling |
703 |
|
|
// Makes sure the data buffer of b can hold at least 'size' bytes and that
// b->data_len is at least 'size'.
// The buffer is only re-allocated when its capacity (b->m_data) is smaller
// than 'size'; the new capacity is 'size' passed through kroundup32().
// Returns b->data. Exits through err_die() when the allocation fails.
uint8_t* realloc_bdata(bam1_t *b, int size) {
  if (b->m_data < size) {
    b->m_data = size;
    kroundup32(b->m_data);
    // keep the old pointer until realloc() is known to have succeeded
    uint8_t* grown = (uint8_t*)realloc(b->data, b->m_data);
    if (grown == NULL)
      err_die("Error: realloc_bdata() cannot allocate %d bytes\n", (int)b->m_data);
    b->data = grown;
  }
  if (b->data_len<size) b->data_len=size;
  return b->data;
}
712 |
|
|
|
713 |
|
|
// Like realloc_bdata(), but ALWAYS allocates a fresh buffer, copies the
// existing data into it and returns the PREVIOUS buffer instead of freeing
// it, so the caller can still read from the old data.
// The new capacity is 'size' passed through kroundup32(); b->data_len
// becomes 'size'. At most the new capacity is copied, so a new size that is
// smaller than the old data length cannot overrun the new buffer.
// Exits through err_die() when the allocation fails.
// The caller must free() the returned pointer.
uint8_t* dupalloc_bdata(bam1_t *b, int size) {
  b->m_data = size;
  kroundup32(b->m_data);
  uint8_t* odata=b->data;
  b->data = (uint8_t*)malloc(b->m_data);
  if (b->data == NULL)
    err_die("Error: dupalloc_bdata() cannot allocate %d bytes\n", (int)b->m_data);
  int ncopy = b->data_len;
  if (ncopy > (int)b->m_data) ncopy = (int)b->m_data;
  if (odata != NULL && ncopy > 0)
    memcpy((void*)b->data, (void*)odata, ncopy);
  b->data_len=size;
  return odata; //user must FREE this after
}
725 |
|
|
|
726 |
|
|
extern unsigned short bam_char2flag_table[]; |
727 |
|
|
|
728 |
|
|
// Creates a new BAM record from a read name, target id, 1-based position,
// strand, sequence, CIGAR string and qualities.
// pos <= 0 stores position -1 and sets BAM_FUNMAP; otherwise pos-1 is stored
// because BAM positions are 0-based. The mapping quality is set to 255.
// The record data is filled in the fixed order name, CIGAR, sequence,
// qualities.
GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
    int pos, bool reverse, const char* qseq, const char* cigar, const char* quals) {
  novel = true;
  b = bam_init1();
  b->core.tid = gseq_tid;
  if (pos > 0) {
    b->core.pos = pos - 1; //BAM is 0-based
  }
  else {
    b->core.pos = -1; //unmapped
    b->core.flag |= BAM_FUNMAP;
  }
  b->core.qual = 255;

  const int seq_len = strlen(qseq);
  //core.bin is not derived from the sequence length here (this may not be
  //accurate); set_cigar() below sets it from the CIGAR instead

  // the read name comes first in the record data
  b->core.l_qname = strlen(qname) + 1; //includes the \0 at the end
  uint8_t* name_dest = realloc_bdata(b, b->core.l_qname);
  memcpy(name_dest, qname, b->core.l_qname);

  set_cigar(cigar); //this will also set core.bin
  add_sequence(qseq, seq_len);
  add_quals(quals); //quals must be given as Phred33

  if (reverse) {
    b->core.flag |= BAM_FREVERSE;
  }
}
750 |
|
|
|
751 |
|
|
// Creates a new BAM record from explicit values for all fields: name, flags,
// target id, 1-based position, mapping quality, CIGAR, mate target id and
// 1-based mate position, insert size, sequence, qualities and an optional
// list of auxiliary strings that are each passed to add_aux().
// pos <= 0 is stored as -1, otherwise pos-1 (BAM is 0-based).
// NOTE(review): set_cigar() runs before set_flags(), although set_cigar()
// states that core.flag must be set prior to the call - confirm the order.
GBamRecord::GBamRecord(const char* qname, int32_t flags, int32_t g_tid,
    int pos, int map_qual, const char* cigar, int32_t mg_tid, int mate_pos,
    int insert_size, const char* qseq, const char* quals,
    const vector<string>* aux_strings) {
  novel = true;
  b = bam_init1();
  b->core.tid = g_tid;
  if (pos <= 0) b->core.pos = -1;
  else b->core.pos = pos - 1; //BAM is 0-based
  b->core.qual = map_qual;

  const int seq_len = strlen(qseq);

  // the read name comes first in the record data
  b->core.l_qname = strlen(qname) + 1; //includes the \0 at the end
  uint8_t* name_dest = realloc_bdata(b, b->core.l_qname);
  memcpy(name_dest, qname, b->core.l_qname);

  set_cigar(cigar); //this will also set core.bin
  add_sequence(qseq, seq_len);
  add_quals(quals); //quals must be given as Phred33
  set_flags(flags);
  set_mdata(mg_tid, (int32_t)(mate_pos-1), (int32_t)insert_size);

  if (aux_strings == NULL) return;
  vector<string>::const_iterator aux = aux_strings->begin();
  while (aux != aux_strings->end()) {
    add_aux(aux->c_str());
    ++aux;
  }
}
775 |
|
|
void GBamRecord::set_cigar(const char* cigar) { |
776 |
|
|
//requires b->core.pos and b->core.flag to have been set properly PRIOR to this call |
777 |
|
|
int doff=b->core.l_qname; |
778 |
|
|
uint8_t* after_cigar=NULL; |
779 |
|
|
int after_cigar_len=0; |
780 |
|
|
uint8_t* prev_bdata=NULL; |
781 |
|
|
if (b->data_len>doff) { |
782 |
|
|
//cigar string already allocated, replace it |
783 |
|
|
int d=b->core.l_qname + b->core.n_cigar * 4;//offset of after-cigar data |
784 |
|
|
after_cigar=b->data+d; |
785 |
|
|
after_cigar_len=b->data_len-d; |
786 |
|
|
} |
787 |
|
|
const char *s; |
788 |
|
|
char *t; |
789 |
|
|
int i, op; |
790 |
|
|
long x; |
791 |
|
|
b->core.n_cigar = 0; |
792 |
|
|
if (cigar != NULL && strcmp(cigar, "*") != 0) { |
793 |
|
|
for (s = cigar; *s; ++s) { |
794 |
|
|
if (isalpha(*s)) b->core.n_cigar++; |
795 |
|
|
else if (!isdigit(*s)) { |
796 |
|
|
err_die("Error: invalid CIGAR character (%s)\n",cigar); |
797 |
|
|
} |
798 |
|
|
} |
799 |
|
|
if (after_cigar_len>0) { //replace/insert into existing full data |
800 |
|
|
prev_bdata=dupalloc_bdata(b, doff + b->core.n_cigar * 4 + after_cigar_len); |
801 |
|
|
memcpy((void*)(b->data+doff+b->core.n_cigar*4),(void*)after_cigar, after_cigar_len); |
802 |
|
|
free(prev_bdata); |
803 |
|
|
} |
804 |
|
|
else { |
805 |
|
|
realloc_bdata(b, doff + b->core.n_cigar * 4); |
806 |
|
|
} |
807 |
|
|
for (i = 0, s = cigar; i != b->core.n_cigar; ++i) { |
808 |
|
|
x = strtol(s, &t, 10); |
809 |
|
|
op = toupper(*t); |
810 |
|
|
if (op == 'M' || op == '=' || op == 'X') op = BAM_CMATCH; |
811 |
|
|
else if (op == 'I') op = BAM_CINS; |
812 |
|
|
else if (op == 'D') op = BAM_CDEL; |
813 |
|
|
else if (op == 'N') op = BAM_CREF_SKIP; |
814 |
|
|
else if (op == 'S') op = BAM_CSOFT_CLIP; |
815 |
|
|
else if (op == 'H') op = BAM_CHARD_CLIP; |
816 |
|
|
else if (op == 'P') op = BAM_CPAD; |
817 |
|
|
else err_die("Error: invalid CIGAR operation (%s)\n",cigar); |
818 |
|
|
s = t + 1; |
819 |
|
|
bam1_cigar(b)[i] = x << BAM_CIGAR_SHIFT | op; |
820 |
|
|
} |
821 |
|
|
if (*s) err_die("Error: unmatched CIGAR operation (%s)\n",cigar); |
822 |
|
|
b->core.bin = bam_reg2bin(b->core.pos, bam_calend(&b->core, bam1_cigar(b))); |
823 |
|
|
} else {//no CIGAR string given |
824 |
|
|
if (!(b->core.flag&BAM_FUNMAP)) { |
825 |
|
|
fprintf(stderr, "Warning: mapped sequence without CIGAR (%s)\n", (char*)b->data); |
826 |
|
|
b->core.flag |= BAM_FUNMAP; |
827 |
|
|
} |
828 |
|
|
b->core.bin = bam_reg2bin(b->core.pos, b->core.pos + 1); |
829 |
|
|
} |
830 |
|
|
} //set_cigar() |
831 |
|
|
|
832 |
|
|
void GBamRecord::add_sequence(const char* qseq, int slen) { |
833 |
|
|
//must be called AFTER set_cigar (cannot replace existing sequence for now) |
834 |
|
|
if (qseq==NULL) return; //should we ever care about this? |
835 |
|
|
if (slen<0) slen=strlen(qseq); |
836 |
|
|
int doff = b->core.l_qname + b->core.n_cigar * 4; |
837 |
|
|
if (strcmp(qseq, "*")!=0) { |
838 |
|
|
b->core.l_qseq=slen; |
839 |
|
|
if (b->core.n_cigar && b->core.l_qseq != (int32_t)bam_cigar2qlen(&b->core, bam1_cigar(b))) |
840 |
|
|
err_die("Error: CIGAR and sequence length are inconsistent!(%s)\n", |
841 |
|
|
qseq); |
842 |
|
|
uint8_t* p = (uint8_t*)realloc_bdata(b, doff + (b->core.l_qseq+1)/2 + b->core.l_qseq) + doff; |
843 |
|
|
//also allocated quals memory |
844 |
|
|
memset(p, 0, (b->core.l_qseq+1)/2); |
845 |
|
|
for (int i = 0; i < b->core.l_qseq; ++i) |
846 |
|
|
p[i/2] |= bam_nt16_table[(int)qseq[i]] << 4*(1-i%2); |
847 |
|
|
} else b->core.l_qseq = 0; |
848 |
|
|
} |
849 |
|
|
|
850 |
|
|
void GBamRecord::add_quals(const char* quals) { |
851 |
|
|
//requires core.l_qseq already set |
852 |
|
|
//and must be called AFTER add_sequence(), which also allocates the memory for quals |
853 |
|
|
uint8_t* p = b->data+(b->core.l_qname + b->core.n_cigar * 4 + (b->core.l_qseq+1)/2); |
854 |
|
|
if (quals==NULL || strcmp(quals, "*") == 0) { |
855 |
|
|
for (int i=0;i < b->core.l_qseq; i++) p[i] = 0xff; |
856 |
|
|
return; |
857 |
|
|
} |
858 |
|
|
for (int i=0;i < b->core.l_qseq; i++) p[i] = quals[i]-33; |
859 |
|
|
} |
860 |
|
|
|
861 |
|
|
// Parses one auxiliary field given in SAM text form, "TG:t:value", and
// appends it to the record through add_aux(tag, atype, alen, adata).
//   str : NUL-terminated field; needs at least 6 characters with ':' at
//         positions 2 and 4, otherwise parse_error() is called
// Requires: being called AFTER add_quals()
// Accepted type characters:
//   A a c C : single character taken from str[5], stored as type 'A'
//             (c and C for backward compatibility)
//   I i     : integer, stored with the smallest of c/s/i (negative) or
//             C/S/I (non-negative) that the code below selects; values not
//             fitting in 32 bits only trigger a warning on stderr
//   f       : 4-byte float
//   d       : 8-byte double
//   Z H     : string / hex string, stored including the terminating NUL
// Any other type character is reported through parse_error().
void GBamRecord::add_aux(const char* str) {
  //kept static as in the original code: whether the add_aux() overload
  //called at the end retains these pointers is not visible from here
  static char tag[2];
  static uint8_t abuf[sizeof(double)]; //scratch space for numeric values only
  int strl=strlen(str);
  if (strl < 6 || str[2] != ':' || str[4] != ':')
    parse_error("missing colon in auxiliary data");
  tag[0] = str[0]; tag[1] = str[1];
  uint8_t atype = str[3];
  uint8_t* adata=abuf;
  int alen=0;
  if (atype == 'A' || atype == 'a' || atype == 'c' || atype == 'C') { // c and C for backward compatibility
    atype='A';
    alen=1;
    adata=(uint8_t*)&str[5];
  }
  else if (atype == 'I' || atype == 'i') {
    long long x=(long long)atoll(str + 5);
    //numeric values are stored with memcpy() rather than through a casted
    //pointer, which avoids aliasing/alignment problems on the byte buffer
    if (x < 0) {
      if (x >= -127) {
        atype='c';
        abuf[0] = (int8_t)x;
        alen=1;
      }
      else if (x >= -32767) {
        atype = 's';
        int16_t v = (int16_t)x;
        memcpy(abuf, &v, sizeof(v));
        alen=2;
      }
      else {
        atype='i';
        int32_t v = (int32_t)x;
        memcpy(abuf, &v, sizeof(v));
        alen=4;
        if (x < -2147483648ll)
          fprintf(stderr, "Parse warning: integer %lld is out of range.",
                  x);
      }
    } else { //x >=0
      if (x <= 255) {
        atype = 'C';
        abuf[0] = (uint8_t)x;
        alen=1;
      }
      else if (x <= 65535) {
        atype='S';
        uint16_t v = (uint16_t)x;
        memcpy(abuf, &v, sizeof(v));
        alen=2;
      }
      else {
        atype='I';
        uint32_t v = (uint32_t)x;
        memcpy(abuf, &v, sizeof(v));
        alen=4;
        if (x > 4294967295ll)
          fprintf(stderr, "Parse warning: integer %lld is out of range.",
                  x);
      }
    }
  } //integer type
  else if (atype == 'f') {
    float v = (float)atof(str + 5);
    memcpy(abuf, &v, sizeof(v));
    alen = sizeof(float);
  }
  else if (atype == 'd') {
    //the value starts at str+5 like for every other type; store a real
    //8-byte double so that all alen bytes are defined
    double v = atof(str + 5);
    memcpy(abuf, &v, sizeof(v));
    alen = sizeof(double);
  }
  else if (atype == 'Z' || atype == 'H') {
    if (atype == 'H') { // check whether the hex string is valid
      if ((strl - 5) % 2 == 1) parse_error("length of the hex string not even");
      for (int i = 0; i < strl - 5; ++i) {
        //go through unsigned char: toupper() is undefined for negative values
        int c = toupper((unsigned char)str[5 + i]);
        if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F')))
          parse_error("invalid hex character");
      }
    }
    //hand over the text directly from str (as the 'A' case does) instead of
    //copying it into a fixed-size buffer that long values would overflow
    adata=(uint8_t*)(str + 5);
    alen=strl-4; //payload (strl-5 characters) plus its terminating NUL
  } else parse_error("unrecognized aux type");
  this->add_aux(tag, atype, alen, adata);
}//add_aux()