1 |
/* |
2 |
* common.cpp |
3 |
* TopHat |
4 |
* |
5 |
* Created by Cole Trapnell on 11/26/08. |
6 |
* Copyright 2008 Cole Trapnell. All rights reserved. |
7 |
* |
8 |
*/ |
9 |
|
10 |
#ifdef HAVE_CONFIG_H |
11 |
#include <config.h> |
12 |
#endif |
13 |
|
14 |
#include <iostream> |
15 |
#include <sstream> |
16 |
#include <cstdarg> |
17 |
#include <limits> |
18 |
#include <getopt.h> |
19 |
|
20 |
#include "common.h" |
21 |
|
22 |
using namespace std; |
23 |
|
24 |
#ifdef MEM_DEBUG |
25 |
//function for debugging memory usage of current program in Linux |
26 |
|
27 |
#include <unistd.h> |
28 |
#include <ios> |
29 |
#include <fstream> |
30 |
|
31 |
////////////////////////////////////////////////////////////////////////////// |
32 |
// process_mem_usage(double &, double &) - takes two doubles by reference, |
33 |
// attempts to read the system-dependent data for a process' virtual memory |
34 |
// size and resident set size, and return the results in KB. |
35 |
// |
36 |
// On failure, returns 0.0, 0.0 |
37 |
|
38 |
void process_mem_usage(double& vm_usage, double& resident_set) { |
39 |
using std::ios_base; |
40 |
using std::ifstream; |
41 |
using std::string; |
42 |
vm_usage = 0.0; |
43 |
resident_set = 0.0; |
44 |
// 'file' stat seems to give the most reliable results |
45 |
ifstream stat_stream("/proc/self/stat",ios_base::in); |
46 |
// dummy vars for leading entries in stat that we don't care about |
47 |
string pid, comm, state, ppid, pgrp, session, tty_nr; |
48 |
string tpgid, flags, minflt, cminflt, majflt, cmajflt; |
49 |
string utime, stime, cutime, cstime, priority, nice; |
50 |
string O, itrealvalue, starttime; |
51 |
|
52 |
// the two fields we want |
53 |
// |
54 |
unsigned long vsize; |
55 |
long rss; |
56 |
|
57 |
stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr |
58 |
>> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt |
59 |
>> utime >> stime >> cutime >> cstime >> priority >> nice |
60 |
>> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest |
61 |
|
62 |
stat_stream.close(); |
63 |
|
64 |
long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages |
65 |
vm_usage = vsize / 1024.0; |
66 |
resident_set = rss * page_size_kb; |
67 |
} |
68 |
|
69 |
void print_mem_usage() { |
70 |
double vs, rs; |
71 |
process_mem_usage(vs,rs); |
72 |
vs/=1024; |
73 |
rs/=1024; |
74 |
fprintf(stderr, "VMSize: %6.1fMB\tRSize: %6.1fMB\n", vs, rs); |
75 |
} |
76 |
#endif |
77 |
|
78 |
bool bowtie2 = true; |
79 |
int bowtie2_min_score = -10; |
80 |
int bowtie2_max_penalty = 6; |
81 |
int bowtie2_min_penalty = 2; |
82 |
int bowtie2_penalty_for_N = 1; |
83 |
int bowtie2_read_gap_open = 5; |
84 |
int bowtie2_read_gap_cont = 3; |
85 |
int bowtie2_ref_gap_open = 5; |
86 |
int bowtie2_ref_gap_cont = 3; |
87 |
|
88 |
// daehwan - temporary |
89 |
bool parallel = true; |
90 |
|
91 |
unsigned int max_insertion_length = 3; |
92 |
unsigned int max_deletion_length = 3; |
93 |
|
94 |
|
95 |
int inner_dist_mean = 200; |
96 |
int inner_dist_std_dev = 20; |
97 |
int max_mate_inner_dist = -1; |
98 |
|
99 |
int min_anchor_len = 8; |
100 |
int min_report_intron_length = 50; |
101 |
int max_report_intron_length = 500000; |
102 |
|
103 |
int min_closure_intron_length = 50; |
104 |
int max_closure_intron_length = 5000; |
105 |
|
106 |
int min_coverage_intron_length = 50; |
107 |
int max_coverage_intron_length = 20000; |
108 |
|
109 |
int min_segment_intron_length = 50; |
110 |
int max_segment_intron_length = 500000; |
111 |
|
112 |
uint32_t min_closure_exon_length = 100; |
113 |
|
114 |
int island_extension = 25; |
115 |
int segment_length = 25; |
116 |
int segment_mismatches = 2; |
117 |
int max_read_mismatches = 2; |
118 |
int max_splice_mismatches = 1; |
119 |
|
120 |
ReadFormat reads_format = FASTQ; |
121 |
|
122 |
bool verbose = false; |
123 |
|
124 |
unsigned int max_multihits = 20; |
125 |
unsigned int max_seg_multihits = 40; |
126 |
bool no_closure_search = false; |
127 |
bool no_coverage_search = false; |
128 |
bool no_microexon_search = false; |
129 |
bool butterfly_search = false; |
130 |
int num_threads = 1; |
131 |
|
132 |
float min_isoform_fraction = 0.15f; |
133 |
|
134 |
string output_dir = "tophat_out"; |
135 |
string aux_outfile = ""; //auxiliary output file name (e.g. prep_reads read stats) |
136 |
string index_outfile = ""; |
137 |
string gene_filter = ""; |
138 |
string gff_file = ""; |
139 |
string ium_reads = ""; |
140 |
string sam_header = ""; |
141 |
string sam_readgroup_id = ""; |
142 |
string zpacker = ""; |
143 |
string samtools_path = "samtools"; |
144 |
|
145 |
bool solexa_quals = false; |
146 |
bool phred64_quals = false; |
147 |
bool quals = false; |
148 |
bool integer_quals = false; |
149 |
bool color = false; |
150 |
string gtf_juncs = ""; |
151 |
|
152 |
bool report_secondary_alignments = false; |
153 |
|
154 |
string flt_reads = ""; |
155 |
string flt_mappings = ""; |
156 |
|
157 |
bool fusion_search = false; |
158 |
size_t fusion_anchor_length = 20; |
159 |
size_t fusion_min_dist = 10000000; |
160 |
size_t fusion_read_mismatches = 2; |
161 |
size_t fusion_multireads = 2; |
162 |
size_t fusion_multipairs = 2; |
163 |
|
164 |
eLIBRARY_TYPE library_type = LIBRARY_TYPE_NONE; |
165 |
|
166 |
extern void print_usage(); |
167 |
|
168 |
/** |
169 |
* Parse an int out of optarg and enforce that it be at least 'lower'; |
170 |
* if it is less than 'lower', than output the given error message and |
171 |
* exit with an error and a usage message. |
172 |
*/ |
173 |
|
174 |
int parseIntOpt(int lower, const char *errmsg, void (*print_usage)()) { |
175 |
long l; |
176 |
char *endPtr= NULL; |
177 |
l = strtol(optarg, &endPtr, 10); |
178 |
if (endPtr != NULL) { |
179 |
if (l < lower) { |
180 |
cerr << errmsg << endl; |
181 |
print_usage(); |
182 |
exit(1); |
183 |
} |
184 |
return (int32_t)l; |
185 |
} |
186 |
cerr << errmsg << endl; |
187 |
print_usage(); |
188 |
exit(1); |
189 |
return -1; |
190 |
} |
191 |
|
192 |
/** |
193 |
* Parse an int out of optarg and enforce that it be at least 'lower'; |
194 |
* if it is less than 'lower', than output the given error message and |
195 |
* exit with an error and a usage message. |
196 |
*/ |
197 |
static float parseFloatOpt(float lower, float upper, const char *errmsg, void (*print_usage)()) { |
198 |
float l; |
199 |
l = (float)atof(optarg); |
200 |
|
201 |
if (l < lower) { |
202 |
cerr << errmsg << endl; |
203 |
print_usage(); |
204 |
exit(1); |
205 |
} |
206 |
|
207 |
if (l > upper) |
208 |
{ |
209 |
cerr << errmsg << endl; |
210 |
print_usage(); |
211 |
exit(1); |
212 |
} |
213 |
|
214 |
return l; |
215 |
|
216 |
cerr << errmsg << endl; |
217 |
print_usage(); |
218 |
exit(1); |
219 |
return -1; |
220 |
} |
221 |
|
222 |
/* |
223 |
this is from |
224 |
http://www.winehq.org/pipermail/wine-patches/2001-November/001322.html |
225 |
*/ |
226 |
char* get_token(char** str, const char* delims) |
227 |
{ |
228 |
char* token; |
229 |
if (*str == NULL) |
230 |
return NULL; |
231 |
|
232 |
token = *str; |
233 |
while (**str != '\0') |
234 |
{ |
235 |
if (strchr(delims, **str) != NULL) |
236 |
{ |
237 |
**str = '\0'; |
238 |
++(*str); |
239 |
return token; |
240 |
} |
241 |
|
242 |
++(*str); |
243 |
} |
244 |
|
245 |
*str = NULL; |
246 |
return token; |
247 |
} |
248 |
|
249 |
const char *short_options = "QCp:z:N:"; |
250 |
|
251 |
enum |
252 |
{ |
253 |
OPT_FASTA = 127, |
254 |
OPT_FASTQ, |
255 |
OPT_MIN_ANCHOR, |
256 |
OPT_SPLICE_MISMATCHES, |
257 |
OPT_VERBOSE, |
258 |
OPT_INSERT_LENGTH_MEAN, |
259 |
OPT_INSERT_LENGTH_STD_DEV, |
260 |
OPT_MIN_ISOFORM_FRACTION, |
261 |
OPT_OUTPUT_DIR, |
262 |
OPT_GENE_FILTER, |
263 |
OPT_GFF_ANNOTATIONS, |
264 |
OPT_MAX_MULTIHITS, |
265 |
OPT_MAX_SEG_MULTIHITS, |
266 |
OPT_NO_CLOSURE_SEARCH, |
267 |
OPT_NO_COVERAGE_SEARCH, |
268 |
OPT_NO_MICROEXON_SEARCH, |
269 |
OPT_SEGMENT_LENGTH, |
270 |
OPT_READ_MISMATCHES, |
271 |
OPT_SEGMENT_MISMATCHES, |
272 |
OPT_MIN_CLOSURE_EXON, |
273 |
OPT_MAX_CLOSURE_INTRON, |
274 |
OPT_MIN_CLOSURE_INTRON, |
275 |
OPT_MAX_COVERAGE_INTRON, |
276 |
OPT_MIN_COVERAGE_INTRON, |
277 |
OPT_MIN_SEGMENT_INTRON, |
278 |
OPT_MAX_SEGMENT_INTRON, |
279 |
OPT_MIN_REPORT_INTRON, |
280 |
OPT_MAX_REPORT_INTRON, |
281 |
OPT_IUM_READS, |
282 |
OPT_BUTTERFLY_SEARCH, |
283 |
OPT_SOLEXA_QUALS, |
284 |
OPT_PHRED64_QUALS, |
285 |
OPT_SAM_HEADER, |
286 |
OPT_SAM_READGROUP_ID, |
287 |
OPT_QUALS, |
288 |
OPT_INTEGER_QUALS, |
289 |
OPT_COLOR, |
290 |
OPT_LIBRARY_TYPE, |
291 |
OPT_MAX_DELETION_LENGTH, |
292 |
OPT_MAX_INSERTION_LENGTH, |
293 |
OPT_NUM_THREADS, |
294 |
OPT_ZPACKER, |
295 |
OPT_SAMTOOLS, |
296 |
OPT_AUX_OUT, |
297 |
OPT_INDEX_OUT, |
298 |
OPT_GTF_JUNCS, |
299 |
OPT_FILTER_READS, |
300 |
OPT_FILTER_HITS, |
301 |
OPT_REPORT_SECONDARY_ALIGNMENTS, |
302 |
OPT_FUSION_SEARCH, |
303 |
OPT_FUSION_ANCHOR_LENGTH, |
304 |
OPT_FUSION_MIN_DIST, |
305 |
OPT_FUSION_READ_MISMATCHES, |
306 |
OPT_FUSION_MULTIREADS, |
307 |
OPT_FUSION_MULTIPAIRS, |
308 |
OPT_BOWTIE1, |
309 |
OPT_BOWTIE2_MIN_SCORE, |
310 |
OPT_BOWTIE2_MAX_PENALTY, |
311 |
OPT_BOWTIE2_MIN_PENALTY, |
312 |
OPT_BOWTIE2_PENALTY_FOR_N, |
313 |
OPT_BOWTIE2_READ_GAP_OPEN, |
314 |
OPT_BOWTIE2_READ_GAP_CONT, |
315 |
OPT_BOWTIE2_REF_GAP_OPEN, |
316 |
OPT_BOWTIE2_REF_GAP_CONT, |
317 |
}; |
318 |
|
319 |
static struct option long_options[] = { |
320 |
{"fasta", no_argument, 0, OPT_FASTA}, |
321 |
{"fastq", no_argument, 0, OPT_FASTQ}, |
322 |
{"min-anchor", required_argument, 0, OPT_MIN_ANCHOR}, |
323 |
{"sam-header", required_argument, 0, OPT_SAM_HEADER}, |
324 |
{"rg-id", required_argument, 0, OPT_SAM_READGROUP_ID}, |
325 |
{"splice-mismatches", required_argument, 0, OPT_SPLICE_MISMATCHES}, |
326 |
{"verbose", no_argument, 0, OPT_VERBOSE}, |
327 |
{"inner-dist-mean", required_argument, 0, OPT_INSERT_LENGTH_MEAN}, |
328 |
{"inner-dist-std-dev", required_argument, 0, OPT_INSERT_LENGTH_STD_DEV}, |
329 |
{"output-dir", required_argument, 0, OPT_OUTPUT_DIR}, |
330 |
{"gene-filter", required_argument, 0, OPT_GENE_FILTER}, |
331 |
{"gtf-annotations", required_argument, 0, OPT_GFF_ANNOTATIONS}, |
332 |
{"max-multihits", required_argument, 0, OPT_MAX_MULTIHITS}, |
333 |
{"max-seg-multihits", required_argument, 0, OPT_MAX_SEG_MULTIHITS}, |
334 |
{"no-closure-search", no_argument, 0, OPT_NO_CLOSURE_SEARCH}, |
335 |
{"no-coverage-search", no_argument, 0, OPT_NO_COVERAGE_SEARCH}, |
336 |
{"no-microexon-search", no_argument, 0, OPT_NO_MICROEXON_SEARCH}, |
337 |
{"segment-length", required_argument, 0, OPT_SEGMENT_LENGTH}, |
338 |
{"segment-mismatches", required_argument, 0, OPT_SEGMENT_MISMATCHES}, |
339 |
{"max-mismatches", required_argument, 0, OPT_READ_MISMATCHES}, |
340 |
{"min-closure-exon", required_argument, 0, OPT_MIN_CLOSURE_EXON}, |
341 |
{"min-closure-intron", required_argument, 0, OPT_MIN_CLOSURE_INTRON}, |
342 |
{"max-closure-intron", required_argument, 0, OPT_MAX_CLOSURE_INTRON}, |
343 |
{"min-coverage-intron", required_argument, 0, OPT_MIN_COVERAGE_INTRON}, |
344 |
{"max-coverage-intron", required_argument, 0, OPT_MAX_COVERAGE_INTRON}, |
345 |
{"min-segment-intron", required_argument, 0, OPT_MIN_SEGMENT_INTRON}, |
346 |
{"max-segment-intron", required_argument, 0, OPT_MAX_SEGMENT_INTRON}, |
347 |
{"min-report-intron", required_argument, 0, OPT_MIN_REPORT_INTRON}, |
348 |
{"max-report-intron", required_argument, 0, OPT_MAX_REPORT_INTRON}, |
349 |
{"min-isoform-fraction",required_argument, 0, OPT_MIN_ISOFORM_FRACTION}, |
350 |
{"ium-reads", required_argument, 0, OPT_IUM_READS}, |
351 |
{"butterfly-search", no_argument, 0, OPT_BUTTERFLY_SEARCH}, |
352 |
{"solexa-quals", no_argument, 0, OPT_SOLEXA_QUALS}, |
353 |
{"phred64-quals", no_argument, 0, OPT_PHRED64_QUALS}, |
354 |
{"quals", no_argument, 0, OPT_QUALS}, |
355 |
{"integer-quals", no_argument, 0, OPT_INTEGER_QUALS}, |
356 |
{"color", no_argument, 0, OPT_COLOR}, |
357 |
{"library-type", required_argument, 0, OPT_LIBRARY_TYPE}, |
358 |
{"max-deletion-length", required_argument, 0, OPT_MAX_DELETION_LENGTH}, |
359 |
{"max-insertion-length", required_argument, 0, OPT_MAX_INSERTION_LENGTH}, |
360 |
{"num-threads", required_argument, 0, OPT_NUM_THREADS}, |
361 |
{"zpacker", required_argument, 0, OPT_ZPACKER}, |
362 |
{"samtools", required_argument, 0, OPT_SAMTOOLS}, |
363 |
{"aux-outfile", required_argument, 0, OPT_AUX_OUT}, |
364 |
{"index-outfile", required_argument, 0, OPT_INDEX_OUT}, |
365 |
{"gtf-juncs", required_argument, 0, OPT_GTF_JUNCS}, |
366 |
{"flt-reads",required_argument, 0, OPT_FILTER_READS}, |
367 |
{"flt-hits",required_argument, 0, OPT_FILTER_HITS}, |
368 |
{"report-secondary-alignments", no_argument, 0, OPT_REPORT_SECONDARY_ALIGNMENTS}, |
369 |
{"fusion-search", no_argument, 0, OPT_FUSION_SEARCH}, |
370 |
{"fusion-anchor-length", required_argument, 0, OPT_FUSION_ANCHOR_LENGTH}, |
371 |
{"fusion-min-dist", required_argument, 0, OPT_FUSION_MIN_DIST}, |
372 |
{"fusion-read-mismatches", required_argument, 0, OPT_FUSION_READ_MISMATCHES}, |
373 |
{"fusion-multireads", required_argument, 0, OPT_FUSION_MULTIREADS}, |
374 |
{"fusion-multipairs", required_argument, 0, OPT_FUSION_MULTIPAIRS}, |
375 |
{"bowtie1", no_argument, 0, OPT_BOWTIE1}, |
376 |
{"bowtie2-min-score", required_argument, 0, OPT_BOWTIE2_MIN_SCORE}, |
377 |
{"bowtie2-max-penalty", required_argument, 0, OPT_BOWTIE2_MAX_PENALTY}, |
378 |
{"bowtie2-min-penalty", required_argument, 0, OPT_BOWTIE2_MIN_PENALTY}, |
379 |
{"bowtie2-penalty-for-N", required_argument, 0, OPT_BOWTIE2_PENALTY_FOR_N}, |
380 |
{"bowtie2-read-gap-open", required_argument, 0, OPT_BOWTIE2_READ_GAP_OPEN}, |
381 |
{"bowtie2-read-gap-cont", required_argument, 0, OPT_BOWTIE2_READ_GAP_CONT}, |
382 |
{"bowtie2-ref-gap-open", required_argument, 0, OPT_BOWTIE2_REF_GAP_OPEN}, |
383 |
{"bowtie2-ref-gap-cont", required_argument, 0, OPT_BOWTIE2_REF_GAP_CONT}, |
384 |
{0, 0, 0, 0} // terminator |
385 |
}; |
386 |
|
387 |
|
388 |
void str_appendInt(string& str, int v) { |
389 |
stringstream ss; |
390 |
ss << v; |
391 |
str.append(ss.str()); |
392 |
} |
393 |
|
394 |
bool str_endsWith(string& str, const char* suffix) { |
395 |
if (str.empty() || str.length()<3) return false; |
396 |
size_t l=strlen(suffix); |
397 |
if (str.length()<=l) return false; |
398 |
if (str.rfind(suffix, str.length()-l-1)!=string::npos) return true; |
399 |
return false; |
400 |
} |
401 |
|
402 |
int parse_options(int argc, char** argv, void (*print_usage)()) |
403 |
{ |
404 |
int option_index = 0; |
405 |
int next_option; |
406 |
do { |
407 |
next_option = getopt_long(argc, argv, short_options, long_options, &option_index); |
408 |
switch (next_option) { |
409 |
case -1: |
410 |
break; |
411 |
case OPT_FASTA: |
412 |
reads_format = FASTA; |
413 |
break; |
414 |
case OPT_FASTQ: |
415 |
reads_format = FASTQ; |
416 |
break; |
417 |
case OPT_MIN_ANCHOR: |
418 |
min_anchor_len = (uint32_t)parseIntOpt(3, "--min-anchor arg must be at least 3", print_usage); |
419 |
break; |
420 |
case OPT_SPLICE_MISMATCHES: |
421 |
max_splice_mismatches = parseIntOpt(0, "--splice-mismatches arg must be at least 0", print_usage); |
422 |
break; |
423 |
case OPT_VERBOSE: |
424 |
verbose = true; |
425 |
break; |
426 |
case OPT_INSERT_LENGTH_MEAN: |
427 |
inner_dist_mean = parseIntOpt(-1024, "--inner-dist-mean arg must be at least -1024", print_usage); |
428 |
break; |
429 |
case OPT_INSERT_LENGTH_STD_DEV: |
430 |
inner_dist_std_dev = parseIntOpt(0, "--inner-dist-std-dev arg must be at least 0", print_usage); |
431 |
break; |
432 |
case OPT_OUTPUT_DIR: |
433 |
output_dir = optarg; |
434 |
break; |
435 |
case OPT_GENE_FILTER: |
436 |
gene_filter = optarg; |
437 |
break; |
438 |
case OPT_GFF_ANNOTATIONS: |
439 |
gff_file = optarg; |
440 |
break; |
441 |
case OPT_MAX_MULTIHITS: |
442 |
max_multihits = parseIntOpt(1, "--max-multihits arg must be at least 1", print_usage); |
443 |
break; |
444 |
case OPT_MAX_SEG_MULTIHITS: |
445 |
max_seg_multihits = parseIntOpt(1, "--max-seg-multihits arg must be at least 1", print_usage); |
446 |
break; |
447 |
case OPT_NO_CLOSURE_SEARCH: |
448 |
no_closure_search = true; |
449 |
break; |
450 |
case OPT_NO_COVERAGE_SEARCH: |
451 |
no_coverage_search = true; |
452 |
break; |
453 |
case OPT_NO_MICROEXON_SEARCH: |
454 |
no_microexon_search = true; |
455 |
break; |
456 |
case OPT_SEGMENT_LENGTH: |
457 |
segment_length = parseIntOpt(4, "--segment-length arg must be at least 4", print_usage); |
458 |
break; |
459 |
case OPT_SEGMENT_MISMATCHES: |
460 |
segment_mismatches = parseIntOpt(0, "--segment-mismatches arg must be at least 0", print_usage); |
461 |
break; |
462 |
case 'N': |
463 |
case OPT_READ_MISMATCHES: |
464 |
max_read_mismatches = parseIntOpt(0, "--max-mismatches arg must be at least 0", print_usage); |
465 |
break; |
466 |
case OPT_MIN_CLOSURE_EXON: |
467 |
min_closure_exon_length = parseIntOpt(1, "--min-closure-exon arg must be at least 1", print_usage); |
468 |
break; |
469 |
case OPT_MIN_CLOSURE_INTRON: |
470 |
min_closure_intron_length = parseIntOpt(1, "--min-closure-intron arg must be at least 1", print_usage); |
471 |
break; |
472 |
case OPT_MAX_CLOSURE_INTRON: |
473 |
max_closure_intron_length = parseIntOpt(1, "--max-closure-intron arg must be at least 1", print_usage); |
474 |
break; |
475 |
case OPT_MIN_COVERAGE_INTRON: |
476 |
min_coverage_intron_length = parseIntOpt(1, "--min-coverage-intron arg must be at least 1", print_usage); |
477 |
break; |
478 |
case OPT_MAX_COVERAGE_INTRON: |
479 |
max_coverage_intron_length = parseIntOpt(1, "--max-coverage-intron arg must be at least 1", print_usage); |
480 |
break; |
481 |
case OPT_MIN_SEGMENT_INTRON: |
482 |
min_segment_intron_length = parseIntOpt(1, "--min-segment-intron arg must be at least 1", print_usage); |
483 |
break; |
484 |
case OPT_MAX_SEGMENT_INTRON: |
485 |
max_segment_intron_length = parseIntOpt(1, "--max-segment-intron arg must be at least 1", print_usage); |
486 |
break; |
487 |
case OPT_MIN_REPORT_INTRON: |
488 |
min_report_intron_length = parseIntOpt(1, "--min-report-intron arg must be at least 1", print_usage); |
489 |
break; |
490 |
case OPT_MAX_REPORT_INTRON: |
491 |
max_report_intron_length = parseIntOpt(1, "--max-report-intron arg must be at least 1", print_usage); |
492 |
break; |
493 |
case OPT_MIN_ISOFORM_FRACTION: |
494 |
min_isoform_fraction = parseFloatOpt(0.0f, 1.0f, "--min-isoform-fraction arg must be [0.0,1.0]", print_usage); |
495 |
break; |
496 |
case OPT_IUM_READS: |
497 |
ium_reads = optarg; |
498 |
break; |
499 |
case OPT_SAM_HEADER: |
500 |
sam_header = optarg; |
501 |
break; |
502 |
case OPT_SAM_READGROUP_ID: |
503 |
sam_readgroup_id = optarg; |
504 |
break; |
505 |
case OPT_BUTTERFLY_SEARCH: |
506 |
butterfly_search = true; |
507 |
break; |
508 |
case OPT_SOLEXA_QUALS: |
509 |
solexa_quals = true; |
510 |
break; |
511 |
case OPT_PHRED64_QUALS: |
512 |
phred64_quals = true; |
513 |
break; |
514 |
case 'Q': |
515 |
case OPT_QUALS: |
516 |
quals = true; |
517 |
break; |
518 |
case OPT_INTEGER_QUALS: |
519 |
integer_quals = true; |
520 |
break; |
521 |
case 'C': |
522 |
case OPT_COLOR: |
523 |
color = true; |
524 |
break; |
525 |
case OPT_LIBRARY_TYPE: |
526 |
if (strcmp(optarg, "fr-unstranded") == 0) |
527 |
library_type = FR_UNSTRANDED; |
528 |
else if (strcmp(optarg, "fr-firststrand") == 0) |
529 |
library_type = FR_FIRSTSTRAND; |
530 |
else if (strcmp(optarg, "fr-secondstrand") == 0) |
531 |
library_type = FR_SECONDSTRAND; |
532 |
else if (strcmp(optarg, "ff-unstranded") == 0) |
533 |
library_type = FF_UNSTRANDED; |
534 |
else if (strcmp(optarg, "ff-firststrand") == 0) |
535 |
library_type = FF_FIRSTSTRAND; |
536 |
else if (strcmp(optarg, "ff-secondstrand") == 0) |
537 |
library_type = FF_SECONDSTRAND; |
538 |
break; |
539 |
case OPT_MAX_DELETION_LENGTH: |
540 |
max_deletion_length = parseIntOpt(0, "--max-deletion-length must be at least 0", print_usage); |
541 |
break; |
542 |
case OPT_MAX_INSERTION_LENGTH: |
543 |
max_insertion_length = parseIntOpt(0, "--max-insertion-length must be at least 0", print_usage); |
544 |
break; |
545 |
case 'z': |
546 |
case OPT_ZPACKER: |
547 |
zpacker = optarg; |
548 |
break; |
549 |
case OPT_SAMTOOLS: |
550 |
samtools_path = optarg; |
551 |
break; |
552 |
case OPT_AUX_OUT: |
553 |
aux_outfile = optarg; |
554 |
break; |
555 |
case OPT_INDEX_OUT: |
556 |
index_outfile = optarg; |
557 |
break; |
558 |
case 'p': |
559 |
case OPT_NUM_THREADS: |
560 |
num_threads=parseIntOpt(1,"-p/--num-threads must be at least 1",print_usage); |
561 |
break; |
562 |
case OPT_GTF_JUNCS: |
563 |
gtf_juncs = optarg; |
564 |
break; |
565 |
case OPT_FILTER_READS: |
566 |
flt_reads = optarg; |
567 |
break; |
568 |
case OPT_FILTER_HITS: |
569 |
flt_mappings = optarg; |
570 |
break; |
571 |
case OPT_REPORT_SECONDARY_ALIGNMENTS: |
572 |
report_secondary_alignments = true; |
573 |
break; |
574 |
case OPT_FUSION_SEARCH: |
575 |
fusion_search = true; |
576 |
break; |
577 |
case OPT_FUSION_ANCHOR_LENGTH: |
578 |
fusion_anchor_length = parseIntOpt(10, "--fusion-anchor-length must be at least 10", print_usage); |
579 |
break; |
580 |
case OPT_FUSION_MIN_DIST: |
581 |
fusion_min_dist = parseIntOpt(0, "--fusion-min-dist must be at least 0", print_usage); |
582 |
break; |
583 |
case OPT_FUSION_READ_MISMATCHES: |
584 |
fusion_read_mismatches = parseIntOpt(0, "--fusion-read-mismatches must be at least 0", print_usage); |
585 |
break; |
586 |
case OPT_FUSION_MULTIREADS: |
587 |
fusion_multireads = parseIntOpt(1, "--fusion-multireads must be at least 1", print_usage); |
588 |
break; |
589 |
case OPT_FUSION_MULTIPAIRS: |
590 |
fusion_multipairs = parseIntOpt(1, "--fusion-multipairs must be at least 0", print_usage); |
591 |
break; |
592 |
case OPT_BOWTIE1: |
593 |
bowtie2 = false; |
594 |
break; |
595 |
case OPT_BOWTIE2_MIN_SCORE: |
596 |
bowtie2_min_score = -1 * parseIntOpt(0, "--bowtie2-min-score must be at least 0", print_usage); |
597 |
break; |
598 |
case OPT_BOWTIE2_MAX_PENALTY: |
599 |
bowtie2_max_penalty = parseIntOpt(0, "--bowtie2-max-penalty must be at least 0", print_usage); |
600 |
break; |
601 |
case OPT_BOWTIE2_MIN_PENALTY: |
602 |
bowtie2_min_penalty = parseIntOpt(0, "--bowtie2-min-penalty must be at least 0", print_usage); |
603 |
break; |
604 |
case OPT_BOWTIE2_PENALTY_FOR_N: |
605 |
bowtie2_penalty_for_N = parseIntOpt(0, "--bowtie2-penalty-for-N must be at least 0", print_usage); |
606 |
break; |
607 |
case OPT_BOWTIE2_READ_GAP_OPEN: |
608 |
bowtie2_read_gap_open = parseIntOpt(0, "--bowtie2-read-gap-open must be at least 0", print_usage); |
609 |
break; |
610 |
case OPT_BOWTIE2_READ_GAP_CONT: |
611 |
bowtie2_read_gap_cont = parseIntOpt(0, "--bowtie2-read-gap-cont must be at least 0", print_usage); |
612 |
break; |
613 |
case OPT_BOWTIE2_REF_GAP_OPEN: |
614 |
bowtie2_ref_gap_open = parseIntOpt(0, "--bowtie2-ref-gap-open must be at least 0", print_usage); |
615 |
break; |
616 |
case OPT_BOWTIE2_REF_GAP_CONT: |
617 |
bowtie2_ref_gap_cont = parseIntOpt(0, "--bowtie2-ref-gap-cont must be at least 0", print_usage); |
618 |
break; |
619 |
default: |
620 |
print_usage(); |
621 |
return 1; |
622 |
} |
623 |
} while(next_option != -1); |
624 |
|
625 |
return 0; |
626 |
} |
627 |
|
628 |
|
629 |
// Error routine (prints error message and exits!) |
630 |
void err_exit(const char* format,...){ |
631 |
va_list arguments; |
632 |
va_start(arguments,format); |
633 |
vfprintf(stderr,format,arguments); |
634 |
va_end(arguments); |
635 |
#ifdef DEBUG |
636 |
// trigger a core dump for later inspection |
637 |
abort(); |
638 |
#endif |
639 |
exit(1); |
640 |
} |
641 |
|
642 |
FILE* FZPipe::openRead(const char* fname, string& popencmd) { |
643 |
pipecmd=popencmd; |
644 |
filename=fname; |
645 |
if (pipecmd.empty()) { |
646 |
file=fopen(filename.c_str(), "r"); |
647 |
} |
648 |
else { |
649 |
string pcmd(pipecmd); |
650 |
pcmd.append(" '"); |
651 |
pcmd.append(filename); |
652 |
pcmd.append("'"); |
653 |
file=popen(pcmd.c_str(), "r"); |
654 |
} |
655 |
return file; |
656 |
} |
657 |
|
658 |
FILE* FZPipe::openRead(const char* fname) { |
659 |
string pcmd; |
660 |
return this->openRead(fname,pcmd); |
661 |
} |
662 |
|
663 |
FILE* FZPipe::openWrite(const char* fname, string& popencmd) { |
664 |
pipecmd=popencmd; |
665 |
filename=fname; |
666 |
if (pipecmd.empty()) { |
667 |
file=fopen(filename.c_str(), "w"); |
668 |
} |
669 |
else { |
670 |
string pcmd(pipecmd); |
671 |
pcmd.append(" - > '"); |
672 |
pcmd.append(filename.c_str()); |
673 |
pcmd.append("'"); |
674 |
file=popen(pcmd.c_str(), "w"); |
675 |
} |
676 |
return file; |
677 |
} |
678 |
|
679 |
|
680 |
FILE* FZPipe::openWrite(const char* fname) { |
681 |
string pcmd; |
682 |
return this->openWrite(fname,pcmd); |
683 |
} |
684 |
|
685 |
void FZPipe::rewind() { |
686 |
if (pipecmd.empty()) { |
687 |
if (file!=NULL) { |
688 |
::rewind(file); |
689 |
return; |
690 |
} |
691 |
if (!filename.empty()) { |
692 |
file=fopen(filename.c_str(),"r"); |
693 |
return; |
694 |
} |
695 |
} |
696 |
if (filename.empty()) |
697 |
err_die("Error: FZStream::rewind() failed (missing filename)!\n"); |
698 |
this->close(); |
699 |
string pcmd(pipecmd); |
700 |
pcmd.append(" '"); |
701 |
pcmd.append(filename); |
702 |
pcmd.append("'"); |
703 |
file=popen(pcmd.c_str(), "r"); |
704 |
if (file==NULL) { |
705 |
err_die("Error: FZStream::rewind() popen(%s) failed!\n",pcmd.c_str()); |
706 |
} |
707 |
} |
708 |
|
709 |
|
710 |
string getFext(const string& s) { |
711 |
string r(""); |
712 |
//if (xpos!=NULL) *xpos=0; |
713 |
if (s.empty() || s=="-") return r; |
714 |
int slen=(int)s.length(); |
715 |
int p=s.rfind('.'); |
716 |
int d=s.rfind('/'); |
717 |
if (p<=0 || p>slen-2 || p<slen-7 || p<d) return r; |
718 |
r=s.substr(p+1); |
719 |
//if (xpos!=NULL) *xpos=p+1; |
720 |
for(size_t i=0; i!=r.length(); i++) |
721 |
r[i] = std::tolower(r[i]); |
722 |
return r; |
723 |
} |
724 |
|
725 |
string guess_packer(const string& fname, bool use_all_cpus) { |
726 |
//only needed for the primary input files (given by user) |
727 |
string picmd(""); |
728 |
string fext=getFext(fname); |
729 |
if (fext=="bam") { |
730 |
picmd="bam2fastx"; |
731 |
return picmd; |
732 |
} |
733 |
if (fext=="gz" || fext=="gzip" || fext=="z") { |
734 |
if (use_all_cpus && str_endsWith(zpacker,"pigz")) { |
735 |
picmd=zpacker; |
736 |
if (num_threads<2) picmd.append(" -p1"); |
737 |
else { |
738 |
picmd.append(" -p"); |
739 |
str_appendInt(picmd, num_threads); |
740 |
//picmd.append(" -cd"); |
741 |
} |
742 |
} |
743 |
else picmd="gzip"; |
744 |
} |
745 |
else if (fext=="bz2" || fext=="bzip2" || fext=="bz" || fext=="bzip") { |
746 |
if (use_all_cpus && str_endsWith(zpacker,"pbzip2")) { |
747 |
picmd=zpacker; |
748 |
if (num_threads<2) picmd.append(" -p1"); |
749 |
else { |
750 |
picmd.append(" -p"); |
751 |
str_appendInt(picmd, num_threads); |
752 |
//picmd.append(" -cd"); |
753 |
} |
754 |
} |
755 |
else picmd="bzip2"; |
756 |
} |
757 |
return picmd; |
758 |
} |
759 |
|
760 |
/* |
761 |
string getBam2SamCmd(const string& fname) { |
762 |
string pipecmd(""); |
763 |
string fext=getFext(fname); |
764 |
if (fext=="bam") { |
765 |
pipecmd=samtools_path; |
766 |
pipecmd.append(" view"); |
767 |
} |
768 |
return pipecmd; |
769 |
} |
770 |
*/ |
771 |
|
772 |
void err_die(const char* format,...) { // Error exit |
773 |
va_list arguments; |
774 |
va_start(arguments,format); |
775 |
vfprintf(stderr,format,arguments); |
776 |
va_end(arguments); |
777 |
exit(1); |
778 |
} |
779 |
|
780 |
string getUnpackCmd(const string& fname, bool use_all_cpus) { |
781 |
//prep_reads should use guess_packer() instead |
782 |
//otherwise compressed files MUST have the .z extension, |
783 |
//as they are all internally generated |
784 |
string pipecmd(""); |
785 |
string fext=getFext(fname); |
786 |
if (fext=="bam") { |
787 |
pipecmd="bam2fastx"; |
788 |
return pipecmd; |
789 |
} |
790 |
if (zpacker.empty() || fext!="z") { |
791 |
return pipecmd; //no packer used |
792 |
} |
793 |
pipecmd=zpacker; |
794 |
if (str_endsWith(pipecmd, "pigz") ||str_endsWith(pipecmd, "pbzip2")) { |
795 |
if (use_all_cpus==false) pipecmd.append(" -p1"); |
796 |
else if (num_threads>1) { |
797 |
pipecmd.append(" -p"); |
798 |
str_appendInt(pipecmd,num_threads); |
799 |
} |
800 |
} |
801 |
if (!pipecmd.empty()) pipecmd.append(" -cd"); |
802 |
return pipecmd; |
803 |
} |
804 |
|
805 |
void checkSamHeader() { |
806 |
if (sam_header.empty()) |
807 |
err_die("Error: writeSamHeader() with empty sam_header string\n"); |
808 |
//copy the SAM header |
809 |
FILE* fh=fopen(sam_header.c_str(), "r"); |
810 |
if (fh==NULL) |
811 |
err_die("Error: cannot open SAM header file %s\n",sam_header.c_str()); |
812 |
fclose(fh); |
813 |
} |
814 |
|
815 |
void writeSamHeader(FILE* fout) { |
816 |
if (fout==NULL) |
817 |
err_die("Error: writeSamHeader(NULL)\n"); |
818 |
checkSamHeader(); |
819 |
//copy the SAM header |
820 |
FILE* fh=fopen(sam_header.c_str(), "r"); |
821 |
int ch=-1; |
822 |
while ((ch=fgetc(fh))!=EOF) { |
823 |
if (fputc(ch, fout)==EOF) |
824 |
err_die("Error copying SAM header\n"); |
825 |
} |
826 |
fclose(fh); |
827 |
} |
828 |
|
829 |
//auxiliary functions for BAM record handling |
830 |
uint8_t* realloc_bdata(bam1_t *b, int size) { |
831 |
if (b->m_data < size) { |
832 |
b->m_data = size; |
833 |
kroundup32(b->m_data); |
834 |
b->data = (uint8_t*)realloc(b->data, b->m_data); |
835 |
} |
836 |
if (b->data_len<size) b->data_len=size; |
837 |
return b->data; |
838 |
} |
839 |
|
840 |
uint8_t* dupalloc_bdata(bam1_t *b, int size) { |
841 |
//same as realloc_bdata, but does not free previous data |
842 |
//but returns it instead |
843 |
//it ALWAYS duplicates data |
844 |
b->m_data = size; |
845 |
kroundup32(b->m_data); |
846 |
uint8_t* odata=b->data; |
847 |
b->data = (uint8_t*)malloc(b->m_data); |
848 |
memcpy((void*)b->data, (void*)odata, b->data_len); |
849 |
b->data_len=size; |
850 |
return odata; //user must FREE this after |
851 |
} |
852 |
|
853 |
extern unsigned short bam_char2flag_table[]; |
854 |
|
855 |
GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid, |
856 |
int pos, bool reverse, const char* qseq, const char* cigar, const char* quals) { |
857 |
novel=true; |
858 |
b=bam_init1(); |
859 |
b->core.tid=gseq_tid; |
860 |
if (pos<=0) { |
861 |
b->core.pos=-1; //unmapped |
862 |
//if (gseq_tid<0) |
863 |
b->core.flag |= BAM_FUNMAP; |
864 |
} |
865 |
else b->core.pos=pos-1; //BAM is 0-based |
866 |
b->core.qual=255; |
867 |
int l_qseq=strlen(qseq); |
868 |
//this may not be accurate, setting CIGAR is the correct way |
869 |
//b->core.bin = bam_reg2bin(b->core.pos, b->core.pos+l_qseq-1); |
870 |
b->core.l_qname=strlen(qname)+1; //includes the \0 at the end |
871 |
memcpy(realloc_bdata(b, b->core.l_qname), qname, b->core.l_qname); |
872 |
set_cigar(cigar); //this will also set core.bin |
873 |
add_sequence(qseq, l_qseq); |
874 |
add_quals(quals); //quals must be given as Phred33 |
875 |
if (reverse) { b->core.flag |= BAM_FREVERSE ; } |
876 |
} |
877 |
|
878 |
GBamRecord::GBamRecord(const char* qname, int32_t flags, int32_t g_tid, |
879 |
int pos, int map_qual, const char* cigar, int32_t mg_tid, int mate_pos, |
880 |
int insert_size, const char* qseq, const char* quals, |
881 |
const vector<string>* aux_strings) { |
882 |
novel=true; |
883 |
b=bam_init1(); |
884 |
b->core.tid=g_tid; |
885 |
b->core.pos = (pos<=0) ? -1 : pos-1; //BAM is 0-based |
886 |
b->core.qual=map_qual; |
887 |
int l_qseq=strlen(qseq); |
888 |
b->core.l_qname=strlen(qname)+1; //includes the \0 at the end |
889 |
memcpy(realloc_bdata(b, b->core.l_qname), qname, b->core.l_qname); |
890 |
set_cigar(cigar); //this will also set core.bin |
891 |
add_sequence(qseq, l_qseq); |
892 |
add_quals(quals); //quals must be given as Phred33 |
893 |
set_flags(flags); |
894 |
set_mdata(mg_tid, (int32_t)(mate_pos-1), (int32_t)insert_size); |
895 |
if (aux_strings!=NULL) { |
896 |
for (vector<string>::const_iterator itr=aux_strings->begin(); |
897 |
itr!=aux_strings->end(); ++itr) { |
898 |
add_aux(itr->c_str()); |
899 |
} |
900 |
} |
901 |
} |
902 |
void GBamRecord::set_cigar(const char* cigar) { |
903 |
//requires b->core.pos and b->core.flag to have been set properly PRIOR to this call |
904 |
int doff=b->core.l_qname; |
905 |
uint8_t* after_cigar=NULL; |
906 |
int after_cigar_len=0; |
907 |
uint8_t* prev_bdata=NULL; |
908 |
if (b->data_len>doff) { |
909 |
//cigar string already allocated, replace it |
910 |
int d=b->core.l_qname + b->core.n_cigar * 4;//offset of after-cigar data |
911 |
after_cigar=b->data+d; |
912 |
after_cigar_len=b->data_len-d; |
913 |
} |
914 |
const char *s; |
915 |
char *t; |
916 |
int i, op; |
917 |
long x; |
918 |
b->core.n_cigar = 0; |
919 |
if (cigar != NULL && strcmp(cigar, "*") != 0) { |
920 |
for (s = cigar; *s; ++s) { |
921 |
if (isalpha(*s)) b->core.n_cigar++; |
922 |
else if (!isdigit(*s)) { |
923 |
err_die("Error: invalid CIGAR character (%s)\n",cigar); |
924 |
} |
925 |
} |
926 |
if (after_cigar_len>0) { //replace/insert into existing full data |
927 |
prev_bdata=dupalloc_bdata(b, doff + b->core.n_cigar * 4 + after_cigar_len); |
928 |
memcpy((void*)(b->data+doff+b->core.n_cigar*4),(void*)after_cigar, after_cigar_len); |
929 |
free(prev_bdata); |
930 |
} |
931 |
else { |
932 |
realloc_bdata(b, doff + b->core.n_cigar * 4); |
933 |
} |
934 |
for (i = 0, s = cigar; i != (int)b->core.n_cigar; ++i) { |
935 |
x = strtol(s, &t, 10); |
936 |
op = toupper(*t); |
937 |
if (op == 'M' || op == '=' || op == 'X') op = BAM_CMATCH; |
938 |
else if (op == 'I') op = BAM_CINS; |
939 |
else if (op == 'D') op = BAM_CDEL; |
940 |
else if (op == 'N') op = BAM_CREF_SKIP; |
941 |
else if (op == 'S') op = BAM_CSOFT_CLIP; |
942 |
else if (op == 'H') op = BAM_CHARD_CLIP; |
943 |
else if (op == 'P') op = BAM_CPAD; |
944 |
else err_die("Error: invalid CIGAR operation (%s)\n",cigar); |
945 |
s = t + 1; |
946 |
bam1_cigar(b)[i] = x << BAM_CIGAR_SHIFT | op; |
947 |
} |
948 |
if (*s) err_die("Error: unmatched CIGAR operation (%s)\n",cigar); |
949 |
b->core.bin = bam_reg2bin(b->core.pos, bam_calend(&b->core, bam1_cigar(b))); |
950 |
} else {//no CIGAR string given |
951 |
if (!(b->core.flag&BAM_FUNMAP)) { |
952 |
fprintf(stderr, "Warning: mapped sequence without CIGAR (%s)\n", (char*)b->data); |
953 |
b->core.flag |= BAM_FUNMAP; |
954 |
} |
955 |
b->core.bin = bam_reg2bin(b->core.pos, b->core.pos + 1); |
956 |
} |
957 |
} //set_cigar() |
958 |
|
959 |
void GBamRecord::add_sequence(const char* qseq, int slen) { |
960 |
//must be called AFTER set_cigar (cannot replace existing sequence for now) |
961 |
if (qseq==NULL) return; //should we ever care about this? |
962 |
if (slen<0) slen=strlen(qseq); |
963 |
int doff = b->core.l_qname + b->core.n_cigar * 4; |
964 |
if (strcmp(qseq, "*")!=0) { |
965 |
b->core.l_qseq=slen; |
966 |
if (b->core.n_cigar && b->core.l_qseq != (int32_t)bam_cigar2qlen(&b->core, bam1_cigar(b))) |
967 |
err_die("Error: CIGAR and sequence length are inconsistent!(%s)\n", |
968 |
qseq); |
969 |
uint8_t* p = (uint8_t*)realloc_bdata(b, doff + (b->core.l_qseq+1)/2 + b->core.l_qseq) + doff; |
970 |
//also allocated quals memory |
971 |
memset(p, 0, (b->core.l_qseq+1)/2); |
972 |
for (int i = 0; i < b->core.l_qseq; ++i) |
973 |
p[i/2] |= bam_nt16_table[(int)qseq[i]] << 4*(1-i%2); |
974 |
} else b->core.l_qseq = 0; |
975 |
} |
976 |
|
977 |
void GBamRecord::add_quals(const char* quals) { |
978 |
//requires core.l_qseq already set |
979 |
//and must be called AFTER add_sequence(), which also allocates the memory for quals |
980 |
uint8_t* p = b->data+(b->core.l_qname + b->core.n_cigar * 4 + (b->core.l_qseq+1)/2); |
981 |
if (quals==NULL || strcmp(quals, "*") == 0) { |
982 |
for (int i=0;i < b->core.l_qseq; i++) p[i] = 0xff; |
983 |
return; |
984 |
} |
985 |
for (int i=0;i < b->core.l_qseq; i++) p[i] = quals[i]-33; |
986 |
} |
987 |
|
988 |
void GBamRecord::add_aux(const char* str) { |
989 |
//requires: being called AFTER add_quals() |
990 |
// static char tag[2]; |
991 |
// static uint8_t abuf[512]; |
992 |
//requires: being called AFTER add_quals() |
993 |
int strl=strlen(str); |
994 |
//int doff = b->core.l_qname + b->core.n_cigar*4 + (b->core.l_qseq+1)/2 + b->core.l_qseq + b->l_aux; |
995 |
//int doff0=doff; |
996 |
if (strl < 6 || str[2] != ':' || str[4] != ':') |
997 |
parse_error("missing colon in auxiliary data"); |
998 |
tag[0] = str[0]; tag[1] = str[1]; |
999 |
uint8_t atype = str[3]; |
1000 |
uint8_t* adata=abuf; |
1001 |
int alen=0; |
1002 |
if (atype == 'A' || atype == 'a' || atype == 'c' || atype == 'C') { // c and C for backward compatibility |
1003 |
atype='A'; |
1004 |
alen=1; |
1005 |
adata=(uint8_t*)&str[5]; |
1006 |
} |
1007 |
else if (atype == 'I' || atype == 'i') { |
1008 |
long long x=(long long)atoll(str + 5); |
1009 |
if (x < 0) { |
1010 |
if (x >= -127) { |
1011 |
atype='c'; |
1012 |
abuf[0] = (int8_t)x; |
1013 |
alen=1; |
1014 |
} |
1015 |
else if (x >= -32767) { |
1016 |
atype = 's'; |
1017 |
*(int16_t*)abuf = (int16_t)x; |
1018 |
alen=2; |
1019 |
} |
1020 |
else { |
1021 |
atype='i'; |
1022 |
*(int32_t*)abuf = (int32_t)x; |
1023 |
alen=4; |
1024 |
if (x < -2147483648ll) |
1025 |
fprintf(stderr, "Parse warning: integer %lld is out of range.", |
1026 |
x); |
1027 |
} |
1028 |
} else { //x >=0 |
1029 |
if (x <= 255) { |
1030 |
atype = 'C'; |
1031 |
abuf[0] = (uint8_t)x; |
1032 |
alen=1; |
1033 |
} |
1034 |
else if (x <= 65535) { |
1035 |
atype='S'; |
1036 |
*(uint16_t*)abuf = (uint16_t)x; |
1037 |
alen=2; |
1038 |
} |
1039 |
else { |
1040 |
atype='I'; |
1041 |
*(uint32_t*)abuf = (uint32_t)x; |
1042 |
alen=4; |
1043 |
if (x > 4294967295ll) |
1044 |
fprintf(stderr, "Parse warning: integer %lld is out of range.", |
1045 |
x); |
1046 |
} |
1047 |
} |
1048 |
} //integer type |
1049 |
else if (atype == 'f') { |
1050 |
*(float*)abuf = (float)atof(str + 5); |
1051 |
alen = sizeof(float); |
1052 |
} |
1053 |
else if (atype == 'd') { //? |
1054 |
*(float*)abuf = (float)atof(str + 9); |
1055 |
alen=8; |
1056 |
} |
1057 |
else if (atype == 'Z' || atype == 'H') { |
1058 |
if (atype == 'H') { // check whether the hex string is valid |
1059 |
if ((strl - 5) % 2 == 1) parse_error("length of the hex string not even"); |
1060 |
for (int i = 0; i < strl - 5; ++i) { |
1061 |
int c = toupper(str[5 + i]); |
1062 |
if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F'))) |
1063 |
parse_error("invalid hex character"); |
1064 |
} |
1065 |
} |
1066 |
memcpy(abuf, str + 5, strl - 5); |
1067 |
abuf[strl-5] = 0; |
1068 |
alen=strl-4; |
1069 |
} else parse_error("unrecognized aux type"); |
1070 |
this->add_aux(tag, atype, alen, adata); |
1071 |
}//add_aux() |