14 |
|
#include <iostream> |
15 |
|
#include <sstream> |
16 |
|
#include <cstdarg> |
17 |
+ |
#include <limits> |
18 |
|
#include <getopt.h> |
19 |
|
|
20 |
|
#include "common.h" |
75 |
|
} |
76 |
|
#endif |
77 |
|
|
78 |
+ |
bool bowtie2 = true; |
79 |
+ |
int bowtie2_min_score = -10; |
80 |
+ |
int max_segment_mapping = 20; |
81 |
+ |
|
82 |
+ |
// daehwan - temporary |
83 |
+ |
bool parallel = true; |
84 |
|
|
85 |
|
unsigned int max_insertion_length = 3; |
86 |
|
unsigned int max_deletion_length = 3; |
120 |
|
bool no_coverage_search = false; |
121 |
|
bool no_microexon_search = false; |
122 |
|
bool butterfly_search = false; |
123 |
< |
int num_cpus = 1; |
123 |
> |
int num_threads = 1; |
124 |
> |
|
125 |
|
float min_isoform_fraction = 0.15f; |
126 |
|
|
127 |
|
string output_dir = "tophat_out"; |
128 |
|
string aux_outfile = ""; //auxiliary output file name (e.g. prep_reads read stats) |
129 |
+ |
string index_outfile = ""; |
130 |
|
string gene_filter = ""; |
131 |
|
string gff_file = ""; |
132 |
|
string ium_reads = ""; |
147 |
|
string flt_reads = ""; |
148 |
|
string flt_mappings = ""; |
149 |
|
|
150 |
+ |
bool fusion_search = false; |
151 |
+ |
size_t fusion_anchor_length = 20; |
152 |
+ |
size_t fusion_min_dist = 10000000; |
153 |
+ |
size_t fusion_read_mismatches = 2; |
154 |
+ |
size_t fusion_multireads = 2; |
155 |
+ |
size_t fusion_multipairs = 2; |
156 |
+ |
|
157 |
|
eLIBRARY_TYPE library_type = LIBRARY_TYPE_NONE; |
158 |
|
|
159 |
|
extern void print_usage(); |
239 |
|
return token; |
240 |
|
} |
241 |
|
|
226 |
– |
|
242 |
|
const char *short_options = "QCp:z:N:"; |
243 |
|
|
244 |
|
enum |
283 |
|
OPT_LIBRARY_TYPE, |
284 |
|
OPT_MAX_DELETION_LENGTH, |
285 |
|
OPT_MAX_INSERTION_LENGTH, |
286 |
< |
OPT_NUM_CPUS, |
286 |
> |
OPT_NUM_THREADS, |
287 |
|
OPT_ZPACKER, |
288 |
|
OPT_SAMTOOLS, |
289 |
|
OPT_AUX_OUT, |
290 |
+ |
OPT_INDEX_OUT, |
291 |
|
OPT_GTF_JUNCS, |
292 |
|
OPT_FILTER_READS, |
293 |
< |
OPT_FILTER_HITS |
293 |
> |
OPT_FILTER_HITS, |
294 |
> |
OPT_FUSION_SEARCH, |
295 |
> |
OPT_FUSION_ANCHOR_LENGTH, |
296 |
> |
OPT_FUSION_MIN_DIST, |
297 |
> |
OPT_FUSION_READ_MISMATCHES, |
298 |
> |
OPT_FUSION_MULTIREADS, |
299 |
> |
OPT_FUSION_MULTIPAIRS, |
300 |
> |
OPT_BOWTIE1, |
301 |
> |
OPT_BOWTIE2_MIN_SCORE, |
302 |
|
}; |
303 |
|
|
304 |
|
static struct option long_options[] = { |
342 |
|
{"library-type", required_argument, 0, OPT_LIBRARY_TYPE}, |
343 |
|
{"max-deletion-length", required_argument, 0, OPT_MAX_DELETION_LENGTH}, |
344 |
|
{"max-insertion-length", required_argument, 0, OPT_MAX_INSERTION_LENGTH}, |
345 |
< |
{"num-threads", required_argument, 0, OPT_NUM_CPUS}, |
345 |
> |
{"num-threads", required_argument, 0, OPT_NUM_THREADS}, |
346 |
|
{"zpacker", required_argument, 0, OPT_ZPACKER}, |
347 |
|
{"samtools", required_argument, 0, OPT_SAMTOOLS}, |
348 |
|
{"aux-outfile", required_argument, 0, OPT_AUX_OUT}, |
349 |
+ |
{"index-outfile", required_argument, 0, OPT_INDEX_OUT}, |
350 |
|
{"gtf-juncs", required_argument, 0, OPT_GTF_JUNCS}, |
351 |
|
{"flt-reads",required_argument, 0, OPT_FILTER_READS}, |
352 |
|
{"flt-hits",required_argument, 0, OPT_FILTER_HITS}, |
353 |
+ |
{"fusion-search", no_argument, 0, OPT_FUSION_SEARCH}, |
354 |
+ |
{"fusion-anchor-length", required_argument, 0, OPT_FUSION_ANCHOR_LENGTH}, |
355 |
+ |
{"fusion-min-dist", required_argument, 0, OPT_FUSION_MIN_DIST}, |
356 |
+ |
{"fusion-read-mismatches", required_argument, 0, OPT_FUSION_READ_MISMATCHES}, |
357 |
+ |
{"fusion-multireads", required_argument, 0, OPT_FUSION_MULTIREADS}, |
358 |
+ |
{"fusion-multipairs", required_argument, 0, OPT_FUSION_MULTIPAIRS}, |
359 |
+ |
{"bowtie1", no_argument, 0, OPT_BOWTIE1}, |
360 |
+ |
{"bowtie2-min-score", required_argument, 0, OPT_BOWTIE2_MIN_SCORE}, |
361 |
|
{0, 0, 0, 0} // terminator |
362 |
|
}; |
363 |
|
|
529 |
|
case OPT_AUX_OUT: |
530 |
|
aux_outfile = optarg; |
531 |
|
break; |
532 |
+ |
case OPT_INDEX_OUT: |
533 |
+ |
index_outfile = optarg; |
534 |
+ |
break; |
535 |
|
case 'p': |
536 |
< |
case OPT_NUM_CPUS: |
537 |
< |
num_cpus=parseIntOpt(1,"-p/--num-threads must be at least 1",print_usage); |
536 |
> |
case OPT_NUM_THREADS: |
537 |
> |
num_threads=parseIntOpt(1,"-p/--num-threads must be at least 1",print_usage); |
538 |
|
break; |
539 |
|
case OPT_GTF_JUNCS: |
540 |
|
gtf_juncs = optarg; |
545 |
|
case OPT_FILTER_HITS: |
546 |
|
flt_mappings = optarg; |
547 |
|
break; |
548 |
+ |
case OPT_FUSION_SEARCH: |
549 |
+ |
fusion_search = true; |
550 |
+ |
break; |
551 |
+ |
case OPT_FUSION_ANCHOR_LENGTH: |
552 |
+ |
fusion_anchor_length = parseIntOpt(10, "--fusion-anchor-length must be at least 10", print_usage); |
553 |
+ |
break; |
554 |
+ |
case OPT_FUSION_MIN_DIST: |
555 |
+ |
fusion_min_dist = parseIntOpt(0, "--fusion-min-dist must be at least 0", print_usage); |
556 |
+ |
break; |
557 |
+ |
case OPT_FUSION_READ_MISMATCHES: |
558 |
+ |
fusion_read_mismatches = parseIntOpt(0, "--fusion-read-mismatches must be at least 0", print_usage); |
559 |
+ |
break; |
560 |
+ |
case OPT_FUSION_MULTIREADS: |
561 |
+ |
fusion_multireads = parseIntOpt(1, "--fusion-multireads must be at least 1", print_usage); |
562 |
+ |
break; |
563 |
+ |
case OPT_FUSION_MULTIPAIRS: |
564 |
+ |
fusion_multipairs = parseIntOpt(1, "--fusion-multipairs must be at least 0", print_usage); |
565 |
+ |
break; |
566 |
+ |
case OPT_BOWTIE1: |
567 |
+ |
bowtie2 = false; |
568 |
+ |
break; |
569 |
+ |
case OPT_BOWTIE2_MIN_SCORE: |
570 |
+ |
bowtie2_min_score = -1 * parseIntOpt(0, "--bowtie2-min-score must be at least 0", print_usage); |
571 |
+ |
break; |
572 |
|
default: |
573 |
|
print_usage(); |
574 |
|
return 1; |
686 |
|
if (fext=="gz" || fext=="gzip" || fext=="z") { |
687 |
|
if (use_all_cpus && str_endsWith(zpacker,"pigz")) { |
688 |
|
picmd=zpacker; |
689 |
< |
if (num_cpus<2) picmd.append(" -p1"); |
689 |
> |
if (num_threads<2) picmd.append(" -p1"); |
690 |
|
else { |
691 |
|
picmd.append(" -p"); |
692 |
< |
str_appendInt(picmd, num_cpus); |
692 |
> |
str_appendInt(picmd, num_threads); |
693 |
|
//picmd.append(" -cd"); |
694 |
|
} |
695 |
|
} |
698 |
|
else if (fext=="bz2" || fext=="bzip2" || fext=="bz" || fext=="bzip") { |
699 |
|
if (use_all_cpus && str_endsWith(zpacker,"pbzip2")) { |
700 |
|
picmd=zpacker; |
701 |
< |
if (num_cpus<2) picmd.append(" -p1"); |
701 |
> |
if (num_threads<2) picmd.append(" -p1"); |
702 |
|
else { |
703 |
|
picmd.append(" -p"); |
704 |
< |
str_appendInt(picmd, num_cpus); |
704 |
> |
str_appendInt(picmd, num_threads); |
705 |
|
//picmd.append(" -cd"); |
706 |
|
} |
707 |
|
} |
746 |
|
pipecmd=zpacker; |
747 |
|
if (str_endsWith(pipecmd, "pigz") ||str_endsWith(pipecmd, "pbzip2")) { |
748 |
|
if (use_all_cpus==false) pipecmd.append(" -p1"); |
749 |
< |
else if (num_cpus>1) { |
749 |
> |
else if (num_threads>1) { |
750 |
|
pipecmd.append(" -p"); |
751 |
< |
str_appendInt(pipecmd,num_cpus); |
751 |
> |
str_appendInt(pipecmd,num_threads); |
752 |
|
} |
753 |
|
} |
754 |
|
if (!pipecmd.empty()) pipecmd.append(" -cd"); |
884 |
|
else { |
885 |
|
realloc_bdata(b, doff + b->core.n_cigar * 4); |
886 |
|
} |
887 |
< |
for (i = 0, s = cigar; i != b->core.n_cigar; ++i) { |
887 |
> |
for (i = 0, s = cigar; i != (int)b->core.n_cigar; ++i) { |
888 |
|
x = strtol(s, &t, 10); |
889 |
|
op = toupper(*t); |
890 |
|
if (op == 'M' || op == '=' || op == 'X') op = BAM_CMATCH; |
939 |
|
} |
940 |
|
|
941 |
|
void GBamRecord::add_aux(const char* str) { |
942 |
+ |
// daehwan - static buffers here are not thread-safe - I made these variables class members |
943 |
|
//requires: being called AFTER add_quals() |
944 |
< |
static char tag[2]; |
945 |
< |
static uint8_t abuf[512]; |
944 |
> |
// static char tag[2]; |
945 |
> |
// static uint8_t abuf[512]; |
946 |
|
//requires: being called AFTER add_quals() |
947 |
|
int strl=strlen(str); |
948 |
|
//int doff = b->core.l_qname + b->core.n_cigar*4 + (b->core.l_qseq+1)/2 + b->core.l_qseq + b->l_aux; |