1 |
#include <cstdlib> |
2 |
#include <cstdio> |
3 |
#include <cstring> |
4 |
#include <getopt.h> |
5 |
|
6 |
#include "bam/bam.h" |
7 |
#include "bam/sam.h" |
8 |
|
9 |
bool is_fastq=true; //default is fastq |
10 |
bool sam_input=false; //default is BAM |
11 |
bool all_reads=false; |
12 |
bool mapped_only=false; |
13 |
|
14 |
#define USAGE "Usage: bam2fastx [--fasta|-a|--fastq|-q] [--sam|-s|-t] \n\ |
15 |
[-M|--mapped-only|-A|--all] <in.bam>|<in.sam> \n" |
16 |
|
17 |
char qseq[2048]; |
18 |
|
19 |
const char *short_options = "aqstMU"; |
20 |
|
21 |
enum { |
22 |
OPT_FASTA = 127, |
23 |
OPT_FASTQ, |
24 |
OPT_SAM, |
25 |
OPT_MAPPED_ONLY, |
26 |
OPT_ALL |
27 |
}; |
28 |
|
29 |
struct option long_options[] = { |
30 |
{"fasta", no_argument, 0, OPT_FASTA}, |
31 |
{"fastq", no_argument, 0, OPT_FASTQ}, |
32 |
{"sam", no_argument, 0, OPT_SAM}, |
33 |
{"mapped-only", no_argument, 0, OPT_MAPPED_ONLY}, |
34 |
{"all", no_argument, 0, OPT_ALL}, |
35 |
{0, 0, 0, 0} // terminator |
36 |
}; |
37 |
|
38 |
int parse_options(int argc, char** argv) |
39 |
{ |
40 |
int option_index = 0; |
41 |
int next_option; |
42 |
do { |
43 |
next_option = getopt_long(argc, argv, short_options, long_options, &option_index); |
44 |
switch (next_option) { |
45 |
case -1: |
46 |
break; |
47 |
case 'a': |
48 |
case OPT_FASTA: |
49 |
is_fastq = false; |
50 |
break; |
51 |
case 'q': |
52 |
case OPT_FASTQ: |
53 |
is_fastq = true; |
54 |
break; |
55 |
case 's': |
56 |
case 't': |
57 |
case OPT_SAM: //sam (text) input |
58 |
sam_input = true; |
59 |
break; |
60 |
case 'M': |
61 |
case OPT_MAPPED_ONLY: |
62 |
mapped_only = true; |
63 |
break; |
64 |
case 'A': |
65 |
case OPT_ALL: |
66 |
all_reads = true; |
67 |
break; |
68 |
default: |
69 |
return 1; |
70 |
} |
71 |
} while(next_option != -1); |
72 |
if (all_reads && mapped_only) { |
73 |
fprintf(stderr, "Error: incompatible options !\n"); |
74 |
exit(2); |
75 |
} |
76 |
return 0; |
77 |
} |
78 |
|
79 |
#define bam_unmapped(b) (((b)->core.flag & BAM_FUNMAP) != 0) |
80 |
|
81 |
void showfastq(const bam1_t *b, samfile_t* fp) { |
82 |
char *name = bam1_qname(b); |
83 |
char *qual = (char*)bam1_qual(b); |
84 |
char *s = (char*)bam1_seq(b); |
85 |
int i; |
86 |
bool ismapped=((b->core.flag & BAM_FUNMAP) == 0); |
87 |
if (ismapped && !all_reads) return; |
88 |
if (mapped_only && !ismapped) return; |
89 |
|
90 |
for(i=0;i<(b->core.l_qseq);i++) { |
91 |
int8_t v = bam1_seqi(s,i); |
92 |
qseq[i] = bam_nt16_rev_table[v]; |
93 |
} |
94 |
qseq[i] = 0; |
95 |
|
96 |
printf("@%s\n%s\n",name, qseq); |
97 |
for(i=0;i<(b->core.l_qseq);i++) { |
98 |
qseq[i]=qual[i]+33; |
99 |
} |
100 |
qseq[i]=0; |
101 |
printf("+\n%s\n",qseq); |
102 |
} |
103 |
|
104 |
void showfasta(const bam1_t *b, samfile_t* fp) { |
105 |
char *name = bam1_qname(b); |
106 |
char *s = (char*)bam1_seq(b); |
107 |
int i; |
108 |
for(i=0;i<(b->core.l_qseq);i++) { |
109 |
int8_t v = bam1_seqi(s,i); |
110 |
qseq[i] = bam_nt16_rev_table[v]; |
111 |
} |
112 |
qseq[i] = 0; |
113 |
printf(">%s\n%s\n",name, qseq); |
114 |
} |
115 |
|
116 |
|
117 |
|
118 |
int main(int argc, char *argv[]) |
119 |
{ |
120 |
samfile_t *fp; |
121 |
char* fname=NULL; |
122 |
|
123 |
if (parse_options(argc, argv) || optind>=argc) { |
124 |
fprintf(stderr, USAGE); |
125 |
return -1; |
126 |
} |
127 |
fname=argv[optind++]; |
128 |
|
129 |
if (fname==NULL || fname[0]==0) { |
130 |
fprintf(stderr, USAGE); |
131 |
return 1; |
132 |
} |
133 |
if (sam_input) |
134 |
fp = samopen(fname, "r", 0); |
135 |
else |
136 |
fp = samopen(fname, "rb", 0); |
137 |
if (fp == 0) { |
138 |
fprintf(stderr, "Error: bam2fastx failed to open BAM file %s\n", fname); |
139 |
return 1; |
140 |
} |
141 |
|
142 |
bam1_t *b = bam_init1(); |
143 |
if (is_fastq) { |
144 |
while (samread(fp, b) >= 0) showfastq(b, fp); |
145 |
} |
146 |
else { |
147 |
while (samread(fp, b) >= 0) showfasta(b, fp); |
148 |
} |
149 |
|
150 |
bam_destroy1(b); |
151 |
|
152 |
samclose(fp); |
153 |
return 0; |
154 |
} |