ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam2fastx.cpp
Revision: 154
Committed: Tue Jan 24 02:29:21 2012 UTC (7 years, 8 months ago) by gpertea
File size: 5163 byte(s)
Log Message:
massive update with Daehwan's work

Line File contents
1 #include <cstdlib>
2 #include <cstdio>
3 #include <cstring>
4 #include <getopt.h>
5
6 #include "bam/bam.h"
7 #include "bam/sam.h"
8
9 bool is_fastq=true; //default is fastq
10 bool sam_input=false; //default is BAM
11 bool all_reads=false;
12 bool mapped_only=false;
13 char outfname[1024];
14
15 #define USAGE "Usage: bam2fastx [--fasta|-a|--fastq|-q] [--sam|-s|-t] \n\
16 [-M|--mapped-only|-A|--all] [-o <outfile>] <in.bam>|<in.sam> \n"
17
18
19
20 char qseq[2048];
21
22 const char *short_options = "o:aqstMUA";
23
24 enum {
25 OPT_FASTA = 127,
26 OPT_FASTQ,
27 OPT_SAM,
28 OPT_MAPPED_ONLY,
29 OPT_ALL
30 };
31
32 struct option long_options[] = {
33 {"fasta", no_argument, 0, OPT_FASTA},
34 {"fastq", no_argument, 0, OPT_FASTQ},
35 {"sam", no_argument, 0, OPT_SAM},
36 {"mapped-only", no_argument, 0, OPT_MAPPED_ONLY},
37 {"all", no_argument, 0, OPT_ALL},
38 {0, 0, 0, 0} // terminator
39 };
40
41 int parse_options(int argc, char** argv)
42 {
43 int option_index = 0;
44 int next_option;
45 do {
46 next_option = getopt_long(argc, argv, short_options, long_options, &option_index);
47 switch (next_option) {
48 case -1:
49 break;
50 case 'a':
51 case OPT_FASTA:
52 is_fastq = false;
53 break;
54 case 'q':
55 case OPT_FASTQ:
56 is_fastq = true;
57 break;
58 case 's':
59 case 't':
60 case OPT_SAM: //sam (text) input
61 sam_input = true;
62 break;
63 case 'M':
64 case OPT_MAPPED_ONLY:
65 mapped_only = true;
66 break;
67 case 'A':
68 case OPT_ALL:
69 all_reads = true;
70 break;
71 case 'o':
72 strcpy(outfname, optarg);
73 break;
74 default:
75 return 1;
76 }
77 } while(next_option != -1);
78 if (all_reads && mapped_only) {
79 fprintf(stderr, "Error: incompatible options !\n");
80 exit(2);
81 }
82 return 0;
83 }
84
85 #define bam_unmapped(b) (((b)->core.flag & BAM_FUNMAP) != 0)
86
87 void showfastq(const bam1_t *b, samfile_t* fp, FILE* fout) {
88 char *name = bam1_qname(b);
89 char *qual = (char*)bam1_qual(b);
90 char *s = (char*)bam1_seq(b);
91 int i;
92 bool ismapped=((b->core.flag & BAM_FUNMAP) == 0);
93 if (ismapped && !all_reads) return;
94 if (mapped_only && !ismapped) return;
95
96 bool isreversed=((b->core.flag & BAM_FREVERSE) != 0);
97
98 for(i=0;i<(b->core.l_qseq);i++)
99 qseq[i] = bam1_seqi(s,i);
100 qseq[i] = 0;
101
102 // copied from sam_view.c in samtools.
103 static const int8_t seq_comp_table[16] = { 0, 8, 4, 12, 2, 10, 9, 14, 1, 6, 5, 13, 3, 11, 7, 15 };
104 if (isreversed) {
105 int qlen = b->core.l_qseq;
106 for(i=0;i<qlen>>1;i++) {
107 int8_t t = seq_comp_table[qseq[qlen - 1 - i]];
108 qseq[qlen - 1 - i] = seq_comp_table[qseq[i]];
109 qseq[i] = t;
110 }
111 if(qlen&1) qseq[i] = seq_comp_table[qseq[i]];
112 }
113
114 for(i=0;i<(b->core.l_qseq);i++) {
115 qseq[i] = bam_nt16_rev_table[qseq[i]];
116 }
117
118 fprintf(fout, "@%s\n%s\n",name, qseq);
119 for(i=0;i<(b->core.l_qseq);i++) {
120 qseq[i]=qual[i]+33;
121 }
122 qseq[i]=0;
123
124 if(isreversed) {
125 int qlen = b->core.l_qseq;
126 for(i= 0;i<qlen>>1;i++) {
127 int8_t t = qseq[qlen - 1 - i];
128 qseq[qlen - 1 - i] = qseq[i];
129 qseq[i] = t;
130 }
131 }
132
133 fprintf(fout, "+\n%s\n",qseq);
134 }
135
136 void showfasta(const bam1_t *b, samfile_t* fp, FILE* fout) {
137 char *name = bam1_qname(b);
138 char *s = (char*)bam1_seq(b);
139 int i;
140
141 bool ismapped=((b->core.flag & BAM_FUNMAP) == 0);
142 if (ismapped && !all_reads) return;
143 if (mapped_only && !ismapped) return;
144
145 bool isreversed=((b->core.flag & BAM_FREVERSE) != 0);
146
147 for(i=0;i<(b->core.l_qseq);i++)
148 qseq[i] = bam1_seqi(s,i);
149 qseq[i] = 0;
150
151 // copied from sam_view.c in samtools.
152 static const int8_t seq_comp_table[16] = { 0, 8, 4, 12, 2, 10, 9, 14, 1, 6, 5, 13, 3, 11, 7, 15 };
153 if (isreversed) {
154 int qlen = b->core.l_qseq;
155 for(i=0;i<qlen>>1;i++) {
156 int8_t t = seq_comp_table[qseq[qlen - 1 - i]];
157 qseq[qlen - 1 - i] = seq_comp_table[qseq[i]];
158 qseq[i] = t;
159 }
160 if(qlen&1) qseq[i] = seq_comp_table[qseq[i]];
161 }
162
163 for(i=0;i<(b->core.l_qseq);i++) {
164 qseq[i] = bam_nt16_rev_table[qseq[i]];
165 }
166 qseq[i] = 0;
167 fprintf(fout, ">%s\n%s\n",name, qseq);
168 }
169
170
171
172 int main(int argc, char *argv[])
173 {
174 samfile_t *fp;
175 outfname[0]=0;
176 char* fname=NULL;
177
178 if (parse_options(argc, argv) || optind>=argc) {
179 fprintf(stderr, USAGE);
180 return -1;
181 }
182 fname=argv[optind++];
183
184 if (fname==NULL || fname[0]==0) {
185 fprintf(stderr, USAGE);
186 return 1;
187 }
188 if (sam_input)
189 fp = samopen(fname, "r", 0);
190 else
191 fp = samopen(fname, "rb", 0);
192 if (fp == 0) {
193 fprintf(stderr, "Error: bam2fastx failed to open BAM file %s\n", fname);
194 return 1;
195 }
196 FILE* fout=stdout;
197 if (outfname[0]) {
198 fout=fopen(outfname, "w");
199 if (fout==NULL) {
200 fprintf(stderr, "Error creating output file %s\n", outfname);
201 return 2;
202 }
203 }
204 bam1_t *b = bam_init1();
205 if (is_fastq) {
206 while (samread(fp, b) >= 0) showfastq(b, fp, fout);
207 }
208 else {
209 while (samread(fp, b) >= 0) showfasta(b, fp, fout);
210 }
211
212 bam_destroy1(b);
213
214 samclose(fp);
215 return 0;
216 }