ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam2fastx.cpp
Revision: 154
Committed: Tue Jan 24 02:29:21 2012 UTC (8 years, 4 months ago) by gpertea
File size: 5163 byte(s)
Log Message:
massive update with Daehwan's work

Line User Rev File contents
1 gpertea 29 #include <cstdlib>
2     #include <cstdio>
3     #include <cstring>
4     #include <getopt.h>
5    
6     #include "bam/bam.h"
7     #include "bam/sam.h"
8    
9 gpertea 97 bool is_fastq=true; //default is fastq
10     bool sam_input=false; //default is BAM
11     bool all_reads=false;
12     bool mapped_only=false;
13 gpertea 135 char outfname[1024];
14 gpertea 37
15 gpertea 97 #define USAGE "Usage: bam2fastx [--fasta|-a|--fastq|-q] [--sam|-s|-t] \n\
16 gpertea 135 [-M|--mapped-only|-A|--all] [-o <outfile>] <in.bam>|<in.sam> \n"
17 gpertea 29
18    
19 gpertea 97
20     char qseq[2048];
21    
22 gpertea 135 const char *short_options = "o:aqstMUA";
23 gpertea 97
24 gpertea 29 enum {
25     OPT_FASTA = 127,
26 gpertea 37 OPT_FASTQ,
27 gpertea 97 OPT_SAM,
28     OPT_MAPPED_ONLY,
29     OPT_ALL
30 gpertea 29 };
31    
32 gpertea 97 struct option long_options[] = {
33 gpertea 29 {"fasta", no_argument, 0, OPT_FASTA},
34     {"fastq", no_argument, 0, OPT_FASTQ},
35 gpertea 37 {"sam", no_argument, 0, OPT_SAM},
36 gpertea 97 {"mapped-only", no_argument, 0, OPT_MAPPED_ONLY},
37     {"all", no_argument, 0, OPT_ALL},
38 gpertea 29 {0, 0, 0, 0} // terminator
39     };
40    
41     int parse_options(int argc, char** argv)
42     {
43     int option_index = 0;
44     int next_option;
45     do {
46     next_option = getopt_long(argc, argv, short_options, long_options, &option_index);
47     switch (next_option) {
48     case -1:
49     break;
50     case 'a':
51     case OPT_FASTA:
52     is_fastq = false;
53     break;
54     case 'q':
55     case OPT_FASTQ:
56     is_fastq = true;
57     break;
58 gpertea 37 case 's':
59     case 't':
60     case OPT_SAM: //sam (text) input
61     sam_input = true;
62     break;
63 gpertea 97 case 'M':
64     case OPT_MAPPED_ONLY:
65     mapped_only = true;
66     break;
67     case 'A':
68     case OPT_ALL:
69     all_reads = true;
70     break;
71 gpertea 135 case 'o':
72     strcpy(outfname, optarg);
73     break;
74 gpertea 29 default:
75     return 1;
76     }
77     } while(next_option != -1);
78 gpertea 97 if (all_reads && mapped_only) {
79     fprintf(stderr, "Error: incompatible options !\n");
80     exit(2);
81     }
82 gpertea 29 return 0;
83     }
84    
85 gpertea 97 #define bam_unmapped(b) (((b)->core.flag & BAM_FUNMAP) != 0)
86 gpertea 29
87 gpertea 135 void showfastq(const bam1_t *b, samfile_t* fp, FILE* fout) {
88 gpertea 29 char *name = bam1_qname(b);
89     char *qual = (char*)bam1_qual(b);
90     char *s = (char*)bam1_seq(b);
91     int i;
92 gpertea 97 bool ismapped=((b->core.flag & BAM_FUNMAP) == 0);
93     if (ismapped && !all_reads) return;
94     if (mapped_only && !ismapped) return;
95 gpertea 154
96     bool isreversed=((b->core.flag & BAM_FREVERSE) != 0);
97    
98     for(i=0;i<(b->core.l_qseq);i++)
99     qseq[i] = bam1_seqi(s,i);
100     qseq[i] = 0;
101 gpertea 97
102 gpertea 154 // copied from sam_view.c in samtools.
103     static const int8_t seq_comp_table[16] = { 0, 8, 4, 12, 2, 10, 9, 14, 1, 6, 5, 13, 3, 11, 7, 15 };
104     if (isreversed) {
105     int qlen = b->core.l_qseq;
106     for(i=0;i<qlen>>1;i++) {
107     int8_t t = seq_comp_table[qseq[qlen - 1 - i]];
108     qseq[qlen - 1 - i] = seq_comp_table[qseq[i]];
109     qseq[i] = t;
110     }
111     if(qlen&1) qseq[i] = seq_comp_table[qseq[i]];
112     }
113    
114 gpertea 29 for(i=0;i<(b->core.l_qseq);i++) {
115 gpertea 154 qseq[i] = bam_nt16_rev_table[qseq[i]];
116     }
117    
118 gpertea 135 fprintf(fout, "@%s\n%s\n",name, qseq);
119 gpertea 29 for(i=0;i<(b->core.l_qseq);i++) {
120 gpertea 97 qseq[i]=qual[i]+33;
121 gpertea 29 }
122     qseq[i]=0;
123 gpertea 154
124     if(isreversed) {
125     int qlen = b->core.l_qseq;
126     for(i= 0;i<qlen>>1;i++) {
127     int8_t t = qseq[qlen - 1 - i];
128     qseq[qlen - 1 - i] = qseq[i];
129     qseq[i] = t;
130     }
131     }
132    
133 gpertea 135 fprintf(fout, "+\n%s\n",qseq);
134 gpertea 29 }
135    
136 gpertea 135 void showfasta(const bam1_t *b, samfile_t* fp, FILE* fout) {
137 gpertea 29 char *name = bam1_qname(b);
138     char *s = (char*)bam1_seq(b);
139     int i;
140 gpertea 154
141     bool ismapped=((b->core.flag & BAM_FUNMAP) == 0);
142     if (ismapped && !all_reads) return;
143     if (mapped_only && !ismapped) return;
144    
145     bool isreversed=((b->core.flag & BAM_FREVERSE) != 0);
146    
147     for(i=0;i<(b->core.l_qseq);i++)
148     qseq[i] = bam1_seqi(s,i);
149     qseq[i] = 0;
150    
151     // copied from sam_view.c in samtools.
152     static const int8_t seq_comp_table[16] = { 0, 8, 4, 12, 2, 10, 9, 14, 1, 6, 5, 13, 3, 11, 7, 15 };
153     if (isreversed) {
154     int qlen = b->core.l_qseq;
155     for(i=0;i<qlen>>1;i++) {
156     int8_t t = seq_comp_table[qseq[qlen - 1 - i]];
157     qseq[qlen - 1 - i] = seq_comp_table[qseq[i]];
158     qseq[i] = t;
159     }
160     if(qlen&1) qseq[i] = seq_comp_table[qseq[i]];
161     }
162    
163 gpertea 29 for(i=0;i<(b->core.l_qseq);i++) {
164 gpertea 154 qseq[i] = bam_nt16_rev_table[qseq[i]];
165 gpertea 29 }
166     qseq[i] = 0;
167 gpertea 135 fprintf(fout, ">%s\n%s\n",name, qseq);
168 gpertea 29 }
169    
170    
171    
172     int main(int argc, char *argv[])
173     {
174     samfile_t *fp;
175 gpertea 135 outfname[0]=0;
176 gpertea 29 char* fname=NULL;
177 gpertea 37
178 gpertea 29 if (parse_options(argc, argv) || optind>=argc) {
179     fprintf(stderr, USAGE);
180     return -1;
181     }
182     fname=argv[optind++];
183    
184     if (fname==NULL || fname[0]==0) {
185     fprintf(stderr, USAGE);
186     return 1;
187     }
188 gpertea 37 if (sam_input)
189     fp = samopen(fname, "r", 0);
190     else
191     fp = samopen(fname, "rb", 0);
192     if (fp == 0) {
193 gpertea 29 fprintf(stderr, "Error: bam2fastx failed to open BAM file %s\n", fname);
194     return 1;
195     }
196 gpertea 135 FILE* fout=stdout;
197     if (outfname[0]) {
198     fout=fopen(outfname, "w");
199     if (fout==NULL) {
200     fprintf(stderr, "Error creating output file %s\n", outfname);
201     return 2;
202     }
203     }
204 gpertea 29 bam1_t *b = bam_init1();
205     if (is_fastq) {
206 gpertea 135 while (samread(fp, b) >= 0) showfastq(b, fp, fout);
207 gpertea 29 }
208     else {
209 gpertea 135 while (samread(fp, b) >= 0) showfasta(b, fp, fout);
210 gpertea 29 }
211    
212     bam_destroy1(b);
213    
214     samclose(fp);
215     return 0;
216     }