ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam2fastx.cpp
Revision: 135
Committed: Mon Dec 12 22:28:38 2011 UTC (8 years, 5 months ago) by gpertea
File size: 3820 byte(s)
Log Message:
wip - SplicedSAMHitFactory() still not implemented

Line File contents
1 #include <cstdlib>
2 #include <cstdio>
3 #include <cstring>
4 #include <getopt.h>
5
6 #include "bam/bam.h"
7 #include "bam/sam.h"
8
9 bool is_fastq=true; //default is fastq
10 bool sam_input=false; //default is BAM
11 bool all_reads=false;
12 bool mapped_only=false;
13 char outfname[1024];
14
15 #define USAGE "Usage: bam2fastx [--fasta|-a|--fastq|-q] [--sam|-s|-t] \n\
16 [-M|--mapped-only|-A|--all] [-o <outfile>] <in.bam>|<in.sam> \n"
17
18
19
20 char qseq[2048];
21
22 const char *short_options = "o:aqstMUA";
23
24 enum {
25 OPT_FASTA = 127,
26 OPT_FASTQ,
27 OPT_SAM,
28 OPT_MAPPED_ONLY,
29 OPT_ALL
30 };
31
32 struct option long_options[] = {
33 {"fasta", no_argument, 0, OPT_FASTA},
34 {"fastq", no_argument, 0, OPT_FASTQ},
35 {"sam", no_argument, 0, OPT_SAM},
36 {"mapped-only", no_argument, 0, OPT_MAPPED_ONLY},
37 {"all", no_argument, 0, OPT_ALL},
38 {0, 0, 0, 0} // terminator
39 };
40
41 int parse_options(int argc, char** argv)
42 {
43 int option_index = 0;
44 int next_option;
45 do {
46 next_option = getopt_long(argc, argv, short_options, long_options, &option_index);
47 switch (next_option) {
48 case -1:
49 break;
50 case 'a':
51 case OPT_FASTA:
52 is_fastq = false;
53 break;
54 case 'q':
55 case OPT_FASTQ:
56 is_fastq = true;
57 break;
58 case 's':
59 case 't':
60 case OPT_SAM: //sam (text) input
61 sam_input = true;
62 break;
63 case 'M':
64 case OPT_MAPPED_ONLY:
65 mapped_only = true;
66 break;
67 case 'A':
68 case OPT_ALL:
69 all_reads = true;
70 break;
71 case 'o':
72 strcpy(outfname, optarg);
73 break;
74 default:
75 return 1;
76 }
77 } while(next_option != -1);
78 if (all_reads && mapped_only) {
79 fprintf(stderr, "Error: incompatible options !\n");
80 exit(2);
81 }
82 return 0;
83 }
84
85 #define bam_unmapped(b) (((b)->core.flag & BAM_FUNMAP) != 0)
86
87 void showfastq(const bam1_t *b, samfile_t* fp, FILE* fout) {
88 char *name = bam1_qname(b);
89 char *qual = (char*)bam1_qual(b);
90 char *s = (char*)bam1_seq(b);
91 int i;
92 bool ismapped=((b->core.flag & BAM_FUNMAP) == 0);
93 if (ismapped && !all_reads) return;
94 if (mapped_only && !ismapped) return;
95
96 for(i=0;i<(b->core.l_qseq);i++) {
97 int8_t v = bam1_seqi(s,i);
98 qseq[i] = bam_nt16_rev_table[v];
99 }
100 qseq[i] = 0;
101
102 fprintf(fout, "@%s\n%s\n",name, qseq);
103 for(i=0;i<(b->core.l_qseq);i++) {
104 qseq[i]=qual[i]+33;
105 }
106 qseq[i]=0;
107 fprintf(fout, "+\n%s\n",qseq);
108 }
109
110 void showfasta(const bam1_t *b, samfile_t* fp, FILE* fout) {
111 char *name = bam1_qname(b);
112 char *s = (char*)bam1_seq(b);
113 int i;
114 for(i=0;i<(b->core.l_qseq);i++) {
115 int8_t v = bam1_seqi(s,i);
116 qseq[i] = bam_nt16_rev_table[v];
117 }
118 qseq[i] = 0;
119 fprintf(fout, ">%s\n%s\n",name, qseq);
120 }
121
122
123
124 int main(int argc, char *argv[])
125 {
126 samfile_t *fp;
127 outfname[0]=0;
128 char* fname=NULL;
129
130 if (parse_options(argc, argv) || optind>=argc) {
131 fprintf(stderr, USAGE);
132 return -1;
133 }
134 fname=argv[optind++];
135
136 if (fname==NULL || fname[0]==0) {
137 fprintf(stderr, USAGE);
138 return 1;
139 }
140 if (sam_input)
141 fp = samopen(fname, "r", 0);
142 else
143 fp = samopen(fname, "rb", 0);
144 if (fp == 0) {
145 fprintf(stderr, "Error: bam2fastx failed to open BAM file %s\n", fname);
146 return 1;
147 }
148 FILE* fout=stdout;
149 if (outfname[0]) {
150 fout=fopen(outfname, "w");
151 if (fout==NULL) {
152 fprintf(stderr, "Error creating output file %s\n", outfname);
153 return 2;
154 }
155 }
156 bam1_t *b = bam_init1();
157 if (is_fastq) {
158 while (samread(fp, b) >= 0) showfastq(b, fp, fout);
159 }
160 else {
161 while (samread(fp, b) >= 0) showfasta(b, fp, fout);
162 }
163
164 bam_destroy1(b);
165
166 samclose(fp);
167 return 0;
168 }