1 |
#include <cstdlib> |
2 |
#include <cstdio> |
3 |
#include <cstring> |
4 |
|
5 |
#include <bam/bam.h> |
6 |
#include <bam/sam.h> |
7 |
|
8 |
#include "common.h" |
9 |
#include "GBase.h" |
10 |
#include "GList.hh" |
11 |
|
12 |
#define USAGE "Usage: bam_merge <out.bam> <in1.bam> <in2.bam> [...]\n" |
13 |
|
14 |
#define ERR_BAM_OPEN "Error: bam_merge failed to open BAM file %s\n" |
15 |
|
16 |
void print_usage() |
17 |
{ |
18 |
fprintf(stderr, USAGE); |
19 |
} |
20 |
|
21 |
samfile_t **srcfiles; //array of SAM file handles |
22 |
samfile_t *fw; //output SAM handle |
23 |
|
24 |
class CBamLine { |
25 |
public: |
26 |
int fileno; |
27 |
long read_id; |
28 |
bam1_t* b; |
29 |
bool operator==(CBamLine& l){ |
30 |
return (read_id==l.read_id); |
31 |
} |
32 |
bool operator>(CBamLine& l){ |
33 |
return (read_id<l.read_id); //(last has lowest #) |
34 |
} |
35 |
bool operator<(CBamLine& l){ |
36 |
return (read_id>l.read_id); |
37 |
} |
38 |
CBamLine(int fno=-1, bam1_t* br=NULL) { |
39 |
fileno=fno; |
40 |
read_id=0; |
41 |
b=br; |
42 |
b_init(); |
43 |
} |
44 |
void b_init() { |
45 |
if (b) { |
46 |
char *name = bam1_qname(b); |
47 |
read_id=atol(name); |
48 |
if (read_id<1) { |
49 |
char* samline=bam_format1(srcfiles[0]->header, b); |
50 |
GError("Error: invalid read Id (must be numeric) for BAM record:\n%s\n", |
51 |
samline); |
52 |
} |
53 |
} |
54 |
} |
55 |
|
56 |
void b_free() { |
57 |
if (b!=NULL) { bam_destroy1(b); } |
58 |
} |
59 |
}; |
60 |
|
61 |
GList<CBamLine> lines(true,true,false); |
62 |
|
63 |
int main(int argc, char *argv[]) |
64 |
{ |
65 |
int parse_ret = parse_options(argc, argv, print_usage); |
66 |
if (parse_ret) |
67 |
return parse_ret; |
68 |
|
69 |
char* outfname=NULL; |
70 |
if (argc-optind<3) { |
71 |
print_usage(); |
72 |
if (argc>1) |
73 |
fprintf(stderr, "Error: only %d arguments given.\n", argc-1); |
74 |
return -1; |
75 |
} |
76 |
outfname=argv[optind]; |
77 |
int num_src=argc-optind-1; |
78 |
GMALLOC(srcfiles, (num_src*sizeof(samfile_t*))); |
79 |
for (int i=optind+1;i<argc;i++) { |
80 |
int fno=i-optind-1; |
81 |
samfile_t* fp=samopen(argv[i], "rb", 0); |
82 |
if (fp==0) { |
83 |
fprintf(stderr, ERR_BAM_OPEN, argv[i]); |
84 |
return 1; |
85 |
} |
86 |
bam1_t *b = bam_init1(); |
87 |
if (samread(fp, b) > 0) { |
88 |
srcfiles[fno]=fp; |
89 |
lines.Add(new CBamLine(fno, b)); |
90 |
} |
91 |
} |
92 |
if (lines.Count()==0) { |
93 |
GMessage("Warning: no input BAM records found.\n"); |
94 |
} |
95 |
fw=samopen(outfname, "wb", srcfiles[lines[0]->fileno]->header); |
96 |
if (fw==NULL) |
97 |
GError("Error creating output file %s\n", outfname); |
98 |
|
99 |
FILE* findex = NULL; |
100 |
if (!index_outfile.empty()) { |
101 |
findex = fopen(index_outfile.c_str(), "w"); |
102 |
if (findex == NULL) |
103 |
err_die("Error: cannot create file %s\n", index_outfile.c_str()); |
104 |
} |
105 |
|
106 |
int last; |
107 |
uint64_t count = 0, last_id = 0; |
108 |
while ((last=lines.Count()-1)>=0) { |
109 |
CBamLine* from=lines[last]; //should have the smallest read_id |
110 |
if (from->fileno<0 || from->b==NULL) |
111 |
GError("Invalid processTopLine() call with uninitialized value."); |
112 |
if (findex != NULL) |
113 |
{ |
114 |
++count; |
115 |
if (count >= 1000 && from->read_id != last_id) |
116 |
{ |
117 |
int64_t offset = bam_tell(fw->x.bam); |
118 |
int block_offset = offset & 0xFFFF; |
119 |
|
120 |
// daehwan - this is a bug in bgzf.c in samtools |
121 |
// I'll report this bug with a temporary solution, soon. |
122 |
if (block_offset <= 0xF000) |
123 |
{ |
124 |
fprintf(findex, "%lu\t%ld\n", from->read_id, offset); |
125 |
count = 0; |
126 |
} |
127 |
} |
128 |
last_id = from->read_id; |
129 |
} |
130 |
samwrite(fw, from->b); |
131 |
|
132 |
if (samread(srcfiles[from->fileno], from->b)>0) { |
133 |
from->b_init(); |
134 |
//adjust the position in the sorted lines list |
135 |
if (last<7) {// |
136 |
int i=last; |
137 |
while (i>0 && lines[i-1]->read_id<lines[i]->read_id) { |
138 |
//swap |
139 |
CBamLine* tmp=lines[i-1]; |
140 |
lines.Put(i-1, lines[i], false); |
141 |
lines.Put(i,tmp, false); |
142 |
i--; |
143 |
} |
144 |
} |
145 |
else { //use quick-insert |
146 |
lines.Pop(); |
147 |
lines.Add(from); |
148 |
} |
149 |
} |
150 |
else { //no more BAM records |
151 |
lines.Pop(); |
152 |
from->b_free(); |
153 |
} |
154 |
} |
155 |
samclose(fw); |
156 |
for (int i=0;i<num_src;i++) |
157 |
samclose(srcfiles[i]); |
158 |
GFREE(srcfiles); |
159 |
|
160 |
if (findex != NULL) |
161 |
fclose(findex); |
162 |
|
163 |
return 0; |
164 |
} |