ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam_merge.cpp
Revision: 154
Committed: Tue Jan 24 02:29:21 2012 UTC (7 years, 8 months ago) by gpertea
File size: 4115 byte(s)
Log Message:
massive update with Daehwan's work

Line File contents
1 #include <cstdlib>
2 #include <cstdio>
3 #include <cstring>
4
5 #include <bam/bam.h>
6 #include <bam/sam.h>
7
8 #include "common.h"
9 #include "GBase.h"
10 #include "GList.hh"
11
12 #define USAGE "Usage: bam_merge <out.bam> <in1.bam> <in2.bam> [...]\n"
13
14 #define ERR_BAM_OPEN "Error: bam_merge failed to open BAM file %s\n"
15
16 void print_usage()
17 {
18 fprintf(stderr, USAGE);
19 }
20
21 samfile_t **srcfiles; //array of SAM file handles
22 samfile_t *fw; //output SAM handle
23
24 class CBamLine {
25 public:
26 int fileno;
27 long read_id;
28 bam1_t* b;
29 bool operator==(CBamLine& l){
30 return (read_id==l.read_id);
31 }
32 bool operator>(CBamLine& l){
33 return (read_id<l.read_id); //(last has lowest #)
34 }
35 bool operator<(CBamLine& l){
36 return (read_id>l.read_id);
37 }
38 CBamLine(int fno=-1, bam1_t* br=NULL) {
39 fileno=fno;
40 read_id=0;
41 b=br;
42 b_init();
43 }
44 void b_init() {
45 if (b) {
46 char *name = bam1_qname(b);
47 read_id=atol(name);
48 if (read_id<1) {
49 char* samline=bam_format1(srcfiles[0]->header, b);
50 GError("Error: invalid read Id (must be numeric) for BAM record:\n%s\n",
51 samline);
52 }
53 }
54 }
55
56 void b_free() {
57 if (b!=NULL) { bam_destroy1(b); }
58 }
59 };
60
61 GList<CBamLine> lines(true,true,false);
62
63 int main(int argc, char *argv[])
64 {
65 int parse_ret = parse_options(argc, argv, print_usage);
66 if (parse_ret)
67 return parse_ret;
68
69 char* outfname=NULL;
70 if (argc-optind<3) {
71 print_usage();
72 if (argc>1)
73 fprintf(stderr, "Error: only %d arguments given.\n", argc-1);
74 return -1;
75 }
76 outfname=argv[optind];
77 int num_src=argc-optind-1;
78 GMALLOC(srcfiles, (num_src*sizeof(samfile_t*)));
79 for (int i=optind+1;i<argc;i++) {
80 int fno=i-optind-1;
81 samfile_t* fp=samopen(argv[i], "rb", 0);
82 if (fp==0) {
83 fprintf(stderr, ERR_BAM_OPEN, argv[i]);
84 return 1;
85 }
86 bam1_t *b = bam_init1();
87 if (samread(fp, b) > 0) {
88 srcfiles[fno]=fp;
89 lines.Add(new CBamLine(fno, b));
90 }
91 }
92 if (lines.Count()==0) {
93 GMessage("Warning: no input BAM records found.\n");
94 }
95 fw=samopen(outfname, "wb", srcfiles[lines[0]->fileno]->header);
96 if (fw==NULL)
97 GError("Error creating output file %s\n", outfname);
98
99 FILE* findex = NULL;
100 if (!index_outfile.empty()) {
101 findex = fopen(index_outfile.c_str(), "w");
102 if (findex == NULL)
103 err_die("Error: cannot create file %s\n", index_outfile.c_str());
104 }
105
106 int last;
107 uint64_t count = 0, last_id = 0;
108 while ((last=lines.Count()-1)>=0) {
109 CBamLine* from=lines[last]; //should have the smallest read_id
110 if (from->fileno<0 || from->b==NULL)
111 GError("Invalid processTopLine() call with uninitialized value.");
112 if (findex != NULL)
113 {
114 ++count;
115 if (count >= 1000 && from->read_id != last_id)
116 {
117 int64_t offset = bam_tell(fw->x.bam);
118 int block_offset = offset & 0xFFFF;
119
120 // daehwan - this is a bug in bgzf.c in samtools
121 // I'll report this bug with a temporary solution, soon.
122 if (block_offset <= 0xF000)
123 {
124 fprintf(findex, "%lu\t%ld\n", from->read_id, offset);
125 count = 0;
126 }
127 }
128 last_id = from->read_id;
129 }
130 samwrite(fw, from->b);
131
132 if (samread(srcfiles[from->fileno], from->b)>0) {
133 from->b_init();
134 //adjust the position in the sorted lines list
135 if (last<7) {//
136 int i=last;
137 while (i>0 && lines[i-1]->read_id<lines[i]->read_id) {
138 //swap
139 CBamLine* tmp=lines[i-1];
140 lines.Put(i-1, lines[i], false);
141 lines.Put(i,tmp, false);
142 i--;
143 }
144 }
145 else { //use quick-insert
146 lines.Pop();
147 lines.Add(from);
148 }
149 }
150 else { //no more BAM records
151 lines.Pop();
152 from->b_free();
153 }
154 }
155 samclose(fw);
156 for (int i=0;i<num_src;i++)
157 samclose(srcfiles[i]);
158 GFREE(srcfiles);
159
160 if (findex != NULL)
161 fclose(findex);
162
163 return 0;
164 }