ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam_merge.cpp
Revision: 154
Committed: Tue Jan 24 02:29:21 2012 UTC (8 years, 5 months ago) by gpertea
File size: 4115 byte(s)
Log Message:
massive update with Daehwan's work

Line User Rev File contents
1 gpertea 29 #include <cstdlib>
2     #include <cstdio>
3     #include <cstring>
4    
5 gpertea 154 #include <bam/bam.h>
6     #include <bam/sam.h>
7    
8     #include "common.h"
9 gpertea 29 #include "GBase.h"
10     #include "GList.hh"
11    
12     #define USAGE "Usage: bam_merge <out.bam> <in1.bam> <in2.bam> [...]\n"
13    
14     #define ERR_BAM_OPEN "Error: bam_merge failed to open BAM file %s\n"
15    
16 gpertea 154 void print_usage()
17     {
18     fprintf(stderr, USAGE);
19     }
20    
21 gpertea 29 samfile_t **srcfiles; //array of SAM file handles
22     samfile_t *fw; //output SAM handle
23    
24     class CBamLine {
25     public:
26     int fileno;
27     long read_id;
28     bam1_t* b;
29     bool operator==(CBamLine& l){
30     return (read_id==l.read_id);
31     }
32     bool operator>(CBamLine& l){
33     return (read_id<l.read_id); //(last has lowest #)
34     }
35     bool operator<(CBamLine& l){
36     return (read_id>l.read_id);
37     }
38     CBamLine(int fno=-1, bam1_t* br=NULL) {
39     fileno=fno;
40     read_id=0;
41     b=br;
42     b_init();
43     }
44     void b_init() {
45     if (b) {
46     char *name = bam1_qname(b);
47     read_id=atol(name);
48     if (read_id<1) {
49     char* samline=bam_format1(srcfiles[0]->header, b);
50     GError("Error: invalid read Id (must be numeric) for BAM record:\n%s\n",
51     samline);
52     }
53     }
54     }
55    
56     void b_free() {
57     if (b!=NULL) { bam_destroy1(b); }
58     }
59     };
60    
61     GList<CBamLine> lines(true,true,false);
62    
63     int main(int argc, char *argv[])
64     {
65 gpertea 154 int parse_ret = parse_options(argc, argv, print_usage);
66     if (parse_ret)
67     return parse_ret;
68    
69 gpertea 29 char* outfname=NULL;
70 gpertea 154 if (argc-optind<3) {
71     print_usage();
72 gpertea 29 if (argc>1)
73     fprintf(stderr, "Error: only %d arguments given.\n", argc-1);
74     return -1;
75     }
76 gpertea 154 outfname=argv[optind];
77     int num_src=argc-optind-1;
78 gpertea 29 GMALLOC(srcfiles, (num_src*sizeof(samfile_t*)));
79 gpertea 154 for (int i=optind+1;i<argc;i++) {
80     int fno=i-optind-1;
81 gpertea 29 samfile_t* fp=samopen(argv[i], "rb", 0);
82     if (fp==0) {
83     fprintf(stderr, ERR_BAM_OPEN, argv[i]);
84     return 1;
85     }
86     bam1_t *b = bam_init1();
87     if (samread(fp, b) > 0) {
88     srcfiles[fno]=fp;
89     lines.Add(new CBamLine(fno, b));
90     }
91     }
92     if (lines.Count()==0) {
93     GMessage("Warning: no input BAM records found.\n");
94     }
95     fw=samopen(outfname, "wb", srcfiles[lines[0]->fileno]->header);
96     if (fw==NULL)
97     GError("Error creating output file %s\n", outfname);
98 gpertea 154
99     FILE* findex = NULL;
100     if (!index_outfile.empty()) {
101     findex = fopen(index_outfile.c_str(), "w");
102     if (findex == NULL)
103     err_die("Error: cannot create file %s\n", index_outfile.c_str());
104     }
105    
106 gpertea 29 int last;
107 gpertea 154 uint64_t count = 0, last_id = 0;
108 gpertea 29 while ((last=lines.Count()-1)>=0) {
109     CBamLine* from=lines[last]; //should have the smallest read_id
110     if (from->fileno<0 || from->b==NULL)
111     GError("Invalid processTopLine() call with uninitialized value.");
112 gpertea 154 if (findex != NULL)
113     {
114     ++count;
115     if (count >= 1000 && from->read_id != last_id)
116     {
117     int64_t offset = bam_tell(fw->x.bam);
118     int block_offset = offset & 0xFFFF;
119    
120     // daehwan - this is a bug in bgzf.c in samtools
121     // I'll report this bug with a temporary solution, soon.
122     if (block_offset <= 0xF000)
123     {
124     fprintf(findex, "%lu\t%ld\n", from->read_id, offset);
125     count = 0;
126     }
127     }
128     last_id = from->read_id;
129     }
130     samwrite(fw, from->b);
131    
132 gpertea 29 if (samread(srcfiles[from->fileno], from->b)>0) {
133     from->b_init();
134     //adjust the position in the sorted lines list
135     if (last<7) {//
136     int i=last;
137     while (i>0 && lines[i-1]->read_id<lines[i]->read_id) {
138     //swap
139     CBamLine* tmp=lines[i-1];
140     lines.Put(i-1, lines[i], false);
141     lines.Put(i,tmp, false);
142     i--;
143     }
144     }
145     else { //use quick-insert
146     lines.Pop();
147     lines.Add(from);
148     }
149     }
150     else { //no more BAM records
151     lines.Pop();
152     from->b_free();
153     }
154     }
155     samclose(fw);
156     for (int i=0;i<num_src;i++)
157     samclose(srcfiles[i]);
158     GFREE(srcfiles);
159 gpertea 154
160     if (findex != NULL)
161     fclose(findex);
162    
163 gpertea 29 return 0;
164     }