ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam_merge.cpp
Revision: 159
Committed: Mon Jan 30 22:38:08 2012 UTC (8 years, 8 months ago) by gpertea
File size: 3294 byte(s)
Log Message:
wip

Line User Rev File contents
1 gpertea 29 #include <cstdlib>
2     #include <cstdio>
3     #include <cstring>
4    
5 gpertea 154 #include "common.h"
6 gpertea 29 #include "GList.hh"
7    
8     #define USAGE "Usage: bam_merge <out.bam> <in1.bam> <in2.bam> [...]\n"
9    
10     #define ERR_BAM_OPEN "Error: bam_merge failed to open BAM file %s\n"
11    
12 gpertea 154 void print_usage()
13     {
14     fprintf(stderr, USAGE);
15     }
16    
17 gpertea 29 samfile_t **srcfiles; //array of SAM file handles
18 gpertea 157 //samfile_t *fw; //output SAM handle
19 gpertea 29
20     class CBamLine {
21     public:
22     int fileno;
23     long read_id;
24     bam1_t* b;
25     bool operator==(CBamLine& l){
26     return (read_id==l.read_id);
27     }
28     bool operator>(CBamLine& l){
29     return (read_id<l.read_id); //(last has lowest #)
30     }
31     bool operator<(CBamLine& l){
32     return (read_id>l.read_id);
33     }
34     CBamLine(int fno=-1, bam1_t* br=NULL) {
35     fileno=fno;
36     read_id=0;
37 gpertea 159 b=br;
38 gpertea 29 b_init();
39     }
40     void b_init() {
41     if (b) {
42     char *name = bam1_qname(b);
43     read_id=atol(name);
44     if (read_id<1) {
45     char* samline=bam_format1(srcfiles[0]->header, b);
46 gpertea 157 err_die("Error: invalid read Id (must be numeric) for BAM record:\n%s\n",
47 gpertea 29 samline);
48     }
49     }
50     }
51    
52     void b_free() {
53     if (b!=NULL) { bam_destroy1(b); }
54     }
55     };
56    
57     GList<CBamLine> lines(true,true,false);
58    
59     int main(int argc, char *argv[])
60     {
61 gpertea 154 int parse_ret = parse_options(argc, argv, print_usage);
62     if (parse_ret)
63     return parse_ret;
64    
65 gpertea 29 char* outfname=NULL;
66 gpertea 154 if (argc-optind<3) {
67     print_usage();
68 gpertea 29 if (argc>1)
69     fprintf(stderr, "Error: only %d arguments given.\n", argc-1);
70     return -1;
71     }
72 gpertea 154 outfname=argv[optind];
73     int num_src=argc-optind-1;
74 gpertea 29 GMALLOC(srcfiles, (num_src*sizeof(samfile_t*)));
75 gpertea 154 for (int i=optind+1;i<argc;i++) {
76     int fno=i-optind-1;
77 gpertea 29 samfile_t* fp=samopen(argv[i], "rb", 0);
78     if (fp==0) {
79     fprintf(stderr, ERR_BAM_OPEN, argv[i]);
80     return 1;
81     }
82     bam1_t *b = bam_init1();
83     if (samread(fp, b) > 0) {
84     srcfiles[fno]=fp;
85 gpertea 159 lines.Add(new CBamLine(fno, b));
86 gpertea 29 }
87     }
88     if (lines.Count()==0) {
89     GMessage("Warning: no input BAM records found.\n");
90     }
91 gpertea 157 //fw=samopen(outfname, "wb", srcfiles[lines[0]->fileno]->header);
92     GBamWriter bamwriter(outfname, srcfiles[0]->header, index_outfile);
93 gpertea 154
94 gpertea 29 int last;
95     while ((last=lines.Count()-1)>=0) {
96 gpertea 157 CBamLine* from=lines[last]; //should have the smallest read_id
97     if (from->fileno<0 || from->b==NULL)
98     err_die("Invalid call with uninitialized value.");
99     bamwriter.write(from->b, from->read_id);
100     if (samread(srcfiles[from->fileno], from->b)>0) {
101 gpertea 159 from->b_init();
102     //adjust the position in the sorted list
103     if (last<7) {//for a few lines this may be faster
104 gpertea 29 int i=last;
105     while (i>0 && lines[i-1]->read_id<lines[i]->read_id) {
106     //swap
107     CBamLine* tmp=lines[i-1];
108     lines.Put(i-1, lines[i], false);
109     lines.Put(i,tmp, false);
110     i--;
111     }
112     }
113 gpertea 159 else { //use quick-insert for more lines
114     lines.Pop();
115     lines.Add(from);
116     }
117 gpertea 29 }
118     else { //no more BAM records
119 gpertea 159 lines.Pop();
120 gpertea 29 from->b_free();
121     }
122     }
123     for (int i=0;i<num_src;i++)
124     samclose(srcfiles[i]);
125     GFREE(srcfiles);
126     return 0;
127     }