ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam_merge.cpp
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (9 years, 2 months ago) by gpertea
File size: 3171 byte(s)
Log Message:
adding tophat source work

Line User Rev File contents
1 gpertea 29 #include <cstdlib>
2     #include <cstdio>
3     #include <cstring>
4    
5     #include "bam/bam.h"
6     #include "bam/sam.h"
7     #include "GBase.h"
8     #include "GList.hh"
9    
10     #define USAGE "Usage: bam_merge <out.bam> <in1.bam> <in2.bam> [...]\n"
11    
12     #define ERR_BAM_OPEN "Error: bam_merge failed to open BAM file %s\n"
13    
14     samfile_t **srcfiles; //array of SAM file handles
15     samfile_t *fw; //output SAM handle
16    
17     class CBamLine {
18     public:
19     int fileno;
20     long read_id;
21     bam1_t* b;
22     bool operator==(CBamLine& l){
23     return (read_id==l.read_id);
24     }
25     bool operator>(CBamLine& l){
26     return (read_id<l.read_id); //(last has lowest #)
27     }
28     bool operator<(CBamLine& l){
29     return (read_id>l.read_id);
30     }
31     CBamLine(int fno=-1, bam1_t* br=NULL) {
32     fileno=fno;
33     read_id=0;
34     b=br;
35     b_init();
36     }
37     void b_init() {
38     if (b) {
39     char *name = bam1_qname(b);
40     read_id=atol(name);
41     if (read_id<1) {
42     char* samline=bam_format1(srcfiles[0]->header, b);
43     GError("Error: invalid read Id (must be numeric) for BAM record:\n%s\n",
44     samline);
45     }
46     }
47     }
48    
49     void b_free() {
50     if (b!=NULL) { bam_destroy1(b); }
51     }
52     };
53    
54     GList<CBamLine> lines(true,true,false);
55    
56     int main(int argc, char *argv[])
57     {
58     char* outfname=NULL;
59     if (argc<4) {
60     fprintf(stderr, USAGE);
61     if (argc>1)
62     fprintf(stderr, "Error: only %d arguments given.\n", argc-1);
63     return -1;
64     }
65     outfname=argv[1];
66     int num_src=argc-2;
67     GMALLOC(srcfiles, (num_src*sizeof(samfile_t*)));
68     for (int i=2;i<argc;i++) {
69     int fno=i-2;
70     samfile_t* fp=samopen(argv[i], "rb", 0);
71     if (fp==0) {
72     fprintf(stderr, ERR_BAM_OPEN, argv[i]);
73     return 1;
74     }
75     bam1_t *b = bam_init1();
76     if (samread(fp, b) > 0) {
77     srcfiles[fno]=fp;
78     lines.Add(new CBamLine(fno, b));
79     }
80     }
81     if (lines.Count()==0) {
82     GMessage("Warning: no input BAM records found.\n");
83     }
84     fw=samopen(outfname, "wb", srcfiles[lines[0]->fileno]->header);
85     if (fw==NULL)
86     GError("Error creating output file %s\n", outfname);
87     int last;
88     while ((last=lines.Count()-1)>=0) {
89     CBamLine* from=lines[last]; //should have the smallest read_id
90     if (from->fileno<0 || from->b==NULL)
91     GError("Invalid processTopLine() call with uninitialized value.");
92     samwrite(fw, from->b);
93     if (samread(srcfiles[from->fileno], from->b)>0) {
94     from->b_init();
95     //adjust the position in the sorted lines list
96     if (last<7) {//
97     int i=last;
98     while (i>0 && lines[i-1]->read_id<lines[i]->read_id) {
99     //swap
100     CBamLine* tmp=lines[i-1];
101     lines.Put(i-1, lines[i], false);
102     lines.Put(i,tmp, false);
103     i--;
104     }
105     }
106     else { //use quick-insert
107     lines.Pop();
108     lines.Add(from);
109     }
110     }
111     else { //no more BAM records
112     lines.Pop();
113     from->b_free();
114     }
115     }
116     samclose(fw);
117     for (int i=0;i<num_src;i++)
118     samclose(srcfiles[i]);
119     GFREE(srcfiles);
120     return 0;
121     }