ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam_merge.cpp
Revision: 159
Committed: Mon Jan 30 22:38:08 2012 UTC (7 years, 8 months ago) by gpertea
File size: 3294 byte(s)
Log Message:
wip

Line File contents
1 #include <cstdlib>
2 #include <cstdio>
3 #include <cstring>
4
5 #include "common.h"
6 #include "GList.hh"
7
8 #define USAGE "Usage: bam_merge <out.bam> <in1.bam> <in2.bam> [...]\n"
9
10 #define ERR_BAM_OPEN "Error: bam_merge failed to open BAM file %s\n"
11
12 void print_usage()
13 {
14 fprintf(stderr, USAGE);
15 }
16
17 samfile_t **srcfiles; //array of SAM file handles
18 //samfile_t *fw; //output SAM handle
19
20 class CBamLine {
21 public:
22 int fileno;
23 long read_id;
24 bam1_t* b;
25 bool operator==(CBamLine& l){
26 return (read_id==l.read_id);
27 }
28 bool operator>(CBamLine& l){
29 return (read_id<l.read_id); //(last has lowest #)
30 }
31 bool operator<(CBamLine& l){
32 return (read_id>l.read_id);
33 }
34 CBamLine(int fno=-1, bam1_t* br=NULL) {
35 fileno=fno;
36 read_id=0;
37 b=br;
38 b_init();
39 }
40 void b_init() {
41 if (b) {
42 char *name = bam1_qname(b);
43 read_id=atol(name);
44 if (read_id<1) {
45 char* samline=bam_format1(srcfiles[0]->header, b);
46 err_die("Error: invalid read Id (must be numeric) for BAM record:\n%s\n",
47 samline);
48 }
49 }
50 }
51
52 void b_free() {
53 if (b!=NULL) { bam_destroy1(b); }
54 }
55 };
56
57 GList<CBamLine> lines(true,true,false);
58
59 int main(int argc, char *argv[])
60 {
61 int parse_ret = parse_options(argc, argv, print_usage);
62 if (parse_ret)
63 return parse_ret;
64
65 char* outfname=NULL;
66 if (argc-optind<3) {
67 print_usage();
68 if (argc>1)
69 fprintf(stderr, "Error: only %d arguments given.\n", argc-1);
70 return -1;
71 }
72 outfname=argv[optind];
73 int num_src=argc-optind-1;
74 GMALLOC(srcfiles, (num_src*sizeof(samfile_t*)));
75 for (int i=optind+1;i<argc;i++) {
76 int fno=i-optind-1;
77 samfile_t* fp=samopen(argv[i], "rb", 0);
78 if (fp==0) {
79 fprintf(stderr, ERR_BAM_OPEN, argv[i]);
80 return 1;
81 }
82 bam1_t *b = bam_init1();
83 if (samread(fp, b) > 0) {
84 srcfiles[fno]=fp;
85 lines.Add(new CBamLine(fno, b));
86 }
87 }
88 if (lines.Count()==0) {
89 GMessage("Warning: no input BAM records found.\n");
90 }
91 //fw=samopen(outfname, "wb", srcfiles[lines[0]->fileno]->header);
92 GBamWriter bamwriter(outfname, srcfiles[0]->header, index_outfile);
93
94 int last;
95 while ((last=lines.Count()-1)>=0) {
96 CBamLine* from=lines[last]; //should have the smallest read_id
97 if (from->fileno<0 || from->b==NULL)
98 err_die("Invalid call with uninitialized value.");
99 bamwriter.write(from->b, from->read_id);
100 if (samread(srcfiles[from->fileno], from->b)>0) {
101 from->b_init();
102 //adjust the position in the sorted list
103 if (last<7) {//for a few lines this may be faster
104 int i=last;
105 while (i>0 && lines[i-1]->read_id<lines[i]->read_id) {
106 //swap
107 CBamLine* tmp=lines[i-1];
108 lines.Put(i-1, lines[i], false);
109 lines.Put(i,tmp, false);
110 i--;
111 }
112 }
113 else { //use quick-insert for more lines
114 lines.Pop();
115 lines.Add(from);
116 }
117 }
118 else { //no more BAM records
119 lines.Pop();
120 from->b_free();
121 }
122 }
123 for (int i=0;i<num_src;i++)
124 samclose(srcfiles[i]);
125 GFREE(srcfiles);
126 return 0;
127 }