ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBam.h
(Generate patch)
# Line 5 | Line 5
5   #include "bam.h"
6   #include "sam.h"
7  
8
8   class GBamReader;
9   class GBamWriter;
10  
# Line 19 | Line 18
18     //   +seq  (4bit-encoded)
19     //    +qual
20     //     +aux
21 <   //bool novel; //created from scratch, not from a given bam1_t* record
21 >   bool novel; //if true, will also free b on delete
22     bam_header_t* bam_header;
23 <
23 >   char tag[2];
24 >   uint8_t abuf[512];
25   public:
26 <   GVec<GSeg> exons;
26 >   GVec<GSeg> exons; //coordinates will be 1-based
27     //created from a reader:
28 <   GBamRecord(bam1_t* from_b=NULL, bam_header_t* b_header=NULL):exons(1) {
28 >   void bfree_on_delete(bool b_free=true) { novel=b_free; }
29 >   GBamRecord(bam1_t* from_b=NULL, bam_header_t* b_header=NULL, bool b_free=true):exons(1) {
30        bam_header=NULL;
31        if (from_b==NULL) {
32             b=bam_init1();
33 <           //novel=true;
33 >           novel=true;
34             }
35          else {
36 <           b=from_b; //it'll take over from_b and FREE it when destroyed
37 <           //novel=false;
36 >           b=from_b; //it'll take over from_b
37 >           novel=b_free;
38             }
39 +
40        bam_header=b_header;
41 <      setupCoordinates();//sets start, end coordinate and populates exons array
41 >      setupCoordinates();//set 1-based coordinates (start, end and exons array)
42        }
43  
44     const GBamRecord& operator=(GBamRecord& r) {
45        //make a new copy of the bam1_t record etc.
46        clear();
47        b=bam_dup1(r.b);
48 <      //novel=true; //will also free b when destroyed
48 >      novel=true; //will also free b when destroyed
49        start=r.start;
50        end=r.end;
51        exons = r.exons;
# Line 52 | Line 54
54  
55       void setupCoordinates();
56       void clear() {
57 <        //if (novel) {
58 <        bam_destroy1(b);
59 <        //   novel=false;
60 <        //   }
57 >        if (novel) {
58 >           bam_destroy1(b);
59 >           //novel=false;
60 >           }
61          b=NULL;
62          exons.Clear();
63          bam_header=NULL;
# Line 97 | Line 99
99      void add_quals(const char* quals); //quality values string in Phred33 format
100      void add_aux(const char* str); //adds one aux field in plain SAM text format (e.g. "NM:i:1")
101      void add_aux(const char tag[2], char atype, int len, uint8_t *data) {
102 +      //IMPORTANT:  strings (Z,H) should include the terminal \0
103 +      int addz=0;
104 +      if ((atype=='Z' || atype=='H') && data[len-1]!=0) {
105 +        addz=1;
106 +        }
107        int ori_len = b->data_len;
108 <      b->data_len += 3 + len;
109 <      b->l_aux += 3 + len;
108 >      b->data_len += 3 + len + addz;
109 >      b->l_aux += 3 + len + addz;
110        if (b->m_data < b->data_len) {
111          b->m_data = b->data_len;
112          kroundup32(b->m_data);
# Line 107 | Line 114
114        }
115        b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1];
116        b->data[ori_len + 2] = atype;
117 +      if (addz) {
118 +        b->data[ori_len+len+4]=0;
119 +        }
120        memcpy(b->data + ori_len + 3, data, len);
121        }
122 +
123      void add_tag(const char tag[2], char atype, int len, uint8_t *data) {
124        //same with add_aux()
125        add_aux(tag,atype,len,data);
# Line 143 | Line 154
154   int32_t insertSize() { return b->core.isize; }
155   int32_t mate_start() { return b->core.mpos<0? 0 : b->core.mpos+1; }
156  
157 < int find_tag(const char tag[2], uint8_t* & s, char& tag_type);
157 > //int find_tag(const char tag[2], uint8_t* & s, char& tag_type);
158 > uint8_t* find_tag(const char tag[2]);
159   //position s at the beginning of tag data, tag_type is set to the found tag type
160   //returns length of tag data, or 0 if tag not found
161  
# Line 157 | Line 169
169   char* cigar(); //returns text version of the CIGAR string; user must free
170   };
171  
172 + // from sam.c:
173 + #define FTYPE_BAM  1
174 + #define FTYPE_READ 2
175  
176   class GBamReader {
177     samfile_t* bam_file;
178     char* fname;
179 +   // from bam_import.c:
180 +   struct samtools_tamFile_t {
181 +        gzFile fp;
182 +        void *ks;
183 +        void *str;
184 +        uint64_t n_lines;
185 +        int is_first;
186 +   };
187 +
188   public:
189     void bopen(const char* filename) {
190 <      bam_file=samopen(filename, "rb", 0);
191 <      //for .sam files:
192 <      //bam_file=samopen(fname, "r", 0);
190 >      if (strcmp(filename, "-")==0) {
191 >        //if stdin was given, we HAVE to assume it's text SAM format, sorry
192 >        bam_file=samopen(filename, "r", 0);
193 >        }
194 >      else {
195 >      //BAM files have the zlib signature bytes at the beginning: 1F 8B 08
196 >      //if that's not present, we assume sam file
197 >      FILE* f=fopen(filename, "rb");
198 >      if (f==NULL) GError("Error opening file %s!\n", filename);
199 >      byte fsig[3];
200 >      size_t rd=fread(fsig, 1, 3, f);
201 >      fclose(f);
202 >      if (rd<3) GError("Error reading from file %s!\n",filename);
203 >      if ((fsig[0]==0x1F && fsig[1]==0x8B && fsig[2]==0x08) ||
204 >          (fsig[0]=='B' && fsig[1]=='A' && fsig[2]=='M')) {
205 >          bam_file=samopen(filename, "rb", 0); //BAM or uncompressed BAM
206 >          }
207 >        else { //assume text SAM file
208 >          bam_file=samopen(filename, "r", 0);
209 >          }
210 >        }
211        if (bam_file==NULL)
212           GError("Error: could not open SAM file %s!\n",filename);
213        fname=Gstrdup(filename);
# Line 189 | Line 231
231        bclose();
232        GFREE(fname);
233        }
234 +   int64_t fpos() {
235 +         if ( bam_file->type & FTYPE_BAM )
236 +           return bgzf_tell(bam_file->x.bam);
237 +         else
238 +                 return (int64_t)gztell(((samtools_tamFile_t*)(bam_file->x.tamr))->fp);
239 +   }
240 +   int64_t fseek(int64_t offs) {
241 +         if ( bam_file->type & FTYPE_BAM )
242 +                 return bgzf_seek(bam_file->x.bam, offs, SEEK_SET);
243 +         else
244 +                 return (int64_t)gzseek(((samtools_tamFile_t*)(bam_file->x.tamr))->fp, offs, SEEK_SET);
245 +   }
246     void rewind() {
247       if (fname==NULL) {
248         GMessage("Warning: GBamReader::rewind() called without a file name.\n");
# Line 205 | Line 259
259          GError("Warning: GBamReader::next() called with no open file.\n");
260        bam1_t* b = bam_init1();
261        if (samread(bam_file, b) >= 0) {
262 <        GBamRecord* bamrec=new GBamRecord(b, bam_file->header);
262 >        GBamRecord* bamrec=new GBamRecord(b, bam_file->header, true);
263          return bamrec;
264          }
265        else {

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines