ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBam.h
(Generate patch)
# Line 19 | Line 19
19     //   +seq  (4bit-encoded)
20     //    +qual
21     //     +aux
22 <   bool novel;
22 >   bool novel; //created from scratch, not from a given bam1_t* record
23     bam_header_t* bam_header;
24 <
24 >   char tag[2];
25 >   uint8_t abuf[512];
26   public:
27 <   GPVec<GSeg> exons;
28 <   GBamRecord(bam1_t* from_b=NULL, bam_header_t* b_header=NULL):exons() {
27 >   GVec<GSeg> exons;
28 >   //created from a reader:
29 >   GBamRecord(bam1_t* from_b=NULL, bam_header_t* b_header=NULL):exons(1) {
30        bam_header=NULL;
31        if (from_b==NULL) {
32             b=bam_init1();
33             novel=true;
34             }
35          else {
36 <           b=from_b;
36 >           b=from_b; //it'll take over from_b but not free it when destroyed
37             novel=false;
38             }
39        bam_header=b_header;
40        setupCoordinates();//sets start, end coordinate and populates exons array
41        }
42 +
43 +   const GBamRecord& operator=(GBamRecord& r) {
44 +      //make a new copy of the bam1_t record etc.
45 +      clear();
46 +      b=bam_dup1(r.b);
47 +      novel=true; //will also free b when destroyed
48 +      start=r.start;
49 +      end=r.end;
50 +      exons = r.exons;
51 +      return *this;
52 +      }
53 +
54       void setupCoordinates();
55       void clear() {
56          if (novel) {
57 <            bam_destroy1(b);
58 <            }
59 <        novel=true;
60 <        b=bam_init1();
57 >           bam_destroy1(b);
58 >           novel=false;
59 >           }
60 >        b=NULL;
61 >        exons.Clear();
62 >        bam_header=NULL;
63          }
64  
65      ~GBamRecord() {
66 <       if (novel) {  bam_destroy1(b); }
66 >       clear();
67         }
68  
69      void parse_error(const char* s) {
# Line 67 | Line 83
83        b->core.flag=flags;
84        }
85  
70
71
86      //creates a new record from 1-based alignment coordinate
87      //quals should be given as Phred33
88      //Warning: pos and mate_pos must be given 1-based!
# Line 84 | Line 98
98      void add_quals(const char* quals); //quality values string in Phred33 format
99      void add_aux(const char* str); //adds one aux field in plain SAM text format (e.g. "NM:i:1")
100      void add_aux(const char tag[2], char atype, int len, uint8_t *data) {
101 +      //IMPORTANT:  strings (Z,H) should include the terminal \0
102 +      int addz=0;
103 +      if ((atype=='Z' || atype=='H') && data[len-1]!=0) {
104 +        addz=1;
105 +        }
106        int ori_len = b->data_len;
107 <      b->data_len += 3 + len;
108 <      b->l_aux += 3 + len;
107 >      b->data_len += 3 + len + addz;
108 >      b->l_aux += 3 + len + addz;
109        if (b->m_data < b->data_len) {
110          b->m_data = b->data_len;
111          kroundup32(b->m_data);
# Line 94 | Line 113
113        }
114        b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1];
115        b->data[ori_len + 2] = atype;
116 +      if (addz) {
117 +        b->data[ori_len+len+4]=0;
118 +        }
119        memcpy(b->data + ori_len + 3, data, len);
120        }
121 +
122      void add_tag(const char tag[2], char atype, int len, uint8_t *data) {
123        //same with add_aux()
124        add_aux(tag,atype,len,data);
# Line 105 | Line 128
128   bool isUnmapped() { return ((b->core.flag & BAM_FUNMAP) != 0); }
129   bool isMapped() { return ((b->core.flag & BAM_FUNMAP) == 0); }
130   bool isPaired() { return ((b->core.flag & BAM_FPAIRED) != 0); }
131 < int mateNum() {
131 > const char* name() { return bam1_qname(b); }
132 > int pairOrder() {
133 >    //which read in the pair: 0 = unpaired, 1=first read, 2=second read
134      int r=0;
135      if ((b->core.flag & BAM_FREAD1) != 0) r=1;
136      else if ((b->core.flag & BAM_FREAD2) != 0) r=2;
137      return r;
138      }
139 < bool onRevStrand() { return ((b->core.flag & BAM_FREVERSE) != 0); }
140 < const char* refName() {
141 <     return (bam_header!=NULL) ? bam_header->target_name[b->core.tid] : NULL;
142 <     }
139 > bool revStrand() {
140 >   //this is the raw alignment strand, NOT the transcription (XS) strand
141 >   return ((b->core.flag & BAM_FREVERSE) != 0);
142 >   }
143 > const char* refName() {
144 >   return (bam_header!=NULL) ?
145 >         ((b->core.tid<0) ? "*" : bam_header->target_name[b->core.tid]) : NULL;
146 >   }
147   int32_t refId() { return b->core.tid; }
148 + int32_t mate_refId() { return b->core.mtid; }
149 + const char* mate_refName() {
150 +    return (bam_header!=NULL) ?
151 +       ((b->core.mtid<0) ? "*" : bam_header->target_name[b->core.mtid]) : NULL;
152 +    }
153 + int32_t insertSize() { return b->core.isize; }
154 + int32_t mate_start() { return b->core.mpos<0? 0 : b->core.mpos+1; }
155  
156 < uint8_t* tag(const char tag[2]); //retrieves tag data
157 < int tag_int(const char tag[2]); //get the numeric value of tag (if it's numeric)
158 < char spliceStrand(); // '+', '-' from the XS tag, or 0 if no XS tag
159 < char* sequence(); //user must free this after use
160 < char* qualities();//user must free this after use
161 < char* cigar(); //returns text version of the CIGAR string; must be freed by user
156 > //int find_tag(const char tag[2], uint8_t* & s, char& tag_type);
157 > uint8_t* find_tag(const char tag[2]);
158 > //position s at the beginning of tag data, tag_type is set to the found tag type
159 > //returns length of tag data, or 0 if tag not found
160 >
161 > char* tag_str(const char tag[2]); //return tag value for tag type 'Z'
162 > int tag_int(const char tag[2]); //return numeric value of tag (for numeric types)
163 > char tag_char(const char tag[2]); //return char value of tag (for type 'A')
164 > char spliceStrand(); // '+', '-' from the XS tag, or '.' if no XS tag
165 >
166 > char* sequence(); //user should free after use
167 > char* qualities();//user should free after use
168 > char* cigar(); //returns text version of the CIGAR string; user must free
169   };
170  
171  
# Line 130 | Line 173
173     samfile_t* bam_file;
174     char* fname;
175   public:
176 <   void open(const char* filename) {
177 <      bam_file=samopen(filename, "rb", 0);
178 <      //for .sam files:
179 <      //bam_file=samopen(fname, "r", 0);
176 >   void bopen(const char* filename) {
177 >      if (strcmp(filename, "-")==0) {
178 >        //if stdin was given, we HAVE to assume it's text SAM format, sorry
179 >        bam_file=samopen(filename, "r", 0);
180 >        }
181 >      else {
182 >      //BAM files have the zlib signature bytes at the beginning: 1F 8B 08
183 >      //if that's not present, we assume sam file
184 >      FILE* f=fopen(filename, "rb");
185 >      if (f==NULL) GError("Error opening file %s!\n", filename);
186 >      byte fsig[3];
187 >      size_t rd=fread(fsig, 1, 3, f);
188 >      if (rd<3) GError("Error reading from file %s!\n",filename);
189 >      if ((fsig[0]==0x1F && fsig[1]==0x8B && fsig[2]==0x08) ||
190 >          (fsig[0]=='B' && fsig[1]=='A' && fsig[2]=='M')) {
191 >          bam_file=samopen(filename, "rb", 0); //BAM or uncompressed BAM
192 >          }
193 >        else { //assume text SAM file
194 >          bam_file=samopen(filename, "r", 0);
195 >          }
196 >        }
197        if (bam_file==NULL)
198           GError("Error: could not open SAM file %s!\n",filename);
199        fname=Gstrdup(filename);
200        }
201 <
202 <   GBamReader(const char* fname) {
201 >   GBamReader(const char* fn) {
202 >      bam_file=NULL;
203        fname=NULL;
204 <      open(fname);
204 >      bopen(fn);
205        }
206  
207 +   bam_header_t* header() {
208 +      return bam_file? bam_file->header : NULL;
209 +      }
210 +   void bclose() {
211 +      if (bam_file) {
212 +        samclose(bam_file);
213 +        bam_file=NULL;
214 +        }
215 +      }
216     ~GBamReader() {
217 <      samclose(bam_file);
217 >      bclose();
218        GFREE(fname);
219        }
220 +   void rewind() {
221 +     if (fname==NULL) {
222 +       GMessage("Warning: GBamReader::rewind() called without a file name.\n");
223 +       return;
224 +       }
225 +     bclose();
226 +     char* ifname=fname;
227 +     bopen(ifname);
228 +     GFREE(ifname);
229 +     }
230  
231 +   GBamRecord* next() {
232 +      if (bam_file==NULL)
233 +        GError("Warning: GBamReader::next() called with no open file.\n");
234 +      bam1_t* b = bam_init1();
235 +      if (samread(bam_file, b) >= 0) {
236 +        GBamRecord* bamrec=new GBamRecord(b, bam_file->header);
237 +        return bamrec;
238 +        }
239 +      else {
240 +        bam_destroy1(b);
241 +        return NULL;
242 +        }
243 +      }
244   };
245  
246  

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines