ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBam.cpp
(Generate patch)
# Line 1 | Line 1
1   #include "GBam.h"
2   #include <ctype.h>
3 <
3 > #include "kstring.h"
4  
5   //auxiliary functions for BAM record handling
6   uint8_t* realloc_bdata(bam1_t *b, int size) {
# Line 30 | Line 30
30   extern unsigned short bam_char2flag_table[];
31  
32   GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
33 <                 int pos, bool reverse, const char* qseq, const char* cigar, const char* quals) {
33 >                 int pos, bool reverse, const char* qseq,
34 >                 const char* cigar, const char* quals):exons(1) {
35     novel=true;
36     bam_header=NULL;
37     b=bam_init1();
# Line 56 | Line 57
57   GBamRecord::GBamRecord(const char* qname, int32_t flags, int32_t g_tid,
58               int pos, int map_qual, const char* cigar, int32_t mg_tid, int mate_pos,
59               int insert_size, const char* qseq, const char* quals,
60 <             GVec<char*>* aux_strings) {
60 >             GVec<char*>* aux_strings):exons(1)  {
61    novel=true;
62    bam_header=NULL;
63    b=bam_init1();
# Line 77 | Line 78
78         }
79      }
80   }
81 +
82   void GBamRecord::set_cigar(const char* cigar) {
83     //requires b->core.pos and b->core.flag to have been set properly PRIOR to this call
84     int doff=b->core.l_qname;
# Line 132 | Line 134
134          }
135          b->core.bin = bam_reg2bin(b->core.pos, b->core.pos + 1);
136      }
137 +   setupCoordinates();
138     } //set_cigar()
139  
140   void GBamRecord::add_sequence(const char* qseq, int slen) {
# Line 250 | Line 253
253   void GBamRecord::setupCoordinates() {
254     uint32_t *cigar = bam1_cigar(b);
255     const bam1_core_t *c = &b->core;
256 <   if (c->flag & BAM_FUNMAP) return 0; /* skip unmapped reads */
256 >   if (c->flag & BAM_FUNMAP) return; /* skip unmapped reads */
257     int l=0;
258 <   for (int i = l = 0; i < c->n_cigar; ++i) {
259 <           int op = cigar[i]&0xf;
260 <           if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP)
258 >   start=c->pos+1;
259 >   int exstart=c->pos;
260 >   for (int i = 0; i < c->n_cigar; ++i) {
261 >        int op = cigar[i]&0xf;
262 >        if (op == BAM_CMATCH || op==BAM_CEQUAL ||
263 >            op == BAM_CDIFF || op == BAM_CDEL) {
264                 l += cigar[i]>>4;
265 <       }
265 >               }
266 >           else if (op == BAM_CREF_SKIP) { //N
267 >               //intron starts
268 >               //exon ends here
269 >               exons.Add(new GSeg(exstart+1,l));
270 >               l += cigar[i]>>4;
271 >               exstart=c->pos+l;
272 >               }
273 >        }
274 >   exons.Add(new GSeg(exstart+1, l));
275 >   end=c->pos+l;
276   }
277 +
278 + int GBamRecord::find_tag(const char tag[2], uint8_t* & s, char& tag_type) {
279 +   //position s at the beginning of tag "data" (after the type char)
280 +   //returns the length of tag data, and tag type in tag_type
281 +   s=bam1_aux(b);
282 +   while (s < b->data + b->data_len) {
283 +     char key[2];
284 +     key[0] = (char)s[0]; key[1] = (char)s[1];
285 +     s += 2; tag_type = (char)*s; ++s;
286 +     int inc=0;
287 +     int m_inc=0;       //for 'B' case
288 +     uint8_t sub_type=0; // --,,--
289 +     switch (tag_type) {
290 +       case 'A':
291 +       case 'C':
292 +       case 'c':
293 +         inc=1;
294 +         break;
295 +       case 'S':
296 +       case 's':
297 +         inc=2;
298 +         break;
299 +       case 'I':
300 +       case 'i':
301 +       case 'f':
302 +         inc=4;
303 +         break;
304 +       case 'd':
305 +         inc=8;
306 +         break;
307 +       case 'B':
308 +           sub_type = *(s+1);
309 +           int32_t n;
310 +           memcpy(&n, s+1, 4);
311 +           inc += 5;
312 +           //kputc(type, &str); kputc(':', &str); kputc(sub_type, &str);
313 +           m_inc=0;
314 +           if (sub_type == 'c' || sub_type == 'C')
315 +                        { m_inc=1; }
316 +           else if (sub_type == 's' || sub_type == 'S')
317 +                         { m_inc = 2; }
318 +           else if ('i' == sub_type || 'I' == sub_type || 'f' == sub_type)
319 +                         { m_inc = 4; }
320 +           if (m_inc==0) GError("Error: invalid 'B' array subtype (%c)!\n",sub_type);
321 +           inc += m_inc*n;
322 +           break;
323 +       case 'H':
324 +       case 'Z':
325 +         while (*(s+inc)) ++inc;
326 +         break;
327 +       } //switch (tag_type)
328 +     if (tag[0]==key[0] && tag[1]==key[1])
329 +        return inc;
330 +     }//while aux data
331 +   return 0;
332 +   }
333 +
334 + char GBamRecord::tag_char(const char tag[2]) { //retrieve tag data as single char
335 +   uint8_t *s;
336 +   char tag_type=0;
337 +   int vlen=find_tag(tag, s, tag_type);
338 +   if (vlen==0) return 0;
339 +   //if (vlen>1) GWarning("Warning: tag %c%c value has length %d, but char was expected.\n",
340 +   //        tag[0],tag[1],vlen);
341 +   return (char)s[0];
342 +  }
343 +
344 + int GBamRecord::tag_int(const char tag[2]) { //get the numeric value of tag
345 +   uint8_t *s;
346 +   char tag_type=0;
347 +   int vlen=find_tag(tag, s, tag_type);
348 +   if (vlen==0) return 0;
349 +   if (vlen==1) return (int)(*(int8_t*)s);
350 +    else if (vlen==2) return (int)(*(int16_t*)s);
351 +     else if (vlen==4) return (int)(*(int32_t*)s);
352 +     else GMessage("Warning: tag %c%c value has length %d, but int type was expected.\n",
353 +           tag[0],tag[1], vlen);
354 +   return 0;
355 +   }
356 +
357 + char* GBamRecord::tag_str(const char tag[2]) { //return string value for a tag
358 +   uint8_t *s;
359 +   char tag_type=0;
360 +   int vlen=find_tag(tag, s, tag_type);
361 +   if (vlen==0) return NULL; //not found
362 +   //if (vlen>1) GWarning("Warning: tag %c%c value has length %d, but char was expected.\n",
363 +   //        tag[0],tag[1],vlen);
364 +   return (char *)s;
365 +   }
366 +
367 + char GBamRecord::spliceStrand() { // '+', '-' from the XS tag, or 0 if no XS tag
368 +   uint8_t *s;
369 +   char tag_type=0;
370 +   int vlen=find_tag("XS", s, tag_type);
371 +   if (vlen==0) return '.';
372 +   return (char)s[0];
373 +   }
374 +
375 + char* GBamRecord::sequence() { //user must free this after use
376 +   char *s = (char*)bam1_seq(b);
377 +   char* qseq=NULL;
378 +   GMALLOC(qseq, (b->core.l_qseq+1));
379 +   int i;
380 +   for (i=0;i<(b->core.l_qseq);i++) {
381 +     int8_t v = bam1_seqi(s,i);
382 +     qseq[i] = bam_nt16_rev_table[v];
383 +     }
384 +   qseq[i] = 0;
385 +   return qseq;
386 +   }
387 +
388 + char* GBamRecord::qualities() {//user must free this after use
389 +   char *qual  = (char*)bam1_qual(b);
390 +   char* qv=NULL;
391 +   GMALLOC(qv, (b->core.l_qseq+1) );
392 +   int i;
393 +   for(i=0;i<(b->core.l_qseq);i++) {
394 +     qv[i]=qual[i]+33;
395 +     }
396 +   qv[i]=0;
397 +   return qv;
398 +   }
399 +
400 + char* GBamRecord::cigar() { //returns text version of the CIGAR string; must be freed by user
401 +   kstring_t str;
402 +   str.l = str.m = 0; str.s = 0;
403 +   if (b->core.n_cigar == 0) kputc('*', &str);
404 +    else {
405 +      for (int i = 0; i < b->core.n_cigar; ++i) {
406 +         kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);
407 +         kputc("MIDNSHP=X"[bam1_cigar(b)[i]&BAM_CIGAR_MASK], &str);
408 +         }
409 +      }
410 +   return str.s;
411 +   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines