ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/seqalign/GXAlign.h
(Generate patch)
# Line 399 | Line 399
399  
400  
401   struct GXSeed {
402 <   int a_ofs; //0-based coordinate on seq a
403 <   int b_ofs; //0-based coordinate on seq a
402 >   int b_ofs; //0-based coordinate on seq b (x coordinate)
403 >   int a_ofs; //0-based coordinate on seq a (y coordinate)
404     int len; //length of exact match after extension
405 <   int diag; //b_ofs-a_ofs
405 >   bool edited; //if it's an edited (cut) seed
406     bool operator<(GXSeed& d){
407 <      return (len<d.len);
407 >      return ((b_ofs==d.b_ofs) ? a_ofs<d.a_ofs : b_ofs<d.b_ofs);
408        }
409     bool operator>(GXSeed& d){
410 <      return (len>d.len);
410 >      return ((b_ofs==d.b_ofs) ? a_ofs>d.a_ofs : b_ofs>d.b_ofs);
411        }
412     bool operator==(GXSeed& d){
413 <      return (len==d.len);
413 >      return (b_ofs==d.b_ofs && a_ofs==d.a_ofs); //should never be the case, seeds are uniquely constructed
414        }
415     GXSeed(int aofs=0, int bofs=0, int l=4) {
416       a_ofs=aofs;
417       b_ofs=bofs;
418 +     edited=false;
419       len=l;
419     diag=b_ofs-a_ofs;
420       }
421   };
422  
423 < void collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len); //overlap of 3' end of seqb
423 > struct GXBand {
424 >  //bundle of seed matches on 3 adjacent diagonals
425 >  int diag; //first anti-diagonal (b_ofs-a_ofs) in this group of 3
426 >   //seeds for this, and diag+1 and diag+2 are stored here
427 >  int min_a, max_a; //maximal coordinates of the bundle
428 >  int min_b, max_b;
429 >  GList<GXSeed> seeds; //sorted by x coordinate (b_ofs)
430 >  GPVec<GXSeed> cutseeds; //copy-on-cut list of seeds that will replace the seed instances when a seed gets edited
431 >  int score; //sum of seed scores (- overlapping_bases/2 - gaps)
432 >  GXBand(int start_diag=-1, GXSeed* seed=NULL):seeds(true, false, false),
433 >                  cutseeds(10, true){
434 >          diag=start_diag;
435 >          min_a=MAX_INT;
436 >          min_b=MAX_INT;
437 >          max_a=0;
438 >          max_b=0;
439 >          score=0;
440 >      if (seed!=NULL) addSeed(seed);
441 >      }
442 >  void addSeed(GXSeed* seed) {
443 >     seeds.Add(seed);
444 >     score+=seed->len;
445 >     //if (diag<0) diag=seed->diag; //should NOT be done like this
446 >     if (seed->a_ofs < min_a) min_a=seed->a_ofs;
447 >     if (seed->a_ofs+ seed->len > max_a) max_a=seed->a_ofs+seed->len;
448 >     if (seed->b_ofs < min_b) min_b=seed->b_ofs;
449 >     if (seed->b_ofs+seed->len > max_b) max_b=seed->b_ofs+seed->len;
450 >     }
451 >  void finalize() {
452 >         //!! to be called only AFTER all seeds have been added
453 >         // seeds are sorted by b_ofs
454 >         //penalize gaps and overlaps on b sequence
455 >         //cut out overlapping regions from shorter seeds
456 >
457 >         bool had_cut=false;
458 >         do {
459 >          for (int i=1;i<seeds.Count();i++) {
460 >        GXSeed& sprev=*seeds[i-1];
461 >        if (sprev.len<=0) continue;
462 >        GXSeed& scur=*seeds[i];
463 >        if (scur==sprev) GError("Error: duplicate seeds found (%d-%d:%d-%d)!\n",
464 >                              scur.a_ofs+1, scur.a_ofs+scur.len, scur.b_ofs+1, scur.b_ofs+scur.len);
465 >        int b_gap=scur.b_ofs-sprev.b_ofs-sprev.len;
466 >        int a_gap=scur.a_ofs-sprev.a_ofs-sprev.len;
467 >        int max_gap=b_gap;
468 >        int min_gap=a_gap;
469 >        if (min_gap>max_gap) swap(max_gap, min_gap);
470 >        if (min_gap<0) {
471 >            //cut shorter or previous seed by -min_gap amount
472 >            GXSeed* cseed=NULL;
473 >            had_cut=true;
474 >            if (scur.len<sprev.len) {
475 >               //cut beginning of scur
476 >               cseed=new GXSeed(scur);
477 >               cseed->edited=1;
478 >               cseed->len+=min_gap; //this can get negative!
479 >               cseed->a_ofs+=min_gap;//these can get invalid
480 >               cseed->b_ofs+=min_gap;
481 >               seeds.Put(i,cseed); //TODO: re-sort?
482 >               }
483 >            else { //cut ending of sprev
484 >               cseed=new GXSeed(sprev);
485 >               cseed->edited=1;
486 >               cseed->len+=min_gap; //this can get negative!
487 >               seeds.Put(i-1,cseed);
488 >               }
489 >            }//overlap
490 >            }
491 >           } while (had_cut); //repeat until no more cuts
492 >     }
493 >  //bands will be sorted by decreasing score eventually, after all seeds are added
494 >  bool operator<(GXBand& d){
495 >     return ((score==d.score) ? seeds.Count()>d.seeds.Count() : score>d.score);
496 >     }
497 >  bool operator>(GXBand& d){
498 >     return ((score==d.score) ? seeds.Count()<d.seeds.Count() : score<d.score);
499 >     }
500 >  bool operator==(GXBand& d){
501 >     return (score==d.score && seeds.Count()==d.seeds.Count());
502 >     }
503 >
504 > };
505 >
506 > class GXBandSet:public GList<GXBand> {
507 >  public:
508 >   int idxoffset; //global anti-diagonal->index offset (a_len-1)
509 >   //used to convert a diagonal to an index
510 >   //diagonal is always b_ofs-a_ofs, so the minimum value is -a_len+1
511 >   //hence offset is a_len-1
512 >   GXBand* band(int diag) { //retrieve the band for given anti-diagonal (b_ofs-a_ofs)
513 >      return Get(diag+idxoffset);
514 >      }
515 >   GXBand* band(int a_ofs, int b_ofs) { //retrieve the band for given anti-diagonal (b_ofs-a_ofs)
516 >      return Get(b_ofs-a_ofs+idxoffset);
517 >      }
518 >   GXBandSet(int a_len, int b_len):GList<GXBand>(a_len+b_len-1, false, true, false) {
519 >      idxoffset=a_len-1;
520 >          //diag will range from -a_len+1 to b_len-1, so after adjustment
521 >          //by idxoffset we get a max of a_len+b_len-2
522 >      int bcount=a_len+b_len-1;
523 >      for (int i=0;i<bcount;i++)
524 >           fList[i]=new GXBand(i-idxoffset);
525 >      }
526 >   void addSeed(GXSeed* seed) {
527 >         //MUST be unsorted !!!
528 >         int idx=(seed->b_ofs-seed->a_ofs)+idxoffset;
529 >     fList[idx]->addSeed(seed);
530 >     if (idx>0) fList[idx-1]->addSeed(seed);
531 >     if (idx<fCount-1) fList[idx+1]->addSeed(seed);
532 >     }
533 > };
534 >
535 > GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len); //overlap of 3' end of seqb
536   //void collectSeeds_L(GList<GXSeed>& seeds, const char* seqa, const char* seqb); //overlap of 5' end of seqb
537  
538   void printEditScript(GXEditScript* ed_script);
539  
428 //GapXEditScript* EditScript2GapX(GXEditScript* ed_script);
429
540   GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max,
541                    const char* s_seq, int s_alnstart, int s_max,
542                       bool editscript=false, int reward=2, int penalty=-3, int xdrop=22);
543   GXAlnInfo* GreedyAlign(const char* q_seq,  int q_alnstart, const char* s_seq, int s_alnstart,
544          bool editscript=false, int reward=2, int penalty=-3, int xdrop=22);
545  
436 /*
437 GXAlnInfo* GreedyExtendRight(const char* q_seq,  int q_off, const char* s_seq, int s_off,
438     bool editscript=true, int reward=2, int penalty=-3, int xdrop=22);
439 */
440
546   #endif

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines