ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.cpp
(Generate patch)
# Line 575 | Line 575
575  
576   };
577  
578 + const int a_m_score=2; //match score
579 + const int a_mis_score=-3; //mismatch
580 + const int a_dropoff_score=7;
581 +
582 + // ------------------ adapter matching - simple k-mer seed & extend, no indels for now
583 + //when a k-mer match is found, simply try to extend the alignment using a drop-off scheme
584 + //check minimum score and
585 + //for 3' adapter trimming:
586 + //     require that the right end of the alignment for either the adaptor OR the read must be
587 + //     < 3 distance from its right end
588 + // for 5' adapter trimming:
589 + //     require that the left end of the alignment for either the adaptor OR the read must
590 + //     be at coordinate < 3 from start
591 +
592 + bool extendUngapped(const char* a, int alen, int ai,
593 +                 const char* b, int blen, int bi, int& mlen, int& l5, int& l3, bool end5=false) {
594 + //so the alignment starts at ai in a, bi in b, with a perfect match of length mlen
595 + //if (debug) {
596 + //  GMessage(">> in %s\n\textending hit: %s at position %d\n", a, (dbg.substr(bi, mlen)).chars(), ai);
597 + //  }
598 + int a_l=ai; //alignment coordinates on a
599 + int a_r=ai+mlen-1;
600 + int b_l=bi; //alignment coordinates on b
601 + int b_r=bi+mlen-1;
602 + int ai_maxscore=ai;
603 + int bi_maxscore=bi;
604 + int score=mlen*a_m_score;
605 + int maxscore=score;
606 + int mism5score=a_mis_score;
607 + if (end5 && ai<(alen>>1)) mism5score-=2; // increase penalty for mismatches at 5' end
608 + //try to extend to the left first, if possible
609 + while (ai>0 && bi>0) {
610 +   ai--;
611 +   bi--;
612 +   score+= (a[ai]==b[bi])? a_m_score : mism5score;
613 +   if (score>maxscore) {
614 +       ai_maxscore=ai;
615 +       bi_maxscore=bi;
616 +       maxscore=score;
617 +       }
618 +     else if (maxscore-score>a_dropoff_score) break;
619 +   }
620 + a_l=ai_maxscore;
621 + b_l=bi_maxscore;
622 + //now extend to the right
623 + ai_maxscore=a_r;
624 + bi_maxscore=b_r;
625 + ai=a_r;
626 + bi=b_r;
627 + score=maxscore;
628 + //sometimes there are extra As at the end of the read, ignore those
629 + if (a[alen-2]=='A' && a[alen-1]=='A') {
630 +    alen-=2;
631 +    while (a[alen-1]=='A' && alen>ai) alen--;
632 +    }
633 + while (ai<alen-1 && bi<blen-1) {
634 +   ai++;
635 +   bi++;
636 +   //score+= (a[ai]==b[bi])? a_m_score : a_mis_score;
637 +   if (a[ai]==b[bi]) { //match
638 +      score+=a_m_score;
639 +      if (ai>=alen-2) {
640 +           score+=a_m_score-(alen-ai-1);
641 +           }
642 +      }
643 +    else { //mismatch
644 +      score+=a_mis_score;
645 +      }
646 +   if (score>maxscore) {
647 +       ai_maxscore=ai;
648 +       bi_maxscore=bi;
649 +       maxscore=score;
650 +       }
651 +     else if (maxscore-score>a_dropoff_score) break;
652 +   }
653 +  a_r=ai_maxscore;
654 +  b_r=bi_maxscore;
655 +  int a_ovh3=alen-a_r-1;
656 +  int b_ovh3=blen-b_r-1;
657 +  int mmovh3=(a_ovh3<b_ovh3)? a_ovh3 : b_ovh3;
658 +  int mmovh5=(a_l<b_l)? a_l : b_l;
659 +  if (maxscore>=a_min_score && mmovh3<2 && mmovh5<2) {
660 +     if (a_l<a_ovh3) {
661 +        //adapter closer to the left end (typical for 5' adapter)
662 +        l5=a_r+1;
663 +        l3=alen-1;
664 +        }
665 +      else {
666 +        //adapter matching at the right end (typical for 3' adapter)
667 +        l5=0;
668 +        l3=a_l-1;
669 +        }
670 +     return true;
671 +     }
672 +  //do not trim:
673 +  l5=0;
674 +  l3=alen-1;
675 +  return false;
676 + }
677 +
678 +
679 + bool extendUngapped_R(const char* seqa, int a_len, int &aix,
680 +                      const char* seqb, int b_len, int &bix, int &len) {
681 + //must start with a match
682 + if (seqa[aix]!=seqb[bix]) GError("Error at extendUngapped_R: no initial match!\n");
683 +
684 + }
685  
686   GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len) {
687   //overlap of right (3') end of seqb
# Line 609 | Line 716
716                   aix++;bix++;
717                   len++;
718                 }
719 +          if (len>8) {
720 +              //heuristics: very likely the best we can get
721 +              int qlen=len;
722 +              while (bix<b_len && aix<a_len && seqa[aix]==seqb[bix]) {
723 +                aix++;
724 +                bix++;
725 +                qlen++;
726 +                }
727 +              if (qlen>10) {
728 +                 diagstrips->qmatch=new GXSeed(ai,bi,qlen);
729 +                 return diagstrips;
730 +                 }
731 +              }
732            GXSeed* newseed=new GXSeed(ai,bi,len);
733            seeds.Add(newseed);
734            diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
# Line 794 | Line 914
914    #endif
915    GList<GXSeed> rseeds(true,true,false);
916    GXBandSet* alnbands=collectSeeds_R(rseeds, seqa, seqa_len, seqb, seqb_len);
917 +  //did we find a shortcut?
918 +  if (alnbands->qmatch) {
919 +
920 +    }
921    GList<GXAlnInfo> galns(true,true,false);
922    int max_top_bands=5;
923    int top_band_count=0;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines