ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.h
(Generate patch)
# Line 5 | Line 5
5  
6   #include "GBase.h"
7   #include "GList.hh"
8 < #include <string.h>
8 > #include "gdna.h"
9  
10   //#define GDEBUG 1
11  
# Line 98 | Line 98
98  
99          if (GX_EDITOP_GET(oplast) == op) {
100              uint32 l=ops[opnum-1];
101 <            ops[opnum-1]=GX_EDITOP_CONS((GX_EDITOP_GET(l)),
101 >            ops[opnum-1]=GX_EDITOP_CONS((GX_EDITOP_GET(l)),
102                 (GX_EDITOP_VAL(l) + k));
103              }
104          else {
# Line 346 | Line 346
346         a_len=0;
347             b_len=0;
348             if (ed_script==NULL) return;
349 <           for (uint32 i=0; i<ed_script->opnum; i++) {
349 >           for (uint32 i=0; i<ed_script->opnum; i++) {
350                    int num=((ed_script->ops[i]) >> 2);
351                    char op_type = 3 - ( ed_script->ops[i] & gxEDIT_OP_MASK );
352                    if (op_type == 3 || op_type < 0 )
# Line 371 | Line 371
371             }
372  
373   #ifdef GDEBUG
374 <        void printAlignment(FILE* f, const char* sa, int sa_len,
374 >        void printAlignment(FILE* f, const char* sa, int sa_len,
375                       const char* sb, int sb_len) {
376                  //print seq A
377             char al[1024]; //display buffer for seq A
# Line 436 | Line 436
436         }
437   #endif
438    };
439 <  
439 >
440   struct GXAlnInfo {
441   const char *qseq;
442   int ql,qr;
# Line 578 | Line 578
578   class GXBandSet:public GList<GXBand> {
579    public:
580     GXSeed* qmatch; //long match (mismatches allowed) if a very good match was extended well
581 <   GXSeed* tmatch; //terminal match to be used if there is no better alignment
581 >   GXSeed* tmatch_r; //terminal match to be used if there is no better alignment
582 >   GXSeed* tmatch_l; //terminal match to be used if there is no better alignment
583     int idxoffset; //global anti-diagonal->index offset (a_len-1)
584     //used to convert a diagonal to an index
585     //diagonal is always b_ofs-a_ofs, so the minimum value is -a_len+1
# Line 592 | Line 593
593     GXBandSet(int a_len, int b_len):GList<GXBand>(a_len+b_len-1, false, true, false) {
594        idxoffset=a_len-1;
595        qmatch=NULL;
596 <      tmatch=NULL; //terminal match to be used if everything else fails
596 >      tmatch_l=NULL; //terminal match to be used if everything else fails
597 >      tmatch_r=NULL;
598            //diag will range from -a_len+1 to b_len-1, so after adjustment
599            //by idxoffset we get a max of a_len+b_len-2
600        int bcount=a_len+b_len-1;
601        for (int i=0;i<bcount;i++)
602 <                   this->Add(new GXBand(i-idxoffset));
602 >              this->Add(new GXBand(i-idxoffset));
603             //unsorted, this should set fList[i]
604        }
605     ~GXBandSet() {
# Line 612 | Line 614
614       }
615   };
616  
617 + struct GXSeqData {
618 +  const char* aseq;
619 +  int alen;
620 +  const char* bseq;
621 +  int blen;
622 +  GVec<uint16>** amers;
623 +  int amlen; //minimum alignment length that's sufficient to
624 +             //trigger the quick extension heuristics
625 +  GXSeqData(const char* sa=NULL, int la=0, const char* sb=NULL, int lb=0,
626 +  GVec<uint16>* mers[]=NULL):aseq(sa), alen(la),
627 +     bseq(sb),  blen(lb), amers(mers), amlen(0) {
628 +   calc_amlen();
629 +   calc_bmlen();
630 +   }
631 +  void calc_amlen() {
632 +    if (alen) {
633 +       int ah=iround(double(alen)*0.8);
634 +       if (ah<12) ah=12;
635 +       if (amlen>ah) amlen=ah;
636 +       }
637 +    }
638 +  void calc_bmlen() {
639 +    if (blen) {
640 +      int bh = iround(double(alen)*0.6);
641 +      if (bh<12) bh=12;
642 +      if (amlen>bh) amlen=bh;
643 +      }
644 +    }
645 +  void update(const char* sa, int la, GVec<uint16>** mers,
646 +          const char* sb, int lb, int mlen=0) {
647 +     aseq=sa;
648 +     alen=la;
649 +     amers=mers;
650 +     if (mlen) {
651 +       amlen=mlen;
652 +       }
653 +       else calc_amlen();
654 +     if (sb==bseq && blen==lb) return;
655 +     bseq=sb;
656 +     blen=lb;
657 +     calc_bmlen();
658 +     }
659 +  /*
660 +  void update_b(const char* sb, int lb) {
661 +     if (sb==bseq && blen==lb) return;
662 +     bseq=sb;
663 +     blen=lb;
664 +     calc_bmlen();
665 +     }*/
666 + };
667 +
668 + uint16 get6mer(char* p);
669 + void table6mers(const char* s, int slen, GVec<uint16>* amers[]);
670  
671 < GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len); //for overlap at 3' end of seqb
671 > //GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, GXSeqData& sd); //for overlap at 3' end of seqb
672  
673 < GXBandSet* collectSeeds_L(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len); //for overlap at 5' end of seqb
673 > GXBandSet* collectSeeds(GList<GXSeed>& seeds, GXSeqData& sd); //for overlap at 5' end of seqb
674  
675   void printEditScript(GXEditScript* ed_script);
676  
# Line 638 | Line 693
693   struct CAlnTrim {
694    GAlnTrimType type;
695    int boundary; //base index (either left or right) excluding terminal poly-A stretches
696 <  void prepare(GAlnTrimType trim_type, const char* s, int s_len) {
697 <    type=trim_type;
696 >  int safelen; //alignment length > amlen should be automatically validated
697 >  int seedlen;
698 >  void prepare(const char* s, int s_len) {
699 >    //type=trim_type;
700 >    //amlen=smlen;
701      boundary=0;
702      if (type==galn_TrimLeft) {
703          int s_lbound=0;
# Line 668 | Line 726
726         }
727      }
728  
729 <  CAlnTrim(GAlnTrimType trim_type, const char* s, int s_len) {
730 <    prepare(trim_type, s, s_len);
729 >  CAlnTrim(GAlnTrimType trim_type, const char* s, int s_len, int smlen):
730 >                   type(trim_type), boundary(0), safelen(smlen) {
731 >    prepare(s, s_len);
732      }
733  
734    bool validate(int sl, int sr, int alnpid, int adist) {
735     int alnlen=sr-sl+1;
736 +   if (alnpid>=90 && alnlen>safelen) return true;
737     sl--;sr--; //boundary is 0-based
738     int badj=0; //default boundary is 3 bases distance to end
739     int admax=1;
# Line 713 | Line 773
773   GXAlnInfo* GreedyAlign(const char* q_seq,  int q_alnstart, const char* s_seq, int s_alnstart,
774          bool editscript=false, int reward=2, int penalty=3, int xdrop=8);
775  
776 < GXAlnInfo* match_LeftEnd(const char* seqa, int seqa_len, const char* seqb, int seqb_len,
777 <          CGreedyAlignData* gxmem=NULL, int min_pid=83);
718 < GXAlnInfo* match_RightEnd(const char* seqa, int seqa_len, const char* seqb, int seqb_len,
719 <          CGreedyAlignData* gxmem=NULL, int min_pid=73);
776 > GXAlnInfo* match_LeftEnd(GXSeqData& sd, CGreedyAlignData* gxmem=NULL, int min_pid=90);
777 > GXAlnInfo* match_RightEnd(GXSeqData& sd, CGreedyAlignData* gxmem=NULL, int min_pid=90);
778   #endif

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines