ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.h
(Generate patch)
# Line 5 | Line 5
5  
6   #include "GBase.h"
7   #include "GList.hh"
8 < #include <string.h>
8 > #include "gdna.h"
9  
10 < #define GDEBUG 1
10 > //#define GDEBUG 1
11  
12   enum {
13      gxEDIT_OP_MASK = 0x3,
# Line 98 | Line 98
98  
99          if (GX_EDITOP_GET(oplast) == op) {
100              uint32 l=ops[opnum-1];
101 <            ops[opnum-1]=GX_EDITOP_CONS((GX_EDITOP_GET(l)),
101 >            ops[opnum-1]=GX_EDITOP_CONS((GX_EDITOP_GET(l)),
102                 (GX_EDITOP_VAL(l) + k));
103              }
104          else {
# Line 215 | Line 215
215  
216   };
217  
218 < #define GREEDY_MAX_COST_FRACTION 5
218 > #define GREEDY_MAX_COST_FRACTION 8
219   /* (was 2) sequence_length / (this number) is a measure of how hard the
220    alignment code will work to find the optimal alignment; in fact
221    this gives a worst case bound on the number of loop iterations */
# Line 371 | Line 371
371             }
372  
373   #ifdef GDEBUG
374 <        void printAlignment(FILE* f, const char* sa, int sa_len,
374 >        void printAlignment(FILE* f, const char* sa, int sa_len,
375                       const char* sb, int sb_len) {
376                  //print seq A
377             char al[1024]; //display buffer for seq A
# Line 436 | Line 436
436         }
437   #endif
438    };
439 <  
439 >
440   struct GXAlnInfo {
441   const char *qseq;
442   int ql,qr;
# Line 466 | Line 466
466    bool operator<(GXAlnInfo& d) {
467      return ((score==d.score)? pid>d.pid : score>d.score);
468      }
469  bool operator>(GXAlnInfo& d) {
470    return ((score==d.score)? pid<d.pid : score<d.score);
471    }
469    bool operator==(GXAlnInfo& d) {
470      return (score==d.score && pid==d.pid);
471      }
# Line 484 | Line 481
481     bool operator<(GXSeed& d){
482        return ((b_ofs==d.b_ofs) ? a_ofs<d.a_ofs : b_ofs<d.b_ofs);
483        }
487   bool operator>(GXSeed& d){
488      return ((b_ofs==d.b_ofs) ? a_ofs>d.a_ofs : b_ofs>d.b_ofs);
489      }
484     bool operator==(GXSeed& d){
485        return (b_ofs==d.b_ofs && a_ofs==d.a_ofs); //should never be the case, seeds are uniquely constructed
486        }
# Line 554 | Line 548
548          int a_gap=scur.a_ofs-sprev.a_ofs-sprev.len;
549          int max_gap=b_gap;
550          int min_gap=a_gap;
551 <        if (min_gap>max_gap) swap(max_gap, min_gap);
551 >        if (min_gap>max_gap) Gswap(max_gap, min_gap);
552          int _penalty=0;
553          if (min_gap<0) { //overlap
554                 if (max_gap>0) { _penalty=GMAX((-min_gap), max_gap); }
# Line 574 | Line 568
568       //return ((score==d.score) ? seeds.Count()>d.seeds.Count() : score>d.score);
569       return ((score==d.score) ? w_min_b<d.w_min_b : score>d.score);
570       }
577  bool operator>(GXBand& d){
578     //return ((score==d.score) ? seeds.Count()<d.seeds.Count() : score<d.score);
579    return ((score==d.score) ? w_min_b>d.w_min_b : score<d.score);
580     }
571    bool operator==(GXBand& d){
572      //return (score==d.score && seeds.Count()==d.seeds.Count());
573       return (score==d.score && w_min_b==d.w_min_b);
# Line 588 | Line 578
578   class GXBandSet:public GList<GXBand> {
579    public:
580     GXSeed* qmatch; //long match (mismatches allowed) if a very good match was extended well
581 +   GXSeed* tmatch; //terminal match to be used if there is no better alignment
582     int idxoffset; //global anti-diagonal->index offset (a_len-1)
583     //used to convert a diagonal to an index
584     //diagonal is always b_ofs-a_ofs, so the minimum value is -a_len+1
# Line 601 | Line 592
592     GXBandSet(int a_len, int b_len):GList<GXBand>(a_len+b_len-1, false, true, false) {
593        idxoffset=a_len-1;
594        qmatch=NULL;
595 +      tmatch=NULL; //terminal match to be used if everything else fails
596            //diag will range from -a_len+1 to b_len-1, so after adjustment
597            //by idxoffset we get a max of a_len+b_len-2
598        int bcount=a_len+b_len-1;
# Line 621 | Line 613
613   };
614  
615  
616 < GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len); //for overlap at 3' end of seqb
616 > uint16 get6mer(char* p);
617 >
618 > GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, GVec<uint16> amers[],
619 >         const char* seqb, int b_len); //for overlap at 3' end of seqb
620  
621 < GXBandSet* collectSeeds_L(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len); //for overlap at 5' end of seqb
621 > GXBandSet* collectSeeds_L(GList<GXSeed>& seeds, const char* seqa, int a_len, GVec<uint16> amers[],
622 >        const char* seqb, int b_len); //for overlap at 5' end of seqb
623  
624   void printEditScript(GXEditScript* ed_script);
625  
# Line 683 | Line 679
679    bool validate(int sl, int sr, int alnpid, int adist) {
680     int alnlen=sr-sl+1;
681     sl--;sr--; //boundary is 0-based
682 <   int badj=0;
683 <   int admax=3;
684 <   if (alnlen<11) {
682 >   int badj=0; //default boundary is 3 bases distance to end
683 >   int admax=1;
684 >   if (alnlen<13) {
685        //stricter boundary check
686 +      if (alnpid<90) return false;
687        badj=2;
688 <      admax=1;
688 >      if (alnlen<=7) { badj++; admax=0; }
689        }
690     if (adist>admax) return false;
691     if (type==galn_TrimRight) {
692        return (sr>=boundary+badj);
693        }
694     else {
695 <      //left side should be more stringent
696 <      if (alnpid<91) {
697 <        if (alnlen<11) return false;
695 >      //left match should be more stringent (5')
696 >      if (alnpid<93) {
697 >        if (alnlen<13) return false;
698 >        admax=0;
699          badj++;
700          }
701        return (sl<=boundary-badj);
# Line 719 | Line 717
717   GXAlnInfo* GreedyAlign(const char* q_seq,  int q_alnstart, const char* s_seq, int s_alnstart,
718          bool editscript=false, int reward=2, int penalty=3, int xdrop=8);
719  
720 < GXAlnInfo* match_LeftEnd(const char* seqa, int seqa_len, const char* seqb, int seqb_len,
721 <          CGreedyAlignData* gxmem=NULL, int min_pid=83);
722 < GXAlnInfo* match_RightEnd(const char* seqa, int seqa_len, const char* seqb, int seqb_len,
723 <          CGreedyAlignData* gxmem=NULL, int min_pid=73);
720 > GXAlnInfo* match_LeftEnd(const char* seqa, int seqa_len, GVec<uint16> amers[],
721 >           const char* seqb, int seqb_len, CGreedyAlignData* gxmem=NULL, int min_pid=83);
722 > GXAlnInfo* match_RightEnd(const char* seqa, int seqa_len, GVec<uint16> amers[],
723 >           const char* seqb, int seqb_len, CGreedyAlignData* gxmem=NULL, int min_pid=73);
724   #endif

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines