ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.h
(Generate patch)
# Line 685 | Line 685
685  
686  
687   enum GAlnTrimType {
688 <  galn_NoTrim=0,
688 >  //Describes trimming intent
689 >  galn_None=0, //no trimming, just alignment
690    galn_TrimLeft,
691 <  galn_TrimRight
691 >  galn_TrimRight,
692 >  galn_TrimEither //adaptor should be trimmed from either end
693    };
694  
695   struct CAlnTrim {
696    GAlnTrimType type;
697 <  int boundary; //base index (either left or right) excluding terminal poly-A stretches
697 >  int l_boundary; //base index (either left or right) excluding terminal poly-A stretches
698 >  int r_boundary; //base index (either left or right) excluding terminal poly-A stretches
699 >  int alen; //query/adaptor seq length (for validate())
700    int safelen; //alignment length > amlen should be automatically validated
701    int seedlen;
702    void prepare(const char* s, int s_len) {
703      //type=trim_type;
704      //amlen=smlen;
705 <    boundary=0;
706 <    if (type==galn_TrimLeft) {
705 >    l_boundary=0;
706 >    r_boundary=0;
707 >    //if (type==galn_TrimLeft) {
708          int s_lbound=0;
709          if (s[0]=='A' && s[1]=='A' && s[2]=='A') {
710             s_lbound=3;
# Line 709 | Line 714
714             s_lbound=4;
715             while (s_lbound<s_len-1 && s[s_lbound]=='A') s_lbound++;
716             }
717 <        boundary=s_lbound+3;
718 <        return;
719 <        }
720 <    if (type==galn_TrimRight) {
717 >        l_boundary=s_lbound+3;
718 >    //    return;
719 >    //    }
720 >    //if (type==galn_TrimRight) {
721         int r=s_len-1;
722         if (s[r]=='A' && s[r-1]=='A' && s[r-2]=='A') {
723            r-=3;
# Line 722 | Line 727
727            r-=4;
728            while (r>0 && s[r]=='A') r--;
729            }
730 <       boundary=r-3;
731 <       }
730 >       r_boundary=r-3;
731 >    //   }
732      }
733  
734 <  CAlnTrim(GAlnTrimType trim_type, const char* s, int s_len, int smlen):
735 <                   type(trim_type), boundary(0), safelen(smlen) {
734 >  CAlnTrim(GAlnTrimType trim_type, const char* s, int s_len, int a_len, int smlen):
735 >                   type(trim_type), l_boundary(0), r_boundary(0), alen(a_len), safelen(smlen) {
736      prepare(s, s_len);
737      }
738  
739 <  bool validate(int sl, int sr, int alnpid, int adist) {
740 <   int alnlen=sr-sl+1;
741 <   if (alnpid>=90 && alnlen>safelen) return true;
739 >  bool validate_R(int sr, int admax, int badj, int adist) {
740 >        if (adist>admax) return false;
741 >        return (sr>=r_boundary+badj);
742 >   }
743 >  bool validate_L(int sl, int alnlen, int admax, int badj, int alnpid, int adist) {
744 >        if (adist>admax) return false;
745 >    //left match should be more stringent (5')
746 >    if (alnpid<93) {
747 >      if (alnlen<13) return false;
748 >      admax=0;
749 >      badj++;
750 >      }
751 >    return (sl<=l_boundary-badj);
752 >  }
753 >
754 >  bool validate(GXAlnInfo* alninfo) {
755 >   int alnlen=alninfo->sr - alninfo->sl + 1;
756 >   if (alninfo->pid>90.0 && alnlen>safelen)
757 >           //special case: heavy match, could be in the middle
758 >           return true;
759 >   int sl=alninfo->sl;
760 >   int sr=alninfo->sr;
761     sl--;sr--; //boundary is 0-based
762     int badj=0; //default boundary is 3 bases distance to end
763     int admax=1;
764     if (alnlen<13) {
765        //stricter boundary check
766 <      if (alnpid<90) return false;
766 >      if (alninfo->pid<90) return false;
767        badj=2;
768        if (alnlen<=7) { badj++; admax=0; }
769        }
770 <   if (adist>admax) return false;
770 >   if (type==galn_TrimLeft) {
771 >         return validate_L(sl, alnlen, admax, badj, alninfo->pid, alen-alninfo->qr);
772 >     }
773 >   else if (type==galn_TrimRight) {
774 >         return validate_R(sr, admax, badj, alninfo->ql-1);
775 >     }
776 >   else if (type==galn_TrimEither) {
777 >     return (validate_R(sr, admax, badj, alninfo->ql-1) ||
778 >           validate_L(sl, alnlen, admax, badj, alninfo->pid, alen-alninfo->qr));
779 >     }
780 >   return true;
781 >   /*
782     if (type==galn_TrimRight) {
783        return (sr>=boundary+badj);
784        }
# Line 756 | Line 791
791          }
792        return (sl<=boundary-badj);
793        }
794 +    */
795     }
760
796   };
797  
798  
# Line 773 | Line 808
808   GXAlnInfo* GreedyAlign(const char* q_seq,  int q_alnstart, const char* s_seq, int s_alnstart,
809          bool editscript=false, int reward=2, int penalty=3, int xdrop=8);
810  
811 < GXAlnInfo* match_LeftEnd(GXSeqData& sd, CGreedyAlignData* gxmem=NULL, int min_pid=90);
812 < GXAlnInfo* match_RightEnd(GXSeqData& sd, CGreedyAlignData* gxmem=NULL, int min_pid=90);
811 > GXAlnInfo* match_adaptor(GXSeqData& sd, GAlnTrimType trim_type,
812 >                                CGreedyAlignData* gxmem=NULL, int min_pid=90);
813 > //GXAlnInfo* match_RightEnd(GXSeqData& sd, CGreedyAlignData* gxmem=NULL, int min_pid=90);
814   #endif

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines