ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.h
(Generate patch)
# Line 235 | Line 235
235     int* max_score;                   // array of maximum scores
236     GXMemPool* space;                // local memory pool for SGreedyOffset structs
237     //
238 +   bool scaled; //scores are all x2
239     int match_reward;
240     int mismatch_penalty;
241     int x_drop;
242 +   int xdrop_ofs;
243     // Allocate memory for the greedy gapped alignment algorithm
244     CGreedyAlignData(int reward, int penalty, int xdrop) {
245 +          scaled=false;
246 +          xdrop_ofs = 0;
247        //int max_d, diff_d;
248        if (penalty<0) penalty=-penalty;
249        if (reward % 2) {
250 <         //scale params
250 >         //scale params up
251 >         scaled=true;
252           match_reward = reward << 1;
253           mismatch_penalty = (penalty << 1);
254           x_drop = xdrop<<1;
# Line 253 | Line 258
258           mismatch_penalty = penalty;
259           x_drop=xdrop;
260           }
261 +      xdrop_ofs=(x_drop + (match_reward>>1)) /
262 +          (match_reward + mismatch_penalty) + 1;
263        //if (gap_open == 0 && gap_extend == 0)
264        //   gap_extend = (reward >> 1) + penalty;
265        const int max_dbseq_length=255; //adjust this accordingly
# Line 293 | Line 300
300       GCALLOC(max_score, (sizeof(int) * (max_d + 1 + d_diff)));
301       if (!max_score) GError(GX_GALLOC_ERROR);
302       }
303 +
304     ~CGreedyAlignData() {
305       if (last_seq2_off) {
306           GFREE(last_seq2_off[0]);
# Line 616 | Line 624
624       }
625   };
626  
627 + inline int calc_safelen(int alen) {
628 +         int r=iround(double(alen*0.6));
629 +         return (r<16)? 16 : r;
630 +  }
631 +
632   struct GXSeqData {
633    const char* aseq;
634    int alen;
# Line 630 | Line 643
643     calc_amlen();
644     calc_bmlen();
645     }
646 +
647    void calc_amlen() {
648      if (alen) {
649 <       int ah=iround(double(alen)*0.8);
636 <       if (ah<16) ah=16;
649 >       int ah=calc_safelen(alen);
650         if (amlen>ah) amlen=ah;
651         }
652      }
653    void calc_bmlen() {
654      if (blen) {
655 <      int bh = iround(double(alen)*0.6);
655 >      int bh = iround(double(blen)*0.6);
656        if (bh<16) bh=16;
657        if (amlen>bh) amlen=bh;
658        }
# Line 670 | Line 683
683   uint16 get6mer(char* p);
684   void table6mers(const char* s, int slen, GVec<uint16>* amers[]);
685  
673 //GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, GXSeqData& sd); //for overlap at 3' end of seqb
674
675 GXBandSet* collectSeeds(GList<GXSeed>& seeds, GXSeqData& sd); //for overlap at 5' end of seqb
676
686   void printEditScript(GXEditScript* ed_script);
687  
688  
689   int GXGreedyExtend(const char* seq1, int len1,
690                         const char* seq2, int len2,
691 <                       bool reverse, int xdrop_threshold,
683 <                       int match_cost, int mismatch_cost,
691 >                       bool reverse, //int xdrop_threshold, int match_cost, int mismatch_cost,
692                         int& seq1_align_len, int& seq2_align_len,
693                         CGreedyAlignData& aux_data,
694                         GXEditScript *edit_block);
# Line 696 | Line 704
704  
705   struct CAlnTrim {
706    GAlnTrimType type;
707 +  int minMatch; //minimum terminal exact match that will be removed from ends
708    int l_boundary; //base index (either left or right) excluding terminal poly-A stretches
709    int r_boundary; //base index (either left or right) excluding terminal poly-A stretches
710    int alen; //query/adaptor seq length (for validate())
# Line 733 | Line 742
742      //   }
743      }
744  
745 <  CAlnTrim(GAlnTrimType trim_type, const char* s, int s_len, int a_len, int smlen):
746 <                   type(trim_type), l_boundary(0), r_boundary(0), alen(a_len), safelen(smlen) {
745 >  CAlnTrim(GAlnTrimType trim_type, const char* s, int s_len, int a_len, int minEndTrim, int smlen):
746 >                   type(trim_type), minMatch(minEndTrim), l_boundary(0), r_boundary(0),
747 >                   alen(a_len), safelen(smlen) {
748      prepare(s, s_len);
749      }
750  
# Line 747 | Line 757
757          if (adist>admax) return false;
758      //left match should be more stringent (5')
759      if (alnpid<93) {
760 <      if (alnlen<13) return false;
760 >      if (alnlen<13 || alnlen<minMatch) return false;
761        admax=0;
762        badj++;
763        }
# Line 802 | Line 812
812   };
813  
814  
815 < // reward MUST be >1, always
815 >
816 >
817 > //GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, GXSeqData& sd); //for overlap at 3' end of seqb
818 >
819 > GXBandSet* collectSeeds(GList<GXSeed>& seeds, GXSeqData& sd, GAlnTrimType trim_intent); //for overlap at 5' end of seqb
820 >                                                                //=galn_None
821 > // reward MUST be >1 for this function
822   GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max,
823                    const char* s_seq, int s_alnstart, int s_max,
824                    int reward, int penalty, int xdrop, CGreedyAlignData* gxmem=NULL,
# Line 812 | Line 828
828                         CAlnTrim* trim=NULL, bool editscript=false);
829  
830   GXAlnInfo* GreedyAlign(const char* q_seq,  int q_alnstart, const char* s_seq, int s_alnstart,
831 <        bool editscript=false, int reward=2, int penalty=3, int xdrop=8);
831 >        bool editscript=false, int reward=2, int penalty=10, int xdrop=32);
832  
833 < GXAlnInfo* match_adaptor(GXSeqData& sd, GAlnTrimType trim_type,
833 > GXAlnInfo* match_adaptor(GXSeqData& sd, GAlnTrimType trim_type, int minMatch,
834                                  CGreedyAlignData* gxmem=NULL, int min_pid=90);
835   //GXAlnInfo* match_RightEnd(GXSeqData& sd, CGreedyAlignData* gxmem=NULL, int min_pid=90);
836   #endif

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines