ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/seqalign/GXAlign.cpp
(Generate patch)
# Line 261 | Line 261
261        }
262      GMessage("\n");
263    }
264 /*
265 GapXEditScript* EditScript2GapX(GXEditScript* ed_script) {
266   GapXEditScript* esp_array=NULL;
267   uint i;
268   if (ed_script==NULL || ed_script->opnum == 0)
269      return NULL;
270   esp_array = new GapXEditScript[ed_script->opnum];
271   for (i=0; i<ed_script->opnum; i++) {
272      esp_array[i].num=((ed_script->ops[i]) >> 2);
273      esp_array[i].op_type = 3 - ( ed_script->ops[i] & EDIT_OP_MASK );
274      if (esp_array[i].op_type == 3)
275         GError("Error: EditScript2GapX encountered op_type 3 ?!\n");
276      if (i>0)
277         esp_array[i-1].next=&esp_array[i];
278      }
279   return esp_array;
280 }
281 */
282 /*
283 GXAlnInfo* GreedyExtendRight(const char* seq1,  int seq1start, const char* seq2, int seq2start,
284        bool editscript, int reward, int penalty, int xdrop) {
285  GXEditScript* ed_script_fwd=NULL;
286  if (editscript)
287      ed_script_fwd =  new GXEditScript();
288  int s1ext_l=0, s1ext_r=0, s2ext_l=0, s2ext_r=0;
289  int score=0;
290  int slen=strlen(seq2); //subj
291  int qlen=strlen(seq1); //query
292  const char* q=seq1+seq1start;
293  int q_avail=qlen-seq1start;
294  const char* s=seq2+seq2start;
295  int s_avail=slen-seq2start;
296  int MIN_GREEDY_SCORE=reward*5; //minimum score for an alignment to be reported
297  GXAlnMem* abmp=new GXAlnMem(slen, reward, -penalty, xdrop);
298  // extend to the right
299  score = GXGreedyAlign(s, s_avail, q, q_avail, false, xdrop,
300             reward, -penalty, &s2ext_r, &s1ext_r, abmp, ed_script_fwd);
264  
302  GMessage("fwd score=%d (up to a_r=%d, b_r=%d)\n", score, s1ext_r, s2ext_r);
303  if (editscript) {
304        GMessage("fwd edit script: ");
305       printEditScript(ed_script_fwd);
306       }
307
308  //delete abmp;
309  // In basic case the greedy algorithm returns number of
310  // differences, hence we need to convert it to score    
311  int retscore = (s1ext_r + s2ext_r + s1ext_l + s2ext_l)*reward/2 - score*(reward-penalty);
312  GMessage("Actual score: %d\n",retscore);
313  GXAlnInfo* alninfo=NULL;
314  if (retscore>=MIN_GREEDY_SCORE) {
315    alninfo=new GXAlnInfo(seq1, s1ext_l, s1ext_r, seq2, s2ext_l, s2ext_r);
316    alninfo->abmp=abmp;
317    int hsp_length = GMIN(s1ext_l+s1ext_r, s2ext_l+s2ext_r);
318    alninfo->score=retscore;
319    alninfo->pid = 100 * (1 - ((double) score) / hsp_length);
320    if (editscript) {
321          alninfo->editscript=ed_script_fwd;
322          }
323    alninfo->gapinfo = new CAlnGapInfo(ed_script_fwd, alninfo->ql, alninfo->sl);
324    }
325  else {
326    //bad extension, delete unused stuff
327    delete abmp;
328    delete ed_script_fwd;
329    }
330  return alninfo;
331 }
332 */
265   GXAlnInfo* GreedyAlign(const char* q_seq,  int q_alnstart, const char* s_seq, int s_alnstart,
266          bool editscript, int reward, int penalty, int xdrop) {
267   int q_max=strlen(q_seq); //query
# Line 339 | Line 271
271   }
272  
273   struct GXSeedTable {
274 <  int a_max, b_max;
274 >  int a_num, b_num;
275    int a_cap, b_cap;
276 <  int **x;
276 >  char* xc;
277    GXSeedTable(int a=12, int b=255) {
278 <   a_cap=0;
279 <   b_cap=0;
280 <   a_max=0;
281 <   b_max=0;
282 <   x=NULL;
283 <   init(a,b);
284 <   }
285 <  void b_free() {
286 <   for (int i=0;i<a_cap;i++) {
355 <     GFREE((x[i]));
278 >    a_cap=0;
279 >    b_cap=0;
280 >    a_num=0;
281 >    b_num=0;
282 >    xc=NULL;
283 >    init(a,b);
284 >    }
285 >  ~GXSeedTable() {
286 >     GFREE(xc);
287       }
357   }
358   void b_alloc() {
359    for (int i=0;i<a_cap;i++) {
360      GCALLOC((x[i]), (b_cap*sizeof(int)));
361      }
362   }
288    void init(int a, int b) {
289 <    a_max=a;
290 <    b_max=b;
291 <    if (a_max>a_cap) {
292 <      b_free();
293 <      a_cap=a_max;
294 <      b_cap=b_max;
295 <      GFREE(x);
296 <      GCALLOC(x, (a_max*sizeof(int*)));
372 <      b_alloc();
373 <      }
374 <     else if (b_max>b_cap) {
375 <      b_free();
376 <      b_cap=b_max;
377 <      b_alloc();
289 >    a_num=a;
290 >    b_num=b;
291 >    bool resize=false;
292 >    if (b_num>b_cap) { resize=true; b_cap=b_num;}
293 >    if (a_num>a_cap) { resize=true; a_cap=a_num;}
294 >    if (resize) {
295 >      GFREE(xc);
296 >      GCALLOC(xc, (a_num*b_num));
297        }
298       else {
299        //just clear up to a_max, b_max
300 <      for (int i=0;i<a_max;i++)
382 <        for (int j=0;j<b_max;j++) x[i][j]=0; // memset() ?
300 >      memset((void*)xc, 0, (a_num*b_num));
301        }
384   }
385   ~GXSeedTable() {
386    b_free();
387    GFREE(x);
302      }
303 +   char& x(int ax, int by) {
304 +       return xc[by*a_num+ax];
305 +       }
306 +
307   };
308  
309  
310   static GXSeedTable gx(12,255);
311  
312 < void collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len) {
312 > GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len) {
313   //overlap of right (3') end of seqb
314 < //hash the first 8 bases of seqa:
315 < int aimax=GMIN(7,(a_len-4));
314 > //hash the first 12 bases of seqa:
315 > int aimax=GMIN(9,(a_len-4));
316   int bimin=GMAX(0,(b_len-a_len-2));//allow 2 extraneous bases, just in case there is some garbage at the end
317 < //int **x=NULL; //cache diagonal coverage information
318 < //GCALLOC(x,((aimax+4)*sizeof(int*)));
319 < //for (int i=0;i<aimax+4;i++) { GCALLOC( (x[i]), ((b_len-4)*sizeof(int))); }
320 < gx.init(aimax+4, b_len-4);
317 > int a_mlen=aimax+4; //number of rows in the diagonal matrix
318 > int b_mlen=b_len-4; //number of cols in the diagonal matrix
319 > gx.init(a_mlen, b_mlen);
320 > GXBandSet* diagstrips=new GXBandSet(a_mlen, b_mlen); //set of overlapping 3-diagonal strips
321   for (int ai=0;ai<aimax;ai++) {
322      int32* av=(int32 *)(&seqa[ai]);
323      for (int bi=b_len-5;bi>=bimin;bi--) {
324 <       if (gx.x[ai+3][bi+3]) continue; //already have a previous seed extending here
324 >       if (gx.x(ai,bi))
325 >            continue; //already have a previous seed covering this region of this diagonal
326         int32* bv=(int32 *)(&seqb[bi]);
327         if (*av == *bv) {
328            //word match
329            //see if we can extend to the right
330 +          gx.x(ai+1,bi+1)=1;
331 +          gx.x(ai+2,bi+2)=1;
332 +          gx.x(ai+3,bi+3)=1;
333            int aix=ai+4;
334            int bix=bi+4;
335            int len=4;
336            while (bix<b_len && aix<a_len && seqa[aix]==seqb[bix])
337 <               { aix++;bix++; len++; gx.x[aix][bix]=1; }
338 <          seeds.Add(new GXSeed(ai,bi,len));
337 >               { aix++;bix++; len++; gx.x(aix,bix)=1; }
338 >          GXSeed* newseed=new GXSeed(ai,bi,len);
339 >          seeds.Add(newseed);
340 >          diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
341            }
342         }
343      }//for each 4-mer at the beginning of seqa
344 + return diagstrips;
345   }
346  
347   GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max,

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines