ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.cpp
Revision: 171
Committed: Tue Feb 14 22:36:26 2012 UTC (7 years, 3 months ago) by gpertea
File size: 39886 byte(s)
Log Message:
wip fqtrim

Line User Rev File contents
1 gpertea 93 #include "GAlnExtend.h"
2    
3     //greedy gapped alignment extension
4     //(mostly lifted from NCBI's megablast gapped extension code)
5    
6     int GXMemPool::kMinSpace = 1000000;
7    
8     #ifdef GDEBUG
9     char buf[6]={0x1B,'[', 'n','m','m','\0'};
10    
11     void color_fg(int c,FILE* f) {
12     if (f!=stderr && f!=stdout) return;
13     sprintf((char *)(&buf[2]),"%dm",c+30);
14     fwrite(buf,1,strlen(buf), f);
15     }
16    
17     void color_bg(int c, FILE* f) {
18     if (f!=stderr && f!=stdout) return;
19     sprintf((char *)(&buf[2]),"%dm",c+40);
20     fwrite(buf,1,strlen(buf),f);
21     };
22    
23     void color_resetfg(FILE* f) {
24     if (f!=stderr && f!=stdout) return;
25     sprintf((char *)(&buf[2]),"39m");
26     fwrite(buf,1,strlen(buf), f);
27     };
28    
29     void color_resetbg(FILE* f) {
30     if (f!=stderr && f!=stdout) return;
31     sprintf((char *)(&buf[2]),"49m");
32     fwrite(buf,1,strlen(buf), f);
33     }
34    
35     void color_reset(FILE* f) {
36     if (f!=stderr && f!=stdout) return;
37     sprintf((char *)(&buf[2]),"0m");
38     fwrite(buf,1,strlen(buf), f);
39     };
40    
41     void color_normal(FILE* f) {
42     if (f!=stderr && f!=stdout) return;
43     sprintf((char *)(&buf[2]),"22m");
44     fwrite(buf,1,strlen(buf), f);
45     };
46    
47     #endif
48    
49    
50     char xgapcodes[4]={'S','I', 'D', 'X'};
51    
52     int get_last(int **flast_d, int d, int diag, int *row1) {
53     if (flast_d[d-1][diag-1] > GMAX(flast_d[d-1][diag], flast_d[d-1][diag+1])) {
54     *row1 = flast_d[d-1][diag-1];
55     return diag-1;
56 gpertea 171 }
57 gpertea 93 if (flast_d[d-1][diag] > flast_d[d-1][diag+1]) {
58     *row1 = flast_d[d-1][diag];
59     return diag;
60     }
61     *row1 = flast_d[d-1][diag+1];
62     return diag+1;
63     }
64    
65     void GapXEditScript::print() { //debug
66     GapXEditScript* p=this;
67     do {
68 gpertea 171 GMessage("%d%c ",p->num, xgapcodes[p->op_type]);
69     } while ((p=p->next)!=NULL);
70 gpertea 93 GMessage("\n");
71     }
72    
73    
74     int BLAST_Gcd(int a, int b) {
75     int c;
76    
77     b = abs(b);
78     if (b > a)
79     c=a, a=b, b=c;
80    
81     while (b != 0) {
82     c = a%b;
83     a = b;
84     b = c;
85     }
86     return a;
87     }
88    
89     int BLAST_Gdb3(int* a, int* b, int* c) {
90     int g;
91     if (*b == 0)
92     g = BLAST_Gcd(*a, *c);
93     else
94     g = BLAST_Gcd(*a, BLAST_Gcd(*b, *c));
95     if (g > 1) {
96     *a /= g;
97     *b /= g;
98     *c /= g;
99     }
100     return g;
101     }
102    
103    
104 gpertea 171 uint16 get6mer(char* p) {
105     uint16 r=gdna2bit(p,3);
106     if (*p) {
107     uint16 v=gdna2bit(p,3);
108     r |= (v<<8);
109     }
110     return r;
111     }
112 gpertea 93
113     //signal that a diagonal is invalid
114     static const int kInvalidOffset = -2;
115    
116     int s_FindFirstMismatch(const char *seq1, int len1,
117     const char *seq2, int len2,
118     int seq1_index, int seq2_index,
119     //bool &fence_hit,
120     bool reverse) {
121     int start_index = seq1_index;
122     /* Sentry detection here should be relatively inexpensive: The
123     sentry value cannot appear in the query, so detection only
124     needs to be done at exit from the subject-query matching loop.
125     For uncompressed sequences, ambiguities in the query (i.e. seq1)
126     always count as mismatches */
127     if (reverse) {
128     while (seq1_index < len1 && seq2_index < len2 &&
129     //seq1[len1-1 - seq1_index] < 4 &&
130     seq1[len1-1 - seq1_index] == seq2[len2-1 - seq2_index]) {
131     ++seq1_index;
132     ++seq2_index;
133     }
134     //if (seq2_index < len2 && seq2[len2-1-seq2_index] == FENCE_SENTRY) {
135     //if len2-1-seq2_index<=0) {
136     // fence_hit = true;
137     // }
138     }
139     else { //forward lookup
140 gpertea 171 while (seq1_index < len1 && seq2_index < len2 &&
141 gpertea 93 //seq1[seq1_index] < 4 &&
142     seq1[seq1_index] == seq2[seq2_index]) {
143     ++seq1_index;
144     ++seq2_index;
145     }
146     //if (seq2_index < len2 && seq2[seq2_index] == FENCE_SENTRY) {
147     //if (seq2_index==len2) {
148     // fence_hit = true;
149     //}
150     }
151     return seq1_index - start_index;
152     }
153    
154    
155    
156     /** During the traceback for a non-affine greedy alignment,
157     compute the diagonal that will result from the next
158     traceback operation
159    
160     @param last_seq2_off Array of offsets into the second sequence;
161     last_seq2_off[d][k] gives the largest offset into
162     the second sequence that lies on diagonal k and
163     has distance d [in]
164     @param d Starting distance [in]
165     @param diag Index of diagonal that produced the starting distance [in]
166     @param seq2_index The offset into the second sequence after the traceback
167     operation has completed [out]
168     @return The diagonal resulting from the next traceback operation
169     being applied
170     */
171     int s_GetNextNonAffineTback(int **last_seq2_off, int d,
172     int diag, int *seq2_index) {
173     // choose the traceback operation that results in the
174     // largest seq2 offset at this point, then compute the
175     // new diagonal that is implied by the operation
176     if (last_seq2_off[d-1][diag-1] >
177     GMAX(last_seq2_off[d-1][diag], last_seq2_off[d-1][diag+1])) {
178     *seq2_index = last_seq2_off[d-1][diag-1];
179     return diag - 1; // gap in seq2
180     }
181     if (last_seq2_off[d-1][diag] > last_seq2_off[d-1][diag+1]) {
182     *seq2_index = last_seq2_off[d-1][diag];
183     return diag; // match
184     }
185     *seq2_index = last_seq2_off[d-1][diag+1];
186     return diag + 1; // gap in seq1
187     }
188    
189    
190     int GXGreedyExtend(const char* seq1, int len1,
191     const char* seq2, int len2,
192     bool reverse, int xdrop_threshold,
193     int match_cost, int mismatch_cost,
194     int& seq1_align_len, int& seq2_align_len,
195 gpertea 101 CGreedyAlignData& aux_data,
196 gpertea 93 GXEditScript *edit_block) {
197     //GapPrelimEditBlock *edit_block,
198     //bool& fence_hit, SGreedySeed *seed) {
199     int seq1_index;
200     int seq2_index;
201     int index;
202     int d;
203     int k;
204     int diag_lower, diag_upper;
205     int max_dist;
206     int diag_origin;
207     int best_dist;
208     int best_diag;
209     int** last_seq2_off;
210     int* max_score;
211     int xdrop_offset;
212     int longest_match_run;
213     bool end1_reached, end2_reached;
214     GXMemPool* mem_pool;
215 gpertea 171
216 gpertea 93 /* ordinary dynamic programming alignment, for each offset
217     in seq1, walks through offsets in seq2 until an X-dropoff
218 gpertea 171 test fails, saving the best score encountered along
219 gpertea 93 the way. Instead of score, this code tracks the 'distance'
220     (number of mismatches plus number of gaps) between seq1
221     and seq2. Instead of walking through sequence offsets, it
222     walks through diagonals that can achieve a given distance.
223 gpertea 171
224 gpertea 93 Note that in what follows, the numbering of diagonals implies
225 gpertea 171 a dot matrix where increasing seq1 offsets go to the right on
226 gpertea 93 the x axis, and increasing seq2 offsets go up the y axis.
227     The gapped alignment thus proceeds up and to the right in
228     the graph, and diagonals are numbered increasing to the right */
229    
230     best_dist = 0;
231     best_diag = 0;
232    
233     /* set the number of distinct distances the algorithm will
234 gpertea 171 examine in the search for an optimal alignment. The
235     heuristic worst-case running time of the algorithm is
236 gpertea 93 O(max_dist**2 + (len1+len2)); for sequences which are
237     very similar, the average running time will be sig-
238     nificantly better than this */
239    
240     max_dist = GMIN(GREEDY_MAX_COST,
241 gpertea 109 (len2/GREEDY_MAX_COST_FRACTION + 1));
242 gpertea 93
243     /* the main loop assumes that the index of all diagonals is
244 gpertea 171 biased to lie in the middle of allocated bookkeeping
245 gpertea 93 structures */
246    
247     diag_origin = max_dist + 2;
248    
249     // last_seq2_off[d][k] is the largest offset into seq2 that
250     // lies on diagonal k and has distance d
251    
252     last_seq2_off = aux_data.last_seq2_off;
253    
254     /* Instead of tracking the best alignment score and using
255     xdrop_theshold directly, track the best score for each
256     unique distance and use the best score for some previously
257     computed distance to implement the X-dropoff test.
258    
259     xdrop_offset gives the distance backwards in the score
260     array to look */
261    
262 gpertea 171 xdrop_offset = (xdrop_threshold + match_cost / 2) /
263 gpertea 93 (match_cost + mismatch_cost) + 1;
264 gpertea 171
265 gpertea 93 // find the offset of the first mismatch between seq1 and seq2
266    
267     index = s_FindFirstMismatch(seq1, len1, seq2, len2, 0, 0, reverse);
268     // fence_hit, reverse, rem);
269    
270     // update the extents of the alignment, and bail out
271     // early if no further work is needed
272    
273     seq1_align_len = index;
274     seq2_align_len = index;
275     seq1_index = index;
276     /*
277     seed->start_q = 0;
278     seed->start_s = 0;
279     seed->match_length = index;
280     */
281     longest_match_run = index;
282    
283     if (index == len1 || index == len2) {
284     /* Return the number of differences, which is zero here */
285     if (edit_block != NULL)
286     //GapPrelimEditBlockAdd(edit_block, eGapAlignSub, index);
287     edit_block->opRep(index);
288 gpertea 171 return 0;
289 gpertea 93 }
290    
291     // set up the memory pool
292     mem_pool = aux_data.space;
293     if (edit_block == NULL) {
294     mem_pool = NULL;
295     }
296     else if (mem_pool == NULL) {
297     aux_data.space = mem_pool = new GXMemPool();
298 gpertea 171 }
299 gpertea 93 else {
300     mem_pool->refresh();
301     }
302 gpertea 171
303 gpertea 93 /* set up the array of per-distance maximum scores. There
304     are max_diags + xdrop_offset distances to track, the first
305     xdrop_offset of which are 0 */
306    
307     max_score = aux_data.max_score + xdrop_offset;
308     for (index = 0; index < xdrop_offset; index++)
309     aux_data.max_score[index] = 0;
310 gpertea 171
311 gpertea 93 // fill in the initial offsets of the distance matrix
312    
313     last_seq2_off[0][diag_origin] = seq1_index;
314     max_score[0] = seq1_index * match_cost;
315     diag_lower = diag_origin - 1;
316     diag_upper = diag_origin + 1;
317     end1_reached = end2_reached = false;
318    
319     // for each distance
320     for (d = 1; d <= max_dist; d++) {
321     int xdrop_score;
322     int curr_score;
323     int curr_extent = 0;
324     int curr_seq2_index = 0;
325     int curr_diag = 0;
326     int tmp_diag_lower = diag_lower;
327     int tmp_diag_upper = diag_upper;
328    
329     // Assign impossible seq2 offsets to any diagonals that
330     // are not in the range (diag_lower,diag_upper).
331     // These will serve as sentinel values for the inner loop
332     last_seq2_off[d - 1][diag_lower-1] = kInvalidOffset;
333     last_seq2_off[d - 1][diag_lower] = kInvalidOffset;
334     last_seq2_off[d - 1][diag_upper] = kInvalidOffset;
335     last_seq2_off[d - 1][diag_upper+1] = kInvalidOffset;
336    
337     // compute the score for distance d corresponding to the X-dropoff criterion
338    
339 gpertea 171 xdrop_score = max_score[d - xdrop_offset] +
340 gpertea 93 (match_cost + mismatch_cost) * d - xdrop_threshold;
341 gpertea 171 xdrop_score = (int)ceil((double)xdrop_score / (match_cost / 2));
342 gpertea 93
343     // for each diagonal of interest
344     for (k = tmp_diag_lower; k <= tmp_diag_upper; k++) {
345     /* find the largest offset into seq2 that increases
346     the distance from d-1 to d (i.e. keeps the alignment
347 gpertea 171 from getting worse for as long as possible), then
348 gpertea 93 choose the offset into seq1 that will keep the
349 gpertea 171 resulting diagonal fixed at k
350    
351 gpertea 93 Note that this requires kInvalidOffset+1 to be smaller
352     than any valid offset into seq2, i.e. to be negative */
353    
354     seq2_index = GMAX(last_seq2_off[d - 1][k + 1],
355     last_seq2_off[d - 1][k ]) + 1;
356     seq2_index = GMAX(seq2_index, last_seq2_off[d - 1][k - 1]);
357     seq1_index = seq2_index + k - diag_origin;
358    
359     if (seq2_index < 0 || seq1_index + seq2_index < xdrop_score) {
360    
361     // if no valid diagonal can reach distance d, or the
362     // X-dropoff test fails, narrow the range of diagonals
363     // to test and skip to the next diagonal
364     if (k == diag_lower)
365     diag_lower++;
366     else
367     last_seq2_off[d][k] = kInvalidOffset;
368     continue;
369     }
370     diag_upper = k;
371 gpertea 171
372     /* slide down diagonal k until a mismatch
373 gpertea 93 occurs. As long as only matches are encountered,
374     the current distance d will not change */
375    
376     index = s_FindFirstMismatch(seq1, len1, seq2, len2,
377     seq1_index, seq2_index, reverse);
378     //fence_hit, reverse, rem);
379     if (index > longest_match_run) {
380     //seed->start_q = seq1_index;
381     //seed->start_s = seq2_index;
382     //seed->match_length = index;
383     longest_match_run = index;
384     }
385     seq1_index += index;
386     seq2_index += index;
387    
388     // set the new largest seq2 offset that achieves
389     // distance d on diagonal k
390    
391     last_seq2_off[d][k] = seq2_index;
392    
393     // since all values of k are constrained to have the
394     // same distance d, the value of k which maximizes the
395     // alignment score is the one that covers the most of seq1 and seq2
396     if (seq1_index + seq2_index > curr_extent) {
397     curr_extent = seq1_index + seq2_index;
398     curr_seq2_index = seq2_index;
399     curr_diag = k;
400     }
401    
402     /* clamp the bounds on diagonals to avoid walking off
403     either sequence. Because the bounds increase by at
404     most one for each distance, diag_lower and diag_upper
405     can each be of size at most max_diags+2 */
406    
407     if (seq2_index == len2) {
408 gpertea 171 diag_lower = k + 1;
409 gpertea 93 end2_reached = true;
410     }
411     if (seq1_index == len1) {
412 gpertea 171 diag_upper = k - 1;
413 gpertea 93 end1_reached = true;
414     }
415     } // end loop over diagonals
416    
417     // compute the maximum score possible for distance d
418 gpertea 171 curr_score = curr_extent * (match_cost / 2) -
419 gpertea 93 d * (match_cost + mismatch_cost);
420     // if this is the best score seen so far, update the
421     // statistics of the best alignment
422     if (curr_score > max_score[d - 1]) {
423     max_score[d] = curr_score;
424     best_dist = d;
425     best_diag = curr_diag;
426     seq2_align_len = curr_seq2_index;
427     seq1_align_len = curr_seq2_index + best_diag - diag_origin;
428 gpertea 171 }
429 gpertea 93 else {
430     max_score[d] = max_score[d - 1];
431     }
432    
433     // alignment has finished if the lower and upper bounds
434     // on diagonals to check have converged to each other
435    
436     if (diag_lower > diag_upper)
437     break;
438    
439 gpertea 171 /* set up for the next distance to examine. Because the
440     bounds increase by at most one for each distance,
441     diag_lower and diag_upper can each be of size at
442 gpertea 93 most max_diags+2 */
443    
444     if (!end2_reached)
445 gpertea 171 diag_lower--;
446 gpertea 93 if (!end1_reached)
447     diag_upper++;
448    
449     if (edit_block == NULL) {
450     // if no traceback is specified, the next row of
451     // last_seq2_off can reuse previously allocated memory
452     //TODO FIXME The following assumes two arrays of
453     // at least max_dist+4 int's have already been allocated
454     last_seq2_off[d + 1] = last_seq2_off[d - 1];
455     }
456     else {
457     // traceback requires all rows of last_seq2_off to be saved,
458     // so a new row must be allocated
459     last_seq2_off[d + 1] = (int*)mem_pool->getByteSpace((diag_upper - diag_lower + 7)*sizeof(int));
460     // move the origin for this row backwards
461     //TODO FIXME: dubious pointer arithmetic ?!
462     //last_seq2_off[d + 1] = last_seq2_off[d + 1] - diag_lower + 2;
463     }
464     } // end loop over distinct distances
465    
466 gpertea 171
467 gpertea 93 if (edit_block == NULL)
468     return best_dist;
469    
470     //---- perform traceback
471 gpertea 171 d = best_dist;
472 gpertea 93 seq1_index = seq1_align_len;
473     seq2_index = seq2_align_len;
474     // for all positive distances
475    
476     //if (fence_hit && *fence_hit)
477     // goto done;
478     if (index==len1 || index==len2) d=0;
479     while (d > 0) {
480     int new_diag;
481     int new_seq2_index;
482    
483     /* retrieve the value of the diagonal after the next
484     traceback operation. best_diag starts off with the
485     value computed during the alignment process */
486    
487 gpertea 171 new_diag = s_GetNextNonAffineTback(last_seq2_off, d,
488 gpertea 93 best_diag, &new_seq2_index);
489    
490     if (new_diag == best_diag) {
491     // same diagonal: issue a group of substitutions
492     if (seq2_index - new_seq2_index > 0) {
493     edit_block->opRep(seq2_index - new_seq2_index);
494     }
495 gpertea 171 }
496 gpertea 93 else if (new_diag < best_diag) {
497     // smaller diagonal: issue a group of substitutions
498     // and then a gap in seq2 */
499     if (seq2_index - new_seq2_index > 0) {
500     edit_block->opRep(seq2_index - new_seq2_index);
501     }
502     //GapPrelimEditBlockAdd(edit_block, eGapAlignIns, 1);
503     edit_block->opIns(1);
504 gpertea 171 }
505 gpertea 93 else {
506     // larger diagonal: issue a group of substitutions
507     // and then a gap in seq1
508     if (seq2_index - new_seq2_index - 1 > 0) {
509     edit_block->opRep(seq2_index - new_seq2_index - 1);
510     }
511     edit_block->opDel(1);
512     }
513 gpertea 171 d--;
514     best_diag = new_diag;
515     seq2_index = new_seq2_index;
516 gpertea 93 }
517     //done:
518     // handle the final group of substitutions back to distance zero,
519     // i.e. back to offset zero of seq1 and seq2
520     //GapPrelimEditBlockAdd(edit_block, eGapAlignSub,
521     // last_seq2_off[0][diag_origin]);
522     edit_block->opRep(last_seq2_off[0][diag_origin]);
523     if (!reverse)
524     edit_block->reverse();
525     return best_dist;
526     }
527    
528     void printEditScript(GXEditScript* ed_script) {
529     uint i;
530     if (ed_script==NULL || ed_script->opnum == 0)
531     return;
532     for (i=0; i<ed_script->opnum; i++) {
533     int num=((ed_script->ops[i]) >> 2);
534     unsigned char op_type = 3 - ( ed_script->ops[i] & gxEDIT_OP_MASK );
535     if (op_type == 3)
536     GError("Error: printEditScript encountered op_type 3 ?!\n");
537 gpertea 171 GMessage("%d%c ", num, xgapcodes[op_type]);
538 gpertea 93 }
539     GMessage("\n");
540     }
541    
542     GXAlnInfo* GreedyAlign(const char* q_seq, int q_alnstart, const char* s_seq, int s_alnstart,
543     bool editscript, int reward, int penalty, int xdrop) {
544     int q_max=strlen(q_seq); //query
545     int s_max=strlen(s_seq); //subj
546     return GreedyAlignRegion(q_seq, q_alnstart, q_max,
547 gpertea 101 s_seq, s_alnstart, s_max, reward, penalty, xdrop, NULL, NULL, editscript);
548 gpertea 93 }
549    
550     struct GXSeedTable {
551     int a_num, b_num;
552     int a_cap, b_cap;
553     char* xc;
554     GXSeedTable(int a=12, int b=255) {
555     a_cap=0;
556     b_cap=0;
557     a_num=0;
558     b_num=0;
559     xc=NULL;
560     init(a,b);
561     }
562     ~GXSeedTable() {
563     GFREE(xc);
564     }
565     void init(int a, int b) {
566     a_num=a;
567     b_num=b;
568     bool resize=false;
569     if (b_num>b_cap) { resize=true; b_cap=b_num;}
570     if (a_num>a_cap) { resize=true; a_cap=a_num;}
571     if (resize) {
572     GFREE(xc);
573     GCALLOC(xc, (a_num*b_num));
574     }
575     else {
576     //just clear up to a_max, b_max
577     memset((void*)xc, 0, (a_num*b_num));
578     }
579     }
580     char& x(int ax, int by) {
581     return xc[by*a_num+ax];
582     }
583    
584     };
585    
586 gpertea 99 const int a_m_score=2; //match score
587     const int a_mis_score=-3; //mismatch
588     const int a_dropoff_score=7;
589 gpertea 101 const int a_min_score=12; //at least 6 bases full match
590 gpertea 93
591 gpertea 99 // ------------------ adapter matching - simple k-mer seed & extend, no indels for now
592     //when a k-mer match is found, simply try to extend the alignment using a drop-off scheme
593     //check minimum score and
594     //for 3' adapter trimming:
595     // require that the right end of the alignment for either the adaptor OR the read must be
596     // < 3 distance from its right end
597     // for 5' adapter trimming:
598     // require that the left end of the alignment for either the adaptor OR the read must
599     // be at coordinate < 3 from start
600    
601     bool extendUngapped(const char* a, int alen, int ai,
602     const char* b, int blen, int bi, int& mlen, int& l5, int& l3, bool end5=false) {
603     //so the alignment starts at ai in a, bi in b, with a perfect match of length mlen
604     //if (debug) {
605     // GMessage(">> in %s\n\textending hit: %s at position %d\n", a, (dbg.substr(bi, mlen)).chars(), ai);
606     // }
607     int a_l=ai; //alignment coordinates on a
608     int a_r=ai+mlen-1;
609     int b_l=bi; //alignment coordinates on b
610     int b_r=bi+mlen-1;
611     int ai_maxscore=ai;
612     int bi_maxscore=bi;
613     int score=mlen*a_m_score;
614     int maxscore=score;
615     int mism5score=a_mis_score;
616     if (end5 && ai<(alen>>1)) mism5score-=2; // increase penalty for mismatches at 5' end
617     //try to extend to the left first, if possible
618     while (ai>0 && bi>0) {
619     ai--;
620     bi--;
621     score+= (a[ai]==b[bi])? a_m_score : mism5score;
622     if (score>maxscore) {
623     ai_maxscore=ai;
624     bi_maxscore=bi;
625     maxscore=score;
626     }
627     else if (maxscore-score>a_dropoff_score) break;
628     }
629     a_l=ai_maxscore;
630     b_l=bi_maxscore;
631     //now extend to the right
632     ai_maxscore=a_r;
633     bi_maxscore=b_r;
634     ai=a_r;
635     bi=b_r;
636     score=maxscore;
637     //sometimes there are extra As at the end of the read, ignore those
638     if (a[alen-2]=='A' && a[alen-1]=='A') {
639     alen-=2;
640     while (a[alen-1]=='A' && alen>ai) alen--;
641     }
642     while (ai<alen-1 && bi<blen-1) {
643     ai++;
644     bi++;
645     //score+= (a[ai]==b[bi])? a_m_score : a_mis_score;
646     if (a[ai]==b[bi]) { //match
647     score+=a_m_score;
648     if (ai>=alen-2) {
649     score+=a_m_score-(alen-ai-1);
650     }
651     }
652     else { //mismatch
653     score+=a_mis_score;
654     }
655     if (score>maxscore) {
656     ai_maxscore=ai;
657     bi_maxscore=bi;
658     maxscore=score;
659     }
660     else if (maxscore-score>a_dropoff_score) break;
661     }
662     a_r=ai_maxscore;
663     b_r=bi_maxscore;
664     int a_ovh3=alen-a_r-1;
665     int b_ovh3=blen-b_r-1;
666     int mmovh3=(a_ovh3<b_ovh3)? a_ovh3 : b_ovh3;
667     int mmovh5=(a_l<b_l)? a_l : b_l;
668     if (maxscore>=a_min_score && mmovh3<2 && mmovh5<2) {
669     if (a_l<a_ovh3) {
670     //adapter closer to the left end (typical for 5' adapter)
671     l5=a_r+1;
672     l3=alen-1;
673     }
674     else {
675     //adapter matching at the right end (typical for 3' adapter)
676     l5=0;
677     l3=a_l-1;
678     }
679     return true;
680     }
681     //do not trim:
682     l5=0;
683     l3=alen-1;
684     return false;
685     }
686    
687    
688 gpertea 171 GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len,
689     GVec<uint16> amers[], const char* seqb, int b_len) {
690 gpertea 93 //overlap of right (3') end of seqb
691     //hash the first 12 bases of seqa:
692     int aimin=0;
693     int aimax=GMIN(9,(a_len-4));
694     int bimin=GMAX(0,(b_len-a_len-2));//allow 2 extraneous bases, just in case there is some garbage at the end
695     int bimax=b_len-4;
696     int a_maxlen=aimax+4; //number of rows in the diagonal matrix
697     int b_maxlen=b_len; //number of cols in the diagonal matrix
698     GXSeedTable gx(a_maxlen, b_maxlen);
699     GXBandSet* diagstrips=new GXBandSet(a_maxlen, b_maxlen); //set of overlapping 3-diagonal strips
700     for (int ai=aimin;ai<=aimax;ai++) {
701     int32* av=(int32 *)(&seqa[ai]);
702     //for (int bi=b_len-4;bi>=bimin;bi--) {
703     for (int bi=bimin;bi<=bimax;bi++) {
704     if (gx.x(ai,bi))
705     continue; //already have a previous seed covering this region of this diagonal
706     int32* bv=(int32 *)(&seqb[bi]);
707     if (*av == *bv) {
708     //word match
709     //see if we can extend to the right
710     gx.x(ai+1,bi+1)=1;
711     gx.x(ai+2,bi+2)=1;
712     gx.x(ai+3,bi+3)=1;
713     int aix=ai+4;
714     int bix=bi+4;
715     int len=4;
716     while (bix<b_maxlen && aix<a_maxlen && seqa[aix]==seqb[bix])
717     {
718     gx.x(aix,bix)=1;
719     aix++;bix++;
720     len++;
721     }
722 gpertea 101 if (len>7) {
723 gpertea 99 //heuristics: very likely the best we can get
724     int qlen=len;
725     while (bix<b_len && aix<a_len && seqa[aix]==seqb[bix]) {
726     aix++;
727     bix++;
728     qlen++;
729     }
730 gpertea 101 if (qlen>10) { //found a quick match shortcut
731 gpertea 99 diagstrips->qmatch=new GXSeed(ai,bi,qlen);
732     return diagstrips;
733     }
734     }
735 gpertea 93 GXSeed* newseed=new GXSeed(ai,bi,len);
736     seeds.Add(newseed);
737     diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
738 gpertea 104 //special last resort terminal match to be used if no better alignment is there
739     if (len>4 && ai==0 && (bi+len==b_len || bi+len==b_len-1)) diagstrips->tmatch=newseed;
740 gpertea 93 }
741     }
742     }//for each 4-mer at the beginning of seqa
743     for (int i=0;i<diagstrips->Count();i++) {
744     diagstrips->Get(i)->finalize(); //adjust scores due to overlaps or gaps between seeds
745     }
746     diagstrips->setSorted(true); //sort by score
747     return diagstrips;
748     }
749    
750 gpertea 171 GXBandSet* collectSeeds_L(GList<GXSeed>& seeds, const char* seqa, int a_len,
751     GVec<uint16> amers[], const char* seqb, int b_len) {
752 gpertea 93 //overlap of left (5') end of seqb
753     //hash the last 12 bases of seqa:
754     int aimin=GMAX(0,(a_len-16));
755     int aimax=a_len-4;
756     int bimin=0;
757     int bimax=GMIN((a_len-2), (b_len-4));
758     int a_maxlen=aimax+4; //number of rows in the diagonal matrix
759     int b_maxlen=b_len; //number of cols in the diagonal matrix
760     //gx.init(a_maxlen, b_maxlen);
761     GXSeedTable gx(a_maxlen, b_maxlen);
762     GXBandSet* diagstrips=new GXBandSet(a_maxlen, b_maxlen); //set of overlapping 3-diagonal strips
763     for (int ai=aimin;ai<=aimax;ai++) {
764     int32* av=(int32 *)(&seqa[ai]);
765     for (int bi=bimin;bi<=bimax;bi++) {
766     if (gx.x(ai,bi))
767     continue; //already have a previous seed covering this region of this diagonal
768     int32* bv=(int32 *)(&seqb[bi]);
769     if (*av == *bv) {
770     //word match
771     //see if we can extend to the right
772     gx.x(ai+1,bi+1)=1;
773     gx.x(ai+2,bi+2)=1;
774     gx.x(ai+3,bi+3)=1;
775     int aix=ai+4;
776     int bix=bi+4;
777     int len=4;
778     while (bix<b_maxlen && aix<a_maxlen && seqa[aix]==seqb[bix])
779     {
780     gx.x(aix,bix)=1;
781     aix++;bix++;
782     len++;
783     }
784 gpertea 101 if (len>7) {
785     //heuristics: very likely the best we can get
786     int qlen=len;
787     while (bix<b_len && aix<a_len && seqa[aix]==seqb[bix]) {
788     aix++;
789     bix++;
790     qlen++;
791     }
792     if (qlen>10) { //found a quick match shortcut
793     diagstrips->qmatch=new GXSeed(ai,bi,qlen);
794     return diagstrips;
795     }
796     }
797 gpertea 93 GXSeed* newseed=new GXSeed(ai,bi,len);
798     seeds.Add(newseed);
799     diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
800 gpertea 104 //special last resort terminal match to be used if no better alignment is there
801     if (len>4 && bi==0 && ai+len==a_len) diagstrips->tmatch=newseed;
802 gpertea 93 }
803     }
804     }//for each 4-mer at the end of seqa
805     for (int i=0;i<diagstrips->Count();i++) {
806     diagstrips->Get(i)->finalize(); //adjust scores due to overlaps or gaps between seeds
807     }
808     diagstrips->setSorted(true); //sort by score
809     return diagstrips;
810     }
811    
812     int cmpSeedScore(const pointer p1, const pointer p2) {
813     //return (((GXSeed*)s2)->len-((GXSeed*)s1)->len);
814     GXSeed* s1=(GXSeed*)p1;
815     GXSeed* s2=(GXSeed*)p2;
816     if (s1->len==s2->len) {
817     return (s1->b_ofs-s2->b_ofs);
818     }
819     else return (s2->len-s1->len);
820     }
821    
822     int cmpSeedDiag(const pointer p1, const pointer p2) {
823     GXSeed* s1=(GXSeed*)p1;
824     GXSeed* s2=(GXSeed*)p2;
825     return ((s1->b_ofs-s1->a_ofs)-(s2->b_ofs-s2->a_ofs));
826     }
827    
828 gpertea 101
829     int cmpDiagBands_R(const pointer p1, const pointer p2) {
830     //return (((GXSeed*)s2)->len-((GXSeed*)s1)->len);
831     GXBand* b1=(GXBand*)p1;
832     GXBand* b2=(GXBand*)p2;
833     if (b1->score==b2->score) {
834     return (b2->w_min_b-b1->w_min_b);
835     }
836     else return (b2->score-b1->score);
837     }
838    
839    
840    
841 gpertea 93 GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max,
842 gpertea 101 const char* s_seq, int s_alnstart, int s_max,
843     int reward, int penalty, int xdrop, CGreedyAlignData* gxmem,
844     CAlnTrim* trim, bool editscript) {
845 gpertea 93 GXEditScript* ed_script_fwd = NULL;
846     GXEditScript* ed_script_rev = NULL;
847     if ( q_alnstart>q_max || q_alnstart<1 || s_alnstart>s_max || s_alnstart<1 )
848     GError("GreedyAlign() Error: invalid anchor coordinate.\n");
849     q_alnstart--;
850     s_alnstart--;
851     if (q_seq==NULL || q_seq[0]==0 || s_seq==NULL || s_seq[0]==0)
852     GError("GreedyAlign() Error: attempt to use an empty sequence string!\n");
853     if (q_seq[q_alnstart]!=s_seq[s_alnstart])
854     GError("GreedyAlign() Error: improper anchor (mismatch)!\n");
855     int q_ext_l=0, q_ext_r=0, s_ext_l=0, s_ext_r=0;
856     const char* q=q_seq+q_alnstart;
857     int q_avail=q_max-q_alnstart;
858     const char* s=s_seq+s_alnstart;
859     int s_avail=s_max-s_alnstart;
860     if (penalty<0) penalty=-penalty;
861 gpertea 101 int MIN_GREEDY_SCORE=5*reward; //minimum score for an alignment to be reported for 0 diffs
862 gpertea 93 GXAlnInfo* alninfo=NULL;
863     bool freeAlnMem=(gxmem==NULL);
864     if (freeAlnMem) {
865 gpertea 101 gxmem=new CGreedyAlignData(reward, penalty, xdrop);
866 gpertea 93 }
867     else gxmem->reset();
868     int retscore = 0;
869     int numdiffs = 0;
870 gpertea 101 if (trim!=NULL && trim->type==galn_TrimLeft) {
871 gpertea 93 if (editscript)
872     ed_script_rev=new GXEditScript();
873    
874 gpertea 101 int numdiffs_l = GXGreedyExtend(s_seq, s_alnstart, q_seq, q_alnstart, true, xdrop,
875 gpertea 93 reward, penalty, s_ext_l, q_ext_l, *gxmem, ed_script_rev);
876     //check this extension here and bail out if it's not a good extension
877 gpertea 101 if (s_alnstart+1-s_ext_l>trim->boundary) {
878 gpertea 93 delete ed_script_rev;
879     if (freeAlnMem) delete gxmem;
880     return NULL;
881     }
882    
883     if (editscript)
884     ed_script_fwd=new GXEditScript();
885     int numdiffs_r = GXGreedyExtend(s, s_avail, q, q_avail, false, xdrop,
886     reward, penalty, s_ext_r, q_ext_r, *gxmem, ed_script_fwd);
887     numdiffs=numdiffs_r+numdiffs_l;
888     //convert num diffs to actual score
889 gpertea 101 retscore = (q_ext_r + s_ext_r + q_ext_l + s_ext_l)*reward/2 - numdiffs*(reward+penalty);
890 gpertea 93 if (editscript)
891     ed_script_rev->Append(ed_script_fwd); //combine the two extensions
892     }
893     else {
894     if (editscript) {
895     ed_script_fwd=new GXEditScript();
896     }
897     int numdiffs_r = GXGreedyExtend(s, s_avail, q, q_avail, false, xdrop,
898     reward, penalty, s_ext_r, q_ext_r, *gxmem, ed_script_fwd);
899     //check extension here and bail out if not a good right extension
900     //assuming s_max is really at the right end of s_seq
901 gpertea 109 if (trim!=NULL && trim->type==galn_TrimRight &&
902     s_alnstart+s_ext_r<trim->boundary) {
903 gpertea 93 delete ed_script_fwd;
904     if (freeAlnMem) delete gxmem;
905     return NULL;
906     }
907     if (editscript)
908     ed_script_rev=new GXEditScript();
909     int numdiffs_l = GXGreedyExtend(s_seq, s_alnstart, q_seq, q_alnstart, true, xdrop,
910     reward, penalty, s_ext_l, q_ext_l, *gxmem, ed_script_rev);
911     //convert num diffs to actual score
912     numdiffs=numdiffs_r+numdiffs_l;
913 gpertea 101 retscore = (q_ext_r + s_ext_r + q_ext_l + s_ext_l)*reward/2 - numdiffs*(reward+penalty);
914 gpertea 93 if (editscript)
915     ed_script_rev->Append(ed_script_fwd); //combine the two extensions
916     }
917    
918     if (retscore>=MIN_GREEDY_SCORE) {
919     alninfo=new GXAlnInfo(q_seq, q_alnstart+1-q_ext_l,q_alnstart+q_ext_r, s_seq, s_alnstart+1-s_ext_l, s_alnstart+s_ext_r);
920     int hsp_length = GMIN(q_ext_l+q_ext_r, s_ext_l+s_ext_r);
921     alninfo->score=retscore;
922     alninfo->pid = 100 * (1 - ((double) numdiffs) / hsp_length);
923     #ifdef GDEBUG
924 gpertea 104 //if (ed_script_rev) {
925     // GMessage("Final Edit script ::: ");
926     // printEditScript(ed_script_rev);
927     // }
928 gpertea 93 #endif
929     alninfo->editscript=ed_script_rev;
930     alninfo->gapinfo = new CAlnGapInfo(ed_script_rev, alninfo->ql-1, alninfo->sl-1);
931     }
932     else {
933     if (freeAlnMem) delete gxmem;
934     delete ed_script_rev;
935     delete alninfo;
936     alninfo=NULL;
937     }
938     delete ed_script_fwd;
939     return alninfo;
940     }
941 gpertea 96
942 gpertea 101 GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max,
943     const char* s_seq, int s_alnstart, int s_max, CGreedyAlignData* gxmem,
944     CAlnTrim* trim, bool editscript) {
945     int reward=2;
946     int penalty=3;
947     int xdrop=8;
948     if (gxmem) {
949     reward=gxmem->match_reward;
950     penalty=gxmem->mismatch_penalty;
951     xdrop=gxmem->x_drop;
952     }
953     return GreedyAlignRegion(q_seq, q_alnstart, q_max, s_seq, s_alnstart, s_max,
954     reward, penalty, xdrop, gxmem, trim, editscript);
955     }
956    
957 gpertea 171 GXAlnInfo* match_RightEnd(const char* seqa, int seqa_len, GVec<uint16> amers[],
958     const char* seqb, int seqb_len, CGreedyAlignData* gxmem, int min_pid) {
959 gpertea 96 bool editscript=false;
960     #ifdef GDEBUG
961     editscript=true;
962 gpertea 104 // GMessage("==========> matching Right (3') end ========\n");
963 gpertea 96 #endif
964 gpertea 101
965     CAlnTrim trimInfo(galn_TrimRight, seqb, seqb_len);
966 gpertea 96 GList<GXSeed> rseeds(true,true,false);
967 gpertea 171 GXBandSet* alnbands=collectSeeds_R(rseeds, seqa, seqa_len, amers, seqb, seqb_len);
968 gpertea 101 GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal
969 gpertea 99 //did we find a shortcut?
970     if (alnbands->qmatch) {
971 gpertea 104 //#ifdef GDEBUG
972     // GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
973     //#endif
974 gpertea 101 anchor_seeds.Add(alnbands->qmatch);
975     }
976     else {
977     int max_top_bands=5;
978     int top_band_count=0;
979     for (int b=0;b<alnbands->Count();b++) {
980     if (alnbands->Get(b)->score<4) break;
981 gpertea 104 //#ifdef GDEBUG
982     //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
983     //#endif
984 gpertea 101 top_band_count++;
985     GXBand& band=*(alnbands->Get(b));
986     band.seeds.setSorted(cmpSeedScore);
987     anchor_seeds.Add(band.seeds.First());
988     band.tested=true;
989     if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
990     }
991 gpertea 104 //#ifdef GDEBUG
992     //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
993     //#endif
994 gpertea 101 }
995 gpertea 99
996 gpertea 96 GList<GXAlnInfo> galns(true,true,false);
997     for (int i=0;i<anchor_seeds.Count();i++) {
998     GXSeed& aseed=*anchor_seeds[i];
999     int a1=aseed.a_ofs+(aseed.len>>1)+1;
1000     int a2=aseed.b_ofs+(aseed.len>>1)+1;
1001     GXAlnInfo* alninfo=GreedyAlignRegion(seqa, a1, seqa_len,
1002 gpertea 101 seqb, a2, seqb_len, gxmem, &trimInfo, editscript);
1003     if (alninfo && alninfo->pid>=min_pid && trimInfo.validate(alninfo->sl, alninfo->sr, alninfo->pid, alninfo->ql-1))
1004 gpertea 96 galns.AddIfNew(alninfo, true);
1005     else delete alninfo;
1006     }
1007 gpertea 104 //special 3' end case: due to the seed scoring scheme being biased towards the 5' end of the read,
1008     //we should also try some seeds closer to the 3' end
1009 gpertea 101 if (galns.Count()==0) {
1010 gpertea 104 anchor_seeds.Clear();
1011 gpertea 101 alnbands->setSorted(cmpDiagBands_R);
1012     int max_top_bands=4;
1013     int top_band_count=0;
1014 gpertea 104 //#ifdef GDEBUG
1015     //GMessage(":::>> Retrying adjusting sort order.\n");
1016     //#endif
1017     if (alnbands->tmatch) {
1018     //anchor_seeds.setSorted(false);
1019     anchor_seeds.Add(alnbands->tmatch);
1020     }
1021 gpertea 101 for (int b=0;b<alnbands->Count();b++) {
1022     if (alnbands->Get(b)->score<4) break;
1023 gpertea 104 //#ifdef GDEBUG
1024     //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1025     //#endif
1026 gpertea 101 if (alnbands->Get(b)->tested) continue;
1027     top_band_count++;
1028     GXBand& band=*(alnbands->Get(b));
1029     band.seeds.setSorted(cmpSeedScore);
1030     anchor_seeds.Add(band.seeds.First());
1031     if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1032     }
1033 gpertea 104 //#ifdef GDEBUG
1034     //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1035     //#endif
1036 gpertea 101 for (int i=0;i<anchor_seeds.Count();i++) {
1037     GXSeed& aseed=*anchor_seeds[i];
1038     int a1=aseed.a_ofs+(aseed.len>>1)+1;
1039     int a2=aseed.b_ofs+(aseed.len>>1)+1;
1040     GXAlnInfo* alninfo=GreedyAlignRegion(seqa, a1, seqa_len,
1041     seqb, a2, seqb_len, gxmem, &trimInfo, editscript);
1042     if (alninfo && alninfo->pid>=min_pid && trimInfo.validate(alninfo->sl, alninfo->sr, alninfo->pid, alninfo->ql-1))
1043     galns.AddIfNew(alninfo, true);
1044     else delete alninfo;
1045     }
1046     }
1047 gpertea 96 //---- done
1048     delete alnbands;
1049     if (galns.Count()) {
1050     GXAlnInfo* bestaln=galns.Shift();
1051 gpertea 104 #ifdef GDEBUG
1052     GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr,
1053     bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid);
1054     if (bestaln->gapinfo!=NULL) {
1055     bestaln->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1056     }
1057     #endif
1058    
1059 gpertea 96 return bestaln;
1060     }
1061     else return NULL;
1062     }
1063    
1064 gpertea 171 GXAlnInfo* match_LeftEnd(const char* seqa, int seqa_len, GVec<uint16> amers[],
1065     const char* seqb, int seqb_len, CGreedyAlignData* gxmem, int min_pid) {
1066 gpertea 96 bool editscript=false;
1067     #ifdef GDEBUG
1068     editscript=true;
1069 gpertea 104 //GMessage("==========> matching Left (5') end ========\n");
1070 gpertea 96 #endif
1071 gpertea 101 CAlnTrim trimInfo(galn_TrimLeft, seqb, seqb_len);
1072 gpertea 96 GList<GXSeed> rseeds(true,true,false);
1073 gpertea 171 GXBandSet* alnbands=collectSeeds_L(rseeds, seqa, seqa_len, amers, seqb, seqb_len);
1074 gpertea 96 GList<GXAlnInfo> galns(true,true,false);
1075     GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal
1076    
1077 gpertea 101 if (alnbands->qmatch) {
1078 gpertea 104 //#ifdef GDEBUG
1079     // GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
1080     //#endif
1081 gpertea 101 anchor_seeds.Add(alnbands->qmatch);
1082     }
1083     else {
1084     int max_top_bands=5;
1085     int top_band_count=0;
1086     for (int b=0;b<alnbands->Count();b++) {
1087     if (alnbands->Get(b)->score<4) break;
1088 gpertea 104 //#ifdef GDEBUG
1089     //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1090     //#endif
1091 gpertea 101 top_band_count++;
1092     GXBand& band=*(alnbands->Get(b));
1093     band.seeds.setSorted(cmpSeedScore);
1094     anchor_seeds.Add(band.seeds.First());
1095     if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1096     }
1097 gpertea 104 //#ifdef GDEBUG
1098     //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1099     //#endif
1100 gpertea 101 }
1101 gpertea 96 for (int i=0;i<anchor_seeds.Count();i++) {
1102     GXSeed& aseed=*anchor_seeds[i];
1103     int a1=aseed.a_ofs+(aseed.len>>1)+1;
1104     int a2=aseed.b_ofs+(aseed.len>>1)+1;
1105     GXAlnInfo* alninfo=GreedyAlignRegion(seqa, a1, seqa_len,
1106 gpertea 101 seqb, a2, seqb_len, gxmem, &trimInfo, editscript);
1107     if (alninfo && alninfo->pid>=min_pid
1108     && trimInfo.validate(alninfo->sl, alninfo->sr, alninfo->pid, seqa_len-alninfo->qr))
1109 gpertea 96 galns.AddIfNew(alninfo, true);
1110     else delete alninfo;
1111     }
1112 gpertea 104 if (galns.Count()==0 && alnbands->tmatch) {
1113     GXSeed& aseed=*alnbands->tmatch;
1114     int a1=aseed.a_ofs+(aseed.len>>1)+1;
1115     int a2=aseed.b_ofs+(aseed.len>>1)+1;
1116     GXAlnInfo* alninfo=GreedyAlignRegion(seqa, a1, seqa_len,
1117     seqb, a2, seqb_len, gxmem, &trimInfo, editscript);
1118     if (alninfo) galns.Add(alninfo);
1119     }
1120 gpertea 96 #ifdef GDEBUG
1121 gpertea 104 /*
1122 gpertea 96 for (int i=0;i<galns.Count();i++) {
1123     GXAlnInfo* alninfo=galns[i];
1124     GMessage("a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", alninfo->ql, alninfo->qr,
1125     alninfo->sl, alninfo->sr, alninfo->score, alninfo->pid);
1126     if (alninfo->gapinfo!=NULL) {
1127     GMessage("Alignment:\n");
1128     alninfo->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1129     }
1130     }
1131 gpertea 104 */
1132 gpertea 96 #endif
1133     //---- done
1134     delete alnbands;
1135     if (galns.Count()) {
1136     GXAlnInfo* bestaln=galns.Shift();
1137 gpertea 104 #ifdef GDEBUG
1138     GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr,
1139     bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid);
1140     if (bestaln->gapinfo!=NULL) {
1141     bestaln->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1142     }
1143     #endif
1144 gpertea 96 return bestaln;
1145     }
1146     else return NULL;
1147     }