ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.cpp
Revision: 178
Committed: Thu Feb 16 07:10:45 2012 UTC (7 years, 3 months ago) by gpertea
File size: 40982 byte(s)
Log Message:
fqtrim

Line File contents
1 #include "GAlnExtend.h"
2
3 //greedy gapped alignment extension
4 //(mostly lifted from NCBI's megablast gapped extension code)
5
6 int GXMemPool::kMinSpace = 1000000;
7
8 #ifdef GDEBUG
9 char buf[6]={0x1B,'[', 'n','m','m','\0'};
10
11 void color_fg(int c,FILE* f) {
12 if (f!=stderr && f!=stdout) return;
13 sprintf((char *)(&buf[2]),"%dm",c+30);
14 fwrite(buf,1,strlen(buf), f);
15 }
16
17 void color_bg(int c, FILE* f) {
18 if (f!=stderr && f!=stdout) return;
19 sprintf((char *)(&buf[2]),"%dm",c+40);
20 fwrite(buf,1,strlen(buf),f);
21 };
22
23 void color_resetfg(FILE* f) {
24 if (f!=stderr && f!=stdout) return;
25 sprintf((char *)(&buf[2]),"39m");
26 fwrite(buf,1,strlen(buf), f);
27 };
28
29 void color_resetbg(FILE* f) {
30 if (f!=stderr && f!=stdout) return;
31 sprintf((char *)(&buf[2]),"49m");
32 fwrite(buf,1,strlen(buf), f);
33 }
34
35 void color_reset(FILE* f) {
36 if (f!=stderr && f!=stdout) return;
37 sprintf((char *)(&buf[2]),"0m");
38 fwrite(buf,1,strlen(buf), f);
39 };
40
41 void color_normal(FILE* f) {
42 if (f!=stderr && f!=stdout) return;
43 sprintf((char *)(&buf[2]),"22m");
44 fwrite(buf,1,strlen(buf), f);
45 };
46
47 #endif
48
49
50 char xgapcodes[4]={'S','I', 'D', 'X'};
51
52 int get_last(int **flast_d, int d, int diag, int *row1) {
53 if (flast_d[d-1][diag-1] > GMAX(flast_d[d-1][diag], flast_d[d-1][diag+1])) {
54 *row1 = flast_d[d-1][diag-1];
55 return diag-1;
56 }
57 if (flast_d[d-1][diag] > flast_d[d-1][diag+1]) {
58 *row1 = flast_d[d-1][diag];
59 return diag;
60 }
61 *row1 = flast_d[d-1][diag+1];
62 return diag+1;
63 }
64
65 void GapXEditScript::print() { //debug
66 GapXEditScript* p=this;
67 do {
68 GMessage("%d%c ",p->num, xgapcodes[p->op_type]);
69 } while ((p=p->next)!=NULL);
70 GMessage("\n");
71 }
72
73
74 int BLAST_Gcd(int a, int b) {
75 int c;
76
77 b = abs(b);
78 if (b > a)
79 c=a, a=b, b=c;
80
81 while (b != 0) {
82 c = a%b;
83 a = b;
84 b = c;
85 }
86 return a;
87 }
88
89 int BLAST_Gdb3(int* a, int* b, int* c) {
90 int g;
91 if (*b == 0)
92 g = BLAST_Gcd(*a, *c);
93 else
94 g = BLAST_Gcd(*a, BLAST_Gcd(*b, *c));
95 if (g > 1) {
96 *a /= g;
97 *b /= g;
98 *c /= g;
99 }
100 return g;
101 }
102
103
104 uint16 get6mer(char* p) {
105 uint16 r=gdna2bit(p,3);
106 r <<= 6;
107 r |= gdna2bit(p,3);
108 return r;
109 }
110
111
112 void table6mers(const char* s, int slen, GVec<uint16>* amers[]) {
113 for (uint16 i=0; i <= slen-6; i++) {
114 char* p = (char*)(s+i);
115 uint16 v=get6mer(p);
116 if (amers[v]==NULL) {
117 amers[v]=new GVec<uint16>(1);
118 }
119 amers[v]->Add(i);
120 }
121 }
122
123 GVec<uint16>* match6mer(char* start, GVec<uint16>* amers[]) {
124 //careful: this is broken if start+5 falls beyond the end of the string!
125 uint16 r=get6mer(start);
126 return amers[r];
127 }
128
129 //signal that a diagonal is invalid
130 static const int kInvalidOffset = -2;
131
132 int s_FindFirstMismatch(const char *seq1, int len1,
133 const char *seq2, int len2,
134 int seq1_index, int seq2_index,
135 //bool &fence_hit,
136 bool reverse) {
137 int start_index = seq1_index;
138 /* Sentry detection here should be relatively inexpensive: The
139 sentry value cannot appear in the query, so detection only
140 needs to be done at exit from the subject-query matching loop.
141 For uncompressed sequences, ambiguities in the query (i.e. seq1)
142 always count as mismatches */
143 if (reverse) {
144 while (seq1_index < len1 && seq2_index < len2 &&
145 //seq1[len1-1 - seq1_index] < 4 &&
146 seq1[len1-1 - seq1_index] == seq2[len2-1 - seq2_index]) {
147 ++seq1_index;
148 ++seq2_index;
149 }
150 //if (seq2_index < len2 && seq2[len2-1-seq2_index] == FENCE_SENTRY) {
151 //if len2-1-seq2_index<=0) {
152 // fence_hit = true;
153 // }
154 }
155 else { //forward lookup
156 while (seq1_index < len1 && seq2_index < len2 &&
157 //seq1[seq1_index] < 4 &&
158 seq1[seq1_index] == seq2[seq2_index]) {
159 ++seq1_index;
160 ++seq2_index;
161 }
162 //if (seq2_index < len2 && seq2[seq2_index] == FENCE_SENTRY) {
163 //if (seq2_index==len2) {
164 // fence_hit = true;
165 //}
166 }
167 return seq1_index - start_index;
168 }
169
170
171
172 /** During the traceback for a non-affine greedy alignment,
173 compute the diagonal that will result from the next
174 traceback operation
175
176 @param last_seq2_off Array of offsets into the second sequence;
177 last_seq2_off[d][k] gives the largest offset into
178 the second sequence that lies on diagonal k and
179 has distance d [in]
180 @param d Starting distance [in]
181 @param diag Index of diagonal that produced the starting distance [in]
182 @param seq2_index The offset into the second sequence after the traceback
183 operation has completed [out]
184 @return The diagonal resulting from the next traceback operation
185 being applied
186 */
187 int s_GetNextNonAffineTback(int **last_seq2_off, int d,
188 int diag, int *seq2_index) {
189 // choose the traceback operation that results in the
190 // largest seq2 offset at this point, then compute the
191 // new diagonal that is implied by the operation
192 if (last_seq2_off[d-1][diag-1] >
193 GMAX(last_seq2_off[d-1][diag], last_seq2_off[d-1][diag+1])) {
194 *seq2_index = last_seq2_off[d-1][diag-1];
195 return diag - 1; // gap in seq2
196 }
197 if (last_seq2_off[d-1][diag] > last_seq2_off[d-1][diag+1]) {
198 *seq2_index = last_seq2_off[d-1][diag];
199 return diag; // match
200 }
201 *seq2_index = last_seq2_off[d-1][diag+1];
202 return diag + 1; // gap in seq1
203 }
204
205
206 int GXGreedyExtend(const char* seq1, int len1,
207 const char* seq2, int len2,
208 bool reverse, int xdrop_threshold,
209 int match_cost, int mismatch_cost,
210 int& seq1_align_len, int& seq2_align_len,
211 CGreedyAlignData& aux_data,
212 GXEditScript *edit_block) {
213 //GapPrelimEditBlock *edit_block,
214 //bool& fence_hit, SGreedySeed *seed) {
215 int seq1_index;
216 int seq2_index;
217 int index;
218 int d;
219 int k;
220 int diag_lower, diag_upper;
221 int max_dist;
222 int diag_origin;
223 int best_dist;
224 int best_diag;
225 int** last_seq2_off;
226 int* max_score;
227 int xdrop_offset;
228 int longest_match_run;
229 bool end1_reached, end2_reached;
230 GXMemPool* mem_pool;
231
232 /* ordinary dynamic programming alignment, for each offset
233 in seq1, walks through offsets in seq2 until an X-dropoff
234 test fails, saving the best score encountered along
235 the way. Instead of score, this code tracks the 'distance'
236 (number of mismatches plus number of gaps) between seq1
237 and seq2. Instead of walking through sequence offsets, it
238 walks through diagonals that can achieve a given distance.
239
240 Note that in what follows, the numbering of diagonals implies
241 a dot matrix where increasing seq1 offsets go to the right on
242 the x axis, and increasing seq2 offsets go up the y axis.
243 The gapped alignment thus proceeds up and to the right in
244 the graph, and diagonals are numbered increasing to the right */
245
246 best_dist = 0;
247 best_diag = 0;
248
249 /* set the number of distinct distances the algorithm will
250 examine in the search for an optimal alignment. The
251 heuristic worst-case running time of the algorithm is
252 O(max_dist**2 + (len1+len2)); for sequences which are
253 very similar, the average running time will be sig-
254 nificantly better than this */
255
256 max_dist = GMIN(GREEDY_MAX_COST,
257 (len2/GREEDY_MAX_COST_FRACTION + 1));
258
259 /* the main loop assumes that the index of all diagonals is
260 biased to lie in the middle of allocated bookkeeping
261 structures */
262
263 diag_origin = max_dist + 2;
264
265 // last_seq2_off[d][k] is the largest offset into seq2 that
266 // lies on diagonal k and has distance d
267
268 last_seq2_off = aux_data.last_seq2_off;
269
270 /* Instead of tracking the best alignment score and using
271 xdrop_theshold directly, track the best score for each
272 unique distance and use the best score for some previously
273 computed distance to implement the X-dropoff test.
274
275 xdrop_offset gives the distance backwards in the score
276 array to look */
277
278 xdrop_offset = (xdrop_threshold + match_cost / 2) /
279 (match_cost + mismatch_cost) + 1;
280
281 // find the offset of the first mismatch between seq1 and seq2
282
283 index = s_FindFirstMismatch(seq1, len1, seq2, len2, 0, 0, reverse);
284 // fence_hit, reverse, rem);
285
286 // update the extents of the alignment, and bail out
287 // early if no further work is needed
288
289 seq1_align_len = index;
290 seq2_align_len = index;
291 seq1_index = index;
292 /*
293 seed->start_q = 0;
294 seed->start_s = 0;
295 seed->match_length = index;
296 */
297 longest_match_run = index;
298
299 if (index == len1 || index == len2) {
300 /* Return the number of differences, which is zero here */
301 if (edit_block != NULL)
302 //GapPrelimEditBlockAdd(edit_block, eGapAlignSub, index);
303 edit_block->opRep(index);
304 return 0;
305 }
306
307 // set up the memory pool
308 mem_pool = aux_data.space;
309 if (edit_block == NULL) {
310 mem_pool = NULL;
311 }
312 else if (mem_pool == NULL) {
313 aux_data.space = mem_pool = new GXMemPool();
314 }
315 else {
316 mem_pool->refresh();
317 }
318
319 /* set up the array of per-distance maximum scores. There
320 are max_diags + xdrop_offset distances to track, the first
321 xdrop_offset of which are 0 */
322
323 max_score = aux_data.max_score + xdrop_offset;
324 for (index = 0; index < xdrop_offset; index++)
325 aux_data.max_score[index] = 0;
326
327 // fill in the initial offsets of the distance matrix
328
329 last_seq2_off[0][diag_origin] = seq1_index;
330 max_score[0] = seq1_index * match_cost;
331 diag_lower = diag_origin - 1;
332 diag_upper = diag_origin + 1;
333 end1_reached = end2_reached = false;
334
335 // for each distance
336 for (d = 1; d <= max_dist; d++) {
337 int xdrop_score;
338 int curr_score;
339 int curr_extent = 0;
340 int curr_seq2_index = 0;
341 int curr_diag = 0;
342 int tmp_diag_lower = diag_lower;
343 int tmp_diag_upper = diag_upper;
344
345 // Assign impossible seq2 offsets to any diagonals that
346 // are not in the range (diag_lower,diag_upper).
347 // These will serve as sentinel values for the inner loop
348 last_seq2_off[d - 1][diag_lower-1] = kInvalidOffset;
349 last_seq2_off[d - 1][diag_lower] = kInvalidOffset;
350 last_seq2_off[d - 1][diag_upper] = kInvalidOffset;
351 last_seq2_off[d - 1][diag_upper+1] = kInvalidOffset;
352
353 // compute the score for distance d corresponding to the X-dropoff criterion
354
355 xdrop_score = max_score[d - xdrop_offset] +
356 (match_cost + mismatch_cost) * d - xdrop_threshold;
357 xdrop_score = (int)ceil((double)xdrop_score / (match_cost / 2));
358
359 // for each diagonal of interest
360 for (k = tmp_diag_lower; k <= tmp_diag_upper; k++) {
361 /* find the largest offset into seq2 that increases
362 the distance from d-1 to d (i.e. keeps the alignment
363 from getting worse for as long as possible), then
364 choose the offset into seq1 that will keep the
365 resulting diagonal fixed at k
366
367 Note that this requires kInvalidOffset+1 to be smaller
368 than any valid offset into seq2, i.e. to be negative */
369
370 seq2_index = GMAX(last_seq2_off[d - 1][k + 1],
371 last_seq2_off[d - 1][k ]) + 1;
372 seq2_index = GMAX(seq2_index, last_seq2_off[d - 1][k - 1]);
373 seq1_index = seq2_index + k - diag_origin;
374
375 if (seq2_index < 0 || seq1_index + seq2_index < xdrop_score) {
376
377 // if no valid diagonal can reach distance d, or the
378 // X-dropoff test fails, narrow the range of diagonals
379 // to test and skip to the next diagonal
380 if (k == diag_lower)
381 diag_lower++;
382 else
383 last_seq2_off[d][k] = kInvalidOffset;
384 continue;
385 }
386 diag_upper = k;
387
388 /* slide down diagonal k until a mismatch
389 occurs. As long as only matches are encountered,
390 the current distance d will not change */
391
392 index = s_FindFirstMismatch(seq1, len1, seq2, len2,
393 seq1_index, seq2_index, reverse);
394 //fence_hit, reverse, rem);
395 if (index > longest_match_run) {
396 //seed->start_q = seq1_index;
397 //seed->start_s = seq2_index;
398 //seed->match_length = index;
399 longest_match_run = index;
400 }
401 seq1_index += index;
402 seq2_index += index;
403
404 // set the new largest seq2 offset that achieves
405 // distance d on diagonal k
406
407 last_seq2_off[d][k] = seq2_index;
408
409 // since all values of k are constrained to have the
410 // same distance d, the value of k which maximizes the
411 // alignment score is the one that covers the most of seq1 and seq2
412 if (seq1_index + seq2_index > curr_extent) {
413 curr_extent = seq1_index + seq2_index;
414 curr_seq2_index = seq2_index;
415 curr_diag = k;
416 }
417
418 /* clamp the bounds on diagonals to avoid walking off
419 either sequence. Because the bounds increase by at
420 most one for each distance, diag_lower and diag_upper
421 can each be of size at most max_diags+2 */
422
423 if (seq2_index == len2) {
424 diag_lower = k + 1;
425 end2_reached = true;
426 }
427 if (seq1_index == len1) {
428 diag_upper = k - 1;
429 end1_reached = true;
430 }
431 } // end loop over diagonals
432
433 // compute the maximum score possible for distance d
434 curr_score = curr_extent * (match_cost / 2) -
435 d * (match_cost + mismatch_cost);
436 // if this is the best score seen so far, update the
437 // statistics of the best alignment
438 if (curr_score > max_score[d - 1]) {
439 max_score[d] = curr_score;
440 best_dist = d;
441 best_diag = curr_diag;
442 seq2_align_len = curr_seq2_index;
443 seq1_align_len = curr_seq2_index + best_diag - diag_origin;
444 }
445 else {
446 max_score[d] = max_score[d - 1];
447 }
448
449 // alignment has finished if the lower and upper bounds
450 // on diagonals to check have converged to each other
451
452 if (diag_lower > diag_upper)
453 break;
454
455 /* set up for the next distance to examine. Because the
456 bounds increase by at most one for each distance,
457 diag_lower and diag_upper can each be of size at
458 most max_diags+2 */
459
460 if (!end2_reached)
461 diag_lower--;
462 if (!end1_reached)
463 diag_upper++;
464
465 if (edit_block == NULL) {
466 // if no traceback is specified, the next row of
467 // last_seq2_off can reuse previously allocated memory
468 //TODO FIXME The following assumes two arrays of
469 // at least max_dist+4 int's have already been allocated
470 last_seq2_off[d + 1] = last_seq2_off[d - 1];
471 }
472 else {
473 // traceback requires all rows of last_seq2_off to be saved,
474 // so a new row must be allocated
475 last_seq2_off[d + 1] = (int*)mem_pool->getByteSpace((diag_upper - diag_lower + 7)*sizeof(int));
476 // move the origin for this row backwards
477 //TODO FIXME: dubious pointer arithmetic ?!
478 //last_seq2_off[d + 1] = last_seq2_off[d + 1] - diag_lower + 2;
479 }
480 } // end loop over distinct distances
481
482
483 if (edit_block == NULL)
484 return best_dist;
485
486 //---- perform traceback
487 d = best_dist;
488 seq1_index = seq1_align_len;
489 seq2_index = seq2_align_len;
490 // for all positive distances
491
492 //if (fence_hit && *fence_hit)
493 // goto done;
494 if (index==len1 || index==len2) d=0;
495 while (d > 0) {
496 int new_diag;
497 int new_seq2_index;
498
499 /* retrieve the value of the diagonal after the next
500 traceback operation. best_diag starts off with the
501 value computed during the alignment process */
502
503 new_diag = s_GetNextNonAffineTback(last_seq2_off, d,
504 best_diag, &new_seq2_index);
505
506 if (new_diag == best_diag) {
507 // same diagonal: issue a group of substitutions
508 if (seq2_index - new_seq2_index > 0) {
509 edit_block->opRep(seq2_index - new_seq2_index);
510 }
511 }
512 else if (new_diag < best_diag) {
513 // smaller diagonal: issue a group of substitutions
514 // and then a gap in seq2 */
515 if (seq2_index - new_seq2_index > 0) {
516 edit_block->opRep(seq2_index - new_seq2_index);
517 }
518 //GapPrelimEditBlockAdd(edit_block, eGapAlignIns, 1);
519 edit_block->opIns(1);
520 }
521 else {
522 // larger diagonal: issue a group of substitutions
523 // and then a gap in seq1
524 if (seq2_index - new_seq2_index - 1 > 0) {
525 edit_block->opRep(seq2_index - new_seq2_index - 1);
526 }
527 edit_block->opDel(1);
528 }
529 d--;
530 best_diag = new_diag;
531 seq2_index = new_seq2_index;
532 }
533 //done:
534 // handle the final group of substitutions back to distance zero,
535 // i.e. back to offset zero of seq1 and seq2
536 //GapPrelimEditBlockAdd(edit_block, eGapAlignSub,
537 // last_seq2_off[0][diag_origin]);
538 edit_block->opRep(last_seq2_off[0][diag_origin]);
539 if (!reverse)
540 edit_block->reverse();
541 return best_dist;
542 }
543
544 void printEditScript(GXEditScript* ed_script) {
545 uint i;
546 if (ed_script==NULL || ed_script->opnum == 0)
547 return;
548 for (i=0; i<ed_script->opnum; i++) {
549 int num=((ed_script->ops[i]) >> 2);
550 unsigned char op_type = 3 - ( ed_script->ops[i] & gxEDIT_OP_MASK );
551 if (op_type == 3)
552 GError("Error: printEditScript encountered op_type 3 ?!\n");
553 GMessage("%d%c ", num, xgapcodes[op_type]);
554 }
555 GMessage("\n");
556 }
557
558 GXAlnInfo* GreedyAlign(const char* q_seq, int q_alnstart, const char* s_seq, int s_alnstart,
559 bool editscript, int reward, int penalty, int xdrop) {
560 int q_max=strlen(q_seq); //query
561 int s_max=strlen(s_seq); //subj
562 return GreedyAlignRegion(q_seq, q_alnstart, q_max,
563 s_seq, s_alnstart, s_max, reward, penalty, xdrop, NULL, NULL, editscript);
564 }
565
566 struct GXSeedTable {
567 int a_num, b_num;
568 int a_cap, b_cap;
569 char* xc;
570 GXSeedTable(int a=12, int b=255) {
571 a_cap=0;
572 b_cap=0;
573 a_num=0;
574 b_num=0;
575 xc=NULL;
576 init(a,b);
577 }
578 ~GXSeedTable() {
579 GFREE(xc);
580 }
581 void init(int a, int b) {
582 a_num=a;
583 b_num=b;
584 bool resize=false;
585 if (b_num>b_cap) { resize=true; b_cap=b_num;}
586 if (a_num>a_cap) { resize=true; a_cap=a_num;}
587 if (resize) {
588 GFREE(xc);
589 GCALLOC(xc, (a_num*b_num));
590 }
591 else {
592 //just clear up to a_max, b_max
593 memset((void*)xc, 0, (a_num*b_num));
594 }
595 }
596 char& x(int ax, int by) {
597 return xc[by*a_num+ax];
598 }
599
600 };
601
602 const int a_m_score=2; //match score
603 const int a_mis_score=-3; //mismatch
604 const int a_dropoff_score=7;
605 const int a_min_score=12; //at least 6 bases full match
606
607 // ------------------ adapter matching - simple k-mer seed & extend, no indels for now
608 //when a k-mer match is found, simply try to extend the alignment using a drop-off scheme
609 //check minimum score and
610 //for 3' adapter trimming:
611 // require that the right end of the alignment for either the adaptor OR the read must be
612 // < 3 distance from its right end
613 // for 5' adapter trimming:
614 // require that the left end of the alignment for either the adaptor OR the read must
615 // be at coordinate < 3 from start
616
617 bool extendUngapped(const char* a, int alen, int ai,
618 const char* b, int blen, int bi, int& mlen, int& l5, int& l3, bool end5=false) {
619 //so the alignment starts at ai in a, bi in b, with a perfect match of length mlen
620 //if (debug) {
621 // GMessage(">> in %s\n\textending hit: %s at position %d\n", a, (dbg.substr(bi, mlen)).chars(), ai);
622 // }
623 int a_l=ai; //alignment coordinates on a
624 int a_r=ai+mlen-1;
625 int b_l=bi; //alignment coordinates on b
626 int b_r=bi+mlen-1;
627 int ai_maxscore=ai;
628 int bi_maxscore=bi;
629 int score=mlen*a_m_score;
630 int maxscore=score;
631 int mism5score=a_mis_score;
632 if (end5 && ai<(alen>>1)) mism5score-=2; // increase penalty for mismatches at 5' end
633 //try to extend to the left first, if possible
634 while (ai>0 && bi>0) {
635 ai--;
636 bi--;
637 score+= (a[ai]==b[bi])? a_m_score : mism5score;
638 if (score>maxscore) {
639 ai_maxscore=ai;
640 bi_maxscore=bi;
641 maxscore=score;
642 }
643 else if (maxscore-score>a_dropoff_score) break;
644 }
645 a_l=ai_maxscore;
646 b_l=bi_maxscore;
647 //now extend to the right
648 ai_maxscore=a_r;
649 bi_maxscore=b_r;
650 ai=a_r;
651 bi=b_r;
652 score=maxscore;
653 //sometimes there are extra As at the end of the read, ignore those
654 if (a[alen-2]=='A' && a[alen-1]=='A') {
655 alen-=2;
656 while (a[alen-1]=='A' && alen>ai) alen--;
657 }
658 while (ai<alen-1 && bi<blen-1) {
659 ai++;
660 bi++;
661 //score+= (a[ai]==b[bi])? a_m_score : a_mis_score;
662 if (a[ai]==b[bi]) { //match
663 score+=a_m_score;
664 if (ai>=alen-2) {
665 score+=a_m_score-(alen-ai-1);
666 }
667 }
668 else { //mismatch
669 score+=a_mis_score;
670 }
671 if (score>maxscore) {
672 ai_maxscore=ai;
673 bi_maxscore=bi;
674 maxscore=score;
675 }
676 else if (maxscore-score>a_dropoff_score) break;
677 }
678 a_r=ai_maxscore;
679 b_r=bi_maxscore;
680 int a_ovh3=alen-a_r-1;
681 int b_ovh3=blen-b_r-1;
682 int mmovh3=(a_ovh3<b_ovh3)? a_ovh3 : b_ovh3;
683 int mmovh5=(a_l<b_l)? a_l : b_l;
684 if (maxscore>=a_min_score && mmovh3<2 && mmovh5<2) {
685 if (a_l<a_ovh3) {
686 //adapter closer to the left end (typical for 5' adapter)
687 l5=a_r+1;
688 l3=alen-1;
689 }
690 else {
691 //adapter matching at the right end (typical for 3' adapter)
692 l5=0;
693 l3=a_l-1;
694 }
695 return true;
696 }
697 //do not trim:
698 l5=0;
699 l3=alen-1;
700 return false;
701 }
702
703 /*
704 GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, GXSeqData& sd) {
705 int bimin=GMAX(0,(sd.blen-sd.alen-6));
706 GXSeedTable gx(sd.alen, sd.blen);
707 GXBandSet* diagstrips=new GXBandSet(sd.alen, sd.blen); //set of overlapping 3-diagonal strips
708 //for (int bi=0;bi<=b_len-6;bi++) {
709 for (int bi=sd.blen-6;bi>=0;bi--) {
710 //for each 6-mer of seqb
711 uint16 bv = get6mer((char*)&(sd.bseq[bi]));
712 GVec<uint16>* alocs = sd.amers[bv];
713 if (alocs==NULL) continue;
714 //extend each hit
715 for (int h=0;h<alocs->Count();h++) {
716 int ai=alocs->Get(h);
717 //word match
718 if (gx.x(ai,bi))
719 //already have a previous seed covering this region of this diagonal
720 continue;
721 for (int i=0;i<6;i++)
722 gx.x(ai+i,bi+i)=1;
723 //see if we can extend to the left
724 int aix=ai-1;
725 int bix=bi-1;
726 int len=6;
727 while (bix>=0 && aix>=0 && sd.aseq[aix]==sd.bseq[bix]) {
728 gx.x(aix,bix)=1;
729 ai=aix;
730 bi=bix;
731 aix--;bix--;
732 len++;
733 }
734 if (len>sd.amlen) {
735 //heuristics: likely the best we can get
736 //quick match shortcut
737 diagstrips->qmatch=new GXSeed(ai,bi,len);
738 return diagstrips;
739 }
740 if (bi<bimin && len<9) continue; //skip middle seeds that are not high scoring enough
741 GXSeed* newseed=new GXSeed(ai,bi,len);
742 seeds.Add(newseed);
743 diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
744 //special last resort terminal match to be used if no better alignment is there
745 if (ai<2 && bi+len>sd.blen-2 &&
746 (!diagstrips->tmatch || diagstrips->tmatch->len<len)) diagstrips->tmatch=newseed;
747 }
748 }
749 for (int i=0;i<diagstrips->Count();i++) {
750 diagstrips->Get(i)->finalize(); //adjust scores due to overlaps or gaps between seeds
751 }
752 diagstrips->setSorted(true); //sort by score
753 return diagstrips;
754 }
755 */
756 GXBandSet* collectSeeds(GList<GXSeed>& seeds, GXSeqData& sd) {
757 int bimax=GMIN((sd.alen+2), (sd.blen-6));
758 int bimin=GMAX(0,(sd.blen-sd.alen-6)); //from collectSeeds_R
759 //gx.init(a_maxlen, b_maxlen);
760 GXSeedTable gx(sd.alen, sd.blen);
761 GXBandSet* diagstrips=new GXBandSet(sd.alen, sd.blen); //set of overlapping 3-diagonal strips
762 for (int bi=0;bi<=sd.blen-6;bi++) {
763 //for each 6-mer of seqb
764 uint16 bv = get6mer((char*) & (sd.bseq[bi]));
765 GVec<uint16>* alocs = sd.amers[bv];
766 if (alocs==NULL) continue;
767 //extend each hit
768 for (int h=0;h<alocs->Count();h++) {
769 int ai=alocs->Get(h);
770 //word match
771 if (gx.x(ai,bi))
772 //already have a previous seed covering this region of this diagonal
773 continue;
774 for (int i=0;i<6;i++)
775 gx.x(ai+i,bi+i)=1;
776 //see if we can extend to the right
777 int aix=ai+6;
778 int bix=bi+6;
779 int len=6;
780 while (bix<sd.blen && aix<sd.alen && sd.aseq[aix]==sd.bseq[bix]) {
781 gx.x(aix,bix)=1;
782 aix++;bix++;
783 len++;
784 }
785 if (len>sd.amlen) {
786 //heuristics: very likely the best we can get
787 //quick match shortcut
788 diagstrips->qmatch=new GXSeed(ai,bi,len);
789 return diagstrips;
790 }
791 if (bi>bimax && len<9) continue; //skip middle seeds that are not high scoring enough
792 if (bi<bimin && len<9) continue; //collectSeeds_R
793
794 GXSeed* newseed=new GXSeed(ai,bi,len);
795 seeds.Add(newseed);
796 diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
797 //special last resort terminal match to be used if no better alignment is there
798 if (bi<2 && ai+len>=sd.alen-1 &&
799 (!diagstrips->tmatch_l || diagstrips->tmatch_l->len<len))
800 diagstrips->tmatch_l=newseed;
801 //collectSeeds_R:
802 if (ai<2 && bi+len>sd.blen-2 &&
803 (!diagstrips->tmatch_r || diagstrips->tmatch_r->len<len))
804 diagstrips->tmatch_r=newseed;
805 }
806 } //for each 6-mer of the read
807 for (int i=0;i<diagstrips->Count();i++) {
808 diagstrips->Get(i)->finalize(); //adjust scores due to overlaps or gaps between seeds
809 }
810 diagstrips->setSorted(true); //sort by score
811 return diagstrips;
812 }
813
814 int cmpSeedScore(const pointer p1, const pointer p2) {
815 //return (((GXSeed*)s2)->len-((GXSeed*)s1)->len);
816 GXSeed* s1=(GXSeed*)p1;
817 GXSeed* s2=(GXSeed*)p2;
818 if (s1->len==s2->len) {
819 return (s1->b_ofs-s2->b_ofs);
820 }
821 else return (s2->len-s1->len);
822 }
823
824 int cmpSeedScore_R(const pointer p1, const pointer p2) {
825 //return (((GXSeed*)s2)->len-((GXSeed*)s1)->len);
826 GXSeed* s1=(GXSeed*)p1;
827 GXSeed* s2=(GXSeed*)p2;
828 if (s1->len==s2->len) {
829 return (s2->b_ofs-s1->b_ofs);
830 }
831 else return (s2->len-s1->len);
832 }
833
834
835 int cmpSeedDiag(const pointer p1, const pointer p2) {
836 GXSeed* s1=(GXSeed*)p1;
837 GXSeed* s2=(GXSeed*)p2;
838 return ((s1->b_ofs-s1->a_ofs)-(s2->b_ofs-s2->a_ofs));
839 }
840
841
842 int cmpDiagBands_R(const pointer p1, const pointer p2) {
843 //return (((GXSeed*)s2)->len-((GXSeed*)s1)->len);
844 GXBand* b1=(GXBand*)p1;
845 GXBand* b2=(GXBand*)p2;
846 if (b1->score==b2->score) {
847 return (b2->w_min_b-b1->w_min_b);
848 }
849 else return (b2->score-b1->score);
850 }
851
852
853
854 GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max,
855 const char* s_seq, int s_alnstart, int s_max,
856 int reward, int penalty, int xdrop, CGreedyAlignData* gxmem,
857 CAlnTrim* trim, bool editscript) {
858 GXEditScript* ed_script_fwd = NULL;
859 GXEditScript* ed_script_rev = NULL;
860 if ( q_alnstart>q_max || q_alnstart<1 || s_alnstart>s_max || s_alnstart<1 )
861 GError("GreedyAlign() Error: invalid anchor coordinate.\n");
862 q_alnstart--;
863 s_alnstart--;
864 if (q_seq==NULL || q_seq[0]==0 || s_seq==NULL || s_seq[0]==0)
865 GError("GreedyAlign() Error: attempt to use an empty sequence string!\n");
866 if (q_seq[q_alnstart]!=s_seq[s_alnstart])
867 GError("GreedyAlign() Error: improper anchor (mismatch)!\n");
868 int q_ext_l=0, q_ext_r=0, s_ext_l=0, s_ext_r=0;
869 const char* q=q_seq+q_alnstart;
870 int q_avail=q_max-q_alnstart;
871 const char* s=s_seq+s_alnstart;
872 int s_avail=s_max-s_alnstart;
873 if (penalty<0) penalty=-penalty;
874 int MIN_GREEDY_SCORE=6*reward; //minimum score for an alignment to be reported for 0 diffs
875 GXAlnInfo* alninfo=NULL;
876 bool freeAlnMem=(gxmem==NULL);
877 if (freeAlnMem) {
878 gxmem=new CGreedyAlignData(reward, penalty, xdrop);
879 }
880 else gxmem->reset();
881 int retscore = 0;
882 int numdiffs = 0;
883 if (trim!=NULL && trim->type==galn_TrimLeft) {
884 //intent: trimming the left side
885 if (editscript)
886 ed_script_rev=new GXEditScript();
887
888 int numdiffs_l = GXGreedyExtend(s_seq, s_alnstart, q_seq, q_alnstart, true, xdrop,
889 reward, penalty, s_ext_l, q_ext_l, *gxmem, ed_script_rev);
890 //check this extension here and bail out if it's not a good extension
891 if (s_ext_l+(trim->seedlen>>1) < trim->safelen &&
892 q_alnstart+1-q_ext_l>trim->boundary &&
893 s_alnstart+1-s_ext_l>trim->boundary) {
894 delete ed_script_rev;
895 if (freeAlnMem) delete gxmem;
896 return NULL;
897 }
898
899 if (editscript)
900 ed_script_fwd=new GXEditScript();
901 int numdiffs_r = GXGreedyExtend(s, s_avail, q, q_avail, false, xdrop,
902 reward, penalty, s_ext_r, q_ext_r, *gxmem, ed_script_fwd);
903 numdiffs=numdiffs_r+numdiffs_l;
904 //convert num diffs to actual score
905 retscore = (q_ext_r + s_ext_r + q_ext_l + s_ext_l)*reward/2 - numdiffs*(reward+penalty);
906 if (editscript)
907 ed_script_rev->Append(ed_script_fwd); //combine the two extensions
908 }
909 else {
910 if (editscript) {
911 ed_script_fwd=new GXEditScript();
912 }
913 int numdiffs_r = GXGreedyExtend(s, s_avail, q, q_avail, false, xdrop,
914 reward, penalty, s_ext_r, q_ext_r, *gxmem, ed_script_fwd);
915 //check extension here and bail out if not a good right extension
916 //assuming s_max is really at the right end of s_seq
917 if (trim!=NULL && trim->type==galn_TrimRight &&
918 s_ext_r+(trim->seedlen>>1) < trim->safelen &&
919 q_alnstart+q_ext_r<q_max-3 &&
920 s_alnstart+s_ext_r<trim->boundary) {
921 delete ed_script_fwd;
922 if (freeAlnMem) delete gxmem;
923 return NULL;
924 }
925 if (editscript)
926 ed_script_rev=new GXEditScript();
927 int numdiffs_l = GXGreedyExtend(s_seq, s_alnstart, q_seq, q_alnstart, true, xdrop,
928 reward, penalty, s_ext_l, q_ext_l, *gxmem, ed_script_rev);
929 //convert num diffs to actual score
930 numdiffs=numdiffs_r+numdiffs_l;
931 retscore = (q_ext_r + s_ext_r + q_ext_l + s_ext_l)*reward/2 - numdiffs*(reward+penalty);
932 if (editscript)
933 ed_script_rev->Append(ed_script_fwd); //combine the two extensions
934 }
935
936 if (retscore>=MIN_GREEDY_SCORE) {
937 alninfo=new GXAlnInfo(q_seq, q_alnstart+1-q_ext_l,q_alnstart+q_ext_r, s_seq, s_alnstart+1-s_ext_l, s_alnstart+s_ext_r);
938 int hsp_length = GMIN(q_ext_l+q_ext_r, s_ext_l+s_ext_r);
939 alninfo->score=retscore;
940 alninfo->pid = 100 * (1 - ((double) numdiffs) / hsp_length);
941 #ifdef GDEBUG
942 //if (ed_script_rev) {
943 // GMessage("Final Edit script ::: ");
944 // printEditScript(ed_script_rev);
945 // }
946 #endif
947 alninfo->editscript=ed_script_rev;
948 alninfo->gapinfo = new CAlnGapInfo(ed_script_rev, alninfo->ql-1, alninfo->sl-1);
949 }
950 else {
951 if (freeAlnMem) delete gxmem;
952 delete ed_script_rev;
953 delete alninfo;
954 alninfo=NULL;
955 }
956 delete ed_script_fwd;
957 return alninfo;
958 }
959
960 GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max,
961 const char* s_seq, int s_alnstart, int s_max, CGreedyAlignData* gxmem,
962 CAlnTrim* trim, bool editscript) {
963 int reward=2;
964 int penalty=3;
965 int xdrop=8;
966 if (gxmem) {
967 reward=gxmem->match_reward;
968 penalty=gxmem->mismatch_penalty;
969 xdrop=gxmem->x_drop;
970 }
971 return GreedyAlignRegion(q_seq, q_alnstart, q_max, s_seq, s_alnstart, s_max,
972 reward, penalty, xdrop, gxmem, trim, editscript);
973 }
974
975 GXAlnInfo* match_RightEnd(GXSeqData& sd, CGreedyAlignData* gxmem, int min_pid) {
976 bool editscript=false;
977 #ifdef GDEBUG
978 editscript=true;
979 GMessage("==========> matching Right (3') end : %s\n", sd.aseq);
980 #endif
981 CAlnTrim trimInfo(galn_TrimRight, sd.bseq, sd.blen, sd.amlen);
982 GList<GXSeed> rseeds(true,true,false);
983
984 GXBandSet* alnbands=collectSeeds(rseeds, sd);
985 GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal
986 //did we find a shortcut?
987 if (alnbands->qmatch) {
988 #ifdef GDEBUG
989 GMessage("::: Found a quick long match at %d, len %d\n",
990 alnbands->qmatch->b_ofs, alnbands->qmatch->len);
991 #endif
992 anchor_seeds.Add(alnbands->qmatch);
993 }
994 else {
995 int max_top_bands=5;
996 int top_band_count=0;
997 for (int b=0;b<alnbands->Count();b++) {
998 if (alnbands->Get(b)->score<6) break;
999 //#ifdef GDEBUG
1000 //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1001 //#endif
1002 top_band_count++;
1003 GXBand& band=*(alnbands->Get(b));
1004 band.seeds.setSorted(cmpSeedScore);
1005 anchor_seeds.Add(band.seeds.First());
1006 band.tested=true;
1007 if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1008 }
1009 //#ifdef GDEBUG
1010 //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1011 //#endif
1012 }
1013 GList<GXAlnInfo> galns(true,true,false);
1014 for (int i=0;i<anchor_seeds.Count();i++) {
1015 GXSeed& aseed=*anchor_seeds[i];
1016 int a1=aseed.a_ofs+(aseed.len>>1)+1;
1017 int a2=aseed.b_ofs+(aseed.len>>1)+1;
1018 trimInfo.seedlen=aseed.len;
1019 #ifdef GDEBUG
1020 GMessage("\t::: align from seed (%d, %d)of len %d.\n",aseed.a_ofs, aseed.b_ofs,
1021 aseed.len);
1022 #endif
1023 GXAlnInfo* alninfo=GreedyAlignRegion(sd.aseq, a1, sd.alen,
1024 sd.bseq, a2, sd.blen, gxmem, &trimInfo, editscript);
1025 if (alninfo && alninfo->pid>=min_pid &&
1026 trimInfo.validate(alninfo->sl, alninfo->sr, alninfo->pid, alninfo->ql-1))
1027 galns.AddIfNew(alninfo, true);
1028 else delete alninfo;
1029 }
1030 if (galns.Count()==0 && alnbands->tmatch_r) {
1031 //last resort seed
1032 GXSeed& aseed=*alnbands->tmatch_r;
1033 int halfseed=aseed.len>>1;
1034 int a1=aseed.a_ofs+halfseed+1;
1035 int a2=aseed.b_ofs+halfseed+1;
1036 trimInfo.seedlen=aseed.len;
1037 #ifdef GDEBUG
1038 GMessage("\t::: align from seed (%d, %d)of len %d.\n",aseed.a_ofs, aseed.b_ofs,
1039 aseed.len);
1040 #endif
1041 GXAlnInfo* alninfo=GreedyAlignRegion(sd.aseq, a1, sd.alen,
1042 sd.bseq, a2, sd.blen, gxmem, &trimInfo, editscript);
1043 if (alninfo && alninfo->pid>=min_pid &&
1044 trimInfo.validate(alninfo->sl, alninfo->sr, alninfo->pid, alninfo->ql-1))
1045 galns.AddIfNew(alninfo, true);
1046 else delete alninfo;
1047 }
1048
1049 /*
1050 //special 3' end case: due to the seed scoring scheme being biased towards the 5' end of the read,
1051 //we should also try some seeds closer to the 3' end
1052 if (galns.Count()==0) {
1053 anchor_seeds.Clear();
1054 alnbands->setSorted(cmpDiagBands_R);
1055 int max_top_bands=4;
1056 int top_band_count=0;
1057 //#ifdef GDEBUG
1058 //GMessage(":::>> Retrying adjusting sort order.\n");
1059 //#endif
1060 if (alnbands->tmatch) {
1061 //anchor_seeds.setSorted(false);
1062 anchor_seeds.Add(alnbands->tmatch);
1063 }
1064 for (int b=0;b<alnbands->Count();b++) {
1065 if (alnbands->Get(b)->score<4) break;
1066 //#ifdef GDEBUG
1067 //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1068 //#endif
1069 if (alnbands->Get(b)->tested) continue;
1070 top_band_count++;
1071 GXBand& band=*(alnbands->Get(b));
1072 band.seeds.setSorted(cmpSeedScore);
1073 anchor_seeds.Add(band.seeds.First());
1074 if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1075 }
1076 //#ifdef GDEBUG
1077 //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1078 //#endif
1079 for (int i=0;i<anchor_seeds.Count();i++) {
1080 GXSeed& aseed=*anchor_seeds[i];
1081 int a1=aseed.a_ofs+(aseed.len>>1)+1;
1082 int a2=aseed.b_ofs+(aseed.len>>1)+1;
1083 GXAlnInfo* alninfo=GreedyAlignRegion(seqa, a1, seqa_len,
1084 seqb, a2, seqb_len, gxmem, &trimInfo, editscript);
1085 if (alninfo && alninfo->pid>=min_pid && trimInfo.validate(alninfo->sl, alninfo->sr, alninfo->pid, alninfo->ql-1))
1086 galns.AddIfNew(alninfo, true);
1087 else delete alninfo;
1088 }
1089 } */
1090
1091 //---- done
1092 delete alnbands;
1093 if (galns.Count()) {
1094 GXAlnInfo* bestaln=galns.Shift();
1095 #ifdef GDEBUG
1096 GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr,
1097 bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid);
1098 if (bestaln->gapinfo!=NULL) {
1099 bestaln->gapinfo->printAlignment(stderr, sd.aseq, sd.alen, sd.bseq, sd.blen);
1100 }
1101 #endif
1102
1103 return bestaln;
1104 }
1105 else return NULL;
1106 }
1107
1108 GXAlnInfo* match_LeftEnd(GXSeqData& sd, CGreedyAlignData* gxmem, int min_pid) {
1109 bool editscript=false;
1110 #ifdef GDEBUG
1111 editscript=true;
1112 GMessage("==========> matching Left (5') end : %s\n", sd.aseq);
1113 #endif
1114 CAlnTrim trimInfo(galn_TrimLeft, sd.bseq, sd.blen, sd.amlen);
1115 GList<GXSeed> rseeds(true,true,false);
1116 GXBandSet* alnbands = collectSeeds(rseeds, sd);
1117 GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal
1118 if (alnbands->qmatch) {
1119 #ifdef GDEBUG
1120 GMessage("::: Found a quick long match at %d, len %d\n",
1121 alnbands->qmatch->b_ofs, alnbands->qmatch->len);
1122 #endif
1123 anchor_seeds.Add(alnbands->qmatch);
1124 }
1125 else {
1126 int max_top_bands=5;
1127 int top_band_count=0;
1128 for (int b=0;b<alnbands->Count();b++) {
1129 if (alnbands->Get(b)->score<6) break;
1130 //#ifdef GDEBUG
1131 //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1132 //#endif
1133 top_band_count++;
1134 GXBand& band=*(alnbands->Get(b));
1135 band.seeds.setSorted(cmpSeedScore);
1136 anchor_seeds.Add(band.seeds.First());
1137 band.tested=true;
1138 if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1139 }
1140 //#ifdef GDEBUG
1141 //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1142 //#endif
1143 }
1144 GList<GXAlnInfo> galns(true,true,false);
1145 for (int i=0;i<anchor_seeds.Count();i++) {
1146 GXSeed& aseed=*anchor_seeds[i];
1147 int a1=aseed.a_ofs+(aseed.len>>1)+1;
1148 int a2=aseed.b_ofs+(aseed.len>>1)+1;
1149 trimInfo.seedlen=aseed.len;
1150 #ifdef GDEBUG
1151 GMessage("\t::: align from seed (%d, %d)of len %d.\n",aseed.a_ofs, aseed.b_ofs,
1152 aseed.len);
1153 #endif
1154 GXAlnInfo* alninfo=GreedyAlignRegion(sd.aseq, a1, sd.alen,
1155 sd.bseq, a2, sd.blen, gxmem, &trimInfo, editscript);
1156 if (alninfo && alninfo->pid>=min_pid
1157 && trimInfo.validate(alninfo->sl, alninfo->sr,
1158 alninfo->pid, sd.alen-alninfo->qr))
1159 galns.AddIfNew(alninfo, true);
1160 else delete alninfo;
1161 }
1162 if (galns.Count()==0 && alnbands->tmatch_l) {
1163 //last resort seed
1164 GXSeed& aseed=*alnbands->tmatch_l;
1165 int a1=aseed.a_ofs+(aseed.len>>1)+1;
1166 int a2=aseed.b_ofs+(aseed.len>>1)+1;
1167 trimInfo.seedlen=aseed.len;
1168 GXAlnInfo* alninfo=GreedyAlignRegion(sd.aseq, a1, sd.alen,
1169 sd.bseq, a2, sd.blen, gxmem, &trimInfo, editscript);
1170 if (alninfo && alninfo->pid>=min_pid &&
1171 trimInfo.validate(alninfo->sl, alninfo->sr, alninfo->pid, sd.alen-alninfo->qr))
1172 galns.Add(alninfo);
1173 }
1174 /*
1175 #ifdef GDEBUG
1176 //print valid alignments found
1177 for (int i=0;i<galns.Count();i++) {
1178 GXAlnInfo* alninfo=galns[i];
1179 GMessage("a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", alninfo->ql, alninfo->qr,
1180 alninfo->sl, alninfo->sr, alninfo->score, alninfo->pid);
1181 if (alninfo->gapinfo!=NULL) {
1182 GMessage("Alignment:\n");
1183 alninfo->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1184 }
1185 }
1186 #endif
1187 */
1188 //---- done
1189 delete alnbands;
1190 if (galns.Count()) {
1191 GXAlnInfo* bestaln=galns.Shift();
1192 #ifdef GDEBUG
1193 GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr,
1194 bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid);
1195 if (bestaln->gapinfo!=NULL) {
1196 bestaln->gapinfo->printAlignment(stderr, sd.aseq, sd.alen,
1197 sd.bseq, sd.blen);
1198 }
1199 #endif
1200 return bestaln;
1201 }
1202 else return NULL;
1203 }