1 |
#include "GAlnExtend.h" |
2 |
|
3 |
//greedy gapped alignment extension |
4 |
//(mostly lifted from NCBI's megablast gapped extension code) |
5 |
|
6 |
int GXMemPool::kMinSpace = 1000000; |
7 |
|
8 |
#ifdef TRIMDEBUG |
9 |
char buf[6]={0x1B,'[', 'n','m','m','\0'}; |
10 |
|
11 |
void color_fg(int c,FILE* f) { |
12 |
if (f!=stderr && f!=stdout) return; |
13 |
sprintf((char *)(&buf[2]),"%dm",c+30); |
14 |
fwrite(buf,1,strlen(buf), f); |
15 |
} |
16 |
|
17 |
void color_bg(int c, FILE* f) { |
18 |
if (f!=stderr && f!=stdout) return; |
19 |
sprintf((char *)(&buf[2]),"%dm",c+40); |
20 |
fwrite(buf,1,strlen(buf),f); |
21 |
}; |
22 |
|
23 |
void color_resetfg(FILE* f) { |
24 |
if (f!=stderr && f!=stdout) return; |
25 |
sprintf((char *)(&buf[2]),"39m"); |
26 |
fwrite(buf,1,strlen(buf), f); |
27 |
}; |
28 |
|
29 |
void color_resetbg(FILE* f) { |
30 |
if (f!=stderr && f!=stdout) return; |
31 |
sprintf((char *)(&buf[2]),"49m"); |
32 |
fwrite(buf,1,strlen(buf), f); |
33 |
} |
34 |
|
35 |
void color_reset(FILE* f) { |
36 |
if (f!=stderr && f!=stdout) return; |
37 |
sprintf((char *)(&buf[2]),"0m"); |
38 |
fwrite(buf,1,strlen(buf), f); |
39 |
}; |
40 |
|
41 |
void color_normal(FILE* f) { |
42 |
if (f!=stderr && f!=stdout) return; |
43 |
sprintf((char *)(&buf[2]),"22m"); |
44 |
fwrite(buf,1,strlen(buf), f); |
45 |
}; |
46 |
|
47 |
#endif |
48 |
|
49 |
|
50 |
char xgapcodes[4]={'S','I', 'D', 'X'}; |
51 |
|
52 |
int get_last(int **flast_d, int d, int diag, int *row1) { |
53 |
if (flast_d[d-1][diag-1] > GMAX(flast_d[d-1][diag], flast_d[d-1][diag+1])) { |
54 |
*row1 = flast_d[d-1][diag-1]; |
55 |
return diag-1; |
56 |
} |
57 |
if (flast_d[d-1][diag] > flast_d[d-1][diag+1]) { |
58 |
*row1 = flast_d[d-1][diag]; |
59 |
return diag; |
60 |
} |
61 |
*row1 = flast_d[d-1][diag+1]; |
62 |
return diag+1; |
63 |
} |
64 |
|
65 |
void GapXEditScript::print() { //debug |
66 |
GapXEditScript* p=this; |
67 |
do { |
68 |
GMessage("%d%c ",p->num, xgapcodes[p->op_type]); |
69 |
} while ((p=p->next)!=NULL); |
70 |
GMessage("\n"); |
71 |
} |
72 |
|
73 |
|
74 |
int BLAST_Gcd(int a, int b) { |
75 |
int c; |
76 |
|
77 |
b = abs(b); |
78 |
if (b > a) |
79 |
c=a, a=b, b=c; |
80 |
|
81 |
while (b != 0) { |
82 |
c = a%b; |
83 |
a = b; |
84 |
b = c; |
85 |
} |
86 |
return a; |
87 |
} |
88 |
|
89 |
int BLAST_Gdb3(int* a, int* b, int* c) { |
90 |
int g; |
91 |
if (*b == 0) |
92 |
g = BLAST_Gcd(*a, *c); |
93 |
else |
94 |
g = BLAST_Gcd(*a, BLAST_Gcd(*b, *c)); |
95 |
if (g > 1) { |
96 |
*a /= g; |
97 |
*b /= g; |
98 |
*c /= g; |
99 |
} |
100 |
return g; |
101 |
} |
102 |
|
103 |
|
104 |
uint16 get6mer(char* p) { |
105 |
uint16 r=gdna2bit(p,3); |
106 |
r <<= 6; |
107 |
r |= gdna2bit(p,3); |
108 |
return r; |
109 |
} |
110 |
|
111 |
|
112 |
void table6mers(const char* s, int slen, GVec<uint16>* amers[]) { |
113 |
for (uint16 i=0; i <= slen-6; i++) { |
114 |
char* p = (char*)(s+i); |
115 |
uint16 v=get6mer(p); |
116 |
if (amers[v]==NULL) { |
117 |
amers[v]=new GVec<uint16>(1); |
118 |
} |
119 |
amers[v]->Add(i); |
120 |
} |
121 |
} |
122 |
|
123 |
GVec<uint16>* match6mer(char* start, GVec<uint16>* amers[]) { |
124 |
//careful: this is broken if start+5 falls beyond the end of the string! |
125 |
uint16 r=get6mer(start); |
126 |
return amers[r]; |
127 |
} |
128 |
|
129 |
//signal that a diagonal is invalid |
130 |
static const int kInvalidOffset = -2; |
131 |
|
132 |
int s_FindFirstMismatch(const char *seq1, int len1, |
133 |
const char *seq2, int len2, |
134 |
int seq1_index, int seq2_index, |
135 |
//bool &fence_hit, |
136 |
bool reverse) { |
137 |
int start_index = seq1_index; |
138 |
/* Sentry detection here should be relatively inexpensive: The |
139 |
sentry value cannot appear in the query, so detection only |
140 |
needs to be done at exit from the subject-query matching loop. |
141 |
For uncompressed sequences, ambiguities in the query (i.e. seq1) |
142 |
always count as mismatches */ |
143 |
if (reverse) { |
144 |
while (seq1_index < len1 && seq2_index < len2 && |
145 |
//seq1[len1-1 - seq1_index] < 4 && |
146 |
seq1[len1-1 - seq1_index] == seq2[len2-1 - seq2_index]) { |
147 |
++seq1_index; |
148 |
++seq2_index; |
149 |
} |
150 |
//if (seq2_index < len2 && seq2[len2-1-seq2_index] == FENCE_SENTRY) { |
151 |
//if len2-1-seq2_index<=0) { |
152 |
// fence_hit = true; |
153 |
// } |
154 |
} |
155 |
else { //forward lookup |
156 |
while (seq1_index < len1 && seq2_index < len2 && |
157 |
//seq1[seq1_index] < 4 && |
158 |
seq1[seq1_index] == seq2[seq2_index]) { |
159 |
++seq1_index; |
160 |
++seq2_index; |
161 |
} |
162 |
//if (seq2_index < len2 && seq2[seq2_index] == FENCE_SENTRY) { |
163 |
//if (seq2_index==len2) { |
164 |
// fence_hit = true; |
165 |
//} |
166 |
} |
167 |
return seq1_index - start_index; |
168 |
} |
169 |
|
170 |
|
171 |
|
172 |
/** During the traceback for a non-affine greedy alignment, |
173 |
compute the diagonal that will result from the next |
174 |
traceback operation |
175 |
|
176 |
@param last_seq2_off Array of offsets into the second sequence; |
177 |
last_seq2_off[d][k] gives the largest offset into |
178 |
the second sequence that lies on diagonal k and |
179 |
has distance d [in] |
180 |
@param d Starting distance [in] |
181 |
@param diag Index of diagonal that produced the starting distance [in] |
182 |
@param seq2_index The offset into the second sequence after the traceback |
183 |
operation has completed [out] |
184 |
@return The diagonal resulting from the next traceback operation |
185 |
being applied |
186 |
*/ |
187 |
int s_GetNextNonAffineTback(int **last_seq2_off, int d, |
188 |
int diag, int *seq2_index) { |
189 |
// choose the traceback operation that results in the |
190 |
// largest seq2 offset at this point, then compute the |
191 |
// new diagonal that is implied by the operation |
192 |
if (last_seq2_off[d-1][diag-1] > |
193 |
GMAX(last_seq2_off[d-1][diag], last_seq2_off[d-1][diag+1])) { |
194 |
*seq2_index = last_seq2_off[d-1][diag-1]; |
195 |
return diag - 1; // gap in seq2 |
196 |
} |
197 |
if (last_seq2_off[d-1][diag] > last_seq2_off[d-1][diag+1]) { |
198 |
*seq2_index = last_seq2_off[d-1][diag]; |
199 |
return diag; // match |
200 |
} |
201 |
*seq2_index = last_seq2_off[d-1][diag+1]; |
202 |
return diag + 1; // gap in seq1 |
203 |
} |
204 |
|
205 |
|
206 |
int GXGreedyExtend(const char* seq1, int len1, |
207 |
const char* seq2, int len2, |
208 |
bool reverse, //int xdrop_threshold, int match_cost, int mismatch_cost, |
209 |
int& seq1_align_len, int& seq2_align_len, |
210 |
CGreedyAlignData& aux_data, |
211 |
GXEditScript *edit_block) { |
212 |
//GapPrelimEditBlock *edit_block, |
213 |
//bool& fence_hit, SGreedySeed *seed) { |
214 |
int seq1_index; |
215 |
int seq2_index; |
216 |
int index; |
217 |
int d; |
218 |
int k; |
219 |
int diag_lower, diag_upper; |
220 |
int max_dist; |
221 |
int diag_origin; |
222 |
int best_dist; |
223 |
int best_diag; |
224 |
int** last_seq2_off; |
225 |
int* max_score; |
226 |
int xdrop_offset; |
227 |
int longest_match_run; |
228 |
bool end1_reached, end2_reached; |
229 |
GXMemPool* mem_pool; |
230 |
|
231 |
/* ordinary dynamic programming alignment, for each offset |
232 |
in seq1, walks through offsets in seq2 until an X-dropoff |
233 |
test fails, saving the best score encountered along |
234 |
the way. Instead of score, this code tracks the 'distance' |
235 |
(number of mismatches plus number of gaps) between seq1 |
236 |
and seq2. Instead of walking through sequence offsets, it |
237 |
walks through diagonals that can achieve a given distance. |
238 |
|
239 |
Note that in what follows, the numbering of diagonals implies |
240 |
a dot matrix where increasing seq1 offsets go to the right on |
241 |
the x axis, and increasing seq2 offsets go up the y axis. |
242 |
The gapped alignment thus proceeds up and to the right in |
243 |
the graph, and diagonals are numbered increasing to the right */ |
244 |
|
245 |
best_dist = 0; |
246 |
best_diag = 0; |
247 |
|
248 |
/* set the number of distinct distances the algorithm will |
249 |
examine in the search for an optimal alignment. The |
250 |
heuristic worst-case running time of the algorithm is |
251 |
O(max_dist**2 + (len1+len2)); for sequences which are |
252 |
very similar, the average running time will be sig- |
253 |
nificantly better than this */ |
254 |
|
255 |
max_dist = GMIN(GREEDY_MAX_COST, |
256 |
(len2/GREEDY_MAX_COST_FRACTION + 1)); |
257 |
|
258 |
/* the main loop assumes that the index of all diagonals is |
259 |
biased to lie in the middle of allocated bookkeeping |
260 |
structures */ |
261 |
|
262 |
diag_origin = max_dist + 2; |
263 |
|
264 |
// last_seq2_off[d][k] is the largest offset into seq2 that |
265 |
// lies on diagonal k and has distance d |
266 |
|
267 |
last_seq2_off = aux_data.last_seq2_off; |
268 |
|
269 |
/* Instead of tracking the best alignment score and using |
270 |
xdrop_theshold directly, track the best score for each |
271 |
unique distance and use the best score for some previously |
272 |
computed distance to implement the X-dropoff test. |
273 |
|
274 |
xdrop_offset gives the distance backwards in the score |
275 |
array to look */ |
276 |
|
277 |
xdrop_offset = aux_data.xdrop_ofs; |
278 |
|
279 |
// find the offset of the first mismatch between seq1 and seq2 |
280 |
|
281 |
index = s_FindFirstMismatch(seq1, len1, seq2, len2, 0, 0, reverse); |
282 |
// fence_hit, reverse, rem); |
283 |
|
284 |
// update the extents of the alignment, and bail out |
285 |
// early if no further work is needed |
286 |
|
287 |
seq1_align_len = index; |
288 |
seq2_align_len = index; |
289 |
seq1_index = index; |
290 |
/* |
291 |
seed->start_q = 0; |
292 |
seed->start_s = 0; |
293 |
seed->match_length = index; |
294 |
*/ |
295 |
longest_match_run = index; |
296 |
|
297 |
if (index == len1 || index == len2) { |
298 |
/* Return the number of differences, which is zero here */ |
299 |
if (edit_block != NULL) |
300 |
//GapPrelimEditBlockAdd(edit_block, eGapAlignSub, index); |
301 |
edit_block->opRep(index); |
302 |
return 0; |
303 |
} |
304 |
|
305 |
// set up the memory pool |
306 |
mem_pool = aux_data.space; |
307 |
if (edit_block == NULL) { |
308 |
mem_pool = NULL; |
309 |
} |
310 |
else if (mem_pool == NULL) { |
311 |
aux_data.space = mem_pool = new GXMemPool(); |
312 |
} |
313 |
else { |
314 |
mem_pool->refresh(); |
315 |
} |
316 |
|
317 |
/* set up the array of per-distance maximum scores. There |
318 |
are max_diags + xdrop_offset distances to track, the first |
319 |
xdrop_offset of which are 0 */ |
320 |
|
321 |
max_score = aux_data.max_score + xdrop_offset; |
322 |
for (index = 0; index < xdrop_offset; index++) |
323 |
aux_data.max_score[index] = 0; |
324 |
|
325 |
// fill in the initial offsets of the distance matrix |
326 |
|
327 |
last_seq2_off[0][diag_origin] = seq1_index; |
328 |
max_score[0] = seq1_index * aux_data.match_reward; |
329 |
diag_lower = diag_origin - 1; |
330 |
diag_upper = diag_origin + 1; |
331 |
end1_reached = end2_reached = false; |
332 |
|
333 |
// for each distance |
334 |
for (d = 1; d <= max_dist; d++) { |
335 |
int xdrop_score; |
336 |
int curr_score; |
337 |
int curr_extent = 0; |
338 |
int curr_seq2_index = 0; |
339 |
int curr_diag = 0; |
340 |
int tmp_diag_lower = diag_lower; |
341 |
int tmp_diag_upper = diag_upper; |
342 |
|
343 |
// Assign impossible seq2 offsets to any diagonals that |
344 |
// are not in the range (diag_lower,diag_upper). |
345 |
// These will serve as sentinel values for the inner loop |
346 |
last_seq2_off[d - 1][diag_lower-1] = kInvalidOffset; |
347 |
last_seq2_off[d - 1][diag_lower] = kInvalidOffset; |
348 |
last_seq2_off[d - 1][diag_upper] = kInvalidOffset; |
349 |
last_seq2_off[d - 1][diag_upper+1] = kInvalidOffset; |
350 |
|
351 |
// compute the score for distance d corresponding to the X-dropoff criterion |
352 |
|
353 |
xdrop_score = max_score[d - xdrop_offset] + |
354 |
(aux_data.match_reward + aux_data.mismatch_penalty) * d - aux_data.x_drop; |
355 |
xdrop_score = (int)ceil((double)xdrop_score / (aux_data.match_reward>>1)); |
356 |
|
357 |
// for each diagonal of interest |
358 |
for (k = tmp_diag_lower; k <= tmp_diag_upper; k++) { |
359 |
/* find the largest offset into seq2 that increases |
360 |
the distance from d-1 to d (i.e. keeps the alignment |
361 |
from getting worse for as long as possible), then |
362 |
choose the offset into seq1 that will keep the |
363 |
resulting diagonal fixed at k |
364 |
|
365 |
Note that this requires kInvalidOffset+1 to be smaller |
366 |
than any valid offset into seq2, i.e. to be negative */ |
367 |
|
368 |
seq2_index = GMAX(last_seq2_off[d - 1][k + 1], |
369 |
last_seq2_off[d - 1][k ]) + 1; |
370 |
seq2_index = GMAX(seq2_index, last_seq2_off[d - 1][k - 1]); |
371 |
seq1_index = seq2_index + k - diag_origin; |
372 |
|
373 |
if (seq2_index < 0 || seq1_index + seq2_index < xdrop_score) { |
374 |
|
375 |
// if no valid diagonal can reach distance d, or the |
376 |
// X-dropoff test fails, narrow the range of diagonals |
377 |
// to test and skip to the next diagonal |
378 |
if (k == diag_lower) |
379 |
diag_lower++; |
380 |
else |
381 |
last_seq2_off[d][k] = kInvalidOffset; |
382 |
continue; |
383 |
} |
384 |
diag_upper = k; |
385 |
|
386 |
/* slide down diagonal k until a mismatch |
387 |
occurs. As long as only matches are encountered, |
388 |
the current distance d will not change */ |
389 |
|
390 |
index = s_FindFirstMismatch(seq1, len1, seq2, len2, |
391 |
seq1_index, seq2_index, reverse); |
392 |
//fence_hit, reverse, rem); |
393 |
if (index > longest_match_run) { |
394 |
//seed->start_q = seq1_index; |
395 |
//seed->start_s = seq2_index; |
396 |
//seed->match_length = index; |
397 |
longest_match_run = index; |
398 |
} |
399 |
seq1_index += index; |
400 |
seq2_index += index; |
401 |
|
402 |
// set the new largest seq2 offset that achieves |
403 |
// distance d on diagonal k |
404 |
|
405 |
last_seq2_off[d][k] = seq2_index; |
406 |
|
407 |
// since all values of k are constrained to have the |
408 |
// same distance d, the value of k which maximizes the |
409 |
// alignment score is the one that covers the most of seq1 and seq2 |
410 |
if (seq1_index + seq2_index > curr_extent) { |
411 |
curr_extent = seq1_index + seq2_index; |
412 |
curr_seq2_index = seq2_index; |
413 |
curr_diag = k; |
414 |
} |
415 |
|
416 |
/* clamp the bounds on diagonals to avoid walking off |
417 |
either sequence. Because the bounds increase by at |
418 |
most one for each distance, diag_lower and diag_upper |
419 |
can each be of size at most max_diags+2 */ |
420 |
|
421 |
if (seq2_index == len2) { |
422 |
diag_lower = k + 1; |
423 |
end2_reached = true; |
424 |
} |
425 |
if (seq1_index == len1) { |
426 |
diag_upper = k - 1; |
427 |
end1_reached = true; |
428 |
} |
429 |
} // end loop over diagonals |
430 |
|
431 |
// compute the maximum score possible for distance d |
432 |
curr_score = curr_extent * (aux_data.match_reward / 2) - |
433 |
d * (aux_data.match_reward + aux_data.mismatch_penalty); |
434 |
// if this is the best score seen so far, update the |
435 |
// statistics of the best alignment |
436 |
if (curr_score > max_score[d - 1]) { |
437 |
max_score[d] = curr_score; |
438 |
best_dist = d; |
439 |
best_diag = curr_diag; |
440 |
seq2_align_len = curr_seq2_index; |
441 |
seq1_align_len = curr_seq2_index + best_diag - diag_origin; |
442 |
} |
443 |
else { |
444 |
max_score[d] = max_score[d - 1]; |
445 |
} |
446 |
|
447 |
// alignment has finished if the lower and upper bounds |
448 |
// on diagonals to check have converged to each other |
449 |
|
450 |
if (diag_lower > diag_upper) |
451 |
break; |
452 |
|
453 |
/* set up for the next distance to examine. Because the |
454 |
bounds increase by at most one for each distance, |
455 |
diag_lower and diag_upper can each be of size at |
456 |
most max_diags+2 */ |
457 |
|
458 |
if (!end2_reached) |
459 |
diag_lower--; |
460 |
if (!end1_reached) |
461 |
diag_upper++; |
462 |
|
463 |
if (edit_block == NULL) { |
464 |
// if no traceback is specified, the next row of |
465 |
// last_seq2_off can reuse previously allocated memory |
466 |
//WARNING The following assumes two arrays of |
467 |
// at least max_dist+4 int's have already been allocated |
468 |
last_seq2_off[d + 1] = last_seq2_off[d - 1]; |
469 |
} |
470 |
else { |
471 |
// traceback requires all rows of last_seq2_off to be saved, |
472 |
// so a new row must be allocated |
473 |
last_seq2_off[d + 1] = (int*)mem_pool->getByteSpace((diag_upper - diag_lower + 7)*sizeof(int)); |
474 |
// move the origin for this row backwards |
475 |
// dubious pointer arithmetic ?! |
476 |
//last_seq2_off[d + 1] = last_seq2_off[d + 1] - diag_lower + 2; |
477 |
} |
478 |
} // end loop over distinct distances |
479 |
|
480 |
|
481 |
if (edit_block == NULL) |
482 |
return best_dist; |
483 |
|
484 |
//---- perform traceback |
485 |
d = best_dist; |
486 |
seq1_index = seq1_align_len; |
487 |
seq2_index = seq2_align_len; |
488 |
// for all positive distances |
489 |
|
490 |
//if (fence_hit && *fence_hit) |
491 |
// goto done; |
492 |
if (index==len1 || index==len2) d=0; |
493 |
while (d > 0) { |
494 |
int new_diag; |
495 |
int new_seq2_index; |
496 |
|
497 |
/* retrieve the value of the diagonal after the next |
498 |
traceback operation. best_diag starts off with the |
499 |
value computed during the alignment process */ |
500 |
|
501 |
new_diag = s_GetNextNonAffineTback(last_seq2_off, d, |
502 |
best_diag, &new_seq2_index); |
503 |
|
504 |
if (new_diag == best_diag) { |
505 |
// same diagonal: issue a group of substitutions |
506 |
if (seq2_index - new_seq2_index > 0) { |
507 |
edit_block->opRep(seq2_index - new_seq2_index); |
508 |
} |
509 |
} |
510 |
else if (new_diag < best_diag) { |
511 |
// smaller diagonal: issue a group of substitutions |
512 |
// and then a gap in seq2 */ |
513 |
if (seq2_index - new_seq2_index > 0) { |
514 |
edit_block->opRep(seq2_index - new_seq2_index); |
515 |
} |
516 |
//GapPrelimEditBlockAdd(edit_block, eGapAlignIns, 1); |
517 |
edit_block->opIns(1); |
518 |
} |
519 |
else { |
520 |
// larger diagonal: issue a group of substitutions |
521 |
// and then a gap in seq1 |
522 |
if (seq2_index - new_seq2_index - 1 > 0) { |
523 |
edit_block->opRep(seq2_index - new_seq2_index - 1); |
524 |
} |
525 |
edit_block->opDel(1); |
526 |
} |
527 |
d--; |
528 |
best_diag = new_diag; |
529 |
seq2_index = new_seq2_index; |
530 |
} |
531 |
//done: |
532 |
// handle the final group of substitutions back to distance zero, |
533 |
// i.e. back to offset zero of seq1 and seq2 |
534 |
//GapPrelimEditBlockAdd(edit_block, eGapAlignSub, |
535 |
// last_seq2_off[0][diag_origin]); |
536 |
edit_block->opRep(last_seq2_off[0][diag_origin]); |
537 |
if (!reverse) |
538 |
edit_block->reverse(); |
539 |
return best_dist; |
540 |
} |
541 |
|
542 |
void printEditScript(GXEditScript* ed_script) { |
543 |
uint i; |
544 |
if (ed_script==NULL || ed_script->opnum == 0) |
545 |
return; |
546 |
for (i=0; i<ed_script->opnum; i++) { |
547 |
int num=((ed_script->ops[i]) >> 2); |
548 |
unsigned char op_type = 3 - ( ed_script->ops[i] & gxEDIT_OP_MASK ); |
549 |
if (op_type == 3) |
550 |
GError("Error: printEditScript encountered op_type 3 ?!\n"); |
551 |
GMessage("%d%c ", num, xgapcodes[op_type]); |
552 |
} |
553 |
GMessage("\n"); |
554 |
} |
555 |
|
556 |
GXAlnInfo* GreedyAlign(const char* q_seq, int q_alnstart, const char* s_seq, int s_alnstart, |
557 |
bool editscript, int reward, int penalty, int xdrop) { |
558 |
int q_max=strlen(q_seq); //query |
559 |
int s_max=strlen(s_seq); //subj |
560 |
return GreedyAlignRegion(q_seq, q_alnstart, q_max, |
561 |
s_seq, s_alnstart, s_max, reward, penalty, xdrop, NULL, NULL, editscript); |
562 |
} |
563 |
|
564 |
struct GXSeedTable { |
565 |
int a_num, b_num; |
566 |
int a_cap, b_cap; |
567 |
char* xc; |
568 |
GXSeedTable(int a=12, int b=255) { |
569 |
a_cap=0; |
570 |
b_cap=0; |
571 |
a_num=0; |
572 |
b_num=0; |
573 |
xc=NULL; |
574 |
init(a,b); |
575 |
} |
576 |
~GXSeedTable() { |
577 |
GFREE(xc); |
578 |
} |
579 |
void init(int a, int b) { |
580 |
a_num=a; |
581 |
b_num=b; |
582 |
bool resize=false; |
583 |
if (b_num>b_cap) { resize=true; b_cap=b_num;} |
584 |
if (a_num>a_cap) { resize=true; a_cap=a_num;} |
585 |
if (resize) { |
586 |
GFREE(xc); |
587 |
GCALLOC(xc, (a_num*b_num)); |
588 |
} |
589 |
else { |
590 |
//just clear up to a_max, b_max |
591 |
memset((void*)xc, 0, (a_num*b_num)); |
592 |
} |
593 |
} |
594 |
char& x(int ax, int by) { |
595 |
return xc[by*a_num+ax]; |
596 |
} |
597 |
|
598 |
}; |
599 |
|
600 |
const int a_m_score=2; //match score |
601 |
const int a_mis_score=-3; //mismatch |
602 |
const int a_dropoff_score=7; |
603 |
const int a_min_score=12; //at least 6 bases full match |
604 |
|
605 |
// ------------------ adaptor matching - simple k-mer seed & extend, no indels for now |
606 |
//when a k-mer match is found, simply try to extend the alignment using a drop-off scheme |
607 |
//check minimum score and |
608 |
//for 3' adaptor trimming: |
609 |
// require that the right end of the alignment for either the adaptor OR the read must be |
610 |
// < 3 distance from its right end |
611 |
// for 5' adaptor trimming: |
612 |
// require that the left end of the alignment for either the adaptor OR the read must |
613 |
// be at coordinate < 3 from start |
614 |
|
615 |
bool extendUngapped(const char* a, int alen, int ai, |
616 |
const char* b, int blen, int bi, int& mlen, int& l5, int& l3, bool end5=false) { |
617 |
//so the alignment starts at ai in a, bi in b, with a perfect match of length mlen |
618 |
//if (debug) { |
619 |
// GMessage(">> in %s\n\textending hit: %s at position %d\n", a, (dbg.substr(bi, mlen)).chars(), ai); |
620 |
// } |
621 |
int a_l=ai; //alignment coordinates on a |
622 |
int a_r=ai+mlen-1; |
623 |
int b_l=bi; //alignment coordinates on b |
624 |
int b_r=bi+mlen-1; |
625 |
int ai_maxscore=ai; |
626 |
int bi_maxscore=bi; |
627 |
int score=mlen*a_m_score; |
628 |
int maxscore=score; |
629 |
int mism5score=a_mis_score; |
630 |
if (end5 && ai<(alen>>1)) mism5score-=2; // increase penalty for mismatches at 5' end |
631 |
//try to extend to the left first, if possible |
632 |
while (ai>0 && bi>0) { |
633 |
ai--; |
634 |
bi--; |
635 |
score+= (a[ai]==b[bi])? a_m_score : mism5score; |
636 |
if (score>maxscore) { |
637 |
ai_maxscore=ai; |
638 |
bi_maxscore=bi; |
639 |
maxscore=score; |
640 |
} |
641 |
else if (maxscore-score>a_dropoff_score) break; |
642 |
} |
643 |
a_l=ai_maxscore; |
644 |
b_l=bi_maxscore; |
645 |
//now extend to the right |
646 |
ai_maxscore=a_r; |
647 |
bi_maxscore=b_r; |
648 |
ai=a_r; |
649 |
bi=b_r; |
650 |
score=maxscore; |
651 |
//sometimes there are extra As at the end of the read, ignore those |
652 |
if (a[alen-2]=='A' && a[alen-1]=='A') { |
653 |
alen-=2; |
654 |
while (a[alen-1]=='A' && alen>ai) alen--; |
655 |
} |
656 |
while (ai<alen-1 && bi<blen-1) { |
657 |
ai++; |
658 |
bi++; |
659 |
//score+= (a[ai]==b[bi])? a_m_score : a_mis_score; |
660 |
if (a[ai]==b[bi]) { //match |
661 |
score+=a_m_score; |
662 |
if (ai>=alen-2) { |
663 |
score+=a_m_score-(alen-ai-1); |
664 |
} |
665 |
} |
666 |
else { //mismatch |
667 |
score+=a_mis_score; |
668 |
} |
669 |
if (score>maxscore) { |
670 |
ai_maxscore=ai; |
671 |
bi_maxscore=bi; |
672 |
maxscore=score; |
673 |
} |
674 |
else if (maxscore-score>a_dropoff_score) break; |
675 |
} |
676 |
a_r=ai_maxscore; |
677 |
b_r=bi_maxscore; |
678 |
int a_ovh3=alen-a_r-1; |
679 |
int b_ovh3=blen-b_r-1; |
680 |
int mmovh3=(a_ovh3<b_ovh3)? a_ovh3 : b_ovh3; |
681 |
int mmovh5=(a_l<b_l)? a_l : b_l; |
682 |
if (maxscore>=a_min_score && mmovh3<2 && mmovh5<2) { |
683 |
if (a_l<a_ovh3) { |
684 |
//adaptor closer to the left end (typical for 5' adaptor) |
685 |
l5=a_r+1; |
686 |
l3=alen-1; |
687 |
} |
688 |
else { |
689 |
//adaptor matching at the right end (typical for 3' adaptor) |
690 |
l5=0; |
691 |
l3=a_l-1; |
692 |
} |
693 |
return true; |
694 |
} |
695 |
//do not trim: |
696 |
l5=0; |
697 |
l3=alen-1; |
698 |
return false; |
699 |
} |
700 |
|
701 |
GXBandSet* collectSeeds(GList<GXSeed>& seeds, GXSeqData& sd, GAlnTrimType trim_intent) { |
702 |
int bimin=GMAX(0,(sd.blen-sd.alen-6)); //from collectSeeds_R |
703 |
int bimax=GMIN((sd.alen+2), (sd.blen-6)); |
704 |
int b_start = (trim_intent==galn_TrimRight) ? bimin : 0; |
705 |
int b_end = (trim_intent==galn_TrimLeft) ? bimax : sd.blen-6; |
706 |
//gx.init(a_maxlen, b_maxlen); |
707 |
GXSeedTable gx(sd.alen, sd.blen); |
708 |
GXBandSet* diagstrips=new GXBandSet(sd.alen, sd.blen); //set of overlapping 3-diagonal strips |
709 |
for (int bi=b_start;bi<=b_end;bi++) { |
710 |
//for each 6-mer of seqb |
711 |
uint16 bv = get6mer((char*) & (sd.bseq[bi])); |
712 |
GVec<uint16>* alocs = sd.amers[bv]; |
713 |
if (alocs==NULL) continue; |
714 |
//extend each hit |
715 |
for (int h=0;h<alocs->Count();h++) { |
716 |
int ai=alocs->Get(h); |
717 |
//word match |
718 |
if (gx.x(ai,bi)) |
719 |
//already have a previous seed covering this region of this diagonal |
720 |
continue; |
721 |
if (trim_intent==galn_TrimLeft && sd.blen>sd.alen+6 && bi>ai+6) |
722 |
continue; //improper positioning for 5' trimming |
723 |
if (trim_intent==galn_TrimRight && sd.blen>sd.alen+6 && bi<ai-6) |
724 |
continue; //improper positioning for 5' trimming |
725 |
|
726 |
//TODO: there could be Ns in this seed, should we count/adjust score? |
727 |
for (int i=0;i<6;i++) |
728 |
gx.x(ai+i,bi+i)=1; |
729 |
//see if we can extend to the right |
730 |
int aix=ai+6; |
731 |
int bix=bi+6; |
732 |
int len=6; |
733 |
while (bix<sd.blen && aix<sd.alen && sd.aseq[aix]==sd.bseq[bix]) { |
734 |
gx.x(aix,bix)=1; |
735 |
aix++;bix++; |
736 |
len++; |
737 |
} |
738 |
if (len>sd.amlen) { |
739 |
//heuristics: very likely the best we can get |
740 |
//quick match shortcut |
741 |
diagstrips->qmatch=new GXSeed(ai,bi,len); |
742 |
return diagstrips; |
743 |
} |
744 |
if (bi>bimax && bi<bimin && len<9) |
745 |
//ignore mid-sequence seeds that are not high scoring |
746 |
continue; |
747 |
|
748 |
GXSeed* newseed=new GXSeed(ai,bi,len); |
749 |
seeds.Add(newseed); |
750 |
diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals |
751 |
//keep last resort terminal match to be used if no better alignment is there |
752 |
if (bi<2 && ai+len>=sd.alen-1 && |
753 |
(!diagstrips->tmatch_l || diagstrips->tmatch_l->len<len)) |
754 |
diagstrips->tmatch_l=newseed; |
755 |
//collectSeeds_R: |
756 |
if (ai<2 && bi+len>sd.blen-2 && |
757 |
(!diagstrips->tmatch_r || diagstrips->tmatch_r->len<len)) |
758 |
diagstrips->tmatch_r=newseed; |
759 |
} |
760 |
} //for each 6-mer of the read |
761 |
for (int i=0;i<diagstrips->Count();i++) { |
762 |
diagstrips->Get(i)->finalize(); //adjust scores due to overlaps or gaps between seeds |
763 |
} |
764 |
diagstrips->setSorted(true); //sort by score |
765 |
return diagstrips; |
766 |
} |
767 |
|
768 |
int cmpSeedScore(const pointer p1, const pointer p2) { |
769 |
//return (((GXSeed*)s2)->len-((GXSeed*)s1)->len); |
770 |
GXSeed* s1=(GXSeed*)p1; |
771 |
GXSeed* s2=(GXSeed*)p2; |
772 |
if (s1->len==s2->len) { |
773 |
return (s1->b_ofs-s2->b_ofs); |
774 |
} |
775 |
else return (s2->len-s1->len); |
776 |
} |
777 |
|
778 |
int cmpSeedScore_R(const pointer p1, const pointer p2) { |
779 |
//return (((GXSeed*)s2)->len-((GXSeed*)s1)->len); |
780 |
GXSeed* s1=(GXSeed*)p1; |
781 |
GXSeed* s2=(GXSeed*)p2; |
782 |
if (s1->len==s2->len) { |
783 |
return (s2->b_ofs-s1->b_ofs); |
784 |
} |
785 |
else return (s2->len-s1->len); |
786 |
} |
787 |
|
788 |
|
789 |
int cmpSeedDiag(const pointer p1, const pointer p2) { |
790 |
GXSeed* s1=(GXSeed*)p1; |
791 |
GXSeed* s2=(GXSeed*)p2; |
792 |
return ((s1->b_ofs-s1->a_ofs)-(s2->b_ofs-s2->a_ofs)); |
793 |
} |
794 |
|
795 |
|
796 |
int cmpDiagBands_R(const pointer p1, const pointer p2) { |
797 |
//return (((GXSeed*)s2)->len-((GXSeed*)s1)->len); |
798 |
GXBand* b1=(GXBand*)p1; |
799 |
GXBand* b2=(GXBand*)p2; |
800 |
if (b1->score==b2->score) { |
801 |
return (b2->w_min_b-b1->w_min_b); |
802 |
} |
803 |
else return (b2->score-b1->score); |
804 |
} |
805 |
|
806 |
|
807 |
|
808 |
GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max, |
809 |
const char* s_seq, int s_alnstart, int s_max, |
810 |
int reward, int penalty, int xdrop, CGreedyAlignData* gxmem, |
811 |
CAlnTrim* trim, bool editscript) { |
812 |
GXEditScript* ed_script_fwd = NULL; |
813 |
GXEditScript* ed_script_rev = NULL; |
814 |
if ( q_alnstart>q_max || q_alnstart<1 || s_alnstart>s_max || s_alnstart<1 ) |
815 |
GError("GreedyAlign() Error: invalid anchor coordinate.\n"); |
816 |
q_alnstart--; |
817 |
s_alnstart--; |
818 |
if (q_seq==NULL || q_seq[0]==0 || s_seq==NULL || s_seq[0]==0) |
819 |
GError("GreedyAlign() Error: attempt to use an empty sequence string!\n"); |
820 |
/*if (q_seq[q_alnstart]!=s_seq[s_alnstart]) |
821 |
GError("GreedyAlign() Error: improper anchor (mismatch):\n%s (start %d len %d)\n%s (start %d len %d)\n", |
822 |
q_seq, q_alnstart, q_max, s_seq, s_alnstart, s_max); |
823 |
*/ |
824 |
int q_ext_l=0, q_ext_r=0, s_ext_l=0, s_ext_r=0; |
825 |
const char* q=q_seq+q_alnstart; |
826 |
int q_avail=q_max-q_alnstart; |
827 |
const char* s=s_seq+s_alnstart; |
828 |
int s_avail=s_max-s_alnstart; |
829 |
if (penalty<0) penalty=-penalty; |
830 |
GXAlnInfo* alninfo=NULL; |
831 |
bool freeAlnMem=(gxmem==NULL); |
832 |
if (freeAlnMem) { |
833 |
gxmem=new CGreedyAlignData(reward, penalty, xdrop); |
834 |
reward=gxmem->match_reward; |
835 |
penalty=gxmem->mismatch_penalty; |
836 |
xdrop=gxmem->x_drop; |
837 |
} |
838 |
else |
839 |
gxmem->reset(); |
840 |
int minMatch= trim ? trim->minMatch : 6; |
841 |
int MIN_GREEDY_SCORE=minMatch*reward; //minimum score for an alignment to be reported for 0 diffs |
842 |
int retscore = 0; |
843 |
int numdiffs = 0; |
844 |
if (trim!=NULL && trim->type==galn_TrimLeft) { |
845 |
//intent: trimming the left side |
846 |
if (editscript) |
847 |
ed_script_rev=new GXEditScript(); |
848 |
|
849 |
int numdiffs_l = GXGreedyExtend(s_seq, s_alnstart, q_seq, q_alnstart, true, // xdrop, reward, penalty, |
850 |
s_ext_l, q_ext_l, *gxmem, ed_script_rev); |
851 |
//check this extension here and bail out if it's not a good extension |
852 |
if (s_ext_l+(trim->seedlen>>1) < trim->safelen && |
853 |
q_alnstart+1-q_ext_l>1 && |
854 |
s_alnstart+1-s_ext_l>trim->l_boundary) { |
855 |
delete ed_script_rev; |
856 |
if (freeAlnMem) delete gxmem; |
857 |
return NULL; |
858 |
} |
859 |
|
860 |
if (editscript) |
861 |
ed_script_fwd=new GXEditScript(); |
862 |
int numdiffs_r = GXGreedyExtend(s, s_avail, q, q_avail, false, //xdrop, reward, penalty, |
863 |
s_ext_r, q_ext_r, *gxmem, ed_script_fwd); |
864 |
numdiffs=numdiffs_r+numdiffs_l; |
865 |
//convert num diffs to actual score |
866 |
retscore = (q_ext_r + s_ext_r + q_ext_l + s_ext_l)*(reward>>1) - numdiffs*(reward+penalty); |
867 |
if (editscript) |
868 |
ed_script_rev->Append(ed_script_fwd); //combine the two extensions |
869 |
} |
870 |
else { |
871 |
if (editscript) { |
872 |
ed_script_fwd=new GXEditScript(); |
873 |
} |
874 |
int numdiffs_r = GXGreedyExtend(s, s_avail, q, q_avail, false, // xdrop, reward, penalty, |
875 |
s_ext_r, q_ext_r, *gxmem, ed_script_fwd); |
876 |
//check extension here and bail out if not a good right extension |
877 |
//assuming s_max is really at the right end of s_seq |
878 |
if (trim!=NULL && trim->type==galn_TrimRight && |
879 |
s_ext_r+(trim->seedlen>>1) < trim->safelen && |
880 |
q_alnstart+q_ext_r<q_max-2 && |
881 |
s_alnstart+s_ext_r<trim->r_boundary) { |
882 |
delete ed_script_fwd; |
883 |
if (freeAlnMem) delete gxmem; |
884 |
return NULL; |
885 |
} |
886 |
if (editscript) |
887 |
ed_script_rev=new GXEditScript(); |
888 |
int numdiffs_l = GXGreedyExtend(s_seq, s_alnstart, q_seq, q_alnstart, true, // xdrop, reward, penalty, |
889 |
s_ext_l, q_ext_l, *gxmem, ed_script_rev); |
890 |
//convert num diffs to actual score |
891 |
numdiffs=numdiffs_r+numdiffs_l; |
892 |
retscore = (q_ext_r + s_ext_r + q_ext_l + s_ext_l)*(reward>>1) - numdiffs*(reward+penalty); |
893 |
if (editscript) |
894 |
ed_script_rev->Append(ed_script_fwd); //combine the two extensions |
895 |
} |
896 |
if (retscore>=MIN_GREEDY_SCORE) { |
897 |
alninfo=new GXAlnInfo(q_seq, q_alnstart+1-q_ext_l, q_alnstart+q_ext_r, s_seq, s_alnstart+1-s_ext_l, s_alnstart+s_ext_r); |
898 |
int hsp_length = GMIN(q_ext_l+q_ext_r, s_ext_l+s_ext_r); |
899 |
alninfo->score=retscore; |
900 |
if (gxmem->scaled) alninfo->score >>= 1; |
901 |
alninfo->pid = 100 * (1 - ((double) numdiffs) / hsp_length); |
902 |
#ifdef TRIMDEBUG |
903 |
//if (ed_script_rev) { |
904 |
// GMessage("Final Edit script ::: "); |
905 |
// printEditScript(ed_script_rev); |
906 |
// } |
907 |
#endif |
908 |
alninfo->editscript=ed_script_rev; |
909 |
alninfo->gapinfo = new CAlnGapInfo(ed_script_rev, alninfo->ql-1, alninfo->sl-1); |
910 |
} |
911 |
else { |
912 |
//if (freeAlnMem) delete gxmem; |
913 |
delete ed_script_rev; |
914 |
delete alninfo; |
915 |
alninfo=NULL; |
916 |
} |
917 |
if (freeAlnMem) delete gxmem; |
918 |
delete ed_script_fwd; |
919 |
return alninfo; |
920 |
} |
921 |
|
922 |
GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max, |
923 |
const char* s_seq, int s_alnstart, int s_max, CGreedyAlignData* gxmem, |
924 |
CAlnTrim* trim, bool editscript) { |
925 |
int reward=2; |
926 |
int penalty=10; |
927 |
int xdrop=32; |
928 |
if (gxmem) { |
929 |
reward=gxmem->match_reward; |
930 |
penalty=gxmem->mismatch_penalty; |
931 |
xdrop=gxmem->x_drop; |
932 |
} |
933 |
return GreedyAlignRegion(q_seq, q_alnstart, q_max, s_seq, s_alnstart, s_max, |
934 |
reward, penalty, xdrop, gxmem, trim, editscript); |
935 |
} |
936 |
|
937 |
GXAlnInfo* match_adaptor(GXSeqData& sd, GAlnTrimType trim_type, int minMatch, |
938 |
CGreedyAlignData* gxmem, int min_pid) { |
939 |
bool editscript=false; |
940 |
#ifdef TRIMDEBUG |
941 |
editscript=true; |
942 |
if (trim_type==galn_TrimLeft) { |
943 |
GMessage("=======> searching left (5') end : %s\n", sd.aseq); |
944 |
} |
945 |
else if (trim_type==galn_TrimRight) { |
946 |
GMessage("=======> searching right(3') end : %s\n", sd.aseq); |
947 |
} |
948 |
else if (trim_type==galn_TrimEither) { |
949 |
GMessage("==========> searching both ends : %s\n", sd.aseq); |
950 |
} |
951 |
#endif |
952 |
CAlnTrim trimInfo(trim_type, sd.bseq, sd.blen, sd.alen, minMatch, sd.amlen); |
953 |
GList<GXSeed> rseeds(true,true,false); |
954 |
GXBandSet* alnbands=collectSeeds(rseeds, sd, trim_type); |
955 |
GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal |
956 |
//did we find a shortcut? |
957 |
if (alnbands->qmatch) { |
958 |
#ifdef TRIMDEBUG |
959 |
GMessage("::: Found a quick long match at %d, len %d\n", |
960 |
alnbands->qmatch->b_ofs, alnbands->qmatch->len); |
961 |
#endif |
962 |
anchor_seeds.Add(alnbands->qmatch); |
963 |
} |
964 |
else { |
965 |
int max_top_bands=5; |
966 |
int top_band_count=0; |
967 |
for (int b=0;b<alnbands->Count();b++) { |
968 |
if (alnbands->Get(b)->score<6) break; |
969 |
//#ifdef TRIMDEBUG |
970 |
//GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score); |
971 |
//#endif |
972 |
top_band_count++; |
973 |
GXBand& band=*(alnbands->Get(b)); |
974 |
band.seeds.setSorted(cmpSeedScore); |
975 |
anchor_seeds.Add(band.seeds.First()); |
976 |
//band.tested=true; |
977 |
if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break; |
978 |
} |
979 |
//#ifdef TRIMDEBUG |
980 |
//GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count()); |
981 |
//#endif |
982 |
} |
983 |
GList<GXAlnInfo> galns(true,true,false); |
984 |
for (int i=0;i<anchor_seeds.Count();i++) { |
985 |
GXSeed& aseed=*anchor_seeds[i]; |
986 |
int a1=aseed.a_ofs+(aseed.len>>1)+1; |
987 |
int a2=aseed.b_ofs+(aseed.len>>1)+1; |
988 |
trimInfo.seedlen=aseed.len; |
989 |
#ifdef TRIMDEBUG |
990 |
GMessage("\t::: align from seed (%d, %d)of len %d.\n",aseed.a_ofs, aseed.b_ofs, |
991 |
aseed.len); |
992 |
#endif |
993 |
GXAlnInfo* alninfo=GreedyAlignRegion(sd.aseq, a1, sd.alen, |
994 |
sd.bseq, a2, sd.blen, gxmem, &trimInfo, editscript); |
995 |
if (alninfo && alninfo->pid>=min_pid && trimInfo.validate(alninfo)) |
996 |
galns.AddIfNew(alninfo, true); |
997 |
else delete alninfo; |
998 |
} |
999 |
|
1000 |
if (galns.Count()==0) { |
1001 |
//last resort: look for weaker terminal seeds |
1002 |
GPVec<GXSeed> tmatches(2,false); |
1003 |
if (trim_type!=galn_TrimRight) { |
1004 |
if (alnbands->tmatch_l) |
1005 |
tmatches.Add(alnbands->tmatch_l); |
1006 |
} |
1007 |
if (trim_type!=galn_TrimLeft) { |
1008 |
if (alnbands->tmatch_r) |
1009 |
tmatches.Add(alnbands->tmatch_r); |
1010 |
} |
1011 |
for (int i=0;i<tmatches.Count();i++) { |
1012 |
GXSeed& aseed=*tmatches[i]; |
1013 |
int halfseed=aseed.len>>1; |
1014 |
int a1=aseed.a_ofs+halfseed+1; |
1015 |
int a2=aseed.b_ofs+halfseed+1; |
1016 |
trimInfo.seedlen=aseed.len; |
1017 |
#ifdef TRIMDEBUG |
1018 |
GMessage("\t::: align from terminal seed (%d, %d)of len %d.\n",aseed.a_ofs, aseed.b_ofs, |
1019 |
aseed.len); |
1020 |
#endif |
1021 |
GXAlnInfo* alninfo=GreedyAlignRegion(sd.aseq, a1, sd.alen, |
1022 |
sd.bseq, a2, sd.blen, gxmem, &trimInfo, editscript); |
1023 |
if (alninfo && alninfo->pid>=min_pid && trimInfo.validate(alninfo)) |
1024 |
galns.AddIfNew(alninfo, true); |
1025 |
else delete alninfo; |
1026 |
}//for each terminal seed |
1027 |
} |
1028 |
//---- found all alignments |
1029 |
delete alnbands; |
1030 |
/* |
1031 |
#ifdef TRIMDEBUG |
1032 |
//print all valid alignments found |
1033 |
for (int i=0;i<galns.Count();i++) { |
1034 |
GXAlnInfo* alninfo=galns[i]; |
1035 |
GMessage("a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", alninfo->ql, alninfo->qr, |
1036 |
alninfo->sl, alninfo->sr, alninfo->score, alninfo->pid); |
1037 |
if (alninfo->gapinfo!=NULL) { |
1038 |
GMessage("Alignment:\n"); |
1039 |
alninfo->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len); |
1040 |
} |
1041 |
} |
1042 |
#endif |
1043 |
*/ |
1044 |
if (galns.Count()) { |
1045 |
GXAlnInfo* bestaln=galns.Shift(); |
1046 |
#ifdef TRIMDEBUG |
1047 |
GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr, |
1048 |
bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid); |
1049 |
if (bestaln->gapinfo!=NULL) { |
1050 |
bestaln->gapinfo->printAlignment(stderr, sd.aseq, sd.alen, sd.bseq, sd.blen); |
1051 |
} |
1052 |
#endif |
1053 |
return bestaln; |
1054 |
} |
1055 |
else return NULL; |
1056 |
} |