ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/primer_alignment.cc
Revision: 1.2
Committed: Wed May 4 18:03:45 2005 UTC (11 years, 7 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +116 -40 lines
Log Message:
Small bug fixes, plus codon based edit distance for peptide searching.

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #include "primer_alignment.h"
23 #include "util.h"
24 #include <list>
25
26 #if !defined(NO_STD_NAMESPACE)
27 using namespace std;
28 #endif
29
30 bool primer_alignment::global_align(char * const & text, unsigned int textlen,
31 std::string const & pattern,
32 int dirn,
33 unsigned int lmatch,
34 unsigned int rmatch,
35 int & matchlen) {
36 unsigned int textlenp1 = textlen+1;
37 unsigned int patlen = pattern.length();
38 unsigned int patlenp1 = patlen+1;
39
40 if (!dp_ || matsize_ < textlenp1*patlenp1) {
41 if (dp_) {
42 delete [] dp_;
43 delete [] best_;
44 }
45 // size should always be larger than (textlen+1)*(patlen+1)
46 if ((maxpatlen_+1) < patlenp1) {
47 maxpatlen_ = patlen;
48 }
49 matsize_ = (maxpatlen_+maxdist_+1)*(maxpatlen_+1);
50 if (matsize_ < textlenp1*patlenp1) {
51 matsize_ = textlenp1*patlenp1;
52 }
53 dp_ = new unsigned int[matsize_];
54 best_ = new unsigned int[matsize_];
55 }
56 #define index(i,j) ((i)*(textlenp1)+(j))
57 // checkpoint;
58 // cerr << text << endl;
59 // cerr << pattern << endl;
60 // cerr << dirn << endl;
61 // cerr << lmatch << endl;
62 // cerr << rmatch << endl;
63 // cerr << (int)eos_ << endl;
64 int lbexact=0;
65 int rbexact=patlenp1;
66 int const_violation = 5*maxdist_+1;
67 if (dirn < 0) {
68 if (lmatch > 0) rbexact = patlenp1-lmatch;
69 if (rmatch > 0) lbexact = rmatch;
70 } else {
71 if (lmatch > 0) lbexact = lmatch;
72 if (rmatch > 0) rbexact = patlenp1-rmatch;
73 }
74 // cerr << dirn << " " << lbexact << " " << rbexact << endl;
75 int p,t;
76 long unsigned int ind,ind10,ind01,ind11;
77 dp_[/*index(0,0)=*/0] = 0;
78 // cerr << "dp_ " << 0 << " " << 0 << " " << dp_[index(0,0)] << endl;
79 int lb,ub;
80 ub = (indels_?(dna_mut_?1:maxdist_):0);
81 if (patlen < ub) {
82 ub = patlen;
83 }
84 for (p=1,ind=textlenp1,ind10=0;p<=ub;p++,ind10=ind,ind+=textlenp1) {
85 // assert(index(p,0) == ind);
86 // assert(index(p-1,0) == ind10);
87 if (!indels_ || p < lbexact || p >= rbexact) {
88 dp_[/*index(p,0)=*/ind] = const_violation; /* price away a possible
89 deletion of this pattern
90 character... */
91 best_[/*index(p,0)=*/ind] = alignment_mask_constraint_violation;
92 } else {
93 if (!dna_mut_) {
94 dp_[/*index(p,0)=*/ind] = dp_[/*index(p-1,0)=*/ind10]+1; /* cost for a deletion */
95 best_[/*index(p,0)=*/ind] = alignment_mask_deletion;
96 } else {
97 dp_[/*index(p,0)=*/ind] = dp_[/*index(p-1,0)=*/ind10]+3; /* cost for a deletion */
98 best_[/*index(p,0)=*/ind] = alignment_mask_deletion_3;
99 }
100 }
101 // cerr << "dp_ " << p << " " << 0 << " " << dp_[index(p,0)] << endl;
102 }
103 ub = (indels_?(dna_mut_?1:maxdist_):0);
104 if (textlen < ub) {
105 ub = textlen;
106 }
107 char textch,patch;
108 for (t=1,ind01=0;t<=ub;ind01=t++) {
109 // assert(index(0,t) == t && index(0,t-1) == ind01);
110 if (dirn>0) {
111 textch = text[t-1];
112 } else {
113 textch = text[textlen-t];
114 }
115 if (!indels_ || 0 < lbexact || 0 >= rbexact || textch == eos_ ) {
116 dp_[/*index(0,t)=*/t] = const_violation; /* price away a
117 possible
118 insertion at
119 this pattern
120 character... */
121 best_[/*index(0,t)=*/t] = alignment_mask_constraint_violation;
122 } else {
123 if (!dna_mut_) {
124 dp_[/*index(0,t)=*/t] = dp_[/*index(0,t-1)=*/ind01]+1; /* cost for a insertion */
125 best_[/*index(0,t)=*/t] = alignment_mask_insertion;
126 } else {
127 dp_[/*index(0,t)=*/t] = dp_[/*index(0,t-1)=*/ind01]+3; /* cost for a insertion */
128 best_[/*index(0,t)=*/t] = alignment_mask_insertion_3;
129 }
130 }
131 // cerr << "dp_ " << 0 << " " << t << " " << dp_[index(0,t)] << endl;
132 }
133 int v,v1;
134 unsigned int ac,ac1;
135 // checkpoint;
136 for (p=1;p<=patlen;p++) {
137 lb = p-(indels_?(dna_mut_?1:maxdist_):0);
138 if (lb < 1) {
139 lb = 1;
140 }
141 ub = p+(indels_?(dna_mut_?1:maxdist_):0);
142 if (textlen < ub) {
143 ub = textlen;
144 }
145 int bestscorerow=const_violation;
146 // checkpoint;
147 ind = index(p,lb);
148 ind01 = ind-1;
149 ind11 = ind01-textlenp1;
150 ind10 = ind11+1;
151 for (t=lb;t<=ub;t++) {
152 // cerr << index(p,t) << " " << ind << endl;
153 // cerr << index(p-1,t) << " " << ind10 << endl;
154 // cerr << index(p-1,t-1) << " " << ind11 << endl;
155 // cerr << index(p,t-1) << " " << ind01 << endl;
156 // assert(index(p,t) == ind &&
157 // index(p-1,t) == ind10 &&
158 // index(p,t-1) == ind01 &&
159 // index(p-1,t-1) == ind11);
160 int ac=0;
161 if (dirn>0) {
162 textch = text[t-1];
163 patch = pattern[p-1];
164 } else {
165 textch = text[textlen-t];
166 patch = pattern[patlen-p];
167 }
168 if (textch == patch){
169 v = dp_[/*index(p-1,t-1)=*/ind11]; /* characters match, get profit*/
170 ac = alignment_mask_equal;
171 } else if (wc_ && iupac_compatible(textch,patch) &&
172 (tn_ || textch != 'N')) {
173 v = dp_[/*index(p-1,t-1)=*/ind11]; /* characters wildcard match, get profit*/
174 ac = alignment_mask_wildcard_equal;
175 } else if (textch == eos_ || p <= lbexact || p >=rbexact) {
176 /* cannot tolerate a substitution error with eos char or if it
177 is in the exact range... */
178 v = const_violation;
179 ac = alignment_mask_constraint_violation;
180 } else {
181 int mut=-1;
182 if (!dna_mut_) {
183 v = dp_[/*index(p-1,t-1)=*/ind11]+1; /* cost for subst*/
184 ac = alignment_mask_substitution;
185 } else if ((mut=aasubdist(textch,patch))>=0) {
186 v = dp_[/*index(p-1,t-1)=*/ind11]+mut; /* cost for subst*/
187 switch(mut) {
188 case 1:
189 ac = alignment_mask_substitution_1;
190 break;
191 case 2:
192 ac = alignment_mask_substitution_2;
193 break;
194 case 3:
195 ac = alignment_mask_substitution_3;
196 break;
197 default:
198 checkpoint;
199 assert(0);
200 }
201 } else {
202 v = const_violation;
203 ac = alignment_mask_constraint_violation;
204 }
205 }
206 // checkpoint;
207 if (textch == eos_ || !indels_ || t <= lb ||
208 p < lbexact || p >= rbexact) {
209 // checkpoint;
210 v1 = const_violation;
211 /* cannot tolerate an insertion in the text
212 if that insertion is an eos or if the
213 pattern is in the exact range */
214 ac1 = alignment_mask_constraint_violation;
215 } else {
216 // checkpoint;
217 if (!dna_mut_) {
218 v1 = dp_[/*index(p,t-1)=*/ind01]+1; /* cost for an insertion */
219 ac1 = alignment_mask_insertion;
220 } else {
221 v1 = dp_[/*index(p,t-1)=*/ind01]+3; /* cost for an insertion */
222 ac1 = alignment_mask_insertion_3;
223 }
224 }
225 if (v1 < v) {
226 // checkpoint;
227 v = v1;
228 ac = ac1;
229 } else if (v1 == v) {
230 ac |= ac1;
231 }
232 // checkpoint;
233 if (!indels_ || t >= ub || p <= lbexact || p >= rbexact) {
234 v1 = const_violation;
235 ac1 = alignment_mask_constraint_violation;
236 } else {
237 if (!dna_mut_) {
238 v1 = dp_[/*index(p-1,t)=*/ind10]+1; /* cost for a deletion. Note that we
239 permit a deletion in the text, even
240 if the text charcter is an eos... */
241 ac1 = alignment_mask_deletion;
242 } else {
243 v1 = dp_[/*index(p-1,t)=*/ind10]+3;
244 ac1 = alignment_mask_deletion_3;
245 }
246 }
247 if (v1 < v) {
248 v = v1;
249 ac = ac1;
250 } else if (v1 == v) {
251 ac |= ac1;
252 }
253 dp_[/*index(p,t)=*/ind] = v;
254 // cerr << "dp_ " << p << " " << t << " " << dp_[index(p,t)] << endl;
255 best_[/*index(p,t)=*/ind] = ac;
256 if (bestscorerow > v) {
257 bestscorerow = v;
258 }
259 ind01 = ind++;
260 ind11 = ind10++;
261 }
262 // checkpoint;
263 if (bestscorerow > maxdist_) {
264 // delete [] dp_;
265 // delete [] best_;
266 return false;
267 }
268 // checkpoint;
269 }
270 // checkpoint;
271
272 int val;
273 int bestpos = patlen-(indels_?(dna_mut_?1:maxdist_):0);
274 if (textlen < bestpos) {
275 bestpos = textlen;
276 }
277 if (bestpos < 0) {
278 bestpos = 0;
279 }
280 ind = index(patlen,bestpos);
281 int k,bestval=dp_[ind];
282 ub = patlen+(indels_?(dna_mut_?1:maxdist_):0);
283 if (textlen < ub) {
284 ub = textlen;
285 }
286 for (k=bestpos+1,ind++;k<=ub;k++,ind++) {
287 // assert(index(patlen,k)==ind);
288 if ((val=dp_[/*index(patlen,k)=*/ind]) < bestval ||
289 ((val=dp_[/*index(patlen,k)=*/ind]) <= bestval &&
290 (best_[/*index(patlen,k)=*/ind]&
291 (alignment_mask_equal|
292 alignment_mask_wildcard_equal|
293 alignment_mask_substitution|
294 alignment_mask_substitution_1|
295 alignment_mask_substitution_2|
296 alignment_mask_substitution_3)))) {
297 bestval = val;
298 bestpos = k;
299 }
300 }
301
302 // checkpoint;
303
304 p=patlen; t=bestpos;
305 if (t < p - (indels_?(dna_mut_?1:maxdist_):0) || t > p + (indels_?(dna_mut_?1:maxdist_):0)) {
306 // delete [] dp_;
307 // delete [] best_;
308 return false;
309 }
310 if (yesno_) {
311 // checkpoint;
312 // cerr << bestval << endl;
313 // cerr << bestpos << endl;
314 // delete [] dp_;
315 // delete [] best_;
316 matchlen = bestpos;
317 return true;
318 }
319 // checkpoint;
320 matching_text_.resize(bestpos);
321 if (dirn>0) {
322 for (int i=0;i<bestpos;i++) {
323 matching_text_[i] = text[i];
324 }
325 } else {
326 for (int i=0,j=textlen-bestpos;i<bestpos;i++,j++) {
327 matching_text_[i] = text[j];
328 }
329 }
330 std::list<alignment_code> alignment;
331 std::list<alignment_code>::iterator alit;
332 bool match,ins,del,wc,sub;
333 alignment_code lastac=alignment_none;
334 while (p!=0||t!=0) {
335 ac = best_[index(p,t)];
336 match = (ac & (alignment_mask_equal |
337 alignment_mask_wildcard_equal |
338 alignment_mask_substitution |
339 alignment_mask_substitution_1 |
340 alignment_mask_substitution_2 |
341 alignment_mask_substitution_3)
342 );
343 wc = (ac & alignment_mask_wildcard_equal);
344 sub = (ac & (alignment_mask_substitution|
345 alignment_mask_substitution_1|
346 alignment_mask_substitution_2|
347 alignment_mask_substitution_3)
348 );
349 ins = (ac & (alignment_mask_insertion|
350 alignment_mask_insertion_3)
351 );
352 del = (ac & (alignment_mask_deletion|
353 alignment_mask_deletion_3)
354 );
355 if (match && !(((lastac == alignment_insertion ||
356 lastac == alignment_insertion_3) && ins) ||
357 ((lastac == alignment_deletion ||
358 lastac == alignment_deletion_3) && del) ||
359 (lastac == alignment_wildcard_equal && !wc && (ins || del)))) {
360 p--; t--;
361 if ((ac & alignment_mask_equal) &&
362 !((lastac == alignment_wildcard_equal && wc) ||
363 (lastac == alignment_substitution && sub))) {
364 lastac = alignment_equal;
365 } else if (wc) {
366 lastac = alignment_wildcard_equal;
367 } else if (sub) {
368 if (ac & alignment_mask_substitution) {
369 lastac = alignment_substitution;
370 } else if (ac & alignment_mask_substitution_1) {
371 lastac = alignment_substitution_1;
372 } else if (ac & alignment_mask_substitution_2) {
373 lastac = alignment_substitution_2;
374 } else if (ac & alignment_mask_substitution_3) {
375 lastac = alignment_substitution_3;
376 }
377 }
378 } else if (del) {
379 p--;
380 if (ac & alignment_mask_deletion) {
381 lastac = alignment_deletion;
382 } else if (ac & alignment_mask_deletion_3) {
383 lastac = alignment_deletion_3;
384 }
385 } else if (ins) {
386 t--;
387 if (ac & alignment_mask_insertion) {
388 lastac = alignment_insertion;
389 } else if (ac & alignment_mask_insertion_3) {
390 lastac = alignment_insertion_3;
391 }
392 } else if (ac & alignment_mask_constraint_violation) {
393 p = 0; t = 0;
394 lastac = alignment_constraint_violation;
395 } else {
396 // Dump everything we know about the DP!
397 // checkpoint;
398 cerr << "Abort DP: Dumping everything!\n";
399 cerr << "Text: " << text << endl;
400 cerr << "Pattern: " << pattern << endl;
401 cerr << "Dirn: " << dirn << endl;
402 cerr << "LMatch: " << lmatch << endl;
403 cerr << "RMatch: " << rmatch << endl;
404 cerr << "DP Matrix: " << endl;
405 for (p=0;p<=patlen;p++) {
406 for (t=0;t<=textlen;t++) {
407 cerr << dp_[index(p,t)] << " ";
408 }
409 cerr << endl;
410 }
411 cerr << "Best Matrix: " << endl;
412 for (p=0;p<=patlen;p++) {
413 for (t=0;t<=textlen;t++) {
414 cerr << best_[index(p,t)] << " ";
415 }
416 cerr << endl;
417 }
418 cerr << "Bestval: " << bestval << endl;
419 cerr << "Bestpos: " << bestpos << endl;
420 cerr << "Matching Text: " << matching_text_ << endl;
421 cerr << "Alignment chars: ";
422
423 for (alit=alignment.begin();alit!=alignment.end();++alit) {
424 cerr << *alit << " ";
425 }
426 cerr << endl;
427 cerr << "Current p,t: " << p << "," << t << endl;
428 assert(0);
429 }
430 if (dirn > 0) {
431 alignment.push_front(lastac);
432 } else if (dirn < 0) {
433 alignment.push_back(lastac);
434 }
435 stats_[lastac]++;
436 }
437 // checkpoint;
438 if (p != 0 || t!=0) {
439 // Dump everything we know about the DP!
440 cerr << "Abort DP: Dumping everything!\n";
441 cerr << "Text: " << text << endl;
442 cerr << "Pattern: " << pattern << endl;
443 cerr << "Dirn: " << dirn << endl;
444 cerr << "LMatch: " << lmatch << endl;
445 cerr << "RMatch: " << rmatch << endl;
446 cerr << "DP Matrix: " << endl;
447 for (p=0;p<=patlen;p++) {
448 for (t=0;t<=textlen;t++) {
449 cerr << dp_[index(p,t)] << " ";
450 }
451 cerr << endl;
452 }
453 cerr << "Best Matrix: " << endl;
454 for (p=0;p<=patlen;p++) {
455 for (t=0;t<=textlen;t++) {
456 cerr << best_[index(p,t)] << " ";
457 }
458 cerr << endl;
459 }
460 cerr << "Bestval: " << bestval << endl;
461 cerr << "Bestpos: " << bestpos << endl;
462 cerr << "Matching Text: " << matching_text_ << endl;
463 cerr << "Alignment chars: ";
464 for (alit=alignment.begin();alit!=alignment.end();++alit) {
465 cerr << *alit << " ";
466 }
467 cerr << endl;
468 cerr << "Current p,t: " << p << "," << t << endl;
469 assert(0);
470 // cerr << "Something is wrong with the dp..." << endl;
471 }
472 alignment_.resize(alignment.size());
473 for (int i=0;i<alignment_.size();i++) {
474 alignment_[i] = alignment.front();
475 alignment.pop_front();
476 }
477 // delete [] dp_;
478 // delete [] best_;
479 matchlen = bestpos;
480 return true;
481 }
482
483 bool primer_alignment_2match::align(CharacterProducer & cp,
484 std::string const & pattern1,
485 std::string const & pattern2) {
486 bool retval = (((end2_ - pattern2.length()) >= end1_-1) &&
487 ((end2_ - pattern2.length()) <= end1_+1));
488 if (yesno_) {
489 end_ = end2_;
490 end_defined_ = true;
491 return retval;
492 }
493 // checkpoint;
494 start_ = end1_ - pattern1.length();
495 end_ = end2_;
496 int txtlen = end_-start_;
497 char *buffer = new char[txtlen+1];
498 cp.pos(start_);
499 for (int i=0;i<txtlen;i++) {
500 buffer[i] = cp.getch();
501 }
502 buffer[txtlen] = '\0';
503 // checkpoint;
504 alignment_.resize(std::max(txtlen,(int)(pattern1.length() + pattern2.length())));
505 // cerr << pattern1.length() << " " << pattern2.length() << " " << " "
506 // << txtlen << " " << alignment_.size() << endl;
507 for (int i=0;i<pattern1.length();i++) {
508 if (buffer[i] == pattern1[i]) {
509 alignment_[i] = alignment_equal;
510 } else if (wc_ && iupac_compatible(buffer[i],pattern1[i]) &&
511 (tn_ || buffer[i] != 'N')) {
512 alignment_[i] = alignment_wildcard_equal;
513 } else {
514 // checkpoint;
515 alignment_[i] = alignment_substitution;
516 }
517 // cerr << i << ": " << (int)alignment_[i] << endl;
518 }
519 for (int i=0;i<pattern2.length();i++) {
520 if (buffer[txtlen-i-1] == pattern2[pattern2.length()-i-1]) {
521 alignment_[alignment_.size()-i-1] = alignment_equal;
522 } else if (wc_ &&
523 iupac_compatible(buffer[txtlen-i-1],pattern2[pattern2.length()-i-1]) &&
524 (tn_ || buffer[txtlen-i-1] != 'N')) {
525 alignment_[alignment_.size()-i-1] = alignment_wildcard_equal;
526 } else {
527 alignment_[i] = alignment_substitution;
528 }
529 // cerr << alignment_.size()-i-1 << ": " << (int)alignment_[alignment_.size()-i-1] << endl;
530 }
531 if (pattern1.length() + pattern2.length() < txtlen) {
532 if (buffer[pattern1.length()] != '\n'
533 && lmatch_ < pattern1.length()
534 && rmatch_ < pattern2.length()) {
535 alignment_[pattern1.length()] = alignment_insertion;
536 } else {
537 alignment_[pattern1.length()] = alignment_constraint_violation;
538 }
539 } else if (pattern1.length() + pattern2.length() > txtlen) {
540 if (lmatch_ >= pattern1.length()
541 || rmatch_ >= pattern2.length()) {
542 alignment_[pattern1.length()] = alignment_constraint_violation;
543 } else if (buffer[pattern1.length()-1] == pattern1[pattern1.length()-1]) {
544 // checkpoint;
545 // Position may match last char of first piece...
546 // so put the deletion on the first char of the second piece...
547 alignment_[pattern1.length()] = alignment_deletion;
548 // cerr << pattern1.length() << ": " << (int)alignment_[pattern1.length()] << endl;
549 } else if (buffer[pattern1.length()-1] == pattern2[0]) {
550 // checkpoint;
551 // Position may match first char of second piece...
552 // so put the deletion on the last char of the first piece...
553 alignment_[pattern1.length()-1] = alignment_deletion;
554 // cerr << pattern1.length()-1 << ": " << (int)alignment_[pattern1.length()-1] << endl;
555 } else if (wc_ && iupac_compatible(buffer[pattern1.length()-1],pattern1[pattern1.length()-1]) &&
556 (tn_ || buffer[pattern1.length()-1]!='N')) {
557 // checkpoint;
558 // Position may wildcard match last char of first piece...
559 // so put the deletion on the first char of the second piece...
560 alignment_[pattern1.length()] = alignment_deletion;
561 // cerr << pattern1.length() << ": " << (int)alignment_[pattern1.length()] << endl;
562 } else if (wc_ && iupac_compatible(buffer[pattern1.length()-1],pattern2[0]) &&
563 (tn_ || buffer[pattern1.length()-1]!='N')) {
564 // checkpoint;
565 // Position may wildcard match first char of second piece...
566 // so put the deletion on the last char of the first piece...
567 alignment_[pattern1.length()-1] = alignment_deletion;
568 // cerr << pattern1.length()-1 << ": " << (int)alignment_[pattern1.length()-1] << endl;
569 } else {
570 // checkpoint;
571 // No match, so put the deletion on the first char of the second piece...
572 alignment_[pattern1.length()] = alignment_deletion;
573 // cerr << pattern1.length() << ": " << (int)alignment_[pattern1.length()] << endl;
574 }
575 // checkpoint;
576 }
577 for (int i=0;i<alignment_.size();i++) {
578 stats_[alignment_[i]]++;
579 }
580 matching_text_ = buffer;
581 delete [] buffer;
582 alignment_done_ = true;
583 return retval;
584 }
585
586 bool primer_alignment_lmatch::align(CharacterProducer & cp,
587 std::string const & pattern1,
588 std::string const & pattern2) {
589 // checkpoint;
590 // cerr << cp.pos() << " " << pattern1 << " " << pattern2 << endl;
591 FILE_POSITION_TYPE textstart = end1_;
592 int buflen=pattern2.length()+maxdist_;
593 if (!buffer1_ || bufsize_ < buflen+1) {
594 // checkpoint;
595 if (buffer1_) {
596 delete [] buffer0_;
597 delete [] buffer1_;
598 }
599 // size should always be larger than buflen+1;
600 bufsize_ = maxpatlen_+((indels_)?(dna_mut_?1:maxdist_):0);
601 if (bufsize_ < buflen+1) {
602 bufsize_ = buflen+1;
603 }
604 buffer1_ = new char[bufsize_];
605 buffer0_ = new char[bufsize_];
606 // checkpoint;
607 }
608 // checkpoint;
609 char *buffer;
610 if (textstart < bufstart_ || textstart + buflen >= bufend_) {
611 // cerr << "LMatch reading: " << textstart << "-" << textstart+bufsize_ << endl;
612 cp.pos(textstart);
613 for (int i=0;i<bufsize_;i++) {
614 buffer1_[i] = cp.getch();
615 }
616 bufstart_ = textstart;
617 bufend_ = textstart+bufsize_;
618 buffer = buffer1_;
619 } else {
620 buffer = buffer1_+(textstart-bufstart_);
621 }
622 // checkpoint;
623 // cerr << buffer << endl;
624 int matchlen;
625 bool retval = global_align(buffer,buflen,pattern2,1,lmatch_-pattern1.length(),rmatch_,matchlen);
626 // checkpoint;
627 if (yesno_) {
628 // checkpoint;
629 // delete buffer;
630 end_ = end1_+matchlen;
631 end_defined_ = true;
632 return retval;
633 }
634 textstart = end1_-pattern1.length();
635 cp.pos(textstart);
636 int buflen0 = pattern1.length();
637 for (int i=0;i<buflen0;i++) {
638 buffer0_[i] = cp.getch();
639 }
640 buffer0_[buflen0] = '\0';
641 matching_text_ = std::string(buffer0_)+matching_text_;
642 start_ = end1_-pattern1.length();
643 end_ = start_ + matching_text_.length();
644 int oldsize=alignment_.size();
645 alignment_.resize(oldsize+pattern1.length());
646 for (int i=oldsize-1;i>=0;i--) {
647 alignment_[i+pattern1.length()] = alignment_[i];
648 }
649 for (int i=0;i<pattern1.length();i++) {
650 if (buffer0_[i] == pattern1[i]) {
651 alignment_[i] = alignment_equal;
652 } else if (wc_ && iupac_compatible(buffer0_[i],pattern1[i]) &&
653 (tn_ || buffer0_[i]!='N')) {
654 alignment_[i] = alignment_wildcard_equal;
655 } else {
656 alignment_[i] = alignment_substitution;
657 }
658 stats_[alignment_[i]]++;
659 }
660 alignment_done_=true;
661 // delete [] buffer;
662 // delete [] buffer0;
663 return retval;
664 }
665
666 bool primer_alignment_rmatch::align(CharacterProducer & cp,
667 std::string const & pattern1,
668 std::string const & pattern2) {
669 // checkpoint;
670 // cerr << end2_ << endl;
671 // cerr << pattern1 << " " << pattern2 << endl;
672 FILE_POSITION_TYPE textstart=0;
673 int patlen = pattern1.length()+pattern2.length()+maxdist_;
674 if (end2_>(FILE_POSITION_TYPE)patlen) {
675 textstart = end2_-patlen;
676 }
677 int buflen = end2_-pattern2.length()-textstart;
678 // cerr << textstart << " " << buflen << endl;
679 if (!buffer1_ || bufsize_ < buflen+1) {
680 // checkpoint;
681 if (buffer1_) {
682 delete [] buffer0_;
683 delete [] buffer1_;
684 }
685 // size should always be larger than buflen+1;
686 bufsize_ = maxpatlen_+((indels_)?(dna_mut_?1:maxdist_):0);
687 if (bufsize_ < buflen+1) {
688 bufsize_ = buflen+1;
689 }
690 buffer1_ = new char[bufsize_];
691 buffer0_ = new char[bufsize_];
692 // checkpoint;
693 }
694 // checkpoint;
695 char *buffer;
696 if (textstart < bufstart_ || textstart + buflen >= bufend_) {
697 // cerr << "RMatch reading: " << textstart << "-" << textstart+bufsize_ << endl;
698 cp.pos(textstart);
699 for (int i=0;i<bufsize_;i++) {
700 buffer1_[i] = cp.getch();
701 }
702 bufstart_ = textstart;
703 bufend_ = textstart+bufsize_;
704 buffer = buffer1_;
705 } else {
706 buffer = buffer1_+(textstart - bufstart_);
707 }
708 int matchlen;
709 bool retval = global_align(buffer,buflen,pattern1,-1,lmatch_,rmatch_-pattern2.length(),matchlen);
710 // checkpoint;
711 if (yesno_) {
712 // checkpoint;
713 // delete buffer;
714 end_ = end2_;
715 end_defined_ = true;
716 return retval;
717 }
718 for (int i=0;i<pattern2.length();i++) {
719 buffer0_[i] = cp.getch();
720 }
721 buffer0_[pattern2.length()] = '\0';
722 matching_text_ = matching_text_+buffer0_;
723 end_ = end2_;
724 start_ = end_-matching_text_.length();
725 int oldsize=alignment_.size();
726 alignment_.resize(alignment_.size()+pattern2.length());
727 for (int i=0;i<pattern2.length();i++) {
728 if (buffer0_[i] == pattern2[i]) {
729 alignment_[oldsize+i] = alignment_equal;
730 } else if (wc_ && iupac_compatible(buffer0_[i],pattern2[i]) &&
731 (tn_ || buffer0_[i]!='N')) {
732 alignment_[oldsize+i] = alignment_wildcard_equal;
733 }
734 stats_[alignment_[oldsize+i]] ++;
735 }
736 alignment_done_=true;
737 // delete [] buffer;
738 // delete [] buffer0;
739 return retval;
740 }
741
742 // void primer_alignment::write(ostream & os, FILE_POSITION_TYPE const seq_pos,
743 // std::string const & pattern1,
744 // std::string const & pattern2,
745 // long unsigned int id, bool revcomp) {
746 // std::string const & mt = matching_text();
747 // int p=0;
748 // os << " ";
749 // for (int i=0; i<size(); i++) {
750 // if ((*this)[i] != alignment_deletion) {
751 // os << mt[p];
752 // p++;
753 // } else {
754 // os << "-";
755 // }
756 // }
757 // os /* << " " << start() << " " << end() */
758 // << " " << seq_pos - length() + 1 << " " << seq_pos
759 // << " " << editdist();
760 // os << endl;
761 // os << " ";
762 // for (int i=0; i<size(); i++) {
763 // if ((*this)[i] == alignment_equal) {
764 // os << "|";
765 // p++;
766 // } else if ((*this)[i] == alignment_substitution) {
767 // os << "*";
768 // } else {
769 // os << " ";
770 // }
771 // }
772 // os << endl;
773 // std::string kw = pattern1 + pattern2;
774 // p = 0;
775 // os << " ";
776 // for (int i=0; i<size(); i++) {
777 // if ((*this)[i] != alignment_insertion) {
778 // os << kw[p];
779 // p++;
780 // } else {
781 // os << "-";
782 // }
783 // }
784 // os << " " << id;
785 // if (revcomp) {
786 // os << " REVCOMP";
787 // }
788 // os << endl;
789 // }