ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/keyword_tree.cc
Revision: 1.1.1.1 (vendor branch)
Committed: Wed Dec 22 21:37:17 2004 UTC (11 years, 8 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD, RELEASE-20041222, HEAD
Changes since 1.1: +0 -0 lines
Log Message:
Initial primer_match import

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #include "keyword_tree.h"
23 #include "keyword_tree.t"
24 #include <string.h>
25 #include <assert.h>
26 #include "char_io.h"
27 #include "util.h"
28 #include <unistd.h>
29
30 ktnode_list::ktnode_list() : kidlist_(0) {};
31
32 ktnode_dna_list::ktnode_dna_list() : kidlist_(0) {
33 acgt_[0] = 0; acgt_[1] = 0; acgt_[2] = 0; acgt_[3] = 0;
34 }
35
36 ktnode_jtable::ktnode_jtable() : kidptr_(0) {};
37
38 ktnode_suffix::ktnode_suffix() : kidlist_(0), kidjtable_(0), level_(0) {};
39
40 ktnode_lsuffix::ktnode_lsuffix() : kidlist_(0), level_(0) {};
41
42 ktnode_list::~ktnode_list() {
43 if (kidlist_) {
44 delete kidlist_;
45 }
46 };
47 ktnode_dna_list::~ktnode_dna_list() {
48 if (kidlist_) {
49 delete kidlist_;
50 }
51 };
52
53 ktnode_jtable::~ktnode_jtable() {
54 // kidptr points to things that don't need to be deleted, as long as
55 // optimize_nodes has been called....
56 };
57
58 ktnode_suffix::~ktnode_suffix() {
59 if (kidlist_) {
60 delete kidlist_;
61 }
62 };
63
64 ktnode_lsuffix::~ktnode_lsuffix() {
65 if (kidlist_) {
66 delete kidlist_;
67 }
68 };
69
70 ktnode_list * const ktnode_list::lchild(unsigned char ch) const {
71 // checkpoint;
72 if (!kidlist_) return 0;
73 // checkpoint;
74 tinylist<kid_list_element<ktnode_list> >::iterator it;
75 // checkpoint;
76 for (it=kidlist_->begin(); it!=kidlist_->end(); ++it) {
77 // checkpoint;
78 // cerr << ch << " " << it->ch << " " << (void*)it->nd << endl;
79 if (ch == it->ch) {
80 return it->nd;
81 } else if (ch < it->ch) {
82 // checkpoint;
83 return 0;
84 }
85 }
86 // checkpoint;
87 return 0;
88 }
89
90 ktnode_list * const ktnode_list::child(unsigned char ch) const {
91 return this->lchild(ch);
92 }
93
94 ktnode_dna_list * const ktnode_dna_list::lchild(unsigned char ch) const {
95 if (!kidlist_) return 0;
96 tinylist<kid_list_element<ktnode_dna_list> >::iterator it;
97 for (it=kidlist_->begin(); it!=kidlist_->end(); ++it) {
98 if (ch == it->ch) {
99 return it->nd;
100 } else if (ch < it->ch) {
101 return 0;
102 }
103 }
104 return 0;
105 }
106
107 ktnode_dna_list * const ktnode_dna_list::child(unsigned char ch) const {
108 if (ch < 4) {
109 return acgt_[ch];
110 }
111 if (!kidlist_) return 0;
112 tinylist<kid_list_element<ktnode_dna_list> >::iterator it;
113 for (it=kidlist_->begin(); it!=kidlist_->end(); ++it) {
114 if (ch == it->ch) {
115 return it->nd;
116 } else if (ch < it->ch) {
117 return 0;
118 }
119 }
120 return 0;
121 }
122
123 ktnode_jtable * const ktnode_jtable::lchild(unsigned char ch) const {
124 if (!kidptr_)
125 return 0;
126
127 tinylist<kid_list_element<ktnode_jtable> >::iterator it;
128 for (it=((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->begin();
129 it!=((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->end();
130 ++it) {
131 if (ch == it->ch) {
132 return it->nd;
133 } else if (ch < it->ch) {
134 return 0;
135 }
136 }
137 return 0;
138 }
139
140 ktnode_suffix * const ktnode_suffix::lchild(unsigned char ch) const {
141 if (!kidlist_)
142 return 0;
143
144 tinylist<kid_list_element<ktnode_suffix> >::iterator it;
145 for (it=kidlist_->begin();
146 it!=kidlist_->end();
147 ++it) {
148 if (ch == it->ch) {
149 return it->nd;
150 } else if (ch < it->ch) {
151 return 0;
152 }
153 }
154 return 0;
155 }
156
157 ktnode_lsuffix * const ktnode_lsuffix::lchild(unsigned char ch) const {
158 if (!kidlist_)
159 return 0;
160
161 tinylist<kid_list_element<ktnode_lsuffix> >::iterator it;
162 for (it=kidlist_->begin();
163 it!=kidlist_->end();
164 ++it) {
165 if (ch == it->ch) {
166 return it->nd;
167 } else if (ch < it->ch) {
168 return 0;
169 }
170 }
171 return 0;
172 }
173
174 ktnode_list * const ktnode_list::add_child(keyword_tree<ktnode_list> & kt,
175 unsigned char ch) {
176 ktnode_list *n = kt.newktnode();
177 if (!kidlist_) {
178 kidlist_ = new tinylist<kid_list_element<ktnode_list> >;
179 kidlist_->push_front(kid_list_element<ktnode_list>(ch,n));
180 return n;
181 }
182 tinylist<kid_list_element<ktnode_list> >::iterator it;
183 tinylist<kid_list_element<ktnode_list> >::iterator it1=kidlist_->end();
184 for (it=kidlist_->begin(); it!=kidlist_->end(); ++it) {
185 if (ch < it->ch) {
186 break;
187 }
188 it1=it;
189 }
190 if (it1==kidlist_->end()) {
191 kidlist_->push_front(kid_list_element<ktnode_list>(ch,n));
192 } else {
193 kidlist_->insert_after(it1,kid_list_element<ktnode_list>(ch,n));
194 }
195 return n;
196 }
197
198 ktnode_dna_list * const ktnode_dna_list::add_child(keyword_tree<ktnode_dna_list> & kt,
199 unsigned char ch) {
200 ktnode_dna_list *n = kt.newktnode();
201 if (!kidlist_) {
202 kidlist_ = new tinylist<kid_list_element<ktnode_dna_list> >;
203 kidlist_->push_front(kid_list_element<ktnode_dna_list>(ch,n));
204 return n;
205 }
206 tinylist<kid_list_element<ktnode_dna_list> >::iterator it;
207 tinylist<kid_list_element<ktnode_dna_list> >::iterator it1=kidlist_->end();
208 for (it=kidlist_->begin(); it!=kidlist_->end(); ++it) {
209 if (ch < it->ch) {
210 break;
211 }
212 it1=it;
213 }
214 if (it1==kidlist_->end()) {
215 kidlist_->push_front(kid_list_element<ktnode_dna_list>(ch,n));
216 } else {
217 kidlist_->insert_after(it1,kid_list_element<ktnode_dna_list>(ch,n));
218 }
219 return n;
220 }
221
222 ktnode_jtable * const ktnode_jtable::add_child(keyword_tree<ktnode_jtable> & kt,
223 unsigned char ch) {
224 ktnode_jtable *n = kt.newktnode();
225 if (!kidptr_) {
226 kidptr_ = (void*)(new tinylist<kid_list_element<ktnode_jtable> >);
227 ((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->push_front(kid_list_element<ktnode_jtable>(ch,n));
228 return n;
229 }
230 tinylist<kid_list_element<ktnode_jtable> >::iterator it;
231 tinylist<kid_list_element<ktnode_jtable> >::iterator it1;
232 it1=((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->end();
233 for (it=((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->begin();
234 it!=((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->end();
235 ++it) {
236 if (ch < it->ch) {
237 break;
238 }
239 it1=it;
240 }
241 if (it1==((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->end()) {
242 ((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->push_front(kid_list_element<ktnode_jtable>(ch,n));
243 } else {
244 ((tinylist<kid_list_element<ktnode_jtable> >*)kidptr_)->insert_after(it1,kid_list_element<ktnode_jtable>(ch,n));
245 }
246 return n;
247 }
248
249 ktnode_suffix*const ktnode_suffix::add_child(keyword_tree<ktnode_suffix> & kt,
250 unsigned char ch) {
251 ktnode_suffix *n = kt.newktnode();
252 n->level_ = level_+1;
253 if (!kidlist_) {
254 kidlist_ = new tinylist<kid_list_element<ktnode_suffix> >;
255 kidlist_->push_front(kid_list_element<ktnode_suffix>(ch,n));
256 return n;
257 }
258 tinylist<kid_list_element<ktnode_suffix> >::iterator it;
259 tinylist<kid_list_element<ktnode_suffix> >::iterator it1;
260 it1=kidlist_->end();
261 for (it=kidlist_->begin();
262 it!=kidlist_->end();
263 ++it) {
264 if (ch < it->ch) {
265 break;
266 }
267 it1=it;
268 }
269 if (it1==kidlist_->end()) {
270 kidlist_->push_front(kid_list_element<ktnode_suffix>(ch,n));
271 } else {
272 kidlist_->insert_after(it1,kid_list_element<ktnode_suffix>(ch,n));
273 }
274 return n;
275 }
276
277 ktnode_lsuffix*const ktnode_lsuffix::add_child(keyword_tree<ktnode_lsuffix> & kt,
278 unsigned char ch) {
279 ktnode_lsuffix *n = kt.newktnode();
280 n->level_ = level_+1;
281 if (!kidlist_) {
282 kidlist_ = new tinylist<kid_list_element<ktnode_lsuffix> >;
283 kidlist_->push_front(kid_list_element<ktnode_lsuffix>(ch,n));
284 return n;
285 }
286 tinylist<kid_list_element<ktnode_lsuffix> >::iterator it;
287 tinylist<kid_list_element<ktnode_lsuffix> >::iterator it1;
288 it1=kidlist_->end();
289 for (it=kidlist_->begin();
290 it!=kidlist_->end();
291 ++it) {
292 if (ch < it->ch) {
293 break;
294 }
295 it1=it;
296 }
297 if (it1==kidlist_->end()) {
298 kidlist_->push_front(kid_list_element<ktnode_lsuffix>(ch,n));
299 } else {
300 kidlist_->insert_after(it1,kid_list_element<ktnode_lsuffix>(ch,n));
301 }
302 return n;
303 }
304
305 // std::list<kid_list_element> const & ktnode::children() const {
306 // return *kids_;
307 // }
308 /*
309
310 void ktnode::writeref(ostream & os) const {
311 os << "{";
312 os << tag_;
313 os << "}";
314 }
315
316 void ktnode::write(ostream & os, int indent, bool recurse) const {
317 char brack1='[', brack2=']';
318 if (patid_) {
319 brack1 = '(';
320 brack2 = ')';
321 }
322 os << brack1
323 << std::string("%*d",(int)(floor(log10(kt_.nodetag_)+1)),tag_)
324 << brack2
325 << " ";
326 indent += 3;
327 indent += (int)(floor(log10(kt_.nodetag_)+1));
328 // os << "(" << tag_ << " " << tmp << ")";
329 list_item li;
330 forall_items(li,kids_) {
331 os << kids_[li].first() << " => ";
332 if (recurse) {
333 kids_[li].second()->write(os,indent+5);
334 } else {
335 kids_[li].second()->writeref(os);
336 }
337 // if (li != kids_.last()) {
338 os << endl;
339 for(int i=0;i<indent;i++) {
340 os << " ";
341 }
342 // }
343 }
344 os << "! => ";
345 if (fail()) {
346 fail()->writeref(os);
347 } else {
348 os << "NULL";
349 }
350 os << endl;
351 for(int i=0;i<indent;i++) {
352 os << " ";
353 }
354 os << "> => ";
355 if (output()) {
356 output()->writeref(os);
357 } else {
358 os << "NULL";
359 }
360
361 }
362
363 */
364
365
366 /*
367
368 void keyword_tree::write(ostream & os) const {
369 pattern_list_element ple;
370 forall(ple,patterns()) {
371 ple.write(os);
372 os << endl;
373 }
374 if (root_) {
375 root_->write(os,0);
376 }
377 }
378
379 ostream & operator<<(ostream & os, keyword_tree const & kt) {
380 kt.write(os);
381 return os;
382 }
383
384 */
385
386 void ktnode_list::optimize_node(keyword_tree<ktnode_list> & kt,
387 CharacterProducer & cp) {
388 tinylist<kid_list_element<ktnode_list> > *kids = kidlist_;
389 if (kids) {
390 tinylist<kid_list_element<ktnode_list> >::iterator it;
391 for (it=kids->begin();it!=kids->end();++it) {
392 if (it->nd) {
393 it->nd->optimize_node(kt,cp);
394 }
395 it->ch = cp.nch(it->ch);
396 // cerr << it->ch << " " << (void*)it->nd << endl;
397 }
398 }
399 }
400
401 void ktnode_lsuffix::optimize_node(keyword_tree<ktnode_lsuffix> & kt,
402 CharacterProducer & cp) {
403 tinylist<kid_list_element<ktnode_lsuffix> > *kids = kidlist_;
404 if (kids) {
405 tinylist<kid_list_element<ktnode_lsuffix> >::iterator it;
406 for (it=kids->begin();it!=kids->end();++it) {
407 if (it->nd) {
408 it->nd->optimize_node(kt,cp);
409 }
410 it->ch = cp.nch(it->ch);
411 // cerr << it->ch << " " << (void*)it->nd << endl;
412 }
413 }
414 }
415
416 void ktnode_dna_list::optimize_node(keyword_tree<ktnode_dna_list> & kt,
417 CharacterProducer & cp) {
418 if (kidlist_) {
419 tinylist<kid_list_element<ktnode_dna_list> >::iterator it;
420 tinylist<kid_list_element<ktnode_dna_list> >::iterator it1;
421 it=kidlist_->begin();
422 it1=kidlist_->end();
423 while (it!=kidlist_->end()) {
424 if (it->nd) {
425 it->nd->optimize_node(kt,cp);
426 }
427 int nch = cp.nch(it->ch);
428 if (nch >= 0 && nch < 4) {
429 acgt_[nch] = it->nd;
430 if (it1 != kidlist_->end()) {
431 it = kidlist_->erase_after(it1);
432 } else {
433 it = kidlist_->pop();
434 }
435 } else {
436 it->ch = nch;
437 it1 = it;
438 ++it;
439 }
440 }
441 if (kidlist_->empty()) {
442 delete kidlist_;
443 kidlist_=0;
444 }
445 }
446 }
447
448 void ktnode_jtable::optimize_node(keyword_tree<ktnode_jtable> & kt,
449 CharacterProducer & cp) {
450 kid_jump_table<ktnode_jtable> *compatible_kjt=0;
451 tinylist<kid_list_element<ktnode_jtable> > *kids=
452 ((tinylist<kid_list_element<ktnode_jtable> > *)kidptr_);
453 tinylist<kid_list_element<ktnode_jtable> >::iterator kit;
454 std::list<kid_jump_table<ktnode_jtable> *>::iterator kjtit;
455 for (kjtit=kt.jump_tables().begin();kjtit!=kt.jump_tables().end();++kjtit) {
456 kid_jump_table<ktnode_jtable> *kjt = *kjtit;
457 if (kids) {
458 bool conflict=false;
459 for (kit=kids->begin();kit!=kids->end();++kit) {
460 int nch = cp.nch(kit->ch);
461 if (nch >= 0 && kjt[nch].which) {
462 conflict=true;
463 break;
464 }
465 }
466 if (!conflict) {
467 compatible_kjt = kjt;
468 break;
469 }
470 }
471 }
472 if (!compatible_kjt) {
473 compatible_kjt = kt.newktjumptable(cp.size());
474 }
475 kidptr_ = (void*)compatible_kjt;
476 if (kids) {
477 for (kit=kids->begin();kit!=kids->end();++kit) {
478 int nch = cp.nch(kit->ch);
479 if (nch >= 0) {
480 ((kid_jump_table<ktnode_jtable>*)kidptr_)[nch].child = kit->nd;
481 ((kid_jump_table<ktnode_jtable>*)kidptr_)[nch].which = this;
482 }
483 }
484 for (kit=kids->begin();kit!=kids->end();++kit) {
485 kit->nd->optimize_node(kt,cp);
486 }
487 }
488 delete kids;
489 }
490
491 void ktnode_suffix::optimize_node(keyword_tree<ktnode_suffix> & kt,
492 CharacterProducer & cp) {
493 // checkpoint;
494 // cerr << level_ << endl;
495 kid_jump_table<ktnode_suffix> *compatible_kjt=0;
496 tinylist<kid_list_element<ktnode_suffix> > *kids=kidlist_;
497 tinylist<kid_list_element<ktnode_suffix> >::iterator kit;
498 std::list<kid_jump_table<ktnode_suffix> *>::iterator kjtit;
499 for (kjtit=kt.jump_tables().begin();kjtit!=kt.jump_tables().end();++kjtit) {
500 kid_jump_table<ktnode_suffix> *kjt = *kjtit;
501 if (kids) {
502 bool conflict=false;
503 for (kit=kids->begin();kit!=kids->end();++kit) {
504 int nch = cp.nch(kit->ch);
505 if (nch >= 0 && kjt[nch].which) {
506 conflict=true;
507 break;
508 }
509 }
510 if (!conflict) {
511 compatible_kjt = kjt;
512 break;
513 }
514 }
515 }
516 if (!compatible_kjt) {
517 compatible_kjt = kt.newktjumptable(cp.size());
518 }
519 kidjtable_ = compatible_kjt;
520 if (kids) {
521 for (kit=kids->begin();kit!=kids->end();++kit) {
522 int nch = cp.nch(kit->ch);
523 if (nch >= 0) {
524 kidjtable_[nch].child = kit->nd;
525 kidjtable_[nch].which = this;
526 }
527 }
528 for (kit=kids->begin();kit!=kids->end();++kit) {
529 kit->nd->optimize_node(kt,cp);
530 }
531 }
532 // checkpoint;
533 }