ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/keyword_tree.h
Revision: 1.1.1.1 (vendor branch)
Committed: Wed Dec 22 21:37:18 2004 UTC (11 years, 8 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD, RELEASE-20041222, HEAD
Changes since 1.1: +0 -0 lines
Log Message:
Initial primer_match import

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #ifndef _IBPEP_keyword_tree_h
23 #define _IBPEP_keyword_tree_h
24
25 #include <iostream>
26 #include <list>
27 #include "util.h"
28 #include "char_io.h"
29 #include "pattern_alignment.h"
30 #include "pattern_match.h"
31 #include "memory_debug.h"
32 #include "tinylist.t"
33
34 class ktnode_list;
35 class ktnode_dna_list;
36 class ktnode_jtable;
37
38 template<class KTND>
39 class kid_list_element {
40 public:
41 unsigned char ch;
42 KTND *nd;
43 kid_list_element(unsigned char c=0,KTND* n=0) : ch(c),nd(n) {};
44 MEMORY_DEBUG(kid_list_element<KTND>)
45 };
46
47 template <class KTND>
48 struct kid_jump_table {
49 KTND *which;
50 KTND *child;
51 MEMORY_DEBUG(kid_jump_table<KTND>)
52 };
53
54 template <class KTND>
55 class keyword_tree : public PatternMatch {
56 private:
57 bool first_char_;
58 unsigned char ch_;
59 KTND * w_;
60 std::list<kid_jump_table<KTND>*> jump_tables_;
61 std::list<kid_jump_table<KTND>*> jump_table_delete_list;
62 KTND *root_;
63 std::list<KTND*> ktnodes;
64 std::list<KTND*> ktnode_delete_list;
65 bool *relchars_;
66 void add_keyword_(KTND * v, char const * const pat,
67 pattern_list::const_iterator it);
68 void failure_links_(std::list<KTND*> & q);
69 void compute_failure_links();
70 void optimize_nodes(CharacterProducer & cp);
71 void compress_relchars(CharacterProducer & cp);
72 public:
73 keyword_tree();
74 ~keyword_tree();
75 unsigned long int add_pattern(std::string const & pat,
76 unsigned long int id=0,
77 int esb=0, int eeb=0);
78 void init(CharacterProducer & cp) {
79 compute_failure_links();
80 optimize_nodes(cp);
81 compress_relchars(cp);
82 }
83 bool find_patterns(CharacterProducer & cp,
84 pattern_hit_vector & pas,
85 long unsigned minka=1);
86 bool find_suffixes(CharacterProducer & cp,
87 pattern_hit_vector & pas,
88 long unsigned int minlevel=1);
89 void reset();
90 KTND *newktnode();
91 std::list<kid_jump_table<KTND>*> & jump_tables() {
92 return jump_tables_;
93 }
94 kid_jump_table<KTND> *newktjumptable(unsigned int size);
95 MEMORY_DEBUG(keyword_tree<KTND>)
96 };
97
98 template<class KTND>
99 class ktnode {
100 private:
101 KTND *fail_;
102 KTND *output_;
103 tinylist<pattern_list::const_iterator> *patid_;
104 public:
105 ktnode();
106 ~ktnode();
107 inline KTND * fail() const;
108 void fail(KTND * const v);
109 inline KTND * output() const;
110 void output(KTND * const v);
111 tinylist<pattern_list::const_iterator> * const & patid() const;
112 void add_patid(pattern_list::const_iterator const & it);
113 MEMORY_DEBUG(ktnode<KTND>)
114 };
115
116 class ktnode_list : public ktnode<ktnode_list> {
117 private:
118 tinylist<kid_list_element<ktnode_list> > *kidlist_;
119 public:
120 ktnode_list();
121 ~ktnode_list();
122 ktnode_list * const lchild(unsigned char ch) const;
123 ktnode_list * const child(unsigned char ch) const;
124 ktnode_list * const add_child(keyword_tree<ktnode_list> & kt,
125 unsigned char ch);
126 tinylist<kid_list_element<ktnode_list> > * kids() const {
127 return kidlist_;
128 }
129 void optimize_node(keyword_tree<ktnode_list> & kt,
130 CharacterProducer & cp);
131 MEMORY_DEBUG(ktnode_list)
132 };
133
134 class ktnode_dna_list : public ktnode<ktnode_dna_list> {
135 private:
136 tinylist<kid_list_element<ktnode_dna_list> > *kidlist_;
137 ktnode_dna_list *acgt_[4];
138 public:
139 ktnode_dna_list();
140 ~ktnode_dna_list();
141 ktnode_dna_list * const lchild(unsigned char ch) const;
142 ktnode_dna_list * const child(unsigned char ch) const;
143 ktnode_dna_list * const add_child(keyword_tree<ktnode_dna_list> & kt,
144 unsigned char ch);
145 tinylist<kid_list_element<ktnode_dna_list> > * kids() const {
146 return kidlist_;
147 }
148 void optimize_node(keyword_tree<ktnode_dna_list> & kt,
149 CharacterProducer & cp);
150 MEMORY_DEBUG(ktnode_dna_list)
151 };
152
153 class ktnode_jtable : public ktnode<ktnode_jtable> {
154 private:
155 void* kidptr_;
156 public:
157 ktnode_jtable();
158 ~ktnode_jtable();
159 ktnode_jtable * const lchild(unsigned char ch) const;
160 inline ktnode_jtable * const child(unsigned char ch) const {
161 if (((kid_jump_table<ktnode_jtable>*)kidptr_)[ch].which == this) {
162 return ((kid_jump_table<ktnode_jtable>*)kidptr_)[ch].child;
163 } else {
164 return 0;
165 }
166 }
167 ktnode_jtable * const add_child(keyword_tree<ktnode_jtable> & kt,
168 unsigned char ch);
169 tinylist<kid_list_element<ktnode_jtable> > * kids() const {
170 return ((tinylist<kid_list_element<ktnode_jtable> > *)kidptr_);
171 }
172 void optimize_node(keyword_tree<ktnode_jtable> & kt,
173 CharacterProducer & cp);
174 MEMORY_DEBUG(ktnode_jtable)
175 };
176
177 class ktnode_suffix : public ktnode<ktnode_suffix> {
178 private:
179 tinylist<kid_list_element<ktnode_suffix> > * kidlist_;
180 kid_jump_table<ktnode_suffix>* kidjtable_;
181 unsigned int level_;
182 public:
183 ktnode_suffix();
184 ~ktnode_suffix();
185 ktnode_suffix * const lchild(unsigned char ch) const;
186 inline ktnode_suffix * const child(unsigned char ch) const {
187 if (kidjtable_[ch].which == this) {
188 return kidjtable_[ch].child;
189 } else {
190 return 0;
191 }
192 }
193 ktnode_suffix * const add_child(keyword_tree<ktnode_suffix> & kt,
194 unsigned char ch);
195 tinylist<kid_list_element<ktnode_suffix> > * kids() const {
196 return kidlist_;
197 }
198 void optimize_node(keyword_tree<ktnode_suffix> & kt,
199 CharacterProducer & cp);
200 unsigned int level() const {
201 return level_;
202 }
203 MEMORY_DEBUG(ktnode_suffix)
204 };
205
206 class ktnode_lsuffix : public ktnode<ktnode_lsuffix> {
207 private:
208 tinylist<kid_list_element<ktnode_lsuffix> > * kidlist_;
209 unsigned int level_;
210 public:
211 ktnode_lsuffix();
212 ~ktnode_lsuffix();
213 ktnode_lsuffix * const lchild(unsigned char ch) const;
214 inline ktnode_lsuffix * const child(unsigned char ch) const {
215 return lchild(ch);
216 }
217 ktnode_lsuffix * const add_child(keyword_tree<ktnode_lsuffix> & kt,
218 unsigned char ch);
219 tinylist<kid_list_element<ktnode_lsuffix> > * kids() const {
220 return kidlist_;
221 }
222 void optimize_node(keyword_tree<ktnode_lsuffix> & kt,
223 CharacterProducer & cp);
224 unsigned int level() const {
225 return level_;
226 }
227 MEMORY_DEBUG(ktnode_lsuffix)
228 };
229
230 #endif