1 |
/************************************************************************** |
2 |
* This code is part of the supporting infrastructure for ATA Mapper. |
3 |
* Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved. |
4 |
* Author: Nathan Edwards |
5 |
* |
6 |
* This program is free software; you can redistribute it and/or modify |
7 |
* it under the terms of the GNU General Public License as published by |
8 |
* the Free Software Foundation; either version 2 of the License, or |
9 |
* (at your option) any later version. |
10 |
* |
11 |
* This program is distributed in the hope that it will be useful, |
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
* GNU General Public License for more details. |
15 |
* |
16 |
* You should have received (LICENSE.txt) a copy of the GNU General Public |
17 |
* License along with this program; if not, write to the Free Software |
18 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
*************************************************************************/ |
20 |
|
21 |
|
22 |
#include "filter_bitvec.h" |
23 |
#include "primer_alignment.h" |
24 |
// #include "sortedvector.t" |
25 |
|
26 |
filter_bitvec::filter_bitvec(unsigned int k, char eos, |
27 |
bool wc, bool tn, bool id, bool dm) : |
28 |
num_patterns_(0) |
29 |
{ |
30 |
pm_ = new shift_and_inexact(k,eos,wc,tn,id,dm); |
31 |
} |
32 |
|
33 |
filter_bitvec::~filter_bitvec() { |
34 |
delete pm_; |
35 |
} |
36 |
|
37 |
unsigned int filter_bitvec::mismatches() const { |
38 |
return pm_->mismatches(); |
39 |
} |
40 |
|
41 |
void filter_bitvec::mismatches(unsigned int k) { |
42 |
pm_->mismatches(k); |
43 |
} |
44 |
|
45 |
bool filter_bitvec::wildcards() const { |
46 |
return pm_->wildcards(); |
47 |
} |
48 |
|
49 |
void filter_bitvec::wildcards(bool wc) { |
50 |
pm_->wildcards(wc); |
51 |
} |
52 |
|
53 |
bool filter_bitvec::wildcard_text_N() const { |
54 |
return pm_->wildcard_text_N(); |
55 |
} |
56 |
|
57 |
void filter_bitvec::wildcard_text_N(bool tn) { |
58 |
pm_->wildcard_text_N(tn); |
59 |
} |
60 |
|
61 |
bool filter_bitvec::indels() const { |
62 |
return pm_->indels(); |
63 |
} |
64 |
|
65 |
void filter_bitvec::indels(bool id) { |
66 |
pm_->indels(id); |
67 |
} |
68 |
|
69 |
bool filter_bitvec::dna_mut() const { |
70 |
return pm_->dna_mut(); |
71 |
} |
72 |
|
73 |
void filter_bitvec::dna_mut(bool dm) { |
74 |
pm_->dna_mut(dm); |
75 |
} |
76 |
|
77 |
char filter_bitvec::eos_char() const { |
78 |
return pm_->eos_char(); |
79 |
} |
80 |
|
81 |
void filter_bitvec::eos_char(char c) { |
82 |
pm_->eos_char(c); |
83 |
} |
84 |
|
85 |
long unsigned int |
86 |
filter_bitvec::add_pattern(std::string const & pat, unsigned long id, |
87 |
int esb, int eeb) { |
88 |
add_pattern_(pat,id,esb,eeb); |
89 |
num_patterns_++; |
90 |
return id; |
91 |
} |
92 |
|
93 |
bool |
94 |
filter_bitvec::find_patterns(CharacterProducer & cp, |
95 |
pattern_hit_vector & phs, |
96 |
long unsigned minka) { |
97 |
// checkpoint; |
98 |
|
99 |
// the eos_char we store in the shift_and_inexact is normalized, so |
100 |
// we much unnormalize it before passing it on to the |
101 |
// editdist_alignment class. |
102 |
editdist_alignment pa(0,0,mismatches(),cp.ch(eos_char()), |
103 |
wildcards(),wildcard_text_N(), |
104 |
indels(),dna_mut(),0,0,true); |
105 |
pattern_hit_vector::iterator it,it1,it2; |
106 |
l.reserve(minka*2); |
107 |
bool more; |
108 |
while ((more=pm_->find_patterns(cp,l,minka))||!l.empty()) { |
109 |
// checkpoint; |
110 |
FILE_POSITION_TYPE oldcharspos; |
111 |
oldcharspos = cp.pos(); |
112 |
l.normalize(); |
113 |
// checkpoint; |
114 |
it = l.begin(); |
115 |
while (it != l.end()) { |
116 |
// checkpoint; |
117 |
FILE_POSITION_TYPE firstpos = it->key(); |
118 |
if (firstpos > 0) { |
119 |
long unsigned int pid = it->value()->id(); |
120 |
FILE_POSITION_TYPE pos = firstpos; |
121 |
// checkpoint; |
122 |
// cerr << pid << " " << firstpos << endl; |
123 |
tinylist<pattern_hit_vector::iterator> adj; |
124 |
adj.push_front(it); |
125 |
it1 = it+1; |
126 |
while (it1 != l.end() && it1->key() <= pos+2*mismatches()+1) { |
127 |
// checkpoint; |
128 |
if (it1->value()->id() == pid) { |
129 |
// checkpoint; |
130 |
// cerr << it1->value()->id() << " " << it1->key() << endl; |
131 |
pos = it1->key(); |
132 |
adj.push_front(it1); |
133 |
} |
134 |
++it1; |
135 |
// checkpoint; |
136 |
} |
137 |
// checkpoint; |
138 |
if (oldcharspos < pos+2*mismatches()+1 && more) { |
139 |
// end of returned hit condition... |
140 |
// checkpoint; |
141 |
break; |
142 |
} else { |
143 |
// checkpoint; |
144 |
// cerr << pid << " " << firstpos << " " << pos << endl; |
145 |
pattern_list::const_iterator plit = plit_[pid]; |
146 |
pa.reset(); |
147 |
pa.poslb(firstpos); |
148 |
pa.posub(pos); |
149 |
pa.exact_start_bases(plit->exact_start_bases()); |
150 |
pa.exact_end_bases(plit->exact_end_bases()); |
151 |
// cerr << plit->pattern() << endl; |
152 |
// checkpoint; |
153 |
if (pa.align(cp,plit->pattern())) { |
154 |
// checkpoint; |
155 |
phs.push_back(pa.end(),plit); |
156 |
} |
157 |
tinylist<pattern_hit_vector::iterator>::iterator adjit; |
158 |
adjit = adj.begin(); |
159 |
while (adjit != adj.end()) { |
160 |
(*adjit)->key() = 0; |
161 |
++adjit; |
162 |
} |
163 |
} |
164 |
} |
165 |
++it; |
166 |
} |
167 |
// Move everything with valid key to beginning... |
168 |
// int i,i1; |
169 |
it=l.begin(); |
170 |
it1=l.begin(); |
171 |
// i=0; |
172 |
// i1=0; |
173 |
long unsigned int newsize=0; |
174 |
while (it!=l.end()) { |
175 |
// cerr << i << " " << i1 << endl; |
176 |
// cerr << l.key(it) << " " << l.value(it)->id() << endl; |
177 |
if (it->key() != 0) { |
178 |
if (it != it1) { |
179 |
it1->key() = it->key(); |
180 |
it1->value() = it->value(); |
181 |
} |
182 |
++newsize; |
183 |
++it1; |
184 |
// i1++; |
185 |
} |
186 |
++it; |
187 |
// i++; |
188 |
} |
189 |
// cerr << newsize << endl; |
190 |
l.resize(newsize); |
191 |
// checkpoint; |
192 |
// it=l.begin(); |
193 |
// while (it!=l.end()) { |
194 |
// cerr << it->key() << " " << it->value()->id() << endl; |
195 |
// ++it; |
196 |
// } |
197 |
cp.pos(oldcharspos); |
198 |
report_progress(cp); |
199 |
if (phs.size() >= minka || |
200 |
(more==false && phs.size() > 0)) return true; |
201 |
} |
202 |
return false; |
203 |
} |
204 |
|
205 |
void |
206 |
filter_bitvec::init(CharacterProducer & cp) { |
207 |
assert(pm_!=((void*)0)); |
208 |
plit_.resize(num_patterns_+1); |
209 |
long unsigned int id; |
210 |
tinylist<pattern_list_element>::const_iterator it; |
211 |
for (it=patterns().begin();it!=patterns().end();++it) { |
212 |
id = pm_->add_pattern(it->pattern()); |
213 |
plit_[id] = it; |
214 |
} |
215 |
pm_->init(cp); |
216 |
} |
217 |
|
218 |
void filter_bitvec::reset() { |
219 |
pm_->reset(); |
220 |
} |
221 |
|
222 |
|
223 |
|
224 |
/* |
225 |
|
226 |
|
227 |
// checkpoint; |
228 |
std::list<pattern_hit*>::iterator it,it1,nextit; |
229 |
unsigned long int mindist=MAXINT; |
230 |
bool assigned_nextit=false; |
231 |
std::list<std::list<pattern_hit*>::iterator> adj; |
232 |
std::list<std::list<pattern_hit*>::iterator>::iterator it2; |
233 |
it = pas.begin(); |
234 |
// cerr << "it: " << (*it)->pattern_id() << " " << (*it)->pos() << " " << ((pattern_hit_w_dist*)(*it))->editdist() << endl; |
235 |
long unsigned int pos; |
236 |
while (it!=pas.end()) { |
237 |
// checkpoint; |
238 |
it1=it; |
239 |
mindist=((pattern_hit_w_dist*)(*it1))->editdist(); |
240 |
// cerr << "it1: " << (*it1)->pattern_id() << " " << (*it1)->pos() << " " << ((pattern_hit_w_dist*)(*it1))->editdist() << endl; |
241 |
// cerr << "mindist: " << mindist << endl; |
242 |
pos=(*it)->pos(); |
243 |
// checkpoint; |
244 |
adj.clear(); |
245 |
// checkpoint; |
246 |
assigned_nextit=false; |
247 |
while (it1 != pas.end() && |
248 |
(*it1)->pos() <= pos+2*k_+1) { |
249 |
// checkpoint; |
250 |
if ((*it)->pattern_id() == (*it1)->pattern_id()) { |
251 |
if (mindist > ((pattern_hit_w_dist*)(*it1))->editdist()) { |
252 |
// cerr << "mindist: " << mindist << endl; |
253 |
mindist = ((pattern_hit_w_dist*)(*it1))->editdist(); |
254 |
} |
255 |
adj.push_back(it1); |
256 |
} else { |
257 |
if (!assigned_nextit) { |
258 |
nextit=it1; |
259 |
assigned_nextit=true; |
260 |
} |
261 |
} |
262 |
// checkpoint; |
263 |
pos = (*it1)->pos(); |
264 |
it1++; |
265 |
// if (it1 != pas.end()) { |
266 |
// cerr << "it1: " << (*it1)->pattern_id() << " " << (*it1)->pos() << " " << ((pattern_hit_w_dist*)(*it1))->editdist() << endl; |
267 |
// } |
268 |
} |
269 |
// checkpoint; |
270 |
// cerr << adj.size() << " " << mindist << endl; |
271 |
if (adj.size() > 1) { |
272 |
// checkpoint; |
273 |
it2 = adj.begin(); |
274 |
bool assigned_keptit=false; |
275 |
std::list<pattern_hit*>::iterator keptit; |
276 |
while (it2 != adj.end()) { |
277 |
if (((pattern_hit_w_dist*)(**it2))->editdist() > mindist || |
278 |
assigned_keptit) { |
279 |
// cerr << "erase *it2: " << (**it2)->pattern_id() << " " << (**it2)->pos() << " " << ((pattern_hit_w_dist*)(**it2))->editdist() << endl; |
280 |
delete **it2; |
281 |
pas.erase(*it2); |
282 |
} else { |
283 |
// cerr << "keep *it2: " << (**it2)->pattern_id() << " " << (**it2)->pos() << " " << ((pattern_hit_w_dist*)(**it2))->editdist() << endl; |
284 |
if (!assigned_keptit) { |
285 |
keptit=*it2; |
286 |
assigned_keptit=true; |
287 |
} |
288 |
} |
289 |
it2++; |
290 |
} |
291 |
if (assigned_nextit) { |
292 |
it = nextit; |
293 |
} else { |
294 |
it = keptit; |
295 |
it++; |
296 |
} |
297 |
} else { |
298 |
it++; |
299 |
} |
300 |
// if (it != pas.end()) { |
301 |
// cerr << "it: " << (*it)->pattern_id() << " " << (*it)->pos() << " " << ((pattern_hit_w_dist*)(*it))->editdist() << endl; |
302 |
// } |
303 |
} |
304 |
// checkpoint; |
305 |
|
306 |
|
307 |
|
308 |
|
309 |
|
310 |
|
311 |
*/ |