ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/filter_bitvec.cc
Revision: 1.2
Committed: Wed May 4 18:03:44 2005 UTC (10 years, 11 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +11 -3 lines
Log Message:
Small bug fixes, plus codon based edit distance for peptide searching.

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #include "filter_bitvec.h"
23 #include "primer_alignment.h"
24 // #include "sortedvector.t"
25
26 filter_bitvec::filter_bitvec(unsigned int k, char eos,
27 bool wc, bool tn, bool id, bool dm) :
28 num_patterns_(0)
29 {
30 pm_ = new shift_and_inexact(k,eos,wc,tn,id,dm);
31 }
32
33 filter_bitvec::~filter_bitvec() {
34 delete pm_;
35 }
36
37 unsigned int filter_bitvec::mismatches() const {
38 return pm_->mismatches();
39 }
40
41 void filter_bitvec::mismatches(unsigned int k) {
42 pm_->mismatches(k);
43 }
44
45 bool filter_bitvec::wildcards() const {
46 return pm_->wildcards();
47 }
48
49 void filter_bitvec::wildcards(bool wc) {
50 pm_->wildcards(wc);
51 }
52
53 bool filter_bitvec::wildcard_text_N() const {
54 return pm_->wildcard_text_N();
55 }
56
57 void filter_bitvec::wildcard_text_N(bool tn) {
58 pm_->wildcard_text_N(tn);
59 }
60
61 bool filter_bitvec::indels() const {
62 return pm_->indels();
63 }
64
65 void filter_bitvec::indels(bool id) {
66 pm_->indels(id);
67 }
68
69 bool filter_bitvec::dna_mut() const {
70 return pm_->dna_mut();
71 }
72
73 void filter_bitvec::dna_mut(bool dm) {
74 pm_->dna_mut(dm);
75 }
76
77 char filter_bitvec::eos_char() const {
78 return pm_->eos_char();
79 }
80
81 void filter_bitvec::eos_char(char c) {
82 pm_->eos_char(c);
83 }
84
85 long unsigned int
86 filter_bitvec::add_pattern(std::string const & pat, unsigned long id,
87 int esb, int eeb) {
88 add_pattern_(pat,id,esb,eeb);
89 num_patterns_++;
90 return id;
91 }
92
93 bool
94 filter_bitvec::find_patterns(CharacterProducer & cp,
95 pattern_hit_vector & phs,
96 long unsigned minka) {
97 // checkpoint;
98
99 // the eos_char we store in the shift_and_inexact is normalized, so
100 // we much unnormalize it before passing it on to the
101 // editdist_alignment class.
102 editdist_alignment pa(0,0,mismatches(),cp.ch(eos_char()),
103 wildcards(),wildcard_text_N(),
104 indels(),dna_mut(),0,0,true);
105 pattern_hit_vector::iterator it,it1,it2;
106 l.reserve(minka*2);
107 bool more;
108 while ((more=pm_->find_patterns(cp,l,minka))||!l.empty()) {
109 // checkpoint;
110 FILE_POSITION_TYPE oldcharspos;
111 oldcharspos = cp.pos();
112 l.normalize();
113 // checkpoint;
114 it = l.begin();
115 while (it != l.end()) {
116 // checkpoint;
117 FILE_POSITION_TYPE firstpos = it->key();
118 if (firstpos > 0) {
119 long unsigned int pid = it->value()->id();
120 FILE_POSITION_TYPE pos = firstpos;
121 // checkpoint;
122 // cerr << pid << " " << firstpos << endl;
123 tinylist<pattern_hit_vector::iterator> adj;
124 adj.push_front(it);
125 it1 = it+1;
126 while (it1 != l.end() && it1->key() <= pos+2*mismatches()+1) {
127 // checkpoint;
128 if (it1->value()->id() == pid) {
129 // checkpoint;
130 // cerr << it1->value()->id() << " " << it1->key() << endl;
131 pos = it1->key();
132 adj.push_front(it1);
133 }
134 ++it1;
135 // checkpoint;
136 }
137 // checkpoint;
138 if (oldcharspos < pos+2*mismatches()+1 && more) {
139 // end of returned hit condition...
140 // checkpoint;
141 break;
142 } else {
143 // checkpoint;
144 // cerr << pid << " " << firstpos << " " << pos << endl;
145 pattern_list::const_iterator plit = plit_[pid];
146 pa.reset();
147 pa.poslb(firstpos);
148 pa.posub(pos);
149 pa.exact_start_bases(plit->exact_start_bases());
150 pa.exact_end_bases(plit->exact_end_bases());
151 // cerr << plit->pattern() << endl;
152 // checkpoint;
153 if (pa.align(cp,plit->pattern())) {
154 // checkpoint;
155 phs.push_back(pa.end(),plit);
156 }
157 tinylist<pattern_hit_vector::iterator>::iterator adjit;
158 adjit = adj.begin();
159 while (adjit != adj.end()) {
160 (*adjit)->key() = 0;
161 ++adjit;
162 }
163 }
164 }
165 ++it;
166 }
167 // Move everything with valid key to beginning...
168 // int i,i1;
169 it=l.begin();
170 it1=l.begin();
171 // i=0;
172 // i1=0;
173 long unsigned int newsize=0;
174 while (it!=l.end()) {
175 // cerr << i << " " << i1 << endl;
176 // cerr << l.key(it) << " " << l.value(it)->id() << endl;
177 if (it->key() != 0) {
178 if (it != it1) {
179 it1->key() = it->key();
180 it1->value() = it->value();
181 }
182 ++newsize;
183 ++it1;
184 // i1++;
185 }
186 ++it;
187 // i++;
188 }
189 // cerr << newsize << endl;
190 l.resize(newsize);
191 // checkpoint;
192 // it=l.begin();
193 // while (it!=l.end()) {
194 // cerr << it->key() << " " << it->value()->id() << endl;
195 // ++it;
196 // }
197 cp.pos(oldcharspos);
198 report_progress(cp);
199 if (phs.size() >= minka ||
200 (more==false && phs.size() > 0)) return true;
201 }
202 return false;
203 }
204
205 void
206 filter_bitvec::init(CharacterProducer & cp) {
207 assert(pm_!=((void*)0));
208 plit_.resize(num_patterns_+1);
209 long unsigned int id;
210 tinylist<pattern_list_element>::const_iterator it;
211 for (it=patterns().begin();it!=patterns().end();++it) {
212 id = pm_->add_pattern(it->pattern());
213 plit_[id] = it;
214 }
215 pm_->init(cp);
216 }
217
218 void filter_bitvec::reset() {
219 pm_->reset();
220 }
221
222
223
224 /*
225
226
227 // checkpoint;
228 std::list<pattern_hit*>::iterator it,it1,nextit;
229 unsigned long int mindist=MAXINT;
230 bool assigned_nextit=false;
231 std::list<std::list<pattern_hit*>::iterator> adj;
232 std::list<std::list<pattern_hit*>::iterator>::iterator it2;
233 it = pas.begin();
234 // cerr << "it: " << (*it)->pattern_id() << " " << (*it)->pos() << " " << ((pattern_hit_w_dist*)(*it))->editdist() << endl;
235 long unsigned int pos;
236 while (it!=pas.end()) {
237 // checkpoint;
238 it1=it;
239 mindist=((pattern_hit_w_dist*)(*it1))->editdist();
240 // cerr << "it1: " << (*it1)->pattern_id() << " " << (*it1)->pos() << " " << ((pattern_hit_w_dist*)(*it1))->editdist() << endl;
241 // cerr << "mindist: " << mindist << endl;
242 pos=(*it)->pos();
243 // checkpoint;
244 adj.clear();
245 // checkpoint;
246 assigned_nextit=false;
247 while (it1 != pas.end() &&
248 (*it1)->pos() <= pos+2*k_+1) {
249 // checkpoint;
250 if ((*it)->pattern_id() == (*it1)->pattern_id()) {
251 if (mindist > ((pattern_hit_w_dist*)(*it1))->editdist()) {
252 // cerr << "mindist: " << mindist << endl;
253 mindist = ((pattern_hit_w_dist*)(*it1))->editdist();
254 }
255 adj.push_back(it1);
256 } else {
257 if (!assigned_nextit) {
258 nextit=it1;
259 assigned_nextit=true;
260 }
261 }
262 // checkpoint;
263 pos = (*it1)->pos();
264 it1++;
265 // if (it1 != pas.end()) {
266 // cerr << "it1: " << (*it1)->pattern_id() << " " << (*it1)->pos() << " " << ((pattern_hit_w_dist*)(*it1))->editdist() << endl;
267 // }
268 }
269 // checkpoint;
270 // cerr << adj.size() << " " << mindist << endl;
271 if (adj.size() > 1) {
272 // checkpoint;
273 it2 = adj.begin();
274 bool assigned_keptit=false;
275 std::list<pattern_hit*>::iterator keptit;
276 while (it2 != adj.end()) {
277 if (((pattern_hit_w_dist*)(**it2))->editdist() > mindist ||
278 assigned_keptit) {
279 // cerr << "erase *it2: " << (**it2)->pattern_id() << " " << (**it2)->pos() << " " << ((pattern_hit_w_dist*)(**it2))->editdist() << endl;
280 delete **it2;
281 pas.erase(*it2);
282 } else {
283 // cerr << "keep *it2: " << (**it2)->pattern_id() << " " << (**it2)->pos() << " " << ((pattern_hit_w_dist*)(**it2))->editdist() << endl;
284 if (!assigned_keptit) {
285 keptit=*it2;
286 assigned_keptit=true;
287 }
288 }
289 it2++;
290 }
291 if (assigned_nextit) {
292 it = nextit;
293 } else {
294 it = keptit;
295 it++;
296 }
297 } else {
298 it++;
299 }
300 // if (it != pas.end()) {
301 // cerr << "it: " << (*it)->pattern_id() << " " << (*it)->pos() << " " << ((pattern_hit_w_dist*)(*it))->editdist() << endl;
302 // }
303 }
304 // checkpoint;
305
306
307
308
309
310
311 */