ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/pattern_alignment.h
Revision: 1.2
Committed: Wed May 4 18:03:45 2005 UTC (11 years ago) by nje01
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +31 -9 lines
Log Message:
Small bug fixes, plus codon based edit distance for peptide searching.

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #ifndef _IBPEP_pattern_alignment_h
23 #define _IBPEP_pattern_alignment_h
24
25 #include <assert.h>
26 #include <iostream>
27 #include <vector>
28 #include "char_io.h"
29 #include "alignment_code.h"
30 #include "memory_debug.h"
31
32 #if !defined(NO_STD_NAMESPACE)
33 using namespace std;
34 #endif
35
36 class pattern_hit {
37 private:
38 unsigned long pattern_;
39 FILE_POSITION_TYPE pos_;
40 public:
41 pattern_hit(unsigned long k=0, FILE_POSITION_TYPE p=0)
42 : pattern_(k), pos_(p) {}
43 virtual ~pattern_hit() {};
44 unsigned long const & pattern_id() const {
45 return pattern_;
46 }
47 void pattern_iter(unsigned long const & id) {
48 pattern_ = id;
49 }
50 FILE_POSITION_TYPE const & pos() const {
51 return pos_;
52 }
53 void pos(FILE_POSITION_TYPE const & e) {
54 pos_ = e;
55 }
56 MEMORY_DEBUG(pattern_hit);
57 };
58
59 class pattern_hit_w_dist : public pattern_hit {
60 int dist_;
61 public:
62 pattern_hit_w_dist(unsigned long k=0, FILE_POSITION_TYPE p=0, int d=-1)
63 : pattern_hit(k,p), dist_(d) {};
64 pattern_hit_w_dist() {};
65 void editdist(int const & d) {
66 dist_ = d;
67 }
68 int const & editdist() const {
69 return dist_;
70 }
71 MEMORY_DEBUG(pattern_hit_w_dist);
72 };
73
74 class pattern_alignment {
75 public:
76 pattern_alignment(FILE_POSITION_TYPE e=0, bool yn=false)
77 : end_(e), alignment_done_(false), yesno_(yn) {
78 if (!yesno_) {
79 stats_.resize(alignment_codes);
80 fill(stats_.begin(),stats_.end(),0);
81 }
82 }
83 pattern_alignment(pattern_hit const & ph, bool yn=false)
84 : end_(ph.pos()), alignment_done_(false) {
85 if (!yesno_) {
86 stats_.resize(alignment_codes);
87 fill(stats_.begin(),stats_.end(),0);
88 }
89 }
90 virtual ~pattern_alignment();
91 FILE_POSITION_TYPE const & end() const {
92 return end_;
93 }
94 void end(FILE_POSITION_TYPE const & e) {
95 end_ = e;
96 }
97 virtual bool align(CharacterProducer &, std::string const & pattern) =0;
98 void reset() {
99 alignment_done_ = false;
100 if (!yesno_) {
101 fill(stats_.begin(),stats_.end(),0);
102 }
103 }
104 int size() const {
105 assert(alignment_done_);
106 return alignment_.size();
107 };
108 alignment_code operator[](int i) const {
109 assert(alignment_done_);
110 return alignment_[i];
111 }
112 FILE_POSITION_TYPE start() const {
113 assert(alignment_done_);
114 return start_;
115 }
116 FILE_POSITION_TYPE length() const {
117 assert(alignment_done_);
118 return end_-start_+1;
119 }
120 std::string const & matching_text() const {
121 assert(alignment_done_);
122 return matching_text_;
123 }
124 unsigned int stats(alignment_code ac) const {
125 assert(alignment_done_);
126 return stats_[ac];
127 }
128 unsigned int editdist() const {
129 assert(alignment_done_);
130 if (stats_[alignment_constraint_violation] > 0) {
131 // checkpoint;
132 return MAXINT;
133 } else {
134 return stats_[alignment_substitution] +
135 stats_[alignment_substitution_1] +
136 2*stats_[alignment_substitution_2] +
137 3*stats_[alignment_substitution_3] +
138 stats_[alignment_insertion] +
139 3*stats_[alignment_insertion_3] +
140 stats_[alignment_deletion] +
141 3*stats_[alignment_deletion_3];
142 }
143 }
144 std::string alignment_string() const {
145 assert(alignment_done_);
146 char *r = new char[size()+1];
147 for (int i=0; i<size(); i++) {
148 char ch;
149 switch ((*this)[i]) {
150 case alignment_equal :
151 ch = '|';
152 break;
153 case alignment_wildcard_equal :
154 ch = '+';
155 break;
156 case alignment_substitution :
157 ch = '*';
158 break;
159 case alignment_substitution_1 :
160 ch = '.';
161 break;
162 case alignment_substitution_2 :
163 ch = ':';
164 break;
165 case alignment_substitution_3 :
166 ch = 'x';
167 break;
168 case alignment_insertion :
169 case alignment_insertion_3 :
170 ch = '^';
171 break;
172 case alignment_deletion :
173 case alignment_deletion_3 :
174 ch = 'v';
175 break;
176 case alignment_constraint_violation :
177 ch = '!';
178 break;
179 default:
180 ch = ' ';
181 break;
182 }
183 r[i] = ch;
184 }
185 r[size()] = '\0';
186 std::string r1(r);
187 delete r;
188 return r1;
189 }
190 std::string alignment_text() const {
191 assert(alignment_done_);
192 std::string r("");
193 std::string const & mt = matching_text();
194 int p=0;
195 for (int i=0; i<size(); i++) {
196 if ((*this)[i] != alignment_deletion &&
197 (*this)[i] != alignment_deletion_3) {
198 r += mt[p];
199 p++;
200 } else {
201 r += "-";
202 }
203 }
204 return r;
205 }
206 std::string alignment_pattern(std::string const & pat) const {
207 assert(alignment_done_);
208 std::string r("");
209 int p=0;
210 for (int i=0; i<size(); i++) {
211 if ((*this)[i] != alignment_insertion &&
212 (*this)[i] != alignment_insertion_3) {
213 r += pat[p];
214 p++;
215 } else {
216 r += "-";
217 }
218 }
219 return r;
220 }
221 void yesno(bool yn) {
222 yesno_ = yn;
223 }
224 // void write(ostream & os,
225 // FILE_POSITION_TYPE const seq_pos,
226 // std::string const & pattern,
227 // long unsigned int id, bool revcomp);
228 virtual void write(ostream & os) const;
229 virtual void read(istream & is) {
230 is >> pattern_ >> end_;
231 }
232 MEMORY_DEBUG(pattern_alignment);
233 private:
234 unsigned long pattern_;
235 FILE_POSITION_TYPE end_;
236
237 protected:
238 std::vector<alignment_code> alignment_;
239 std::vector<int> stats_;
240 std::string matching_text_;
241 FILE_POSITION_TYPE start_;
242 bool alignment_done_;
243 bool yesno_;
244 };
245
246 istream & operator>>(istream & is, pattern_alignment & ka);
247 ostream & operator<<(ostream & os, pattern_alignment const & ka);
248
249 class exact_alignment : public pattern_alignment {
250 public:
251 exact_alignment(FILE_POSITION_TYPE e=0)
252 : pattern_alignment(e) {};
253 exact_alignment(pattern_hit const & ph)
254 : pattern_alignment(ph) {};
255 ~exact_alignment() {};
256 bool align(CharacterProducer &, std::string const & pattern);
257 MEMORY_DEBUG(exact_alignment)
258 };
259
260 class exact_peptide_alignment : public pattern_alignment {
261 private:
262 char lcontext_;
263 char rcontext_;
264 public:
265 exact_peptide_alignment(FILE_POSITION_TYPE e=0)
266 : pattern_alignment(e) {};
267 exact_peptide_alignment(pattern_hit const & ph)
268 : pattern_alignment(ph) {};
269 ~exact_peptide_alignment() {};
270 bool align(CharacterProducer &, std::string const & pattern);
271 char lcontext() const {
272 assert(alignment_done_);
273 return lcontext_;
274 }
275 char rcontext() const {
276 assert(alignment_done_);
277 return rcontext_;
278 }
279 MEMORY_DEBUG(exact_peptide_alignment)
280 };
281
282 class exact_wc_alignment : public pattern_alignment {
283 private:
284 bool textn_;
285 public:
286 exact_wc_alignment(FILE_POSITION_TYPE e=0, bool tn=false)
287 : pattern_alignment(e), textn_(tn) {};
288 exact_wc_alignment(pattern_hit const & ph, bool tn=false)
289 : pattern_alignment(ph), textn_(tn) {};
290 ~exact_wc_alignment() {};
291 bool align(CharacterProducer &, std::string const & pattern);
292 MEMORY_DEBUG(exact_wc_alignment)
293 };
294
295 class mismatch_alignment : public pattern_alignment {
296 public:
297 mismatch_alignment(FILE_POSITION_TYPE e=0)
298 : pattern_alignment(e) {};
299 mismatch_alignment(pattern_hit const & ph)
300 : pattern_alignment(ph) {};
301 ~mismatch_alignment() {};
302 bool align(CharacterProducer &, std::string const & pattern);
303 MEMORY_DEBUG(mismatch_alignment)
304 };
305
306 class editdist_alignment : public pattern_alignment {
307 FILE_POSITION_TYPE end2_;
308 unsigned int k_;
309 char eos_;
310 bool wc_;
311 bool textn_;
312 bool indels_;
313 bool dna_mut_;
314 int lconst_;
315 int rconst_;
316 char *buffer_;
317 FILE_POSITION_TYPE bufstart_;
318 FILE_POSITION_TYPE bufend_;
319 long unsigned int bufsize_;
320 unsigned int maxpatlen_;
321 unsigned int matsize_;
322 unsigned int *dp_;
323 int *best_;
324 public:
325 editdist_alignment(FILE_POSITION_TYPE e=0,
326 FILE_POSITION_TYPE e2=0,
327 unsigned int k=0, char eos='\n',
328 bool wc=false, bool tn=false, bool id=true, bool dm=false,
329 int lconst=0, int rconst=0, bool yn=false)
330 : pattern_alignment(e,yn), end2_(e2), k_(k), eos_(eos),
331 wc_(wc), textn_(tn), indels_(id), dna_mut_(dm), lconst_(lconst), rconst_(rconst),
332 buffer_(0), bufstart_(0), bufend_(0), maxpatlen_(0), matsize_(0), dp_(0), best_(0) {};
333 editdist_alignment(FILE_POSITION_TYPE e=0,
334 unsigned int k=0, char eos='\n',
335 bool wc=false, bool tn=false, bool id=true, bool dm=false,
336 int lconst=0, int rconst=0, bool yn=false)
337 : pattern_alignment(e,yn), end2_(e), k_(k), eos_(eos), wc_(wc), textn_(tn),
338 indels_(id), dna_mut_(dm), lconst_(lconst), rconst_(rconst),
339 buffer_(0), bufstart_(0), bufend_(0), maxpatlen_(0), matsize_(0), dp_(0), best_(0) {};
340 editdist_alignment(pattern_hit const & ph, unsigned int k=0, char eos='\n',
341 bool wc=false, bool tn=false, bool id=true, bool dm=false,
342 int lconst=0, int rconst=0, bool yn=false)
343 : pattern_alignment(ph,yn), end2_(ph.pos()), k_(k), eos_(eos),
344 wc_(wc), textn_(tn), indels_(id), dna_mut_(dm), lconst_(lconst), rconst_(rconst),
345 buffer_(0), bufstart_(0), bufend_(0), maxpatlen_(0), matsize_(0), dp_(0), best_(0) {};
346 ~editdist_alignment() {
347 delete [] buffer_;
348 delete [] dp_;
349 delete [] best_;
350 };
351 void poslb(FILE_POSITION_TYPE p) {
352 pattern_alignment::end(p);
353 }
354 void posub(FILE_POSITION_TYPE p) {
355 end2_ = p;
356 }
357 void pos(FILE_POSITION_TYPE p) {
358 pattern_alignment::end(p);
359 end2_ = p;
360 }
361 void exact_start_bases(unsigned int esb) {
362 lconst_ = esb;
363 }
364 void exact_end_bases(unsigned int eeb) {
365 rconst_ = eeb;
366 }
367 void eos(char ch) {
368 eos_ = ch;
369 }
370 char eos() {
371 return eos_;
372 }
373 void kmax(int k) {
374 k_ = k;
375 }
376 void wc(bool wc) {
377 wc_ = wc;
378 }
379 void tn(bool tn) {
380 textn_ = tn;
381 }
382 void indels(bool id) {
383 indels_ = id;
384 }
385 void dna_mut(bool dm) {
386 dna_mut_ = dm;
387 }
388 void maxpatlen(long unsigned int mpl) {
389 maxpatlen_ = mpl;
390 }
391 bool align(CharacterProducer &, std::string const & pattern);
392 MEMORY_DEBUG(editdist_alignment)
393 };
394
395 #endif