ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/primer_alignment.h
Revision: 1.2
Committed: Wed May 4 18:03:45 2005 UTC (11 years, 3 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +22 -4 lines
Log Message:
Small bug fixes, plus codon based edit distance for peptide searching.

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #ifndef _IBPEP_primer_alignment_h
23 #define _IBPEP_primer_alignment_h
24
25 #include <assert.h>
26 #include <iostream>
27 #include <string>
28 #include <vector>
29 #include "char_io.h"
30 #include "types.h"
31 #include "alignment_code.h"
32 #include "memory_debug.h"
33
34 class primer_alignment {
35 public:
36 primer_alignment() :
37 eos_('\n'), maxdist_(1),
38 wc_(false), tn_(false),
39 indels_(true), dna_mut_(false),
40 yesno_(false),
41 maxpatlen_(0), alignment_done_(false),
42 end_defined_(false), matsize_(0), dp_(0), best_(0)
43 {
44 if (!yesno_) {
45 stats_.resize(alignment_codes);
46 fill(stats_.begin(),stats_.end(),0);
47 }
48 }
49 virtual ~primer_alignment() {};
50 virtual bool align(CharacterProducer &, std::string const & pattern1, std::string const & pattern2) =0;
51 int size() const {
52 assert(alignment_done_);
53 return alignment_.size();
54 };
55 void reset() {
56 alignment_done_ = false;
57 end_defined_ = false;
58 if (!yesno_) {
59 fill(stats_.begin(),stats_.end(),0);
60 }
61 }
62 alignment_code operator[](int i) const {
63 assert(alignment_done_);
64 return alignment_[i];
65 }
66 FILE_POSITION_TYPE start() const {
67 assert(alignment_done_);
68 return start_;
69 }
70 FILE_POSITION_TYPE const & end() const {
71 assert(end_defined_ || alignment_done_);
72 return end_;
73 }
74 FILE_POSITION_TYPE length() const {
75 assert(alignment_done_);
76 return end_-start_+1;
77 }
78 std::string const & matching_text() const {
79 assert(alignment_done_);
80 return matching_text_;
81 }
82 unsigned int stats(alignment_code ac) const {
83 assert(alignment_done_);
84 return stats_[ac];
85 }
86 unsigned int editdist() const {
87 assert(alignment_done_);
88 // checkpoint;
89 // cerr << stats_[alignment_constraint_violation] << endl;
90 if (stats_[alignment_constraint_violation] > 0) {
91 // checkpoint;
92 return MAXINT;
93 } else {
94 // checkpoint;
95 return stats_[alignment_substitution] +
96 stats_[alignment_substitution_1] +
97 2*stats_[alignment_substitution_2] +
98 3*stats_[alignment_substitution_3] +
99 stats_[alignment_insertion] +
100 stats_[alignment_deletion] +
101 3*stats_[alignment_insertion_3] +
102 3*stats_[alignment_deletion_3];
103 }
104 }
105 std::string alignment_string() const {
106 assert(alignment_done_);
107 std::string r("");
108 // checkpoint;
109 for (int i=0; i<size(); i++) {
110 // cerr << i << ": " << (int) (*this)[i] << endl;
111 if ((*this)[i] == alignment_equal) {
112 r += "|";
113 } else if ((*this)[i] == alignment_wildcard_equal) {
114 r += "+";
115 } else if ((*this)[i] == alignment_substitution) {
116 r += "*";
117 } else if ((*this)[i] == alignment_substitution_1) {
118 r += ".";
119 } else if ((*this)[i] == alignment_substitution_2) {
120 r += ":";
121 } else if ((*this)[i] == alignment_substitution_3) {
122 r += "x";
123 } else if ((*this)[i] == alignment_insertion) {
124 r += "^";
125 } else if ((*this)[i] == alignment_deletion) {
126 r += "v";
127 } else if ((*this)[i] == alignment_constraint_violation) {
128 r += "!";
129 } else {
130 checkpoint;
131 abort();
132 }
133 }
134 return r;
135 }
136 std::string alignment_text() const {
137 assert(alignment_done_);
138 std::string r("");
139 std::string const & mt = matching_text();
140 int p=0;
141 for (int i=0; i<size(); i++) {
142 if ((*this)[i] != alignment_deletion &&
143 (*this)[i] != alignment_deletion_3) {
144 r += mt[p];
145 p++;
146 } else {
147 r += "-";
148 }
149 }
150 return r;
151 }
152 std::string alignment_pattern(std::string const & pat) const {
153 assert(alignment_done_);
154 std::string r("");
155 int p=0;
156 for (int i=0; i<size(); i++) {
157 if ((*this)[i] != alignment_insertion &&
158 (*this)[i] != alignment_insertion_3) {
159 r += pat[p];
160 p++;
161 } else {
162 r += "-";
163 }
164 }
165 return r;
166 }
167 // void write(ostream & os,
168 // FILE_POSITION_TYPE const seq_pos,
169 // std::string const & pattern1,
170 // std::string const & pattern2,
171 // long unsigned int, bool);
172 void eos(char ch) {
173 eos_ = ch;
174 }
175 void kmax(int k) {
176 maxdist_ = k;
177 }
178 void wc(bool wc) {
179 wc_ = wc;
180 }
181 void tn(bool tn) {
182 tn_ = tn;
183 }
184 void indels(bool id) {
185 indels_ = id;
186 }
187 void dna_mut(bool dm) {
188 dna_mut_ = dm;
189 }
190 void yesno(bool yn) {
191 yesno_ = yn;
192 }
193 void maxpatlen(long unsigned int mpl) {
194 maxpatlen_ = mpl;
195 }
196 MEMORY_DEBUG(primer_alignment)
197 protected:
198 bool global_align(char* const & text, unsigned int textlen,
199 std::string const & pattern,
200 int dirn, unsigned int lmatch, unsigned int rmatch,
201 int & matchlen);
202
203 std::vector<alignment_code> alignment_;
204 std::vector<int> stats_;
205 std::string matching_text_;
206 FILE_POSITION_TYPE start_;
207 FILE_POSITION_TYPE end_;
208 bool alignment_done_;
209 bool end_defined_;
210
211 protected:
212 char eos_;
213 int maxdist_;
214 bool wc_;
215 bool tn_;
216 bool indels_;
217 bool dna_mut_;
218 bool yesno_;
219 long unsigned int maxpatlen_;
220 unsigned long int matsize_;
221 unsigned int *dp_;
222 unsigned int *best_;
223 };
224
225 class primer_alignment_2match : public primer_alignment {
226 private:
227 FILE_POSITION_TYPE end1_;
228 FILE_POSITION_TYPE end2_;
229 int lmatch_;
230 int rmatch_;
231 public:
232 primer_alignment_2match(FILE_POSITION_TYPE e1, FILE_POSITION_TYPE e2,
233 int lmatch, int rmatch)
234 : end1_(e1), end2_(e2), lmatch_(lmatch), rmatch_(rmatch) {}
235 ~primer_alignment_2match() {}
236 bool align(CharacterProducer &,
237 std::string const & pattern1, std::string const & pattern2);
238 MEMORY_DEBUG(primer_alignment_2match)
239 };
240
241 class primer_alignment_lmatch : public primer_alignment {
242 private:
243 char *buffer0_;
244 char *buffer1_;
245 FILE_POSITION_TYPE bufstart_;
246 FILE_POSITION_TYPE bufend_;
247 long unsigned int bufsize_;
248 FILE_POSITION_TYPE end1_;
249 unsigned int lmatch_;
250 unsigned int rmatch_;
251 public:
252 primer_alignment_lmatch(FILE_POSITION_TYPE e1=0,
253 unsigned int lmatch=0,
254 unsigned int rmatch=0)
255 : buffer0_(0), buffer1_(0), bufstart_(0), bufend_(0), bufsize_(0),
256 end1_(e1), lmatch_(lmatch), rmatch_(rmatch) {}
257 ~primer_alignment_lmatch() {
258 delete [] buffer0_;
259 delete [] buffer1_;
260 }
261 FILE_POSITION_TYPE pos() const {
262 return end1_;
263 }
264 void pos(FILE_POSITION_TYPE p) {
265 end1_ = p;
266 }
267 unsigned int exact_start_bases() const {
268 return lmatch_;
269 }
270 void exact_start_bases(unsigned int esb) {
271 lmatch_ = esb;
272 }
273 unsigned int exact_end_bases() const {
274 return rmatch_;
275 }
276 void exact_end_bases(unsigned int eeb) {
277 rmatch_ = eeb;
278 }
279 bool align(CharacterProducer &,
280 std::string const & pattern1, std::string const & pattern2);
281 MEMORY_DEBUG(primer_alignment_lmatch)
282 };
283
284 class primer_alignment_rmatch : public primer_alignment {
285 private:
286 char *buffer0_;
287 char *buffer1_;
288 FILE_POSITION_TYPE bufstart_;
289 FILE_POSITION_TYPE bufend_;
290 long unsigned int bufsize_;
291 FILE_POSITION_TYPE end2_;
292 unsigned int lmatch_;
293 unsigned int rmatch_;
294 public:
295 primer_alignment_rmatch(FILE_POSITION_TYPE e2=0,
296 unsigned int lmatch=0,
297 unsigned int rmatch=0)
298 : buffer0_(0), buffer1_(0), bufstart_(0), bufend_(0), bufsize_(0),
299 end2_(e2), lmatch_(lmatch), rmatch_(rmatch) {}
300 ~primer_alignment_rmatch() {
301 delete [] buffer0_;
302 delete [] buffer1_;
303 }
304 FILE_POSITION_TYPE pos() const {
305 return end2_;
306 }
307 void pos(FILE_POSITION_TYPE p) {
308 end2_ = p;
309 }
310 unsigned int exact_start_bases() const {
311 return lmatch_;
312 }
313 void exact_start_bases(unsigned int esb) {
314 lmatch_ = esb;
315 }
316 unsigned int exact_end_bases() const {
317 return rmatch_;
318 }
319 void exact_end_bases(unsigned int eeb) {
320 rmatch_ = eeb;
321 }
322 bool align(CharacterProducer &,
323 std::string const & pattern1, std::string const & pattern2);
324 MEMORY_DEBUG(primer_alignment_rmatch)
325 };
326
327 #endif