ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/exact_bases.cc
Revision: 1.2
Committed: Wed May 4 18:03:44 2005 UTC (11 years, 2 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +7 -7 lines
Log Message:
Small bug fixes, plus codon based edit distance for peptide searching.

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #include "exact_bases.h"
23 #include "primer_alignment.h"
24 // #include "sortedvector.t"
25
26 exact_bases::exact_bases(PatternMatch *pm,
27 unsigned int k, unsigned char eos, bool wc, bool tn, bool id, bool dm)
28 : pm_(pm), num_patterns_(0), k_(k), eos_(eos), _wc(wc), _textn(tn), _indels(id), _dna_mut(dm)
29 {
30 }
31
32 exact_bases::exact_bases(PatternMatch *pm)
33 : pm_(pm), num_patterns_(0), k_(0), eos_('\n'), _wc(false), _textn(false), _indels(true), _dna_mut(false)
34 {
35 }
36
37 exact_bases::~exact_bases() {
38 delete pm_;
39 }
40
41 unsigned int exact_bases::mismatches() const {
42 return k_;
43 }
44
45 void exact_bases::mismatches(unsigned int k) {
46 k_ = k;
47 }
48
49 bool exact_bases::wildcards() const {
50 return _wc;
51 }
52
53 void exact_bases::wildcards(bool wc) {
54 _wc = wc;
55 }
56
57 bool exact_bases::wildcard_text_N() const {
58 return _textn;
59 }
60
61 void exact_bases::wildcard_text_N(bool tn) {
62 _textn = tn;
63 }
64
65 bool exact_bases::indels() const {
66 return _indels;
67 }
68
69 void exact_bases::indels(bool id) {
70 _indels = id;
71 }
72
73 unsigned char exact_bases::eos_char() const {
74 return eos_;
75 }
76
77 void exact_bases::eos_char(unsigned char c) {
78 eos_ = c;
79 }
80
81 long unsigned int
82 exact_bases::add_pattern(std::string const & pat, unsigned long id,
83 int esb, int eeb) {
84 add_pattern_(pat,id,esb,eeb);
85 num_patterns_++;
86 return id;
87 }
88
89 bool
90 exact_bases::find_patterns(CharacterProducer & cp,
91 pattern_hit_vector & phs,
92 long unsigned minka) {
93 // checkpoint;
94 pattern_hit_vector l;
95 pattern_hit_vector::iterator it;
96 primer_alignment_lmatch pal;
97 primer_alignment_rmatch par;
98 pal.eos(eos_); pal.kmax(k_); pal.wc(_wc);
99 pal.tn(_textn); pal.indels(_indels); pal.dna_mut(_dna_mut);
100 pal.maxpatlen(_mpl); pal.yesno(true);
101 par.eos(eos_); par.kmax(k_); par.wc(_wc);
102 par.tn(_textn); par.indels(_indels); par.dna_mut(_dna_mut);
103 par.maxpatlen(_mpl); par.yesno(true);
104 bool more;
105 // checkpoint;
106 while ((more=pm_->find_patterns(cp,l,minka))||!l.empty()) {
107 FILE_POSITION_TYPE oldcharspos;
108 oldcharspos = cp.pos();
109 it = l.begin();
110 // checkpoint;
111 while (it != l.end()) {
112 // checkpoint;
113 long unsigned int pid0(it->value()->id());
114 FILE_POSITION_TYPE pos(it->key());
115 pattern_list::const_iterator const & plit(plit_[pid0]);
116 long unsigned int pid = plit->id();
117 int esb=plit->exact_start_bases();
118 int eeb=plit->exact_end_bases();
119 if (prefix_[pid]) {
120 // Exact match to first part
121 // checkpoint;
122 pal.reset();
123 pal.pos(it->key());
124 pal.exact_start_bases(esb);
125 pal.exact_end_bases(eeb);
126 if (pal.align(cp,it->value()->pattern(),rempat_[pid])) {
127 phs.push_back(pal.end(),plit);
128 }
129 } else {
130 // Exact match to second part
131 // checkpoint;
132 par.reset();
133 par.pos(it->key());
134 par.exact_start_bases(esb);
135 par.exact_end_bases(eeb);
136 if (par.align(cp,rempat_[pid],it->value()->pattern())) {
137 phs.push_back(par.end(),plit);
138 }
139 }
140 ++it;
141 }
142 l.clear();
143 cp.pos(oldcharspos);
144 report_progress(cp);
145 if (phs.size() >= minka ||
146 (more==false && phs.size() > 0)) return true;
147 }
148 return false;
149 }
150
151 void
152 exact_bases::init(CharacterProducer & cp) {
153 assert(pm_!=((void*)0));
154 long unsigned int id=0;
155 pattern_list::const_iterator it;
156 plit_.resize(num_patterns_+1);
157 prefix_.resize(num_patterns_+1);
158 rempat_.resize(num_patterns_+1);
159 _mpl=0;
160 for (it=patterns().begin();it!=patterns().end();++it) {
161 int esb=it->exact_start_bases();
162 int eeb=it->exact_end_bases();
163 if (it->pattern().length() > _mpl) {
164 _mpl = it->pattern().length();
165 }
166 if (esb >= eeb) {
167 id = pm_->add_pattern(it->pattern().substr(0,esb));
168 prefix_[id] = true;
169 rempat_[id] = it->pattern().substr(esb);
170 } else {
171 int patlen=it->pattern().length();
172 id = pm_->add_pattern(it->pattern().substr(patlen-eeb));
173 prefix_[id] = false;
174 rempat_[id] = it->pattern().substr(0,patlen-eeb);
175 }
176 plit_[id] = it;
177 }
178 pm_->init(cp);
179 // checkpoint;
180 }
181
182 void exact_bases::reset() {
183 pm_->reset();
184 }