ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/exact_halves.cc
Revision: 1.2
Committed: Wed May 4 18:03:44 2005 UTC (11 years, 2 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +8 -7 lines
Log Message:
Small bug fixes, plus codon based edit distance for peptide searching.

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #include "exact_halves.h"
23 #include "primer_alignment.h"
24 // #include "sortedvector.t"
25
26 exact_halves::exact_halves(PatternMatch *pm,
27 unsigned int k, unsigned char eos,
28 bool wc, bool tn, bool id, bool dm)
29 : pm_(pm), num_patterns_(0), k_(k), eos_(eos), _wc(wc), _textn(tn), _indels(id), _dna_mut(dm)
30 {
31 }
32
33 exact_halves::exact_halves(PatternMatch *pm)
34 : pm_(pm), num_patterns_(0), k_(0), eos_('\n'), _wc(false), _textn(false), _indels(true), _dna_mut(false)
35 {
36 }
37
38 exact_halves::~exact_halves() {
39 delete pm_;
40 }
41
42 unsigned int exact_halves::mismatches() const {
43 return k_;
44 }
45
46 void exact_halves::mismatches(unsigned int k) {
47 k_ = k;
48 }
49
50 bool exact_halves::wildcards() const {
51 return _wc;
52 }
53
54 void exact_halves::wildcards(bool wc) {
55 _wc = wc;
56 }
57
58 bool exact_halves::wildcard_text_N() const {
59 return _textn;
60 }
61
62 void exact_halves::wildcard_text_N(bool tn) {
63 _textn = tn;
64 }
65
66 bool exact_halves::indels() const {
67 return _indels;
68 }
69
70 void exact_halves::indels(bool id) {
71 _indels = id;
72 }
73
74 unsigned char exact_halves::eos_char() const {
75 return eos_;
76 }
77
78 void exact_halves::eos_char(unsigned char c) {
79 eos_ = c;
80 }
81
82 long unsigned int
83 exact_halves::add_pattern(std::string const & pat, unsigned long id,
84 int esb, int eeb) {
85 add_pattern_(pat,id,esb,eeb);
86 num_patterns_++;
87 return id;
88 }
89
90 bool
91 exact_halves::find_patterns(CharacterProducer & cp,
92 pattern_hit_vector & phs,
93 long unsigned minka) {
94 pattern_hit_vector l(minka);
95 pattern_hit_vector::iterator it;
96 primer_alignment_lmatch pal;
97 primer_alignment_rmatch par;
98 pal.eos(eos_); pal.kmax(k_); pal.wc(_wc);
99 pal.tn(_textn); pal.indels(_indels); pal.dna_mut(_dna_mut);
100 pal.maxpatlen(_mpl); pal.yesno(true);
101 par.eos(eos_); par.kmax(k_); par.wc(_wc);
102 par.tn(_textn); par.indels(_indels); par.dna_mut(_dna_mut);
103 par.maxpatlen(_mpl); par.yesno(true);
104 bool more;
105 // checkpoint;
106 while ((more=pm_->find_patterns(cp,l,minka))||!l.empty()) {
107 // checkpoint;
108 FILE_POSITION_TYPE oldcharspos;
109 oldcharspos = cp.pos();
110 l.normalize();
111 // checkpoint;
112 it = l.begin();
113 while (it != l.end()) {
114 long unsigned int pid(it->value()->id());
115 FILE_POSITION_TYPE pos(it->key());
116 tinylist<pattern_list_element>::const_iterator const & pit(plit_[pid]);
117 int esb = pit->exact_start_bases();
118 int eeb = pit->exact_end_bases();
119 if (pid%2==1) {
120 // Exact match to first half
121 // checkpoint;
122 pal.reset();
123 pal.pos(pos);
124 pal.exact_start_bases(esb);
125 pal.exact_end_bases(eeb);
126 if (pal.align(cp,pattern_halves_[pid],pattern_halves_[pid+1])) {
127 // checkpoint;
128 // cerr << pattern_halves_[pid] << " " << pattern_halves_[pid+1] << endl;
129 phs.push_back(pal.end(),pit);
130 lasthit_[(pid+1)/2] = pal.end();
131 }
132 } else if (pos > lasthit_[pid/2]) {
133 // Exact match to second half
134 // checkpoint;
135 par.reset();
136 par.pos(pos);
137 par.exact_start_bases(esb);
138 par.exact_end_bases(eeb);
139 // checkpoint;
140 if (par.align(cp,pattern_halves_[pid-1],pattern_halves_[pid])) {
141 // checkpoint;
142 phs.push_back(par.end(),pit);
143 }
144 }
145 // checkpoint;
146 ++it;
147 // checkpoint;
148 }
149 // checkpoint;
150 l.clear();
151 // checkpoint;
152 cp.pos(oldcharspos);
153 report_progress(cp);
154 if (phs.size() >= minka ||
155 (more==false && phs.size() > 0)) return true;
156 }
157 return false;
158 }
159
160 void
161 exact_halves::init(CharacterProducer & cp) {
162 assert(pm_!=((void*)0));
163 plit_.resize(num_patterns_*2+1);
164 pattern_halves_.resize(num_patterns_*2+1);
165 lasthit_.resize(num_patterns_+1);
166 tinylist<pattern_list_element>::const_iterator it;
167 long unsigned int id=0;
168 _mpl=0;
169 for (it=patterns().begin();it!=patterns().end();++it) {
170 int patlen = it->pattern().length();
171 if (patlen > _mpl) {
172 _mpl = patlen;
173 }
174 std::string const & patl = it->pattern().substr(0,patlen/2);
175 std::string const & patr = it->pattern().substr(patlen/2);
176 id = pm_->add_pattern(patl);
177 plit_[id] = it;
178 pattern_halves_[id] = patl;
179 id = pm_->add_pattern(patr);
180 plit_[id] = it;
181 pattern_halves_[id] = patr;
182 lasthit_[id/2] = 0;
183 }
184 pm_->init(cp);
185 }
186
187 void exact_halves::reset() {
188 pm_->reset();
189 }