ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/PrimerMatch/util.cc
Revision: 1.2
Committed: Wed May 4 18:03:45 2005 UTC (11 years, 3 months ago) by nje01
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +55 -4 lines
Log Message:
Small bug fixes, plus codon based edit distance for peptide searching.

Line File contents
1 /**************************************************************************
2 * This code is part of the supporting infrastructure for ATA Mapper.
3 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
4 * Author: Nathan Edwards
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received (LICENSE.txt) a copy of the GNU General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *************************************************************************/
20
21
22 #include <ctype.h>
23 #include <string.h>
24 #include <signal.h>
25 #include <unistd.h>
26 #include <iostream>
27 #include "util.h"
28 #include "types.h"
29
30 #ifdef PROFILE
31
32 void profile_signal_handler(int sig) {
33 switch (sig) {
34 case SIGUSR1:
35 exit(0);
36 break;
37 default:
38 ; // do nothing, should not happen!
39 }
40 }
41
42 void set_profile_signal_handler() {
43 struct sigaction sa;
44 sa.sa_handler = &profile_signal_handler;
45 SIGINITSET(sa.sa_mask);
46 sa.sa_flags = 0;
47 sigaction(SIGUSR1,&sa,0);
48 }
49
50 #endif
51
52 time_t tictoctime_=0;
53
54 unsigned int least_common_multiple(unsigned int a, unsigned int b) {
55 unsigned int mult = a*b;
56 while (b != 0) {
57 unsigned int tmp;
58 tmp = a; a = b; b = tmp;
59 b = b % a;
60 }
61 return mult/a;
62 };
63
64 static char *trues[] = {"true", "True", "TRUE",
65 "T", "t", "1",
66 "Yes", "YES", "yes", ""};
67 static char *falses[] = {"false", "False", "FALSE",
68 "F", "f", "0",
69 "No", "NO", "no", ""};
70
71
72 bool find_str(char const * const s, char * values[]) {
73 char **p = values;
74 while (p && strlen(*p) > 0) {
75 if (*p && !strcmp(*p,s)) {
76 return true;
77 }
78 p++;
79 }
80 return false;
81 }
82
83 bool is_false(char const * const s) {
84 return find_str(s,falses);
85 }
86
87 bool is_true(char const * const s) {
88 return find_str(s,trues);
89 }
90
91 std::string binarystr(bigword val, int length) {
92 std::string str="";
93 bigword mask = 1;
94 for (int i=0;i<length;i++) {
95 if (val & mask) {
96 str = "1" + str;
97 } else {
98 str = "0" + str;
99 }
100 if (i%8 == 7) {
101 str = " " + str;
102 }
103 mask <<= 1;
104 }
105 return str;
106 }
107
108 std::string binary(unsigned char val) {
109 return binarystr(val,sizeof(unsigned char)*8);
110 }
111
112 std::string binary(long unsigned int val) {
113 return binarystr(val,sizeof(long unsigned int)*8);
114 }
115
116 std::string binary(long long unsigned int val) {
117 return binarystr(val,sizeof(long long unsigned int)*8);
118 }
119
120 void uppercase(std::string & s) {
121 for (int i=0;i<s.length();i++) {
122 s[i] = toupper(s[i]);
123 }
124 }
125
126 char *iupac_compatible(char w) {
127 static char **compatiblestr=0;
128 if (!compatiblestr) {
129 compatiblestr = new char*[128];
130 memset(compatiblestr,0,128*sizeof(char*));
131 compatiblestr['A'] = strdup("ARMWDHVN");
132 compatiblestr['B'] = strdup("GTUCYKSBN");
133 compatiblestr['C'] = strdup("CYMSBHVN");
134 compatiblestr['D'] = strdup("GATURWKDN");
135 compatiblestr['G'] = strdup("GRKSBDVN");
136 compatiblestr['H'] = strdup("ACTUMYWHN");
137 compatiblestr['K'] = strdup("GTKBDN");
138 compatiblestr['M'] = strdup("ACMHVN");
139 compatiblestr['N'] = strdup("ACGTURYKMSWVDHV");
140 compatiblestr['R'] = strdup("GARDVN");
141 compatiblestr['S'] = strdup("GCSBVN");
142 compatiblestr['T'] = strdup("TUYKWVDHN");
143 compatiblestr['U'] = strdup("UTYKWVDHN");
144 compatiblestr['V'] = strdup("GCARSMVN");
145 compatiblestr['W'] = strdup("ATUWDHN");
146 compatiblestr['Y'] = strdup("TUCYBHN");
147 compatiblestr['a'] = strdup("armwdhvn");
148 compatiblestr['b'] = strdup("gtucyksbn");
149 compatiblestr['c'] = strdup("cymsbhvn");
150 compatiblestr['d'] = strdup("gaturwkdn");
151 compatiblestr['g'] = strdup("grksbdvn");
152 compatiblestr['h'] = strdup("actumywhn");
153 compatiblestr['k'] = strdup("gtkbdn");
154 compatiblestr['m'] = strdup("acmhvn");
155 compatiblestr['n'] = strdup("acgturykmswvdhv");
156 compatiblestr['r'] = strdup("gardvn");
157 compatiblestr['s'] = strdup("gcsbvn");
158 compatiblestr['t'] = strdup("tuykwvdhn");
159 compatiblestr['u'] = strdup("utykwvdhn");
160 compatiblestr['v'] = strdup("gcarsmvn");
161 compatiblestr['w'] = strdup("atuwdhn");
162 compatiblestr['y'] = strdup("tucybhn");
163 }
164 return compatiblestr[w];
165 }
166
167 bool iupac_compatible(char w, char c) {
168 static bool **compatiblemap=0;
169 if (!compatiblemap) {
170 bool * buffer = new bool[128*128];
171 memset(buffer,0,128*128*sizeof(bool));
172 compatiblemap = new bool*[128];
173 memset(compatiblemap,0,128*sizeof(bool*));
174 char *compatible=0;
175 int j;
176 for (int w=0;w<128;w++) {
177 compatiblemap[w] = buffer+w*128;
178 if ((compatible=iupac_compatible(w))!=0) {
179 j=0;
180 while (compatible[j]) {
181 compatiblemap[w][compatible[j]] = true;
182 j++;
183 }
184 }
185 }
186 }
187 return compatiblemap[w][c];
188 }
189
190 char *iupac_contains(char w) {
191 static char **containsstr=0;
192 if (!containsstr) {
193 containsstr = new char*[256];
194 memset(containsstr,0,256*sizeof(char));
195 containsstr['A'] = strdup("A");
196 containsstr['B'] = strdup("GTUCYKSB");
197 containsstr['C'] = strdup("C");
198 containsstr['D'] = strdup("GATURWKD");
199 containsstr['G'] = strdup("G");
200 containsstr['H'] = strdup("ACTUMYWH");
201 containsstr['K'] = strdup("GTK");
202 containsstr['M'] = strdup("ACM");
203 containsstr['N'] = strdup("ACGTURYKMSWVDHVN");
204 containsstr['R'] = strdup("GAR");
205 containsstr['S'] = strdup("GCS");
206 containsstr['T'] = strdup("TU");
207 containsstr['U'] = strdup("UT");
208 containsstr['V'] = strdup("GCARSMV");
209 containsstr['W'] = strdup("ATUW");
210 containsstr['Y'] = strdup("TUCY");
211 containsstr['a'] = strdup("a");
212 containsstr['b'] = strdup("gtucyksb");
213 containsstr['c'] = strdup("c");
214 containsstr['d'] = strdup("gaturwkd");
215 containsstr['g'] = strdup("g");
216 containsstr['h'] = strdup("actumywh");
217 containsstr['k'] = strdup("gtk");
218 containsstr['m'] = strdup("acm");
219 containsstr['n'] = strdup("acgturykmswvdhvn");
220 containsstr['r'] = strdup("gar");
221 containsstr['s'] = strdup("gcs");
222 containsstr['t'] = strdup("tu");
223 containsstr['u'] = strdup("ut");
224 containsstr['v'] = strdup("gcarsmv");
225 containsstr['w'] = strdup("atuw");
226 containsstr['y'] = strdup("tucy");
227 }
228 return containsstr[w];
229 }
230
231 bool iupac_contains(char w, char c) {
232 static bool **containsmap=0;
233 if (!containsmap) {
234 bool * buffer = new bool[256*256];
235 memset(buffer,0,256*256*sizeof(bool));
236 containsmap = new bool*[256];
237 memset(containsmap,0,256*sizeof(bool*));
238 char *contained;
239 int j;
240 for (int w=0;w<256;w++) {
241 containsmap[w] = buffer+w*256;
242 if ((contained=iupac_contains(w))!=0) {
243 j=0;
244 while (contained[j]) {
245 containsmap[w][contained[j]] = true;
246 j++;
247 }
248 }
249 }
250 }
251 return containsmap[w][c];
252 }
253
254 char *iupac_contained(char w) {
255 static char **containedstr=0;
256 if (!containedstr) {
257 containedstr = new char*[256];
258 memset(containedstr,0,256*sizeof(char));
259 containedstr['A'] = strdup("ARMWDHVN");
260 containedstr['B'] = strdup("BN");
261 containedstr['C'] = strdup("CYMSBHVN");
262 containedstr['D'] = strdup("DN");
263 containedstr['G'] = strdup("GRKSBDVN");
264 containedstr['H'] = strdup("HN");
265 containedstr['K'] = strdup("KBDN");
266 containedstr['M'] = strdup("MHVN");
267 containedstr['N'] = strdup("N");
268 containedstr['R'] = strdup("RDVN");
269 containedstr['S'] = strdup("SBVN");
270 containedstr['T'] = strdup("TUYKWVDHN");
271 containedstr['U'] = strdup("UTYKWVDHN");
272 containedstr['V'] = strdup("VN");
273 containedstr['W'] = strdup("WDHN");
274 containedstr['Y'] = strdup("YBHN");
275 containedstr['a'] = strdup("armwdhvn");
276 containedstr['b'] = strdup("bn");
277 containedstr['c'] = strdup("cymsbhvn");
278 containedstr['d'] = strdup("dn");
279 containedstr['g'] = strdup("grksbdvn");
280 containedstr['h'] = strdup("hn");
281 containedstr['k'] = strdup("kbdn");
282 containedstr['m'] = strdup("mhvn");
283 containedstr['n'] = strdup("n");
284 containedstr['r'] = strdup("rdvn");
285 containedstr['s'] = strdup("sbvn");
286 containedstr['t'] = strdup("tuykwvdhn");
287 containedstr['u'] = strdup("utykwvdhn");
288 containedstr['v'] = strdup("vn");
289 containedstr['w'] = strdup("wdhn");
290 containedstr['y'] = strdup("ybhn");
291 }
292 return containedstr[w];
293 }
294
295 bool iupac_contained(char w, char c) {
296 static bool **containedmap=0;
297 if (!containedmap) {
298 bool * buffer = new bool[256*256];
299 memset(buffer,0,256*256*sizeof(bool));
300 containedmap = new bool*[256];
301 memset(containedmap,0,256*sizeof(bool*));
302 char *contains;
303 int j;
304 for (int w=0;w<256;w++) {
305 containedmap[w] = buffer+w*256;
306 if ((contains=iupac_contained(w))!=0) {
307 j=0;
308 while (contains[j]) {
309 containedmap[w][contains[j]] = true;
310 j++;
311 }
312 }
313 }
314 }
315 return containedmap[w][c];
316 }
317
318 char iupac_rc(char c) {
319 static char *rcmap = 0;
320 if (!rcmap) {
321 rcmap = new char[256];
322 for (int i=0;i<256;i++) {
323 rcmap[i] = i;
324 }
325
326 rcmap['a'] = 't'; rcmap['A'] = 'T';
327 rcmap['c'] = 'g'; rcmap['C'] = 'G';
328 rcmap['g'] = 'c'; rcmap['G'] = 'C';
329 rcmap['t'] = 'a'; rcmap['T'] = 'A';
330 rcmap['u'] = 'a'; rcmap['U'] = 'A';
331
332 rcmap['m'] = 'k'; rcmap['M'] = 'K';
333 rcmap['r'] = 'y'; rcmap['R'] = 'Y';
334 rcmap['w'] = 'w'; rcmap['W'] = 'W';
335 rcmap['s'] = 's'; rcmap['S'] = 'S';
336 rcmap['y'] = 'r'; rcmap['Y'] = 'R';
337 rcmap['k'] = 'm'; rcmap['K'] = 'M';
338
339 rcmap['v'] = 'b'; rcmap['V'] = 'B';
340 rcmap['h'] = 'd'; rcmap['H'] = 'D';
341 rcmap['d'] = 'h'; rcmap['D'] = 'H';
342 rcmap['b'] = 'v'; rcmap['B'] = 'V';
343 }
344 return rcmap[c];
345 }
346
347 std::string reverse_comp(std::string const & sequence) {
348 std::string str(sequence);
349 int i,n = sequence.length();
350 for(i=0; i<n; i++) {
351 str[i] = iupac_rc(sequence[n-1-i]);
352 }
353 return str;
354 }
355
356 double monomolwt(char c) {
357 static double *monomolwt = 0;
358 c = toupper(c);
359 if (!monomolwt) {
360 monomolwt = new double[256];
361 for (int i=0;i<256;i++) {
362 monomolwt[i] = -1;
363 }
364 monomolwt['A']=71.037113848;
365 monomolwt['C']=103.009185648;
366 monomolwt['D']=115.026943128;
367 monomolwt['E']=129.042593208;
368 monomolwt['F']=147.068414008;
369 monomolwt['G']=57.021463768;
370 monomolwt['H']=137.058911944;
371 monomolwt['I']=113.084064088;
372 monomolwt['K']=128.094963136;
373 monomolwt['L']=113.084064088;
374 monomolwt['M']=131.040485808;
375 monomolwt['N']=114.042927536;
376 monomolwt['P']=97.052763928;
377 monomolwt['Q']=128.058577616;
378 monomolwt['R']=156.101111152;
379 monomolwt['S']=87.032028488;
380 monomolwt['T']=101.047678568;
381 monomolwt['V']=99.068414008;
382 monomolwt['W']=186.079313056;
383 monomolwt['Y']=163.063328648;
384 }
385 return monomolwt[c];
386 }
387
388 double avemolwt(char c) {
389 static double *avemolwt = 0;
390 c = toupper(c);
391 if (!avemolwt) {
392 avemolwt = new double[256];
393 for (int i=0;i<256;i++) {
394 avemolwt[i] = -1;
395 }
396 avemolwt['A']=71.078826901;
397 avemolwt['C']=103.143216117;
398 avemolwt['D']=115.088513436;
399 avemolwt['E']=129.115401675;
400 avemolwt['F']=147.176750991;
401 avemolwt['G']=57.051938663;
402 avemolwt['H']=137.141315021;
403 avemolwt['I']=113.159491617;
404 avemolwt['K']=128.174180322;
405 avemolwt['L']=113.159491617;
406 avemolwt['M']=131.196992594;
407 avemolwt['N']=114.103877326;
408 avemolwt['P']=97.116752043;
409 avemolwt['Q']=128.130765564;
410 avemolwt['R']=156.187706397;
411 avemolwt['S']=87.078151717;
412 avemolwt['T']=101.105039956;
413 avemolwt['V']=99.132603378;
414 avemolwt['W']=186.213513503;
415 avemolwt['Y']=163.176075807;
416 }
417 return avemolwt[c];
418 }
419
420 int aasubdist(char f, char t) {
421 static signed char *aasubdist = 0;
422 static signed char *aachars =0;
423 f = toupper(f);
424 t = toupper(t);
425 if (!aasubdist) {
426 aasubdist = new signed char[400];
427 for (int i=0;i<400;i++) {
428 aasubdist[i] = -1;
429 }
430 aachars = new signed char[256];
431 for (int i=0;i<256;i++) {
432 aachars[i] = -1;
433 }
434 char aas[] = "ARNDCQEGHILKMFPSTWYV";
435 for (int i=0;i<20;i++) {
436 aachars[aas[i]] = i;
437 }
438 int subs[] = {0,
439 2,0,
440 2,2,0,
441 1,2,1,0,
442 2,1,2,2,0,
443 2,1,2,2,2,0,
444 1,2,2,1,3,1,0,
445 1,1,2,1,1,2,1,0,
446 2,1,1,1,2,1,2,2,0,
447 2,2,1,2,2,3,3,2,2,0,
448 2,1,2,2,2,1,2,1,1,1,0,
449 2,1,1,2,3,1,1,2,2,2,2,0,
450 2,1,2,3,3,2,2,2,3,1,1,1,0,
451 2,2,2,2,1,2,3,2,2,1,1,3,2,0,
452 1,1,2,2,2,1,2,2,1,2,1,2,2,2,0,
453 1,1,1,2,1,1,2,1,2,1,1,2,2,1,1,0,
454 1,1,1,2,2,2,2,2,2,1,2,1,1,2,1,1,0,
455 2,1,3,3,1,1,2,1,3,3,1,2,2,2,2,1,2,0,
456 2,2,1,1,1,1,2,2,1,2,2,2,3,1,2,1,2,2,0,
457 1,2,2,1,1,2,1,1,2,1,1,2,1,1,2,2,2,2,2,0};
458 int k=0;
459 for (int i=0;i<20;i++) {
460 for (int j=0;j<=i;j++) {
461 aasubdist[20*aachars[aas[i]]+aachars[aas[j]]] = subs[k];
462 aasubdist[20*aachars[aas[j]]+aachars[aas[i]]] = subs[k];
463 k++;
464 }
465 }
466 }
467 if (aachars[f] < 0 || aachars[t] < 0) {
468 return -1;
469 } else {
470 return aasubdist[20*aachars[f]+aachars[t]];
471 }
472 }
473
474
475
476
477
478
479
480 #include <sys/stat.h>
481
482 bool exist(std::string const & filename)
483 {
484 struct stat stat_buf;
485 if (stat(filename.c_str(),&stat_buf) != 0) return false;
486 return (stat_buf.st_mode & S_IFMT) == S_IFREG;
487 }
488
489 time_t modtime(std::string const & filename) {
490 struct stat stat_buf;
491 if (stat(filename.c_str(),&stat_buf) != 0) return ((time_t)0);
492 time_t lastmod(stat_buf.st_mtime);
493 if (lastmod < stat_buf.st_ctime) {
494 lastmod = stat_buf.st_ctime;
495 }
496 return lastmod;
497 }
498
499 int anypos(std::string const & s0, std::string const & s1, int pos) {
500 std::string::size_type retval;
501 if ((retval=s0.find_first_of(s1,pos))==std::string::npos) {
502 return -1;
503 } else {
504 return retval;
505 }
506 }