ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/codons.cpp
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (8 years, 2 months ago) by gpertea
File size: 4582 byte(s)
Log Message:
adding tophat source work

Line File contents
1 #include "codons.h"
2
// Lookup table used for fasta codon decoding: maps a packed codon to its
// one-letter translation. Every nucleotide letter takes 5 bits, so a codon
// (three letters) packs into a unique 15-bit index -> 2^15 = 32768 (32K) slots.
static char codonTable[1 << 15];
6
// Source data for the codon lookup table: a flat list of 4-character records,
// each made of a 3-letter codon followed by its one-letter translation.
// Codons may contain ambiguity letters (e.g. R, Y, N) in addition to A/C/G/T;
// '.' is the translation used for the stop codons listed here (TAA, TAG, TGA, ...)
// and 'X' is used for NNN / XXX.
// The table is only ever read, so it is declared const.
static const char codonData[]={
'A','A','A','K', 'A','A','C','N', 'A','A','G','K', 'A','A','R','K', 'A','A','T','N',
'A','A','Y','N', 'A','C','A','T', 'A','C','B','T', 'A','C','C','T', 'A','C','D','T',
'A','C','G','T', 'A','C','H','T', 'A','C','K','T', 'A','C','M','T', 'A','C','N','T',
'A','C','R','T', 'A','C','S','T', 'A','C','T','T', 'A','C','V','T', 'A','C','W','T',
'A','C','Y','T', 'A','G','A','R', 'A','G','C','S', 'A','G','G','R', 'A','G','R','R',
'A','G','T','S', 'A','G','Y','S', 'A','T','A','I', 'A','T','C','I', 'A','T','G','M',
'A','T','H','I', 'A','T','M','I', 'A','T','T','I', 'A','T','W','I', 'A','T','Y','I',
'C','A','A','Q', 'C','A','C','H', 'C','A','G','Q', 'C','A','R','Q', 'C','A','T','H',
'C','A','Y','H', 'C','C','A','P', 'C','C','B','P', 'C','C','C','P', 'C','C','D','P',
'C','C','G','P', 'C','C','H','P', 'C','C','K','P', 'C','C','M','P', 'C','C','N','P',
'C','C','R','P', 'C','C','S','P', 'C','C','T','P', 'C','C','V','P', 'C','C','W','P',
'C','C','Y','P', 'C','G','A','R', 'C','G','B','R', 'C','G','C','R', 'C','G','D','R',
'C','G','G','R', 'C','G','H','R', 'C','G','K','R', 'C','G','M','R', 'C','G','N','R',
'C','G','R','R', 'C','G','S','R', 'C','G','T','R', 'C','G','V','R', 'C','G','W','R',
'C','G','Y','R', 'C','T','A','L', 'C','T','B','L', 'C','T','C','L', 'C','T','D','L',
'C','T','G','L', 'C','T','H','L', 'C','T','K','L', 'C','T','M','L', 'C','T','N','L',
'C','T','R','L', 'C','T','S','L', 'C','T','T','L', 'C','T','V','L', 'C','T','W','L',
'C','T','Y','L', 'G','A','A','E', 'G','A','C','D', 'G','A','G','E', 'G','A','R','E',
'G','A','T','D', 'G','A','Y','D', 'G','C','A','A', 'G','C','B','A', 'G','C','C','A',
'G','C','D','A', 'G','C','G','A', 'G','C','H','A', 'G','C','K','A', 'G','C','M','A',
'G','C','N','A', 'G','C','R','A', 'G','C','S','A', 'G','C','T','A', 'G','C','V','A',
'G','C','W','A', 'G','C','Y','A', 'G','G','A','G', 'G','G','B','G', 'G','G','C','G',
'G','G','D','G', 'G','G','G','G', 'G','G','H','G', 'G','G','K','G', 'G','G','M','G',
'G','G','N','G', 'G','G','R','G', 'G','G','S','G', 'G','G','T','G', 'G','G','V','G',
'G','G','W','G', 'G','G','Y','G', 'G','T','A','V', 'G','T','B','V', 'G','T','C','V',
'G','T','D','V', 'G','T','G','V', 'G','T','H','V', 'G','T','K','V', 'G','T','M','V',
'G','T','N','V', 'G','T','R','V', 'G','T','S','V', 'G','T','T','V', 'G','T','V','V',
'G','T','W','V', 'G','T','Y','V', 'M','G','A','R', 'M','G','G','R', 'M','G','R','R',
'N','N','N','X', 'R','A','Y','B', 'S','A','R','Z', 'T','A','A','.', 'T','A','C','Y',
'T','A','G','.', 'T','A','R','.', 'T','A','T','Y', 'T','A','Y','Y', 'T','C','A','S',
'T','C','B','S', 'T','C','C','S', 'T','C','D','S', 'T','C','G','S', 'T','C','H','S',
'T','C','K','S', 'T','C','M','S', 'T','C','N','S', 'T','C','R','S', 'T','C','S','S',
'T','C','T','S', 'T','C','V','S', 'T','C','W','S', 'T','C','Y','S', 'T','G','A','.',
'T','G','C','C', 'T','G','G','W', 'T','G','T','C', 'T','G','Y','C', 'T','R','A','.',
'T','T','A','L', 'T','T','C','F', 'T','T','G','L', 'T','T','R','L', 'T','T','T','F',
'T','T','Y','F', 'X','X','X','X', 'Y','T','A','L', 'Y','T','G','L', 'Y','T','R','L'
};
45
46
47 static bool isCodonTableReady=codonTableInit();
48
// Converts one nucleotide letter to its 5-bit code: 'A' -> 0 ... 'Z' -> 25.
// Any character outside 'A'..'Z' yields 31, a code no table codon uses.
static unsigned int nucCode5(char n) {
  // unsigned arithmetic: characters below 'A' wrap to a large value and are
  // caught by the same range test as characters above 'Z'
  unsigned int code = (unsigned int)(unsigned char)n - (unsigned int)'A';
  return (code < 26u) ? code : 31u;
}

// Packs three nucleotide letters into a 15-bit value:
//   bits 0-4 = n1, bits 5-9 = n2, bits 10-14 = n3 (each as letter - 'A').
// The letters are expected to be uppercase already; no case folding is done.
// Characters outside 'A'..'Z' (lowercase, gaps, digits, ...) are encoded as 31
// so the result is always below 32768 and can never collide with the packed
// value of a codon made only of uppercase letters. (Previously such input
// could set bits above bit 14 and produce an index past a 32768-entry table.)
unsigned short packCodon(char n1, char n2, char n3) {
  unsigned int packed = nucCode5(n1)
                      | (nucCode5(n2) << 5)
                      | (nucCode5(n3) << 10);
  return (unsigned short)packed;
}
58
59 bool codonTableInit() {
60 memset((void*)codonTable, 'X', 32768);
61 int cdsize=sizeof(codonData);
62 for (int i=0;i<cdsize;i+=4) {
63 unsigned short aacode=packCodon(codonData[i], codonData[i+1], codonData[i+2]);
64 codonTable[aacode]=codonData[i+3];
65 }
66 return true;
67 }
68
69
70 char Codon::translate() {
71 for (byte i=0;i<3;i++) nuc[i]=toupper(nuc[i]);
72 unsigned short aacode=packCodon(nuc[0], nuc[1], nuc[2]);
73 return codonTable[aacode];
74 }
75
76 //simple 1st frame forward translation of a given DNA string
77 // allocate and returns the translation string
78 char* translateDNA(const char* dnastr, int& aalen, int dnalen) {
79 if (dnastr==NULL || *dnastr==0) return NULL;
80 if (dnalen==0) dnalen=strlen(dnastr);
81 aalen=dnalen/3;
82 char* r=NULL;
83 GMALLOC(r, aalen+1);
84 r[aalen]=0;
85 int ai=0;
86 for (int i=0;i+2<dnalen;i+=3,ai++) {
87 r[ai]=codonTable[packCodon(toupper(dnastr[i]),toupper(dnastr[i+1]),toupper(dnastr[i+2]))];
88 }
89 return r;
90 }