ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/seqalign/seqalign.cpp
Revision: 95
Committed: Wed Oct 5 18:30:51 2011 UTC (7 years, 9 months ago) by gpertea
File size: 3875 byte(s)
Log Message:
Line File contents
1 #include "GBase.h"
2 #include "GArgs.h"
3 #include "GStr.h"
4 #include "GList.hh"
5 #include "GAlnExtend.h"
6
7 #define USAGE "Usage:\n\
8 seqalign {-i <2line_file> | -a <seq1> -b <seq2>} \\ \n\
9 [X=<xdrop>] [M=<match_reward>] [N=<mismatch_penalty]\
10 "
11
12 bool debug=true;
13 int Xdrop=8;
14 int match_reward=2;
15 int mismatch_penalty=3;
16
17 #define FRCLOSE(fh) if (fh!=NULL && fh!=stdin) fclose(fh)
18 #define FWCLOSE(fh) if (fh!=NULL && fh!=stdout) fclose(fh)
19
20 char* seq_cleanup(const char* s, int& s_len) {
21 char* r=NULL;
22 int slen=strlen(s);
23 GMALLOC(r, slen+1);
24 int newlen=0;
25 for (int i=0;i<slen;i++) {
26 if (s[i]<'A' || s[i]>'z' || (s[i]>'Z' && s[i]<'a')) continue;
27 r[newlen]=s[i];
28 newlen++;
29 }
30 r[newlen]=0;
31 s_len=newlen;
32 return r;
33 }
34
35 bool loadSeqs(GLineReader& lr, char*& s1, char*& s2,
36 int& s1_len, int& s2_len) {
37 char* l=NULL;
38 s1=NULL;
39 s2=NULL;
40 while ((l=lr.nextLine())!=NULL) {
41 if (lr.length()<=3 || l[0]=='#') continue;
42 if (s1==NULL) {
43 s1=seq_cleanup(l, s1_len);
44 }
45 else if (s2==NULL) {
46 s2=seq_cleanup(l, s2_len);
47 return true;
48 }
49 else break;
50 }
51 if (s1!=NULL) GFREE(s1);
52 return false;
53 }
54
55 void findTrimming(char* aseq, int alen, char* rseq, int rlen, SGreedyAlignMem* gxmem=NULL) {
56
57 GXAlnInfo* r_bestaln=match_RightEnd(aseq, alen, rseq, rlen, gxmem, match_reward, mismatch_penalty, Xdrop);
58
59 #ifdef GDEBUG
60 if (r_bestaln)
61 GMessage("******** Best 3' Alignment: %d-%d vs %d-%d, score=%d, pid=%.2f\n",
62 r_bestaln->ql, r_bestaln->qr, r_bestaln->sl, r_bestaln->sr, r_bestaln->score, r_bestaln->pid);
63 else GMessage("******** No suitable 3' alignment found.\n");
64 #endif
65 if (r_bestaln)
66 fprintf(stdout, "3prime %d-%d:%d-%d|score=%d|pid=%.2f\n",
67 r_bestaln->ql, r_bestaln->qr, r_bestaln->sl, r_bestaln->sr, r_bestaln->score, r_bestaln->pid);
68
69 delete r_bestaln;
70
71 //testing: reverse both sequences and redo the search at 5' end
72 reverseChars(aseq);
73 reverseChars(rseq);
74
75 GXAlnInfo* l_bestaln=match_LeftEnd(aseq, alen, rseq, rlen, gxmem, match_reward, mismatch_penalty, Xdrop);
76
77 #ifdef GDEBUG
78 if (l_bestaln)
79 GMessage("******** Best 5' Alignment: %d-%d vs %d-%d, score=%d, pid=%.2f\n",
80 l_bestaln->ql, l_bestaln->qr, l_bestaln->sl, l_bestaln->sr, l_bestaln->score, l_bestaln->pid);
81 else GMessage("******** No suitable 5' alignment found.\n");
82 #endif
83 if (l_bestaln)
84 fprintf(stdout, "5prime %d-%d:%d-%d|score=%d|pid=%.2f\n",
85 l_bestaln->ql, l_bestaln->qr, l_bestaln->sl, l_bestaln->sr, l_bestaln->score, l_bestaln->pid);
86
87 delete l_bestaln;
88
89 GFREE(aseq);
90 GFREE(rseq);
91 }
92
93 //================= MAIN =====================
94
95 int main(int argc, char * const argv[]) {
96 GArgs args(argc, argv, "X=M=N=a:b:i:");
97 int e;
98 if ((e=args.isError())>0)
99 GError("%s\nInvalid argument: %s\n", USAGE, argv[e]);
100 if (args.getOpt('h')!=NULL) GError("%s\n", USAGE);
101 char* seqa=NULL;
102 char* seqb=NULL;
103 GStr s=args.getOpt('X');
104 if (!s.is_empty())
105 Xdrop=s.asInt();
106 s=args.getOpt('M');
107 if (!s.is_empty())
108 match_reward=s.asInt();
109 s=args.getOpt('N');
110 if (!s.is_empty())
111 mismatch_penalty=s.asInt();
112
113 s=args.getOpt('i');
114 int seqa_len=0, seqb_len=0;
115 if (!s.is_empty()) {
116 GLineReader lr(s.chars());
117 SGreedyAlignMem* gxmem=new SGreedyAlignMem(match_reward, mismatch_penalty, Xdrop);
118 while (loadSeqs(lr, seqa, seqb, seqa_len, seqb_len)) {
119 findTrimming(seqa, seqa_len, seqb, seqb_len, gxmem);
120 }
121 delete gxmem;
122 }
123 else {
124 s=args.getOpt('a');
125 if (!s.is_empty()) seqa=seq_cleanup(s.chars(), seqa_len);
126 s=args.getOpt('b');
127 if (!s.is_empty()) seqb=seq_cleanup(s.chars(), seqb_len);
128 if (seqa==NULL || seqa_len<2 || seqb==NULL || seqb_len<2)
129 GError("Error: two sequences with length>1 should be given!\n");
130 findTrimming(seqa, seqa_len, seqb, seqb_len);
131 }
132
133 }