ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/cuffcompare/testfai.cpp
Revision: 20
Committed: Mon Jul 18 21:08:36 2011 UTC (8 years, 3 months ago) by gpertea
File size: 4861 byte(s)
Log Message:
added cuffcompare sources

Line File contents
1 /*
2 * testfai.cpp
3 *
4 * Created on: Sep 7, 2010
5 * Author: gpertea
6 */
7
8 #include "GArgs.h"
9 #include "GFaSeqGet.h"
10 #include "GFastaIndex.h"
11 #include "GStr.h"
12 #include <ctype.h>
13
14 #define USAGE "Usage:\n\
15 testfai <multi-fasta> [-F] [-q <seqname>] [-r <range>] [-C] [-o <out.fasta>]\n\
16 \n\
17 Create the index <multi-fasta>.fai if not available and retrieve the\n\
18 fasta sequence for <seqname>, optionally only pulling range <range>\n\
19 (reverse complemented if -C is given)\n\
20 \n\
21 Options:\n\
22 -F : force rebuilding of the fasta index in the current directory\n\
23 "
24 void openfwrite(FILE* &f, GArgs& args, char opt) {
25 GStr s=args.getOpt(opt);
26 if (!s.is_empty()) {
27 if (s=='-')
28 f=stdout;
29 else {
30 f=fopen(s,"w");
31 if (f==NULL) GError("Error creating file: %s\n", s.chars());
32 }
33 }
34 }
35
36 int main(int argc, char * const argv[]) {
37 GArgs args(argc, argv, "hFCq:r:o:");
38 int e;
39 if ((e=args.isError())>0)
40 GError("%s\nInvalid argument: %s\n", USAGE, argv[e]);
41 if (args.getOpt('h')!=NULL){
42 GMessage("%s\n", USAGE);
43 exit(1);
44 }
45 args.startNonOpt();
46 GStr fadb(args.nextNonOpt());
47 if (fadb.is_empty()) GError("%s Error: multi-fasta file expected!\n",USAGE);
48 GStr fname(fadb);
49 fname.append(".fai");
50 bool createLocal=(args.getOpt('F')!=NULL);
51 const char* idxname=(createLocal)? NULL : fname.chars();
52 GFastaIndex faidx(fadb.chars(), idxname);
53 //also tried to load the index if exists in the current directory
54 GStr fnamecwd(fname); //name in current directory (without path)
55 int ip=-1;
56 if ((ip=fnamecwd.rindex(CHPATHSEP))>=0) {
57 fnamecwd.cut(0,ip+1);
58 }
59 if (!createLocal) { //look for existing indexes to load
60 //try the same directory as the fasta file first
61 if (!faidx.hasIndex() and fileExists(fnamecwd.chars())>1) { //try current working directory next
62 faidx.loadIndex(fnamecwd.chars());
63 }
64 if (!faidx.hasIndex()) {//could not load any index data
65 //try to create it in the same directory as the fasta file
66 GMessage("No fasta index found. Rebuilding..\n");
67 faidx.buildIndex();
68 if (faidx.getCount()==0) GError("Error: no fasta records to be indexed!\n");
69 GMessage("Fasta index rebuilt.\n");
70 //check if we can create a file there
71 FILE* fcreate=fopen(fname.chars(), "w");
72 if (fcreate==NULL)
73 GMessage("Warning: cannot create fasta index %s! (permissions?)\n", fname.chars());
74 else {
75 fclose(fcreate);
76 if (faidx.storeIndex(fname.chars())<faidx.getCount())
77 GMessage("Warning: error writing the index file %s!\n",fname.chars());
78 } //creating index file in the same directory as fasta file
79 }//trying to create the index file
80 }
81
82 if (createLocal || !faidx.hasIndex()) {
83 //simply rebuild the index in the current directory and use it:
84 //remove directories in path, if any
85 if (faidx.getCount()==0) {
86 faidx.buildIndex();
87 if (faidx.getCount()==0) GError("Error: no fasta records to be indexed!\n");
88 }
89 if (faidx.storeIndex(fnamecwd.chars())<faidx.getCount())
90 GMessage("Warning: error writing the index file %s!\n",fnamecwd.chars());
91 }
92
93 GStr qry(args.getOpt('q'));
94 if (qry.is_empty()) exit(0);
95 GFastaRec* farec=faidx.getRecord(qry.chars());
96 if (farec==NULL) {
97 GMessage("Error: couldn't find fasta record for '%s'!\n",qry.chars());
98 exit(1);
99 }
100 GFaSeqGet faseq(fadb.chars(),farec->seqlen, farec->fpos, farec->line_len, farec->line_blen);
101 //TODO: read these from -r option
102 uint qstart=0;
103 uint qend=0; //farec->seqlen
104 bool revCompl=(args.getOpt('C')!=NULL);
105 char* s=args.getOpt('r');
106 if (s!=NULL) {
107 char *p=s;
108 while (isdigit(*p)) p++;
109 if (*p=='-') {
110 sscanf(s,"%u-%u",&qstart, &qend);
111 if (qstart==0 || qend==0)
112 GError("Error parsing sequence range: %s\n",s);
113 }
114 else if (*p==':') {
115 int qlen=0;
116 sscanf(s,"%u:%d", &qstart, &qlen);
117 if (qstart==0 || qlen==0)
118 GError("Error parsing sequence range: %s\n",s);
119 qend=qstart+qlen-1;
120 }
121 else if (*p=='.') {
122 sscanf(s,"%u..%u",&qstart, &qend);
123 if (qstart==0 || qend==0)
124 GError("Error parsing sequence range: %s\n",s);
125 }
126 }
127 if (qstart==0) qstart=1;
128 if (qend==0) qend=farec->seqlen;
129 // call faseq.loadall() here if multiple ranges are to be extracted all
130 // over this genomic sequence
131 char* subseq=faseq.copyRange(qstart, qend, revCompl, true);
132 FILE* f_out=NULL;
133 openfwrite(f_out, args, 'o');
134 if (f_out==NULL) f_out=stdout;
135 writeFasta(f_out, qry.chars(), NULL, subseq, 70, qend-qstart+1);
136 GFREE(subseq);
137 }