ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/cuffcompare/testfai.cpp
Revision: 20
Committed: Mon Jul 18 21:08:36 2011 UTC (9 years, 3 months ago) by gpertea
File size: 4861 byte(s)
Log Message:
added cuffcompare sources

Line User Rev File contents
1 gpertea 20 /*
2     * testfai.cpp
3     *
4     * Created on: Sep 7, 2010
5     * Author: gpertea
6     */
7    
8     #include "GArgs.h"
9     #include "GFaSeqGet.h"
10     #include "GFastaIndex.h"
11     #include "GStr.h"
12     #include <ctype.h>
13    
14     #define USAGE "Usage:\n\
15     testfai <multi-fasta> [-F] [-q <seqname>] [-r <range>] [-C] [-o <out.fasta>]\n\
16     \n\
17     Create the index <multi-fasta>.fai if not available and retrieve the\n\
18     fasta sequence for <seqname>, optionally only pulling range <range>\n\
19     (reverse complemented if -C is given)\n\
20     \n\
21     Options:\n\
22     -F : force rebuilding of the fasta index in the current directory\n\
23     "
24     void openfwrite(FILE* &f, GArgs& args, char opt) {
25     GStr s=args.getOpt(opt);
26     if (!s.is_empty()) {
27     if (s=='-')
28     f=stdout;
29     else {
30     f=fopen(s,"w");
31     if (f==NULL) GError("Error creating file: %s\n", s.chars());
32     }
33     }
34     }
35    
36     int main(int argc, char * const argv[]) {
37     GArgs args(argc, argv, "hFCq:r:o:");
38     int e;
39     if ((e=args.isError())>0)
40     GError("%s\nInvalid argument: %s\n", USAGE, argv[e]);
41     if (args.getOpt('h')!=NULL){
42     GMessage("%s\n", USAGE);
43     exit(1);
44     }
45     args.startNonOpt();
46     GStr fadb(args.nextNonOpt());
47     if (fadb.is_empty()) GError("%s Error: multi-fasta file expected!\n",USAGE);
48     GStr fname(fadb);
49     fname.append(".fai");
50     bool createLocal=(args.getOpt('F')!=NULL);
51     const char* idxname=(createLocal)? NULL : fname.chars();
52     GFastaIndex faidx(fadb.chars(), idxname);
53     //also tried to load the index if exists in the current directory
54     GStr fnamecwd(fname); //name in current directory (without path)
55     int ip=-1;
56     if ((ip=fnamecwd.rindex(CHPATHSEP))>=0) {
57     fnamecwd.cut(0,ip+1);
58     }
59     if (!createLocal) { //look for existing indexes to load
60     //try the same directory as the fasta file first
61     if (!faidx.hasIndex() and fileExists(fnamecwd.chars())>1) { //try current working directory next
62     faidx.loadIndex(fnamecwd.chars());
63     }
64     if (!faidx.hasIndex()) {//could not load any index data
65     //try to create it in the same directory as the fasta file
66     GMessage("No fasta index found. Rebuilding..\n");
67     faidx.buildIndex();
68     if (faidx.getCount()==0) GError("Error: no fasta records to be indexed!\n");
69     GMessage("Fasta index rebuilt.\n");
70     //check if we can create a file there
71     FILE* fcreate=fopen(fname.chars(), "w");
72     if (fcreate==NULL)
73     GMessage("Warning: cannot create fasta index %s! (permissions?)\n", fname.chars());
74     else {
75     fclose(fcreate);
76     if (faidx.storeIndex(fname.chars())<faidx.getCount())
77     GMessage("Warning: error writing the index file %s!\n",fname.chars());
78     } //creating index file in the same directory as fasta file
79     }//trying to create the index file
80     }
81    
82     if (createLocal || !faidx.hasIndex()) {
83     //simply rebuild the index in the current directory and use it:
84     //remove directories in path, if any
85     if (faidx.getCount()==0) {
86     faidx.buildIndex();
87     if (faidx.getCount()==0) GError("Error: no fasta records to be indexed!\n");
88     }
89     if (faidx.storeIndex(fnamecwd.chars())<faidx.getCount())
90     GMessage("Warning: error writing the index file %s!\n",fnamecwd.chars());
91     }
92    
93     GStr qry(args.getOpt('q'));
94     if (qry.is_empty()) exit(0);
95     GFastaRec* farec=faidx.getRecord(qry.chars());
96     if (farec==NULL) {
97     GMessage("Error: couldn't find fasta record for '%s'!\n",qry.chars());
98     exit(1);
99     }
100     GFaSeqGet faseq(fadb.chars(),farec->seqlen, farec->fpos, farec->line_len, farec->line_blen);
101     //TODO: read these from -r option
102     uint qstart=0;
103     uint qend=0; //farec->seqlen
104     bool revCompl=(args.getOpt('C')!=NULL);
105     char* s=args.getOpt('r');
106     if (s!=NULL) {
107     char *p=s;
108     while (isdigit(*p)) p++;
109     if (*p=='-') {
110     sscanf(s,"%u-%u",&qstart, &qend);
111     if (qstart==0 || qend==0)
112     GError("Error parsing sequence range: %s\n",s);
113     }
114     else if (*p==':') {
115     int qlen=0;
116     sscanf(s,"%u:%d", &qstart, &qlen);
117     if (qstart==0 || qlen==0)
118     GError("Error parsing sequence range: %s\n",s);
119     qend=qstart+qlen-1;
120     }
121     else if (*p=='.') {
122     sscanf(s,"%u..%u",&qstart, &qend);
123     if (qstart==0 || qend==0)
124     GError("Error parsing sequence range: %s\n",s);
125     }
126     }
127     if (qstart==0) qstart=1;
128     if (qend==0) qend=farec->seqlen;
129     // call faseq.loadall() here if multiple ranges are to be extracted all
130     // over this genomic sequence
131     char* subseq=faseq.copyRange(qstart, qend, revCompl, true);
132     FILE* f_out=NULL;
133     openfwrite(f_out, args, 'o');
134     if (f_out==NULL) f_out=stdout;
135     writeFasta(f_out, qry.chars(), NULL, subseq, 70, qend-qstart+1);
136     GFREE(subseq);
137     }