ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GFaSeqGet.h
(Generate patch)
# Line 1 | Line 1
1   #ifndef GFASEQGET_H
2   #define GFASEQGET_H
3  
4 #include "GBase.h"
4   #include "GList.hh"
5  
7
6   #define MAX_FASUBSEQ 0x20000000
7   //max 512MB sequence data held in memory at a time
8  
# Line 30 | Line 28
28       GFREE(sq);
29       }
30    // genomic, 1-based coordinates:
31 <  void setup(uint sstart, int slen, int sovl=0, int qfrom=0, int qto=0);
31 >  void setup(uint sstart, int slen, int sovl=0, int qfrom=0, int qto=0, uint maxseqlen=0);
32      //check for overlap with previous window and realloc/extend appropriately
33      //returns offset from seq that corresponds to sstart
34      // the window will keep extending until MAX_FASUBSEQ is reached
# Line 41 | Line 39
39    FILE* fh;
40    //raw offset in the file where the sequence actually starts:
41    off_t fseqstart;
42 <  int linelen; //length of each sequence line (assumed fixed)
43 <  char lendlen; //length of end-of-line characters between lines
44 <                         //(assumed fixed)
45 <  char lendch; //end-of-line signal character (can only be '\n' or '\r')
42 >  uint seq_len; //total sequence length, if known (when created from GFastaIndex)
43 >  int line_len; //length of each line of text
44 >  int line_blen; //binary length of each line
45 >                 // = line_len + number of EOL character(s)
46    GSubSeq* lastsub;
47    void initialParse(off_t fofs=0, bool checkall=true);
48    const char* loadsubseq(uint cstart, int& clen);
49    void finit(const char* fn, off_t fofs, bool validate);
50   public:
51    GFaSeqGet() {
52 +    fh=NULL;
53      fseqstart=0;
54 <    linelen=0;
55 <    lendch='\0';
54 >    seq_len=0;
55 >    line_len=0;
56 >    line_blen=0;
57      fname=NULL;
58      lastsub=NULL;
59      }
60 <  GFaSeqGet(const char* fn, off_t fofs, bool validate=false) {
60 >  GFaSeqGet(const char* fn, off_t fofs, bool validate=false) {
61 >     seq_len=0;
62       finit(fn,fofs,validate);
63       }
64    GFaSeqGet(const char* fn, bool validate=false) {
65 +     seq_len=0;
66       finit(fn,0,validate);
67       }
68 <  /*
69 <  GFaSeqGet(bool readAll, const char* fn, off_t fofs=0);
70 <  GFaSeqGet(bool readAll, FILE* f, off_t fofs=0);
71 <  */
68 >
69 >  GFaSeqGet(const char* faname, uint seqlen, off_t fseqofs, int l_len, int l_blen);
70 >  //constructor from GFastaIndex record
71 >
72    GFaSeqGet(FILE* f, off_t fofs=0, bool validate=false);
73 +
74    ~GFaSeqGet() {
75      if (fname!=NULL) {
76         GFREE(fname);
# Line 76 | Line 79
79      delete lastsub;
80      }
81    const char* subseq(uint cstart, int& clen);
82 <  const char* getRange(uint cstart, uint cend) {
83 <      if (cstart>cend) { swap(cstart, cend); }
82 >  const char* getRange(uint cstart=1, uint cend=0) {
83 >      if (cend==0) cend=(seq_len>0)?seq_len : MAX_FASUBSEQ;
84 >      if (cstart>cend) { Gswap(cstart, cend); }
85        int clen=cend-cstart+1;
86        //int rdlen=clen;
87        return subseq(cstart, clen);
88        }
89 <  //caller is responsible for deallocating copyRange() return string
89 >
90    char* copyRange(uint cstart, uint cend, bool revCmpl=false, bool upCase=false);
91 +  //caller is responsible for deallocating the return string
92  
93 <  void loadall() {
94 <    int clen=MAX_FASUBSEQ;
93 >  void loadall(uint32 max_len=0) {
94 >    //TODO: must read the whole sequence differently here - line by line
95 >    //so when EOF or another '>' line is found, the reading stops!
96 >    int clen=(seq_len>0) ? seq_len : ((max_len>0) ? max_len : MAX_FASUBSEQ);
97      subseq(1, clen);
98      }
99    void load(uint cstart, uint cend) {
100       //cache as much as possible
101 +      if (seq_len>0 && cend>seq_len) cend=seq_len; //correct a bad request
102        int clen=cend-cstart+1;
103        subseq(cstart, clen);
104       }
105    int getsublen() { return lastsub!=NULL ? lastsub->sqlen : 0 ; }
106    off_t getseqofs() { return fseqstart; }
107 <  int getlinelen() { return linelen; }
108 <  int getlendlen() { return lendlen; }
107 >  int getLineLen() { return line_len; }
108 >  int getLineBLen() { return line_blen; }
109    //reads a subsequence starting at genomic coordinate cstart (1-based)
110   };
111  

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines