ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GFaSeqGet.h
(Generate patch)
# Line 1 | Line 1
1   #ifndef GFASEQGET_H
2   #define GFASEQGET_H
3
4 #include "GBase.h"
3   #include "GList.hh"
4  
7
5   #define MAX_FASUBSEQ 0x20000000
6   //max 512MB sequence data held in memory at a time
7  
# Line 30 | Line 27
27       GFREE(sq);
28       }
29    // genomic, 1-based coordinates:
30 <  void setup(uint sstart, int slen, int sovl=0, int qfrom=0, int qto=0);
30 >  void setup(uint sstart, int slen, int sovl=0, int qfrom=0, int qto=0, uint maxseqlen=0);
31      //check for overlap with previous window and realloc/extend appropriately
32      //returns offset from seq that corresponds to sstart
33      // the window will keep extending until MAX_FASUBSEQ is reached
# Line 41 | Line 38
38    FILE* fh;
39    //raw offset in the file where the sequence actually starts:
40    off_t fseqstart;
41 <  int linelen; //length of each sequence line (assumed fixed)
42 <  char lendlen; //length of end-of-line characters between lines
43 <                         //(assumed fixed)
44 <  char lendch; //end-of-line signal character (can only be '\n' or '\r')
41 >  uint seq_len; //total sequence length, if known (when created from GFastaIndex)
42 >  int line_len; //length of each line of text
43 >  int line_blen; //binary length of each line
44 >                 // = line_len + number of EOL character(s)
45    GSubSeq* lastsub;
46    void initialParse(off_t fofs=0, bool checkall=true);
47    const char* loadsubseq(uint cstart, int& clen);
48    void finit(const char* fn, off_t fofs, bool validate);
49   public:
50    GFaSeqGet() {
51 +    fh=NULL;
52      fseqstart=0;
53 <    linelen=0;
54 <    lendch='\0';
53 >    seq_len=0;
54 >    line_len=0;
55 >    line_blen=0;
56      fname=NULL;
57      lastsub=NULL;
58      }
59 <  GFaSeqGet(const char* fn, off_t fofs, bool validate=false) {
59 >  GFaSeqGet(const char* fn, off_t fofs, bool validate=false) {
60 >     seq_len=0;
61       finit(fn,fofs,validate);
62       }
63    GFaSeqGet(const char* fn, bool validate=false) {
64 +     seq_len=0;
65       finit(fn,0,validate);
66       }
67 <  /*
68 <  GFaSeqGet(bool readAll, const char* fn, off_t fofs=0);
69 <  GFaSeqGet(bool readAll, FILE* f, off_t fofs=0);
70 <  */
67 >
68 >  GFaSeqGet(const char* faname, uint seqlen, off_t fseqofs, int l_len, int l_blen);
69 >  //constructor from GFastaIndex record
70 >
71    GFaSeqGet(FILE* f, off_t fofs=0, bool validate=false);
72 +
73    ~GFaSeqGet() {
74      if (fname!=NULL) {
75         GFREE(fname);
# Line 76 | Line 78
78      delete lastsub;
79      }
80    const char* subseq(uint cstart, int& clen);
81 <  const char* getRange(uint cstart, uint cend) {
82 <      if (cstart>cend) { swap(cstart, cend); }
81 >  const char* getRange(uint cstart=1, uint cend=0) {
82 >      if (cend==0) cend=(seq_len>0)?seq_len : MAX_FASUBSEQ;
83 >      if (cstart>cend) { Gswap(cstart, cend); }
84        int clen=cend-cstart+1;
85        //int rdlen=clen;
86        return subseq(cstart, clen);
87        }
88 <  //caller is responsible for deallocating copyRange() return string
88 >
89    char* copyRange(uint cstart, uint cend, bool revCmpl=false, bool upCase=false);
90 +  //caller is responsible for deallocating the return string
91  
92 <  void loadall() {
93 <    int clen=MAX_FASUBSEQ;
92 >  void loadall(uint32 max_len=0) {
93 >    //TODO: must read the whole sequence differently here - line by line
94 >    //so when EOF or another '>' line is found, the reading stops!
95 >    int clen=(seq_len>0) ? seq_len : ((max_len>0) ? max_len : MAX_FASUBSEQ);
96      subseq(1, clen);
97      }
98    void load(uint cstart, uint cend) {
99       //cache as much as possible
100 +      if (seq_len>0 && cend>seq_len) cend=seq_len; //correct a bad request
101        int clen=cend-cstart+1;
102        subseq(cstart, clen);
103       }
104    int getsublen() { return lastsub!=NULL ? lastsub->sqlen : 0 ; }
105    off_t getseqofs() { return fseqstart; }
106 <  int getlinelen() { return linelen; }
107 <  int getlendlen() { return lendlen; }
106 >  int getLineLen() { return line_len; }
107 >  int getLineBLen() { return line_blen; }
108    //reads a subsequence starting at genomic coordinate cstart (1-based)
109   };
110  

Diff Legend

Removed lines
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)