ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/LayoutParser.h
(Generate patch)
# Line 1 | Line 1
1 < #ifndef LayoutParser_H
2 < #define LayoutParser_H
3 <
4 < #include "GBase.h"
5 < #include "GList.hh"
6 < #include "GHash.hh"
7 < #include <stdio.h>
8 < //hash data associated with a contig/sequence name
9 < //a contig name key is always stored as its name plus .<length>
10 <
11 < class LytCtgData;
12 <
// Gap between two consecutive segments of a segmented sequence
// (LytSeqInfo::hasIntrons() reports these gaps as introns).
// LytSeqInfo keeps these records in a GREALLOC'ed array and fills them by
// plain assignment, so no constructor runs for the stored elements: keep this
// a simple data record without owned resources.
struct LytSeqInterSeg {
  int  segEnd, segRClip;        // end of the segment before the gap / its right clipping
  int  nextStart, nextLClip;    // start of the segment after the gap / its left clipping
  char segRSplice, nextLSplice; // presumably splice-site markers on each side of the gap (0 = none) -- TODO confirm
  int  nextSegSeq;              // value given as seqpos; its exact meaning is not visible in this header

  // Only the two gap boundaries are required; every other field defaults to 0.
  LytSeqInterSeg(int end, int nextstart, int rclip=0, int nextlclip=0,
                 char segrsplice=0, char nextlsplice=0, int seqpos=0)
    : segEnd(end), segRClip(rclip),
      nextStart(nextstart), nextLClip(nextlclip),
      segRSplice(segrsplice), nextLSplice(nextlsplice),
      nextSegSeq(seqpos) { }

  // Number of positions strictly between segEnd and nextStart
  // (0 when the two segments are adjacent). Callable on const records.
  int length() const {
    return (nextStart-segEnd-1);
  }
};
30 <
31 < class LytSeqInfo { //info for a sequence within the file
32 <   int xlen; //total sequence length (with all the added * within contig)
33 <   int interseglen;
34 <  public:
35 <   char *name;
36 <   LytCtgData* contig; //contig data containing this sequence, as above
37 <   bool segmented;
38 <   int numisegs; // number of intersegs[]
39 <   LytSeqInterSeg* intersegs;
40 <   off_t fpos; //file position for the sequence data
41 <   unsigned char reversed;
42 <   int offs; //offset in contig (of the very left end)
43 <   int left,right; //clear range (relative to sequence itself, max 1..xlen)
44 <   LytSeqInfo(char* seqid, LytCtgData* ctg, int pos=0, unsigned char minus=0,
45 <                          int slen=0, int clpL=0, int clpR=0) {
46 <         contig=ctg;
47 <         offs=pos;
48 <         reversed=minus;
49 <         fpos=0;
50 <         interseglen=0;
51 <         xlen=slen;
52 <         left=clpL+1; //1 if no clpL given
53 <         right=xlen-clpR; //0 if no len given
54 <         segmented=false;
55 <         numisegs=0;
56 <         name=Gstrdup(seqid);
57 <         intersegs=NULL;
58 <         }
59 <   ~LytSeqInfo() {
60 <         GFREE(name);
61 <         GFREE(intersegs);
62 <         }
63 <   bool hasIntrons() { return (numisegs>0); }
64 <   void addInterSeg(int end, int nextstart, int rclip=0, int nextlclip=0,
65 <                                          char splice=0, char nextsplice=0, int seqofs=0) {
66 <      GREALLOC(intersegs,(numisegs+1)*sizeof(LytSeqInterSeg));
67 <      interseglen+=nextstart-end-1;
68 <      intersegs[numisegs].segEnd=end; intersegs[numisegs].segRClip=rclip;
69 <      intersegs[numisegs].nextStart=nextstart; intersegs[numisegs].nextLClip=nextlclip;
70 <      intersegs[numisegs].segRSplice=splice;
71 <      intersegs[numisegs].nextLSplice=nextsplice;
72 <      intersegs[numisegs].nextSegSeq=seqofs;
73 <      numisegs++;
74 <      }
75 <
76 <   void setLength(int len) {
77 <         //should only be called BEFORE setting the real clipping coordinates
78 <         //(left,right)
79 <         xlen=len;
80 <         left=1;
81 <         right=xlen;
82 <         }
83 <   int length() { return xlen; } //xtended span, including introns
84 <   int seglen() { return xlen-interseglen; } //segments only, no introns
85 <   bool operator==(const LytSeqInfo& s)  {
86 <          return (offs+left-1==s.offs+s.left-1);
87 <         }
88 <   bool operator>(const LytSeqInfo& s)  {
89 <      return (offs+left-1>s.offs+s.left-1);
90 <     }
91 <   bool operator<(const LytSeqInfo& s)  {
92 <      return (offs+left-1<s.offs+s.left-1);
93 <     }
94 <  char* expandGaps(char* s);
95 <  };
96 <
97 <
98 < class LytCtgData {
99 <   public:
100 <    char* name; //contig name, as stored in file
101 <    unsigned int len; //contig length (lsequence, from ACE file)
102 <    int lpos, rpos;
103 <    int numseqs;
104 <    int offs; //some other type of user data that might be of use
105 <    off_t fpos; //position in file for this contig's entry
106 <    GList<LytSeqInfo> seqs;
107 <    LytCtgData(off_t pos=0):seqs(false,false,false) {
108 <      name=NULL;
109 <      offs=0;
110 <      len=0;
111 <      numseqs=0;
112 <      fpos=pos;
113 <      }
114 <    ~LytCtgData() {
115 <      GFREE(name);
116 <      seqs.Clear();
117 <      }
118 <
119 <   char* readName(char* s, GHash<int>& names);
120 <
121 <   bool operator==(const LytCtgData& s)  {
122 <      return (strcmp(name,s.name)==0);
123 <     }
124 <   bool operator>(const LytCtgData& s)  {
125 <      return (strcmp(name,s.name)>0);
126 <     }
127 <   bool operator<(const LytCtgData& s)  {
128 <      return (strcmp(name,s.name)<0);
129 <     }
130 < };
131 < //callback -- called after a read or contig sequence is loaded
132 < typedef bool fnLytSeq(int ctgno, LytCtgData* d, LytSeqInfo* s, char* seq);
133 <
134 < class LayoutParser {
135 < protected:
136 <  FILE* f; //file stream
137 <  off_t f_pos;
138 <  char* fname;
139 <  LytCtgData* currentContig; // currently loaded contig -- for browsing/loading
140 <  int numContigs; //total number of contigs found in this file
141 <  //int numSeqs; //total number of (distinct) sequences found in this file
142 <  GHash<LytSeqInfo> seqinfo; //sequence locations in the file
143 <  GHash<int> ctgIDs; //contig IDs, to make them unique!
144 <
145 <  GList<LytCtgData> contigs; //list of contig names with their size,
146 <                       //number of sequences and filepos
147 < protected:
148 <  GLineReader* linebuf; //the line buffer
149 <  off_t fskipTo(const char* linestart, const char* butnot=NULL);
150 <  bool startsWith(const char* s, const char* start, int tlen);
151 <  virtual LytSeqInfo* addSeq(char* s, LytCtgData* ctg);
152 <  int seek(off_t offset) {
153 <      int r=fseeko(f, offset, SEEK_SET);
154 <      if (r==0) f_pos=offset;
155 <      return r;
156 <      }
157 <
158 < public:
159 <  LayoutParser(const char* filename):contigs(false,true) {
160 <   f=NULL;
161 <   f_pos=0;
162 <   numContigs=0;
163 <   currentContig=NULL;
164 <   if (filename==NULL) {
165 <      f=stdin;
166 <      fname=Gstrdup("stdin");
167 <      }
168 <    else
169 <      fname=Gstrdup(filename);
170 <   linebuf=new GLineReader();
171 <   }
172 <  virtual ~LayoutParser() {
173 <    ctgIDs.Clear();
174 <    GFREE(fname);
175 <    delete linebuf;
176 <    close();
177 <    numContigs=0;
178 <    seqinfo.Clear();
179 <    contigs.Clear();
180 <    }
181 <  virtual bool open();
182 <  void close();
183 <  virtual bool parse(fnLytSeq* seqfn=NULL); //load all the file offsets
184 <  virtual bool parseContigs(); //load all the file offsets for contigs
185 <  virtual bool loadContig(int ctgidx, fnLytSeq* seqfn=NULL, bool re_pos=true); //for loading by browsing
186 <  //if parsefn is not NULL, it is executed, passing the sequence data(first time, with the contig sequence)
187 <  //if parserfn returns true, the data is freed after it is processed
188 <  virtual char getFileType() { return 'L'; }
189 <  //sequence loading - only by request
190 <  LytCtgData* getContig(int idx) { return contigs[idx]; }
191 <  virtual char* getSeq(LytSeqInfo* sqinfo) { return NULL; }
192 <  virtual char* getContigSeq(LytCtgData* ctgdata) { return NULL; }
193 <  int getNumContigs() { return numContigs; }
194 <  //int getNumSeqs() { return numSeqs; }
195 <  off_t getFilePos() { return f_pos; }
196 <  //sorting the list of contigs:
197 <  void contigsByName();
198 <  void contigsByLen();
199 <  void contigsByNumSeqs();
200 < };
201 <
202 < #endif
1 > #ifndef LayoutParser_H
2 > #define LayoutParser_H
3 >
4 > #include "GBase.h"
5 > #include "GList.hh"
6 > #include "GHash.hh"
7 > #include <stdio.h>
8 > //hash data associated with a contig/sequence name
9 > //a contig name key is always stored as its name plus .<length>
10 >
11 > class LytCtgData;
12 >
// Gap between two consecutive segments of a segmented sequence
// (LytSeqInfo::hasIntrons() reports these gaps as introns).
// LytSeqInfo keeps these records in a GREALLOC'ed array and fills them by
// plain assignment, so no constructor runs for the stored elements: keep this
// a simple data record without owned resources.
struct LytSeqInterSeg {
  int  segEnd, segRClip;        // end of the segment before the gap / its right clipping
  int  nextStart, nextLClip;    // start of the segment after the gap / its left clipping
  char segRSplice, nextLSplice; // presumably splice-site markers on each side of the gap (0 = none) -- TODO confirm
  int  nextSegSeq;              // value given as seqpos; its exact meaning is not visible in this header

  // Only the two gap boundaries are required; every other field defaults to 0.
  LytSeqInterSeg(int end, int nextstart, int rclip=0, int nextlclip=0,
                 char segrsplice=0, char nextlsplice=0, int seqpos=0)
    : segEnd(end), segRClip(rclip),
      nextStart(nextstart), nextLClip(nextlclip),
      segRSplice(segrsplice), nextLSplice(nextlsplice),
      nextSegSeq(seqpos) { }

  // Number of positions strictly between segEnd and nextStart
  // (0 when the two segments are adjacent). Callable on const records.
  int length() const {
    return (nextStart-segEnd-1);
  }
};
30 >
31 > class LytSeqInfo { //info for a sequence within the file
32 >   int xlen; //total sequence length (with all the added * within contig)
33 >   int interseglen;
34 >  public:
35 >   char *name;
36 >   LytCtgData* contig; //contig data containing this sequence, as above
37 >   bool segmented;
38 >   int numisegs; // number of intersegs[]
39 >   LytSeqInterSeg* intersegs;
40 >   off_t fpos; //file position for the sequence data
41 >   unsigned char reversed;
42 >   int offs; //offset in contig (of the very left end)
43 >   int left,right; //clear range (relative to sequence itself, max 1..xlen)
44 >   LytSeqInfo(char* seqid, LytCtgData* ctg, int pos=0, unsigned char minus=0,
45 >                          int slen=0, int clpL=0, int clpR=0) {
46 >         contig=ctg;
47 >         offs=pos;
48 >         reversed=minus;
49 >         fpos=0;
50 >         interseglen=0;
51 >         xlen=slen;
52 >         left=clpL+1; //1 if no clpL given
53 >         right=xlen-clpR; //0 if no len given
54 >         segmented=false;
55 >         numisegs=0;
56 >         name=Gstrdup(seqid);
57 >         intersegs=NULL;
58 >         }
59 >   ~LytSeqInfo() {
60 >         GFREE(name);
61 >         GFREE(intersegs);
62 >         }
63 >   bool hasIntrons() { return (numisegs>0); }
64 >   void addInterSeg(int end, int nextstart, int rclip=0, int nextlclip=0,
65 >                                          char splice=0, char nextsplice=0, int seqofs=0) {
66 >      GREALLOC(intersegs,(numisegs+1)*sizeof(LytSeqInterSeg));
67 >      interseglen+=nextstart-end-1;
68 >      intersegs[numisegs].segEnd=end; intersegs[numisegs].segRClip=rclip;
69 >      intersegs[numisegs].nextStart=nextstart; intersegs[numisegs].nextLClip=nextlclip;
70 >      intersegs[numisegs].segRSplice=splice;
71 >      intersegs[numisegs].nextLSplice=nextsplice;
72 >      intersegs[numisegs].nextSegSeq=seqofs;
73 >      numisegs++;
74 >      }
75 >
76 >   void setLength(int len) {
77 >         //should only be called BEFORE setting the real clipping coordinates
78 >         //(left,right)
79 >         xlen=len;
80 >         left=1;
81 >         right=xlen;
82 >         }
83 >   int length() { return xlen; } //xtended span, including introns
84 >   int seglen() { return xlen-interseglen; } //segments only, no introns
85 >   bool operator==(const LytSeqInfo& s)  {
86 >          return (offs+left-1==s.offs+s.left-1);
87 >         }
88 >   bool operator>(const LytSeqInfo& s)  {
89 >      return (offs+left-1>s.offs+s.left-1);
90 >     }
91 >   bool operator<(const LytSeqInfo& s)  {
92 >      return (offs+left-1<s.offs+s.left-1);
93 >     }
94 >  char* expandGaps(char* s);
95 >  };
96 >
97 >
98 > class LytCtgData {
99 >   public:
100 >    char* name; //contig name, as stored in file
101 >    unsigned int len; //contig length (lsequence, from ACE file)
102 >    int lpos, rpos;
103 >    int numseqs;
104 >    int offs; //some other type of user data that might be of use
105 >    off_t fpos; //position in file for this contig's entry
106 >    GList<LytSeqInfo> seqs;
107 >    LytCtgData(off_t pos=0):seqs(false,false,false) {
108 >      name=NULL;
109 >      offs=0;
110 >      len=0;
111 >      numseqs=0;
112 >      fpos=pos;
113 >      }
114 >    ~LytCtgData() {
115 >      GFREE(name);
116 >      seqs.Clear();
117 >      }
118 >
119 >   char* readName(char* s, GHash<int>& names);
120 >
121 >   bool operator==(const LytCtgData& s)  {
122 >      return (strcmp(name,s.name)==0);
123 >     }
124 >   bool operator>(const LytCtgData& s)  {
125 >      return (strcmp(name,s.name)>0);
126 >     }
127 >   bool operator<(const LytCtgData& s)  {
128 >      return (strcmp(name,s.name)<0);
129 >     }
130 > };
131 > //callback -- called after a read or contig sequence is loaded
132 > typedef bool fnLytSeq(int ctgno, LytCtgData* d, LytSeqInfo* s, char* seq);
133 >
134 > class LayoutParser {
135 > protected:
136 >  FILE* f; //file stream
137 >  off_t f_pos;
138 >  char* fname;
139 >  LytCtgData* currentContig; // currently loaded contig -- for browsing/loading
140 >  int numContigs; //total number of contigs found in this file
141 >  //int numSeqs; //total number of (distinct) sequences found in this file
142 >  GHash<LytSeqInfo> seqinfo; //sequence locations in the file
143 >  GHash<int> ctgIDs; //contig IDs, to make them unique!
144 >
145 >  GList<LytCtgData> contigs; //list of contig names with their size,
146 >                       //number of sequences and filepos
147 > protected:
148 >  GLineReader* linebuf; //the line buffer
149 >  off_t fskipTo(const char* linestart, const char* butnot=NULL);
150 >  bool startsWith(const char* s, const char* start, int tlen);
151 >  virtual LytSeqInfo* addSeq(char* s, LytCtgData* ctg);
152 >  int seek(off_t offset) {
153 >      int r=fseeko(f, offset, SEEK_SET);
154 >      if (r==0) f_pos=offset;
155 >      return r;
156 >      }
157 >
158 > public:
159 >  LayoutParser(const char* filename):contigs(false,true) {
160 >   f=NULL;
161 >   f_pos=0;
162 >   numContigs=0;
163 >   currentContig=NULL;
164 >   if (filename==NULL) {
165 >      f=stdin;
166 >      fname=Gstrdup("stdin");
167 >      }
168 >    else
169 >      fname=Gstrdup(filename);
170 >   linebuf=new GLineReader();
171 >   }
172 >  virtual ~LayoutParser() {
173 >    ctgIDs.Clear();
174 >    GFREE(fname);
175 >    delete linebuf;
176 >    close();
177 >    numContigs=0;
178 >    seqinfo.Clear();
179 >    contigs.Clear();
180 >    }
181 >  virtual bool open();
182 >  void close();
183 >  virtual bool parse(fnLytSeq* seqfn=NULL); //load all the file offsets
184 >  virtual bool parseContigs(); //load all the file offsets for contigs
185 >  virtual bool loadContig(int ctgidx, fnLytSeq* seqfn=NULL, bool re_pos=true); //for loading by browsing
186 >  //if parsefn is not NULL, it is executed, passing the sequence data(first time, with the contig sequence)
187 >  //if parserfn returns true, the data is freed after it is processed
188 >  virtual char getFileType() { return 'L'; }
189 >  //sequence loading - only by request
190 >  LytCtgData* getContig(int idx) { return contigs[idx]; }
191 >  virtual char* getSeq(LytSeqInfo* sqinfo) { return NULL; }
192 >  virtual char* getContigSeq(LytCtgData* ctgdata) { return NULL; }
193 >  int getNumContigs() { return numContigs; }
194 >  //int getNumSeqs() { return numSeqs; }
195 >  off_t getFilePos() { return f_pos; }
196 >  //sorting the list of contigs:
197 >  void contigsByName();
198 >  void contigsByLen();
199 >  void contigsByNumSeqs();
200 > };
201 >
202 > #endif

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines