// gpertea
#ifndef __GCDB_H
#define __GCDB_H

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include "GBase.h"

// Memory-mapping support.
// On Windows there is no <sys/mman.h>, so the constants used by the mapping
// code are defined here and mmap()/munmap() are only declared (their
// definitions are not part of this header). Everywhere else the system
// header is used as-is.
#if defined(__WIN32__) || defined(WIN32)
  #define PROT_READ  1
  #define PROT_WRITE  2
  #define PROT_READWRITE  3
  #define MAP_SHARED  1
  #define MAP_PRIVATE  2
  // access()-style mode bits
  #define F_OK 0
  #define R_OK 4
  #define W_OK 2
  #define RW_OK 6

  #if !defined(MAP_FAILED)
  #define MAP_FAILED  ((void *) -1)
  #endif
  void *mmap(char *,size_t,int,int,int,off_t);
  int   munmap(void *,size_t);
#else
  #include <sys/mman.h>
#endif

// All 32 low bits set (4294967295), typed as unsigned long.
#define MAX_UINT 0xFFFFFFFFUL

//=====================================================
//-------------     buffer stuff    -------------------
//=====================================================
// Buffer sizes for GCDBuffer users. Presumably input/output buffer capacities
// in bytes -- the code that uses them is not visible in this header.
#define GCDBUFFER_INSIZE 8192
#define GCDBUFFER_OUTSIZE 8192

// I/O callback stored by GCDBuffer: takes (int, char*, size_t) and returns int.
// NOTE(review): looks like a read()/write()-style primitive called with the
// buffer's file descriptor -- confirm against the GCDBuffer implementation.
typedef int (*opfunc)(int, char*, size_t);

// File offset type; formerly `unsigned long`, now off_t.
typedef off_t gcdb_seek_pos;
typedef unsigned int (*uint_conv)(void*); //uint conversion function pointer
typedef off_t (*offt_conv)(void*); //off_t conversion function pointer

//conversion function --> to platform independent uint
// Both pointers are (re)assigned by the GCDBuffer constructors according to
// the result of endian_test().
extern uint_conv gcvt_uint;
extern offt_conv gcvt_offt;

// A non-zero result selects the *_sun converters, zero selects the *_x86 ones
// (see the GCDBuffer constructors).
int endian_test(void);
unsigned int uint32_sun(void* x86int);
unsigned int uint32_x86(void* x86int);
//for file offsets: off_t runtime conversions:
off_t offt_sun(void* offt);
off_t offt_x86(void* offt);

class GCDBuffer { |
60 |
|
|
public: |
61 |
|
|
char *x; |
62 |
|
|
unsigned int p; |
63 |
|
|
unsigned int n; |
64 |
|
|
int fd; |
65 |
|
|
opfunc op; |
66 |
|
|
//methods: |
67 |
|
|
GCDBuffer() { |
68 |
|
|
x=NULL; |
69 |
|
|
fd=0; |
70 |
|
|
op=NULL; |
71 |
|
|
n=0; |
72 |
|
|
//check endianness |
73 |
|
|
gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86; |
74 |
|
|
gcvt_offt=(endian_test())? &offt_sun : &offt_x86; |
75 |
|
|
} |
76 |
|
|
GCDBuffer(opfunc aop,int afd,char *buf,unsigned int len) { |
77 |
|
|
//check endianness |
78 |
|
|
gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86; |
79 |
|
|
gcvt_offt=(endian_test())? &offt_sun : &offt_x86; |
80 |
|
|
init(aop, afd, buf, len); |
81 |
|
|
} |
82 |
|
|
void init(opfunc aop,int afd,char *buf,unsigned int len) { |
83 |
|
|
x=buf; |
84 |
|
|
fd=afd; |
85 |
|
|
op=aop; |
86 |
|
|
p=0; |
87 |
|
|
n=len; |
88 |
|
|
} |
89 |
|
|
int flush(); |
90 |
|
|
int write_all(char* buf, unsigned int pt); |
91 |
|
|
int put(char* buf,unsigned int len); |
92 |
|
|
int putalign(char* buf,unsigned int len); |
93 |
|
|
int putflush(char* buf,unsigned int len); |
94 |
|
|
int puts(char *buf); |
95 |
|
|
int putsalign(char *buf); |
96 |
|
|
int putsflush(char *buf); |
97 |
|
|
int oneRead(char* buf, unsigned int len); |
98 |
|
|
int getthis(char* buf,unsigned int len); |
99 |
|
|
int get(char* buf,unsigned int len); |
100 |
|
|
int bget(char* buf,unsigned int len); |
101 |
|
|
int feed(); |
102 |
|
|
char *peek(); |
103 |
|
|
void seek(unsigned int len); |
104 |
|
|
int copy(GCDBuffer* bin); |
105 |
|
|
}; |
106 |
|
|
|
107 |
|
|
|
108 |
|
|
//=====================================================
//-------------     cdb utils       -------------------
//=====================================================
// errno comes from <errno.h>, included at the top of this header. It must not
// be redeclared as `extern int errno;`: the C standard allows errno to be a
// macro, and declaring an identifier with that name yourself is undefined.
extern int error_intr;
extern int error_nomem;
extern int error_proto;

//additional data to be appended to the cdb file:
// Option bit masks (each flag occupies its own bit).
#define CDBMSK_OPT_MULTI    0x00000001
#define CDBMSK_OPT_C        0x00000002
#define CDBMSK_OPT_CADD     0x00000004
#define CDBMSK_OPT_COMPRESS 0x00000008
//creates a compressed version of the database

//uses plenty of unions for ensuring compatibility with |
125 |
|
|
// the old 'CIDX' info structure |
126 |
|
|
|
127 |
|
|
//damn, sun and 64bit machines |
128 |
|
|
// align this to 64bit -- so sizeof() is misled! |
129 |
|
|
#pragma pack(4) |
130 |
|
|
// I wish, but stupid gcc 2.95.3 alpha-decosf version does not |
131 |
|
|
// recognize this pragma directive !!? |
132 |
|
|
// |
133 |
|
|
struct cdbInfo { |
134 |
|
|
uint32 num_keys; |
135 |
|
|
union { |
136 |
|
|
uint32 num_records; |
137 |
|
|
char oldtag[4]; // 'CIDX' for old tag style |
138 |
|
|
}; |
139 |
|
|
// data file size -- used to be uint32, now it could be 64bit |
140 |
|
|
union { |
141 |
|
|
off_t dbsize; |
142 |
|
|
uint32 oldnum[2]; //num_keys, num_records |
143 |
|
|
}; |
144 |
|
|
union { |
145 |
|
|
uint32 idxflags; |
146 |
|
|
uint32 old_dbsize; |
147 |
|
|
}; |
148 |
|
|
union { |
149 |
|
|
int dbnamelen; |
150 |
|
|
int old_idxflags; |
151 |
|
|
}; |
152 |
|
|
// -- the actual db name precedes this fixed-size record |
153 |
|
|
union { |
154 |
|
|
char tag[4]; //'CDBX' for new files with LFS |
155 |
|
|
uint32 old_dbnamelen; |
156 |
|
|
}; |
157 |
|
|
}; |
158 |
|
|
#pragma pack() |
159 |
|
|
|
160 |
|
|
extern int cdbInfoSIZE; |
161 |
|
|
|
162 |
|
|
void uint32_pack(char *,uint32); |
163 |
|
|
void uint32_pack_big(char *,uint32); |
164 |
|
|
void uint32_unpack(char *,uint32 *); |
165 |
|
|
void uint32_unpack_big(char *,uint32 *); |
166 |
|
|
|
167 |
|
|
//=====================================================
//-------------     cdb index       -------------------
//=====================================================

// Number of cdb_hp entries held by one cdb_hplist node.
#define CDB_HPLIST 1000

struct cdb_hp { uint32 h; uint32 p; } ; |
174 |
|
|
|
175 |
|
|
struct cdb_hplist { |
176 |
|
|
struct cdb_hp hp[CDB_HPLIST]; |
177 |
|
|
struct cdb_hplist *next; |
178 |
|
|
int num; |
179 |
|
|
}; |
180 |
|
|
|
181 |
|
|
//the index file should always be smaller than 4GB ! |
182 |
|
|
|
183 |
|
|
class GCdbWrite { |
184 |
|
|
GCDBuffer* cdbuf; |
185 |
|
|
char bspace[8192]; |
186 |
|
|
char fname[1024]; |
187 |
|
|
char final[2048]; |
188 |
|
|
uint32 count[256]; |
189 |
|
|
uint32 start[256]; |
190 |
|
|
struct cdb_hplist *head; |
191 |
|
|
struct cdb_hp *split; /* includes space for hash */ |
192 |
|
|
struct cdb_hp *hash; |
193 |
|
|
uint32 numentries; |
194 |
|
|
uint32 pos; //file position |
195 |
|
|
int posplus(uint32 len); |
196 |
|
|
int fd; //file descriptor |
197 |
|
|
public: |
198 |
|
|
//methods: |
199 |
|
|
GCdbWrite(int afd); //was: init |
200 |
|
|
GCdbWrite(char* fname); |
201 |
|
|
~GCdbWrite(); |
202 |
|
|
int addbegin(unsigned int keylen,unsigned int datalen); |
203 |
|
|
int addend(unsigned int keylen,unsigned int datalen,uint32 h); |
204 |
|
|
int addrec(const char *key,unsigned int keylen,char *data,unsigned int datalen); |
205 |
|
|
int add(const char *key, char *data, unsigned int datalen); |
206 |
|
|
int getNumEntries() { return numentries; } |
207 |
|
|
int finish(); |
208 |
|
|
int close(); |
209 |
|
|
int getfd() { return fd; } |
210 |
|
|
char* getfile() { return fname; } |
211 |
|
|
}; |
212 |
|
|
|
213 |
|
|
|
214 |
|
|
//=====================================================
//-------------        cdb          -------------------
//=====================================================

// NOTE(review): presumably the initial value fed to cdb_hashadd()/cdb_hash()
// -- confirm at their definitions.
#define CDB_HASHSTART 5381

// Hash helpers, defined outside this header.
// NOTE(review): cdb_hashadd presumably folds one byte into a running hash and
// cdb_hash hashes a buffer of the given length -- confirm at the definitions.
uint32 cdb_hashadd(uint32,unsigned char);
uint32 cdb_hash(const char *,unsigned int);

class GCdbRead { |
224 |
|
|
uint32 size; // initialized if map is nonzero |
225 |
|
|
uint32 loop; // number of hash slots searched under this key |
226 |
|
|
uint32 khash; // initialized if loop is nonzero |
227 |
|
|
uint32 kpos; // initialized if loop is nonzero |
228 |
|
|
uint32 hpos; // initialized if loop is nonzero |
229 |
|
|
uint32 hslots; // initialized if loop is nonzero |
230 |
|
|
uint32 dpos; // initialized if cdb_findnext() returns 1 |
231 |
|
|
uint32 dlen; // initialized if cdb_findnext() returns 1 |
232 |
|
|
char fname[1024]; |
233 |
|
|
char *map; // 0 if no map is available |
234 |
|
|
int fd; |
235 |
|
|
public: |
236 |
|
|
//methods: |
237 |
|
|
GCdbRead(int fd); //was cdb_init |
238 |
|
|
GCdbRead(char* afname); //was cdb_init |
239 |
|
|
~GCdbRead(); //was cdb_free |
240 |
|
|
int read(char *,unsigned int,uint32); |
241 |
|
|
int match(const char *key, unsigned int len, uint32 pos); |
242 |
|
|
void findstart() { loop =0; } |
243 |
|
|
int findnext(const char *key,unsigned int len); |
244 |
|
|
int find(const char *key); |
245 |
|
|
int datapos() { return dpos; } |
246 |
|
|
int datalen() { return dlen; } |
247 |
|
|
int getfd() { return fd; } |
248 |
|
|
char* getfile() { return fname; } |
249 |
|
|
}; |
250 |
|
|
|
251 |
|
|
class GReadBuf { |
252 |
|
|
protected: |
253 |
|
|
FILE* f; |
254 |
|
|
uchar* buf; |
255 |
|
|
int buflen; |
256 |
|
|
int bufused; // |
257 |
|
|
int bufpos; |
258 |
|
|
off_t fpos; |
259 |
|
|
bool eof; |
260 |
|
|
bool eob; |
261 |
|
|
|
262 |
|
|
int refill(bool repos=false) { |
263 |
|
|
//refill the buffer----------- |
264 |
|
|
if (repos && bufpos==0) return 0; //no need to repos |
265 |
|
|
if (eof) return 0; |
266 |
|
|
int fr=0; |
267 |
|
|
if (repos && bufpos<bufused) { |
268 |
|
|
int kept=bufused-bufpos; |
269 |
|
|
memmove((void*)buf, (void*)(buf+bufpos),kept); |
270 |
|
|
fr=(int)fread((void *)(buf+kept), 1, buflen-kept, f); |
271 |
|
|
if (fr<buflen-kept) eof=true; |
272 |
|
|
buf[kept+fr]='\0'; |
273 |
|
|
bufused=kept+fr; |
274 |
|
|
} |
275 |
|
|
else { |
276 |
|
|
fr=(int)fread((void *)buf, 1, buflen, f); |
277 |
|
|
if (fr<buflen) eof=true; |
278 |
|
|
buf[fr]='\0'; //only for text record parsers |
279 |
|
|
bufused=fr; |
280 |
|
|
} |
281 |
|
|
if (feof(f)) eof=true; |
282 |
|
|
if (ferror(f)) { |
283 |
|
|
GMessage("GReadBuf::refill - error at fread!\n"); |
284 |
|
|
eof=true; |
285 |
|
|
} |
286 |
|
|
bufpos=0; |
287 |
|
|
fpos+=fr; //bytes read from file so far |
288 |
|
|
return fr; |
289 |
|
|
} |
290 |
|
|
public: |
291 |
|
|
GReadBuf(FILE* fin, int bsize=4096) { |
292 |
|
|
f=fin; |
293 |
|
|
buflen=bsize; |
294 |
|
|
GMALLOC(buf,buflen+1); |
295 |
|
|
bufpos=0; //current pointer for get function |
296 |
|
|
bufused=0; |
297 |
|
|
fpos=0; |
298 |
|
|
eof=false; |
299 |
|
|
eob=false; |
300 |
|
|
refill(); |
301 |
|
|
} |
302 |
|
|
~GReadBuf() { GFREE(buf); } |
303 |
|
|
|
304 |
|
|
//reads len chars from stream into the outbuf |
305 |
|
|
//updates bufpos |
306 |
|
|
//->returns the number of bytes read |
307 |
|
|
int get(uchar *outbuf, int len) { |
308 |
|
|
if (eob) return 0; |
309 |
|
|
int rd=0; //bytes read |
310 |
|
|
while (!eob && rd<len) { |
311 |
|
|
int to_read=GMIN((bufused-bufpos),(len-rd)); |
312 |
|
|
memcpy((void*)(outbuf+rd),(void*)(buf+bufpos), to_read); |
313 |
|
|
bufpos+=to_read; |
314 |
|
|
rd+=to_read; |
315 |
|
|
if (bufpos>=bufused) { |
316 |
|
|
if (eof) eob=true; |
317 |
|
|
else refill(); |
318 |
|
|
} |
319 |
|
|
}//while |
320 |
|
|
return rd; |
321 |
|
|
} |
322 |
|
|
|
323 |
|
|
uchar* getStr(uchar *outbuf, int len) { |
324 |
|
|
int rd=get(outbuf,len); |
325 |
|
|
if (rd==0) return NULL; |
326 |
|
|
else { |
327 |
|
|
outbuf[rd]='\0'; |
328 |
|
|
return outbuf; |
329 |
|
|
} |
330 |
|
|
} |
331 |
|
|
|
332 |
|
|
// getc equivalent |
333 |
|
|
int getch() { |
334 |
|
|
if (eob) return -1; |
335 |
|
|
int ch=(int)(uchar)buf[bufpos]; |
336 |
|
|
bufpos++; |
337 |
|
|
if (bufpos>=bufused) { |
338 |
|
|
if (eof) eob=true; |
339 |
|
|
else refill(); |
340 |
|
|
} |
341 |
|
|
return ch; |
342 |
|
|
} |
343 |
|
|
|
344 |
|
|
//--- |
345 |
|
|
bool isEof() { return eob; } |
346 |
|
|
bool ended() { return eob; } |
347 |
|
|
off_t getPos() { |
348 |
|
|
//returns the virtual file position |
349 |
|
|
// = the actual file offset of the byte at bufpos |
350 |
|
|
return fpos-(bufused-bufpos); |
351 |
|
|
} |
352 |
|
|
//skip into the stream the specified number of bytes |
353 |
|
|
int skip(int skiplen) { |
354 |
|
|
if (eob) return 0; |
355 |
|
|
int r=0; //the actual number of bytes skipped |
356 |
|
|
while (skiplen && !eob) { |
357 |
|
|
int dif=GMIN(bufused-bufpos,skiplen); |
358 |
|
|
skiplen-=dif; |
359 |
|
|
bufpos+=dif; |
360 |
|
|
r+=dif; |
361 |
|
|
if (bufpos>=bufused) { |
362 |
|
|
if (eof) { eob=true; return r; } |
363 |
|
|
refill(); |
364 |
|
|
} |
365 |
|
|
} |
366 |
|
|
return r; |
367 |
|
|
} |
368 |
|
|
//look ahead without updating the read pointer (bufpos) |
369 |
|
|
//Cannot peek more than buflen! |
370 |
|
|
int peek(uchar* outbuf, int len) { |
371 |
|
|
if (eob) return -1; |
372 |
|
|
//if (eob || len>buflen) return -1; |
373 |
|
|
if (len>bufused-bufpos) refill(true); |
374 |
|
|
int mlen=GMIN((bufused-bufpos),len); |
375 |
|
|
memcpy((void*)outbuf, (void*)(buf+bufpos), mlen); |
376 |
|
|
return mlen; |
377 |
|
|
} |
378 |
|
|
|
379 |
|
|
uchar* peekStr(uchar* outbuf, int len) { |
380 |
|
|
int rd=peek(outbuf,len); |
381 |
|
|
if (rd>0) { outbuf[rd]='\0'; return outbuf; } |
382 |
|
|
else return NULL; |
383 |
|
|
} |
384 |
|
|
//looks ahead to check if what follows matches |
385 |
|
|
int peekCmp(char* cmpstr, int cmplen=0) { |
386 |
|
|
if (eob) //GError("GReadBuf::peekcmp error: eob!\n"); |
387 |
|
|
return -2; |
388 |
|
|
if (!cmplen) cmplen=strlen(cmpstr); |
389 |
|
|
if (cmplen>bufused-bufpos) { |
390 |
|
|
refill(true); |
391 |
|
|
if (cmplen>bufused-bufpos) return -2; |
392 |
|
|
} |
393 |
|
|
//use memcmp |
394 |
|
|
return memcmp((void*)(buf+bufpos), cmpstr, cmplen); |
395 |
|
|
} |
396 |
|
|
|
397 |
|
|
}; |
398 |
|
|
|
399 |
|
|
//circular line buffer, with read-ahead (peeking) capability |
400 |
|
|
class GReadBufLine { |
401 |
|
|
protected: |
402 |
|
|
struct BufLine { |
403 |
|
|
off_t fpos; |
404 |
|
|
int len; |
405 |
|
|
char* chars; |
406 |
|
|
}; |
407 |
|
|
int bufcap; //total number of lines in the buf array |
408 |
|
|
int bufidx; // the "current line" index in buf array |
409 |
|
|
bool isEOF; |
410 |
|
|
int lno; |
411 |
|
|
FILE* file; |
412 |
|
|
off_t filepos; //current file/stream offset for the first char of buf[bufidx] |
413 |
|
|
BufLine* buf; //array of bufferred lines |
414 |
|
|
char* readline(int idx);//read line from file into the buffer |
415 |
|
|
int fillbuf(); |
416 |
|
|
bool isEOB; |
417 |
|
|
public: |
418 |
|
|
const char* line(); //gets current line and advances the "current line" pointer |
419 |
|
|
//use putLine() to revert/undo this advancement |
420 |
|
|
off_t fpos(); //gets current line's byte offset in the file |
421 |
|
|
// does NOT advance the "current line" pointer |
422 |
|
|
int len(); //gets current line's length |
423 |
|
|
// does NOT advance the "current line" pointer |
424 |
|
|
bool isEof() { return isEOB; } |
425 |
|
|
bool eof() { return isEOB; } |
426 |
|
|
off_t getfpos() { return fpos(); } |
427 |
|
|
const char* getline() { return line(); } |
428 |
|
|
const char* getLine() { return line(); } |
429 |
|
|
int getLen() { return len(); } |
430 |
|
|
int linenumber() { return lno; } |
431 |
|
|
int lineno() { return lno; } |
432 |
|
|
int getLineNo() { return lno; } |
433 |
|
|
void putLine(); |
434 |
|
|
GReadBufLine(FILE* stream, int bcap=20) { |
435 |
|
|
if (bcap<2) bcap=2; //at least 1 prev line is needed for putLine() |
436 |
|
|
bufcap=bcap; |
437 |
|
|
bufidx=-1; |
438 |
|
|
isEOB=false; |
439 |
|
|
isEOF=false; |
440 |
|
|
lno=0; |
441 |
|
|
GMALLOC(buf, bufcap * sizeof(BufLine)); |
442 |
|
|
for (int i=0;i<bufcap;i++) { |
443 |
|
|
buf[i].chars=NULL; |
444 |
|
|
buf[i].fpos=-1; |
445 |
|
|
buf[i].len=0; |
446 |
|
|
} |
447 |
|
|
file=stream; |
448 |
|
|
fillbuf(); |
449 |
|
|
} |
450 |
|
|
~GReadBufLine() { |
451 |
|
|
for (int i=0;i<bufcap;i++) { |
452 |
|
|
GFREE(buf[i].chars); |
453 |
|
|
} |
454 |
|
|
GFREE(buf); |
455 |
|
|
} |
456 |
|
|
}; |
457 |
|
|
|
458 |
|
|
#endif