ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gcdb.cpp
Revision: 16
Committed: Mon Jul 18 20:56:02 2011 UTC (8 years, 3 months ago) by gpertea
File size: 24443 byte(s)
Log Message:
sync with local source

Line File contents
1 #include "gcdb.h"
2 #include <errno.h>
3
4 #ifdef __WIN32__
5 /* m m a p === from imagick sources
6 % Method mmap emulates the Unix method of the same name.
7 % The format of the mmap method is:
8 % void *mmap(char *address,size_t length,int protection,
9 % int access,int file,off_t offset)
10 */
11 void *mmap(char *address,size_t length,int protection,int access,
12 int file, off_t offset) {
13 void *map;
14 HANDLE handle;
15 map=(void *) NULL;
16 handle=INVALID_HANDLE_VALUE;
17 switch (protection)
18 {
19 case PROT_READ:
20 default:
21 {
22 handle=CreateFileMapping((HANDLE) _get_osfhandle(file),0,PAGE_READONLY,0,
23 length,0);
24 if (!handle)
25 break;
26 map=(void *) MapViewOfFile(handle,FILE_MAP_READ,0,0,length);
27 CloseHandle(handle);
28 break;
29 }
30 case PROT_WRITE:
31 {
32 handle=CreateFileMapping((HANDLE) _get_osfhandle(file),0,PAGE_READWRITE,0,
33 length,0);
34 if (!handle)
35 break;
36 map=(void *) MapViewOfFile(handle,FILE_MAP_WRITE,0,0,length);
37 CloseHandle(handle);
38 break;
39 }
40 case PROT_READWRITE:
41 {
42 handle=CreateFileMapping((HANDLE) _get_osfhandle(file),0,PAGE_READWRITE,0,
43 length,0);
44 if (!handle)
45 break;
46 map=(void *) MapViewOfFile(handle,FILE_MAP_ALL_ACCESS,0,0,length);
47 CloseHandle(handle);
48 break;
49 }
50 }
51 if (map == (void *) NULL)
52 return((void *) MAP_FAILED);
53 return((void *) ((char *) map+offset));
54 }
55
56 /* =========== m u n m a p ===========================
57 %
58 % Method munmap emulates the Unix method with the same name.
59 % The format of the munmap method is:
60 % int munmap(void *map,size_t length)
61 % A description of each parameter follows:
62 % > status: Method munmap returns 0 on success; otherwise, it
63 % returns -1 and sets errno to indicate the error.
64 % > map: The address of the binary large object.
65 % > length: The length of the binary large object.
66 %
67 */
68 int munmap(void *map,size_t length) {
69 if (!UnmapViewOfFile(map))
70 return(-1);
71 return(0);
72 }
73
74 #endif
75
76
77
78 int cdbInfoSIZE=offsetof(cdbInfo, tag)+4;
79 int IdxDataSIZE=offsetof(CIdxData, reclen)+sizeof(uint32);
80 int IdxDataSIZE32=offsetof(CIdxData32, reclen)+sizeof(uint32);
81 /*
82 int IdxSeqDataSIZE=offsetof(CIdxSeqData, elen)+sizeof(byte);
83 int IdxSeqDataSIZE32=offsetof(CIdxSeqData32, elen)+sizeof(byte);
84 */
85
86 //=====================================================
87 //------------- buffer stuff -------------------
88 //=====================================================
89
90 //-------------------------------------
91 //--------- misc utility functions -----
92
93 static int gcdb_seek_set(int fd,gcdb_seek_pos pos) {
94 if (lseek(fd, pos, 0) == -1)
95 return -1;
96 return 0;
97 }
98
99 #define gcdb_seek_begin(fd) (gcdb_seek_set((fd),(gcdb_seek_pos) 0))
100
// Returns the number of characters before the terminating NUL of `s`.
// `s` must point to a NUL-terminated string.
//
// The former implementation used the `register` storage class (no longer
// valid in C++17) and cast away constness; neither is needed for a plain scan.
static unsigned int gcdb_strlen(const char *s) {
  const char *t = s;
  while (*t) ++t;
  return (unsigned int)(t - s);
}
111
112
// Compares the first `n` bytes of `s` and `t`.
// Returns 0 when they are all equal (or n is 0); otherwise the difference
// between the first pair of differing bytes, both taken as unsigned char.
static int byte_diff(char *s, unsigned int n,char *t) {
  for (unsigned int i = 0; i < n; ++i) {
    const unsigned char a = (unsigned char) s[i];
    const unsigned char b = (unsigned char) t[i];
    if (a != b)
      return (int) a - (int) b;
  }
  return 0;
}
123
// Copies `n` bytes from `from` to `to`, one byte at a time in ascending
// address order (the order matters when the two ranges overlap).
static void gcdb_byte_copy(char *to, unsigned int n, char *from) {
  for (unsigned int i = 0; i < n; ++i)
    to[i] = from[i];
}
132
// Copies `n` bytes from `from` to `to`, one byte at a time starting with the
// last byte and walking down (the order matters when the ranges overlap).
static void gcdb_byte_copyr(char *to, unsigned int n, char *from) {
  for (unsigned int i = n; i > 0; --i)
    to[i - 1] = from[i - 1];
}
143
/* Small static arena from which gcdb_alloc() serves requests before it
   falls back to malloc(). */
#define ALIGNMENT 16 /* XXX: assuming that this alignment is enough */
#define SPACE 4096 /* must be multiple of ALIGNMENT */

/* one ALIGNMENT-sized cell of the arena */
union aligned {
  char irrelevant[ALIGNMENT];
  double d;
};

static aligned realspace[SPACE / ALIGNMENT];
#define space ((char *) realspace)

/* bytes of the arena not handed out yet;
   multiple of ALIGNMENT; 0<=avail<=SPACE */
static unsigned int avail = SPACE;
152
153 offt_conv_func gcvt_offt;
154 uint_conv_func gcvt_uint;
155 int16_conv_func gcvt_int16;
156
157 char *gcdb_alloc(unsigned int n) {
158 char *x;
159 n = ALIGNMENT + n - (n & (ALIGNMENT - 1)); /* XXX: could overflow */
160 if (n <= avail) { avail -= n; return space + avail; }
161 x = (char*) malloc(n);
162 if (!x) return NULL;
163 //if (!x) GError("Error: mgcdb_alloc(%d) failed !\n", n);
164 return x;
165 }
166
167
168 int GCDBuffer::write_all(char* buf, unsigned int len) {
169 int w;
170 while (len) {
171 w = op(fd,buf,len);
172 if (w == -1) {
173 if (errno == error_intr) continue;
174 return -1; /* note that some data may have been written */
175 }
176 /* if (w == 0) ; luser's fault */
177 buf += w;
178 len -= w;
179 }
180 return 0;
181 }
182
183 int GCDBuffer::flush() {
184 int pt=p;
185 if (!pt) return 0;
186 p = 0;
187 //return allwrite(op,fd,x,pt);
188 return write_all(x,pt);
189 }
190
191 int GCDBuffer::putalign(char *buf,unsigned int len) {
192 unsigned int bn;
193
194 while (len > (bn = n-p)) {
195 gcdb_byte_copy(x + p,bn,buf);
196 p += bn; buf += bn; len -= bn;
197 if (GCDBuffer::flush() == -1) return -1;
198 }
199
200 /* now len <= s->n - s->p */
201 gcdb_byte_copy(x + p,len,buf);
202 p += len;
203 return 0;
204 }
205
206 int GCDBuffer::put(char *buf,unsigned int len) {
207 unsigned int bn=n;
208 if (len > bn - p) {
209 if (GCDBuffer::flush() == -1) return -1;
210 /* now s->p == 0 */
211 if (bn < GCDBUFFER_OUTSIZE) bn = GCDBUFFER_OUTSIZE;
212 while (len > n) {
213 if (bn > len) bn = len;
214 if (write_all(buf, bn) == -1) return -1;
215 buf += bn;
216 len -= bn;
217 }
218 }
219 /* now len <= s->n - s->p */
220 gcdb_byte_copy(x + p,len,buf);
221 p += len;
222 return 0;
223 }
224
225 int GCDBuffer::putflush(char *buf,unsigned int len) {
226 if (flush() == -1) return -1;
227 return write_all(buf,len);
228 }
229
230 int GCDBuffer::putsalign(char *buf) {
231 return GCDBuffer::putalign(buf, gcdb_strlen(buf));
232 }
233
234 int GCDBuffer::puts(char *buf) {
235 return GCDBuffer::put(buf, gcdb_strlen(buf));
236 }
237
238 int GCDBuffer::putsflush(char *buf) {
239 return GCDBuffer::putflush(buf, gcdb_strlen(buf));
240 }
241
242 static int oneread(opfunc op,int fd, char *buf,unsigned int len) {
243 int r;
244 for (;;) {
245 r = op(fd,buf,len);
246 if (r == -1 && errno == error_intr) continue;
247 return r;
248 }
249 }
250
251 int GCDBuffer::oneRead(char* buf, unsigned int len) {
252 return op(fd,buf,len);
253 /*int r;
254 for (;;) {
255 r = op(fd,buf,len);
256 if (r == -1 && errno == error_intr) continue;
257 return r;
258 }*/
259 }
260
261 int GCDBuffer::getthis(char *buf,unsigned int len) {
262 if (len > p) len = p;
263 p -= len;
264 gcdb_byte_copy(buf, len,x + n);
265 n += len;
266 return len;
267 }
268
269 int GCDBuffer::feed() {
270 int r;
271 if (p) return p;
272 r = oneRead(x,n);
273 if (r <= 0)
274 return r;
275 p = r;
276 n -= r;
277 if (n > 0) gcdb_byte_copyr(x + n,r,x);
278 return r;
279 }
280
281 int GCDBuffer::bget(char *buf,unsigned int len) {
282 int r;
283 if (p > 0) return getthis(buf,len);
284 if (n <= len) return oneRead(buf,n);
285 r = GCDBuffer::feed(); if (r <= 0) return r;
286 return getthis(buf,len);
287 }
288
289 int GCDBuffer::get(char *buf,unsigned int len) {
290 int r;
291 if (p > 0) return getthis(buf,len);
292 if (n <= len) return oneread(op,fd,buf,len);
293 r = GCDBuffer::feed();
294 if (r <= 0)
295 return r;
296 return getthis(buf,len);
297 }
298
299 char* GCDBuffer::peek() {
300 return x + n;
301 }
302
303 void GCDBuffer::seek(unsigned int len) {
304 n += len;
305 p -= len;
306 }
307
308 int GCDBuffer::copy(GCDBuffer* bin) {
309 int n_in;
310 char *x_in;
311 for (;;) {
312 n_in = bin->feed();
313 if (n_in < 0) return -2;
314 if (!n_in) return 0;
315 x_in = bin->peek();
316 if (GCDBuffer::put(x_in,n_in) == -1) return -3;
317 bin->seek(n_in);
318 }
319 }
320
321 //=====================================================
322 //------------- cdb utils -------------------
323 //=====================================================
324
// errno value meaning "interrupted call"; -1 where EINTR is not defined.
#ifdef EINTR
int error_intr = EINTR;
#else
int error_intr = -1;
#endif
331
// errno value meaning "out of memory"; -2 where ENOMEM is not defined.
#ifdef ENOMEM
int error_nomem = ENOMEM;
#else
int error_nomem = -2;
#endif
338
// errno value meaning "protocol error"; -15 where EPROTO is not defined.
#ifdef EPROTO
int error_proto = EPROTO;
#else
int error_proto = -15;
#endif
345 //------------------------------------------------
//------------ byte-order conversion routines:
347
348 /* conversion of unsigned int offsets read from a file
349 can also be used to prepare unsigned integers to be written
350 into a file in an independent platform manner
351 */
352
// Byte-wise views of a 32-bit and a 16-bit integer, used for byte swapping.
union UInt32Bytes {
  unsigned char b[4];
  int32_t ui;
};

union UInt16Bytes {
  unsigned char b[2];
  int16_t ui;
};

// Reads the 4 bytes at `x86int` in reversed order and returns them
// reinterpreted as a native integer.
unsigned int uint32_sun(void* x86int) {
  const unsigned char* src = (const unsigned char*) x86int;
  UInt32Bytes ub;
  for (int i = 0; i < 4; ++i)
    ub.b[i] = src[3 - i];
  return ub.ui;
}

// 16-bit counterpart of uint32_sun(): swaps the 2 bytes at `x86int`.
int16_t int16_sun(void* x86int) {
  const unsigned char* src = (const unsigned char*) x86int;
  UInt16Bytes ub;
  for (int i = 0; i < 2; ++i)
    ub.b[i] = src[1 - i];
  return ub.ui;
}
379
380 /* unsigned int uint32_sun(void* x86int) {
381 unsigned char b[4];
382 b[3]=((unsigned char*)x86int)[0];
383 b[0]=((unsigned char*)x86int)[3];
384 b[1]=((unsigned char*)x86int)[2];
385 b[2]=((unsigned char*)x86int)[1];
386 return *((unsigned int*)b);
387 return *ub;
388 }*/
389
// Returns the unsigned int stored at `offt` as is (no byte swapping).
unsigned int uint32_x86(void* offt) {
  const unsigned int* value = (const unsigned int*) offt;
  return *value;
}
393
// Returns the int16_t stored at `v` as is (no byte swapping).
int16_t int16_x86(void* v) {
  const int16_t* value = (const int16_t*) v;
  return *value;
}
397
398 //-------- 64bit types conversion :
// Byte-wise view of an off_t, used for byte swapping.
union ULongBytes {
  unsigned char b[8];
  off_t ob;
};

// Reads an off_t from `offt` with its bytes reversed: all 8 bytes when off_t
// is 8 bytes wide, otherwise only the first 4.
off_t offt_sun(void* offt) {
  const unsigned char* src = (const unsigned char*) offt;
  const int width = (sizeof(off_t) == 8) ? 8 : 4;
  ULongBytes ub;
  for (int i = 0; i < width; ++i)
    ub.b[i] = src[width - 1 - i];
  return ub.ob;
}
428
429 /*
430 off_t offt_sun(void* offt) {
431 unsigned char b[8];
432 if (sizeof(off_t)==8) { //64 bit?
433 // upper words:
434 b[3]=((unsigned char*)offt)[4];
435 b[0]=((unsigned char*)offt)[7];
436 b[1]=((unsigned char*)offt)[6];
437 b[2]=((unsigned char*)offt)[5];
438 //--
439 b[7]=((unsigned char*)offt)[0];
440 b[4]=((unsigned char*)offt)[3];
441 b[5]=((unsigned char*)offt)[2];
442 b[6]=((unsigned char*)offt)[1];
443 }
444 else {
445 b[3]=((unsigned char*)offt)[0];
446 b[0]=((unsigned char*)offt)[3];
447 b[1]=((unsigned char*)offt)[2];
448 b[2]=((unsigned char*)offt)[1];
449 }
450 return *((off_t*)b);
451 }
452 */
453
454
// Returns the off_t stored at `offt` as is (no byte swapping).
off_t offt_x86(void* offt) {
  const off_t* value = (const off_t*) offt;
  return *value;
}
458
459
460
461 //------------------------ platform independent uint32 :
462
463 void uint32_pack(char s[4],uint32 u)
464 {
465 s[0] = u & 255;
466 u >>= 8;
467 s[1] = u & 255;
468 u >>= 8;
469 s[2] = u & 255;
470 s[3] = u >> 8;
471 }
472
473 void uint32_pack_big(char s[4],uint32 u)
474 {
475 s[3] = u & 255;
476 u >>= 8;
477 s[2] = u & 255;
478 u >>= 8;
479 s[1] = u & 255;
480 s[0] = u >> 8;
481 }
482
483 /* unpacking: */
484
485
486 void uint32_unpack(char s[4],uint32 *u)
487 {
488 uint32 result;
489
490 result = (unsigned char) s[3];
491 result <<= 8;
492 result += (unsigned char) s[2];
493 result <<= 8;
494 result += (unsigned char) s[1];
495 result <<= 8;
496 result += (unsigned char) s[0];
497
498 *u = result;
499 }
500
501 void uint32_unpack_big(char s[4],uint32 *u)
502 {
503 uint32 result;
504
505 result = (unsigned char) s[0];
506 result <<= 8;
507 result += (unsigned char) s[1];
508 result <<= 8;
509 result += (unsigned char) s[2];
510 result <<= 8;
511 result += (unsigned char) s[3];
512
513 *u = result;
514 }
515
516 /*
517 big/little endian check
518 */
// Byte-order probe: stores 0x0001 in an unsigned short and returns its
// second byte -- 0 when the low-order byte is stored first, 1 otherwise.
int endian_test(void) {
  const unsigned short probe = 0x0001;
  const unsigned char* bytes = (const unsigned char*) &probe;
  return bytes[1];
}
524
525 void gcvt_endian_setup() {
526 //check endianness
527 if (endian_test()) {
528 gcvt_uint = &uint32_sun;
529 gcvt_offt = &offt_sun;
530 gcvt_int16 = &int16_sun;
531 }
532 else {
533 gcvt_uint = &uint32_x86;
534 gcvt_offt = &offt_x86;
535 gcvt_int16 = &int16_x86;
536 }
537 }
538
539 //=====================================================
540 //------------- cdb index -------------------
541 //=====================================================
542
543 GCdbWrite::GCdbWrite(int afd) {
544 //check endianness :)
545 gcvt_endian_setup();
546 cdbuf=new GCDBuffer((opfunc)&write,(int) afd,(char*)bspace,sizeof bspace);
547 head = NULL;
548 split = 0;
549 hash = 0;
550 numentries = 0;
551 fd = afd;
552 pos = sizeof final;
553 gcdb_seek_set(fd, pos);
554
555 fname[0]='\0';
556 //should return and test the result of gcdb_seek_set!!!
557 }
558
559 GCdbWrite::GCdbWrite(char* afname) {
560 #ifdef __WIN32__
561 fd = open(afname,O_WRONLY | O_TRUNC | O_BINARY | O_CREAT, S_IREAD|S_IWRITE);
562 #else
563 fd = open(afname,O_WRONLY | O_NDELAY | O_TRUNC | O_CREAT, 0664);
564 #endif
565 if (fd == -1)
566 GError("GCdbWrite: Error creating file '%s'\n", fname);
567
568 gcvt_endian_setup();
569
570 cdbuf=new GCDBuffer((opfunc)&write,(int) fd,(char*)bspace,sizeof bspace);
571 head = NULL;
572 split = 0;
573 hash = 0;
574 numentries = 0;
575 pos = sizeof final;
576 gcdb_seek_set(fd, pos);
577 strcpy(fname, afname);
578
579 //should return and test the result of gcdb_seek_set!!!
580 }
581
582 GCdbWrite::~GCdbWrite() {
583 cdbuf->flush();
584 #ifndef __WIN32__
585 /* NFS silliness */
586 if (fsync(fd) == -1)
587 GError("GCdbWrite: Error at fsync() for file '%s'\n",
588 fname);
589 #endif
590 if (::close(fd) == -1)
591 GError("GCdbWrite: Error at closing file '%s'\n",
592 fname);
593 delete cdbuf;
594 if (head!=NULL) free(head);
595 }
596
597 int GCdbWrite::posplus(uint32 len) {
598 uint32 newpos = pos + len;
599 if (newpos < len) { //errno = error_nomem;
600 return -1; }
601 pos = newpos;
602 return 0;
603 }
604
605 int GCdbWrite::addend(unsigned int keylen,unsigned int datalen,uint32 h) {
606 struct cdb_hplist *chead = head;
607 if (!chead || (chead->num >= CDB_HPLIST)) {
608 chead = (struct cdb_hplist *) gcdb_alloc(sizeof(struct cdb_hplist));
609 if (!chead) return -1;
610 chead->num = 0;
611 chead->next = head;
612 head = chead;
613 }
614 chead->hp[head->num].h = h;
615 chead->hp[head->num].p = pos;
616 ++chead->num;
617 ++numentries;
618 if (posplus(8) == -1) return -1;
619 if (posplus(keylen) == -1) return -1;
620 if (posplus(datalen) == -1) return -1;
621 return 0;
622 }
623
624 int GCdbWrite::addbegin(unsigned int keylen,unsigned int datalen) {
625 char buf[8];
626 //if (keylen > MAX_UINT) { /* errno = error_nomem; */return -1; }
627 // if (datalen > MAX_UINT) { /*errno = error_nomem;*/ return -1; }
628 uint32_pack(buf,keylen);
629 uint32_pack(buf + 4,datalen);
630 if (cdbuf->putalign(buf,8) == -1) return -1;
631 return 0;
632 }
633
634 #define cdbuffer_PUTC(s,c) \
635 ( ((s).n != (s).p) \
636 ? ( (s).x[(s).p++] = (c), 0 ) \
637 : (s).put(&(c),1) \
638 )
639
640 int GCdbWrite::add(const char* key, char* recdata, unsigned int datalen) {
641 unsigned int i;
642 unsigned int klen=strlen(key);
643 if (klen<1) {
644 GMessage("Warning: zero length key found\n");
645 return 0;
646 }
647 //------------ adding record -----------------
648 if (addbegin(klen,datalen)==-1)
649 GError("GCdbWrite: Error at addbegin(%d, %d)\n",klen, datalen);
650 uint32 h=CDB_HASHSTART;
651 for (i = 0;i < klen; ++i) {
652 //if (cdbuffer_PUTC(c.cdbuf,key[i]) == -1)
653 if ( ((cdbuf->n!=cdbuf->p) ? (cdbuf->x[cdbuf->p++]=(key[i]),0 )
654 : cdbuf->put((char*)&(key[i]),1) )==-1)
655 GError("GCdbWrite: Error at cdbbuf.put, key '%s'\n", key);
656 h = cdb_hashadd(h,key[i]);
657 }
658 if (cdbuf->put(recdata,datalen) == -1)
659 GError("GCdbWrite: Error at final cdbuf.put() at key='%s', datalen=%d\n",
660 key, datalen);
661 if (addend(klen,datalen,h) == -1)
662 GError("GCdbWrite: Error at addend(%d, %d, h)\n", klen, datalen);
663 return 1;
664 }
665
666 int GCdbWrite::addrec(const char *key,unsigned int keylen,char *data,unsigned int datalen) {
667 if (GCdbWrite::addbegin(keylen,datalen) == -1) return -1;
668 if (cdbuf->putalign((char*)key,keylen) == -1) return -1;
669 if (cdbuf->putalign(data,datalen) == -1) return -1;
670 return GCdbWrite::addend(keylen,datalen,cdb_hash(key,keylen));
671 }
672
673
674 int GCdbWrite::finish() {
675 char buf[8];
676 int i;
677 uint32 len;
678 uint32 u;
679 uint32 memsize;
680 uint32 icount;
681 uint32 where;
682 struct cdb_hplist *x;
683 struct cdb_hp *hp;
684
685 for (i = 0;i < 256;++i)
686 count[i] = 0;
687
688 for (x = head;x;x = x->next) {
689 i = x->num;
690 while (i--)
691 ++count[255 & x->hp[i].h];
692 }
693
694 memsize = 1;
695 for (i = 0;i < 256;++i) {
696 u = count[i] * 2;
697 if (u > memsize)
698 memsize = u;
699 }
700
701 memsize += numentries; /* no overflow possible up to now */
702 u = (uint32) 0 - (uint32) 1;
703 u /= sizeof(struct cdb_hp);
704 if (memsize > u) { /* errno = error_nomem;*/ return -1; }
705
706 split = (struct cdb_hp *) gcdb_alloc(memsize * sizeof(struct cdb_hp));
707 if (!split) return -1;
708
709 hash = split + numentries;
710
711 u = 0;
712 for (i = 0;i < 256;++i) {
713 u += count[i]; /* bounded by numentries, so no overflow */
714 start[i] = u;
715 }
716
717 for (x = head;x;x = x->next) {
718 i = x->num;
719 while (i--)
720 split[--start[255 & x->hp[i].h]] = x->hp[i];
721 }
722
723 for (i = 0;i < 256;++i) {
724 icount = count[i];
725
726 len = icount + icount; /* no overflow possible */
727 uint32_pack(final + 8 * i,pos);
728 uint32_pack(final + 8 * i + 4,len);
729
730 for (u = 0;u < len;++u)
731 hash[u].h = hash[u].p = 0;
732
733 hp = split + start[i];
734 for (u = 0;u < icount;++u) {
735 where = (hp->h >> 8) % len;
736 while (hash[where].p)
737 if (++where == len)
738 where = 0;
739 hash[where] = *hp++;
740 }
741
742 for (u = 0;u < len;++u) {
743 uint32_pack(buf,hash[u].h);
744 uint32_pack(buf + 4,hash[u].p);
745 if (cdbuf->putalign(buf,8) == -1) return -1;
746 if (posplus(8) == -1) return -1;
747 }
748 }
749
750 if (cdbuf->flush() == -1) return -1;
751 if (gcdb_seek_begin(fd) == -1) return -1;
752 return cdbuf->putflush(final,sizeof final);
753 }
754
755 //=====================================================
756 //------------- cdb -------------------
757 //=====================================================
758 uint32 cdb_hashadd(uint32 h,unsigned char c) {
759 h += (h << 5);
760 return h ^ c;
761 }
762
763 uint32 cdb_hash(const char *buf,unsigned int len) {
764 uint32 h;
765 h = CDB_HASHSTART;
766 while (len) {
767 h = cdb_hashadd(h,*buf++);
768 --len;
769 }
770 return h;
771 }
772
773 //---------------------------------------------------------------
774 //-------------------------- cdb methods ------------------------
775
776 GCdbRead::GCdbRead(int afd) {
777 struct stat st;
778 char *x;
779 map=NULL;
780 gcvt_endian_setup();
781
782 findstart();
783 fd = afd;
784 if (fstat(fd,&st) == 0) {
785 if (st.st_size <= MAX_UINT) {
786 #ifndef NO_MMAP
787 x = (char *) mmap(0,st.st_size,PROT_READ,MAP_SHARED,fd,0);
788 if (x + 1) {
789 size = st.st_size;
790 map = x;
791 }
792 else {
793 GError("Error mapping the file (size=%ld)!\n",st.st_size);
794 }
795 #endif
796 }
797 else {
798 GError("Error mapping the file (size %ld > MAX_UINT)\n",
799 st.st_size);
800 }
801 }
802 }
803
804 GCdbRead::GCdbRead(char* afname) {
805 struct stat st;
806 char *x;
807 map=NULL;
808 gcvt_endian_setup();
809
810 findstart();
811 #ifdef __WIN32__
812 fd = open(afname, O_RDONLY|O_BINARY);
813 #else
814 fd = open(afname, O_RDONLY);
815 #endif
816 if (fd == -1)
817 GError("Error: cannot open file %s\n", afname);
818 strcpy(fname, afname);
819 if (fstat(fd,&st) == 0) {
820 if (st.st_size <= MAX_UINT) {
821 #ifndef NO_MMAP
822 x = (char *) mmap(0,st.st_size,PROT_READ,MAP_SHARED,fd,0);
823 if (x + 1) {
824 size = st.st_size;
825 map = x;
826 }
827 else {
828 GError("GCdbRead: Error mapping the file (size=%ld)!\n",st.st_size);
829 }
830 #endif
831 }
832 else {
833 GError("GCdbRead: Error mapping the file (size %ld > MAX_UINT)\n",
834 st.st_size);
835 }
836 }
837 }
838
839
840 GCdbRead::~GCdbRead() {
841 if (map!=NULL) {
842 munmap(map,size);
843 map = 0;
844 }
845 }
846
847 int GCdbRead::read(char *buf,unsigned int len, uint32 pos) {
848 #ifndef NO_MMAP
849 if (map) {
850 if ((pos > size) || (size - pos < len)) {
851 /* errno = error_proto; */
852 return -1;
853 }
854 gcdb_byte_copy(buf, len, map + pos);
855 }
856 else
857 #endif
858 {
859 if (gcdb_seek_set(fd,pos) == -1) return -1;
860 while (len > 0) {
861 int r;
862 do {
863 r = ::read(fd,buf,len);
864 } while ((r == -1) && (errno == error_intr));
865 if (r == -1) return -1;
866 if (r == 0) {
867 //errno = error_proto;
868 return -1;
869 }
870 buf += r;
871 len -= r;
872 }
873 }
874 return 0;
875 }
876
877 int GCdbRead::match(const char *key, unsigned int len, uint32 pos) {
878 char buf[32];
879 unsigned int n;
880 while (len > 0) {
881 n = sizeof buf;
882 if (n > len) n = len;
883 if (GCdbRead::read(buf,n,pos) == -1) return -1;
884 if (byte_diff(buf,n,(char*)key)) return 0;
885 pos += n;
886 key += n;
887 len -= n;
888 }
889 return 1;
890 }
891
892 int GCdbRead::findnext(const char *key,unsigned int len) {
893 char buf[8];
894 uint32 pos;
895 uint32 u;
896 if (!loop) {
897 u = cdb_hash(key,len);
898 if (GCdbRead::read(buf,8,(u << 3) & 2047) == -1) return -1;
899 uint32_unpack(buf + 4,&hslots);
900 if (!hslots) return 0;
901 uint32_unpack(buf,&hpos);
902 khash = u;
903 u >>= 8;
904 u %= hslots;
905 u <<= 3;
906 kpos = hpos + u;
907 }
908 while (loop < hslots) {
909 if (GCdbRead::read(buf,8,kpos) == -1) return - 1;
910 uint32_unpack(buf + 4, &pos);
911 if (!pos) return 0;
912 loop += 1;
913 kpos += 8;
914 if (kpos == hpos + (hslots << 3)) kpos = hpos;
915 uint32_unpack(buf,&u);
916 if (u == khash) {
917 if (GCdbRead::read(buf,8,pos) == -1) return -1;
918 uint32_unpack(buf,&u);
919 if (u == len)
920 switch(GCdbRead::match(key,len,pos + 8)) {
921 case -1:
922 return -1;
923 case 1:
924 uint32_unpack(buf + 4,&dlen);
925 dpos = pos + 8 + len;
926 return 1;
927 }
928 }
929 }
930 return 0;
931 }
932
933 int GCdbRead::find(const char *key) {
934 GCdbRead::findstart();
935 return GCdbRead::findnext(key,gcdb_strlen(key));
936 }
937
938 //----- GReadBuf and GReadBufLine
939
940 char* GReadBufLine::readline(int idx) {
941 //reads a char at a time until \n and/or \r are encountered
942 GFREE(buf[idx].chars);
943 buf[idx].len=0;
944 if (isEOF) return NULL;
945 int len=0;
946 buf[idx].fpos=filepos;
947 int c=0;
948 int allocated=256;
949 GMALLOC(buf[idx].chars, allocated);
950 while ((c=getc(file))!=EOF) {
951 if (len>=allocated-1) {
952 allocated+=256;
953 GREALLOC(buf[idx].chars, allocated);
954 }
955 if (c=='\n' || c=='\r') {
956 buf[idx].chars[len]='\0';
957 if (c=='\r') { //DOS file -- special case
958 if ((c=getc(file))!='\n') ungetc(c,file);
959 else filepos++;
960 }
961 filepos++;
962 buf[idx].len=len;
963 return buf[idx].chars;
964 }
965 filepos++;
966 buf[idx].chars[len]=(char)c;
967 len++;
968 } //while i<buf_cap-1
969 if (c==EOF) { //end of file reached while reading chars
970 isEOF=true;
971 }
972 buf[idx].len=len;
973 if (len==0 && isEOF) {
974 GFREE(buf[idx].chars);
975 }
976 else {
977 buf[idx].chars[len]='\0';
978 }
979 return buf[idx].chars;
980 }
981
982
983
984 int GReadBufLine::fillbuf() {
985 if (isEOF) return -1;
986 if (bufidx==0 || bufidx==1) return 0; //buffer was just filled!
987 int bufstart=0;
988 GASSERT( (bufidx<=bufcap) );
989 if (bufidx>0) { //preserve the lines already in buffer
990 int bidx=bufidx-1;//always leave room for PREVIOUS line, for putLine()
991 for (int i=0;i<bufcap-bidx;i++) {
992 GFREE(buf[i].chars);
993 buf[i]=buf[bidx+i];
994 buf[bidx+i].chars=NULL;
995 }
996 //memmove((void*)&buf[0], (void*)&buf[bidx], (bufcap-bidx)*sizeof(BufLine));
997 bufstart=bufcap-bidx;
998 bufidx=1;
999 }
1000 else {
1001 bufidx=0; //only the first time, before the first getLine()
1002 }
1003 int rlines=0;
1004 for (int i=bufstart;i<bufcap;i++) {
1005 if (readline(i)!=NULL) rlines++;
1006 }
1007 return rlines;
1008 }
1009
1010 //get a line from the buffer, update "current line" pointer
1011 const char* GReadBufLine::line() {
1012 if (isEOB) return NULL;
1013 GASSERT( (bufidx>=0 && bufidx<bufcap) );
1014 char* r=buf[bufidx].chars;
1015 lno++;
1016 if (r==NULL) {
1017 isEOB=true;
1018 return NULL;
1019 }
1020 bufidx++;
1021 if (bufidx==bufcap) {
1022 if (isEOF) isEOB=true;
1023 else fillbuf();
1024 }
1025 return r;
1026 }
1027
1028 off_t GReadBufLine::fpos() {
1029 if (isEOB || bufidx==0) return -1;
1030 GASSERT( (bufidx>0 && bufidx<bufcap) );
1031 return buf[bufidx-1].fpos;
1032 }
1033
1034 int GReadBufLine::len() {
1035 if (isEOB || bufidx==0) return -1;
1036 GASSERT( (bufidx>0 && bufidx<bufcap) );
1037 return buf[bufidx-1].len;
1038 }
1039
1040 void GReadBufLine::putLine() {
1041 if (bufidx==0) GError("Error: calling putLine() before getLine()!\n");
1042 bufidx--;
1043 isEOB=false;
1044 lno--;
1045 }