ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GHash.hh
Revision: 16
Committed: Mon Jul 18 20:56:02 2011 UTC (7 years, 11 months ago) by gpertea
File size: 16767 byte(s)
Log Message:
sync with local source

Line File contents
1 /********************************************************************************
2 * Hash table class template (char* based) *
3 *********************************************************************************/
4
5 #ifndef GHash_HH
6 #define GHash_HH
7 #include "GBase.h"
8 /**
9 * This class maintains a fast-access hash table of entities
10 * indexed by a character string (essentially, maps strings to pointers)
11 */
12
13 typedef struct {
14 char* key; // Key string
15 bool keyalloc; //shared key flag (to not free the key chars)
16 int hash; // Hash value of key
17 pointer data; // Data
18 bool mark; // Entry is marked
19 } GHashEntry;
20
21 template <class OBJ> class GHash {
22 protected:
23 GHashEntry* hash; // Hash
24 int fCapacity; // table size
25 int fCount; // number of valid entries
26 int fCurrentEntry;
27 char* lastkeyptr; //pointer to last key string added
28 //---------- Raw data retrieval (including empty entries
29 // Return key at position pos.
30 const char* Key(uint pos) const { return hash[pos].key; }
31 // return data OBJ* at given position
32 OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
33 // Return mark flag of entry at position pos.
34 bool Mark(uint pos) const { return hash[pos].mark; }
35 // Return position of first filled slot, or >= fCapacity
36 int First() const;
37 // Return position of last filled slot or -1
38 int Last() const;
39 // Return position of next filled slot in hash table
40 // or a value greater than or equal to fCapacity if no filled
41 // slot was found
42 int Next(int pos) const;
43 //Return position of previous filled slot in hash table
44 //or a -1 if no filled slot was found
45 int Prev(int pos) const;
46
47 private:
48 GHash(const GHash&);
49 GHash &operator=(const GHash&);
50 GFreeProc* fFreeProc; //procedure to free item data
51 protected:
52 public:
53 static void DefaultFreeProc(pointer item) {
54 delete (OBJ*)item;
55 item=NULL;
56 }
57 public:
58 GHash(GFreeProc* freeProc); // constructs of an empty hash
59 GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
60 void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
61 void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
62 int Capacity() const { return fCapacity; } // table's size, including the empty slots.
63 void Resize(int m); // Resize the table to the given size.
64 int Count() const { return fCount; }// the total number of entries in the table.
65 // Insert a new entry into the table given key and mark.
66 // If there is already an entry with that key, leave it unchanged,
67 const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false);
68 //same as Add, but the key pointer is stored directly, no string duplicate
69 //is made (shared-key-Add)
70 const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
71
72 // Replace data at key, if the entry's mark is less than
73 // or equal to the given mark. If there was no existing entry,
74 // a new entry is inserted with the given mark.
75 OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
76 // Remove a given key and its data
77 OBJ* Remove(const char* ky);
78 // Find data OBJ* given key.
79 OBJ* Find(const char* ky, char** keyptr=NULL);
80 bool hasKey(const char* ky);
81 char* getLastKey() { return lastkeyptr; }
82 OBJ* operator[](const char* ky) { return Find(ky); }
83 void startIterate(); //iterator-like initialization
84 char* NextKey(); //returns next valid key in the table (NULL if no more)
85 OBJ* NextData(); //returns next valid hash[].data
86 OBJ* NextData(char*& nextkey); //returns next valid hash[].data
87 //or NULL if no more
88 //nextkey is SET to the corresponding key
89 GHashEntry* NextEntry(); //returns a pointer to a GHashEntry
90
91 /// Clear all entries
92 void Clear();
93
94 /// Destructor
95 virtual ~GHash();
96 };
97 //
98 //======================== method definitions ========================
99 //
100 /*
101 Notes:
102 - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
103 GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
104 - Since the algorithm doubles the table size when exceeding MAX_LOAD,
105 it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
106 otherwise, the algorithm might hip-hop between halving and doubling,
107 which would be quite expensive!!
108 - Not many people seem to know that hash tables don't have to be prime
109 numbers; in fact, a table size of 2**n and odd probe distance are very
110 easy to arrange, and this works just as well!
111 - We store the hash key, so that 99.999% of the time we can compare hash numbers;
112 only when hash numbers match do we need to compare keys.
113 Thus, with a good hash function, the fCount of calls to strcmp() should be
114 roughly the same as the fCount of successful lookups.
115 - The hash table should NEVER get full, or stuff will loop forever!!
116 */
117
118 // Initial table size (MUST be power of 2)
119 #define DEF_HASH_SIZE 32
120 // Maximum hash table load factor (%)
121 #define MAX_LOAD 80
122 // Minimum hash table load factor (%)
123 #define MIN_LOAD 10
124 // Probe Position [0..n-1]
125 #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
126 // Probe Distance [1..n-1]
127 #define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
128
129 #define FREEDATA (fFreeProc!=NULL)
130
131 /*******************************************************************************/
132 // Construct empty hash
133 template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
134 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
135 fFreeProc=freeProc;
136 for (uint i=0; i<DEF_HASH_SIZE; i++)
137 hash[i].hash=-1; //this will be an indicator for 'empty' entries
138 fCapacity=DEF_HASH_SIZE;
139 fCount=0;
140 }
141
142 template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
143 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
144 fFreeProc = (doFree)?&DefaultFreeProc : NULL;
145 for (uint i=0; i<DEF_HASH_SIZE; i++)
146 hash[i].hash=-1; //this will be an indicator for 'empty' entries
147 fCapacity=DEF_HASH_SIZE;
148 fCount=0;
149 }
150
151
152 // Resize table
153 template <class OBJ> void GHash<OBJ>::Resize(int m){
154 register int i,n,p,x,h;
155 GHashEntry *k;
156 GASSERT(fCount<=fCapacity);
157 if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
158 n=fCapacity;
159 while((n>>2)>m) n>>=1; // Shrink until n/4 <= m
160 while((n>>1)<m) n<<=1; // Grow until m <= n/2
161 GASSERT(m<=(n>>1));
162 GASSERT(DEF_HASH_SIZE<=n);
163 if(n!=fCapacity){
164 GASSERT(m<=n);
165 GMALLOC(k, sizeof(GHashEntry)*n);
166 for(i=0; i<n; i++) k[i].hash=-1;
167 for(i=0; i<fCapacity; i++){
168 h=hash[i].hash;
169 if(0<=h){
170 p=HASH1(h,n);
171 GASSERT(0<=p && p<n);
172 x=HASH2(h,n);
173 GASSERT(1<=x && x<n);
174 while(k[p].hash!=-1) p=(p+x)%n;
175 GASSERT(k[p].hash<0);
176 k[p]=hash[i];
177 }
178 }
179 GFREE(hash);
180 hash=k;
181 fCapacity=n;
182 }
183 }
184
185 // add a new entry, or update it if it already exists
186 template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
187 const OBJ* pdata,bool mrk){
188 register int p,i,x,h,n;
189 if(!ky) GError("GHash::insert: NULL key argument.\n");
190 GASSERT(fCount<fCapacity);
191 h=strhash(ky);
192 GASSERT(0<=h);
193 p=HASH1(h,fCapacity);
194 GASSERT(0<=p && p<fCapacity);
195 x=HASH2(h,fCapacity);
196 GASSERT(1<=x && x<fCapacity);
197 i=-1;
198 n=fCapacity;
199 while(n && hash[p].hash!=-1){
200 if ((i==-1)&&(hash[p].hash==-2)) i=p;
201 if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
202 //replace hash data for this key!
203 lastkeyptr=hash[p].key;
204 hash[p].data = (void*) pdata;
205 return (OBJ*)hash[p].data;
206 }
207 p=(p+x)%fCapacity;
208 n--;
209 }
210 if(i==-1) i=p;
211 GTRACE(("GHash::insert: key=\"%s\"\n",ky));
212 //GMessage("GHash::insert: key=\"%s\"\n",ky);
213 GASSERT(0<=i && i<fCapacity);
214 GASSERT(hash[i].hash<0);
215 hash[i].hash=h;
216 hash[i].mark=mrk;
217 hash[i].key=Gstrdup(ky);
218 hash[i].keyalloc=true;
219 lastkeyptr=hash[i].key;
220 hash[i].data= (void*) pdata;
221 fCount++;
222 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
223 GASSERT(fCount<fCapacity);
224 return pdata;
225 }
226
227 template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
228 const OBJ* pdata,bool mrk){
229 register int p,i,x,h,n;
230 if(!ky) GError("GHash::insert: NULL key argument.\n");
231 GASSERT(fCount<fCapacity);
232 h=strhash(ky);
233 GASSERT(0<=h);
234 p=HASH1(h,fCapacity);
235 GASSERT(0<=p && p<fCapacity);
236 x=HASH2(h,fCapacity);
237 GASSERT(1<=x && x<fCapacity);
238 i=-1;
239 n=fCapacity;
240 while(n && hash[p].hash!=-1){
241 if((i==-1)&&(hash[p].hash==-2)) i=p;
242 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
243 //replace hash data for this key!
244 lastkeyptr=hash[p].key;
245 hash[p].data = (void*) pdata;
246 return (OBJ*)hash[p].data;
247 }
248 p=(p+x)%fCapacity;
249 n--;
250 }
251 if(i==-1) i=p;
252 GTRACE(("GHash::insert: key=\"%s\"\n",ky));
253 //GMessage("GHash::insert: key=\"%s\"\n",ky);
254 GASSERT(0<=i && i<fCapacity);
255 GASSERT(hash[i].hash<0);
256 hash[i].hash=h;
257 hash[i].mark=mrk;
258 hash[i].key=(char *)ky;
259 lastkeyptr=hash[i].key;
260 hash[i].keyalloc=false;
261 hash[i].data= (void*) pdata;
262 fCount++;
263 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
264 GASSERT(fCount<fCapacity);
265 return pdata;
266 }
267
268
269 // Add or replace entry
270 template <class OBJ> OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
271 register int p,i,x,h,n;
272 if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
273 GASSERT(fCount<fCapacity);
274 h=strhash(ky);
275 GASSERT(0<=h);
276 p=HASH1(h,fCapacity);
277 GASSERT(0<=p && p<fCapacity);
278 x=HASH2(h,fCapacity);
279 GASSERT(1<=x && x<fCapacity);
280 i=-1;
281 n=fCapacity;
282 while(n && hash[p].hash!=-1){
283 if((i==-1)&&(hash[p].hash==-2)) i=p;
284 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
285 if(hash[p].mark<=mrk){
286 GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
287 if (FREEDATA) (*fFreeProc)(hash[p].data);
288 hash[p].mark=mrk;
289 hash[p].data=pdata;
290 }
291 return hash[p].data;
292 }
293 p=(p+x)%fCapacity;
294 n--;
295 }
296 if(i==-1) i=p;
297 GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
298 GASSERT(0<=i && i<fCapacity);
299 GASSERT(hash[i].hash<0);
300 hash[i].hash=h;
301 hash[i].mark=mrk;
302 hash[i].key=Gstrdup(ky);
303 hash[i].data=pdata;
304 fCount++;
305 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
306 GASSERT(fCount<fCapacity);
307 return pdata;
308 }
309
310
311 // Remove entry
312 template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
313 register int p,x,h,n;
314 if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
315 if(0<fCount){
316 h=strhash(ky);
317 GASSERT(0<=h);
318 p=HASH1(h,fCapacity);
319 GASSERT(0<=p && p<fCapacity);
320 x=HASH2(h,fCapacity);
321 GASSERT(1<=x && x<fCapacity);
322 GASSERT(fCount<fCapacity);
323 n=fCapacity;
324 while(n && hash[p].hash!=-1){
325 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
326 GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
327 hash[p].hash=-2;
328 hash[p].mark=false;
329 if (hash[p].keyalloc) GFREE((hash[p].key));
330 if (FREEDATA) (*fFreeProc)(hash[p].data);
331 hash[p].key=NULL;
332 hash[p].data=NULL;
333 fCount--;
334 if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
335 GASSERT(fCount<fCapacity);
336 return NULL;
337 }
338 p=(p+x)%fCapacity;
339 n--;
340 }
341 }
342 return NULL;
343 }
344
345
346 // Find entry
347 template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
348 register int p,x,h,n;
349 if(!ky){ GError("GHash::find: NULL key argument.\n"); }
350 if(0<fCount){
351 h=strhash(ky);
352 GASSERT(0<=h);
353 p=HASH1(h,fCapacity);
354 GASSERT(0<=p && p<fCapacity);
355 x=HASH2(h,fCapacity);
356 GASSERT(1<=x && x<fCapacity);
357 GASSERT(fCount<fCapacity);
358 n=fCapacity;
359 while(n && hash[p].hash!=-1){
360 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
361 return true;
362 }
363 p=(p+x)%fCapacity;
364 n--;
365 }
366 }
367 return false;
368 }
369
370 template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
371 register int p,x,h,n;
372 if(!ky){ GError("GHash::find: NULL key argument.\n"); }
373 if(0<fCount){
374 h=strhash(ky);
375 GASSERT(0<=h);
376 p=HASH1(h,fCapacity);
377 GASSERT(0<=p && p<fCapacity);
378 x=HASH2(h,fCapacity);
379 GASSERT(1<=x && x<fCapacity);
380 GASSERT(fCount<fCapacity);
381 n=fCapacity;
382 while(n && hash[p].hash!=-1){
383 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
384 if (keyptr!=NULL) *keyptr = hash[p].key;
385 return (OBJ*)hash[p].data;
386 }
387 p=(p+x)%fCapacity;
388 n--;
389 }
390 }
391 return NULL;
392 }
393
394
395 template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
396 fCurrentEntry=0;
397 }
398
399 template <class OBJ> char* GHash<OBJ>::NextKey() {
400 register int pos=fCurrentEntry;
401 while (pos<fCapacity && hash[pos].hash<0) pos++;
402 if (pos==fCapacity) {
403 fCurrentEntry=fCapacity;
404 return NULL;
405 }
406 else {
407 fCurrentEntry=pos+1;
408 return hash[pos].key;
409 }
410 }
411
412 template <class OBJ> OBJ* GHash<OBJ>::NextData() {
413 register int pos=fCurrentEntry;
414 while (pos<fCapacity && hash[pos].hash<0) pos++;
415 if (pos==fCapacity) {
416 fCurrentEntry=fCapacity;
417 return NULL;
418 }
419 else {
420 fCurrentEntry=pos+1;
421 return (OBJ*)hash[pos].data;
422 }
423
424 }
425
426 template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
427 register int pos=fCurrentEntry;
428 while (pos<fCapacity && hash[pos].hash<0) pos++;
429 if (pos==fCapacity) {
430 fCurrentEntry=fCapacity;
431 nextkey=NULL;
432 return NULL;
433 }
434 else {
435 fCurrentEntry=pos+1;
436 nextkey=hash[pos].key;
437 return (OBJ*)hash[pos].data;
438 }
439
440 }
441
442 template <class OBJ> GHashEntry* GHash<OBJ>::NextEntry() {
443 register int pos=fCurrentEntry;
444 while (pos<fCapacity && hash[pos].hash<0) pos++;
445 if (pos==fCapacity) {
446 fCurrentEntry=fCapacity;
447 return NULL;
448 }
449 else {
450 fCurrentEntry=pos+1;
451 return &hash[pos];
452 }
453 }
454
455
456 // Get first non-empty entry
457 template <class OBJ> int GHash<OBJ>::First() const {
458 register int pos=0;
459 while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
460 GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
461 return pos;
462 }
463
464 // Get last non-empty entry
465 template <class OBJ> int GHash<OBJ>::Last() const {
466 register int pos=fCapacity-1;
467 while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
468 GASSERT(pos<0 || 0<=hash[pos].hash);
469 return pos;
470 }
471
472
473 // Find next valid entry
474 template <class OBJ> int GHash<OBJ>::Next(int pos) const {
475 GASSERT(0<=pos && pos<fCapacity);
476 while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
477 GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
478 return pos;
479 }
480
481
482 // Find previous valid entry
483 template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
484 GASSERT(0<=pos && pos<fCapacity);
485 while(--pos >= 0){ if(0<=hash[pos].hash) break; }
486 GASSERT(pos<0 || 0<=hash[pos].hash);
487 return pos;
488 }
489
490
491 // Remove all
492 template <class OBJ> void GHash<OBJ>::Clear(){
493 register int i;
494 for(i=0; i<fCapacity; i++){
495 if(hash[i].hash>=0){
496 if (hash[i].keyalloc) GFREE((hash[i].key));
497 if (FREEDATA)
498 (*fFreeProc)(hash[i].data);
499 }
500 }
501 GFREE(hash);
502 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
503 //reinitialize it
504 for (i=0; i<DEF_HASH_SIZE; i++)
505 hash[i].hash=-1; //this will be an indicator for 'empty' entries
506 fCapacity=DEF_HASH_SIZE;
507 fCount=0;
508 }
509
510
511 // Save data
512 /*
513 void GHash::Save(Stream& store) const {
514 Object::save(store);
515 store << fCapacity;
516 store << fCount;
517 for(int i=0; i<fCapacity; i++){
518 store << hash[i].hash;
519 if(hash[i].hash>=0){
520 uint len=strlen(hash[i].key);
521 store << len;
522 store << hash[i].mark;
523 store.save(hash[i].key,len);
524 }
525 }
526 }
527
528
529 // Load data
530 void GHash::Load(Stream& store){
531 Object::load(store);
532 store >> fCapacity;
533 store >> fCount;
534 for(int i=0; i<fCapacity; i++){
535 store >> hash[i].hash;
536 if(hash[i].hash>=0){
537 uint len;
538 store >> len;
539 store >> hash[i].mark;
540 GMALLOC(hash[i].key,len+1);
541 store.load(hash[i].key,len);
542 hash[i].key[len]='\0';
543 }
544 }
545 }
546 */
547
548 // Destroy table
549 template <class OBJ> GHash<OBJ>::~GHash(){
550 register int i;
551 for(i=0; i<fCapacity; i++){
552 if(hash[i].hash>=0){
553 if (hash[i].keyalloc) GFREE((hash[i].key));
554 if (FREEDATA) (*fFreeProc)(hash[i].data);
555 }
556 }
557 GFREE(hash);
558 }
559
560 #endif