ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GHash.hh
Revision: 227
Committed: Mon Mar 26 21:28:07 2012 UTC (7 years, 4 months ago) by gpertea
File size: 16846 byte(s)
Log Message:
Line File contents
1 /********************************************************************************
2 * Hash table class template (char* based) *
3 *********************************************************************************/
4
5 #ifndef GHash_HH
6 #define GHash_HH
7 #include "GBase.h"
8
9 /**
10 * This class maintains a fast-access hash table of entities
11 * indexed by a character string (essentially, maps strings to pointers)
12 */
13
14 typedef struct {
15 char* key; // Key string
16 bool keyalloc; //shared key flag (to not free the key chars)
17 int hash; // Hash value of key
18 pointer data; // Data
19 bool mark; // Entry is marked
20 } GHashEntry;
21
22 template <class OBJ> class GHash {
23 protected:
24 GHashEntry* hash; // Hash
25 int fCapacity; // table size
26 int fCount; // number of valid entries
27 int fCurrentEntry;
28 char* lastkeyptr; //pointer to last key string added
29 //---------- Raw data retrieval (including empty entries
30 // Return key at position pos.
31 const char* Key(uint pos) const { return hash[pos].key; }
32 // return data OBJ* at given position
33 OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
34 // Return mark flag of entry at position pos.
35 bool Mark(uint pos) const { return hash[pos].mark; }
36 // Return position of first filled slot, or >= fCapacity
37 int First() const;
38 // Return position of last filled slot or -1
39 int Last() const;
40 // Return position of next filled slot in hash table
41 // or a value greater than or equal to fCapacity if no filled
42 // slot was found
43 int Next(int pos) const;
44 //Return position of previous filled slot in hash table
45 //or a -1 if no filled slot was found
46 int Prev(int pos) const;
47
48 private:
49 GHash(const GHash&);
50 GHash &operator=(const GHash&);
51 GFreeProc* fFreeProc; //procedure to free item data
52 protected:
53 public:
54 static void DefaultFreeProc(pointer item) {
55 delete (OBJ*)item;
56 item=NULL;
57 }
58 public:
59 GHash(GFreeProc* freeProc); // constructs of an empty hash
60 GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
61 void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
62 void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
63 int Capacity() const { return fCapacity; } // table's size, including the empty slots.
64 void Resize(int m); // Resize the table to the given size.
65 int Count() const { return fCount; }// the total number of entries in the table.
66 // Insert a new entry into the table given key and mark.
67 // If there is already an entry with that key, leave it unchanged,
68 const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false);
69 //same as Add, but the key pointer is stored directly, no string duplicate
70 //is made (shared-key-Add)
71 const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
72
73 // Replace data at key, if the entry's mark is less than
74 // or equal to the given mark. If there was no existing entry,
75 // a new entry is inserted with the given mark.
76 OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
77 // Remove a given key and its data
78 OBJ* Remove(const char* ky);
79 // Find data OBJ* given key.
80 OBJ* Find(const char* ky, char** keyptr=NULL);
81 bool hasKey(const char* ky);
82 char* getLastKey() { return lastkeyptr; }
83 OBJ* operator[](const char* ky) { return Find(ky); }
84 void startIterate(); //iterator-like initialization
85 char* NextKey(); //returns next valid key in the table (NULL if no more)
86 OBJ* NextData(); //returns next valid hash[].data
87 OBJ* NextData(char*& nextkey); //returns next valid hash[].data
88 //or NULL if no more
89 //nextkey is SET to the corresponding key
90 GHashEntry* NextEntry(); //returns a pointer to a GHashEntry
91
92 /// Clear all entries
93 void Clear();
94
95 /// Destructor
96 virtual ~GHash();
97 };
98 //
99 //======================== method definitions ========================
100 //
101 /*
102 Notes:
103 - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
104 GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
105 - Since the algorithm doubles the table size when exceeding MAX_LOAD,
106 it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
107 otherwise, the algorithm might hip-hop between halving and doubling,
108 which would be quite expensive!!
109 - Not many people seem to know that hash tables don't have to be prime
110 numbers; in fact, a table size of 2**n and odd probe distance are very
111 easy to arrange, and this works just as well!
112 - We store the hash key, so that 99.999% of the time we can compare hash numbers;
113 only when hash numbers match do we need to compare keys.
114 Thus, with a good hash function, the fCount of calls to strcmp() should be
115 roughly the same as the fCount of successful lookups.
116 - The hash table should NEVER get full, or stuff will loop forever!!
117 */
118
119 // Initial table size (MUST be power of 2)
120 #define DEF_HASH_SIZE 32
121 // Maximum hash table load factor (%)
122 #define MAX_LOAD 80
123 // Minimum hash table load factor (%)
124 #define MIN_LOAD 10
125 // Probe Position [0..n-1]
126 #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
127 // Probe Distance [1..n-1]
128 #define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
129
130 #define FREEDATA (fFreeProc!=NULL)
131
132 /*******************************************************************************/
133 // Construct empty hash
134 template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
135 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
136 fCurrentEntry=-1;
137 fFreeProc=freeProc;
138 lastkeyptr=NULL;
139 for (uint i=0; i<DEF_HASH_SIZE; i++)
140 hash[i].hash=-1; //this will be an indicator for 'empty' entries
141 fCapacity=DEF_HASH_SIZE;
142 fCount=0;
143 }
144
145 template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
146 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
147 fCurrentEntry=-1;
148 lastkeyptr=NULL;
149 fFreeProc = (doFree)?&DefaultFreeProc : NULL;
150 for (uint i=0; i<DEF_HASH_SIZE; i++)
151 hash[i].hash=-1; //this will be an indicator for 'empty' entries
152 fCapacity=DEF_HASH_SIZE;
153 fCount=0;
154 }
155
156
157 // Resize table
158 template <class OBJ> void GHash<OBJ>::Resize(int m){
159 register int i,n,p,x,h;
160 GHashEntry *k;
161 GASSERT(fCount<=fCapacity);
162 if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
163 n=fCapacity;
164 while((n>>2)>m) n>>=1; // Shrink until n/4 <= m
165 while((n>>1)<m) n<<=1; // Grow until m <= n/2
166 GASSERT(m<=(n>>1));
167 GASSERT(DEF_HASH_SIZE<=n);
168 if(n!=fCapacity){
169 GASSERT(m<=n);
170 GMALLOC(k, sizeof(GHashEntry)*n);
171 for(i=0; i<n; i++) k[i].hash=-1;
172 for(i=0; i<fCapacity; i++){
173 h=hash[i].hash;
174 if(0<=h){
175 p=HASH1(h,n);
176 GASSERT(0<=p && p<n);
177 x=HASH2(h,n);
178 GASSERT(1<=x && x<n);
179 while(k[p].hash!=-1) p=(p+x)%n;
180 GASSERT(k[p].hash<0);
181 k[p]=hash[i];
182 }
183 }
184 GFREE(hash);
185 hash=k;
186 fCapacity=n;
187 }
188 }
189
190 // add a new entry, or update it if it already exists
191 template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
192 const OBJ* pdata,bool mrk){
193 register int p,i,x,h,n;
194 if(!ky) GError("GHash::insert: NULL key argument.\n");
195 GASSERT(fCount<fCapacity);
196 h=strhash(ky);
197 GASSERT(0<=h);
198 p=HASH1(h,fCapacity);
199 GASSERT(0<=p && p<fCapacity);
200 x=HASH2(h,fCapacity);
201 GASSERT(1<=x && x<fCapacity);
202 i=-1;
203 n=fCapacity;
204 while(n && hash[p].hash!=-1){
205 if ((i==-1)&&(hash[p].hash==-2)) i=p;
206 if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
207 //replace hash data for this key!
208 lastkeyptr=hash[p].key;
209 hash[p].data = (void*) pdata;
210 return (OBJ*)hash[p].data;
211 }
212 p=(p+x)%fCapacity;
213 n--;
214 }
215 if(i==-1) i=p;
216 GTRACE(("GHash::insert: key=\"%s\"\n",ky));
217 //GMessage("GHash::insert: key=\"%s\"\n",ky);
218 GASSERT(0<=i && i<fCapacity);
219 GASSERT(hash[i].hash<0);
220 hash[i].hash=h;
221 hash[i].mark=mrk;
222 hash[i].key=Gstrdup(ky);
223 hash[i].keyalloc=true;
224 lastkeyptr=hash[i].key;
225 hash[i].data= (void*) pdata;
226 fCount++;
227 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
228 GASSERT(fCount<fCapacity);
229 return pdata;
230 }
231
232 template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
233 const OBJ* pdata,bool mrk){
234 register int p,i,x,h,n;
235 if(!ky) GError("GHash::insert: NULL key argument.\n");
236 GASSERT(fCount<fCapacity);
237 h=strhash(ky);
238 GASSERT(0<=h);
239 p=HASH1(h,fCapacity);
240 GASSERT(0<=p && p<fCapacity);
241 x=HASH2(h,fCapacity);
242 GASSERT(1<=x && x<fCapacity);
243 i=-1;
244 n=fCapacity;
245 while(n && hash[p].hash!=-1){
246 if((i==-1)&&(hash[p].hash==-2)) i=p;
247 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
248 //replace hash data for this key!
249 lastkeyptr=hash[p].key;
250 hash[p].data = (void*) pdata;
251 return (OBJ*)hash[p].data;
252 }
253 p=(p+x)%fCapacity;
254 n--;
255 }
256 if(i==-1) i=p;
257 GTRACE(("GHash::insert: key=\"%s\"\n",ky));
258 //GMessage("GHash::insert: key=\"%s\"\n",ky);
259 GASSERT(0<=i && i<fCapacity);
260 GASSERT(hash[i].hash<0);
261 hash[i].hash=h;
262 hash[i].mark=mrk;
263 hash[i].key=(char *)ky;
264 lastkeyptr=hash[i].key;
265 hash[i].keyalloc=false;
266 hash[i].data= (void*) pdata;
267 fCount++;
268 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
269 GASSERT(fCount<fCapacity);
270 return pdata;
271 }
272
273
274 // Add or replace entry
275 template <class OBJ> OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
276 register int p,i,x,h,n;
277 if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
278 GASSERT(fCount<fCapacity);
279 h=strhash(ky);
280 GASSERT(0<=h);
281 p=HASH1(h,fCapacity);
282 GASSERT(0<=p && p<fCapacity);
283 x=HASH2(h,fCapacity);
284 GASSERT(1<=x && x<fCapacity);
285 i=-1;
286 n=fCapacity;
287 while(n && hash[p].hash!=-1){
288 if((i==-1)&&(hash[p].hash==-2)) i=p;
289 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
290 if(hash[p].mark<=mrk){
291 GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
292 if (FREEDATA) (*fFreeProc)(hash[p].data);
293 hash[p].mark=mrk;
294 hash[p].data=pdata;
295 }
296 return hash[p].data;
297 }
298 p=(p+x)%fCapacity;
299 n--;
300 }
301 if(i==-1) i=p;
302 GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
303 GASSERT(0<=i && i<fCapacity);
304 GASSERT(hash[i].hash<0);
305 hash[i].hash=h;
306 hash[i].mark=mrk;
307 hash[i].key=Gstrdup(ky);
308 hash[i].data=pdata;
309 fCount++;
310 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
311 GASSERT(fCount<fCapacity);
312 return pdata;
313 }
314
315
316 // Remove entry
317 template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
318 register int p,x,h,n;
319 if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
320 if(0<fCount){
321 h=strhash(ky);
322 GASSERT(0<=h);
323 p=HASH1(h,fCapacity);
324 GASSERT(0<=p && p<fCapacity);
325 x=HASH2(h,fCapacity);
326 GASSERT(1<=x && x<fCapacity);
327 GASSERT(fCount<fCapacity);
328 n=fCapacity;
329 while(n && hash[p].hash!=-1){
330 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
331 GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
332 hash[p].hash=-2;
333 hash[p].mark=false;
334 if (hash[p].keyalloc) GFREE((hash[p].key));
335 if (FREEDATA) (*fFreeProc)(hash[p].data);
336 hash[p].key=NULL;
337 hash[p].data=NULL;
338 fCount--;
339 if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
340 GASSERT(fCount<fCapacity);
341 return NULL;
342 }
343 p=(p+x)%fCapacity;
344 n--;
345 }
346 }
347 return NULL;
348 }
349
350
351 // Find entry
352 template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
353 register int p,x,h,n;
354 if(!ky){ GError("GHash::find: NULL key argument.\n"); }
355 if(0<fCount){
356 h=strhash(ky);
357 GASSERT(0<=h);
358 p=HASH1(h,fCapacity);
359 GASSERT(0<=p && p<fCapacity);
360 x=HASH2(h,fCapacity);
361 GASSERT(1<=x && x<fCapacity);
362 GASSERT(fCount<fCapacity);
363 n=fCapacity;
364 while(n && hash[p].hash!=-1){
365 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
366 return true;
367 }
368 p=(p+x)%fCapacity;
369 n--;
370 }
371 }
372 return false;
373 }
374
375 template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
376 register int p,x,h,n;
377 if(!ky){ GError("GHash::find: NULL key argument.\n"); }
378 if(0<fCount){
379 h=strhash(ky);
380 GASSERT(0<=h);
381 p=HASH1(h,fCapacity);
382 GASSERT(0<=p && p<fCapacity);
383 x=HASH2(h,fCapacity);
384 GASSERT(1<=x && x<fCapacity);
385 GASSERT(fCount<fCapacity);
386 n=fCapacity;
387 while(n && hash[p].hash!=-1){
388 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
389 if (keyptr!=NULL) *keyptr = hash[p].key;
390 return (OBJ*)hash[p].data;
391 }
392 p=(p+x)%fCapacity;
393 n--;
394 }
395 }
396 return NULL;
397 }
398
399
400 template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
401 fCurrentEntry=0;
402 }
403
404 template <class OBJ> char* GHash<OBJ>::NextKey() {
405 register int pos=fCurrentEntry;
406 while (pos<fCapacity && hash[pos].hash<0) pos++;
407 if (pos==fCapacity) {
408 fCurrentEntry=fCapacity;
409 return NULL;
410 }
411 else {
412 fCurrentEntry=pos+1;
413 return hash[pos].key;
414 }
415 }
416
417 template <class OBJ> OBJ* GHash<OBJ>::NextData() {
418 register int pos=fCurrentEntry;
419 while (pos<fCapacity && hash[pos].hash<0) pos++;
420 if (pos==fCapacity) {
421 fCurrentEntry=fCapacity;
422 return NULL;
423 }
424 else {
425 fCurrentEntry=pos+1;
426 return (OBJ*)hash[pos].data;
427 }
428
429 }
430
431 template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
432 register int pos=fCurrentEntry;
433 while (pos<fCapacity && hash[pos].hash<0) pos++;
434 if (pos==fCapacity) {
435 fCurrentEntry=fCapacity;
436 nextkey=NULL;
437 return NULL;
438 }
439 else {
440 fCurrentEntry=pos+1;
441 nextkey=hash[pos].key;
442 return (OBJ*)hash[pos].data;
443 }
444
445 }
446
447 template <class OBJ> GHashEntry* GHash<OBJ>::NextEntry() {
448 register int pos=fCurrentEntry;
449 while (pos<fCapacity && hash[pos].hash<0) pos++;
450 if (pos==fCapacity) {
451 fCurrentEntry=fCapacity;
452 return NULL;
453 }
454 else {
455 fCurrentEntry=pos+1;
456 return &hash[pos];
457 }
458 }
459
460
461 // Get first non-empty entry
462 template <class OBJ> int GHash<OBJ>::First() const {
463 register int pos=0;
464 while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
465 GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
466 return pos;
467 }
468
469 // Get last non-empty entry
470 template <class OBJ> int GHash<OBJ>::Last() const {
471 register int pos=fCapacity-1;
472 while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
473 GASSERT(pos<0 || 0<=hash[pos].hash);
474 return pos;
475 }
476
477
478 // Find next valid entry
479 template <class OBJ> int GHash<OBJ>::Next(int pos) const {
480 GASSERT(0<=pos && pos<fCapacity);
481 while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
482 GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
483 return pos;
484 }
485
486
487 // Find previous valid entry
488 template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
489 GASSERT(0<=pos && pos<fCapacity);
490 while(--pos >= 0){ if(0<=hash[pos].hash) break; }
491 GASSERT(pos<0 || 0<=hash[pos].hash);
492 return pos;
493 }
494
495
496 // Remove all
497 template <class OBJ> void GHash<OBJ>::Clear(){
498 register int i;
499 for(i=0; i<fCapacity; i++){
500 if(hash[i].hash>=0){
501 if (hash[i].keyalloc) GFREE((hash[i].key));
502 if (FREEDATA)
503 (*fFreeProc)(hash[i].data);
504 }
505 }
506 GFREE(hash);
507 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
508 //reinitialize it
509 for (i=0; i<DEF_HASH_SIZE; i++)
510 hash[i].hash=-1; //this will be an indicator for 'empty' entries
511 fCapacity=DEF_HASH_SIZE;
512 fCount=0;
513 }
514
515
516 // Save data
517 /*
518 void GHash::Save(Stream& store) const {
519 Object::save(store);
520 store << fCapacity;
521 store << fCount;
522 for(int i=0; i<fCapacity; i++){
523 store << hash[i].hash;
524 if(hash[i].hash>=0){
525 uint len=strlen(hash[i].key);
526 store << len;
527 store << hash[i].mark;
528 store.save(hash[i].key,len);
529 }
530 }
531 }
532
533
534 // Load data
535 void GHash::Load(Stream& store){
536 Object::load(store);
537 store >> fCapacity;
538 store >> fCount;
539 for(int i=0; i<fCapacity; i++){
540 store >> hash[i].hash;
541 if(hash[i].hash>=0){
542 uint len;
543 store >> len;
544 store >> hash[i].mark;
545 GMALLOC(hash[i].key,len+1);
546 store.load(hash[i].key,len);
547 hash[i].key[len]='\0';
548 }
549 }
550 }
551 */
552
553 // Destroy table
554 template <class OBJ> GHash<OBJ>::~GHash(){
555 register int i;
556 for(i=0; i<fCapacity; i++){
557 if(hash[i].hash>=0){
558 if (hash[i].keyalloc) GFREE((hash[i].key));
559 if (FREEDATA) (*fFreeProc)(hash[i].data);
560 }
561 }
562 GFREE(hash);
563 }
564
565 #endif