ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GHash.hh
Revision: 174
Committed: Wed Feb 15 21:04:38 2012 UTC (7 years, 5 months ago) by gpertea
File size: 16845 byte(s)
Log Message:
wip fqtrim

Line User Rev File contents
1 gpertea 2 /********************************************************************************
2     * Hash table class template (char* based) *
3     *********************************************************************************/
4    
5     #ifndef GHash_HH
6     #define GHash_HH
7     #include "GBase.h"
8     /**
9     * This class maintains a fast-access hash table of entities
10 gpertea 16 * indexed by a character string (essentially, maps strings to pointers)
11 gpertea 2 */
12 gpertea 16
13 gpertea 2 typedef struct {
14     char* key; // Key string
15     bool keyalloc; //shared key flag (to not free the key chars)
16     int hash; // Hash value of key
17     pointer data; // Data
18     bool mark; // Entry is marked
19     } GHashEntry;
20    
21     template <class OBJ> class GHash {
22     protected:
23     GHashEntry* hash; // Hash
24     int fCapacity; // table size
25     int fCount; // number of valid entries
26     int fCurrentEntry;
27     char* lastkeyptr; //pointer to last key string added
28     //---------- Raw data retrieval (including empty entries
29     // Return key at position pos.
30     const char* Key(uint pos) const { return hash[pos].key; }
31     // return data OBJ* at given position
32     OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
33     // Return mark flag of entry at position pos.
34     bool Mark(uint pos) const { return hash[pos].mark; }
35     // Return position of first filled slot, or >= fCapacity
36     int First() const;
37     // Return position of last filled slot or -1
38     int Last() const;
39     // Return position of next filled slot in hash table
40     // or a value greater than or equal to fCapacity if no filled
41     // slot was found
42     int Next(int pos) const;
43     //Return position of previous filled slot in hash table
44     //or a -1 if no filled slot was found
45     int Prev(int pos) const;
46    
47     private:
48     GHash(const GHash&);
49     GHash &operator=(const GHash&);
50     GFreeProc* fFreeProc; //procedure to free item data
51     protected:
52     public:
53     static void DefaultFreeProc(pointer item) {
54     delete (OBJ*)item;
55     item=NULL;
56     }
57     public:
58     GHash(GFreeProc* freeProc); // constructs of an empty hash
59     GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
60     void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
61     void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
62     int Capacity() const { return fCapacity; } // table's size, including the empty slots.
63     void Resize(int m); // Resize the table to the given size.
64     int Count() const { return fCount; }// the total number of entries in the table.
65     // Insert a new entry into the table given key and mark.
66     // If there is already an entry with that key, leave it unchanged,
67     const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false);
68     //same as Add, but the key pointer is stored directly, no string duplicate
69     //is made (shared-key-Add)
70     const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
71    
72     // Replace data at key, if the entry's mark is less than
73     // or equal to the given mark. If there was no existing entry,
74     // a new entry is inserted with the given mark.
75     OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
76     // Remove a given key and its data
77     OBJ* Remove(const char* ky);
78     // Find data OBJ* given key.
79     OBJ* Find(const char* ky, char** keyptr=NULL);
80     bool hasKey(const char* ky);
81     char* getLastKey() { return lastkeyptr; }
82     OBJ* operator[](const char* ky) { return Find(ky); }
83     void startIterate(); //iterator-like initialization
84     char* NextKey(); //returns next valid key in the table (NULL if no more)
85     OBJ* NextData(); //returns next valid hash[].data
86     OBJ* NextData(char*& nextkey); //returns next valid hash[].data
87     //or NULL if no more
88     //nextkey is SET to the corresponding key
89     GHashEntry* NextEntry(); //returns a pointer to a GHashEntry
90    
91     /// Clear all entries
92     void Clear();
93    
94     /// Destructor
95     virtual ~GHash();
96     };
97     //
98     //======================== method definitions ========================
99     //
100     /*
101     Notes:
102     - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
103     GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
104     - Since the algorithm doubles the table size when exceeding MAX_LOAD,
105     it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
106     otherwise, the algorithm might hip-hop between halving and doubling,
107     which would be quite expensive!!
108     - Not many people seem to know that hash tables don't have to be prime
109     numbers; in fact, a table size of 2**n and odd probe distance are very
110     easy to arrange, and this works just as well!
111     - We store the hash key, so that 99.999% of the time we can compare hash numbers;
112     only when hash numbers match do we need to compare keys.
113     Thus, with a good hash function, the fCount of calls to strcmp() should be
114     roughly the same as the fCount of successful lookups.
115     - The hash table should NEVER get full, or stuff will loop forever!!
116     */
117    
118     // Initial table size (MUST be power of 2)
119     #define DEF_HASH_SIZE 32
120     // Maximum hash table load factor (%)
121     #define MAX_LOAD 80
122     // Minimum hash table load factor (%)
123     #define MIN_LOAD 10
124     // Probe Position [0..n-1]
125     #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
126     // Probe Distance [1..n-1]
127     #define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
128    
129     #define FREEDATA (fFreeProc!=NULL)
130    
131     /*******************************************************************************/
132     // Construct empty hash
133     template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
134     GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
135 gpertea 174 fCurrentEntry=-1;
136 gpertea 2 fFreeProc=freeProc;
137 gpertea 174 lastkeyptr=NULL;
138 gpertea 2 for (uint i=0; i<DEF_HASH_SIZE; i++)
139     hash[i].hash=-1; //this will be an indicator for 'empty' entries
140     fCapacity=DEF_HASH_SIZE;
141     fCount=0;
142     }
143    
144     template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
145     GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
146 gpertea 174 fCurrentEntry=-1;
147     lastkeyptr=NULL;
148 gpertea 2 fFreeProc = (doFree)?&DefaultFreeProc : NULL;
149     for (uint i=0; i<DEF_HASH_SIZE; i++)
150     hash[i].hash=-1; //this will be an indicator for 'empty' entries
151     fCapacity=DEF_HASH_SIZE;
152     fCount=0;
153     }
154    
155    
156     // Resize table
157     template <class OBJ> void GHash<OBJ>::Resize(int m){
158     register int i,n,p,x,h;
159     GHashEntry *k;
160     GASSERT(fCount<=fCapacity);
161     if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
162     n=fCapacity;
163     while((n>>2)>m) n>>=1; // Shrink until n/4 <= m
164     while((n>>1)<m) n<<=1; // Grow until m <= n/2
165     GASSERT(m<=(n>>1));
166     GASSERT(DEF_HASH_SIZE<=n);
167     if(n!=fCapacity){
168     GASSERT(m<=n);
169     GMALLOC(k, sizeof(GHashEntry)*n);
170     for(i=0; i<n; i++) k[i].hash=-1;
171     for(i=0; i<fCapacity; i++){
172     h=hash[i].hash;
173     if(0<=h){
174     p=HASH1(h,n);
175     GASSERT(0<=p && p<n);
176     x=HASH2(h,n);
177     GASSERT(1<=x && x<n);
178     while(k[p].hash!=-1) p=(p+x)%n;
179     GASSERT(k[p].hash<0);
180     k[p]=hash[i];
181     }
182     }
183     GFREE(hash);
184     hash=k;
185     fCapacity=n;
186     }
187     }
188    
189 gpertea 16 // add a new entry, or update it if it already exists
190 gpertea 2 template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
191     const OBJ* pdata,bool mrk){
192     register int p,i,x,h,n;
193     if(!ky) GError("GHash::insert: NULL key argument.\n");
194     GASSERT(fCount<fCapacity);
195     h=strhash(ky);
196     GASSERT(0<=h);
197     p=HASH1(h,fCapacity);
198     GASSERT(0<=p && p<fCapacity);
199     x=HASH2(h,fCapacity);
200     GASSERT(1<=x && x<fCapacity);
201     i=-1;
202     n=fCapacity;
203     while(n && hash[p].hash!=-1){
204     if ((i==-1)&&(hash[p].hash==-2)) i=p;
205     if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
206     //replace hash data for this key!
207     lastkeyptr=hash[p].key;
208     hash[p].data = (void*) pdata;
209     return (OBJ*)hash[p].data;
210     }
211     p=(p+x)%fCapacity;
212     n--;
213     }
214     if(i==-1) i=p;
215     GTRACE(("GHash::insert: key=\"%s\"\n",ky));
216     //GMessage("GHash::insert: key=\"%s\"\n",ky);
217     GASSERT(0<=i && i<fCapacity);
218     GASSERT(hash[i].hash<0);
219     hash[i].hash=h;
220     hash[i].mark=mrk;
221     hash[i].key=Gstrdup(ky);
222     hash[i].keyalloc=true;
223     lastkeyptr=hash[i].key;
224     hash[i].data= (void*) pdata;
225     fCount++;
226     if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
227     GASSERT(fCount<fCapacity);
228     return pdata;
229     }
230    
231     template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
232     const OBJ* pdata,bool mrk){
233     register int p,i,x,h,n;
234     if(!ky) GError("GHash::insert: NULL key argument.\n");
235     GASSERT(fCount<fCapacity);
236     h=strhash(ky);
237     GASSERT(0<=h);
238     p=HASH1(h,fCapacity);
239     GASSERT(0<=p && p<fCapacity);
240     x=HASH2(h,fCapacity);
241     GASSERT(1<=x && x<fCapacity);
242     i=-1;
243     n=fCapacity;
244     while(n && hash[p].hash!=-1){
245     if((i==-1)&&(hash[p].hash==-2)) i=p;
246     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
247     //replace hash data for this key!
248     lastkeyptr=hash[p].key;
249     hash[p].data = (void*) pdata;
250     return (OBJ*)hash[p].data;
251     }
252     p=(p+x)%fCapacity;
253     n--;
254     }
255     if(i==-1) i=p;
256     GTRACE(("GHash::insert: key=\"%s\"\n",ky));
257     //GMessage("GHash::insert: key=\"%s\"\n",ky);
258     GASSERT(0<=i && i<fCapacity);
259     GASSERT(hash[i].hash<0);
260     hash[i].hash=h;
261     hash[i].mark=mrk;
262     hash[i].key=(char *)ky;
263     lastkeyptr=hash[i].key;
264     hash[i].keyalloc=false;
265     hash[i].data= (void*) pdata;
266     fCount++;
267     if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
268     GASSERT(fCount<fCapacity);
269     return pdata;
270     }
271    
272    
273     // Add or replace entry
274     template <class OBJ> OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
275     register int p,i,x,h,n;
276     if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
277     GASSERT(fCount<fCapacity);
278     h=strhash(ky);
279     GASSERT(0<=h);
280     p=HASH1(h,fCapacity);
281     GASSERT(0<=p && p<fCapacity);
282     x=HASH2(h,fCapacity);
283     GASSERT(1<=x && x<fCapacity);
284     i=-1;
285     n=fCapacity;
286     while(n && hash[p].hash!=-1){
287     if((i==-1)&&(hash[p].hash==-2)) i=p;
288     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
289     if(hash[p].mark<=mrk){
290     GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
291     if (FREEDATA) (*fFreeProc)(hash[p].data);
292     hash[p].mark=mrk;
293     hash[p].data=pdata;
294     }
295     return hash[p].data;
296     }
297     p=(p+x)%fCapacity;
298     n--;
299     }
300     if(i==-1) i=p;
301     GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
302     GASSERT(0<=i && i<fCapacity);
303     GASSERT(hash[i].hash<0);
304     hash[i].hash=h;
305     hash[i].mark=mrk;
306     hash[i].key=Gstrdup(ky);
307     hash[i].data=pdata;
308     fCount++;
309     if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
310     GASSERT(fCount<fCapacity);
311     return pdata;
312     }
313    
314    
315     // Remove entry
316     template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
317     register int p,x,h,n;
318     if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
319     if(0<fCount){
320     h=strhash(ky);
321     GASSERT(0<=h);
322     p=HASH1(h,fCapacity);
323     GASSERT(0<=p && p<fCapacity);
324     x=HASH2(h,fCapacity);
325     GASSERT(1<=x && x<fCapacity);
326     GASSERT(fCount<fCapacity);
327     n=fCapacity;
328     while(n && hash[p].hash!=-1){
329     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
330     GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
331     hash[p].hash=-2;
332     hash[p].mark=false;
333     if (hash[p].keyalloc) GFREE((hash[p].key));
334     if (FREEDATA) (*fFreeProc)(hash[p].data);
335     hash[p].key=NULL;
336     hash[p].data=NULL;
337     fCount--;
338     if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
339     GASSERT(fCount<fCapacity);
340     return NULL;
341     }
342     p=(p+x)%fCapacity;
343     n--;
344     }
345     }
346     return NULL;
347     }
348    
349    
350     // Find entry
351     template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
352     register int p,x,h,n;
353     if(!ky){ GError("GHash::find: NULL key argument.\n"); }
354     if(0<fCount){
355     h=strhash(ky);
356     GASSERT(0<=h);
357     p=HASH1(h,fCapacity);
358     GASSERT(0<=p && p<fCapacity);
359     x=HASH2(h,fCapacity);
360     GASSERT(1<=x && x<fCapacity);
361     GASSERT(fCount<fCapacity);
362     n=fCapacity;
363     while(n && hash[p].hash!=-1){
364     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
365     return true;
366     }
367     p=(p+x)%fCapacity;
368     n--;
369     }
370     }
371     return false;
372     }
373    
374     template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
375     register int p,x,h,n;
376     if(!ky){ GError("GHash::find: NULL key argument.\n"); }
377     if(0<fCount){
378     h=strhash(ky);
379     GASSERT(0<=h);
380     p=HASH1(h,fCapacity);
381     GASSERT(0<=p && p<fCapacity);
382     x=HASH2(h,fCapacity);
383     GASSERT(1<=x && x<fCapacity);
384     GASSERT(fCount<fCapacity);
385     n=fCapacity;
386     while(n && hash[p].hash!=-1){
387     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
388     if (keyptr!=NULL) *keyptr = hash[p].key;
389     return (OBJ*)hash[p].data;
390     }
391     p=(p+x)%fCapacity;
392     n--;
393     }
394     }
395     return NULL;
396     }
397    
398    
399     template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
400     fCurrentEntry=0;
401     }
402    
403     template <class OBJ> char* GHash<OBJ>::NextKey() {
404     register int pos=fCurrentEntry;
405     while (pos<fCapacity && hash[pos].hash<0) pos++;
406     if (pos==fCapacity) {
407     fCurrentEntry=fCapacity;
408     return NULL;
409     }
410     else {
411     fCurrentEntry=pos+1;
412     return hash[pos].key;
413     }
414     }
415    
416     template <class OBJ> OBJ* GHash<OBJ>::NextData() {
417     register int pos=fCurrentEntry;
418     while (pos<fCapacity && hash[pos].hash<0) pos++;
419     if (pos==fCapacity) {
420     fCurrentEntry=fCapacity;
421     return NULL;
422     }
423     else {
424     fCurrentEntry=pos+1;
425     return (OBJ*)hash[pos].data;
426     }
427    
428     }
429    
430     template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
431     register int pos=fCurrentEntry;
432     while (pos<fCapacity && hash[pos].hash<0) pos++;
433     if (pos==fCapacity) {
434     fCurrentEntry=fCapacity;
435     nextkey=NULL;
436     return NULL;
437     }
438     else {
439     fCurrentEntry=pos+1;
440     nextkey=hash[pos].key;
441     return (OBJ*)hash[pos].data;
442     }
443    
444     }
445    
446     template <class OBJ> GHashEntry* GHash<OBJ>::NextEntry() {
447     register int pos=fCurrentEntry;
448     while (pos<fCapacity && hash[pos].hash<0) pos++;
449     if (pos==fCapacity) {
450     fCurrentEntry=fCapacity;
451     return NULL;
452     }
453     else {
454     fCurrentEntry=pos+1;
455     return &hash[pos];
456     }
457     }
458    
459    
460     // Get first non-empty entry
461     template <class OBJ> int GHash<OBJ>::First() const {
462     register int pos=0;
463     while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
464     GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
465     return pos;
466     }
467    
468     // Get last non-empty entry
469     template <class OBJ> int GHash<OBJ>::Last() const {
470     register int pos=fCapacity-1;
471     while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
472     GASSERT(pos<0 || 0<=hash[pos].hash);
473     return pos;
474     }
475    
476    
477     // Find next valid entry
478     template <class OBJ> int GHash<OBJ>::Next(int pos) const {
479     GASSERT(0<=pos && pos<fCapacity);
480     while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
481     GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
482     return pos;
483     }
484    
485    
486     // Find previous valid entry
487     template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
488     GASSERT(0<=pos && pos<fCapacity);
489     while(--pos >= 0){ if(0<=hash[pos].hash) break; }
490     GASSERT(pos<0 || 0<=hash[pos].hash);
491     return pos;
492     }
493    
494    
495     // Remove all
496     template <class OBJ> void GHash<OBJ>::Clear(){
497     register int i;
498     for(i=0; i<fCapacity; i++){
499     if(hash[i].hash>=0){
500     if (hash[i].keyalloc) GFREE((hash[i].key));
501     if (FREEDATA)
502     (*fFreeProc)(hash[i].data);
503     }
504     }
505     GFREE(hash);
506     GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
507     //reinitialize it
508     for (i=0; i<DEF_HASH_SIZE; i++)
509     hash[i].hash=-1; //this will be an indicator for 'empty' entries
510     fCapacity=DEF_HASH_SIZE;
511     fCount=0;
512     }
513    
514    
515     // Save data
516     /*
517     void GHash::Save(Stream& store) const {
518     Object::save(store);
519     store << fCapacity;
520     store << fCount;
521     for(int i=0; i<fCapacity; i++){
522     store << hash[i].hash;
523     if(hash[i].hash>=0){
524     uint len=strlen(hash[i].key);
525     store << len;
526     store << hash[i].mark;
527     store.save(hash[i].key,len);
528     }
529     }
530     }
531    
532    
533     // Load data
534     void GHash::Load(Stream& store){
535     Object::load(store);
536     store >> fCapacity;
537     store >> fCount;
538     for(int i=0; i<fCapacity; i++){
539     store >> hash[i].hash;
540     if(hash[i].hash>=0){
541     uint len;
542     store >> len;
543     store >> hash[i].mark;
544     GMALLOC(hash[i].key,len+1);
545     store.load(hash[i].key,len);
546     hash[i].key[len]='\0';
547     }
548     }
549     }
550     */
551    
552     // Destroy table
553     template <class OBJ> GHash<OBJ>::~GHash(){
554     register int i;
555     for(i=0; i<fCapacity; i++){
556     if(hash[i].hash>=0){
557     if (hash[i].keyalloc) GFREE((hash[i].key));
558     if (FREEDATA) (*fFreeProc)(hash[i].data);
559     }
560     }
561     GFREE(hash);
562     }
563    
564     #endif