ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GHash.hh
Revision: 310
Committed: Fri Mar 22 20:06:27 2013 UTC (6 years, 6 months ago) by gpertea
File size: 16799 byte(s)
Log Message:
sync with igm repo

Line User Rev File contents
1 gpertea 2 /********************************************************************************
2     * Hash table class template (char* based) *
3     *********************************************************************************/
4    
5     #ifndef GHash_HH
6     #define GHash_HH
7     #include "GBase.h"
8 gpertea 227
9 gpertea 2 /**
10     * This class maintains a fast-access hash table of entities
11 gpertea 16 * indexed by a character string (essentially, maps strings to pointers)
12 gpertea 2 */
13 gpertea 16
14 gpertea 2
15 gpertea 310 template <class OBJ> class GHash {
16 gpertea 2 protected:
17 gpertea 310 struct GHashEntry {
18     char* key; // Key string
19     bool keyalloc; //shared key flag (to not free the key chars)
20     int hash; // Hash value of key
21     pointer data; // Data
22     bool mark; // Entry is marked
23     };
24 gpertea 2 GHashEntry* hash; // Hash
25     int fCapacity; // table size
26     int fCount; // number of valid entries
27     int fCurrentEntry;
28     char* lastkeyptr; //pointer to last key string added
29     //---------- Raw data retrieval (including empty entries
30     // Return key at position pos.
31     const char* Key(uint pos) const { return hash[pos].key; }
32     // return data OBJ* at given position
33     OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
34     // Return mark flag of entry at position pos.
35     bool Mark(uint pos) const { return hash[pos].mark; }
36     // Return position of first filled slot, or >= fCapacity
37     int First() const;
38     // Return position of last filled slot or -1
39     int Last() const;
40     // Return position of next filled slot in hash table
41     // or a value greater than or equal to fCapacity if no filled
42     // slot was found
43     int Next(int pos) const;
44     //Return position of previous filled slot in hash table
45     //or a -1 if no filled slot was found
46     int Prev(int pos) const;
47    
48     private:
49     GHash(const GHash&);
50     GHash &operator=(const GHash&);
51     GFreeProc* fFreeProc; //procedure to free item data
52     protected:
53     public:
54     static void DefaultFreeProc(pointer item) {
55     delete (OBJ*)item;
56     }
57     public:
58     GHash(GFreeProc* freeProc); // constructs of an empty hash
59     GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
60     void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
61     void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
62     int Capacity() const { return fCapacity; } // table's size, including the empty slots.
63     void Resize(int m); // Resize the table to the given size.
64     int Count() const { return fCount; }// the total number of entries in the table.
65     // Insert a new entry into the table given key and mark.
66     // If there is already an entry with that key, leave it unchanged,
67     const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false);
68     //same as Add, but the key pointer is stored directly, no string duplicate
69     //is made (shared-key-Add)
70     const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
71    
72     // Replace data at key, if the entry's mark is less than
73     // or equal to the given mark. If there was no existing entry,
74     // a new entry is inserted with the given mark.
75     OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
76     // Remove a given key and its data
77     OBJ* Remove(const char* ky);
78     // Find data OBJ* given key.
79     OBJ* Find(const char* ky, char** keyptr=NULL);
80     bool hasKey(const char* ky);
81     char* getLastKey() { return lastkeyptr; }
82 gpertea 310 OBJ* operator[](const char* ky) { return Find(ky); }
83 gpertea 2 void startIterate(); //iterator-like initialization
84     char* NextKey(); //returns next valid key in the table (NULL if no more)
85     OBJ* NextData(); //returns next valid hash[].data
86     OBJ* NextData(char*& nextkey); //returns next valid hash[].data
87     //or NULL if no more
88     //nextkey is SET to the corresponding key
89 gpertea 310 GHashEntry* NextEntry() { //returns a pointer to a GHashEntry
90     register int pos=fCurrentEntry;
91     while (pos<fCapacity && hash[pos].hash<0) pos++;
92     if (pos==fCapacity) {
93     fCurrentEntry=fCapacity;
94     return NULL;
95     }
96     else {
97     fCurrentEntry=pos+1;
98     return &hash[pos];
99     }
100     }
101 gpertea 2 /// Clear all entries
102     void Clear();
103    
104     /// Destructor
105     virtual ~GHash();
106     };
107     //
108     //======================== method definitions ========================
109     //
110     /*
111     Notes:
112     - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
113     GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
114     - Since the algorithm doubles the table size when exceeding MAX_LOAD,
115     it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
116     otherwise, the algorithm might hip-hop between halving and doubling,
117     which would be quite expensive!!
118     - Not many people seem to know that hash tables don't have to be prime
119     numbers; in fact, a table size of 2**n and odd probe distance are very
120     easy to arrange, and this works just as well!
121     - We store the hash key, so that 99.999% of the time we can compare hash numbers;
122     only when hash numbers match do we need to compare keys.
123     Thus, with a good hash function, the fCount of calls to strcmp() should be
124     roughly the same as the fCount of successful lookups.
125     - The hash table should NEVER get full, or stuff will loop forever!!
126     */
127    
128     // Initial table size (MUST be power of 2)
129     #define DEF_HASH_SIZE 32
130     // Maximum hash table load factor (%)
131     #define MAX_LOAD 80
132     // Minimum hash table load factor (%)
133     #define MIN_LOAD 10
134     // Probe Position [0..n-1]
135     #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
136     // Probe Distance [1..n-1]
137     #define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
138    
139     #define FREEDATA (fFreeProc!=NULL)
140    
141     /*******************************************************************************/
142     // Construct empty hash
143     template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
144     GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
145 gpertea 174 fCurrentEntry=-1;
146 gpertea 2 fFreeProc=freeProc;
147 gpertea 174 lastkeyptr=NULL;
148 gpertea 2 for (uint i=0; i<DEF_HASH_SIZE; i++)
149     hash[i].hash=-1; //this will be an indicator for 'empty' entries
150     fCapacity=DEF_HASH_SIZE;
151     fCount=0;
152     }
153    
154     template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
155     GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
156 gpertea 174 fCurrentEntry=-1;
157     lastkeyptr=NULL;
158 gpertea 2 fFreeProc = (doFree)?&DefaultFreeProc : NULL;
159     for (uint i=0; i<DEF_HASH_SIZE; i++)
160     hash[i].hash=-1; //this will be an indicator for 'empty' entries
161     fCapacity=DEF_HASH_SIZE;
162     fCount=0;
163     }
164    
165    
166     // Resize table
167     template <class OBJ> void GHash<OBJ>::Resize(int m){
168     register int i,n,p,x,h;
169     GHashEntry *k;
170     GASSERT(fCount<=fCapacity);
171     if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
172     n=fCapacity;
173     while((n>>2)>m) n>>=1; // Shrink until n/4 <= m
174     while((n>>1)<m) n<<=1; // Grow until m <= n/2
175     GASSERT(m<=(n>>1));
176     GASSERT(DEF_HASH_SIZE<=n);
177     if(n!=fCapacity){
178     GASSERT(m<=n);
179     GMALLOC(k, sizeof(GHashEntry)*n);
180     for(i=0; i<n; i++) k[i].hash=-1;
181     for(i=0; i<fCapacity; i++){
182     h=hash[i].hash;
183     if(0<=h){
184     p=HASH1(h,n);
185     GASSERT(0<=p && p<n);
186     x=HASH2(h,n);
187     GASSERT(1<=x && x<n);
188     while(k[p].hash!=-1) p=(p+x)%n;
189     GASSERT(k[p].hash<0);
190     k[p]=hash[i];
191     }
192     }
193     GFREE(hash);
194     hash=k;
195     fCapacity=n;
196     }
197     }
198    
199 gpertea 16 // add a new entry, or update it if it already exists
200 gpertea 2 template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
201     const OBJ* pdata,bool mrk){
202     register int p,i,x,h,n;
203     if(!ky) GError("GHash::insert: NULL key argument.\n");
204     GASSERT(fCount<fCapacity);
205     h=strhash(ky);
206     GASSERT(0<=h);
207     p=HASH1(h,fCapacity);
208     GASSERT(0<=p && p<fCapacity);
209     x=HASH2(h,fCapacity);
210     GASSERT(1<=x && x<fCapacity);
211     i=-1;
212     n=fCapacity;
213     while(n && hash[p].hash!=-1){
214     if ((i==-1)&&(hash[p].hash==-2)) i=p;
215     if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
216     //replace hash data for this key!
217     lastkeyptr=hash[p].key;
218     hash[p].data = (void*) pdata;
219     return (OBJ*)hash[p].data;
220     }
221     p=(p+x)%fCapacity;
222     n--;
223     }
224     if(i==-1) i=p;
225     GTRACE(("GHash::insert: key=\"%s\"\n",ky));
226     //GMessage("GHash::insert: key=\"%s\"\n",ky);
227     GASSERT(0<=i && i<fCapacity);
228     GASSERT(hash[i].hash<0);
229     hash[i].hash=h;
230     hash[i].mark=mrk;
231     hash[i].key=Gstrdup(ky);
232     hash[i].keyalloc=true;
233     lastkeyptr=hash[i].key;
234     hash[i].data= (void*) pdata;
235     fCount++;
236     if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
237     GASSERT(fCount<fCapacity);
238     return pdata;
239     }
240    
241     template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
242     const OBJ* pdata,bool mrk){
243     register int p,i,x,h,n;
244     if(!ky) GError("GHash::insert: NULL key argument.\n");
245     GASSERT(fCount<fCapacity);
246     h=strhash(ky);
247     GASSERT(0<=h);
248     p=HASH1(h,fCapacity);
249     GASSERT(0<=p && p<fCapacity);
250     x=HASH2(h,fCapacity);
251     GASSERT(1<=x && x<fCapacity);
252     i=-1;
253     n=fCapacity;
254     while(n && hash[p].hash!=-1){
255     if((i==-1)&&(hash[p].hash==-2)) i=p;
256     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
257     //replace hash data for this key!
258     lastkeyptr=hash[p].key;
259     hash[p].data = (void*) pdata;
260     return (OBJ*)hash[p].data;
261     }
262     p=(p+x)%fCapacity;
263     n--;
264     }
265     if(i==-1) i=p;
266     GTRACE(("GHash::insert: key=\"%s\"\n",ky));
267     //GMessage("GHash::insert: key=\"%s\"\n",ky);
268     GASSERT(0<=i && i<fCapacity);
269     GASSERT(hash[i].hash<0);
270     hash[i].hash=h;
271     hash[i].mark=mrk;
272     hash[i].key=(char *)ky;
273     lastkeyptr=hash[i].key;
274     hash[i].keyalloc=false;
275     hash[i].data= (void*) pdata;
276     fCount++;
277     if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
278     GASSERT(fCount<fCapacity);
279     return pdata;
280     }
281    
282    
283     // Add or replace entry
284     template <class OBJ> OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
285     register int p,i,x,h,n;
286     if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
287     GASSERT(fCount<fCapacity);
288     h=strhash(ky);
289     GASSERT(0<=h);
290     p=HASH1(h,fCapacity);
291     GASSERT(0<=p && p<fCapacity);
292     x=HASH2(h,fCapacity);
293     GASSERT(1<=x && x<fCapacity);
294     i=-1;
295     n=fCapacity;
296     while(n && hash[p].hash!=-1){
297     if((i==-1)&&(hash[p].hash==-2)) i=p;
298     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
299     if(hash[p].mark<=mrk){
300     GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
301     if (FREEDATA) (*fFreeProc)(hash[p].data);
302     hash[p].mark=mrk;
303     hash[p].data=pdata;
304     }
305     return hash[p].data;
306     }
307     p=(p+x)%fCapacity;
308     n--;
309     }
310     if(i==-1) i=p;
311     GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
312     GASSERT(0<=i && i<fCapacity);
313     GASSERT(hash[i].hash<0);
314     hash[i].hash=h;
315     hash[i].mark=mrk;
316     hash[i].key=Gstrdup(ky);
317     hash[i].data=pdata;
318     fCount++;
319     if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
320     GASSERT(fCount<fCapacity);
321     return pdata;
322     }
323    
324    
325     // Remove entry
326     template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
327     register int p,x,h,n;
328     if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
329     if(0<fCount){
330     h=strhash(ky);
331     GASSERT(0<=h);
332     p=HASH1(h,fCapacity);
333     GASSERT(0<=p && p<fCapacity);
334     x=HASH2(h,fCapacity);
335     GASSERT(1<=x && x<fCapacity);
336     GASSERT(fCount<fCapacity);
337     n=fCapacity;
338     while(n && hash[p].hash!=-1){
339     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
340     GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
341     hash[p].hash=-2;
342     hash[p].mark=false;
343     if (hash[p].keyalloc) GFREE((hash[p].key));
344     if (FREEDATA) (*fFreeProc)(hash[p].data);
345     hash[p].key=NULL;
346     hash[p].data=NULL;
347     fCount--;
348     if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
349     GASSERT(fCount<fCapacity);
350     return NULL;
351     }
352     p=(p+x)%fCapacity;
353     n--;
354     }
355     }
356     return NULL;
357     }
358    
359    
360     // Find entry
361     template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
362     register int p,x,h,n;
363     if(!ky){ GError("GHash::find: NULL key argument.\n"); }
364     if(0<fCount){
365     h=strhash(ky);
366     GASSERT(0<=h);
367     p=HASH1(h,fCapacity);
368     GASSERT(0<=p && p<fCapacity);
369     x=HASH2(h,fCapacity);
370     GASSERT(1<=x && x<fCapacity);
371     GASSERT(fCount<fCapacity);
372     n=fCapacity;
373     while(n && hash[p].hash!=-1){
374     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
375     return true;
376     }
377     p=(p+x)%fCapacity;
378     n--;
379     }
380     }
381     return false;
382     }
383    
384     template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
385     register int p,x,h,n;
386     if(!ky){ GError("GHash::find: NULL key argument.\n"); }
387     if(0<fCount){
388     h=strhash(ky);
389     GASSERT(0<=h);
390     p=HASH1(h,fCapacity);
391     GASSERT(0<=p && p<fCapacity);
392     x=HASH2(h,fCapacity);
393     GASSERT(1<=x && x<fCapacity);
394     GASSERT(fCount<fCapacity);
395     n=fCapacity;
396     while(n && hash[p].hash!=-1){
397     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
398     if (keyptr!=NULL) *keyptr = hash[p].key;
399     return (OBJ*)hash[p].data;
400     }
401     p=(p+x)%fCapacity;
402     n--;
403     }
404     }
405     return NULL;
406     }
407    
408    
409     template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
410     fCurrentEntry=0;
411     }
412    
413     template <class OBJ> char* GHash<OBJ>::NextKey() {
414     register int pos=fCurrentEntry;
415     while (pos<fCapacity && hash[pos].hash<0) pos++;
416     if (pos==fCapacity) {
417     fCurrentEntry=fCapacity;
418     return NULL;
419     }
420     else {
421     fCurrentEntry=pos+1;
422     return hash[pos].key;
423     }
424     }
425    
426     template <class OBJ> OBJ* GHash<OBJ>::NextData() {
427     register int pos=fCurrentEntry;
428     while (pos<fCapacity && hash[pos].hash<0) pos++;
429     if (pos==fCapacity) {
430     fCurrentEntry=fCapacity;
431     return NULL;
432     }
433     else {
434     fCurrentEntry=pos+1;
435     return (OBJ*)hash[pos].data;
436     }
437    
438     }
439    
440     template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
441     register int pos=fCurrentEntry;
442     while (pos<fCapacity && hash[pos].hash<0) pos++;
443     if (pos==fCapacity) {
444     fCurrentEntry=fCapacity;
445     nextkey=NULL;
446     return NULL;
447     }
448     else {
449     fCurrentEntry=pos+1;
450     nextkey=hash[pos].key;
451     return (OBJ*)hash[pos].data;
452     }
453    
454     }
455    
456    
457     // Get first non-empty entry
458     template <class OBJ> int GHash<OBJ>::First() const {
459     register int pos=0;
460     while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
461     GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
462     return pos;
463     }
464    
465     // Get last non-empty entry
466     template <class OBJ> int GHash<OBJ>::Last() const {
467     register int pos=fCapacity-1;
468     while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
469     GASSERT(pos<0 || 0<=hash[pos].hash);
470     return pos;
471     }
472    
473    
474     // Find next valid entry
475     template <class OBJ> int GHash<OBJ>::Next(int pos) const {
476     GASSERT(0<=pos && pos<fCapacity);
477     while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
478     GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
479     return pos;
480     }
481    
482    
483     // Find previous valid entry
484     template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
485     GASSERT(0<=pos && pos<fCapacity);
486     while(--pos >= 0){ if(0<=hash[pos].hash) break; }
487     GASSERT(pos<0 || 0<=hash[pos].hash);
488     return pos;
489     }
490    
491    
492     // Remove all
493     template <class OBJ> void GHash<OBJ>::Clear(){
494     register int i;
495     for(i=0; i<fCapacity; i++){
496     if(hash[i].hash>=0){
497     if (hash[i].keyalloc) GFREE((hash[i].key));
498     if (FREEDATA)
499     (*fFreeProc)(hash[i].data);
500     }
501     }
502     GFREE(hash);
503     GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
504     //reinitialize it
505     for (i=0; i<DEF_HASH_SIZE; i++)
506     hash[i].hash=-1; //this will be an indicator for 'empty' entries
507     fCapacity=DEF_HASH_SIZE;
508     fCount=0;
509     }
510    
511    
512     // Save data
513     /*
514     void GHash::Save(Stream& store) const {
515     Object::save(store);
516     store << fCapacity;
517     store << fCount;
518     for(int i=0; i<fCapacity; i++){
519     store << hash[i].hash;
520     if(hash[i].hash>=0){
521     uint len=strlen(hash[i].key);
522     store << len;
523     store << hash[i].mark;
524     store.save(hash[i].key,len);
525     }
526     }
527     }
528    
529    
530     // Load data
531     void GHash::Load(Stream& store){
532     Object::load(store);
533     store >> fCapacity;
534     store >> fCount;
535     for(int i=0; i<fCapacity; i++){
536     store >> hash[i].hash;
537     if(hash[i].hash>=0){
538     uint len;
539     store >> len;
540     store >> hash[i].mark;
541     GMALLOC(hash[i].key,len+1);
542     store.load(hash[i].key,len);
543     hash[i].key[len]='\0';
544     }
545     }
546     }
547     */
548    
549     // Destroy table
550     template <class OBJ> GHash<OBJ>::~GHash(){
551     register int i;
552     for(i=0; i<fCapacity; i++){
553     if(hash[i].hash>=0){
554     if (hash[i].keyalloc) GFREE((hash[i].key));
555     if (FREEDATA) (*fFreeProc)(hash[i].data);
556     }
557     }
558     GFREE(hash);
559     }
560    
561     #endif