Main Page   Namespace List   Class Hierarchy   Compound List   File List   Compound Members   File Members  

hash.cc

Go to the documentation of this file.
00001 /*
00002   Copyright (C) 1997-2001 Shigeru Chiba, Tokyo Institute of Technology.
00003 
00004   Permission to use, copy, distribute and modify this software and   
00005   its documentation for any purpose is hereby granted without fee,        
00006   provided that the above copyright notice appear in all copies and that 
00007   both that copyright notice and this permission notice appear in 
00008   supporting documentation.
00009 
00010   Shigeru Chiba makes no representations about the suitability of this 
00011   software for any purpose.  It is provided "as is" without express or
00012   implied warranty.
00013 */
00014 
00015 /*
00016   Copyright (c) 1995, 1996 Xerox Corporation.
00017   All Rights Reserved.
00018 
00019   Use and copying of this software and preparation of derivative works
00020   based upon this software are permitted. Any copy of this software or
00021   of any derivative work must include the above copyright notice of
00022   Xerox Corporation, this paragraph and the one after it.  Any
00023   distribution of this software or derivative works must comply with all
00024   applicable United States export control laws.
00025 
00026   This software is made available AS IS, and XEROX CORPORATION DISCLAIMS
00027   ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE
00028   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029   PURPOSE, AND NOTWITHSTANDING ANY OTHER PROVISION CONTAINED HEREIN, ANY
00030   LIABILITY FOR DAMAGES RESULTING FROM THE SOFTWARE OR ITS USE IS
00031   EXPRESSLY DISCLAIMED, WHETHER ARISING IN CONTRACT, TORT (INCLUDING
00032   NEGLIGENCE) OR STRICT LIABILITY, EVEN IF XEROX CORPORATION IS ADVISED
00033   OF THE POSSIBILITY OF SUCH DAMAGES.
00034 */
00035 
00036 #include <iostream.h>
00037 #include <string.h>
00038 #include "types.h"
00039 #include "hash.h"
00040 #include "ptree-core.h"
00041 #ifndef DONT_GC
00042 #include "gc/gc.h"
00043 #endif
00044 
00045 struct HashTableEntry {
00046     char*       key;            // nil: unused, -1: deleted
00047     HashValue   value;
00048 };
00049 
00050 // class HashTable
00051 
00052 HashTable::HashTable()
00053 {
00054     Size = 251;
00055     Prime2 = 127;
00056     MakeTable();
00057 }
00058 
00059 void HashTable::MakeTable()
00060 {
00061     entries = new (GC) HashTableEntry[Size];
00062     for(int i = 0; i < Size; ++i)
00063         entries[i].key = nil;
00064 }
00065 
00066 BigHashTable::BigHashTable() : HashTable(0)
00067 {
00068     Size = 2053;        // 1021
00069     Prime2 = 1021;      // 509
00070     MakeTable();
00071 }
00072 
00073 bool HashTable::IsEmpty()
00074 {
00075     for(int i = 0; i < Size; ++i)
00076         if(entries[i].key != nil && entries[i].key != (char*)-1)
00077             return FALSE;
00078 
00079     return TRUE;
00080 }
00081 
00082 void HashTable::Dump(ostream& out)
00083 {
00084     out << '{';
00085     for(int i = 0; i < Size; ++i)
00086         if(entries[i].key != nil && entries[i].key != (char*)-1)
00087 //          out << entries[i].key << ", ";
00088             out << entries[i].key << '(' << i << "), ";
00089 
00090     out << '}';
00091 }
00092 
00093 char* HashTable::KeyString(char* key) {
00094     char* str = new (GC) char[strlen(key) + 1];
00095     strcpy(str, key);
00096     return str;
00097 }
00098 
00099 char* HashTable::KeyString(char* key, int len) {
00100     char* str = new (GC) char[len + 1];
00101     memmove(str, key, len);
00102     str[len] = '\0';
00103     return str;
00104 }
00105 
00106 bool HashTable::Lookup(char* key, HashValue* value)
00107 {
00108     int i;
00109     return Lookup2(key, value, &i);
00110 }
00111 
00112 bool HashTable::Lookup(char* key, int len, HashValue* value)
00113 {
00114     int i;
00115     return Lookup2(key, len, value, &i);
00116 }
00117 
00118 bool HashTable::Lookup2(char* key, HashValue* value, int* index)
00119 {
00120     unsigned int p = StringToInt(key);
00121     for(int i = 0; i < Size; ++i){
00122         int j = HashFunc(p, i);
00123         if(entries[j].key == nil){
00124             return FALSE;               // not found
00125         }
00126         else if(entries[j].key != (char*)-1
00127                                 && strcmp(entries[j].key, key) == 0){
00128             *value = entries[j].value;
00129             *index = j;
00130             return TRUE;
00131         }
00132     }
00133 
00134     return FALSE;
00135 }
00136 
00137 bool HashTable::Lookup2(char* key, int len, HashValue* value, int* index)
00138 {
00139     unsigned int p = StringToInt(key, len);
00140     for(int i = 0; i < Size; ++i){
00141         int j = HashFunc(p, i);
00142         if(entries[j].key == nil){
00143             return FALSE;               // not found
00144         }
00145         else if(entries[j].key != (char*)-1
00146                 && strncmp(entries[j].key, key, len) == 0
00147                 && entries[j].key[len] == '\0'){
00148             *value = entries[j].value;
00149             *index = j;
00150             return TRUE;
00151         }
00152     }
00153 
00154     return FALSE;
00155 }
00156 
00157 /*
00158   LookupEntries() is used to find multiple entries recorded with the
00159   same key.  It returns the entry found with the nth (>= 0) hash key.
00160   After this function completes, nth is increamented for the next try.
00161   The next entry can be found if nth is passed to LookupEntries() as is.
00162 */
00163 bool HashTable::LookupEntries(char* key, int len, HashValue* value,
00164                               int& nth)
00165 {
00166     unsigned int p = StringToInt(key, len);
00167     for(int i = nth; i < Size; ++i){
00168         int j = HashFunc(p, i);
00169         if(entries[j].key == nil){
00170             return FALSE;               // not found
00171         }
00172         else if(entries[j].key != (char*)-1
00173                 && strncmp(entries[j].key, key, len) == 0
00174                 && entries[j].key[len] == '\0'){
00175             *value = entries[j].value;
00176             nth = i + 1;
00177             return TRUE;
00178         }
00179     }
00180 
00181     return FALSE;
00182 }
00183 
00184 /*
00185   A naive implementation to calculate a prime number.
00186   This function returns the first prime number being greater
00187   than or equal to 'number'.
00188 */
00189 uint HashTable::NextPrimeNumber(uint number)
00190 {
00191     if(number < 2)
00192         return 2;
00193 
00194     for(;;){
00195         uint half = number / 2;
00196         bool prime = TRUE;
00197         for(uint i = 2; i <= half && prime; ++i)
00198             if(number % i == 0)
00199                 prime = FALSE;
00200 
00201         if(prime)
00202             return number;
00203 
00204         ++number;
00205     }
00206 }
00207 
00208 /*
00209   WARNING! When an hashtable is expanded, the elements change of position!
00210   This means that the index returned by some HashTable methods is safely valid
00211   until the next insertion of a new element. So don't store such an index for
00212   a long period!
00213 
00214   Post condition : new Size >= old Size + 2 * increment
00215 */
00216 bool HashTable::GrowTable(int increment)
00217 {
00218     HashTable bigger(0);
00219 
00220     MopWarningMessage2("The hash table is full.  ", "Expanded...");
00221 
00222     bigger.Prime2 = (int)NextPrimeNumber(Prime2 + increment);
00223     bigger.Size = (int)NextPrimeNumber(2 * bigger.Prime2);
00224     bigger.MakeTable();
00225     
00226     bool done = TRUE;
00227     for(int i = 0; done && i < Size; ++i) {
00228         char *key = this->entries[i].key;
00229         if (key != nil && key != (char*)-1)
00230             done = bool(bigger.AddDupEntry(key, strlen(key), entries[i].value)
00231                         >= 0);
00232     }
00233 
00234     if(done){
00235         entries = bigger.entries;
00236         Size = bigger.Size;
00237         Prime2 = bigger.Prime2;
00238     }
00239 
00240     return done;
00241 }
00242 
00243 // AddEntry adds a new entry to the hash table.
00244 // If succeeding, this returns an index of the added entry, otherwise -1.
00245 // Because `key' is duplicated, you can delete `key' later on.
00246 
00247 int HashTable::AddEntry(char* key, HashValue value, int* index)
00248 {
00249     unsigned int p = StringToInt(key);
00250     for(int i = 0; i < Size; ++i){
00251         int j = HashFunc(p, i);
00252         if(entries[j].key == nil || entries[j].key == (char*)-1){
00253             entries[j].key = KeyString(key);
00254             entries[j].value = value;
00255             if(index != nil)
00256                 *index = j;
00257 
00258             return j;
00259         }
00260         else if(strcmp(entries[j].key, key) == 0){
00261             if(index != nil)
00262                 *index = j;
00263 
00264             return -1;          // it is already registered.
00265         }
00266     }
00267 
00268     if(GrowTable(1000))
00269         return AddEntry(key, value, index);
00270 
00271     cerr << "HashTable overflow (key: " << key << ")\nPanic...\n";
00272     if(index != nil)
00273         *index = 0;             // no meaning
00274 
00275     return -1;
00276 }
00277 
00278 int HashTable::AddEntry(bool check_duplication,
00279                         char* key, int len, HashValue value, int* index)
00280 {
00281     int i;
00282     unsigned int p = StringToInt(key, len);
00283     for(i = 0; i < Size; ++i){
00284         int j = HashFunc(p, i);
00285         if(entries[j].key == nil || entries[j].key == (char*)-1){
00286             entries[j].key = KeyString(key, len);
00287             entries[j].value = value;
00288             if(index != nil)
00289                 *index = j;
00290 
00291             return j;
00292         }
00293         else if(check_duplication
00294                 && strncmp(entries[j].key, key, len) == 0
00295                 && entries[j].key[len] == '\0'){
00296             if(index != nil)
00297                 *index = j;
00298 
00299             return -1;          // it is already registered.
00300         }
00301     }
00302 
00303     if(GrowTable(1000))
00304         return AddEntry(check_duplication, key, len, value, index);
00305 
00306     cerr << "HashTable overflow (key: ";
00307     for(i = 0; i < len; ++i)
00308         cerr << key[i];
00309 
00310     cerr << ")\nPanic...\n";
00311     if(index != nil)
00312         *index = 0;             // no meaning
00313 
00314     return -1;
00315 }
00316 
00317 HashValue HashTable::Peek(int index)
00318 {
00319     return entries[index].value;
00320 }
00321 
00322 void HashTable::ReplaceValue(int index, HashValue val)
00323 {
00324     if(0 <= index && index < Size)
00325         entries[index].value = val;
00326     else
00327         cerr << "HashTable: invalid index (" << index << ")\n";
00328 }
00329 
00330 bool HashTable::RemoveEntry(char* key)
00331 {
00332     HashValue   u;
00333     int         index;
00334 
00335     if(!Lookup2(key, &u, &index))
00336         return FALSE;           // not found
00337     else{
00338         entries[index].key = (char*)-1;
00339         return TRUE;
00340     }
00341 }
00342 
00343 bool HashTable::RemoveEntry(char* key, int len)
00344 {
00345     HashValue   u;
00346     int         index;
00347 
00348     if(!Lookup2(key, len, &u, &index))
00349         return FALSE;           // not found
00350     else{
00351         entries[index].key = (char*)-1;
00352         return TRUE;
00353     }
00354 }
00355 
00356 unsigned int HashTable::StringToInt(char* key)
00357 {
00358     if(key == nil)
00359         return 0;
00360 
00361     unsigned int p = 0;
00362     unsigned int i, j;
00363     for(i = j = 0; key[i] != '\0'; ++i, ++j){
00364         if(j >= sizeof(unsigned int) * 8 - 7)
00365             j = 0;
00366 
00367         p += key[i] << j;
00368     }
00369 
00370     return p;
00371 }
00372 
00373 unsigned int HashTable::StringToInt(char* key, int len)
00374 {
00375     if(key == nil)
00376         return 0;
00377 
00378     unsigned int p = 0;
00379     int i;
00380     unsigned int j;
00381     for(i = j = 0; i < len; ++i, ++j){
00382         if(j >= sizeof(unsigned int) * 8 - 7)
00383             j = 0;
00384 
00385         p += key[i] << j;
00386     }
00387 
00388     return p;
00389 }
00390 
00391 int HashTable::HashFunc(unsigned int p, int n)
00392 {
00393     return((p + (unsigned int)n * (p % Prime2 + 1)) % Size);
00394 }
00395 
00396 
00397 #ifdef TEST
00398 
00399 #include <stdio.h>
00400 
00401 int main()
00402 {
00403     char        buf[128];
00404     int         v;
00405     HashTable   t;
00406 
00407     while(gets(buf) != NULL){
00408         unsigned int k = t.StringToInt(buf);
00409         printf("key %s (%d): %d\n", buf, k, t.HashFunc(k, 0));
00410         printf("key %s (%d): %d\n", buf, k, t.HashFunc(k, 1));
00411 
00412         printf("add %d, ", t.AddEntry(buf, 1));
00413         printf("lookup %d", t.Lookup(buf, v));
00414         printf("(%d)\n", v);
00415     }
00416 }
00417 
00418 #endif /* TEST */

Generated on Mon Feb 10 17:32:49 2003 for VFiasco Semantics Compiler by doxygen1.2.15