Hash table: implement load factor / rehash
Based on a suggestion from bitbckt: I saw this in my feed, and feel it merits comment. I hope you don't mind the input. You'll want to monitor the load factor of the hash table and rehash the table on insert when it is exceeded. Otherwise, key lookup will degrade toward linear time for sets of keys with a high number of collisions. The easiest way to implement the load factor is to maintain a count of allocated nodes in tvm_htab_t and divide that by the bucket count to obtain the load factor. Of course, you'd need the bucket count (HTAB_SIZE) to be dynamic, too.
This commit is contained in:
parent
e0a387d7de
commit
bc882e4db3
2 changed files with 58 additions and 8 deletions
|
|
@ -13,7 +13,9 @@ typedef struct tvm_htable_node_s
|
|||
|
||||
typedef struct tvm_htab_s
|
||||
{
|
||||
tvm_htable_node_t* nodes[HTAB_SIZE];
|
||||
unsigned int num_nodes;
|
||||
unsigned int size;
|
||||
tvm_htable_node_t** nodes;
|
||||
} tvm_htab_t;
|
||||
|
||||
/* Allocate an empty hash table with HTAB_SIZE buckets; release it with destroy_htab(). */
tvm_htab_t* create_htab();
|
||||
|
|
@ -22,6 +24,6 @@ void destroy_htab(tvm_htab_t* htab);
|
|||
/*
 * Insert a key/value pair into htab. The visible success path returns 0.
 * The table is rehashed into a larger bucket array when the load factor
 * (num_nodes / size) exceeds 0.7 after the insert.
 */
int htab_add(tvm_htab_t* htab, const char* key, int value);

/*
 * Look up key in htab. Returns -1 when no matching entry is found.
 * NOTE(review): presumably returns the stored value on a hit; confirm
 * against the definition.
 */
int htab_find(tvm_htab_t* htab, const char* key);

/* Hash the NUL-terminated string key to a bucket index in [0, size). */
unsigned int htab_hash(const char* key, const unsigned int size);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -5,7 +5,49 @@
|
|||
|
||||
tvm_htab_t* create_htab()
|
||||
{
|
||||
return (tvm_htab_t*)calloc(1, sizeof(tvm_htab_t));
|
||||
tvm_htab_t *htab = (tvm_htab_t *)malloc(sizeof(tvm_htab_t));
|
||||
htab->size = HTAB_SIZE;
|
||||
htab->nodes = (tvm_htable_node_t**)calloc(htab->size, sizeof(tvm_htable_node_t *));
|
||||
htab->num_nodes = 0;
|
||||
return htab;
|
||||
}
|
||||
|
||||
/*
 * Resize the bucket array of orig to size buckets and redistribute every
 * entry according to its hash under the new bucket count.
 *
 * Existing nodes are relinked into the new array instead of being copied
 * and freed, so the only allocation is the bucket array itself. If that
 * allocation fails, or size is 0, the table is left untouched: it stays
 * valid, merely more heavily loaded. num_nodes is unaffected because no
 * entry is added or removed.
 */
void htab_rehash(tvm_htab_t* orig, unsigned int size)
{
    unsigned int i;
    tvm_htable_node_t *node, *next;
    tvm_htable_node_t **buckets, **tail;

    /* htab_hash reduces modulo size, so zero buckets cannot be used */
    if(size == 0)
        return;

    buckets = calloc(size, sizeof *buckets);
    if(!buckets)
        return;

    /* Walk every chain of the old array, moving each node across */
    for(i = 0; i < orig->size; i++)
    {
        for(node = orig->nodes[i]; node; node = next)
        {
            /* Saved first: node->next is overwritten below */
            next = node->next;

            /* Append at the end of the destination chain so entries
               keep their relative order within a bucket */
            tail = &buckets[htab_hash(node->key, size)];
            while(*tail)
                tail = &(*tail)->next;

            node->next = NULL;
            *tail = node;
        }
    }

    /* Swap the new bucket array in for the old one */
    free(orig->nodes);
    orig->nodes = buckets;
    orig->size = size;
}
|
||||
|
||||
void destroy_htab(tvm_htab_t* htab)
|
||||
|
|
@ -13,7 +55,7 @@ void destroy_htab(tvm_htab_t* htab)
|
|||
int i;
|
||||
tvm_htable_node_t *node, *next;
|
||||
|
||||
for(i = 0; i < HTAB_SIZE; i++)
|
||||
for(i = 0; i < htab->size; i++)
|
||||
{
|
||||
node = htab->nodes[i];
|
||||
while(node)
|
||||
|
|
@ -25,12 +67,13 @@ void destroy_htab(tvm_htab_t* htab)
|
|||
}
|
||||
}
|
||||
|
||||
free(htab->nodes);
|
||||
free(htab);
|
||||
}
|
||||
|
||||
int htab_add(tvm_htab_t* htab, const char* k, int v)
|
||||
{
|
||||
int hash = htab_hash(k);
|
||||
int hash = htab_hash(k, htab->size);
|
||||
tvm_htable_node_t *node = htab->nodes[hash];
|
||||
tvm_htable_node_t *prev = NULL;
|
||||
|
||||
|
|
@ -59,12 +102,17 @@ int htab_add(tvm_htab_t* htab, const char* k, int v)
|
|||
|
||||
node->next = NULL;
|
||||
|
||||
/* Increase bucket count and rehash if the
|
||||
load factor is too high */
|
||||
if((float)++htab->num_nodes / htab->size > 0.7)
|
||||
htab_rehash(htab, htab->num_nodes * 2);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int htab_find(tvm_htab_t* htab, const char* key)
|
||||
{
|
||||
int hash = htab_hash(key);
|
||||
int hash = htab_hash(key, htab->size);
|
||||
tvm_htable_node_t *node = htab->nodes[hash];
|
||||
|
||||
while(node)
|
||||
|
|
@ -77,12 +125,12 @@ int htab_find(tvm_htab_t* htab, const char* key)
|
|||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Hash the NUL-terminated string k to a bucket index.
 *
 * k:    key to hash; must not be NULL.
 * size: number of buckets.
 *
 * Returns a value in [0, size), or 0 when size is 0 so that the modulo
 * never divides by zero.
 */
unsigned int htab_hash(const char* k, const unsigned int size)
{
    /* Bit width of the accumulator; a shift count must stay below it */
    const unsigned int width = (unsigned int)sizeof(unsigned int) * 8u;
    unsigned int hash = 1;
    const char *c;

    if(size == 0)
        return 0;

    for(c = k; *c; c++)
    {
        const unsigned int ch = (unsigned int)*c;

        /* Shifting by the raw character value is undefined once it
           reaches the accumulator width (true of every printable
           character on a 32-bit unsigned int), so reduce the count
           first. This is the result produced by hardware that masks
           the shift count, such as x86. */
        hash += (hash << (ch % width)) - ch;
    }

    return hash % size;
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue