Hash table: implement load factor / rehash

Based on suggestion from bitbckt:

I saw this in my feed, and feel it merits comment. I hope you
don't mind the input.

You'll want to monitor the load factor of the hash table and rehash
the table on insert when it is exceeded. Otherwise, key lookup will
degrade toward linear time for sets of keys with a high number of
collisions.

The easiest way to implement the load factor is to maintain a count
of allocated nodes in tvm_htab_t and divide that by the bucket count
to obtain the load factor. Of course, you'd need the bucket count
(HTAB_SIZE) to be dynamic, too.
This commit is contained in:
Bl0ckeduser 2012-01-19 17:22:41 -05:00
parent e0a387d7de
commit bc882e4db3
2 changed files with 58 additions and 8 deletions

View file

@ -13,7 +13,9 @@ typedef struct tvm_htable_node_s
/* Hash table header.  This commit replaces the fixed-size bucket array
   with a dynamically allocated one so the table can grow on rehash.
   NOTE(review): the lines below mix the removed and added versions of
   the struct, as shown in the commit's diff view. */
typedef struct tvm_htab_s
{
/* removed by this commit: fixed bucket array of HTAB_SIZE chains */
tvm_htable_node_t* nodes[HTAB_SIZE];
/* added: count of allocated nodes, used to compute the load factor */
unsigned int num_nodes;
/* added: current bucket count (replaces the HTAB_SIZE constant) */
unsigned int size;
/* added: dynamically allocated bucket array of `size` chain heads */
tvm_htable_node_t** nodes;
} tvm_htab_t;
tvm_htab_t* create_htab();
@ -22,6 +24,6 @@ void destroy_htab(tvm_htab_t* htab);
int htab_add(tvm_htab_t* htab, const char* key, int value);
int htab_find(tvm_htab_t* htab, const char* key);
/* removed: hashed over the fixed HTAB_SIZE bucket count */
unsigned int htab_hash(const char* key);
/* added: the caller now supplies the table's current bucket count */
unsigned int htab_hash(const char* key, const unsigned int size);
#endif

View file

@ -5,7 +5,49 @@
tvm_htab_t* create_htab()
{
return (tvm_htab_t*)calloc(1, sizeof(tvm_htab_t));
tvm_htab_t *htab = (tvm_htab_t *)malloc(sizeof(tvm_htab_t));
htab->size = HTAB_SIZE;
htab->nodes = (tvm_htable_node_t**)calloc(htab->size, sizeof(tvm_htable_node_t *));
htab->num_nodes = 0;
return htab;
}
/*
 * Resize the bucket array of `orig` to `size` buckets, redistributing
 * every entry.  Entries are re-inserted into a scratch table via
 * htab_add(), after which the old node and its key are freed and the
 * scratch table's fields are transplanted onto `orig`.
 *
 * On allocation failure the function returns with `orig` untouched
 * (the original version dereferenced unchecked malloc/calloc results).
 *
 * NOTE(review): this assumes htab_add() duplicates `key` — the old
 * key is freed immediately after insertion; confirm against the full
 * htab_add() definition, whose middle is elided in this commit view.
 */
void htab_rehash(tvm_htab_t* orig, unsigned int size)
{
	unsigned int i;                 /* unsigned: compared against orig->size */
	tvm_htable_node_t *node, *next;
	tvm_htab_t scratch;             /* stack header; only its bucket array is kept */

	scratch.nodes = calloc(size, sizeof *scratch.nodes);
	if(!scratch.nodes)
		return;                 /* out of memory: keep the old table intact */
	scratch.size = size;
	scratch.num_nodes = 0;

	/* Walk every chain of the original table, rehashing each entry
	   into the scratch table and freeing the original node. */
	for(i = 0; i < orig->size; i++)
	{
		node = orig->nodes[i];
		while(node)
		{
			next = node->next;
			htab_add(&scratch, node->key, node->value);
			free(node->key);
			free(node);
			node = next;
		}
	}
	free(orig->nodes);

	/* Transplant the scratch table's parameters onto the original. */
	orig->num_nodes = scratch.num_nodes;
	orig->nodes = scratch.nodes;
	orig->size = scratch.size;
}
/* Free every node in every bucket chain, then the bucket array and the
   table header itself.
   NOTE(review): the `@ ... @` lines are diff hunk markers from the
   commit view — part of the loop body (the per-node frees) is elided
   here. */
void destroy_htab(tvm_htab_t* htab)
@ -13,7 +55,7 @@ void destroy_htab(tvm_htab_t* htab)
int i;
tvm_htable_node_t *node, *next;
/* removed: iterated over the old fixed bucket count */
for(i = 0; i < HTAB_SIZE; i++)
/* added: iterate over this table's dynamic bucket count */
for(i = 0; i < htab->size; i++)
{
node = htab->nodes[i];
while(node)
@ -25,12 +67,13 @@ void destroy_htab(tvm_htab_t* htab)
}
}
/* added: the bucket array is heap-allocated now, so free it too */
free(htab->nodes);
free(htab);
}
/* Insert (k, v) into the table; returns 0 on success.
   NOTE(review): the middle of this function is elided by the hunk
   marker below — it presumably handles duplicate keys and allocates
   and fills the new node; confirm in the full source. */
int htab_add(tvm_htab_t* htab, const char* k, int v)
{
/* removed: hashed over the fixed global bucket count */
int hash = htab_hash(k);
/* added: hash over this table's current bucket count */
int hash = htab_hash(k, htab->size);
tvm_htable_node_t *node = htab->nodes[hash];
tvm_htable_node_t *prev = NULL;
@ -59,12 +102,17 @@ int htab_add(tvm_htab_t* htab, const char* k, int v)
node->next = NULL;
/* Increase bucket count and rehash if the
load factor is too high */
/* added: when num_nodes/size exceeds 0.7, grow to num_nodes*2 buckets;
   the new load factor of ~0.5 cannot re-trigger a rehash immediately */
if((float)++htab->num_nodes / htab->size > 0.7)
htab_rehash(htab, htab->num_nodes * 2);
return 0;
}
/* Look up `key`; returns -1 when the key is absent (note this is
   ambiguous with a stored value of -1).
   NOTE(review): the chain walk comparing keys is elided by the hunk
   marker below — it presumably returns the matching node's value. */
int htab_find(tvm_htab_t* htab, const char* key)
{
/* removed: hashed over the fixed global bucket count */
int hash = htab_hash(key);
/* added: hash over this table's current bucket count */
int hash = htab_hash(key, htab->size);
tvm_htable_node_t *node = htab->nodes[hash];
while(node)
@ -77,12 +125,12 @@ int htab_find(tvm_htab_t* htab, const char* key)
return -1;
}
/*
 * Hash a NUL-terminated key into a bucket index in [0, size).
 *
 * The accumulator shifts by the character value itself; shifting a
 * 32-bit unsigned by >= its width (true for any printable character)
 * — or by a negative count, possible when `char` is signed — is
 * undefined behaviour in C.  The shift count is therefore masked to
 * the low five bits, which matches what x86-class shift instructions
 * do in hardware and so preserves the historical hash values there.
 *
 * `size` must be non-zero (callers pass htab->size, which starts at
 * HTAB_SIZE); a zero size would divide by zero.
 *
 * NOTE(review): the stale pre-change signature and `% HTAB_SIZE`
 * return that the diff interleaved here have been dropped.
 */
unsigned int htab_hash(const char* k, const unsigned int size)
{
	unsigned int hash = 1;
	const char *c;
	for(c = k; *c; c++)
		hash += (hash << (*c & 31)) - *c;
	return hash % size;
}