tinyvm/libtvm/tvm_hashtab.c
Bl0ckeduser bc882e4db3 Hash table: implement load factor / rehash
Based on suggestion from bitbckt:

I  saw  this  in  my feed, and feel it merits comment. I hope you
don't mind the input.

You'll want to monitor the load factor of the hash table and  re-
hash  the  table  on  insert  when it is exceeded. Otherwise, key
lookup will degrade toward linear time for sets of  keys  with  a
high number of collisions.

The  easiest  way  to  implement the load factor is to maintain a
count of allocated nodes in tvm_htab_t and  divide  that  by  the
bucket count to obtain the load factor. Of course, you'd need the
bucket count (HTAB_SIZE) to be dynamic, too.
2012-01-19 17:22:41 -05:00

136 lines
2.5 KiB
C

#include <stdlib.h>
#include <string.h>
#include <tvm/tvm_hashtab.h>
tvm_htab_t* create_htab()
{
tvm_htab_t *htab = (tvm_htab_t *)malloc(sizeof(tvm_htab_t));
htab->size = HTAB_SIZE;
htab->nodes = (tvm_htable_node_t**)calloc(htab->size, sizeof(tvm_htable_node_t *));
htab->num_nodes = 0;
return htab;
}
void htab_rehash(tvm_htab_t* orig, unsigned int size)
{
int i;
tvm_htable_node_t *node, *next;
tvm_htab_t *new;
new = (tvm_htab_t *)malloc(sizeof(tvm_htab_t));
new->nodes = (tvm_htable_node_t**)calloc(size, sizeof(tvm_htable_node_t *));
new->size = size;
new->num_nodes = 0;
/* Traverse the original hash table, rehashing
every entry into the new table and deleting
original entries */
for(i = 0; i < orig->size; i++)
{
node = orig->nodes[i];
while(node)
{
next = node->next;
htab_add(new, node->key, node->value);
free(node->key);
free(node);
node = next;
}
}
free(orig->nodes);
/* Transpose the new hash table's parameters
on to the old one */
orig->num_nodes = new->num_nodes;
orig->nodes = new->nodes;
orig->size = new->size;
free(new);
}
void destroy_htab(tvm_htab_t* htab)
{
int i;
tvm_htable_node_t *node, *next;
for(i = 0; i < htab->size; i++)
{
node = htab->nodes[i];
while(node)
{
next = node->next;
free(node->key);
free(node);
node = next;
}
}
free(htab->nodes);
free(htab);
}
int htab_add(tvm_htab_t* htab, const char* k, int v)
{
int hash = htab_hash(k, htab->size);
tvm_htable_node_t *node = htab->nodes[hash];
tvm_htable_node_t *prev = NULL;
if(node)
{
while(node->next)
node = node->next;
prev = node;
node = node->next;
}
/* Allocate space, and copy the key/value pair. */
node = calloc(1, sizeof(tvm_htable_node_t));
node->key = (char*)malloc(sizeof(char) * (strlen(k) + 1));
strcpy(node->key, k);
node->value = v;
if(prev)
prev->next = node;
else
htab->nodes[hash] = node; /* root node */
node->next = NULL;
/* Increase bucket count and rehash if the
load factor is too high */
if((float)++htab->num_nodes / htab->size > 0.7)
htab_rehash(htab, htab->num_nodes * 2);
return 0;
}
int htab_find(tvm_htab_t* htab, const char* key)
{
int hash = htab_hash(key, htab->size);
tvm_htable_node_t *node = htab->nodes[hash];
while(node)
{
if(!strcmp(node->key, key))
return node->value;
node = node->next;
}
return -1;
}
unsigned int htab_hash(const char* k, const unsigned int size)
{
unsigned int hash = 1;
char* c; for(c = (char*)k; *c; c++)
hash += (hash << *c) - *c;
return hash % size;
}