/*
 *  Text hash table:
 */

/*
 *  A hash table in which each key is a text string.
 *  A node may have both an integer 'count' value and
 *  a 'data' void pointer.
 *  Neither of these user-defined values is touched by
 *  the functions below, except 'count' can be evaluated
 *  to order the nodes into a list (but is not changed).
 *
 *  The table will dynamically resize to maintain efficiency.
 *
 *  Idea: the hash table resizes itself when certain conditions
 *  are met. These conditions are:
 *    Increase: num_nodes >= table_size * 16 (insertion) or
 *    Decrease: num_nodes <  table_size * 4 (deletion)
 *
 *  Number of nodes (num_nodes) = total number of things in table.
 *  Table size (table_size) = the size of the base hash table array.
 *  Each table entry points to a bucket (head of a linked list).
 *
 *  Note: the keys and values are not copied, for efficiency.
 *  If reading keys and/or values into a common buffer, they should
 *  be duplicated before storing them into the table. Later they
 *  should be deleted using the traversal function, before the
 *  table itself is deleted. This duplication and deletion of
 *  keys and values is unnecessary if the keys and/or values are
 *  compiled-in (static) text strings.
 *  Since values are only ever stored into the table, and never
 *  otherwise referenced, they could in fact be pointers to any
 *  kind of data.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "app.h"
#include "TextTable.h"

enum {
	HASH_MAX_RATIO	= 16,
	HASH_MIN_RATIO	= 4,
	HASH_MULT	= 37
};

/*
 *  Internal hashing function.
 */
static unsigned int hash_text(char *key, unsigned int keylen)
{
	unsigned char *ch;
	unsigned int h = 0;
	for (ch = (unsigned char *) key; keylen--; ch++)
		h = h * HASH_MULT + *ch;
	return h;
}

/*
 *  Internal comparison function for locating nodes.
 */
static int compare_texts(char *key1, unsigned int keylen1,
			char *key2, unsigned int keylen2)
{
	int result;
	unsigned int minimum;

	minimum = keylen1;
	if (minimum > keylen2)
		minimum = keylen2;
	result = memcmp(key1, key2, minimum);
	if (result == 0)
		if (keylen1 == keylen2)
			return 0;
		else if (keylen1 < keylen2)
			return -1;
		else
			return 1;
	else
		return result;
}

/*
 *  Create a new empty text hash table.
 */
TextTable * new_text_table(void)
{
	TextTable *table;

	table = app_alloc(sizeof(TextTable));
	table->size = 1;
	table->num_nodes = 0;
	table->nodes = app_alloc(sizeof(TextNode *));
	table->nodes[0] = NULL;
	return table;
}

/*
 *  Delete the table and all nodes.
 *  Keys and values are untouched by this function.
 */
void del_text_table(TextTable *table)
{
	unsigned int i, size;
	TextNode **nodes;
	TextNode *n;
	TextNode *next;

	size = table->size;
	nodes = table->nodes;

	for (i=0; i < size; i++) {
		for (n=nodes[i]; n; n=next) {
			next = n->next;
			app_free(n);
		}
	}
	app_free(nodes);
	app_free(table);
}

/*
 *  Traverse all nodes in the table, applying the function to each.
 *  Pass the supplied 'extra' pointer to the 'visit' function too.
 */
void traverse_text_table(TextTable *table,
			TextNodeVisitor visit, void *extra)
{
	unsigned int i, size;
	TextNode **nodes;
	TextNode *n;
	TextNode *next;

	size = table->size;
	nodes = table->nodes;

	for (i=0; i < size; i++) {
		for (n=nodes[i]; n; n=next) {
			next = n->next;
			visit(n, extra);
		}
	}
}

/*
 *  Find a node in the table by specifying its key.
 */
TextNode * locate_text_node(TextTable *table,
			char *key, unsigned int keylen)
{
	unsigned int h;
	TextNode *n;

	h = hash_text(key, keylen);
	for (n=table->nodes[h % table->size]; n; n=n->next)
		if (compare_texts(key, keylen, n->key, n->keylen) == 0)
			return n;
	return n;
}

/*
 *  Internal function used to resize the table.
 *  Create a new array and reinsert all entries.
 *  There is no need to rehash any entries, since the complete hash
 *  of each entry has previously been calculated and remembered.
 */
static void reinsert_all(TextTable *table, unsigned int new_size)
{
	unsigned int h, i, bytes, old_size;
	TextNode **new_nodes;
	TextNode **old_nodes;
	TextNode *n;
	TextNode *next;

	bytes = sizeof(TextNode *) * new_size;
	new_nodes = app_alloc(bytes);
	for (i=0; i < new_size; i++)
		new_nodes[i] = NULL;

	old_size = table->size;
	old_nodes = table->nodes;
	for (i=0; i < old_size; i++) {
		for (n=old_nodes[i]; n; n=next) {
			next = n->next;
			h = n->hash % new_size;
			n->next = new_nodes[h];
			new_nodes[h] = n;
		}
	}
	app_free(old_nodes);
	table->nodes = new_nodes;
	table->size = new_size;
}

/*
 *  Insert a node into the table with the given key.
 *  If a node with that key already exists, it is returned.
 *  Node values (data, count, order) are initialised to zero.
 */
TextNode * insert_text_node(TextTable *table,
			char *key, unsigned int keylen)
{
	unsigned int h;
	TextNode *n;

	/* try to locate that node first */
	h = hash_text(key, keylen);
	for (n=table->nodes[h % table->size]; n; n=n->next) {
		if (compare_texts(key, keylen, n->key, n->keylen) == 0)
			break;
	}
	if (n != NULL) {
		return n;
	}

	/* check if the table needs its size increased */
	if ( ((table->num_nodes +1) / HASH_MAX_RATIO) >= table->size )
		reinsert_all(table, table->size *2);
	table->num_nodes++;

	/* create a new node and insert it */
	n = app_alloc(sizeof(TextNode));
	n->next = NULL;
	n->hash = h;
	n->key = key;
	n->keylen = keylen;
	n->data = NULL;
	n->count = 0;
	n->order = 0;
	h = h % table->size;
	n->next = table->nodes[h];
	table->nodes[h] = n;

	return n;
}

/*
 *  Remove the node with the given key (if any) from the table.
 *  The node is not deleted, simply returned.
 */
TextNode * remove_text_node(TextTable *table,
			char *key, unsigned int keylen)
{
	unsigned int h, new_size;
	TextNode *n;
	TextNode *prev = NULL;

	/* find the node */
	h = hash_text(key, keylen);
	for (n=table->nodes[h % table->size]; n; n=n->next) {
		if (compare_texts(key, keylen, n->key, n->keylen) == 0)
		{
			/* unlink the found node */
			if (prev)
				prev->next = n->next;
			else
				table->nodes[h] = n->next;
			n->next = NULL;
			break;
		}
		prev = n;
	}
	if (n == NULL)
		return NULL;

	/* check if the table needs its size decreased */
	if (table->num_nodes -1 < table->size * HASH_MIN_RATIO) {
		new_size = table->size / 2;
		if (new_size <= 0)
			new_size = 1;
		if (new_size < table->size)
			reinsert_all(table, new_size);
	}
	table->num_nodes--;

	return n;
}

/*
 *  Helper comparison function for the sorting function below.
 */
static int order_text_node(const void *elem1, const void *elem2)
{
	TextNode *t1, *t2;

	t1 = *((TextNode **) elem1);
	t2 = *((TextNode **) elem2);

	if (t1->count < t2->count)
		return -1;
	else if (t1->count > t2->count)
		return 1;
	return 0;
}

/*
 *  Return an array of all nodes in the table,
 *  sorted by ascending 'count'.
 *  As a side-effect, the 'order' field in each node is set to the
 *  position within the array of that node, starting from zero.
 */
TextNode **order_text_nodes(TextTable *table)
{
	unsigned int i, num;
	TextNode *n;
	TextNode **array;

	array = app_alloc(table->num_nodes * sizeof(TextNode *));
	if (array == NULL)
		return NULL;

	for (i=num=0; i < table->size; i++) {
		for (n = table->nodes[i]; n != NULL; n = n->next)
			array[num++] = n;
	}
	qsort(array, num, sizeof(TextNode *), order_text_node);
	for (i=0; i < num; i++)
		array[i]->order = i;

	return array;
}
