mirror of
https://salsa.debian.org/dskoll/remind.git
synced 2026-04-16 14:28:40 +02:00
429 lines
12 KiB
C
429 lines
12 KiB
C
/***************************************************************/
|
|
/* */
|
|
/* HASHTAB.C */
|
|
/* */
|
|
/* Implementation of hash table. */
|
|
/* */
|
|
/* This file is part of REMIND. */
|
|
/* Copyright (C) 1992-2025 by Dianne Skoll */
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/* */
|
|
/***************************************************************/
|
|
|
|
/**
|
|
* \file hashtab.c
|
|
*
|
|
* \brief Implementation of hash table
|
|
*
|
|
* A hash table manages an array of buckets, each of which is the
|
|
* head of a singly-linked list. A given hash table can store items
|
|
* of a given type. The items in a hash table must be structs, and one
|
|
* of their members must be a struct hash_link object. For example,
|
|
* a hash table containing integers might have the hash objects
|
|
* defined as:
|
|
*
|
|
* struct int_object {
|
|
* int value;
|
|
* struct hash_link link;
|
|
* };
|
|
*
|
|
* When you initialize the hash table, you pass in the offset to the hash
|
|
* link. For example, to initialize a hash table designed to hold
|
|
* int_objects, you'd do something like:
|
|
*
|
|
 *    unsigned int hash_int_obj(void const *x) {
 *        return (unsigned int) ((struct int_object const *) x)->value;
 *    }
 *    int compare_int_obj(void const *a, void const *b) {
 *        return ((struct int_object const *)a)->value - ((struct int_object const *)b)->value;
 *    }
|
|
*
|
|
* hash_table tab;
|
|
* hash_table_init(&tab, offsetof(struct int_object, link), hash_int_obj, compare_int_obj);
|
|
*
|
|
* An item can be in multiple hash tables at once; just declare multiple
|
|
* hash_link members and pass in the appropriate offset to each hash
|
|
* table.
|
|
*/
|
|
|
|
#include "hashtab.h"
|
|
#include <stdlib.h>
|
|
#include <errno.h>
|
|
|
|
/*
 * The number of buckets should be a prime number.
 * Use these numbers of buckets to grow or shrink the hash table.
 * Yes, OK, the list below is probably excessive.
 */

/**
 * \brief A list of prime numbers from 7 to about 1.4 billion, approximately
 * doubling with each successive number.
 *
 * These are used as choices for the number of hash buckets in the table.
 * The list is never modified at run time, so it is declared const.
 */
static size_t const bucket_choices[] = {
    7, 17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, 43853, 87719,
    175447, 350899, 701819, 1403641, 2807303, 5614657, 11229331, 22458671,
    44917381, 89834777, 179669557, 359339171, 718678369, 1437356741 };

/** \brief Number of entries in the bucket_choices array */
#define NUM_BUCKET_CHOICES (sizeof(bucket_choices) / sizeof(bucket_choices[0]))

/**
 * \brief Number of buckets currently selected by a hash table
 *
 * t is a pointer to a hash table; its bucket_choice_index member is used
 * as an index into bucket_choices with no range check.  The argument is
 * parenthesized so that expressions such as &table expand correctly.
 */
#define NUM_BUCKETS(t) (bucket_choices[(t)->bucket_choice_index])

/**
 * \brief Locate the struct hash_link embedded in an item
 *
 * t is a pointer to a hash table and p is a pointer to an item; the result
 * is the address hash_link_offset bytes past p, as a struct hash_link *.
 * Both arguments are parenthesized so that arbitrary expressions are safe.
 */
#define LINK(t, p) ( (struct hash_link *) (((char *) (p)) + (t)->hash_link_offset) )
|
|
|
|
/**
|
|
* \brief Initialize a hash table
|
|
*
|
|
* Initializes a hash table. A given hash table can contain a collection
|
|
* of items, all of which must be the same. An item in a hash table is
|
|
* a structure and one of the elements in the structure must be a
|
|
* struct hash_link object. For example, if you are storing a collection
|
|
* of integers in a hash table, your item might look like this:
|
|
*
|
|
* struct item {
|
|
* int value;
|
|
* struct hash_link link;
|
|
* };
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
* \param link_offset The offset to the struct hash_link object within the object being put in the hash table. In the example above, it would be
|
|
* offsetof(struct item, link)
|
|
* \param hashfunc A pointer to a function that computes a hash given a pointer to an object. This function must return an unsigned int.
|
|
* \param compare A pointer to a function that compares two objects. It must
|
|
* return 0 if they compare equal and non-zero if they do not.
|
|
*
|
|
* \return 0 on success, -1 on failure (and errno is set appropriately)
|
|
*/
|
|
int
|
|
hash_table_init(hash_table *t,
|
|
size_t link_offset,
|
|
unsigned int (*hashfunc)(void const *x),
|
|
int (*compare)(void const *a, void const *b))
|
|
{
|
|
t->bucket_choice_index = 0;
|
|
t->num_entries = 0;
|
|
t->hash_link_offset = link_offset;
|
|
t->hashfunc = hashfunc;
|
|
t->compare = compare;
|
|
t->buckets = malloc(sizeof(void *) * bucket_choices[0]);
|
|
t->num_growths = 0;
|
|
t->num_shrinks = 0;
|
|
if (!t->buckets) {
|
|
return -1;
|
|
}
|
|
for (size_t i=0; i<bucket_choices[0]; i++) {
|
|
t->buckets[i] = NULL;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* \brief Free memory used by a hash table
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
*/
|
|
void
|
|
hash_table_free(hash_table *t)
|
|
{
|
|
free(t->buckets);
|
|
t->buckets = NULL;
|
|
t->bucket_choice_index = -1;
|
|
t->num_entries = 0;
|
|
}
|
|
|
|
/**
|
|
* \brief Return the number of items in a hash table
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
*
|
|
* \return The number of items in the hash table
|
|
*/
|
|
size_t
|
|
hash_table_num_entries(hash_table const *t)
|
|
{
|
|
return t->num_entries;
|
|
}
|
|
|
|
/**
|
|
* \brief Return the number of buckets in a hash table
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
*
|
|
* \return The number of buckets in the hash table
|
|
*/
|
|
size_t
|
|
hash_table_num_buckets(hash_table const *t)
|
|
{
|
|
if (t->bucket_choice_index >= NUM_BUCKET_CHOICES) {
|
|
return 0;
|
|
}
|
|
|
|
return NUM_BUCKETS(t);
|
|
}
|
|
|
|
/**
|
|
* \brief Return the length of the i'th bucket chain
|
|
*
|
|
* If i >= num_buckets, returns (size_t) -1
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
* \param i The bucket whose length we want (0 to num_buckets-1)
|
|
* \return The length of the i'th bucket chain
|
|
*/
|
|
size_t
|
|
hash_table_chain_len(hash_table *t, size_t i)
|
|
{
|
|
if (i >= hash_table_num_buckets(t)) {
|
|
return (size_t) -1;
|
|
}
|
|
size_t len = 0;
|
|
void *ptr = t->buckets[i];
|
|
while(ptr) {
|
|
len++;
|
|
ptr = LINK(t, ptr)->next;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/**
|
|
* \brief Resize a hash table
|
|
*
|
|
* Resizes (either grows or shrinks) a hash table's bucket array
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
* \param dir Must be either 1 (to increase the bucket array size) or
|
|
* -1 (to decrease it).
|
|
* \return 0 on success, non-zero if resizing fails. NOTE: Currently, resizing
|
|
* cannot fail; if we fail to allocate memory for the new bucket array,
|
|
* we just keep the existing array. This behaviour may change in future.
|
|
*/
|
|
static int
|
|
hash_table_resize(hash_table *t, int dir)
|
|
{
|
|
if (dir != 1 && dir != -1) {
|
|
return 0;
|
|
}
|
|
if ((dir == -1 && t->bucket_choice_index == 0) ||
|
|
(dir == 1 && t->bucket_choice_index == NUM_BUCKET_CHOICES-1)) {
|
|
return 0;
|
|
}
|
|
|
|
size_t num_old_buckets = bucket_choices[t->bucket_choice_index];
|
|
size_t num_new_buckets = bucket_choices[t->bucket_choice_index + dir];
|
|
|
|
void **new_buckets = malloc(sizeof(void *) * num_new_buckets);
|
|
if (!new_buckets) {
|
|
/* Out of memory... just don't resize? */
|
|
return 0;
|
|
}
|
|
if (dir == 1) {
|
|
t->num_growths++;
|
|
} else {
|
|
t->num_shrinks++;
|
|
}
|
|
for (size_t j=0; j<num_new_buckets; j++) {
|
|
new_buckets[j] = NULL;
|
|
}
|
|
|
|
/* Move everything from the old buckets into the new */
|
|
for (size_t i=0; i<num_old_buckets; i++) {
|
|
void *p = t->buckets[i];
|
|
while(p) {
|
|
struct hash_link *l = LINK(t, p);
|
|
void *nxt = l->next;
|
|
size_t j = l->hashval % num_new_buckets;
|
|
l->next = new_buckets[j];
|
|
new_buckets[j] = p;
|
|
p = nxt;
|
|
}
|
|
}
|
|
free(t->buckets);
|
|
t->buckets = new_buckets;
|
|
t->bucket_choice_index += dir;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* \brief Insert an item into a hash table
|
|
*
|
|
* Inserts an item into a hash table. The item MUST NOT be freed as
|
|
* long as it is in a hash table
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
* \param item Pointer to the item to insert
|
|
*
|
|
* \return 0 on success, -1 on failure (and errno is set appropriately)
|
|
*/
|
|
int
|
|
hash_table_insert(hash_table *t, void *item)
|
|
{
|
|
if (!item) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
unsigned int v = t->hashfunc(item);
|
|
|
|
struct hash_link *l = LINK(t, item);
|
|
l->hashval = v;
|
|
|
|
v = v % NUM_BUCKETS(t);
|
|
|
|
l->next = t->buckets[v];
|
|
t->buckets[v] = item;
|
|
t->num_entries++;
|
|
|
|
/* Grow table for load factor > 2 */
|
|
if (t->bucket_choice_index < NUM_BUCKET_CHOICES-1 &&
|
|
t->num_entries > 2 * NUM_BUCKETS(t)) {
|
|
return hash_table_resize(t, 1);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* \brief Find an item in a hash table
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
* \param candidate Pointer to an object to be sought in the table
|
|
*
|
|
* \return A pointer to the object if one that matches candidate is found. NULL if not found
|
|
*/
|
|
void *
|
|
hash_table_find(hash_table *t, void *candidate)
|
|
{
|
|
if (!candidate) {
|
|
return NULL;
|
|
}
|
|
|
|
unsigned int v = t->hashfunc(candidate);
|
|
|
|
void *ptr = t->buckets[v % NUM_BUCKETS(t)];
|
|
|
|
while(ptr) {
|
|
if (!t->compare(candidate, ptr)) {
|
|
return ptr;
|
|
}
|
|
ptr = LINK(t, ptr)->next;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* \brief Delete an item from a hash table
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
* \param candidate Pointer to an object that is in the table and must be removed from it
|
|
* \param resize_ok If non-zero, then it's OK to resize the hash table.
|
|
*
|
|
* \return 0 on success, -1 on failure
|
|
*/
|
|
static int
|
|
hash_table_delete_helper(hash_table *t, void *item, int resize_ok)
|
|
{
|
|
if (!item) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
struct hash_link *l = LINK(t, item);
|
|
unsigned int v = l->hashval;
|
|
|
|
v = v % NUM_BUCKETS(t);
|
|
|
|
if (t->buckets[v] == item) {
|
|
t->buckets[v] = l->next;
|
|
t->num_entries--;
|
|
if (resize_ok) {
|
|
/* Shrink table for load factor < 1 */
|
|
if (t->bucket_choice_index > 0 &&
|
|
t->num_entries < NUM_BUCKETS(t) / 2) {
|
|
return hash_table_resize(t, -1);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void *ptr = t->buckets[v];
|
|
while(ptr) {
|
|
struct hash_link *l2 = LINK(t, ptr);
|
|
if (l2->next == item) {
|
|
l2->next = l->next;
|
|
t->num_entries--;
|
|
/* Shrink table for load factor < 1 */
|
|
if (resize_ok) {
|
|
if (t->bucket_choice_index > 0 &&
|
|
t->num_entries < NUM_BUCKETS(t) / 2) {
|
|
return hash_table_resize(t, -1);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
ptr = l2->next;
|
|
}
|
|
|
|
/* Item not found in hash table */
|
|
errno = ENOENT;
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
hash_table_delete(hash_table *t, void *item)
|
|
{
|
|
return hash_table_delete_helper(t, item, 1);
|
|
}
|
|
|
|
int
|
|
hash_table_delete_no_resize(hash_table *t, void *item)
|
|
{
|
|
return hash_table_delete_helper(t, item, 0);
|
|
}
|
|
|
|
/**
|
|
* \brief Iterate to the next item in a hash table
|
|
*
|
|
* Acts as an iterator. Given a pointer to an item in the hash
|
|
* table, returns the next item, or NULL if no more items. If the
|
|
* existing-item pointer is supplied as NULL, returns a pointer to the
|
|
* first item in the hash table. You can therefore iterate across the
|
|
* hash table like this*
|
|
*
|
|
* void *item = NULL;
|
|
* while ( (item = hash_table_next(&table, item) ) != NULL) {
|
|
* // Do something with item
|
|
* }
|
|
*
|
|
* NOTE that you MUST NOT modify the hash table while iterating over it.
|
|
*
|
|
* \param t Pointer to a hash_table object
|
|
* \param cur The current item. Supply as NULL to get the first item
|
|
*
|
|
* \return A pointer to the next item in the hash table, or NULL if there
|
|
* are no more items
|
|
*/
|
|
void *
|
|
hash_table_next(hash_table *t, void *cur)
|
|
{
|
|
size_t n_buckets = NUM_BUCKETS(t);
|
|
|
|
size_t start_bucket = 0;
|
|
if (cur) {
|
|
struct hash_link *l = LINK(t, cur);
|
|
if (l->next) {
|
|
return l->next;
|
|
}
|
|
/* End of this chain; start searching at the next bucket */
|
|
start_bucket = (l->hashval % n_buckets) + 1;
|
|
}
|
|
|
|
for (size_t i=start_bucket; i<n_buckets; i++) {
|
|
if (t->buckets[i]) {
|
|
return t->buckets[i];
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|