/***************************************************************/
/*                                                             */
/*  HASHTAB.C                                                  */
/*                                                             */
/*  Implementation of hash table.                              */
/*                                                             */
/*  This file is part of REMIND.                               */
/*  Copyright (C) 1992-2025 by Dianne Skoll                    */
/*  SPDX-License-Identifier: GPL-2.0-only                      */
/*                                                             */
/***************************************************************/

/**
 * \file hashtab.c
 *
 * \brief Implementation of hash table
 *
 * A hash table manages an array of buckets, each of which is the
 * head of a singly-linked list.  A given hash table can store items
 * of a given type.  The items in a hash table must be structs, and one
 * of their members must be a struct hash_link object.  For example,
 * a hash table containing integers might have the hash objects
 * defined as:
 *
 * struct int_object {
 *     int value;
 *     struct hash_link link;
 * };
 *
 * When you initialize the hash table, you pass in the offset to the hash
 * link.  For example, to initialize a hash table designed to hold
 * int_objects, you'd do something like:
 *
 * unsigned int hash_int_obj(void const *x) {
 *     return (unsigned int) ((struct int_object const *) x)->value;
 * }
 * int compare_int_obj(void const *a, void const *b) {
 *     return ((struct int_object const *) a)->value !=
 *            ((struct int_object const *) b)->value;
 * }
 *
 * hash_table tab;
 * hash_table_init(&tab, offsetof(struct int_object, link),
 *                 hash_int_obj, compare_int_obj);
 *
 * An item can be in multiple hash tables at once; just declare multiple
 * hash_link members and pass in the appropriate offset to each hash
 * table.
 */

#include "hashtab.h"
#include <stdlib.h>
#include <errno.h>

/*
 * The number of buckets should be a prime number.
 * Use these numbers of buckets to grow or shrink the hash table.
 * Yes, OK, the list below is probably excessive.
 */

/**
 * \brief A list of prime numbers from 7 to about 1.4 billion, approximately
 * doubling with each successive number.
 *
 * These are used as choices for the number of hash buckets in the table
 */
static size_t const bucket_choices[] = {
    7, 17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, 43853,
    87719, 175447, 350899, 701819, 1403641, 2807303, 5614657, 11229331,
    22458671, 44917381, 89834777, 179669557, 359339171, 718678369,
    1437356741
};

/* Number of entries in bucket_choices */
#define NUM_BUCKET_CHOICES (sizeof(bucket_choices) / sizeof(bucket_choices[0]))

/* Current number of buckets in table t */
#define NUM_BUCKETS(t) (bucket_choices[(t)->bucket_choice_index])

/* Given an item pointer p stored in table t, locate its embedded hash_link */
#define LINK(t, p) ((struct hash_link *) (((char *) (p)) + (t)->hash_link_offset))

/**
 * \brief Initialize a hash table
 *
 * Initializes a hash table.  A given hash table can contain a collection
 * of items, all of which must be of the same type.  An item in a hash
 * table is a structure and one of the elements in the structure must be a
 * struct hash_link object.  For example, if you are storing a collection
 * of integers in a hash table, your item might look like this:
 *
 * struct item {
 *     int value;
 *     struct hash_link link;
 * };
 *
 * \param t Pointer to a hash_table object
 * \param link_offset The offset to the struct hash_link object within the
 * object being put in the hash table.  In the example above, it would be
 * offsetof(struct item, link)
 * \param hashfunc A pointer to a function that computes a hash given a
 * pointer to an object.  This function must return an unsigned int.
 * \param compare A pointer to a function that compares two objects.  It must
 * return 0 if they compare equal and non-zero if they do not.
 *
 * \return 0 on success, -1 on failure (and errno is set appropriately)
 */
int
hash_table_init(hash_table *t,
                size_t link_offset,
                unsigned int (*hashfunc)(void const *x),
                int (*compare)(void const *a, void const *b))
{
    t->bucket_choice_index = 0;
    t->num_entries = 0;
    t->hash_link_offset = link_offset;
    t->hashfunc = hashfunc;
    t->compare = compare;
    t->buckets = malloc(sizeof(void *) * bucket_choices[0]);
    t->num_growths = 0;
    t->num_shrinks = 0;
    if (!t->buckets) {
        return -1;
    }
    /* Every chain starts out empty */
    for (size_t i = 0; i < bucket_choices[0]; i++) {
        t->buckets[i] = NULL;
    }
    return 0;
}

/**
 * \brief Free memory used by a hash table
 *
 * Only the bucket array is released; the items themselves are not
 * touched.  Afterwards hash_table_num_buckets() reports 0.
 *
 * \param t Pointer to a hash_table object
 */
void
hash_table_free(hash_table *t)
{
    free(t->buckets);
    t->buckets = NULL;
    /* Deliberately out of range so hash_table_num_buckets() returns 0 */
    t->bucket_choice_index = -1;
    t->num_entries = 0;
}

/**
 * \brief Return the number of items in a hash table
 *
 * \param t Pointer to a hash_table object
 *
 * \return The number of items in the hash table
 */
size_t
hash_table_num_entries(hash_table const *t)
{
    return t->num_entries;
}

/**
 * \brief Return the number of buckets in a hash table
 *
 * \param t Pointer to a hash_table object
 *
 * \return The number of buckets in the hash table, or 0 if the table's
 * bucket index is out of range (as it is after hash_table_free)
 */
size_t
hash_table_num_buckets(hash_table const *t)
{
    if (t->bucket_choice_index >= NUM_BUCKET_CHOICES) {
        return 0;
    }
    return NUM_BUCKETS(t);
}

/**
 * \brief Return the length of the i'th bucket chain
 *
 * If i >= num_buckets, returns (size_t) -1
 *
 * \param t Pointer to a hash_table object
 * \param i The bucket whose length we want (0 to num_buckets-1)
 * \return The length of the i'th bucket chain
 */
size_t
hash_table_chain_len(hash_table *t, size_t i)
{
    if (i >= hash_table_num_buckets(t)) {
        return (size_t) -1;
    }
    size_t len = 0;
    for (void *ptr = t->buckets[i]; ptr; ptr = LINK(t, ptr)->next) {
        len++;
    }
    return len;
}

/**
 * \brief Resize a hash table
 *
 * Resizes (either grows or shrinks) a hash table's bucket array
 *
 * \param t Pointer to a hash_table object
 * \param dir Must be either 1 (to increase the bucket array size) or
 * -1 (to decrease it).
 * \return 0 on success, non-zero if resizing fails.  NOTE: Currently, resizing
 * cannot fail; if we fail to allocate memory for the new bucket array,
 * we just keep the existing array.  This behaviour may change in future.
 */
static int
hash_table_resize(hash_table *t, int dir)
{
    if (dir != 1 && dir != -1) {
        return 0;
    }
    /* Already at the smallest / largest size: nothing to do */
    if ((dir == -1 && t->bucket_choice_index == 0) ||
        (dir == 1 && t->bucket_choice_index == NUM_BUCKET_CHOICES-1)) {
        return 0;
    }

    size_t old_index = t->bucket_choice_index;
    size_t new_index = (dir == 1) ? old_index + 1 : old_index - 1;
    size_t num_old_buckets = bucket_choices[old_index];
    size_t num_new_buckets = bucket_choices[new_index];

    /* Refuse a request whose byte count would wrap around size_t;
       treat it exactly like an allocation failure. */
    if (num_new_buckets > ((size_t) -1) / sizeof(void *)) {
        return 0;
    }

    void **new_buckets = malloc(sizeof(void *) * num_new_buckets);
    if (!new_buckets) {
        /* Out of memory... just don't resize? */
        return 0;
    }

    if (dir == 1) {
        t->num_growths++;
    } else {
        t->num_shrinks++;
    }

    for (size_t j = 0; j < num_new_buckets; j++) {
        new_buckets[j] = NULL;
    }

    /* Move everything from the old buckets into the new.  The hash
       value cached in each link is reused, so hashfunc is not called. */
    for (size_t i = 0; i < num_old_buckets; i++) {
        void *p = t->buckets[i];
        while (p) {
            struct hash_link *l = LINK(t, p);
            void *nxt = l->next;
            size_t j = l->hashval % num_new_buckets;
            l->next = new_buckets[j];
            new_buckets[j] = p;
            p = nxt;
        }
    }

    free(t->buckets);
    t->buckets = new_buckets;
    if (dir == 1) {
        t->bucket_choice_index++;
    } else {
        t->bucket_choice_index--;
    }
    return 0;
}

/**
 * \brief Insert an item into a hash table
 *
 * Inserts an item into a hash table.  The item MUST NOT be freed as
 * long as it is in a hash table.  No check for duplicates is made; the
 * item is pushed onto the front of its bucket's chain.
 *
 * \param t Pointer to a hash_table object
 * \param item Pointer to the item to insert
 *
 * \return 0 on success, -1 on failure (and errno is set appropriately)
 */
int
hash_table_insert(hash_table *t, void *item)
{
    if (!item) {
        errno = EINVAL;
        return -1;
    }

    unsigned int v = t->hashfunc(item);
    struct hash_link *l = LINK(t, item);

    /* Cache the full hash so resizing and deletion need not recompute it */
    l->hashval = v;

    v = v % NUM_BUCKETS(t);
    l->next = t->buckets[v];
    t->buckets[v] = item;
    t->num_entries++;

    /* Grow table for load factor > 2 */
    if (t->bucket_choice_index < NUM_BUCKET_CHOICES-1 &&
        t->num_entries > 2 * NUM_BUCKETS(t)) {
        return hash_table_resize(t, 1);
    }
    return 0;
}

/**
 * \brief Find an item in a hash table
 *
 * \param t Pointer to a hash_table object
 * \param candidate Pointer to an object to be sought in the table
 *
 * \return A pointer to the object if one that matches candidate is found.
 * NULL if not found
 */
void *
hash_table_find(hash_table *t, void *candidate)
{
    if (!candidate) {
        return NULL;
    }

    unsigned int v = t->hashfunc(candidate);
    for (void *ptr = t->buckets[v % NUM_BUCKETS(t)];
         ptr;
         ptr = LINK(t, ptr)->next) {
        /* compare() returns 0 when the two objects are equal */
        if (!t->compare(candidate, ptr)) {
            return ptr;
        }
    }
    return NULL;
}

/**
 * \brief Shrink a hash table's bucket array if it has become sparse
 *
 * The array is shrunk by one step when the load factor drops below 1/2.
 *
 * \param t Pointer to a hash_table object
 *
 * \return The result of hash_table_resize if a shrink was attempted,
 * otherwise 0
 */
static int
hash_table_shrink_if_sparse(hash_table *t)
{
    if (t->bucket_choice_index > 0 &&
        t->num_entries < NUM_BUCKETS(t) / 2) {
        return hash_table_resize(t, -1);
    }
    return 0;
}

/**
 * \brief Delete an item from a hash table
 *
 * The item is located by pointer identity in the chain selected by the
 * hash value cached in its link; neither hashfunc nor compare is called.
 *
 * \param t Pointer to a hash_table object
 * \param item Pointer to an object that is in the table and must be
 * removed from it
 * \param resize_ok If non-zero, then it's OK to resize the hash table.
 *
 * \return 0 on success, -1 on failure (errno is set to EINVAL if item is
 * NULL, or ENOENT if item is not in the table)
 */
static int
hash_table_delete_helper(hash_table *t, void *item, int resize_ok)
{
    if (!item) {
        errno = EINVAL;
        return -1;
    }

    struct hash_link *l = LINK(t, item);
    size_t bucket = l->hashval % NUM_BUCKETS(t);
    int found = 0;

    if (t->buckets[bucket] == item) {
        /* Item is the head of its chain */
        t->buckets[bucket] = l->next;
        found = 1;
    } else {
        /* Look for the item's predecessor in the chain */
        for (void *ptr = t->buckets[bucket]; ptr; ptr = LINK(t, ptr)->next) {
            struct hash_link *prev = LINK(t, ptr);
            if (prev->next == item) {
                prev->next = l->next;
                found = 1;
                break;
            }
        }
    }

    if (!found) {
        /* Item not found in hash table */
        errno = ENOENT;
        return -1;
    }

    t->num_entries--;
    if (resize_ok) {
        return hash_table_shrink_if_sparse(t);
    }
    return 0;
}

/**
 * \brief Delete an item from a hash table, shrinking the table if it
 * has become sparse
 *
 * \param t Pointer to a hash_table object
 * \param item Pointer to an object that is in the table and must be
 * removed from it
 *
 * \return 0 on success, -1 on failure (and errno is set appropriately)
 */
int
hash_table_delete(hash_table *t, void *item)
{
    return hash_table_delete_helper(t, item, 1);
}

/**
 * \brief Delete an item from a hash table without ever resizing the
 * bucket array
 *
 * \param t Pointer to a hash_table object
 * \param item Pointer to an object that is in the table and must be
 * removed from it
 *
 * \return 0 on success, -1 on failure (and errno is set appropriately)
 */
int
hash_table_delete_no_resize(hash_table *t, void *item)
{
    return hash_table_delete_helper(t, item, 0);
}

/**
 * \brief Iterate to the next item in a hash table
 *
 * Acts as an iterator.  Given a pointer to an item in the hash
 * table, returns the next item, or NULL if no more items.  If the
 * existing-item pointer is supplied as NULL, returns a pointer to the
 * first item in the hash table.  You can therefore iterate across the
 * hash table like this:
 *
 * void *item = NULL;
 * while ( (item = hash_table_next(&table, item) ) != NULL) {
 *     // Do something with item
 * }
 *
 * NOTE that you MUST NOT modify the hash table while iterating over it.
 *
 * \param t Pointer to a hash_table object
 * \param cur The current item.  Supply as NULL to get the first item
 *
 * \return A pointer to the next item in the hash table, or NULL if there
 * are no more items
 */
void *
hash_table_next(hash_table *t, void *cur)
{
    size_t n_buckets = NUM_BUCKETS(t);
    size_t start_bucket = 0;

    if (cur) {
        struct hash_link *l = LINK(t, cur);
        if (l->next) {
            return l->next;
        }
        /* End of this chain; start searching at the next bucket */
        start_bucket = (l->hashval % n_buckets) + 1;
    }

    /* Return the head of the first non-empty bucket at or after start */
    for (size_t i = start_bucket; i < n_buckets; i++) {
        if (t->buckets[i]) {
            return t->buckets[i];
        }
    }
    return NULL;
}