diff --git a/src/Makefile.in b/src/Makefile.in index a5b13c48..a4fabaae 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -27,12 +27,12 @@ MANS= $(srcdir)/../man/rem2ps.1 $(srcdir)/../man/remind.1 \ .SUFFIXES: .c .o REMINDSRCS= calendar.c dedupe.c dynbuf.c dorem.c dosubst.c expr.c \ - files.c funcs.c globals.c hbcal.c init.c main.c md5.c \ - moon.c omit.c queue.c sort.c token.c trigger.c \ - userfns.c utils.c var.c + files.c funcs.c globals.c hashtab.c hashtab_stats.c \ + hbcal.c init.c main.c md5.c moon.c omit.c queue.c \ + sort.c token.c trigger.c userfns.c utils.c var.c -REMINDHDRS=config.h custom.h dynbuf.h err.h globals.h lang.h \ - md5.h protos.h rem2ps.h types.h version.h +REMINDHDRS=config.h custom.h dynbuf.h err.h globals.h hashtab.h \ + lang.h md5.h protos.h rem2ps.h types.h version.h REMINDOBJS= $(REMINDSRCS:.c=.o) all: remind rem2ps diff --git a/src/dedupe.c b/src/dedupe.c index c73c94ea..828b9836 100644 --- a/src/dedupe.c +++ b/src/dedupe.c @@ -16,16 +16,37 @@ #include "protos.h" #include #include +#include +#include -#define DEDUPE_HASH_SLOTS 31 typedef struct dedupe_entry { - struct dedupe_entry *next; + struct hash_link link; int trigger_date; int trigger_time; char const *body; } DedupeEntry; -static DedupeEntry *DedupeTable[DEDUPE_HASH_SLOTS]; +static hash_table DedupeTable; + +static unsigned int DedupeHashFunc(void *x) +{ + DedupeEntry *e = (DedupeEntry *) x; + unsigned int hashval = (unsigned int) e->trigger_date; + if (e->trigger_time != NO_TIME) { + hashval += (unsigned int) e->trigger_time; + } + hashval += HashVal(e->body); + return hashval; +} + +static int CompareDedupes(void *x, void *y) +{ + DedupeEntry *a = (DedupeEntry *) x; + DedupeEntry *b = (DedupeEntry *) y; + if (a->trigger_date != b->trigger_date) return 1; + if (a->trigger_time != b->trigger_time) return 1; + return strcmp(a->body, b->body); +} /***************************************************************/ /* */ @@ -43,24 +64,6 @@ FreeDedupeEntry(DedupeEntry *e) free(e); } -/***************************************************************/ -/* */ -/* GetDedupeBucket */ -/* */ -/* Get the bucket for a given date and body */ -/* */ -/***************************************************************/ -static unsigned int -GetDedupeBucket(int trigger_date, int trigger_time, char const *body) -{ - unsigned int bucket = trigger_date; - if (trigger_time != NO_TIME) { - bucket += trigger_time; - } - bucket += HashVal(body); - return bucket % DEDUPE_HASH_SLOTS; -} - /***************************************************************/ /* */ /* FindDedupeEntry */ @@ -72,19 +75,12 @@ static DedupeEntry * FindDedupeEntry(int trigger_date, int trigger_time, char const *body) { DedupeEntry *e; - - unsigned int bucket = GetDedupeBucket(trigger_date, trigger_time, body); - - e = DedupeTable[bucket]; - while(e) { - if (e->trigger_date == trigger_date && - e->trigger_time == trigger_time && - !strcmp(body, e->body)) { - return e; - } - e = e->next; - } - return NULL; + DedupeEntry candidate; + candidate.body = body; + candidate.trigger_date = trigger_date; + candidate.trigger_time = trigger_time; + e = hash_table_find(&DedupeTable, &candidate); + return e; } /***************************************************************/ @@ -99,8 +95,6 @@ InsertDedupeEntry(int trigger_date, int trigger_time, char const *body) { DedupeEntry *e; - unsigned int bucket = GetDedupeBucket(trigger_date, trigger_time, body); - e = malloc(sizeof(DedupeEntry)); if (!e) { return; /* No error checking... what can we do? */ @@ -113,8 +107,7 @@ InsertDedupeEntry(int trigger_date, int trigger_time, char const *body) return; } - e->next = DedupeTable[bucket]; - DedupeTable[bucket] = e; + hash_table_insert(&DedupeTable, e); } /***************************************************************/ @@ -149,16 +142,16 @@ void ClearDedupeTable(void) { DedupeEntry *e, *next; - for (int i=0; inext; - FreeDedupeEntry(e); - e = next; - } - DedupeTable[i] = NULL; + + e = hash_table_next(&DedupeTable, NULL); + while(e) { + next = hash_table_next(&DedupeTable, e); + hash_table_delete(&DedupeTable, e); + FreeDedupeEntry(e); + e = next; } } + /***************************************************************/ /* */ /* InitDedupeTable */ @@ -169,31 +162,17 @@ ClearDedupeTable(void) void InitDedupeTable(void) { - for (int i=0; inext; - } - if (len > *maxlen) { - *maxlen = len; - } - } - *avglen = (double) *total / (double) DEDUPE_HASH_SLOTS; + struct hash_table_stats s; + hash_table_get_stats(&DedupeTable, &s); + *total = s.num_entries; + *maxlen = s.max_len; + *avglen = s.avg_len; } diff --git a/src/expr.c b/src/expr.c index 1bc6a184..92231427 100644 --- a/src/expr.c +++ b/src/expr.c @@ -2019,7 +2019,7 @@ static int make_atom(expr_node *atom, Var *locals) atom->u.arg = i; return OK; } - v = v->next; + v = v->link.next; i++; } if (strlen(s) < SHORT_NAME_BUF) { diff --git a/src/hashtab.c b/src/hashtab.c new file mode 100644 index 00000000..21937604 --- /dev/null +++ b/src/hashtab.c @@ -0,0 +1,433 @@ +/***************************************************************/ +/* */ +/* HASHTAB_STATS.C */ +/* */ +/* Implementation of hash table. */ +/* */ +/* This file is part of REMIND. */ +/* Copyright (C) 1992-2024 by Dianne Skoll */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* */ +/***************************************************************/ + +/** + * \file hashtab.c + * + * \brief Implementation of hash table + * + * A hash table manages an array of buckets, each of which is the + * head of a singly-linked list. A given hash table can store items + * of a given type. The items in a hash table must be structs, and one + * of their members must be a struct hash_link object. For example, + * a hash table containing integers might have the hash objects + * defined as: + * + * struct int_object { + * int value; + * struct hash_link link; + * }; + * + * When you initialize the hash table, you pass in the offset to the hash + * link. For example, to initialize a hash table designed to hold + * int_objects, you'd do something like: + * + * unsigned int hash_int_obj(void *x) { + * return (unsigned int) ((int_object *) x)->value; + * } + * int compare_int_obj(void *a, void *b) { + * return ((int_object *)a)->value - ((int_object *)b)->value; + * } + * + * hash_table tab; + * hash_table_init(&tab, offsetof(struct int_object, link), hash_int_obj, compare_int_obj); + * + * An item can be in multiple hash tables at once; just declare multiple + * hash_link members and pass in the appropriate offset to each hash + * table. + */ + +#include "hashtab.h" +#include +#include + +/* + * The number of buckets should be a prime number. + * Use these numbers of buckets to grow or shrink the hash table. + * Yes, OK, the list below is probably excessive. + */ + +/** + * \brief A list of prime numbers from 17 to about 1.4 billion, approximately + * doubling with each successive number. + * + * These are used as choices for the number of hash buckets in the table + */ +static size_t bucket_choices[] = { + 17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, 43853, 87719, + 175447, 350899, 701819, 1403641, 2807303, 5614657, 11229331, 22458671, + 44917381, 89834777, 179669557, 359339171, 718678369, 1437356741 }; + +#define NUM_BUCKET_CHOICES (sizeof(bucket_choices) / sizeof(bucket_choices[0])) + +#define NUM_BUCKETS(t) (bucket_choices[t->bucket_choice_index]) + +#define LINK(t, p) ( (struct hash_link *) (( ((char *) p) + t->hash_link_offset)) ) + +/** + * \brief Initialize a hash table + * + * Initializes a hash table. A given hash table can contain a collection + * of items, all of which must be the same. An item in a hash table is + * a structure and one of the elements in the structure must be a + * struct hash_link object. For example, if you are storing a collection + * of integers in a hash table, your item might look like this: + * + * struct item { + * int value; + * struct hash_link link; + * }; + * + * \param t Pointer to a hash_table object + * \param link_offset The offset to the struct hash_link object within the object being put in the hash table. In the example above, it would be + * offsetof(struct item, link) + * \param hashfunc A pointer to a function that computes a hash given a pointer to an object. This function must return an unsigned int. + * \param compare A pointer to a function that compares two objects. It must + * return 0 if they compare equal and non-zero if they do not. + * + * \return 0 on success, -1 on failure (and errno is set appropriately) + */ +int +hash_table_init(hash_table *t, + size_t link_offset, + unsigned int (*hashfunc)(void *x), + int (*compare)(void *a, void *b)) +{ + t->bucket_choice_index = 0; + t->num_entries = 0; + t->hash_link_offset = link_offset; + t->hashfunc = hashfunc; + t->compare = compare; + t->buckets = malloc(sizeof(void *) * bucket_choices[0]); + if (!t->buckets) { + return -1; + } + for (size_t i=0; ibuckets[i] = NULL; + } + return 0; +} + +/** + * \brief Free memory used by a hash table + * + * \param t Pointer to a hash_table object + */ +void +hash_table_free(hash_table *t) +{ + free(t->buckets); + t->buckets = NULL; + t->bucket_choice_index = -1; + t->num_entries = 0; +} + +/** + * \brief Return the number of items in a hash table + * + * \param t Pointer to a hash_table object + * + * \return The number of items in the hash table + */ +size_t +hash_table_num_entries(hash_table *t) +{ + return t->num_entries; +} + +/** + * \brief Return the number of buckets in a hash table + * + * \param t Pointer to a hash_table object + * + * \return The number of buckets in the hash table + */ +size_t +hash_table_num_buckets(hash_table *t) +{ + if (t->bucket_choice_index >= NUM_BUCKET_CHOICES) { + return 0; + } + + return NUM_BUCKETS(t); +} + +/** + * \brief Return the length of the i'th bucket chain + * + * If i >= num_buckets, returns (size_t) -1 + * + * \param t Pointer to a hash_table object + * \param i The bucket whose length we want (0 to num_buckets-1) + * \return The length of the i'th bucket chain + */ +size_t +hash_table_chain_len(hash_table *t, size_t i) +{ + if (i >= hash_table_num_buckets(t)) { + return (size_t) -1; + } + size_t len = 0; + void *ptr = t->buckets[i]; + while(ptr) { + len++; + ptr = LINK(t, ptr)->next; + } + return len; +} + +/** + * \brief Resize a hash table + * + * Resizes (either grows or shrinks) a hash table's bucket array + * + * \param t Pointer to a hash_table object + * \param dir Must be either 1 (to increase the bucket array size) or + * -1 (to decrease it). + * \return 0 on success, non-zero if resizing fails. NOTE: Currently, resizing + * cannot fail; if we fail to allocate memory for the new bucket array, + * we just keep the existing array. This behaviour may change in future. + */ +static int +hash_table_resize(hash_table *t, int dir) +{ + if (dir != 1 && dir != -1) { + return 0; + } + if ((dir == -1 && t->bucket_choice_index == 0) || + (dir == 1 && t->bucket_choice_index == NUM_BUCKET_CHOICES-1)) { + return 0; + } + + size_t num_old_buckets = bucket_choices[t->bucket_choice_index]; + size_t num_new_buckets = bucket_choices[t->bucket_choice_index + dir]; + + void **new_buckets = malloc(sizeof(void *) * num_new_buckets); + if (!new_buckets) { + /* Out of memory... just don't resize? */ + return 0; + } + for (size_t j=0; jbuckets[i]) { + continue; + } + void *p = t->buckets[i]; + while(p) { + struct hash_link *l = LINK(t, p); + void *nxt = l->next; + size_t j = l->hashval % num_new_buckets; + l->next = new_buckets[j]; + new_buckets[j] = p; + p = nxt; + } + } + free(t->buckets); + t->buckets = new_buckets; + t->bucket_choice_index += dir; + + return 0; +} + +/** + * \brief Insert an item into a hash table + * + * Inserts an item into a hash table. The item MUST NOT be freed as + * long as it is in a hash table + * + * \param t Pointer to a hash_table object + * \param item Pointer to the item to insert + * + * \return 0 on success, -1 on failure (and errno is set appropriately) + */ +int +hash_table_insert(hash_table *t, void *item) +{ + if (!item) { + errno = EINVAL; + return -1; + } + + unsigned int v = t->hashfunc(item); + + struct hash_link *l = LINK(t, item); + l->hashval = v; + + v = v % NUM_BUCKETS(t); + + l->next = t->buckets[v]; + t->buckets[v] = item; + t->num_entries++; + + /* Grow table for load factor > 2 */ + if (t->bucket_choice_index < NUM_BUCKET_CHOICES-1 && + t->num_entries > 2 * NUM_BUCKETS(t)) { + return hash_table_resize(t, 1); + } + return 0; +} + +/** + * \brief Find an item in a hash table + * + * \param t Pointer to a hash_table object + * \param candidate Pointer to an object to be sought in the table + * + * \return A pointer to the object if one that matches candidate is found. NULL if not found + */ +void * +hash_table_find(hash_table *t, void *candidate) +{ + if (!candidate) { + return NULL; + } + + unsigned int v = t->hashfunc(candidate); + + void *ptr = t->buckets[v % NUM_BUCKETS(t)]; + + while(ptr) { + if (!t->compare(candidate, ptr)) { + return ptr; + } + ptr = LINK(t, ptr)->next; + } + return NULL; +} + +/** + * \brief Find the next item in a hash table + * + * \param t Pointer to a hash table object + * \param obj Pointer to an object that was perviously returned by + * hash_table_find() or hash_table_find_next(). + * + * \return A pointer to the next object matching obj, or NULL if + * no more exist + */ +void * +hash_table_find_next(hash_table *t, void *obj) +{ + if (!obj) { + return NULL; + } + void *ptr = LINK(t, obj)->next; + while(ptr) { + if (!t->compare(obj, ptr)) { + return ptr; + } + ptr = LINK(t, ptr)->next; + } + return NULL; +} + +/** + * \brief Delete an item from a hash table + * + * \param t Pointer to a hash_table object + * \param candidate Pointer to an object that is in the table and must be removed from it + * + * \return 0 on success, -1 on failure + */ +int +hash_table_delete(hash_table *t, void *item) +{ + if (!item) { + errno = EINVAL; + return -1; + } + + struct hash_link *l = LINK(t, item); + unsigned int v = l->hashval; + + v = v % NUM_BUCKETS(t); + + if (t->buckets[v] == item) { + t->buckets[v] = l->next; + t->num_entries--; + /* Shrink table for load factor < 1 */ + if (t->bucket_choice_index > 0 && + t->num_entries < NUM_BUCKETS(t) / 2) { + return hash_table_resize(t, -1); + } + return 0; + } + + void *ptr = t->buckets[v]; + while(ptr) { + struct hash_link *l2 = LINK(t, ptr); + if (l2->next == item) { + l2->next = l->next; + t->num_entries--; + /* Shrink table for load factor < 1 */ + if (t->bucket_choice_index > 0 && + t->num_entries < NUM_BUCKETS(t) / 2) { + return hash_table_resize(t, -1); + } + return 0; + } + ptr = l2->next; + } + + /* Item not found in hash table */ + errno = ENOENT; + return -1; +} + +/** + * \brief Iterate to the next item in a hash table + * + * Acts as an iterator. Given a pointer to an item in the hash + * table, returns the next item, or NULL if no more items. If the + * existing-item pointer is supplied as NULL, returns a pointer to the + * first item in the hash table. You can therefore iterate across the + * hash table like this* + * + * void *item = NULL; + * while ( (item = hash_table_next(&table, item) ) != NULL) { + * // Do something with item + * } + * + * NOTE that you MUST NOT modify the hash table while iterating over it. + * + * \param t Pointer to a hash_table object + * \param cur The current item. Supply as NULL to get the first item + * + * \return A pointer to the next item in the hash table, or NULL if there + * are no more items + */ +void * +hash_table_next(hash_table *t, void *cur) +{ + size_t n_buckets = NUM_BUCKETS(t); + + size_t start_bucket = 0; + if (cur) { + struct hash_link *l = LINK(t, cur); + if (l->next) { + return l->next; + } + /* End of this chain; start searching at the next bucket */ + start_bucket = (l->hashval % n_buckets) + 1; + } + + for (size_t i=start_bucket; ibuckets[i]) { + return t->buckets[i]; + } + } + return NULL; +} diff --git a/src/hashtab.h b/src/hashtab.h new file mode 100644 index 00000000..33238c49 --- /dev/null +++ b/src/hashtab.h @@ -0,0 +1,111 @@ +/***************************************************************/ +/* */ +/* HASHTAB.H */ +/* */ +/* Header file for hash-table related functions. */ +/* */ +/* This file is part of REMIND. */ +/* Copyright (C) 1992-2024 by Dianne Skoll */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* */ +/***************************************************************/ + +/* For size_t */ +#include + +/** + * \brief A structure for holding hash table chain links. + * + * This structure is embedded in a container structure to make up + * a hash table entry + */ +struct hash_link { + void *next; /**< Link to next item in the chain */ + unsigned int hashval; /**< Cached hash function value */ +}; + +/** + * \brief A hash table + */ +typedef struct { + unsigned int bucket_choice_index; /**< Index into array of possible bucket counts */ + size_t num_entries; /**< Number of entries in the hash table */ + size_t hash_link_offset; /**< Offset of the struct hash_link in the container */ + void **buckets; /**< Array of buckets */ + unsigned int (*hashfunc)(void *x); /**< Pointer to the hashing function */ + int (*compare)(void *a, void *b); /**< Pointer to the comparison function */ +} hash_table; + +/** + * \brief Data type to hold statistics about a hash table + */ +struct hash_table_stats { + size_t num_entries; /**< Number of items in the hash table */ + size_t num_buckets; /**< Number of buckets in the hash table */ + size_t num_nonempty_buckets; /**< Number of non-emptry buckets */ + size_t max_len; /**< Length of longest chain in the hash table */ + size_t min_len; /**< Length of the shortest chain in the hash table */ + double avg_len; /**< Average chain length */ + double avg_nonempty_len; /**< Average chain length of non-empty bucket */ + double stddev; /**< Standard deviation of chain lengths */ +}; + +int hash_table_init(hash_table *t, + size_t link_offset, + unsigned int (*hashfunc)(void *x), + int (*compare)(void *a, void *b)); +void hash_table_free(hash_table *t); +size_t hash_table_num_entries(hash_table *t); +size_t hash_table_num_buckets(hash_table *t); +size_t hash_table_chain_len(hash_table *t, size_t i); +int hash_table_insert(hash_table *t, void *item); +void *hash_table_find(hash_table *t, void *candidate); +void *hash_table_find_next(hash_table *t, void *obj); +int hash_table_delete(hash_table *t, void *item); +void *hash_table_next(hash_table *t, void *cur); +void hash_table_dump_stats(hash_table *t, FILE *fp); +void hash_table_get_stats(hash_table *t, struct hash_table_stats *stat); + +/** + * \brief Iterate over all items in a hash table + * + * This macro iterates over all items in a hash table. Here is an + * example of how to use it: + * + * hash_table tab; + * void *item; + * hash_table_for_each(item, &tab) { + * // Do something with item + * } + */ +#define hash_table_for_each(item, t) \ + for ((item) = hash_table_next((t), NULL); \ + (item); \ + (item) = hash_table_next((t), (item))) + +/** + * \brief Iterate over all items in a hash table that match a candidate + * + * This macro iterates over all items in a hash table that match a + * candidate object. (In general, a hash table may contain multiple + * objects with the same key.) Here is an example assuming that the hash + * table holds objects of type struct int_object: + * + * struct int_object { + * int value; + * struct hash_link link; + * } + * + * hash_table tab; + * int_object candidate; + * + * candidate.value = 7; + * int_object *item; + * hash_table_for_each_matching(item, &candidate, &tab) { + * // Do something with item, which will match "7" + * } + */ +#define hash_table_for_each_matching(item, candidate, t) \ + for ((item) = hash_table_find((t), (candidate)); \ + (item); \ + (item) = hash_table_find_next((t), (item))) diff --git a/src/hashtab_stats.c b/src/hashtab_stats.c new file mode 100644 index 00000000..d589a9bd --- /dev/null +++ b/src/hashtab_stats.c @@ -0,0 +1,96 @@ +/***************************************************************/ +/* */ +/* HASHTAB_STATS.C */ +/* */ +/* Utility function to print hash table stats. */ +/* */ +/* This file is part of REMIND. */ +/* Copyright (C) 1992-2024 by Dianne Skoll */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* */ +/***************************************************************/ + +/** + * \file hashtab_stats.c + * \brief Obtain or print statistics about a hash table + * + * NOTE: Use of any of the functions in this file will require linking + * with the math library to pull in the sqrt() function. + */ + +#include "hashtab.h" +#include +#include + +/** + * \brief Dump hash table statistics to a stdio FILE + * + * \param t A pointer to a hash_table object + * \param fp A stdio file pointer that is writable + */ +void +hash_table_dump_stats(hash_table *t, FILE *fp) +{ + struct hash_table_stats stat; + hash_table_get_stats(t, &stat); + fprintf(fp, "#Entries: %lu\n#Buckets: %lu\n#Non-empty Buckets: %lu\n", + (unsigned long) stat.num_entries, + (unsigned long) stat.num_buckets, + (unsigned long) stat.num_nonempty_buckets); + fprintf(fp, "Max len: %lu\nMin len: %lu\nAvg len: %.4f\nStd dev: %.4f\nAvg nonempty len: %.4f\n", + (unsigned long) stat.max_len, + (unsigned long) stat.min_len, + stat.avg_len, stat.stddev, stat.avg_nonempty_len); +} + +/** + * \brief Obtain hash table statistics + * + * This function fills in the elements of a struct hash_table_stats object + * with hash table statistics. + * + * \param t A pointer to a hash_table object + * \param stat A pointer to a hash_table_stats object that will be filled in + */ +void +hash_table_get_stats(hash_table *t, struct hash_table_stats *stat) +{ + size_t n = hash_table_num_buckets(t); + size_t max_len = 0; + size_t min_len = 1000000000; + + stat->num_buckets = n; + stat->num_entries = hash_table_num_entries(t); + stat->max_len = 0; + stat->min_len = 0; + stat->avg_len = 0.0; + stat->stddev = 0.0; + stat->num_nonempty_buckets = 0; + stat->avg_nonempty_len = 0.0; + double sum = 0.0; + double sumsq = 0.0; + + if (n == 0) { + return; + } + + for (size_t i=0; inum_nonempty_buckets++; + } + sum += (double) c; + sumsq += (double) c * (double) c; + if (c > max_len) max_len = c; + if (c < min_len) min_len = c; + } + double avg_len = sum / (double) n; + double stddev = sqrt( (sumsq / (double) n) - (avg_len * avg_len) ); + if (stat->num_nonempty_buckets > 0) { + stat->avg_nonempty_len = sum / (double) stat->num_nonempty_buckets; + } + stat->max_len = max_len; + stat->min_len = min_len; + stat->avg_len = avg_len; + stat->stddev = stddev; +} diff --git a/src/init.c b/src/init.c index 19be7126..016471bd 100644 --- a/src/init.c +++ b/src/init.c @@ -179,6 +179,12 @@ void InitRemind(int argc, char const *argv[]) dse = NO_DATE; + /* Initialize variable hash table */ + InitVars(); + + /* Initialize user-defined functions hash table */ + InitUserFunctions(); + /* If stdout is a terminal, initialize $FormWidth to terminal width-8, but clamp to [20, 500] */ InitCalWidthAndFormWidth(STDOUT_FILENO); diff --git a/src/protos.h b/src/protos.h index e943f929..bc3096aa 100644 --- a/src/protos.h +++ b/src/protos.h @@ -258,3 +258,5 @@ void get_dedupe_hash_stats(int *total, int *maxlen, double *avglen); int ShouldDedupe(int trigger_date, int trigger_time, char const *body); void ClearDedupeTable(void); void InitDedupeTable(void); +void InitVars(void); +void InitUserFunctions(void); diff --git a/src/types.h b/src/types.h index 9836e0bf..573b1e83 100644 --- a/src/types.h +++ b/src/types.h @@ -12,6 +12,7 @@ #include #include "dynbuf.h" +#include "hashtab.h" typedef struct udf_struct UserFunc; @@ -99,7 +100,7 @@ typedef struct expr_node_struct { /* Define the structure of a variable */ typedef struct var { - struct var *next; + struct hash_link link; char name[VAR_NAME_LEN+1]; char preserve; Value v; @@ -291,7 +292,7 @@ typedef struct { /* Define the data structure used to hold a user-defined function */ typedef struct udf_struct { - struct udf_struct *next; + struct hash_link link; char name[VAR_NAME_LEN+1]; expr_node *node; char **args; diff --git a/src/userfns.c b/src/userfns.c index 7cad748e..cfa21bf3 100644 --- a/src/userfns.c +++ b/src/userfns.c @@ -15,6 +15,7 @@ #include #include +#include #ifdef HAVE_STRINGS_H #include @@ -27,15 +28,36 @@ #include "protos.h" #include "err.h" -#define FUNC_HASH_SIZE 31 /* Size of User-defined function hash table */ - /* The hash table */ -static UserFunc *FuncHash[FUNC_HASH_SIZE]; +hash_table FuncHash; static void DestroyUserFunc (UserFunc *f); static void FUnset (char const *name); static void FSet (UserFunc *f); static void RenameUserFunc(char const *oldname, char const *newname); +unsigned int HashVal_nocase(char const *str); + +static unsigned int HashUserFunc(void *x) +{ + UserFunc *f = (UserFunc *) x; + return HashVal_nocase(f->name); +} + +static int CompareUserFuncs(void *a, void *b) +{ + UserFunc *f = (UserFunc *) a; + UserFunc *g = (UserFunc *) b; + return strcmp(f->name, g->name); +} + +void +InitUserFunctions(void) +{ + hash_table_init(&FuncHash, + offsetof(UserFunc, link), + HashUserFunc, + CompareUserFuncs); +} /***************************************************************/ /* */ @@ -257,8 +279,8 @@ int DoFset(ParsePtr p) } local_array[i].v.type = ERR_TYPE; StrnCpy(local_array[i].name, DBufValue(&buf), VAR_NAME_LEN); - local_array[i].next = &(local_array[i+1]); - local_array[i+1].next = NULL; + local_array[i].link.next = &(local_array[i+1]); + local_array[i+1].link.next = NULL; func->nargs++; c = ParseNonSpaceChar(p, &r, 0); if (r) { @@ -373,21 +395,11 @@ static void DestroyUserFunc(UserFunc *f) /***************************************************************/ static void FUnset(char const *name) { - UserFunc *cur, *prev; - int h; - - h = HashVal_nocase(name) % FUNC_HASH_SIZE; - - cur = FuncHash[h]; - prev = NULL; - while(cur) { - if (! strncmp(name, cur->name, VAR_NAME_LEN)) break; - prev = cur; - cur = cur->next; + UserFunc *f = FindUserFunc(name); + if (f) { + hash_table_delete(&FuncHash, f); + DestroyUserFunc(f); } - if (!cur) return; - if (prev) prev->next = cur->next; else FuncHash[h] = cur->next; - DestroyUserFunc(cur); } /***************************************************************/ @@ -399,19 +411,17 @@ static void FUnset(char const *name) /***************************************************************/ static void FSet(UserFunc *f) { - int h = HashVal_nocase(f->name) % FUNC_HASH_SIZE; - f->next = FuncHash[h]; - FuncHash[h] = f; + hash_table_insert(&FuncHash, f); } UserFunc *FindUserFunc(char const *name) { UserFunc *f; - int h = HashVal_nocase(name) % FUNC_HASH_SIZE; + UserFunc candidate; - /* Search for the function */ - f = FuncHash[h]; - while (f && strncmp(name, f->name, VAR_NAME_LEN)) f = f->next; + StrnCpy(candidate.name, name, VAR_NAME_LEN); + + f = hash_table_find(&FuncHash, &candidate); return f; } @@ -444,15 +454,13 @@ UnsetAllUserFuncs(void) { UserFunc *f; UserFunc *next; - int i; - for (i=0; inext; - DestroyUserFunc(f); - f = next; - } - FuncHash[i] = NULL; + + f = hash_table_next(&FuncHash, NULL); + while(f) { + next = hash_table_next(&FuncHash, f); + hash_table_delete(&FuncHash, f); + DestroyUserFunc(f); + f = next; } } @@ -469,7 +477,6 @@ static void RenameUserFunc(char const *oldname, char const *newname) { UserFunc *f = FindUserFunc(oldname); - UserFunc *cur, *prev; if (!strcmp(oldname, newname)) { /* Same name; do nothing */ @@ -485,52 +492,22 @@ RenameUserFunc(char const *oldname, char const *newname) } /* Remove from hash table */ - int h = HashVal_nocase(f->name) % FUNC_HASH_SIZE; - cur = FuncHash[h]; - prev = NULL; - while(cur) { - if (cur == f) { - if (prev) { - prev->next = cur->next; - } else { - FuncHash[h] = cur->next; - } - break; - } - prev = cur; - cur = cur->next; - } + hash_table_delete(&FuncHash, f); /* Rename */ StrnCpy(f->name, newname, VAR_NAME_LEN); /* Insert into hash table */ - h = HashVal_nocase(f->name) % FUNC_HASH_SIZE; - f->next = FuncHash[h]; - FuncHash[h] = f; + hash_table_insert(&FuncHash, f); } void get_userfunc_hash_stats(int *total, int *maxlen, double *avglen) { - int len; - int i; - UserFunc *f; - - *maxlen = 0; - *total = 0; - - for (i=0; inext; - } - if (len > *maxlen) { - *maxlen = len; - } - } - *avglen = (double) *total / (double) FUNC_HASH_SIZE; + struct hash_table_stats s; + hash_table_get_stats(&FuncHash, &s); + *total = s.num_entries; + *maxlen = s.max_len; + *avglen = s.avg_len; } + diff --git a/src/var.c b/src/var.c index 419663d4..27b0e6da 100644 --- a/src/var.c +++ b/src/var.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -27,8 +28,6 @@ #include "err.h" #define UPPER(c) (islower(c) ? toupper(c) : c) -/* The variable hash table */ -#define VAR_HASH_SIZE 67 #define VARIABLE ErrMsg[E_VAR] #define VALUE ErrMsg[E_VAL] #define UNDEF ErrMsg[E_UNDEF] @@ -36,7 +35,27 @@ static int IntMin = INT_MIN; static int IntMax = INT_MAX; -static Var *VHashTbl[VAR_HASH_SIZE]; +static hash_table VHashTbl; + +static unsigned int VarHashFunc(void *x) +{ + Var *v = (Var *) x; + return HashVal(v->name); +} + +static int VarCompareFunc(void *a, void *b) +{ + Var *x = (Var *) a; + Var *y = (Var *) b; + return StrCmpi(x->name, y->name); +} + +void +InitVars(void) +{ + hash_table_init(&VHashTbl, offsetof(Var, link), + VarHashFunc, VarCompareFunc); +} static double strtod_in_c_locale(char const *str, char **endptr) @@ -483,31 +502,22 @@ unsigned int HashVal(char const *str) /***************************************************************/ Var *FindVar(char const *str, int create) { - register int h; - register Var *v; - register Var *prev; + Var *v; + Var candidate; + StrnCpy(candidate.name, str, VAR_NAME_LEN); - h = HashVal(str) % VAR_HASH_SIZE; - v = VHashTbl[h]; - prev = NULL; + v = (Var *) hash_table_find(&VHashTbl, &candidate); + if (v != NULL || !create) return v; - while(v) { - if (! StrinCmp(str, v->name, VAR_NAME_LEN)) return v; - prev = v; - v = v-> next; - } - if (!create) return v; - -/* Create the variable */ + /* Create the variable */ v = NEW(Var); if (!v) return v; - v->next = NULL; v->v.type = INT_TYPE; v->v.v.val = 0; v->preserve = 0; StrnCpy(v->name, str, VAR_NAME_LEN); - if (prev) prev->next = v; else VHashTbl[h] = v; + hash_table_insert(&VHashTbl, v); return v; } @@ -520,23 +530,12 @@ Var *FindVar(char const *str, int create) /***************************************************************/ int DeleteVar(char const *str) { - register int h; - register Var *v; - register Var *prev; + Var *v; - h = HashVal(str) % VAR_HASH_SIZE; - v = VHashTbl[h]; - prev = NULL; - - while(v) { - if (! StrinCmp(str, v->name, VAR_NAME_LEN)) break; - prev = v; - v = v-> next; - } + v = FindVar(str, 0); if (!v) return E_NOSUCH_VAR; DestroyValue(v->v); - if (prev) prev->next = v->next; else VHashTbl[h] = v->next; - free(v); + hash_table_delete(&VHashTbl, v); return OK; } @@ -725,19 +724,14 @@ int DoDump(ParsePtr p) /***************************************************************/ void DumpVarTable(void) { - register Var *v; - register int i; + Var *v; fprintf(ErrFp, "%s %s\n\n", VARIABLE, VALUE); - for (i=0; iname); - PrintValue(&(v->v), ErrFp); - fprintf(ErrFp, "\n"); - v = v->next; - } + hash_table_for_each(v, &VHashTbl) { + fprintf(ErrFp, "%s ", v->name); + PrintValue(&(v->v), ErrFp); + fprintf(ErrFp, "\n"); } } @@ -751,27 +745,18 @@ void DumpVarTable(void) /***************************************************************/ void DestroyVars(int all) { - int i; - Var *v, *next, *prev; + Var *v; + Var *next; - for (i=0; ipreserve) { - DestroyValue(v->v); - next = v->next; - free(v); - } else { - if (prev) prev->next = v; - else VHashTbl[i] = v; - prev = v; - next = v->next; - v->next = NULL; - } - v = next; + v = hash_table_next(&VHashTbl, NULL); + while(v) { + next = hash_table_next(&VHashTbl, v); + if (all || !v->preserve) { + DestroyValue(v->v); + hash_table_delete(&VHashTbl, v); + free(v); } + v = next; } } @@ -1214,24 +1199,9 @@ print_sysvar_tokens(void) void get_var_hash_stats(int *total, int *maxlen, double *avglen) { - int len; - int i; - Var *v; - - *maxlen = 0; - *total = 0; - - for (i=0; inext; - } - if (len > *maxlen) { - *maxlen = len; - } - } - *avglen = (double) *total / (double) VAR_HASH_SIZE; + struct hash_table_stats s; + hash_table_get_stats(&VHashTbl, &s); + *total = s.num_entries; + *maxlen = s.max_len; + *avglen = s.avg_len; } diff --git a/tests/test.cmp b/tests/test.cmp index d22e5ac2..82f21acd 100644 --- a/tests/test.cmp +++ b/tests/test.cmp @@ -2596,141 +2596,141 @@ wkdaynum(1993-12-27) => 1 dump Variable Value -a071 2 -a072 0 -a030 1 -a031 "foobarbaz" -a074 "Tuesday, 5 May, 1992 In 444 days' time T"... -a032 34 -a054b 11:22 -a075 "05-05 Tuesday, May 5th, 1992 Tuesday, Ma"... -a033 "foo" -a076 "S' 05 Th 05 Tuesday, 5th May, 1992 Tuesd"... -a034 1991-02-17 -a077 "1992 92\n" -a035 1 -a078 1991-04-07 -a036 "bar" -a079 1992-04-26 -a037 1991-02-15 -a100 2010-09-03 -a038 33 -a101 2010-09-03 -a039 "February" -a102 5000 -a103 0 -a104 0 -a105 -1 -mltest "a b" -a080 2027-05-02 -a106 4 -a081 "" -a107 3 -a082 1991-03-13 -a108 14 -a040 2 -a083 1991-03-24 -a109 2012-01-01 -a041 "3rd" -a084 7 -a042 "4th" -a085 7 a000 1 -a043 "UNIX" -a086 4 a001 1 -a044 "s" -a087 3 a002 102 -a045 "iess" -a088 14 a003 1990 -a046 "ies" -a089 2012-01-01 -a004 "B7BMB" -a047 -1 -a110 1991-02-16 -a005 "baz" -a048 "foo" -a111 -1 -a006 "1" -a049 21 -a112 7 -a007 "1991-02-16" -a113 15 -a008 "11:44" -a114 2 -a115 03:33 -a090 1991-02-16 -a116 -4 -a091 -1 -a117 -3 -a092 7 -a118 0 -a093 0 -a119 -1 -a094 0 a052 03:07 -a095 -1 -a010 12 +a004 "B7BMB" a053 1992-01-10 -a096 -4 -a011 704 +a005 "baz" a054 11:22 -a097 -3 -a012 411 a055 1 -a098 0 -a013 1992-02-02 +a006 "1" a056 "SDFJHSDF KSJDFH KJSDFH KSJDFH" -a099 -1 +a007 "1991-02-16" a057 "SDFJHSDF KSJDFH KJSDFH KSJDFH" -a120 2010-09-03 -a015 16 +a008 "11:44" a058 "05.01.01" -a121 2010-09-03 -a016 28 a059 "Saturday" -a122 5000 -a017 29 -a123 0 -a018 1 -a124 0 -a019 0 -a125 -1 -a126 14:00 -a127 04:30 -a128 2018-02-03@16:45 +a010 12 a060 6 -a129 2019-02-03@16:14 +a011 704 a061 1991 +a012 411 +a100 2010-09-03 a062 -19 -a020 "../tests/test.rem" +a013 1992-02-02 +a101 2010-09-03 a063 0 -a021 "foo bar baz" +a102 5000 a064 1 -a022 11 +a015 16 +a103 0 a065 1 -a023 1 +a016 28 +a104 0 a066 0 -a131b 00:00 -a024 0 +a017 29 +a105 -1 a067 "INT" -a130 1991-02-16 -a025 4 +a018 1 +a106 4 a068 "STRING" -a131 00:00 -a026 7 +a019 0 +a107 3 a069 "TIME" -a132 1991-02-16@00:00 -a027 0 -a133 16:00 -a028 1 -a134 1991-02-13@16:00 -a029 0 -a135 72:00 -a136 "FILE" +a108 14 +a109 2012-01-01 +a020 "../tests/test.rem" a070 "DATE" +a021 "foo bar baz" +a071 2 +a022 11 +a110 1991-02-16 +a072 0 +a023 1 +a111 -1 +a024 0 +a112 7 +a074 "Tuesday, 5 May, 1992 In 444 days' time T"... +a025 4 +a113 15 +a026 7 +a075 "05-05 Tuesday, May 5th, 1992 Tuesday, Ma"... +a114 2 +a027 0 +a076 "S' 05 Th 05 Tuesday, 5th May, 1992 Tuesd"... +a115 03:33 +a028 1 +a077 "1992 92\n" +a116 -4 +a078 1991-04-07 +a029 0 +a117 -3 +a079 1992-04-26 +a118 0 +a119 -1 +a131b 00:00 +a030 1 +a080 2027-05-02 +a031 "foobarbaz" +a081 "" +a032 34 +a120 2010-09-03 +a082 1991-03-13 +a033 "foo" +a121 2010-09-03 +a083 1991-03-24 +a034 1991-02-17 +a122 5000 +a084 7 +a035 1 +a123 0 +a085 7 +a036 "bar" +a124 0 +a086 4 +a037 1991-02-15 +a125 -1 +a087 3 +a038 33 +a126 14:00 +a088 14 +a039 "February" +a127 04:30 +a089 2012-01-01 +a128 2018-02-03@16:45 +a129 2019-02-03@16:14 +mltest "a b" +a040 2 +a090 1991-02-16 +a041 "3rd" +a091 -1 +a042 "4th" +a130 1991-02-16 +a092 7 +a043 "UNIX" +a054b 11:22 +a131 00:00 +a093 0 +a044 "s" +a132 1991-02-16@00:00 +a094 0 +a045 "iess" +a133 16:00 +a095 -1 +a046 "ies" +a134 1991-02-13@16:00 +a096 -4 +a047 -1 +a135 72:00 +a097 -3 +a048 "foo" +a136 "FILE" +a098 0 +a049 21 +a099 -1 dump $ Variable Value @@ -5970,8 +5970,8 @@ Hello on the same line DEBUG +s # Don't want Remind to queue reminders EXIT - Var hash: total = 141; maxlen = 4; avglen = 2.104 - Func hash: total = 17; maxlen = 2; avglen = 0.548 + Var hash: total = 141; maxlen = 4; avglen = 1.785 + Func hash: total = 17; maxlen = 3; avglen = 1.000 Dedup hash: total = 0; maxlen = 0; avglen = 0.000 Expression nodes allocated: 128 Expression nodes high-water: 74 @@ -13424,7 +13424,7 @@ Parsed expression: isany("foo", 1 + 1, 2:00 + 1, '2021-01-01' + 1, '2021-01-01@1 "f" + "oo" => "foo" isany("foo", 2, 02:01, 2021-01-02, 2021-01-01@14:01, "foo", ?) => 1 No reminders. - Var hash: total = 1; maxlen = 1; avglen = 0.015 + Var hash: total = 1; maxlen = 1; avglen = 0.059 Func hash: total = 0; maxlen = 0; avglen = 0.000 Dedup hash: total = 0; maxlen = 0; avglen = 0.000 Expression nodes allocated: 512