Replace the individual hash table implementations with a unified one.

This commit is contained in:
Dianne Skoll
2024-12-09 11:54:52 -05:00
parent be7c67b6fd
commit cb712ad7e7
12 changed files with 922 additions and 347 deletions

View File

@@ -27,12 +27,12 @@ MANS= $(srcdir)/../man/rem2ps.1 $(srcdir)/../man/remind.1 \
.SUFFIXES: .c .o
REMINDSRCS= calendar.c dedupe.c dynbuf.c dorem.c dosubst.c expr.c \
files.c funcs.c globals.c hbcal.c init.c main.c md5.c \
moon.c omit.c queue.c sort.c token.c trigger.c \
userfns.c utils.c var.c
files.c funcs.c globals.c hashtab.c hashtab_stats.c \
hbcal.c init.c main.c md5.c moon.c omit.c queue.c \
sort.c token.c trigger.c userfns.c utils.c var.c
REMINDHDRS=config.h custom.h dynbuf.h err.h globals.h lang.h \
md5.h protos.h rem2ps.h types.h version.h
REMINDHDRS=config.h custom.h dynbuf.h err.h globals.h hashtab.h \
lang.h md5.h protos.h rem2ps.h types.h version.h
REMINDOBJS= $(REMINDSRCS:.c=.o)
all: remind rem2ps

View File

@@ -16,16 +16,37 @@
#include "protos.h"
#include <stdlib.h>
#include <string.h>
#include <stdlib.h>
#include <stddef.h>
#define DEDUPE_HASH_SLOTS 31
typedef struct dedupe_entry {
struct dedupe_entry *next;
struct hash_link link;
int trigger_date;
int trigger_time;
char const *body;
} DedupeEntry;
static DedupeEntry *DedupeTable[DEDUPE_HASH_SLOTS];
static hash_table DedupeTable;
static unsigned int DedupeHashFunc(void *x)
{
DedupeEntry *e = (DedupeEntry *) x;
unsigned int hashval = (unsigned int) e->trigger_date;
if (e->trigger_time != NO_TIME) {
hashval += (unsigned int) e->trigger_time;
}
hashval += HashVal(e->body);
return hashval;
}
static int CompareDedupes(void *x, void *y)
{
DedupeEntry *a = (DedupeEntry *) x;
DedupeEntry *b = (DedupeEntry *) y;
if (a->trigger_date != b->trigger_date) return 1;
if (a->trigger_time != b->trigger_time) return 1;
return strcmp(a->body, b->body);
}
/***************************************************************/
/* */
@@ -43,24 +64,6 @@ FreeDedupeEntry(DedupeEntry *e)
free(e);
}
/***************************************************************/
/* */
/* GetDedupeBucket */
/* */
/* Get the bucket for a given date and body */
/* */
/***************************************************************/
static unsigned int
GetDedupeBucket(int trigger_date, int trigger_time, char const *body)
{
unsigned int bucket = trigger_date;
if (trigger_time != NO_TIME) {
bucket += trigger_time;
}
bucket += HashVal(body);
return bucket % DEDUPE_HASH_SLOTS;
}
/***************************************************************/
/* */
/* FindDedupeEntry */
@@ -72,19 +75,12 @@ static DedupeEntry *
FindDedupeEntry(int trigger_date, int trigger_time, char const *body)
{
DedupeEntry *e;
unsigned int bucket = GetDedupeBucket(trigger_date, trigger_time, body);
e = DedupeTable[bucket];
while(e) {
if (e->trigger_date == trigger_date &&
e->trigger_time == trigger_time &&
!strcmp(body, e->body)) {
return e;
}
e = e->next;
}
return NULL;
DedupeEntry candidate;
candidate.body = body;
candidate.trigger_date = trigger_date;
candidate.trigger_time = trigger_time;
e = hash_table_find(&DedupeTable, &candidate);
return e;
}
/***************************************************************/
@@ -99,8 +95,6 @@ InsertDedupeEntry(int trigger_date, int trigger_time, char const *body)
{
DedupeEntry *e;
unsigned int bucket = GetDedupeBucket(trigger_date, trigger_time, body);
e = malloc(sizeof(DedupeEntry));
if (!e) {
return; /* No error checking... what can we do? */
@@ -113,8 +107,7 @@ InsertDedupeEntry(int trigger_date, int trigger_time, char const *body)
return;
}
e->next = DedupeTable[bucket];
DedupeTable[bucket] = e;
hash_table_insert(&DedupeTable, e);
}
/***************************************************************/
@@ -149,16 +142,16 @@ void
ClearDedupeTable(void)
{
    /* Remove and free every entry in DedupeTable.
     *
     * hash_table_delete() may shrink the table, which re-buckets every
     * remaining item; a "next" pointer fetched before the deletion could
     * then skip entries.  Always re-fetching the first remaining item is
     * immune to that. */
    DedupeEntry *e;

    while ((e = hash_table_next(&DedupeTable, NULL)) != NULL) {
        if (hash_table_delete(&DedupeTable, e) != 0) {
            /* Should not happen for an item we just got from the table;
               bail out rather than loop forever. */
            break;
        }
        FreeDedupeEntry(e);
    }
}
/***************************************************************/
/* */
/* InitDedupeTable */
@@ -169,31 +162,17 @@ ClearDedupeTable(void)
void
InitDedupeTable(void)
{
for (int i=0; i<DEDUPE_HASH_SLOTS; i++) {
DedupeTable[i] = NULL;
}
hash_table_init(&DedupeTable,
offsetof(DedupeEntry, link),
DedupeHashFunc, CompareDedupes);
}
void
get_dedupe_hash_stats(int *total, int *maxlen, double *avglen)
{
int len;
int i;
DedupeEntry *e;
*maxlen = 0;
*total = 0;
for (i=0; i<DEDUPE_HASH_SLOTS; i++) {
len = 0;
e = DedupeTable[i];
while (e) {
len++;
(*total)++;
e = e->next;
}
if (len > *maxlen) {
*maxlen = len;
}
}
*avglen = (double) *total / (double) DEDUPE_HASH_SLOTS;
struct hash_table_stats s;
hash_table_get_stats(&DedupeTable, &s);
*total = s.num_entries;
*maxlen = s.max_len;
*avglen = s.avg_len;
}

View File

@@ -2019,7 +2019,7 @@ static int make_atom(expr_node *atom, Var *locals)
atom->u.arg = i;
return OK;
}
v = v->next;
v = v->link.next;
i++;
}
if (strlen(s) < SHORT_NAME_BUF) {

433
src/hashtab.c Normal file
View File

@@ -0,0 +1,433 @@
/***************************************************************/
/* */
/* HASHTAB.C */
/* */
/* Implementation of hash table. */
/* */
/* This file is part of REMIND. */
/* Copyright (C) 1992-2024 by Dianne Skoll */
/* SPDX-License-Identifier: GPL-2.0-only */
/* */
/***************************************************************/
/**
* \file hashtab.c
*
* \brief Implementation of hash table
*
* A hash table manages an array of buckets, each of which is the
* head of a singly-linked list. A given hash table can store items
* of a given type. The items in a hash table must be structs, and one
* of their members must be a struct hash_link object. For example,
* a hash table containing integers might have the hash objects
* defined as:
*
* struct int_object {
* int value;
* struct hash_link link;
* };
*
* When you initialize the hash table, you pass in the offset to the hash
* link. For example, to initialize a hash table designed to hold
* int_objects, you'd do something like:
*
* unsigned int hash_int_obj(void *x) {
* return (unsigned int) ((int_object *) x)->value;
* }
* int compare_int_obj(void *a, void *b) {
* return ((int_object *)a)->value - ((int_object *)b)->value;
* }
*
* hash_table tab;
* hash_table_init(&tab, offsetof(struct int_object, link), hash_int_obj, compare_int_obj);
*
* An item can be in multiple hash tables at once; just declare multiple
* hash_link members and pass in the appropriate offset to each hash
* table.
*/
#include "hashtab.h"
#include <stdlib.h>
#include <errno.h>
/*
* The number of buckets should be a prime number.
* Use these numbers of buckets to grow or shrink the hash table.
* Yes, OK, the list below is probably excessive.
*/
/**
* \brief A list of prime numbers from 17 to about 1.4 billion, approximately
* doubling with each successive number.
*
* These are used as choices for the number of hash buckets in the table
*/
static const size_t bucket_choices[] = {
    17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, 43853, 87719,
    175447, 350899, 701819, 1403641, 2807303, 5614657, 11229331, 22458671,
    44917381, 89834777, 179669557, 359339171, 718678369, 1437356741 };

/** Number of entries in bucket_choices[] */
#define NUM_BUCKET_CHOICES (sizeof(bucket_choices) / sizeof(bucket_choices[0]))

/** Current bucket count of the table pointed to by t.  Arguments are
 *  parenthesized so that any pointer expression may be passed. */
#define NUM_BUCKETS(t) (bucket_choices[(t)->bucket_choice_index])

/** Address of the struct hash_link embedded in item p, using the link
 *  offset recorded in the table pointed to by t. */
#define LINK(t, p) ((struct hash_link *) (((char *) (p)) + (t)->hash_link_offset))
/**
 * \brief Initialize a hash table
 *
 * Initializes a hash table with the smallest bucket count and no
 * entries.  A given hash table holds a collection of items that must
 * all be of the same type.  An item is a structure, one of whose
 * members must be a struct hash_link object.  For example, if you are
 * storing a collection of integers in a hash table, your item might
 * look like this:
 *
 *      struct item {
 *          int value;
 *          struct hash_link link;
 *      };
 *
 * \param t Pointer to a hash_table object
 * \param link_offset The offset of the struct hash_link member within
 *        the item type.  In the example above, it would be
 *        offsetof(struct item, link)
 * \param hashfunc A pointer to a function that computes a hash given a
 *        pointer to an item.  This function must return an unsigned int.
 * \param compare A pointer to a function that compares two items.  It
 *        must return 0 if they compare equal and non-zero if they do not.
 *
 * \return 0 on success, -1 on failure (errno is set to ENOMEM and
 *         t->buckets is left NULL, so the table must not be used)
 */
int
hash_table_init(hash_table *t,
                size_t link_offset,
                unsigned int (*hashfunc)(void *x),
                int (*compare)(void *a, void *b))
{
    t->bucket_choice_index = 0;
    t->num_entries = 0;
    t->hash_link_offset = link_offset;
    t->hashfunc = hashfunc;
    t->compare = compare;
    t->buckets = malloc(sizeof(*t->buckets) * bucket_choices[0]);
    if (!t->buckets) {
        /* ISO C does not require malloc to set errno, so set it
           explicitly to honour the documented contract. */
        errno = ENOMEM;
        return -1;
    }
    /* Explicit loop rather than calloc/memset: a null pointer is not
       guaranteed to be all-bits-zero. */
    for (size_t i=0; i<bucket_choices[0]; i++) {
        t->buckets[i] = NULL;
    }
    return 0;
}
/**
 * \brief Release the bucket array of a hash table
 *
 * Only the bucket array is freed; items that were stored in the table
 * are not touched.  Afterwards the entry count is zero and the bucket
 * choice index is out of range, which makes hash_table_num_buckets()
 * report 0.
 *
 * \param t Pointer to a hash_table object
 */
void
hash_table_free(hash_table *t)
{
    void **doomed = t->buckets;

    t->num_entries = 0;
    t->bucket_choice_index = (unsigned int) -1;
    t->buckets = NULL;
    free(doomed);
}
/**
 * \brief Report how many items a hash table currently holds
 *
 * \param t Pointer to a hash_table object
 *
 * \return The table's running entry count
 */
size_t
hash_table_num_entries(hash_table *t)
{
    size_t const count = t->num_entries;

    return count;
}
/**
 * \brief Report how many buckets a hash table currently has
 *
 * \param t Pointer to a hash_table object
 *
 * \return The number of buckets, or 0 if the table's bucket choice
 *         index is out of range (as it is after hash_table_free())
 */
size_t
hash_table_num_buckets(hash_table *t)
{
    return (t->bucket_choice_index < NUM_BUCKET_CHOICES) ? NUM_BUCKETS(t) : 0;
}
/**
 * \brief Count the items chained off one bucket
 *
 * \param t Pointer to a hash_table object
 * \param i Index of the bucket of interest (0 to num_buckets-1)
 *
 * \return The number of items in bucket i's chain, or (size_t) -1 if
 *         i is not a valid bucket index
 */
size_t
hash_table_chain_len(hash_table *t, size_t i)
{
    size_t count = 0;

    if (i >= hash_table_num_buckets(t)) {
        return (size_t) -1;
    }
    for (void *node = t->buckets[i]; node != NULL; node = LINK(t, node)->next) {
        count++;
    }
    return count;
}
/**
 * \brief Move a hash table to the next larger or smaller bucket count
 *
 * Allocates a new bucket array one step up or down in bucket_choices[]
 * and re-links every item into it using the hash value cached in the
 * item's hash_link (the hash function is not called).  Items are pushed
 * onto the front of their new chain, so relative order is not preserved.
 *
 * \param t Pointer to a hash_table object
 * \param dir 1 to grow the bucket array, -1 to shrink it.  Any other
 *        value is ignored.
 * \return Always 0.  If the table is already at the smallest/largest
 *         size, or the new array cannot be allocated, the table is
 *         simply left as it is.
 */
static int
hash_table_resize(hash_table *t, int dir)
{
    if (dir != 1 && dir != -1) {
        return 0;
    }

    unsigned int const cur = t->bucket_choice_index;
    if (dir < 0 && cur == 0) {
        return 0;                   /* Already at the smallest size */
    }
    if (dir > 0 && cur == NUM_BUCKET_CHOICES-1) {
        return 0;                   /* Already at the largest size */
    }

    unsigned int const target = (dir > 0) ? cur + 1 : cur - 1;
    size_t const old_count = bucket_choices[cur];
    size_t const new_count = bucket_choices[target];

    void **fresh = malloc(sizeof(void *) * new_count);
    if (!fresh) {
        /* Out of memory: keep using the existing bucket array */
        return 0;
    }
    for (size_t k=0; k<new_count; k++) {
        fresh[k] = NULL;
    }

    /* Walk every old chain head-to-tail, pushing each item onto the
       front of its new chain */
    for (size_t i=0; i<old_count; i++) {
        void *item = t->buckets[i];
        while (item != NULL) {
            struct hash_link *lnk = LINK(t, item);
            void *rest = lnk->next;
            size_t slot = lnk->hashval % new_count;

            lnk->next = fresh[slot];
            fresh[slot] = item;
            item = rest;
        }
    }

    free(t->buckets);
    t->buckets = fresh;
    t->bucket_choice_index = target;
    return 0;
}
/**
 * \brief Add an item to a hash table
 *
 * The item is linked at the head of its bucket chain and its hash value
 * is cached in its hash_link.  No check for an existing equal item is
 * made.  The item MUST NOT be freed for as long as it is in the table.
 * If the insertion pushes the entry count above twice the bucket count,
 * the table is grown (unless it is already at its largest size).
 *
 * \param t Pointer to a hash_table object
 * \param item Pointer to the item to insert
 *
 * \return 0 on success; -1 with errno set to EINVAL if item is NULL
 */
int
hash_table_insert(hash_table *t, void *item)
{
    if (item == NULL) {
        errno = EINVAL;
        return -1;
    }

    struct hash_link *lnk = LINK(t, item);
    lnk->hashval = t->hashfunc(item);

    size_t const slot = lnk->hashval % NUM_BUCKETS(t);
    lnk->next = t->buckets[slot];
    t->buckets[slot] = item;
    t->num_entries++;

    /* Grow once the load factor exceeds 2 */
    int const can_grow = t->bucket_choice_index < NUM_BUCKET_CHOICES-1;
    if (can_grow && t->num_entries > 2 * NUM_BUCKETS(t)) {
        return hash_table_resize(t, 1);
    }
    return 0;
}
/**
 * \brief Look up an item in a hash table
 *
 * Hashes candidate with the table's hash function and scans the
 * matching bucket, calling compare(candidate, item) on each item.
 *
 * \param t Pointer to a hash_table object
 * \param candidate Pointer to an object to be sought in the table
 *
 * \return The first item in the chain for which the comparison
 *         function returns 0, or NULL if there is none (or candidate
 *         is NULL)
 */
void *
hash_table_find(hash_table *t, void *candidate)
{
    if (candidate == NULL) {
        return NULL;
    }

    size_t const slot = t->hashfunc(candidate) % NUM_BUCKETS(t);
    for (void *node = t->buckets[slot]; node != NULL; node = LINK(t, node)->next) {
        if (t->compare(candidate, node) == 0) {
            return node;
        }
    }
    return NULL;
}
/**
 * \brief Find the next matching item in a hash table
 *
 * Continues along the bucket chain after obj, calling
 * compare(obj, item) on each later item.
 *
 * \param t Pointer to a hash table object
 * \param obj Pointer to an object that was previously returned by
 *        hash_table_find() or hash_table_find_next().
 *
 * \return A pointer to the next object matching obj, or NULL if
 *         no more exist (or obj is NULL)
 */
void *
hash_table_find_next(hash_table *t, void *obj)
{
    if (obj == NULL) {
        return NULL;
    }
    for (void *node = LINK(t, obj)->next; node != NULL; node = LINK(t, node)->next) {
        if (t->compare(obj, node) == 0) {
            return node;
        }
    }
    return NULL;
}
/**
 * \brief Remove an item from a hash table
 *
 * The item is located by pointer identity in the bucket selected by its
 * cached hash value; neither the hash function nor the comparison
 * function is called.  The item itself is not freed.  If the removal
 * leaves fewer entries than half the bucket count, the table is shrunk
 * (unless it is already at its smallest size), which re-buckets every
 * remaining item.
 *
 * \param t Pointer to a hash_table object
 * \param item Pointer to an object that is in the table and must be
 *        removed from it
 *
 * \return 0 on success; -1 with errno set to EINVAL if item is NULL,
 *         or -1 with errno set to ENOENT if item is not in its bucket
 */
int
hash_table_delete(hash_table *t, void *item)
{
    if (item == NULL) {
        errno = EINVAL;
        return -1;
    }

    struct hash_link *victim = LINK(t, item);
    size_t const slot = victim->hashval % NUM_BUCKETS(t);

    /* "where" walks the pointers that reference chain members: first
       the bucket head, then each member's next field */
    for (void **where = &t->buckets[slot];
         *where != NULL;
         where = &LINK(t, *where)->next) {
        if (*where != item) {
            continue;
        }
        *where = victim->next;
        t->num_entries--;
        /* Shrink once the load factor drops below 1/2 */
        if (t->bucket_choice_index > 0 &&
            t->num_entries < NUM_BUCKETS(t) / 2) {
            return hash_table_resize(t, -1);
        }
        return 0;
    }

    /* Item not found in hash table */
    errno = ENOENT;
    return -1;
}
/**
 * \brief Iterate to the next item in a hash table
 *
 * Acts as an iterator.  Given a pointer to an item in the hash table,
 * returns the item that follows it: the rest of the item's own chain
 * first, then the head of the next non-empty bucket.  If cur is NULL,
 * returns the first item in the table.  You can therefore iterate
 * across the hash table like this:
 *
 *     void *item = NULL;
 *     while ( (item = hash_table_next(&table, item) ) != NULL) {
 *         // Do something with item
 *     }
 *
 * NOTE that you MUST NOT modify the hash table while iterating over it.
 *
 * \param t Pointer to a hash_table object
 * \param cur The current item.  Supply as NULL to get the first item
 *
 * \return A pointer to the next item in the hash table, or NULL if there
 *         are no more items
 */
void *
hash_table_next(hash_table *t, void *cur)
{
    size_t const bucket_count = NUM_BUCKETS(t);
    size_t i = 0;

    if (cur != NULL) {
        struct hash_link *lnk = LINK(t, cur);
        if (lnk->next != NULL) {
            return lnk->next;
        }
        /* cur ended its chain; resume at the bucket after cur's */
        i = (lnk->hashval % bucket_count) + 1;
    }

    while (i < bucket_count && t->buckets[i] == NULL) {
        i++;
    }
    return (i < bucket_count) ? t->buckets[i] : NULL;
}

111
src/hashtab.h Normal file
View File

@@ -0,0 +1,111 @@
/***************************************************************/
/* */
/* HASHTAB.H */
/* */
/* Header file for hash-table related functions. */
/* */
/* This file is part of REMIND. */
/* Copyright (C) 1992-2024 by Dianne Skoll */
/* SPDX-License-Identifier: GPL-2.0-only */
/* */
/***************************************************************/
/* For size_t and FILE */
#include <stdio.h>
/**
 * \brief A structure for holding hash table chain links.
 *
 * This structure is embedded as a member of a container structure to
 * make up a hash table entry.  The table is told where the member lives
 * via the link_offset argument of hash_table_init().
 */
struct hash_link {
void *next; /**< Next item (the container object, not its link) in the same bucket chain; NULL at the end of the chain */
unsigned int hashval; /**< Hash function value cached when the item is inserted; not yet reduced modulo the bucket count */
};
/**
 * \brief A hash table
 *
 * Initialize with hash_table_init() before use.  The members are
 * maintained by the hash_table_* functions.
 */
typedef struct {
unsigned int bucket_choice_index; /**< Index into array of possible bucket counts; selects the current number of buckets */
size_t num_entries; /**< Number of entries in the hash table */
size_t hash_link_offset; /**< Offset of the struct hash_link in the container */
void **buckets; /**< Array of buckets; each element is the first item of a chain, or NULL */
unsigned int (*hashfunc)(void *x); /**< Pointer to the hashing function */
int (*compare)(void *a, void *b); /**< Pointer to the comparison function; returns 0 when a and b match */
} hash_table;
/**
 * \brief Data type to hold statistics about a hash table
 *
 * Filled in by hash_table_get_stats().
 */
struct hash_table_stats {
size_t num_entries; /**< Number of items in the hash table */
size_t num_buckets; /**< Number of buckets in the hash table */
size_t num_nonempty_buckets; /**< Number of non-empty buckets */
size_t max_len; /**< Length of longest chain in the hash table */
size_t min_len; /**< Length of the shortest chain in the hash table (empty buckets count as length 0) */
double avg_len; /**< Average chain length over all buckets */
double avg_nonempty_len; /**< Average chain length over non-empty buckets only */
double stddev; /**< Standard deviation of chain lengths over all buckets */
};
/* Lifecycle */
int hash_table_init(hash_table *t,
size_t link_offset,
unsigned int (*hashfunc)(void *x),
int (*compare)(void *a, void *b));
void hash_table_free(hash_table *t);
/* Size queries */
size_t hash_table_num_entries(hash_table *t);
size_t hash_table_num_buckets(hash_table *t);
size_t hash_table_chain_len(hash_table *t, size_t i);
/* Insertion, lookup, removal and iteration */
int hash_table_insert(hash_table *t, void *item);
void *hash_table_find(hash_table *t, void *candidate);
void *hash_table_find_next(hash_table *t, void *obj);
int hash_table_delete(hash_table *t, void *item);
void *hash_table_next(hash_table *t, void *cur);
/* Statistics (implemented in hashtab_stats.c, which needs the math library) */
void hash_table_dump_stats(hash_table *t, FILE *fp);
void hash_table_get_stats(hash_table *t, struct hash_table_stats *stat);
/**
 * \brief Iterate over all items in a hash table
 *
 * This macro expands to a for-loop header built on hash_table_next().
 * "item" must be an assignable pointer variable and "t" a pointer to
 * the table; both are evaluated more than once, so they should be free
 * of side effects.  The table must not be modified inside the loop.
 * Here is an example of how to use it:
 *
 *     hash_table tab;
 *     void *item;
 *     hash_table_for_each(item, &tab) {
 *         // Do something with item
 *     }
 */
#define hash_table_for_each(item, t) \
for ((item) = hash_table_next((t), NULL); \
(item); \
(item) = hash_table_next((t), (item)))
/**
 * \brief Iterate over all items in a hash table that match a candidate
 *
 * This macro iterates over all items in a hash table that match a
 * candidate object.  (In general, a hash table may contain multiple
 * objects with the same key.)  The first match comes from
 * hash_table_find(); later matches come from hash_table_find_next(),
 * which compares against the previously returned item.  Here is an
 * example assuming that the hash table holds objects of type
 * struct int_object:
 *
 *     struct int_object {
 *         int value;
 *         struct hash_link link;
 *     };
 *
 *     hash_table tab;
 *     struct int_object candidate;
 *
 *     candidate.value = 7;
 *     struct int_object *item;
 *     hash_table_for_each_matching(item, &candidate, &tab) {
 *         // Do something with item, which will match "7"
 *     }
 */
#define hash_table_for_each_matching(item, candidate, t) \
for ((item) = hash_table_find((t), (candidate)); \
(item); \
(item) = hash_table_find_next((t), (item)))

96
src/hashtab_stats.c Normal file
View File

@@ -0,0 +1,96 @@
/***************************************************************/
/* */
/* HASHTAB_STATS.C */
/* */
/* Utility function to print hash table stats. */
/* */
/* This file is part of REMIND. */
/* Copyright (C) 1992-2024 by Dianne Skoll */
/* SPDX-License-Identifier: GPL-2.0-only */
/* */
/***************************************************************/
/**
* \file hashtab_stats.c
* \brief Obtain or print statistics about a hash table
*
* NOTE: Use of any of the functions in this file will require linking
* with the math library to pull in the sqrt() function.
*/
#include "hashtab.h"
#include <stdio.h>
#include <math.h>
/**
 * \brief Write a hash table's statistics to a stdio stream
 *
 * Gathers the statistics with hash_table_get_stats() and prints them as
 * one "label: value" pair per line.
 *
 * \param t A pointer to a hash_table object
 * \param fp A stdio file pointer that is writable
 */
void
hash_table_dump_stats(hash_table *t, FILE *fp)
{
    struct hash_table_stats s;

    hash_table_get_stats(t, &s);

    unsigned long const entries  = (unsigned long) s.num_entries;
    unsigned long const buckets  = (unsigned long) s.num_buckets;
    unsigned long const nonempty = (unsigned long) s.num_nonempty_buckets;
    unsigned long const longest  = (unsigned long) s.max_len;
    unsigned long const shortest = (unsigned long) s.min_len;

    fprintf(fp, "#Entries: %lu\n#Buckets: %lu\n#Non-empty Buckets: %lu\n",
            entries, buckets, nonempty);
    fprintf(fp, "Max len: %lu\nMin len: %lu\nAvg len: %.4f\nStd dev: %.4f\nAvg nonempty len: %.4f\n",
            longest, shortest,
            s.avg_len, s.stddev, s.avg_nonempty_len);
}
/**
 * \brief Obtain hash table statistics
 *
 * This function fills in the elements of a struct hash_table_stats object
 * with hash table statistics.  Averages and the standard deviation are
 * taken over all buckets (empty ones included) except for
 * avg_nonempty_len.  If the table reports zero buckets, every field
 * other than num_entries is set to zero.
 *
 * \param t A pointer to a hash_table object
 * \param stat A pointer to a hash_table_stats object that will be filled in
 */
void
hash_table_get_stats(hash_table *t, struct hash_table_stats *stat)
{
    size_t n = hash_table_num_buckets(t);

    stat->num_buckets = n;
    stat->num_entries = hash_table_num_entries(t);
    stat->max_len = 0;
    stat->min_len = 0;
    stat->avg_len = 0.0;
    stat->stddev = 0.0;
    stat->num_nonempty_buckets = 0;
    stat->avg_nonempty_len = 0.0;
    if (n == 0) {
        return;
    }

    size_t max_len = 0;
    size_t min_len = (size_t) -1;   /* Largest size_t: any chain is shorter */
    double sum = 0.0;
    double sumsq = 0.0;

    for (size_t i=0; i<n; i++) {
        size_t c = hash_table_chain_len(t, i);
        if (c != 0) {
            stat->num_nonempty_buckets++;
        }
        sum += (double) c;
        sumsq += (double) c * (double) c;
        if (c > max_len) max_len = c;
        if (c < min_len) min_len = c;
    }

    double avg_len = sum / (double) n;
    /* E[x^2] - E[x]^2 can come out a hair below zero through rounding;
       clamp so sqrt() never yields NaN */
    double variance = (sumsq / (double) n) - (avg_len * avg_len);
    if (variance < 0.0) {
        variance = 0.0;
    }
    if (stat->num_nonempty_buckets > 0) {
        stat->avg_nonempty_len = sum / (double) stat->num_nonempty_buckets;
    }
    stat->max_len = max_len;
    stat->min_len = min_len;
    stat->avg_len = avg_len;
    stat->stddev = sqrt(variance);
}

View File

@@ -179,6 +179,12 @@ void InitRemind(int argc, char const *argv[])
dse = NO_DATE;
/* Initialize variable hash table */
InitVars();
/* Initialize user-defined functions hash table */
InitUserFunctions();
/* If stdout is a terminal, initialize $FormWidth to terminal width-8,
but clamp to [20, 500] */
InitCalWidthAndFormWidth(STDOUT_FILENO);

View File

@@ -258,3 +258,5 @@ void get_dedupe_hash_stats(int *total, int *maxlen, double *avglen);
int ShouldDedupe(int trigger_date, int trigger_time, char const *body);
void ClearDedupeTable(void);
void InitDedupeTable(void);
void InitVars(void);
void InitUserFunctions(void);

View File

@@ -12,6 +12,7 @@
#include <limits.h>
#include "dynbuf.h"
#include "hashtab.h"
typedef struct udf_struct UserFunc;
@@ -99,7 +100,7 @@ typedef struct expr_node_struct {
/* Define the structure of a variable */
typedef struct var {
struct var *next;
struct hash_link link;
char name[VAR_NAME_LEN+1];
char preserve;
Value v;
@@ -291,7 +292,7 @@ typedef struct {
/* Define the data structure used to hold a user-defined function */
typedef struct udf_struct {
struct udf_struct *next;
struct hash_link link;
char name[VAR_NAME_LEN+1];
expr_node *node;
char **args;

View File

@@ -15,6 +15,7 @@
#include <stdio.h>
#include <ctype.h>
#include <stddef.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
@@ -27,15 +28,36 @@
#include "protos.h"
#include "err.h"
#define FUNC_HASH_SIZE 31 /* Size of User-defined function hash table */
/* The hash table */
static UserFunc *FuncHash[FUNC_HASH_SIZE];
hash_table FuncHash;
static void DestroyUserFunc (UserFunc *f);
static void FUnset (char const *name);
static void FSet (UserFunc *f);
static void RenameUserFunc(char const *oldname, char const *newname);
unsigned int HashVal_nocase(char const *str);
/* Hash callback for FuncHash: hashes the function's name with
   HashVal_nocase. */
static unsigned int HashUserFunc(void *x)
{
    return HashVal_nocase(((UserFunc *) x)->name);
}
/* Comparison callback for FuncHash: two user functions match when their
   names are identical according to strcmp. */
static int CompareUserFuncs(void *a, void *b)
{
    char const *lhs_name = ((UserFunc *) a)->name;
    char const *rhs_name = ((UserFunc *) b)->name;

    return strcmp(lhs_name, rhs_name);
}
/* Set up the user-defined function hash table.  The result of
   hash_table_init is not checked. */
void
InitUserFunctions(void)
{
    size_t const link_pos = offsetof(UserFunc, link);

    hash_table_init(&FuncHash, link_pos, HashUserFunc, CompareUserFuncs);
}
/***************************************************************/
/* */
@@ -257,8 +279,8 @@ int DoFset(ParsePtr p)
}
local_array[i].v.type = ERR_TYPE;
StrnCpy(local_array[i].name, DBufValue(&buf), VAR_NAME_LEN);
local_array[i].next = &(local_array[i+1]);
local_array[i+1].next = NULL;
local_array[i].link.next = &(local_array[i+1]);
local_array[i+1].link.next = NULL;
func->nargs++;
c = ParseNonSpaceChar(p, &r, 0);
if (r) {
@@ -373,21 +395,11 @@ static void DestroyUserFunc(UserFunc *f)
/***************************************************************/
static void FUnset(char const *name)
{
UserFunc *cur, *prev;
int h;
h = HashVal_nocase(name) % FUNC_HASH_SIZE;
cur = FuncHash[h];
prev = NULL;
while(cur) {
if (! strncmp(name, cur->name, VAR_NAME_LEN)) break;
prev = cur;
cur = cur->next;
UserFunc *f = FindUserFunc(name);
if (f) {
hash_table_delete(&FuncHash, f);
DestroyUserFunc(f);
}
if (!cur) return;
if (prev) prev->next = cur->next; else FuncHash[h] = cur->next;
DestroyUserFunc(cur);
}
/***************************************************************/
@@ -399,19 +411,17 @@ static void FUnset(char const *name)
/***************************************************************/
static void FSet(UserFunc *f)
{
int h = HashVal_nocase(f->name) % FUNC_HASH_SIZE;
f->next = FuncHash[h];
FuncHash[h] = f;
hash_table_insert(&FuncHash, f);
}
UserFunc *FindUserFunc(char const *name)
{
UserFunc *f;
int h = HashVal_nocase(name) % FUNC_HASH_SIZE;
UserFunc candidate;
/* Search for the function */
f = FuncHash[h];
while (f && strncmp(name, f->name, VAR_NAME_LEN)) f = f->next;
StrnCpy(candidate.name, name, VAR_NAME_LEN);
f = hash_table_find(&FuncHash, &candidate);
return f;
}
@@ -444,15 +454,13 @@ UnsetAllUserFuncs(void)
{
    /* Remove and destroy every user-defined function.
     *
     * hash_table_delete() may shrink the table, which re-buckets every
     * remaining item; a "next" pointer fetched before the deletion could
     * then skip functions.  Always re-fetching the first remaining item
     * is immune to that. */
    UserFunc *f;

    while ((f = hash_table_next(&FuncHash, NULL)) != NULL) {
        if (hash_table_delete(&FuncHash, f) != 0) {
            /* Should not happen for an item we just got from the table;
               bail out rather than loop forever. */
            break;
        }
        DestroyUserFunc(f);
    }
}
@@ -469,7 +477,6 @@ static void
RenameUserFunc(char const *oldname, char const *newname)
{
UserFunc *f = FindUserFunc(oldname);
UserFunc *cur, *prev;
if (!strcmp(oldname, newname)) {
/* Same name; do nothing */
@@ -485,52 +492,22 @@ RenameUserFunc(char const *oldname, char const *newname)
}
/* Remove from hash table */
int h = HashVal_nocase(f->name) % FUNC_HASH_SIZE;
cur = FuncHash[h];
prev = NULL;
while(cur) {
if (cur == f) {
if (prev) {
prev->next = cur->next;
} else {
FuncHash[h] = cur->next;
}
break;
}
prev = cur;
cur = cur->next;
}
hash_table_delete(&FuncHash, f);
/* Rename */
StrnCpy(f->name, newname, VAR_NAME_LEN);
/* Insert into hash table */
h = HashVal_nocase(f->name) % FUNC_HASH_SIZE;
f->next = FuncHash[h];
FuncHash[h] = f;
hash_table_insert(&FuncHash, f);
}
void
get_userfunc_hash_stats(int *total, int *maxlen, double *avglen)
{
int len;
int i;
UserFunc *f;
*maxlen = 0;
*total = 0;
for (i=0; i<FUNC_HASH_SIZE; i++) {
len = 0;
f = FuncHash[i];
while(f) {
len++;
(*total)++;
f = f->next;
}
if (len > *maxlen) {
*maxlen = len;
}
}
*avglen = (double) *total / (double) FUNC_HASH_SIZE;
struct hash_table_stats s;
hash_table_get_stats(&FuncHash, &s);
*total = s.num_entries;
*maxlen = s.max_len;
*avglen = s.avg_len;
}

134
src/var.c
View File

@@ -17,6 +17,7 @@
#include <string.h>
#include <ctype.h>
#include <stddef.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
@@ -27,8 +28,6 @@
#include "err.h"
#define UPPER(c) (islower(c) ? toupper(c) : c)
/* The variable hash table */
#define VAR_HASH_SIZE 67
#define VARIABLE ErrMsg[E_VAR]
#define VALUE ErrMsg[E_VAL]
#define UNDEF ErrMsg[E_UNDEF]
@@ -36,7 +35,27 @@
static int IntMin = INT_MIN;
static int IntMax = INT_MAX;
static Var *VHashTbl[VAR_HASH_SIZE];
static hash_table VHashTbl;
/* Hash callback for VHashTbl: hashes the variable's name with HashVal. */
static unsigned int VarHashFunc(void *x)
{
    return HashVal(((Var *) x)->name);
}
/* Comparison callback for VHashTbl: compares the two variables' names
   with StrCmpi and returns its result. */
static int VarCompareFunc(void *a, void *b)
{
    char const *lhs_name = ((Var *) a)->name;
    char const *rhs_name = ((Var *) b)->name;

    return StrCmpi(lhs_name, rhs_name);
}
/* Set up the variable hash table.  The result of hash_table_init is
   not checked. */
void
InitVars(void)
{
    size_t const link_pos = offsetof(Var, link);

    hash_table_init(&VHashTbl, link_pos, VarHashFunc, VarCompareFunc);
}
static double
strtod_in_c_locale(char const *str, char **endptr)
@@ -483,31 +502,22 @@ unsigned int HashVal(char const *str)
/***************************************************************/
Var *FindVar(char const *str, int create)
{
register int h;
register Var *v;
register Var *prev;
Var *v;
Var candidate;
StrnCpy(candidate.name, str, VAR_NAME_LEN);
h = HashVal(str) % VAR_HASH_SIZE;
v = VHashTbl[h];
prev = NULL;
v = (Var *) hash_table_find(&VHashTbl, &candidate);
if (v != NULL || !create) return v;
while(v) {
if (! StrinCmp(str, v->name, VAR_NAME_LEN)) return v;
prev = v;
v = v-> next;
}
if (!create) return v;
/* Create the variable */
/* Create the variable */
v = NEW(Var);
if (!v) return v;
v->next = NULL;
v->v.type = INT_TYPE;
v->v.v.val = 0;
v->preserve = 0;
StrnCpy(v->name, str, VAR_NAME_LEN);
if (prev) prev->next = v; else VHashTbl[h] = v;
hash_table_insert(&VHashTbl, v);
return v;
}
@@ -520,23 +530,12 @@ Var *FindVar(char const *str, int create)
/***************************************************************/
int DeleteVar(char const *str)
{
    /* Look the variable up by name, unlink it from VHashTbl, then
       release its value and storage.
       Returns OK on success, E_NOSUCH_VAR if no such variable exists. */
    Var *v = FindVar(str, 0);

    if (!v) return E_NOSUCH_VAR;

    /* Unlink BEFORE freeing: hash_table_delete reads the hash_link
       stored inside v, so calling it after free(v) would be a
       use-after-free. */
    hash_table_delete(&VHashTbl, v);
    DestroyValue(v->v);
    free(v);
    return OK;
}
@@ -725,19 +724,14 @@ int DoDump(ParsePtr p)
/***************************************************************/
void DumpVarTable(void)
{
register Var *v;
register int i;
Var *v;
fprintf(ErrFp, "%s %s\n\n", VARIABLE, VALUE);
for (i=0; i<VAR_HASH_SIZE; i++) {
v = VHashTbl[i];
while(v) {
fprintf(ErrFp, "%s ", v->name);
PrintValue(&(v->v), ErrFp);
fprintf(ErrFp, "\n");
v = v->next;
}
hash_table_for_each(v, &VHashTbl) {
fprintf(ErrFp, "%s ", v->name);
PrintValue(&(v->v), ErrFp);
fprintf(ErrFp, "\n");
}
}
@@ -751,27 +745,18 @@ void DumpVarTable(void)
/***************************************************************/
void DestroyVars(int all)
{
    /* Destroy variables in VHashTbl.  If "all" is non-zero every
       variable is destroyed; otherwise variables whose preserve flag
       is set are kept. */
    Var *v = hash_table_next(&VHashTbl, NULL);

    while (v) {
        /* Fetch the successor first: v may be freed below */
        Var *next = hash_table_next(&VHashTbl, v);

        if (all || !v->preserve) {
            size_t buckets_before = hash_table_num_buckets(&VHashTbl);

            DestroyValue(v->v);
            hash_table_delete(&VHashTbl, v);
            free(v);

            if (hash_table_num_buckets(&VHashTbl) != buckets_before) {
                /* The deletion shrank the table and re-bucketed every
                   remaining item, so "next" no longer marks our place
                   and continuing from it could skip variables.  Start
                   over; preserved variables seen again are skipped. */
                next = hash_table_next(&VHashTbl, NULL);
            }
        }
        v = next;
    }
}
@@ -1214,24 +1199,9 @@ print_sysvar_tokens(void)
void
get_var_hash_stats(int *total, int *maxlen, double *avglen)
{
int len;
int i;
Var *v;
*maxlen = 0;
*total = 0;
for (i=0; i<VAR_HASH_SIZE; i++) {
len = 0;
v = VHashTbl[i];
while(v) {
len++;
(*total)++;
v = v->next;
}
if (len > *maxlen) {
*maxlen = len;
}
}
*avglen = (double) *total / (double) VAR_HASH_SIZE;
struct hash_table_stats s;
hash_table_get_stats(&VHashTbl, &s);
*total = s.num_entries;
*maxlen = s.max_len;
*avglen = s.avg_len;
}

View File

@@ -2596,141 +2596,141 @@ wkdaynum(1993-12-27) => 1
dump
Variable Value
a071 2
a072 0
a030 1
a031 "foobarbaz"
a074 "Tuesday, 5 May, 1992 In 444 days' time T"...
a032 34
a054b 11:22
a075 "05-05 Tuesday, May 5th, 1992 Tuesday, Ma"...
a033 "foo"
a076 "S' 05 Th 05 Tuesday, 5th May, 1992 Tuesd"...
a034 1991-02-17
a077 "1992 92\n"
a035 1
a078 1991-04-07
a036 "bar"
a079 1992-04-26
a037 1991-02-15
a100 2010-09-03
a038 33
a101 2010-09-03
a039 "February"
a102 5000
a103 0
a104 0
a105 -1
mltest "a b"
a080 2027-05-02
a106 4
a081 ""
a107 3
a082 1991-03-13
a108 14
a040 2
a083 1991-03-24
a109 2012-01-01
a041 "3rd"
a084 7
a042 "4th"
a085 7
a000 1
a043 "UNIX"
a086 4
a001 1
a044 "s"
a087 3
a002 102
a045 "iess"
a088 14
a003 1990
a046 "ies"
a089 2012-01-01
a004 "B7BMB"
a047 -1
a110 1991-02-16
a005 "baz"
a048 "foo"
a111 -1
a006 "1"
a049 21
a112 7
a007 "1991-02-16"
a113 15
a008 "11:44"
a114 2
a115 03:33
a090 1991-02-16
a116 -4
a091 -1
a117 -3
a092 7
a118 0
a093 0
a119 -1
a094 0
a052 03:07
a095 -1
a010 12
a004 "B7BMB"
a053 1992-01-10
a096 -4
a011 704
a005 "baz"
a054 11:22
a097 -3
a012 411
a055 1
a098 0
a013 1992-02-02
a006 "1"
a056 "SDFJHSDF KSJDFH KJSDFH KSJDFH"
a099 -1
a007 "1991-02-16"
a057 "SDFJHSDF KSJDFH KJSDFH KSJDFH"
a120 2010-09-03
a015 16
a008 "11:44"
a058 "05.01.01"
a121 2010-09-03
a016 28
a059 "Saturday"
a122 5000
a017 29
a123 0
a018 1
a124 0
a019 0
a125 -1
a126 14:00
a127 04:30
a128 2018-02-03@16:45
a010 12
a060 6
a129 2019-02-03@16:14
a011 704
a061 1991
a012 411
a100 2010-09-03
a062 -19
a020 "../tests/test.rem"
a013 1992-02-02
a101 2010-09-03
a063 0
a021 "foo bar baz"
a102 5000
a064 1
a022 11
a015 16
a103 0
a065 1
a023 1
a016 28
a104 0
a066 0
a131b 00:00
a024 0
a017 29
a105 -1
a067 "INT"
a130 1991-02-16
a025 4
a018 1
a106 4
a068 "STRING"
a131 00:00
a026 7
a019 0
a107 3
a069 "TIME"
a132 1991-02-16@00:00
a027 0
a133 16:00
a028 1
a134 1991-02-13@16:00
a029 0
a135 72:00
a136 "FILE"
a108 14
a109 2012-01-01
a020 "../tests/test.rem"
a070 "DATE"
a021 "foo bar baz"
a071 2
a022 11
a110 1991-02-16
a072 0
a023 1
a111 -1
a024 0
a112 7
a074 "Tuesday, 5 May, 1992 In 444 days' time T"...
a025 4
a113 15
a026 7
a075 "05-05 Tuesday, May 5th, 1992 Tuesday, Ma"...
a114 2
a027 0
a076 "S' 05 Th 05 Tuesday, 5th May, 1992 Tuesd"...
a115 03:33
a028 1
a077 "1992 92\n"
a116 -4
a078 1991-04-07
a029 0
a117 -3
a079 1992-04-26
a118 0
a119 -1
a131b 00:00
a030 1
a080 2027-05-02
a031 "foobarbaz"
a081 ""
a032 34
a120 2010-09-03
a082 1991-03-13
a033 "foo"
a121 2010-09-03
a083 1991-03-24
a034 1991-02-17
a122 5000
a084 7
a035 1
a123 0
a085 7
a036 "bar"
a124 0
a086 4
a037 1991-02-15
a125 -1
a087 3
a038 33
a126 14:00
a088 14
a039 "February"
a127 04:30
a089 2012-01-01
a128 2018-02-03@16:45
a129 2019-02-03@16:14
mltest "a b"
a040 2
a090 1991-02-16
a041 "3rd"
a091 -1
a042 "4th"
a130 1991-02-16
a092 7
a043 "UNIX"
a054b 11:22
a131 00:00
a093 0
a044 "s"
a132 1991-02-16@00:00
a094 0
a045 "iess"
a133 16:00
a095 -1
a046 "ies"
a134 1991-02-13@16:00
a096 -4
a047 -1
a135 72:00
a097 -3
a048 "foo"
a136 "FILE"
a098 0
a049 21
a099 -1
dump $
Variable Value
@@ -5970,8 +5970,8 @@ Hello on the same line
DEBUG +s
# Don't want Remind to queue reminders
EXIT
Var hash: total = 141; maxlen = 4; avglen = 2.104
Func hash: total = 17; maxlen = 2; avglen = 0.548
Var hash: total = 141; maxlen = 4; avglen = 1.785
Func hash: total = 17; maxlen = 3; avglen = 1.000
Dedup hash: total = 0; maxlen = 0; avglen = 0.000
Expression nodes allocated: 128
Expression nodes high-water: 74
@@ -13424,7 +13424,7 @@ Parsed expression: isany("foo", 1 + 1, 2:00 + 1, '2021-01-01' + 1, '2021-01-01@1
"f" + "oo" => "foo"
isany("foo", 2, 02:01, 2021-01-02, 2021-01-01@14:01, "foo", ?) => 1
No reminders.
Var hash: total = 1; maxlen = 1; avglen = 0.015
Var hash: total = 1; maxlen = 1; avglen = 0.059
Func hash: total = 0; maxlen = 0; avglen = 0.000
Dedup hash: total = 0; maxlen = 0; avglen = 0.000
Expression nodes allocated: 512