+Fri Dec 12 23:54:37 2003 Ben Pfaff <blp@gnu.org>
+
+ * autorecode.c: (recode) Replace stupid use of memcpy() by
+ memberwise copy.
+ (hash_alpha_value) Use hsh_hash_bytes(). Get rid of nasty casts.
+ (hash_numeric_value) Ditto.
+ (autorecode_proc_func) pool_strdup() was wrong here because the
+ source string was not null-terminated. Use new pool_strndup()
+ instead.
+
+ * crosstabs.q: (enum_var_values) Remove superfluous and bizarre
+ use of hsh_iterator_init().
+
+ * data-in.c: (parse_N) Initialize i->v->f.
+
+ * flip.c: (cmd_flip) Use memmove(), not memcpy(), to copy
+ overlapping arrays.
+
+ * groff-font.c: Use power-of-2 hash table sizes, not prime.
+ (groff_read_font) Don't call hsh_next_prime(). Don't call
+ fclose(NULL).
+ (static var hash) Remove `size_p', `max_used' members.
+ (font_char_name_to_index) Don't call hsh_next_prime(). Use
+ hsh_hash_string() instead of hashpjw(), & instead of %.
+ (default_font) Don't call hsh_next_prime().
+
+ * pool.c: (pool_strndup) New function.
+ (pool_strdup) Reimplement in terms of pool_strndup.
+
+ * postscript.c: (hash_font_entry) Use hsh_hash_string(). Get rid
+ of nasty casts.
+ (hash_ps_encoding) Use hsh_hash_string().
+ (hash_ps_combo) Use hsh_hash_string(), hsh_hash_int().
+ (hash_filename2font) Use hsh_hash_string().
+
+ * som.c: Add #include <stdlib.h>.
+
+ * tab.c: (tab_destroy) Don't set t->container after freeing `t'
+ (by destroying its pool).
+
+Fri Dec 12 23:18:59 2003 Ben Pfaff <blp@gnu.org>
+
+ Miscellaneous hash table code cleanup:
+
+ * hash.h: (struct hsh_table) Moved into hash.c.
+ (hsh_count) Ditto, and transformed into function.
+ (hsh_compare_func) New typedef, used for defining otherwise-long
+ function types here and in hash.c
+ (hsh_hash_func) Ditto.
+ (hsh_free_func) Ditto.
+
+ * hash.c: (struct hsh_table) Renamed `n' to `used', `m' to `size',
+ `table' to `entries'. Removed `mp'. All references updated.
+ (hsh_clear) Don't shrink entries array; if the hash was this big
+ once, it probably will be again.
+ (hsh_rehash) Made static.
+ (force_hsh_insert) Renamed hsh_force_insert.
+ (force_hsh_find) Renamed hsh_force_find.
+
+ Made hash table sizes powers of 2, because that's fine with any
+ reasonable hash function and because taking a power-of-2 modulus
+ is faster than any other:
+
+ (hsh_prime_tab) Removed;
+ (hsh_next_prime) Ditto.
+ (next_power_of_2) New function.
+ (hsh_create) Use next_power_of_2.
+ (hsh_rehash) Use & instead of %.
+
+ Cleaned up hsh_sort:
+
+ (internal_comparison_fn) Removed.
+ (sort_nulls_last) New function.
+ (hsh_sort) Removed second parameter, switched to using the new
+ quicksort() function from quicksort.h to avoid using nasty need
+ for static variables with qsort(). All references updated.
+
+ Changed the hash functions offered, because there are better hash
+ functions than the ones we had, and cleaned up the names to boot:
+
+ * hash.c: (hashpjw_d) Removed.
+ (hashpjw) Ditto.
+ (hsh_hash_bytes) New function.
+ (hsh_hash_string) New function.
+ (hsh_hash_int) New function.
+
+ Improved the hash table iteration interface:
+
+ * hash.h: (hsh_iterator_init) Removed.
+ (struct hsh_iterator) Removed `init' member, change `next' to
+ size_t.
+
+ * hash.c: (hsh_foreach) Removed. All references updated to use
+ hsh_first/hsh_next instead.
+ (hsh_first) New function. Notably, unlike hsh_foreach() it does
+ not treat a null pointer as an empty hash table.
+ (hsh_next) New function.
+
+ Made deletion possible, though slow:
+
+ * hash.c: (locate_matching_entry) New function.
+ (hsh_find) Use locate_matching_entry().
+ (hsh_delete) New function also using locate_matching_entry().
+ (hsh_force_delete) New function.
+
+Fri Dec 12 23:16:10 2003 Ben Pfaff <blp@gnu.org>
+
+ * quicksort.c: New file implementing a sort routine with a
+ interface better than qsort() because it passes a user-provided
+ parameter to the sort routine.
+
+ * Makefile.am: Add quicksort.c, quicksort.h.
+
Fri Dec 12 13:31:58 2003 Ben Pfaff <blp@gnu.org>
* All source files: Get rid of nasty special cases for Checker,
loop.c magic.c magic.h main.c main.h matrix-data.c matrix.c matrix.h \
means.c mis-val.c misc.c misc.h modify-vars.c numeric.c output.c \
output.h pfm-read.c pfm-write.c pfm.h pool.c pool.h postscript.c \
-print.c random.c random.h recode.c rename-vars.c repeat.c sample.c \
-sel-if.c set.c settings.h sfm-read.c sfm-write.c sfm.h sfmP.h som.c \
-som.h sort.c sort.h split-file.c stat.h stats.c stats.h str.c str.h \
-sysfile-info.c tab.c tab.h temporary.c title.c t-test.c val-labs.c \
-var-labs.c var.h vars-atr.c vars-prs.c vector.c vector.h version.c \
-version.h vfm.c vfm.h vfmP.h weight.c
+print.c quicksort.c quicksort.h random.c random.h recode.c \
+rename-vars.c repeat.c sample.c sel-if.c set.c settings.h sfm-read.c \
+sfm-write.c sfm.h sfmP.h som.c som.h sort.c sort.h split-file.c stat.h \
+stats.c stats.h str.c str.h sysfile-info.c tab.c tab.h temporary.c \
+title.c t-test.c val-labs.c var-labs.c var.h vars-atr.c vars-prs.c \
+vector.c vector.h version.c version.h vfm.c vfm.h vfmP.h weight.c
LDADD = ../lib/julcal/libjulcal.a \
../lib/misc/libmisc.a \
for (i = 0; i < nv_src; i++)
{
struct arc_spec *spec = &t->arc[i];
- void **p = hsh_sort (h_trans[i], NULL);
+ void **p = hsh_sort (h_trans[i]);
int count = hsh_count (h_trans[i]);
int j;
for (j = 0; *p; p++, j++)
{
struct arc_item *item = pool_alloc (arc_pool, sizeof *item);
-
- memcpy (&item->from, *p, sizeof (union value));
- if (v_src[i]->type == ALPHA)
- item->from.c = pool_strdup (arc_pool, item->from.c);
+ union value *vp = *p;
+
+ if (v_src[i]->type == NUMERIC)
+ item->from.f = vp->f;
+ else
+ item->from.c = pool_strdup (arc_pool, vp->c);
item->to = !descend ? j + 1 : count - j;
- force_hsh_insert (spec->items, item);
+ hsh_force_insert (spec->items, item);
}
hsh_destroy (h_trans[i]);
struct arc_item *item;
if (spec->src->type == NUMERIC)
- item = force_hsh_find (spec->items, &c->data[spec->src->fv].f);
+ item = hsh_force_find (spec->items, &c->data[spec->src->fv].f);
else
{
union value v;
v.c = c->data[spec->src->fv].s;
- item = force_hsh_find (spec->items, &v);
+ item = hsh_force_find (spec->items, &v);
}
c->data[spec->dest->fv].f = item->to;
}
static unsigned
-hash_alpha_value (const void *a, void *len)
+hash_alpha_value (const void *a_, void *len)
{
- return hashpjw_d (((union value *) a)->c, &((union value *) a)->c[(int) len]);
+ const union value *a = a_;
+ return hsh_hash_bytes (a->c, (int) len);
}
static int
}
static unsigned
-hash_numeric_value (const void *a, void *len unused)
+hash_numeric_value (const void *a_, void *len unused)
{
- return hashpjw_d ((char *) &((union value *) a)->f,
- (char *) &(&((union value *) a)->f)[1]);
+ const union value *a = a_;
+ return hsh_hash_bytes (&a->f, sizeof a->f);
}
static int
if (NULL == *vpp)
{
vp = pool_alloc (hash_pool, sizeof (union value));
- vp->c = pool_strdup (hash_pool, v.c);
+ vp->c = pool_strndup (hash_pool, v.c, v_src[i]->width);
*vpp = vp;
}
}
other hand, what good are the above commands? */
#undef DEBUGGING
-#define DEBUGGING 1
+/*#define DEBUGGING 1*/
#include "debug-print.h"
\f
/* Definitions. */
if (mode == GENERAL)
{
n_sorted_tab = hsh_count (gen_tab);
- sorted_tab = (struct table_entry **) hsh_sort (gen_tab, compare_table_entry);
+ sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
#if DEBUGGING
print_table_entries (sorted_tab);
#endif
int i;
i = 0;
- hsh_iterator_init (trav);
while (NULL != (v = avl_traverse (tree, &trav)))
(*values)[i++] = *v;
*nvalues = i;
{
const unsigned char *cp;
+ i->v->f = 0;
for (cp = i->s; cp < i->e; cp++)
{
if (!isdigit (*cp))
for (i = 0; i < nvar; i++)
if (var[i] == newnames)
{
- memcpy (&var[i], &var[i + 1], sizeof *var * (nvar - i - 1));
+ memmove (&var[i], &var[i + 1], sizeof *var * (nvar - i - 1));
nvar--;
break;
}
font->metric_size = 0;
font->metric_used = 0;
font->kern = NULL;
- font->kern_size = 0;
- font->kern_size_p = hsh_next_prime (64);
+ font->kern_size = 8;
font->kern_used = 0;
font->kern_max_used = 0;
/* Come here on any error. */
lose:
- fclose (f);
+ if (f != NULL)
+ fclose (f);
pool_destroy (font_pool);
#if unix
free ((char *) fn);
/* Character index hash table. */
static struct
{
- int size; /* Size of table. */
- int *size_p; /* Next larger table size. */
+ int size; /* Size of table (must be power of 2). */
int used; /* Number of full entries. */
- int max_used; /* # used entries where we enlarge & rehash. */
int next_index; /* Next index to allocate. */
struct index_hash *tab; /* Hash table proper. */
struct pool *ar; /* Pool for names. */
if (!hash.tab)
{
- hash.size_p = hsh_next_prime (128);
- hash.size = *hash.size_p++;
+ hash.size = 128;
hash.used = 0;
- hash.max_used = hash.size / 2;
hash.next_index = 256;
hash.tab = xmalloc (sizeof *hash.tab * hash.size);
hash.ar = pool_create ();
hash.tab[i].name = NULL;
}
- for (i = hashpjw (name) % hash.size; hash.tab[i].name;)
+ for (i = hsh_hash_string (name) & (hash.size - 1); hash.tab[i].name; )
{
if (!strcmp (hash.tab[i].name, name))
return hash.tab[i].index;
}
hash.used++;
- if (hash.used >= hash.max_used)
+ if (hash.used >= hash.size / 2)
{
struct index_hash *old_tab = hash.tab;
int old_size = hash.size;
int i, j;
- hash.size = *hash.size_p++;
- hash.max_used = hash.size / 2;
+ hash.size *= 2;
hash.tab = xmalloc (sizeof *hash.tab * hash.size);
for (i = 0; i < hash.size; i++)
hash.tab[i].name = NULL;
for (i = 0; i < old_size; i++)
if (old_tab[i].name)
{
- for (j = hashpjw (old_tab[i].name) % hash.size; hash.tab[j].name;)
+ for (j = hsh_hash_string (old_tab[i].name) & (hash.size - 1);
+ hash.tab[j].name;)
if (++j >= hash.size)
j = 0;
hash.tab[j] = old_tab[i];
int old_kern_size = font->kern_size;
int j;
- font->kern_size = *font->kern_size_p++;
+ font->kern_size *= 2;
font->kern_max_used = font->kern_size / 2;
font->kern = pool_malloc (font->owner,
sizeof *font->kern * font->kern_size);
font->metric_size = 0;
font->metric_used = 0;
font->kern = NULL;
- font->kern_size = 0;
- font->kern_size_p = hsh_next_prime (64);
+ font->kern_size = 8;
font->kern_used = 0;
font->kern_max_used = 0;
return font;
#include <stdlib.h>
#include "alloc.h"
#include "hash.h"
+#include "quicksort.h"
#include "str.h"
+/* Hash table. */
+struct hsh_table
+ {
+ size_t used; /* Number of filled entries. */
+ size_t size; /* Number of entries (a power of 2). */
+ void **entries; /* Hash table proper. */
+
+ void *param;
+ hsh_compare_func *compare;
+ hsh_hash_func *hash;
+ hsh_free_func *free;
+ };
+
/* Note for constructing hash functions:
You can store the hash values in the records, then compare hash
\f
/* Prime numbers and hash functions. */
-static int hsh_prime_tab[] =
+/* Returns smallest power of 2 greater than X. */
+static size_t
+next_power_of_2 (size_t x)
{
- 13, 31, 47, 67, 131, 257, 521, 1031, 2053, 4099, 8209, 16411,
- 32771, 65537, 131101, 262147, 524309, 1048583, 2097169, 4194319,
- 8388617, 16777259, 33554467, 67108879, 134217757, 268435459,
- 536870923, 1073741827, INT_MAX,
-};
-
-/* Returns pointer into hsh_prime_tab[], pointing to the first prime
- in the table greater than X. */
-int *
-hsh_next_prime (int x)
-{
- int *p;
-
- assert (x >= 0);
+ assert (x != 0);
- for (p = hsh_prime_tab; *p < x; p++)
- ;
+ for (;;)
+ {
+ /* Turn off rightmost 1-bit in x. */
+ size_t y = x & (x - 1);
- assert (*p != INT_MAX);
+ /* If y is 0 then x only had a single 1-bit. */
+ if (y == 0)
+ return 2 * x;
- return p;
+ /* Otherwise turn off the next. */
+ x = y;
+ }
}
-/* P.J. Weinberger's hash function, recommended by the Red Dragon
- Book. Hashes the d-string between S1 and S2. Returns unbounded
- nonnegative result. */
-int
-hashpjw_d (const char *s1, const char *s2)
+/* Colin Plumb's "one-at-a-time" hash, for bytes. */
+unsigned
+hsh_hash_bytes (const void *buf_, size_t size)
{
- const char *p;
- unsigned g, h;
-
- for (h = 0, p = s1; p < s2; p++)
+ const unsigned char *buf = buf_;
+ unsigned hash = 0;
+ while (size-- > 0)
{
- h = (h << 4) + *(unsigned char *) p;
- g = h & 0xf0000000;
- h ^= (g >> 24) | g;
- }
- return abs ((int) h);
+ hash += *buf++;
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+ return hash;
+}
+
+/* Colin Plumb's "one-at-a-time" hash, for strings. */
+unsigned
+hsh_hash_string (const char *s_)
+{
+ const unsigned char *s = s_;
+ unsigned hash = 0;
+ while (*s != '\0')
+ {
+ hash += *s++;
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+ return hash;
}
-/* Alternate entry point for hashpjw_d() that takes an s-string. */
-int
-hashpjw (const char *s)
+/* Hash for ints. */
+unsigned
+hsh_hash_int (int i)
{
- return hashpjw_d (s, &s[strlen (s)]);
+ return hsh_hash_bytes (&i, sizeof i);
}
\f
-/*hash tables. */
+/* Hash tables. */
/* Creates a hash table with at least M entries. COMPARE is a
function that compares two entries and returns 0 if they are
identical, nonzero otherwise; HASH returns a nonnegative hash value
for an entry; FREE destroys an entry. */
struct hsh_table *
-hsh_create (int m,
- int (*compare) (const void *, const void *, void *param),
- unsigned (*hash) (const void *, void *param),
- void (*free) (void *, void *param),
- void *param)
+hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
+ hsh_free_func *free, void *param)
{
struct hsh_table *h = xmalloc (sizeof *h);
int i;
- h->n = 0;
- h->mp = hsh_next_prime (m);
- h->m = *h->mp++;
- h->table = xmalloc (sizeof *h->table * h->m);
- for (i = 0; i < h->m; i++)
- h->table[i] = NULL;
+ h->used = 0;
+ h->size = next_power_of_2 (size);
+ h->entries = xmalloc (sizeof *h->entries * h->size);
+ for (i = 0; i < h->size; i++)
+ h->entries[i] = NULL;
h->param = param;
h->compare = compare;
h->hash = hash;
int i;
if (h->free)
- for (i = 0; i < h->m; i++)
- h->free (h->table[i], h->param);
+ for (i = 0; i < h->size; i++)
+ if (h->entries[i] != NULL)
+ h->free (h->entries[i], h->param);
- if (h->m >= 128)
- {
- free (h->table);
- h->mp = hsh_next_prime (31);
- h->m = *h->mp++;
- h->table = xmalloc (sizeof *h->table * h->m);
- }
-
- for (i = 0; i < h->m; i++)
- h->table[i] = NULL;
+ for (i = 0; i < h->size; i++)
+ h->entries[i] = NULL;
}
/* Destroys table H and all its contents. */
if (h == NULL)
return;
if (h->free)
- for (i = 0; i < h->m; i++)
- {
- void *p = h->table[i];
- if (p)
- h->free (p, h->param);
- }
- free (h->table);
+ for (i = 0; i < h->size; i++)
+ if (h->entries[i] != NULL)
+ h->free (h->entries[i], h->param);
+ free (h->entries);
free (h);
}
-/* Increases the capacity of H. */
-void
-hsh_rehash (struct hsh_table *h)
+/* Changes the capacity of H to NEW_SIZE. */
+static void
+hsh_rehash (struct hsh_table *h, size_t new_size)
{
- void **begin = h->table;
- void **end = &h->table[h->m];
+ void **begin = h->entries;
+ void **end = &h->entries[h->size];
void **table_p;
int i;
- h->m = *h->mp++;
- h->table = xmalloc (sizeof *h->table * h->m);
- for (i = 0; i < h->m; i++)
- h->table[i] = NULL;
+ h->size = new_size;
+ h->entries = xmalloc (sizeof *h->entries * h->size);
+ for (i = 0; i < h->size; i++)
+ h->entries[i] = NULL;
for (table_p = begin; table_p < end; table_p++)
{
void **entry;
if (*table_p == NULL)
continue;
- entry = &h->table[h->hash (*table_p, h->param) % h->m];
+ entry = &h->entries[h->hash (*table_p, h->param) & (h->size - 1)];
while (*entry)
- if (--entry < h->table)
- entry = &h->table[h->m - 1];
+ if (--entry < h->entries)
+ entry = &h->entries[h->size - 1];
*entry = *table_p;
}
free (begin);
}
-/* Static variables for hsh_sort(). */
-static void *hsh_param;
-static int (*hsh_compare) (const void *, const void *, void *param);
-
/* hsh_sort() helper function that ensures NULLs are sorted after the
rest of the table. */
static int
-internal_comparison_fn (const void *pa, const void *pb)
+sort_nulls_last (const void *a_, const void *b_, void *h_)
{
- void *a = *(void **) pa;
- void *b = *(void **) pb;
- return a == NULL ? 1 : (b == NULL ? -1 : hsh_compare (a, b, hsh_param));
+ void *a = *(void **) a_;
+ void *b = *(void **) b_;
+ struct hsh_table *h = h_;
+
+ if (a != NULL)
+ {
+ if (b != NULL)
+ return h->compare (a, b, h->param);
+ else
+ return -1;
+ }
+ else
+ {
+ if (b != NULL)
+ return +1;
+ else
+ return 0;
+ }
}
-/* Sorts hash table H based on function COMPARE. NULLs are sent to
- the end of the table. The resultant table is returned (it is
- guaranteed to be NULL-terminated). H should not be used again as a
- hash table until and unless hsh_clear() called. */
+/* Sorts hash table H based on hash comparison function. NULLs
+ are sent to the end of the table. The resultant table is
+ returned (it is guaranteed to be NULL-terminated). H should
+ not be used again as a hash table until and unless hsh_clear()
+ called. */
void **
-hsh_sort (struct hsh_table *h,
- int (*compare) (const void *, const void *, void *param))
+hsh_sort (struct hsh_table *h)
{
-#if GLOBAL_DEBUGGING
- static int reentrant;
- if (reentrant)
- abort ();
- reentrant++;
-#endif
- hsh_param = h->param;
- hsh_compare = compare ? compare : h->compare;
- qsort (h->table, h->m, sizeof *h->table, internal_comparison_fn);
-#if GLOBAL_DEBUGGING
- reentrant--;
-#endif
- return h->table;
+ quicksort (h->entries, h->size, sizeof *h->entries, sort_nulls_last, h);
+ return h->entries;
}
\f
/* Hash entries. */
void **entry;
/* Order of these statements is important! */
- if (h->n > h->m / 2)
- hsh_rehash (h);
- entry = &h->table[h->hash (target, h->param) % h->m];
+ if (h->used > h->size / 2)
+ hsh_rehash (h, h->size * 2);
+ entry = &h->entries[h->hash (target, h->param) & (h->size - 1)];
while (*entry)
{
if (!h->compare (*entry, target, h->param))
return entry;
-
- if (--entry < h->table)
- entry = &h->table[h->m - 1];
+ if (--entry < h->entries)
+ entry = &h->entries[h->size - 1];
}
- h->n++;
+ h->used++;
return entry;
}
-/* Returns the entry in hash table H that matches TARGET, NULL if
- there is none. */
-void *
-hsh_find (struct hsh_table *h, const void *target)
+/* Locates an entry matching TARGET. Returns a pointer to the
+ entry, or a null pointer on failure. */
+static inline void **
+locate_matching_entry (struct hsh_table *h, const void *target)
{
- void **entry = &h->table[h->hash (target, h->param) % h->m];
+ void **entry = &h->entries[h->hash (target, h->param) & (h->size - 1)];
while (*entry)
{
if (!h->compare (*entry, target, h->param))
- return *entry;
- if (--entry < h->table)
- entry = &h->table[h->m - 1];
+ return entry;
+ if (--entry < h->entries)
+ entry = &h->entries[h->size - 1];
}
return NULL;
}
-/* Iterates throught hash table TABLE with iterator ITER. Returns the
- next non-NULL entry in TABLE, or NULL after the last non-NULL
- entry. After NULL is returned, ITER is returned to a condition in
- which hsh_foreach() will return the first non-NULL entry if any on
- the next call. Do not add entries to TABLE between call to
- hsh_foreach() between NULL returns.
-
- Before calling hsh_foreach with a particular iterator for the first
- time, you must initialize the iterator with a call to
- hsh_iterator_init. */
+/* Returns the entry in hash table H that matches TARGET, or NULL
+ if there is none. */
void *
-hsh_foreach (struct hsh_table *table, struct hsh_iterator *iter)
+hsh_find (struct hsh_table *h, const void *target)
{
- int i;
+ void **entry = locate_matching_entry (h, target);
+ return entry != NULL ? *entry : NULL;
+}
+
+/* Deletes the entry in hash table H that matches TARGET.
+ Returns nonzero if an entry was deleted.
- if (!table)
- return NULL;
- if (!iter->init)
+ Note: this function is very slow because it rehashes the
+ entire table. Don't use this hash table implementation if
+ deletion is a common operation. */
+int
+hsh_delete (struct hsh_table *h, const void *target)
+{
+ void **entry = locate_matching_entry (h, target);
+ if (h->free != NULL)
{
- iter->init = 1;
- iter->next = 0;
+ h->free (*entry, h->param);
+ *entry = 0;
+ hsh_rehash (h, h->size);
+ return 1;
}
- for (i = iter->next; i < table->m; i++)
- if (table->table[i])
+ else
+ return 0;
+}
+\f
+/* Iteration. */
+
+/* Finds and returns an entry in TABLE, and initializes ITER for
+ use with hsh_next(). If TABLE is empty, returns a null
+ pointer. */
+void *
+hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
+{
+ assert (h != NULL);
+ assert (iter != NULL);
+
+ iter->next = 0;
+ return hsh_next (h, iter);
+}
+
+/* Iterates through TABLE with iterator ITER. Returns the next
+ entry in TABLE, or a null pointer after the last entry.
+
+ Entries are returned in an undefined order. Modifying TABLE
+ during iteration may cause some entries to be returned
+ multiple times or not at all. */
+void *
+hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
+{
+ size_t i;
+
+ assert (h != NULL);
+ assert (iter != NULL);
+ assert (iter->next <= h->size);
+
+ for (i = iter->next; i < h->size; i++)
+ if (h->entries[i])
{
iter->next = i + 1;
- return table->table[i];
+ return h->entries[i];
}
- iter->init = 0;
+
+ iter->next = h->size;
return NULL;
}
+\f
+/* Returns the number of items in H. */
+size_t
+hsh_count (struct hsh_table *h)
+{
+ assert (h != NULL);
+
+ return h->used;
+}
+\f
+/* Debug helpers. */
#if GLOBAL_DEBUGGING
+#undef NDEBUG
+#include <assert.h>
#include <stdio.h>
/* Displays contents of hash table H on stdout. */
void
hsh_dump (struct hsh_table *h)
{
- void **entry = h->table;
+ void **entry = h->entries;
int i;
printf (_("hash table:"));
- for (i = 0; i < h->m; i++)
+ for (i = 0; i < h->size; i++)
printf (" %p", *entry++);
printf ("\n");
}
to a NULL pointer. This function is used when it is known that the
entry to be inserted does not already exist in the table. */
void
-force_hsh_insert (struct hsh_table *h, void *p)
+hsh_force_insert (struct hsh_table *h, void *p)
{
void **pp = hsh_probe (h, p);
- if (*pp != NULL)
- assert (0);
+ assert (*pp == NULL);
*pp = p;
}
This function is for use when it is known that the entry being
searched for must exist in the table. */
void *
-force_hsh_find (struct hsh_table *h, const void *p)
+hsh_force_find (struct hsh_table *h, const void *target)
+{
+ void *found = hsh_find (h, target);
+ assert (found != NULL);
+ return found;
+}
+
+/* This wrapper for hsh_delete() verifies that an item really was
+ deleted. */
+void
+hsh_force_delete (struct hsh_table *h, const void *target)
{
- p = hsh_find (h, p);
- if (p == NULL)
- assert (0);
- return (void *) p;
+ int found = hsh_delete (h, target);
+ assert (found != 0);
}
#endif
#if !hash_h
#define hash_h 1
-/* Hash table (opaque). */
-struct hsh_table
- {
- int n; /* Number of filled entries. */
- int m; /* Number of entries. */
- int *mp; /* Pointer into hsh_prime_tab[]. */
- void **table; /* Hash table proper. */
+#include <stddef.h>
- void *param;
- int (*compare) (const void *, const void *, void *param);
- unsigned (*hash) (const void *, void *param);
- void (*free) (void *, void *param);
- };
+typedef int hsh_compare_func (const void *, const void *, void *param);
+typedef unsigned hsh_hash_func (const void *, void *param);
+typedef void hsh_free_func (void *, void *param);
/* Hash table iterator (opaque). */
struct hsh_iterator
{
- int init; /* Initialized? */
- int next; /* Index of next entry. */
+ size_t next; /* Index of next entry. */
};
-#define hsh_iterator_init(ITERATOR) (ITERATOR).init = 0
-
/* Prime numbers and hash functions. */
-int *hsh_next_prime (int) __attribute__ ((const));
-int hashpjw_d (const char *s1, const char *s2);
-
-#if __GNUC__>=2 && __OPTIMIZE__
-extern inline int
-hashpjw (const char *s)
-{
- return hashpjw_d (s, &s[strlen (s)]);
-}
-#else
-int hashpjw (const char *s);
-#endif
+unsigned hsh_hash_bytes (const void *, size_t);
+unsigned hsh_hash_string (const char *);
+unsigned hsh_hash_int (int);
/* Hash tables. */
-struct hsh_table *hsh_create (int m,
- int (*compare) (const void *, const void *,
- void *param),
- unsigned (*hash) (const void *, void *param),
- void (*free) (void *, void *param),
+struct hsh_table *hsh_create (int m, hsh_compare_func *,
+ hsh_hash_func *, hsh_free_func *,
void *param);
void hsh_clear (struct hsh_table *);
void hsh_destroy (struct hsh_table *);
-void hsh_rehash (struct hsh_table *);
-void **hsh_sort (struct hsh_table *,
- int (*compare) (const void *, const void *, void *param));
-#if GLOBAL_DEBUGGING
-void hsh_dump (struct hsh_table *);
-#endif
+void **hsh_sort (struct hsh_table *);
-/* Hash entries. */
+/* Search and insertion. */
void **hsh_probe (struct hsh_table *, const void *);
void *hsh_find (struct hsh_table *, const void *);
-void *hsh_foreach (struct hsh_table *, struct hsh_iterator *);
+int hsh_delete (struct hsh_table *, const void *);
+/* Iteration. */
+void *hsh_first (struct hsh_table *, struct hsh_iterator *);
+void *hsh_next (struct hsh_table *, struct hsh_iterator *);
+
+/* Search and insertion with assertion. */
#if GLOBAL_DEBUGGING
-void force_hsh_insert (struct hsh_table *, void *);
-void *force_hsh_find (struct hsh_table *, const void *);
+void hsh_force_insert (struct hsh_table *, void *);
+void *hsh_force_find (struct hsh_table *, const void *);
+void hsh_force_delete (struct hsh_table *, const void *);
#else
-#define force_hsh_insert(A, B) \
- do *hsh_probe (A, B) = B; while (0)
-#define force_hsh_find(A, B) \
- hsh_find (A, B)
+#define hsh_force_insert(A, B) ((void) (*hsh_probe (A, B) = B))
+#define hsh_force_find(A, B) (hsh_find (A, B))
+#define hsh_force_delete(A, B) ((void) hsh_delete (A, B))
#endif
-/* Returns number of used elements in hash table H. */
-#define hsh_count(H) \
- ((H)->n)
+/* Number of entries in hash table H. */
+size_t hsh_count (struct hsh_table *);
+
+/* Debugging. */
+#if GLOBAL_DEBUGGING
+void hsh_dump (struct hsh_table *);
+#endif
#endif /* hash_h */
{
assert (pool != NULL);
-#if !DISCRETE_BLOCKS /* Help identify source of bugs for Checker users. */
+#if !DISCRETE_BLOCKS
if (amt <= MAX_SUBALLOC)
{
struct pool_block *b = pool->blocks;
return pool_malloc (pool, amt);
}
-/* Duplicates STRING within POOL and returns a pointer to the
- duplicate. */
+/* Duplicates STRING, which has LENGTH characters, within POOL,
+ and returns a pointer to the duplicate. LENGTH should not
+ include the null terminator, which is always added to the
+ duplicate. For use only with strings, because the returned
+ pointere may not be aligned properly for other types. */
char *
-pool_strdup (struct pool *pool, const char *string)
+pool_strndup (struct pool *pool, const char *string, size_t length)
{
- size_t amt;
- void *p;
+ size_t size;
+ char *copy;
assert (pool && string);
- amt = strlen (string) + 1;
+ size = length + 1;
/* Note that strings need not be aligned on any boundary. */
{
#if !DISCRETE_BLOCKS
struct pool_block *const b = pool->blocks;
- if (b->ofs + amt <= BLOCK_SIZE)
+ if (b->ofs + size <= BLOCK_SIZE)
{
- p = ((char *) b) + b->ofs;
- b->ofs += amt;
+ copy = ((char *) b) + b->ofs;
+ b->ofs += size;
}
else
#endif
- p = pool_alloc (pool, amt);
+ copy = pool_alloc (pool, size);
}
- memcpy (p, string, amt);
- return p;
+ memcpy (copy, string, length);
+ copy[length] = '\0';
+ return copy;
+}
+
+/* Duplicates null-terminated STRING, within POOL, and returns a
+ pointer to the duplicate. For use only with strings, because
+ the returned pointere may not be aligned properly for other
+ types. */
+char *
+pool_strdup (struct pool *pool, const char *string)
+{
+ return pool_strndup (pool, string, strlen (string));
}
\f
/* Standard allocation routines. */
/* Suballocation routines. */
void *pool_alloc (struct pool *, size_t);
char *pool_strdup (struct pool *, const char *);
+char *pool_strndup (struct pool *, const char *, size_t);
char *pool_strcat (struct pool *, const char *, ...);
/* Standard allocation routines. */
/* font_entry hash function for hash tables. */
static unsigned
-hash_font_entry (const void *a, void *foobar unused)
+hash_font_entry (const void *fe_, void *foobar unused)
{
- return hashpjw (((struct font_entry *) a)->dit);
+ const struct font_entry *fe = fe_;
+ return hsh_hash_string (fe->dit);
}
/* font_entry destructor function for hash tables. */
{
const struct ps_encoding *a = pa;
- return hashpjw (a->filename);
+ return hsh_hash_string (a->filename);
}
/* Hash table free function for ps_encoding's. */
ds_init (NULL, &line, 128);
ds_init (NULL, &buf, 128);
- hsh_iterator_init (iter);
- while ((pe = hsh_foreach (x->encodings, &iter)) != NULL)
+ for (pe = hsh_first (x->encodings, &iter); pe != NULL;
+ pe = hsh_next (x->encodings, &iter))
{
FILE *f;
"%%%%DocumentNeededResources:%s"),
x->eol, x->file_page_number, x->eol, x->eol);
- hsh_iterator_init (iter);
- while ((fe = hsh_foreach (x->loaded, &iter)) != NULL)
+ for (fe = hsh_first (x->loaded, &iter); fe != NULL;
+ fe = hsh_next (x->loaded, &iter))
{
char buf[256], *cp;
struct ps_driver_ext *x = this->ext;
struct hsh_iterator iter;
- struct line_form *line;
int type;
- hsh_iterator_init (iter);
for (type = 0; type < n_line_types; type++)
{
- while (NULL != (line = hsh_foreach (x->lines[type], &iter)))
- {
+ struct line_form *line;
+
+ if (x->lines[type] == NULL)
+ continue;
+
+ for (line = hsh_first (x->lines[type], &iter); line != NULL;
+ line = hsh_next (x->lines[type], &iter))
+ {
int i;
int lo = INT_MIN, hi;
hash_ps_combo (const void *pa, void *foo unused)
{
const struct ps_font_combo *a = pa;
-
- return hashpjw (a->font->font->internal_name) ^ a->size;
+ unsigned name_hash = hsh_hash_string (a->font->font->internal_name);
+ return name_hash ^ hsh_hash_int (a->size);
}
/* Hash table free function for ps_combo structs. */
/* Hash table hash function for filename2font structs. */
static unsigned
-hash_filename2font (const void *a, void *param unused)
+hash_filename2font (const void *f2f_, void *param unused)
{
- /* I sure hope this works with long filenames. */
- return hashpjw (((struct filename2font *) a)->filename);
+ const struct filename2font *f2f = f2f_;
+ return hsh_hash_string (f2f->filename);
}
/* Initializes the global font list by creating the hash table for
--- /dev/null
+/* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If
+ not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+ Cambridge, MA 02139, USA. */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+#include "alloc.h"
+
+/* Byte-wise swap two items of size SIZE. */
+#define SWAP(a, b, size) \
+ do \
+ { \
+ register size_t __size = (size); \
+ register char *__a = (a), *__b = (b); \
+ do \
+ { \
+ char __tmp = *__a; \
+ *__a++ = *__b; \
+ *__b++ = __tmp; \
+ } while (--__size > 0); \
+ } while (0)
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+ This particular magic number was chosen to work best on a Sun 4/260. */
+#define MAX_THRESH 4
+
+/* Stack node declarations used to store unfulfilled partition obligations. */
+typedef struct
+ {
+ char *lo;
+ char *hi;
+ }
+stack_node;
+
+/* The next 4 #defines implement a very fast in-line stack abstraction. */
+#define STACK_SIZE (8 * sizeof(unsigned long int))
+#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
+#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
+#define STACK_NOT_EMPTY (stack < top)
+
+
+/* Order size using quicksort. This implementation incorporates
+ four optimizations discussed in Sedgewick:
+
+ 1. Non-recursive, using an explicit stack of pointer that store the
+ next array partition to sort. To save time, this maximum amount
+ of space required to store an array of MAX_INT is allocated on the
+ stack. Assuming a 32-bit integer, this needs only 32 *
+ sizeof(stack_node) == 136 bits. Pretty cheap, actually.
+
+ 2. Chose the pivot element using a median-of-three decision tree.
+ This reduces the probability of selecting a bad pivot value and
+ eliminates certain extraneous comparisons.
+
+ 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+ insertion sort to order the MAX_THRESH items within each partition.
+ This is a big win, since insertion sort is faster for small, mostly
+ sorted array segements.
+
+ 4. The larger of the two sub-partitions is always pushed onto the
+ stack first, with the algorithm then concentrating on the
+ smaller partition. This *guarantees* no more than log (n)
+ stack size is needed (actually O(1) in this case)! */
+
+void
+quicksort (void *pbase, size_t total_elems, size_t size,
+ int (*cmp) (const void *, const void *, void *),
+ void *param)
+{
+ register char *base_ptr = (char *) pbase;
+
+ /* Allocating SIZE bytes for a pivot buffer facilitates a better
+ algorithm below since we can do comparisons directly on the pivot. */
+ char *pivot_buffer = xmalloc (size);
+ const size_t max_thresh = MAX_THRESH * size;
+
+ if (total_elems == 0)
+ {
+ /* Avoid lossage with unsigned arithmetic below. */
+ free (pivot_buffer);
+ return;
+ }
+
+ if (total_elems > MAX_THRESH)
+ {
+ char *lo = base_ptr;
+ char *hi = &lo[size * (total_elems - 1)];
+ /* Largest size needed for 32-bit int!!! */
+ stack_node stack[STACK_SIZE];
+ stack_node *top = stack + 1;
+
+ while (STACK_NOT_EMPTY)
+ {
+ char *left_ptr;
+ char *right_ptr;
+
+ char *pivot = pivot_buffer;
+
+ /* Select median value from among LO, MID, and HI. Rearrange
+ LO and HI so the three values are sorted. This lowers the
+ probability of picking a pathological pivot value and
+ skips a comparison for both the LEFT_PTR and RIGHT_PTR. */
+
+ char *mid = lo + size * ((hi - lo) / size >> 1);
+
+ if ((*cmp) ((void *) mid, (void *) lo, param) < 0)
+ SWAP (mid, lo, size);
+ if ((*cmp) ((void *) hi, (void *) mid, param) < 0)
+ SWAP (mid, hi, size);
+ else
+ goto jump_over;
+ if ((*cmp) ((void *) mid, (void *) lo, param) < 0)
+ SWAP (mid, lo, size);
+ jump_over:;
+ memcpy (pivot, mid, size);
+ pivot = pivot_buffer;
+
+ left_ptr = lo + size;
+ right_ptr = hi - size;
+
+ /* Here's the famous ``collapse the walls'' section of quicksort.
+ Gotta like those tight inner loops! They are the main reason
+ that this algorithm runs much faster than others. */
+ do
+ {
+ while ((*cmp) ((void *) left_ptr, (void *) pivot, param) < 0)
+ left_ptr += size;
+
+ while ((*cmp) ((void *) pivot, (void *) right_ptr, param) < 0)
+ right_ptr -= size;
+
+ if (left_ptr < right_ptr)
+ {
+ SWAP (left_ptr, right_ptr, size);
+ left_ptr += size;
+ right_ptr -= size;
+ }
+ else if (left_ptr == right_ptr)
+ {
+ left_ptr += size;
+ right_ptr -= size;
+ break;
+ }
+ }
+ while (left_ptr <= right_ptr);
+
+ /* Set up pointers for next iteration. First determine whether
+ left and right partitions are below the threshold size. If so,
+ ignore one or both. Otherwise, push the larger partition's
+ bounds on the stack and continue sorting the smaller one. */
+
+ if ((size_t) (right_ptr - lo) <= max_thresh)
+ {
+ if ((size_t) (hi - left_ptr) <= max_thresh)
+ /* Ignore both small partitions. */
+ POP (lo, hi);
+ else
+ /* Ignore small left partition. */
+ lo = left_ptr;
+ }
+ else if ((size_t) (hi - left_ptr) <= max_thresh)
+ /* Ignore small right partition. */
+ hi = right_ptr;
+ else if ((right_ptr - lo) > (hi - left_ptr))
+ {
+ /* Push larger left partition indices. */
+ PUSH (lo, right_ptr);
+ lo = left_ptr;
+ }
+ else
+ {
+ /* Push larger right partition indices. */
+ PUSH (left_ptr, hi);
+ hi = right_ptr;
+ }
+ }
+ }
+
+ /* Once the BASE_PTR array is partially sorted by quicksort the rest
+ is completely sorted using insertion sort, since this is efficient
+ for partitions below MAX_THRESH size. BASE_PTR points to the beginning
+ of the array to sort, and END_PTR points at the very last element in
+ the array (*not* one beyond it!). */
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+ {
+ char *const end_ptr = &base_ptr[size * (total_elems - 1)];
+ char *tmp_ptr = base_ptr;
+ char *thresh = min (end_ptr, base_ptr + max_thresh);
+ register char *run_ptr;
+
+ /* Find smallest element in first threshold and place it at the
+ array's beginning. This is the smallest array element,
+ and the operation speeds up insertion sort's inner loop. */
+
+ for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
+ if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, param) < 0)
+ tmp_ptr = run_ptr;
+
+ if (tmp_ptr != base_ptr)
+ SWAP (tmp_ptr, base_ptr, size);
+
+ /* Insertion sort, running from left-hand-side up to right-hand-side. */
+
+ run_ptr = base_ptr + size;
+ while ((run_ptr += size) <= end_ptr)
+ {
+ tmp_ptr = run_ptr - size;
+ while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, param) < 0)
+ tmp_ptr -= size;
+
+ tmp_ptr += size;
+ if (tmp_ptr != run_ptr)
+ {
+ char *trav;
+
+ trav = run_ptr + size;
+ while (--trav >= run_ptr)
+ {
+ char c = *trav;
+ char *hi, *lo;
+
+ for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
+ *hi = *lo;
+ *hi = c;
+ }
+ }
+ }
+ }
+
+ free (pivot_buffer);
+}
--- /dev/null
+#ifndef QUICKSORT_H
+#define QUICKSORT_H 1
+
+/* Equivalent to standard C qsort(), but allows passing an extra
+ parameter to the comparison function. */
+void
+quicksort (void *pbase, size_t total_elems, size_t size,
+ int (*cmp) (const void *, const void *, void *),
+ void *param);
+
+#endif /* quicksort.h */
#include <config.h>
#include <assert.h>
#include <stdio.h>
+#include <stdlib.h>
#include "output.h"
#include "som.h"
/*#undef DEBUGGING*/
{
assert (t != NULL);
pool_destroy (t->container);
- t->container=0;
}
/* Sets the width and height of a table, in columns and rows,