/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include "value-labels.h"
+#include "data/value-labels.h"
#include <stdlib.h>
-#include <data/data-out.h>
-#include <data/value.h>
-#include <data/variable.h>
-#include <libpspp/compiler.h>
-#include <libpspp/hash.h>
-#include <libpspp/message.h>
-#include <libpspp/str.h>
+#include "data/data-out.h"
+#include "data/value.h"
+#include "data/variable.h"
+#include "libpspp/array.h"
+#include "libpspp/cast.h"
+#include "libpspp/compiler.h"
+#include "libpspp/hash-functions.h"
+#include "libpspp/hmap.h"
+#include "libpspp/intern.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
-#include "xalloc.h"
-
-static hsh_compare_func compare_int_val_lab;
-static hsh_hash_func hash_int_val_lab;
-static hsh_free_func free_int_val_lab;
-
-struct atom;
-static struct atom *atom_create (const char *string);
-static void atom_destroy (struct atom *);
-static char *atom_to_string (const struct atom *);
-
-/* A set of value labels. */
-struct val_labs
- {
- int width; /* 0=numeric, otherwise string width. */
- struct hsh_table *labels; /* Hash table of `struct int_val_lab's. */
- };
+#include "gl/xalloc.h"
/* Creates and returns a new, empty set of value labels with the
- given WIDTH. To actually add any value labels, WIDTH must be
- a numeric or short string width. */
+ given WIDTH. */
struct val_labs *
val_labs_create (int width)
{
- struct val_labs *vls;
-
- assert (width >= 0);
-
- vls = xmalloc (sizeof *vls);
+ struct val_labs *vls = xmalloc (sizeof *vls);
vls->width = width;
- vls->labels = NULL;
+ hmap_init (&vls->labels);
return vls;
}
/* Creates and returns a new set of value labels identical to
- VLS. */
+ VLS. Returns a null pointer if VLS is null. */
struct val_labs *
val_labs_clone (const struct val_labs *vls)
{
struct val_labs *copy;
- struct val_labs_iterator *i;
- struct val_lab *vl;
+ struct val_lab *label;
if (vls == NULL)
return NULL;
copy = val_labs_create (vls->width);
- for (vl = val_labs_first (vls, &i); vl != NULL;
- vl = val_labs_next (vls, &i))
- val_labs_add (copy, vl->value, vl->label);
+ HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels)
+ val_labs_add (copy, &label->value, label->escaped_label);
return copy;
}
bool
val_labs_can_set_width (const struct val_labs *vls, int new_width)
{
- struct val_labs_iterator *i;
- struct val_lab *lab;
-
- for (lab = val_labs_first (vls, &i); lab != NULL;
- lab = val_labs_next (vls, &i))
- if (!value_is_resizable (&lab->value, vls->width, new_width))
- {
- val_labs_done (&i);
- return false;
- }
+ struct val_lab *label;
+
+ HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels)
+ if (!value_is_resizable (&label->value, vls->width, new_width))
+ return false;
return true;
}
/* Changes the width of VLS to NEW_WIDTH. The original and new
- width must be both numeric or both string. If the new width
- is a long string width, then any value labels in VLS are
- deleted. */
+ width must be both numeric or both string. */
void
val_labs_set_width (struct val_labs *vls, int new_width)
{
assert (val_labs_can_set_width (vls, new_width));
-
+ if (value_needs_resize (vls->width, new_width))
+ {
+ struct val_lab *label;
+ HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels)
+ value_resize (&label->value, vls->width, new_width);
+ }
vls->width = new_width;
- if (new_width > MAX_SHORT_STRING)
- val_labs_clear (vls);
}
/* Destroys VLS. */
{
if (vls != NULL)
{
- hsh_destroy (vls->labels);
+ val_labs_clear (vls);
+ hmap_destroy (&vls->labels);
free (vls);
}
}
void
val_labs_clear (struct val_labs *vls)
{
- assert (vls != NULL);
+ struct val_lab *label, *next;
- hsh_destroy (vls->labels);
- vls->labels = NULL;
+ HMAP_FOR_EACH_SAFE (label, next, struct val_lab, node, &vls->labels)
+ {
+ hmap_delete (&vls->labels, &label->node);
+ value_destroy (&label->value, vls->width);
+ intern_unref (label->label);
+ intern_unref (label->escaped_label);
+ free (label);
+ }
+}
+
+/* Returns the width of VLS. */
+int
+val_labs_get_width (const struct val_labs *vls)
+{
+ return vls->width;
}
-/* Returns the number of value labels in VLS. */
+/* Returns the number of value labels in VLS.
+ Returns 0 if VLS is null. */
size_t
val_labs_count (const struct val_labs *vls)
{
- return vls == NULL || vls->labels == NULL ? 0 : hsh_count (vls->labels);
+ return vls == NULL ? 0 : hmap_count (&vls->labels);
}
\f
-/* One value label in internal format. */
-struct int_val_lab
- {
- union value value; /* The value being labeled. */
- struct atom *label; /* A ref-counted string. */
- };
-
-/* Creates and returns an int_val_lab based on VALUE and
- LABEL. */
-static struct int_val_lab *
-create_int_val_lab (struct val_labs *vls, union value value, const char *label)
-{
- struct int_val_lab *ivl;
-
- assert (label != NULL);
- assert (vls->width <= MAX_SHORT_STRING);
-
- ivl = xmalloc (sizeof *ivl);
- ivl->value = value;
- if (vls->width > 0)
- memset (ivl->value.s + vls->width, ' ', MAX_SHORT_STRING - vls->width);
- ivl->label = atom_create (label);
-
- return ivl;
-}
-
-/* If VLS does not already contain a value label for VALUE (and
- VLS represents a numeric or short string set of value labels),
- adds LABEL for it and returns true. Otherwise, returns
- false. */
-bool
-val_labs_add (struct val_labs *vls, union value value, const char *label)
+static void
+set_label (struct val_lab *lab, const char *escaped_label)
{
- assert (label != NULL);
- if (vls->width < MIN_LONG_STRING)
+ lab->escaped_label = intern_new (escaped_label);
+ if (strstr (escaped_label, "\\n") == NULL)
+ lab->label = intern_ref (lab->escaped_label);
+ else
{
- struct int_val_lab *ivl;
- void **vlpp;
+ struct string s;
+ const char *p;
- if (vls->labels == NULL)
- vls->labels = hsh_create (8, compare_int_val_lab, hash_int_val_lab,
- free_int_val_lab, vls);
-
- ivl = create_int_val_lab (vls, value, label);
- vlpp = hsh_probe (vls->labels, ivl);
- if (*vlpp == NULL)
+ ds_init_empty (&s);
+ ds_extend (&s, intern_strlen (lab->escaped_label));
+ for (p = escaped_label; *p != '\0'; p++)
{
- *vlpp = ivl;
- return true;
+ char c = *p;
+ if (c == '\\' && p[1] == 'n')
+ {
+ c = '\n';
+ p++;
+ }
+ ds_put_byte (&s, c);
}
- free_int_val_lab (ivl, vls);
+ lab->label = intern_new (ds_cstr (&s));
+ ds_destroy (&s);
}
- return false;
}
-/* Sets LABEL as the value label for VALUE in VLS, replacing any
- existing label for VALUE. Has no effect if VLS has a long
- string width. */
-void
-val_labs_replace (struct val_labs *vls, union value value, const char *label)
+static void
+do_add_val_lab (struct val_labs *vls, const union value *value,
+ const char *escaped_label)
{
- if (vls->width < MIN_LONG_STRING)
- {
- if (vls->labels != NULL)
- {
- struct int_val_lab *new = create_int_val_lab (vls, value, label);
- struct int_val_lab *old = hsh_replace (vls->labels, new);
- if (old != NULL)
- free_int_val_lab (old, vls);
- }
- else
- val_labs_add (vls, value, label);
- }
+ struct val_lab *lab = xmalloc (sizeof *lab);
+ value_clone (&lab->value, value, vls->width);
+ set_label (lab, escaped_label);
+ hmap_insert (&vls->labels, &lab->node, value_hash (value, vls->width, 0));
}
-/* Removes any value label for VALUE within VLS. Returns true
- if a value label was removed. */
+/* If VLS does not already contain a value label for VALUE, adds the UTF-8
+ encoded LABEL for it and returns true. Otherwise, returns false.
+
+ In LABEL, a backslash followed by the letter "n" (the two bytes written
+ "\\n" in C source) is interpreted as a new-line. */
bool
-val_labs_remove (struct val_labs *vls, union value value)
+val_labs_add (struct val_labs *vls, const union value *value,
+ const char *label)
{
- if (vls->width < MIN_LONG_STRING && vls->labels != NULL)
+ const struct val_lab *lab = val_labs_lookup (vls, value);
+ if (lab == NULL)
{
- struct int_val_lab *ivl = create_int_val_lab (vls, value, "");
- int deleted = hsh_delete (vls->labels, ivl);
- free (ivl);
- return deleted;
+ do_add_val_lab (vls, value, label);
+ return true;
}
else
return false;
}
-/* Searches VLS for a value label for VALUE. If successful,
- returns the label; otherwise, returns a null pointer. If
- VLS's width is greater than MAX_SHORT_STRING, always returns a
- null pointer. */
-char *
-val_labs_find (const struct val_labs *vls, union value value)
-{
- if (vls != NULL
- && vls->width <= MAX_SHORT_STRING
- && vls->labels != NULL)
- {
- struct int_val_lab ivl, *vlp;
+/* Sets LABEL as the value label for VALUE in VLS, replacing any
+ existing label for VALUE.
- ivl.value = value;
- vlp = hsh_find (vls->labels, &ivl);
- if (vlp != NULL)
- return atom_to_string (vlp->label);
- }
- return NULL;
-}
-\f
-/* A value labels iterator. */
-struct val_labs_iterator
- {
- void **labels; /* The labels, in order. */
- void **lp; /* Current label. */
- struct val_lab vl; /* Structure presented to caller. */
- };
-
-/* Sets up *IP for iterating through the value labels in VLS in
- no particular order. Returns the first value label or a null
- pointer if VLS is empty. If the return value is non-null,
- then val_labs_next() may be used to continue iterating or
- val_labs_done() to free up the iterator. Otherwise, neither
- function may be called for *IP. */
-struct val_lab *
-val_labs_first (const struct val_labs *vls, struct val_labs_iterator **ip)
+ In LABEL, a backslash followed by the letter "n" (the two bytes written
+ "\\n" in C source) is interpreted as a new-line. */
+void
+val_labs_replace (struct val_labs *vls, const union value *value,
+ const char *label)
{
- struct val_labs_iterator *i;
-
- assert (vls != NULL);
- assert (ip != NULL);
-
- if (vls->labels == NULL || vls->width > MAX_SHORT_STRING)
+ struct val_lab *vl = val_labs_lookup (vls, value);
+ if (vl != NULL)
{
- *ip = NULL;
- return NULL;
+ intern_unref (vl->label);
+ intern_unref (vl->escaped_label);
+ set_label (vl, label);
}
+ else
+ do_add_val_lab (vls, value, label);
+}
- i = *ip = xmalloc (sizeof *i);
- i->labels = hsh_data_copy (vls->labels);
- i->lp = i->labels;
- return val_labs_next (vls, ip);
+/* Removes LABEL from VLS and frees it, along with its value and label
+ strings. LABEL must not be used after this call. */
+void
+val_labs_remove (struct val_labs *vls, struct val_lab *label)
+{
+ hmap_delete (&vls->labels, &label->node);
+ value_destroy (&label->value, vls->width);
+ intern_unref (label->label);
+ intern_unref (label->escaped_label);
+ free (label);
}
-/* Sets up *IP for iterating through the value labels in VLS in
- sorted order of values. Returns the first value label or a
- null pointer if VLS is empty. If the return value is
- non-null, then val_labs_next() may be used to continue
- iterating or val_labs_done() to free up the iterator.
- Otherwise, neither function may be called for *IP. */
-struct val_lab *
-val_labs_first_sorted (const struct val_labs *vls,
- struct val_labs_iterator **ip)
+/* Searches VLS for a value label for VALUE. If successful, returns the string
+ used as the label, as a UTF-8 encoded string in a format suitable for
+ output. Otherwise, returns a null pointer. Returns a null pointer if VLS
+ is null. */
+const char *
+val_labs_find (const struct val_labs *vls, const union value *value)
{
- struct val_labs_iterator *i;
+ const struct val_lab *label = val_labs_lookup (vls, value);
+ return label ? label->label : NULL;
+}
- assert (vls != NULL);
- assert (ip != NULL);
+/* Searches VLS for a value label for VALUE among the entries whose hash is
+ HASH. If successful, returns the value label; otherwise, returns a null
+ pointer. VLS must not be null: it is dereferenced unconditionally. */
+static struct val_lab *
+val_labs_lookup__ (const struct val_labs *vls, const union value *value,
+ unsigned int hash)
+{
+ struct val_lab *label;
- if (vls->labels == NULL || vls->width > MAX_SHORT_STRING)
- {
- *ip = NULL;
- return NULL;
- }
+ HMAP_FOR_EACH_WITH_HASH (label, struct val_lab, node, hash, &vls->labels)
+ if (value_equal (&label->value, value, vls->width))
+ return label;
- i = *ip = xmalloc (sizeof *i);
- i->lp = i->labels = hsh_sort_copy (vls->labels);
- return val_labs_next (vls, ip);
+ return NULL;
}
-/* Returns the next value label in an iteration begun by
- val_labs_first() or val_labs_first_sorted(). If the return
- value is non-null, then val_labs_next() may be used to
- continue iterating or val_labs_done() to free up the iterator.
- Otherwise, neither function may be called for *IP. */
+/* Searches VLS for a value label for VALUE. If successful,
+ returns the value label; otherwise, returns a null pointer.
+ Returns a null pointer if VLS is null. */
struct val_lab *
-val_labs_next (const struct val_labs *vls, struct val_labs_iterator **ip)
+val_labs_lookup (const struct val_labs *vls, const union value *value)
{
- struct val_labs_iterator *i;
- struct int_val_lab *ivl;
+ return (vls == NULL ? NULL
+ : val_labs_lookup__ (vls, value, value_hash (value, vls->width, 0)));
+}
- assert (vls != NULL);
- assert (vls->width <= MAX_SHORT_STRING);
- assert (ip != NULL);
- assert (*ip != NULL);
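+/* Searches VLS for a value label whose output-form label (with any "\\n"
+ escapes already converted to new-lines) is exactly LABEL_. If successful,
+ returns a pointer to the corresponding value, which remains owned by VLS.
+ Otherwise, returns a null pointer.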
- i = *ip;
- ivl = *i->lp++;
- if (ivl != NULL)
- {
- i->vl.value = ivl->value;
- i->vl.label = atom_to_string (ivl->label);
- return &i->vl;
- }
- else
- {
- free (i->labels);
- free (i);
- *ip = NULL;
- return NULL;
- }
-}
+ Returns a null pointer if VLS is null.
-/* Discards the state for an incomplete iteration begun by
- val_labs_first() or val_labs_first_sorted(). */
-void
-val_labs_done (struct val_labs_iterator **ip)
+ This function is O(n) in the number of labels in VLS. */
+const union value *
+val_labs_find_value (const struct val_labs *vls, const char *label_)
{
- if (*ip != NULL)
+ const union value *value = NULL;
+
+ if (vls != NULL)
{
- struct val_labs_iterator *i = *ip;
- free (i->labels);
- free (i);
- *ip = NULL;
+ const struct val_lab *vl;
+ const char *label;
+
+ label = intern_new (label_);
+ HMAP_FOR_EACH (vl, struct val_lab, node, &vls->labels)
+ if (vl->label == label)
+ {
+ value = &vl->value;
+ break;
+ }
+ intern_unref (label);
}
-}
-\f
-/* Compares two value labels and returns a strcmp()-type result. */
-int
-compare_int_val_lab (const void *a_, const void *b_, const void *vls_)
-{
- const struct int_val_lab *a = a_;
- const struct int_val_lab *b = b_;
- const struct val_labs *vls = vls_;
- if (vls->width == 0)
- return a->value.f < b->value.f ? -1 : a->value.f > b->value.f;
- else
- return memcmp (a->value.s, b->value.s, vls->width);
+ return value;
}
-
-/* Hash a value label. */
-unsigned
-hash_int_val_lab (const void *vl_, const void *vls_)
+\f
+/* Returns the first value label in VLS, in arbitrary order, or a
+ null pointer if VLS is empty or if VLS is a null pointer. If
+ the return value is non-null, then val_labs_next() may be used
+ to continue iterating. */
+const struct val_lab *
+val_labs_first (const struct val_labs *vls)
{
- const struct int_val_lab *vl = vl_;
- const struct val_labs *vls = vls_;
-
- if (vls->width == 0)
- return hsh_hash_double (vl->value.f);
- else
- return hsh_hash_bytes (vl->value.s, vls->width);
+ return vls ? HMAP_FIRST (struct val_lab, node, &vls->labels) : NULL;
}
-/* Free a value label. */
-void
-free_int_val_lab (void *vl_, const void *vls_ UNUSED)
+/* Returns the next value label in an iteration begun by
+ val_labs_first(). If the return value is non-null, then
+ val_labs_next() may be used to continue iterating. */
+const struct val_lab *
+val_labs_next (const struct val_labs *vls, const struct val_lab *label)
{
- struct int_val_lab *vl = vl_;
-
- atom_destroy (vl->label);
- free (vl);
+ return HMAP_NEXT (label, struct val_lab, node, &vls->labels);
}
-\f
-/* Atoms. */
-
-/* An atom. */
-struct atom
- {
- char *string; /* String value. */
- unsigned ref_count; /* Number of references. */
- };
-
-static hsh_compare_func compare_atoms;
-static hsh_hash_func hash_atom;
-static hsh_free_func free_atom;
-
-/* Hash table of atoms. */
-static struct hsh_table *atoms;
-static void
-destroy_atoms (void)
+static int
+compare_labels_by_value_3way (const void *a_, const void *b_, const void *vls_)
{
- hsh_destroy (atoms);
+ const struct val_lab *const *a = a_;
+ const struct val_lab *const *b = b_;
+ const struct val_labs *vls = vls_;
+ return value_compare_3way (&(*a)->value, &(*b)->value, vls->width);
}
-/* Creates and returns an atom for STRING. */
-static struct atom *
-atom_create (const char *string)
+/* Allocates and returns an array of pointers to value labels
+ that is sorted in increasing order by value. The array has
+ val_labs_count(VLS) elements. The caller is responsible for
+ freeing the array, but not the labels it points to, which
+ remain part of VLS. Returns a null pointer if VLS is null. */
+const struct val_lab **
+val_labs_sorted (const struct val_labs *vls)
{
- struct atom a;
- void **app;
-
- assert (string != NULL);
-
- if (atoms == NULL)
- {
- atoms = hsh_create (8, compare_atoms, hash_atom, free_atom, NULL);
- atexit (destroy_atoms);
- }
-
- a.string = (char *) string;
- app = hsh_probe (atoms, &a);
- if (*app != NULL)
+ if (vls != NULL)
{
- struct atom *ap = *app;
- ap->ref_count++;
- return ap;
+ const struct val_lab *label;
+ const struct val_lab **labels;
+ size_t i;
+
+ labels = xmalloc (val_labs_count (vls) * sizeof *labels);
+ i = 0;
+ HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels)
+ labels[i++] = label;
+ assert (i == val_labs_count (vls));
+ sort (labels, val_labs_count (vls), sizeof *labels,
+ compare_labels_by_value_3way, vls);
+ return labels;
}
else
- {
- struct atom *ap = xmalloc (sizeof *ap);
- ap->string = xstrdup (string);
- ap->ref_count = 1;
- *app = ap;
- return ap;
- }
-}
-
-/* Destroys ATOM. */
-static void
-atom_destroy (struct atom *atom)
-{
- if (atom != NULL)
- {
- assert (atom->ref_count > 0);
- atom->ref_count--;
- if (atom->ref_count == 0)
- hsh_force_delete (atoms, atom);
- }
+ return NULL;
}
-/* Returns the string associated with ATOM. */
-static char *
-atom_to_string (const struct atom *atom)
+/* Returns a hash value that represents all of the labels in VLS, starting from
+ BASIS. */
+unsigned int
+val_labs_hash (const struct val_labs *vls, unsigned int basis)
{
- assert (atom != NULL);
-
- return atom->string;
+ const struct val_lab *label;
+ unsigned int hash;
+
+ hash = hash_int (val_labs_count (vls), basis);
+ HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels)
+ hash ^= value_hash (&label->value, vls->width,
+ hash_string (label->label, basis));
+ return hash;
}
-/* A hsh_compare_func that compares A and B. */
-static int
-compare_atoms (const void *a_, const void *b_, const void *aux UNUSED)
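+/* Returns true if A and B contain the same values with the same labels,
+ false if they differ in some way. A null pointer is treated as an empty
+ set of value labels, so it compares equal to any set with no labels. */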
+bool
+val_labs_equal (const struct val_labs *a, const struct val_labs *b)
{
- const struct atom *a = a_;
- const struct atom *b = b_;
-
- return strcmp (a->string, b->string);
-}
+ const struct val_lab *label;
-/* A hsh_hash_func that hashes ATOM. */
-static unsigned
-hash_atom (const void *atom_, const void *aux UNUSED)
-{
- const struct atom *atom = atom_;
+ if (val_labs_count (a) != val_labs_count (b))
+ return false;
+
+ if (a == NULL || b == NULL)
+ return true;
- return hsh_hash_string (atom->string);
-}
+ if (a->width != b->width)
+ return false;
-/* A hsh_free_func that destroys ATOM. */
-static void
-free_atom (void *atom_, const void *aux UNUSED)
-{
- struct atom *atom = atom_;
+ HMAP_FOR_EACH (label, struct val_lab, node, &a->labels)
+ {
+ struct val_lab *label2 = val_labs_lookup__ (b, &label->value,
+ label->node.hash);
+ if (!label2 || label->label != label2->label)
+ return false;
+ }
- free (atom->string);
- free (atom);
+ return true;
}