-/* PSPP - computes sample statistics.
- Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2004, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include <data/case.h>
+#include "data/case.h"
-#include <assert.h>
#include <limits.h>
+#include <stddef.h>
#include <stdlib.h>
-#include <data/value.h>
-#include <data/variable.h>
-#include <libpspp/alloc.h>
-#include <libpspp/str.h>
+#include "data/value.h"
+#include "data/variable.h"
+#include "libpspp/assertion.h"
+#include "libpspp/str.h"
-#include "minmax.h"
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
-/* Reference-counted case implementation. */
-struct case_data
- {
- size_t value_cnt; /* Number of values. */
- unsigned ref_cnt; /* Reference count. */
- union value values[1]; /* Values. */
- };
+/* Set this flag to 1 to copy cases instead of ref counting them.
+ This is sometimes helpful in debugging situations. */
+#define DEBUG_CASEREFS 0
-/* Ensures that C does not share data with any other case. */
-static void
-case_unshare (struct ccase *c)
+#if DEBUG_CASEREFS
+#warning "Caseref debug enabled. CASES ARE NOT BEING SHARED!!"
+#endif
+
+static size_t case_size (const struct caseproto *);
+static void assert_variable_matches_case (const struct ccase *,
+ const struct variable *);
+static void copy_forward (struct ccase *dst, size_t dst_idx,
+ const struct ccase *src, size_t src_idx,
+ size_t n_values);
+static void copy_backward (struct ccase *dst, size_t dst_idx,
+ const struct ccase *src, size_t src_idx,
+ size_t n_values);
+
+/* Creates and returns a new case that stores data of the form
+ specified by PROTO. The data in the case have indeterminate
+ contents until explicitly written.
+
+ The caller retains ownership of PROTO. */
+struct ccase *
+case_create (const struct caseproto *proto)
{
- if (c->case_data->ref_cnt > 1)
- {
- struct case_data *cd = c->case_data;
- cd->ref_cnt--;
- case_create (c, cd->value_cnt);
- memcpy (c->case_data->values, cd->values,
- sizeof *cd->values * cd->value_cnt);
- }
+ struct ccase *c = case_try_create (proto);
+ if (c == NULL)
+ xalloc_die ();
+ return c;
}
-/* Returns the number of bytes needed by a case with VALUE_CNT
- values. */
-static size_t
-case_size (size_t value_cnt)
+/* Like case_create, but returns a null pointer if not enough
+ memory is available. */
+struct ccase *
+case_try_create (const struct caseproto *proto)
{
- return (offsetof (struct case_data, values)
- + value_cnt * sizeof (union value));
+ struct ccase *c = malloc (case_size (proto));
+ if (c != NULL)
+ {
+ if (caseproto_try_init_values (proto, c->values))
+ {
+ c->proto = caseproto_ref (proto);
+ c->ref_cnt = 1;
+ return c;
+ }
+ free (c);
+ }
+ return NULL;
}
-/* Initializes C as a null case. */
-void
-case_nullify (struct ccase *c)
+/* Creates and returns an unshared copy of case C. */
+struct ccase *
+case_clone (const struct ccase *c)
{
- c->case_data = NULL;
+ return case_unshare (case_ref (c));
}
-/* Returns true iff C is a null case. */
-bool
-case_is_null (const struct ccase *c)
+/* Increments case C's reference count and returns C. Afterward,
+ case C is shared among its reference count holders. */
+struct ccase *
+case_ref (const struct ccase *c_)
{
- return c->case_data == NULL;
+ struct ccase *c = CONST_CAST (struct ccase *, c_);
+ c->ref_cnt++;
+#if DEBUG_CASEREFS
+ c = case_unshare__ (c);
+#endif
+ return c;
}
-/* Initializes C as a new case that can store VALUE_CNT values.
- The values have indeterminate contents until explicitly
- written. */
-void
-case_create (struct ccase *c, size_t value_cnt)
+/* Returns an estimate of the number of bytes of memory that
+ would be consumed in creating a case based on PROTO. The
+ estimate includes typical overhead from malloc() in addition
+ to the actual size of data. */
+size_t
+case_get_cost (const struct caseproto *proto)
{
- if (!case_try_create (c, value_cnt))
- xalloc_die ();
+ /* FIXME: improve approximation? */
+ return (1 + caseproto_get_n_widths (proto)
+ + 3 * caseproto_get_n_strings (proto)) * sizeof (union value);
}
-/* Initializes CLONE as a copy of ORIG. */
-void
-case_clone (struct ccase *clone, const struct ccase *orig)
-{
- assert (orig->case_data->ref_cnt > 0);
+/* Changes the prototype for case C, which must not be shared.
+ The new PROTO must be conformable with C's current prototype
+ (as defined by caseproto_is_conformable).
- if (clone != orig)
- *clone = *orig;
- orig->case_data->ref_cnt++;
-#ifdef DEBUGGING
- case_unshare (clone);
-#endif
-}
+ Any new values created by this function have indeterminate
+ content that the caller is responsible for initializing.
-/* Replaces DST by SRC and nullifies SRC.
- DST and SRC must be initialized cases at entry. */
-void
-case_move (struct ccase *dst, struct ccase *src)
+ The caller retains ownership of PROTO.
+
+ Returns a new case that replaces C, which is freed. */
+struct ccase *
+case_resize (struct ccase *c, const struct caseproto *new_proto)
{
- assert (src->case_data->ref_cnt > 0);
+ struct caseproto *old_proto = c->proto;
+ size_t old_n_widths = caseproto_get_n_widths (old_proto);
+ size_t new_n_widths = caseproto_get_n_widths (new_proto);
- if (dst != src)
+ assert (!case_is_shared (c));
+ expensive_assert (caseproto_is_conformable (old_proto, new_proto));
+
+ if (old_n_widths != new_n_widths)
{
- *dst = *src;
- case_nullify (src);
+ if (new_n_widths < old_n_widths)
+ caseproto_reinit_values (old_proto, new_proto, c->values);
+ c = xrealloc (c, case_size (new_proto));
+ if (new_n_widths > old_n_widths)
+ caseproto_reinit_values (old_proto, new_proto, c->values);
+
+ caseproto_unref (old_proto);
+ c->proto = caseproto_ref (new_proto);
}
+
+ return c;
}
-/* Destroys case C. */
-void
-case_destroy (struct ccase *c)
-{
- struct case_data *cd;
+/* case_unshare_and_resize(C, PROTO) is equivalent to
+ case_resize(case_unshare(C), PROTO), but it is faster if case
+ C is shared.
- cd = c->case_data;
- if (cd != NULL && --cd->ref_cnt == 0)
- {
- memset (cd->values, 0xcc, sizeof *cd->values * cd->value_cnt);
- cd->value_cnt = 0xdeadbeef;
- free (cd);
- }
-}
+ Any new values created by this function have indeterminate
+ content that the caller is responsible for initializing.
-/* Returns the number of union values in C. */
-size_t
-case_get_value_cnt (const struct ccase *c)
-{
- return c->case_data->value_cnt;
-}
+ The caller retains ownership of PROTO.
-/* Resizes case C to NEW_CNT union values. */
-void
-case_resize (struct ccase *c, size_t new_cnt)
+ Returns the new case that replaces C, which is freed. */
+struct ccase *
+case_unshare_and_resize (struct ccase *c, const struct caseproto *proto)
{
- size_t old_cnt = case_get_value_cnt (c);
- if (old_cnt != new_cnt)
+ if (!case_is_shared (c))
+ return case_resize (c, proto);
+ else
{
- struct ccase new;
-
- case_create (&new, new_cnt);
- case_copy (&new, 0, c, 0, MIN (old_cnt, new_cnt));
- case_swap (&new, c);
- case_destroy (&new);
+ struct ccase *new = case_create (proto);
+ size_t old_n_values = caseproto_get_n_widths (c->proto);
+ size_t new_n_values = caseproto_get_n_widths (proto);
+ case_copy (new, 0, c, 0, MIN (old_n_values, new_n_values));
+ c->ref_cnt--;
+ return new;
}
}
-/* Swaps cases A and B. */
+/* Sets all of the numeric values in case C to the system-missing
+ value, and all of the string values to spaces. */
void
-case_swap (struct ccase *a, struct ccase *b)
+case_set_missing (struct ccase *c)
{
- struct case_data *t = a->case_data;
- a->case_data = b->case_data;
- b->case_data = t;
-}
-
-/* Attempts to create C as a new case that holds VALUE_CNT
- values. Returns true if successful, false if memory
- allocation failed. */
-bool
-case_try_create (struct ccase *c, size_t value_cnt)
-{
- c->case_data = malloc (case_size (value_cnt));
- if (c->case_data != NULL)
- {
- c->case_data->value_cnt = value_cnt;
- c->case_data->ref_cnt = 1;
- return true;
- }
+ size_t i;
- return false;
+ assert (!case_is_shared (c));
+ for (i = 0; i < caseproto_get_n_widths (c->proto); i++)
+ value_set_missing (&c->values[i], caseproto_get_width (c->proto, i));
}
-/* Tries to initialize CLONE as a copy of ORIG.
- Returns true if successful, false if memory allocation
- failed. */
-bool
-case_try_clone (struct ccase *clone, const struct ccase *orig)
-{
- case_clone (clone, orig);
- return true;
-}
+/* Copies N_VALUES values from SRC (starting at SRC_IDX) to DST
+ (starting at DST_IDX). Each value that is copied into must
+ have the same width as the value that it is copied from.
-/* Copies VALUE_CNT values from SRC (starting at SRC_IDX) to DST
- (starting at DST_IDX). */
+ Properly handles overlapping ranges when DST == SRC.
+
+ DST must not be shared. */
void
case_copy (struct ccase *dst, size_t dst_idx,
const struct ccase *src, size_t src_idx,
- size_t value_cnt)
+ size_t n_values)
{
- assert (dst->case_data->ref_cnt > 0);
- assert (dst_idx + value_cnt <= dst->case_data->value_cnt);
-
- assert (src->case_data->ref_cnt > 0);
- assert (src_idx + value_cnt <= src->case_data->value_cnt);
+ assert (!case_is_shared (dst));
+ assert (caseproto_range_is_valid (dst->proto, dst_idx, n_values));
+ assert (caseproto_range_is_valid (src->proto, src_idx, n_values));
+ assert (caseproto_equal (dst->proto, dst_idx, src->proto, src_idx,
+ n_values));
- if (dst->case_data != src->case_data || dst_idx != src_idx)
+ if (dst != src)
+ {
+ if (!dst->proto->n_strings || !src->proto->n_strings)
+ memcpy (&dst->values[dst_idx], &src->values[src_idx],
+ sizeof dst->values[0] * n_values);
+ else
+ copy_forward (dst, dst_idx, src, src_idx, n_values);
+ }
+ else if (dst_idx != src_idx)
{
- case_unshare (dst);
- memmove (dst->case_data->values + dst_idx,
- src->case_data->values + src_idx,
- sizeof *dst->case_data->values * value_cnt);
+ if (!dst->proto->n_strings)
+ memmove (&dst->values[dst_idx], &src->values[src_idx],
+ sizeof dst->values[0] * n_values);
+ else if (dst_idx < src_idx)
+ copy_forward (dst, dst_idx, src, src_idx, n_values);
+ else /* dst_idx > src_idx */
+ copy_backward (dst, dst_idx, src, src_idx, n_values);
}
}
-/* Copies VALUE_CNT values out of case C to VALUES, starting at
+/* Copies N_VALUES values out of case C to VALUES, starting at
the given START_IDX. */
void
case_copy_out (const struct ccase *c,
- size_t start_idx, union value *values, size_t value_cnt)
+ size_t start_idx, union value *values, size_t n_values)
{
- assert (c->case_data->ref_cnt > 0);
- assert (value_cnt <= c->case_data->value_cnt);
- assert (start_idx + value_cnt <= c->case_data->value_cnt);
+ size_t i;
- memcpy (values, c->case_data->values + start_idx,
- value_cnt * sizeof *values);
+ assert (caseproto_range_is_valid (c->proto, start_idx, n_values));
+
+ for (i = 0; i < n_values; i++)
+ value_copy (&values[i], &c->values[start_idx + i],
+ caseproto_get_width (c->proto, start_idx + i));
}
-/* Copies VALUE_CNT values from VALUES into case C, staring at
- the given START_IDX. */
+/* Copies N_VALUES values from VALUES into case C, starting at
+ the given START_IDX.
+
+ C must not be shared. */
void
case_copy_in (struct ccase *c,
- size_t start_idx, const union value *values, size_t value_cnt)
+ size_t start_idx, const union value *values, size_t n_values)
{
- assert (c->case_data->ref_cnt > 0);
- assert (value_cnt <= c->case_data->value_cnt);
- assert (start_idx + value_cnt <= c->case_data->value_cnt);
+ size_t i;
+
+ assert (!case_is_shared (c));
+ assert (caseproto_range_is_valid (c->proto, start_idx, n_values));
- case_unshare (c);
- memcpy (c->case_data->values + start_idx, values,
- value_cnt * sizeof *values);
+ for (i = 0; i < n_values; i++)
+ value_copy (&c->values[start_idx + i], &values[i],
+ caseproto_get_width (c->proto, start_idx + i));
}
/* Returns a pointer to the `union value' used for the
const union value *
case_data (const struct ccase *c, const struct variable *v)
{
- return case_data_idx (c, var_get_case_index (v));
+ assert_variable_matches_case (c, v);
+ return &c->values[var_get_case_index (v)];
}
-/* Returns the numeric value of the `union value' in C for
- variable V.
- Case C must be drawn from V's dictionary. */
-double
-case_num (const struct ccase *c, const struct variable *v)
+/* Returns a pointer to the `union value' used for the element of
+ C numbered IDX. The caller must not modify the returned
+ data. */
+const union value *
+case_data_idx (const struct ccase *c, size_t idx)
{
- return case_num_idx (c, var_get_case_index (v));
+ assert (idx < c->proto->n_widths);
+ return &c->values[idx];
}
-/* Returns the string value of the `union value' in C for
- variable V.
- Case C must be drawn from V's dictionary.
- (Note that the value is not null-terminated.)
- The caller must not modify the return value. */
-const char *
-case_str (const struct ccase *c, const struct variable *v)
-{
- return case_str_idx (c, var_get_case_index (v));
-}
+/* Returns a pointer to the `union value' used for the element of
+ C for variable V. Case C must be drawn from V's dictionary.
+ The caller is allowed to modify the returned data.
-/* Returns a pointer to the `union value' used for the
- element of C for variable V.
- Case C must be drawn from V's dictionary.
- The caller is allowed to modify the returned data. */
+ Case C must not be shared. */
union value *
case_data_rw (struct ccase *c, const struct variable *v)
{
- return case_data_rw_idx (c, var_get_case_index (v));
+ assert_variable_matches_case (c, v);
+ assert (!case_is_shared (c));
+ return &c->values[var_get_case_index (v)];
}
/* Returns a pointer to the `union value' used for the
element of C numbered IDX.
- The caller must not modify the returned data. */
-const union value *
-case_data_idx (const struct ccase *c, size_t idx)
+ The caller is allowed to modify the returned data.
+
+ Case C must not be shared. */
+union value *
+case_data_rw_idx (struct ccase *c, size_t idx)
{
- assert (c->case_data->ref_cnt > 0);
- assert (idx < c->case_data->value_cnt);
+ assert (idx < c->proto->n_widths);
+ assert (!case_is_shared (c));
+ return &c->values[idx];
+}
- return &c->case_data->values[idx];
+/* Returns the numeric value of the `union value' in C for
+ variable V.
+ Case C must be drawn from V's dictionary. */
+double
+case_num (const struct ccase *c, const struct variable *v)
+{
+ assert_variable_matches_case (c, v);
+ return c->values[var_get_case_index (v)].f;
}
/* Returns the numeric value of the `union value' in C numbered
double
case_num_idx (const struct ccase *c, size_t idx)
{
- assert (c->case_data->ref_cnt > 0);
- assert (idx < c->case_data->value_cnt);
+ assert (idx < c->proto->n_widths);
+ return c->values[idx].f;
+}
+
+/* Returns the string value of the `union value' in C for
+ variable V. Case C must be drawn from V's dictionary. The
+ caller must not modify the return value.
- return c->case_data->values[idx].f;
+ Like the strings embedded in all "union value"s, the return
+ value is not null-terminated. */
+const uint8_t *
+case_str (const struct ccase *c, const struct variable *v)
+{
+ assert_variable_matches_case (c, v);
+ return c->values[var_get_case_index (v)].s;
}
/* Returns the string value of the `union value' in C numbered
- IDX.
- (Note that the value is not null-terminated.)
- The caller must not modify the return value. */
-const char *
+ IDX. The caller must not modify the return value.
+
+ Like the strings embedded in all "union value"s, the return
+ value is not null-terminated. */
+const uint8_t *
case_str_idx (const struct ccase *c, size_t idx)
{
- assert (c->case_data->ref_cnt > 0);
- assert (idx < c->case_data->value_cnt);
-
- return c->case_data->values[idx].s;
+ assert (idx < c->proto->n_widths);
+ return c->values[idx].s;
}
-/* Returns a pointer to the `union value' used for the
- element of C numbered IDX.
- The caller is allowed to modify the returned data. */
-union value *
-case_data_rw_idx (struct ccase *c, size_t idx)
+/* Returns the string value of the `union value' in C for
+ variable V. Case C must be drawn from V's dictionary. The
+ caller may modify the return value.
+
+ Case C must not be shared.
+
+ Like the strings embedded in all "union value"s, the return
+ value is not null-terminated. */
+uint8_t *
+case_str_rw (struct ccase *c, const struct variable *v)
{
- assert (c->case_data->ref_cnt > 0);
- assert (idx < c->case_data->value_cnt);
+ assert_variable_matches_case (c, v);
+ size_t idx = var_get_case_index (v);
+ assert (!case_is_shared (c));
+ return c->values[idx].s;
+}
+
+/* Returns the string value of the `union value' in C numbered
+ IDX. The caller may modify the return value.
- case_unshare (c);
- return &c->case_data->values[idx];
+ Case C must not be shared.
+
+ Like the strings embedded in all "union value"s, the return
+ value is not null-terminated. */
+uint8_t *
+case_str_rw_idx (struct ccase *c, size_t idx)
+{
+ assert (idx < c->proto->n_widths);
+ assert (!case_is_shared (c));
+ return c->values[idx].s;
}
-/* Compares the values of the VAR_CNT variables in VP
+/* Compares the values of the N_VARS variables in VP
in cases A and B and returns a strcmp()-type result. */
int
case_compare (const struct ccase *a, const struct ccase *b,
- const struct variable *const *vp, size_t var_cnt)
+ const struct variable *const *vp, size_t n_vars)
{
- return case_compare_2dict (a, b, vp, vp, var_cnt);
+ return case_compare_2dict (a, b, vp, vp, n_vars);
}
-/* Compares the values of the VAR_CNT variables in VAP in case CA
- to the values of the VAR_CNT variables in VBP in CB
+/* Compares the values of the N_VARS variables in VAP in case CA
+ to the values of the N_VARS variables in VBP in CB
and returns a strcmp()-type result. */
int
case_compare_2dict (const struct ccase *ca, const struct ccase *cb,
const struct variable *const *vap,
- const struct variable *const *vbp,
- size_t var_cnt)
+ const struct variable *const *vbp,
+ size_t n_vars)
{
- for (; var_cnt-- > 0; vap++, vbp++)
+ int cmp = 0;
+ for (; !cmp && n_vars-- > 0; vap++, vbp++)
{
- const struct variable *va = *vap;
- const struct variable *vb = *vbp;
-
- assert (var_get_width (va) == var_get_width (vb));
-
- if (var_get_width (va) == 0)
- {
- double af = case_num (ca, va);
- double bf = case_num (cb, vb);
-
- if (af != bf)
- return af > bf ? 1 : -1;
- }
- else
- {
- const char *as = case_str (ca, va);
- const char *bs = case_str (cb, vb);
- int cmp = memcmp (as, bs, var_get_width (va));
-
- if (cmp != 0)
- return cmp;
- }
+ const union value *va = case_data (ca, *vap);
+ const union value *vb = case_data (cb, *vbp);
+ assert (var_get_width (*vap) == var_get_width (*vbp));
+ cmp = value_compare_3way (va, vb, var_get_width (*vap));
}
- return 0;
+ return cmp;
}
/* Returns a pointer to the array of `union value's used for C.
The caller must *not* modify the returned data.
- NOTE: This function breaks the case abstraction. It should
- *not* be used often. Prefer the other case functions. */
+ This function breaks the case abstraction. It should *not* be
+ commonly used. Prefer the other case functions. */
const union value *
case_data_all (const struct ccase *c)
{
- assert (c->case_data->ref_cnt > 0);
-
- return c->case_data->values;
+ return c->values;
}
/* Returns a pointer to the array of `union value's used for C.
The caller is allowed to modify the returned data.
- NOTE: This function breaks the case abstraction. It should
- *not* be used often. Prefer the other case functions. */
+ Case C must not be shared.
+
+ This function breaks the case abstraction. It should *not* be
+ commonly used. Prefer the other case functions. */
union value *
case_data_all_rw (struct ccase *c)
{
- assert (c->case_data->ref_cnt > 0);
+ assert (!case_is_shared (c));
+ return c->values;
+}
+
+/* Internal helper function for case_unshare. */
+struct ccase *
+case_unshare__ (struct ccase *old)
+{
+ struct ccase *new = case_create (old->proto);
+ case_copy (new, 0, old, 0, caseproto_get_n_widths (new->proto));
+ --old->ref_cnt;
+ return new;
+}
+
+/* Internal helper function for case_unref. */
+void
+case_unref__ (struct ccase *c)
+{
+ caseproto_destroy_values (c->proto, c->values);
+ caseproto_unref (c->proto);
+ free (c);
+}
+\f
+/* Returns the number of bytes needed by a case for case
+ prototype PROTO. */
+static size_t
+case_size (const struct caseproto *proto)
+{
+ return (offsetof (struct ccase, values)
+ + caseproto_get_n_widths (proto) * sizeof (union value));
+}
- case_unshare (c);
- return c->case_data->values;
+/* Returns true if C contains a value at V's case index with the
+ same width as V; that is, if V may plausibly be used to read
+ or write data in C.
+
+ Useful in assertions. */
+static void
+assert_variable_matches_case (const struct ccase *c, const struct variable *v)
+{
+ size_t case_idx = var_get_case_index (v);
+ assert (case_idx < caseproto_get_n_widths (c->proto));
+ assert (caseproto_get_width (c->proto, case_idx) == var_get_width (v));
}
+
+/* Internal helper function for case_copy(). */
+static void
+copy_forward (struct ccase *dst, size_t dst_idx,
+ const struct ccase *src, size_t src_idx,
+ size_t n_values)
+{
+ size_t i;
+
+ for (i = 0; i < n_values; i++)
+ value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i],
+ caseproto_get_width (dst->proto, dst_idx + i));
+}
+
+/* Internal helper function for case_copy(). */
+static void
+copy_backward (struct ccase *dst, size_t dst_idx,
+ const struct ccase *src, size_t src_idx,
+ size_t n_values)
+{
+ size_t i;
+
+ for (i = n_values; i-- != 0;)
+ value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i],
+ caseproto_get_width (dst->proto, dst_idx + i));
+}
+