X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fcase.c;h=dc4029268ebf981bc10d96ecccf7c88eb53dd71a;hb=8830c95bb9e8d72621787866141a27fc22e8c786;hp=8613eca6f1b9944da0ffd2269f4dd8bf0691f429;hpb=5fd22ca7771c8175ef05e91e1194c3c4096337f4;p=pspp-builds.git diff --git a/src/data/case.c b/src/data/case.c index 8613eca6..dc402926 100644 --- a/src/data/case.c +++ b/src/data/case.c @@ -1,431 +1,483 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2004 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 2004, 2007, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include -#include "case.h" + +#include + #include +#include #include -#include "value.h" -#include "alloc.h" -#include "str.h" -#include "variable.h" - -#ifdef DEBUGGING -#undef NDEBUG -#else -#ifndef NDEBUG -#define NDEBUG -#endif -#endif -#include - -/* Changes C not to share data with any other case. - C must be a case with a reference count greater than 1. - There should be no reason for external code to call this - function explicitly. It will be called automatically when - needed. */ -void -case_unshare (struct ccase *c) -{ - struct case_data *cd; - - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 1); - - cd = c->case_data; - cd->ref_cnt--; - case_create (c, c->case_data->value_cnt); - memcpy (c->case_data->values, cd->values, - sizeof *cd->values * cd->value_cnt); -} -/* Returns the number of bytes needed by a case with VALUE_CNT - values. */ -static inline size_t -case_size (size_t value_cnt) +#include +#include +#include +#include + +#include "minmax.h" +#include "xalloc.h" + +static size_t case_size (const struct caseproto *); +static bool variable_matches_case (const struct ccase *, + const struct variable *); +static void copy_forward (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values); +static void copy_backward (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values); + +/* Creates and returns a new case that stores data of the form + specified by PROTO. The data in the case have indeterminate + contents until explicitly written. + + The caller retains ownership of PROTO. */ +struct ccase * +case_create (const struct caseproto *proto) { - return (offsetof (struct case_data, values) - + value_cnt * sizeof (union value)); + struct ccase *c = case_try_create (proto); + if (c == NULL) + xalloc_die (); + return c; } -#ifdef DEBUGGING -/* Initializes C as a null case. */ -void -case_nullify (struct ccase *c) +/* Like case_create, but returns a null pointer if not enough + memory is available. */ +struct ccase * +case_try_create (const struct caseproto *proto) { - c->case_data = NULL; - c->this = c; + struct ccase *c = malloc (case_size (proto)); + if (c != NULL) + { + if (caseproto_try_init_values (proto, c->values)) + { + c->proto = caseproto_ref (proto); + c->ref_cnt = 1; + return c; + } + free (c); + } + return NULL; } -#endif /* DEBUGGING */ -#ifdef DEBUGGING -/* Returns true iff C is a null case. */ -int -case_is_null (const struct ccase *c) +/* Creates and returns an unshared copy of case C. */ +struct ccase * +case_clone (const struct ccase *c) { - return c->case_data == NULL; + return case_unshare (case_ref (c)); } -#endif /* DEBUGGING */ -/* Initializes C as a new case that can store VALUE_CNT values. - The values have indeterminate contents until explicitly - written. */ -void -case_create (struct ccase *c, size_t value_cnt) +/* Returns an estimate of the number of bytes of memory that + would be consumed in creating a case based on PROTO. The + estimate includes typical overhead from malloc() in addition + to the actual size of data. */ +size_t +case_get_cost (const struct caseproto *proto) { - if (!case_try_create (c, value_cnt)) - xalloc_die (); + /* FIXME: improve approximation? */ + return (1 + caseproto_get_n_widths (proto) + + 3 * caseproto_get_n_long_strings (proto)) * sizeof (union value); } -#ifdef DEBUGGING -/* Initializes CLONE as a copy of ORIG. */ -void -case_clone (struct ccase *clone, const struct ccase *orig) +/* Changes the prototype for case C, which must not be shared. + The new PROTO must be conformable with C's current prototype + (as defined by caseproto_is_conformable). + + Any new values created by this function have indeterminate + content that the caller is responsible for initializing. + + The caller retains ownership of PROTO. + + Returns a new case that replaces C, which is freed. */ +struct ccase * +case_resize (struct ccase *c, const struct caseproto *new_proto) { - assert (orig != NULL); - assert (orig->this == orig); - assert (orig->case_data != NULL); - assert (orig->case_data->ref_cnt > 0); - assert (clone != NULL); + struct caseproto *old_proto = c->proto; + size_t old_n_widths = caseproto_get_n_widths (old_proto); + size_t new_n_widths = caseproto_get_n_widths (new_proto); - if (clone != orig) + assert (!case_is_shared (c)); + expensive_assert (caseproto_is_conformable (old_proto, new_proto)); + + if (old_n_widths != new_n_widths) { - *clone = *orig; - clone->this = clone; + if (new_n_widths < old_n_widths) + caseproto_reinit_values (old_proto, new_proto, c->values); + c = xrealloc (c, case_size (new_proto)); + if (new_n_widths > old_n_widths) + caseproto_reinit_values (old_proto, new_proto, c->values); + + caseproto_unref (old_proto); + c->proto = caseproto_ref (new_proto); } - orig->case_data->ref_cnt++; -} -#endif /* DEBUGGING */ -#ifdef DEBUGGING -/* Replaces DST by SRC and nullifies SRC. - DST and SRC must be initialized cases at entry. */ -void -case_move (struct ccase *dst, struct ccase *src) -{ - assert (src != NULL); - assert (src->this == src); - assert (src->case_data != NULL); - assert (src->case_data->ref_cnt > 0); - assert (dst != NULL); - - *dst = *src; - dst->this = dst; - case_nullify (src); + return c; } -#endif /* DEBUGGING */ -#ifdef DEBUGGING -/* Destroys case C. */ -void -case_destroy (struct ccase *c) -{ - struct case_data *cd; - - assert (c != NULL); - assert (c->this == c); +/* case_unshare_and_resize(C, PROTO) is equivalent to + case_resize(case_unshare(C), PROTO), but it is faster if case + C is shared. - cd = c->case_data; - if (cd != NULL && --cd->ref_cnt == 0) + Any new values created by this function have indeterminate + content that the caller is responsible for initializing. + + The caller retains ownership of PROTO. + + Returns the new case that replaces C, which is freed. */ +struct ccase * +case_unshare_and_resize (struct ccase *c, const struct caseproto *proto) +{ + if (!case_is_shared (c)) + return case_resize (c, proto); + else { - memset (cd->values, 0xcc, sizeof *cd->values * cd->value_cnt); - cd->value_cnt = 0xdeadbeef; - free (cd); + struct ccase *new = case_create (proto); + size_t old_n_values = caseproto_get_n_widths (c->proto); + size_t new_n_values = caseproto_get_n_widths (proto); + case_copy (new, 0, c, 0, MIN (old_n_values, new_n_values)); + c->ref_cnt--; + return new; } } -#endif /* DEBUGGING */ -/* Resizes case C from OLD_CNT to NEW_CNT values. */ +/* Sets all of the numeric values in case C to the system-missing + value, and all of the string values to spaces. */ void -case_resize (struct ccase *c, size_t old_cnt, size_t new_cnt) +case_set_missing (struct ccase *c) { - struct ccase new; + size_t i; - case_create (&new, new_cnt); - case_copy (&new, 0, c, 0, old_cnt < new_cnt ? old_cnt : new_cnt); - case_swap (&new, c); - case_destroy (&new); + assert (!case_is_shared (c)); + for (i = 0; i < caseproto_get_n_widths (c->proto); i++) + value_set_missing (&c->values[i], caseproto_get_width (c->proto, i)); } -/* Swaps cases A and B. */ +/* Copies N_VALUES values from SRC (starting at SRC_IDX) to DST + (starting at DST_IDX). Each value that is copied into must + have the same width as the value that it is copied from. + + Properly handles overlapping ranges when DST == SRC. + + DST must not be shared. */ void -case_swap (struct ccase *a, struct ccase *b) +case_copy (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values) { - struct case_data *t = a->case_data; - a->case_data = b->case_data; - b->case_data = t; -} + assert (!case_is_shared (dst)); + assert (caseproto_range_is_valid (dst->proto, dst_idx, n_values)); + assert (caseproto_range_is_valid (src->proto, src_idx, n_values)); + assert (caseproto_equal (dst->proto, dst_idx, src->proto, src_idx, + n_values)); -/* Attempts to create C as a new case that holds VALUE_CNT - values. Returns nonzero if successful, zero if memory - allocation failed. */ -int -case_try_create (struct ccase *c, size_t value_cnt) -{ - c->case_data = malloc (case_size (value_cnt)); - if (c->case_data != NULL) + if (dst != src) { -#ifdef DEBUGGING - c->this = c; -#endif - c->case_data->value_cnt = value_cnt; - c->case_data->ref_cnt = 1; - return 1; + if (!dst->proto->n_long_strings || !src->proto->n_long_strings) + memcpy (&dst->values[dst_idx], &src->values[src_idx], + sizeof dst->values[0] * n_values); + else + copy_forward (dst, dst_idx, src, src_idx, n_values); } - else + else if (dst_idx != src_idx) { -#ifdef DEBUGGING - c->this = c; -#endif - return 0; + if (!dst->proto->n_long_strings) + memmove (&dst->values[dst_idx], &src->values[src_idx], + sizeof dst->values[0] * n_values); + else if (dst_idx < src_idx) + copy_forward (dst, dst_idx, src, src_idx, n_values); + else /* dst_idx > src_idx */ + copy_backward (dst, dst_idx, src, src_idx, n_values); } } -/* Tries to initialize CLONE as a copy of ORIG. - Returns nonzero if successful, zero if memory allocation - failed. */ -int -case_try_clone (struct ccase *clone, const struct ccase *orig) +/* Copies N_VALUES values out of case C to VALUES, starting at + the given START_IDX. */ +void +case_copy_out (const struct ccase *c, + size_t start_idx, union value *values, size_t n_values) { - case_clone (clone, orig); - return 1; + size_t i; + + assert (caseproto_range_is_valid (c->proto, start_idx, n_values)); + + for (i = 0; i < n_values; i++) + value_copy (&values[i], &c->values[start_idx + i], + caseproto_get_width (c->proto, start_idx + i)); } -#ifdef DEBUGGING -/* Copies VALUE_CNT values from SRC (starting at SRC_IDX) to DST - (starting at DST_IDX). */ +/* Copies N_VALUES values from VALUES into case C, starting at + the given START_IDX. + + C must not be shared. */ void -case_copy (struct ccase *dst, size_t dst_idx, - const struct ccase *src, size_t src_idx, - size_t value_cnt) +case_copy_in (struct ccase *c, + size_t start_idx, const union value *values, size_t n_values) { - assert (dst != NULL); - assert (dst->this == dst); - assert (dst->case_data != NULL); - assert (dst->case_data->ref_cnt > 0); - assert (dst_idx + value_cnt <= dst->case_data->value_cnt); - - assert (src != NULL); - assert (src->this == src); - assert (src->case_data != NULL); - assert (src->case_data->ref_cnt > 0); - assert (src_idx + value_cnt <= dst->case_data->value_cnt); - - if (dst->case_data->ref_cnt > 1) - case_unshare (dst); - if (dst->case_data != src->case_data || dst_idx != src_idx) - memmove (dst->case_data->values + dst_idx, - src->case_data->values + src_idx, - sizeof *dst->case_data->values * value_cnt); + size_t i; + + assert (!case_is_shared (c)); + assert (caseproto_range_is_valid (c->proto, start_idx, n_values)); + + for (i = 0; i < n_values; i++) + value_copy (&c->values[start_idx + i], &values[i], + caseproto_get_width (c->proto, start_idx + i)); } -#endif /* DEBUGGING */ -#ifdef DEBUGGING -/* Copies case C to OUTPUT. - OUTPUT_SIZE is the number of `union values' in OUTPUT, - which must match the number of `union values' in C. */ -void -case_to_values (const struct ccase *c, union value *output, - size_t output_size UNUSED) +/* Returns a pointer to the `union value' used for the + element of C for variable V. + Case C must be drawn from V's dictionary. + The caller must not modify the returned data. */ +const union value * +case_data (const struct ccase *c, const struct variable *v) { - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 0); - assert (output_size == c->case_data->value_cnt); - assert (output != NULL || output_size == 0); - - memcpy (output, c->case_data->values, - c->case_data->value_cnt * sizeof *output); + assert (variable_matches_case (c, v)); + return &c->values[var_get_case_index (v)]; } -#endif /* DEBUGGING */ -#ifdef DEBUGGING -/* Copies INPUT into case C. - INPUT_SIZE is the number of `union values' in INPUT, - which must match the number of `union values' in C. */ -void -case_from_values (struct ccase *c, const union value *input, - size_t input_size UNUSED) +/* Returns a pointer to the `union value' used for the element of + C numbered IDX. The caller must not modify the returned + data. */ +const union value * +case_data_idx (const struct ccase *c, size_t idx) +{ + assert (idx < c->proto->n_widths); + return &c->values[idx]; +} + +/* Returns a pointer to the `union value' used for the element of + C for variable V. Case C must be drawn from V's dictionary. + The caller is allowed to modify the returned data. + + Case C must not be shared. */ +union value * +case_data_rw (struct ccase *c, const struct variable *v) { - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 0); - assert (input_size == c->case_data->value_cnt); - assert (input != NULL || input_size == 0); - - if (c->case_data->ref_cnt > 1) - case_unshare (c); - memcpy (c->case_data->values, input, - c->case_data->value_cnt * sizeof *input); + assert (variable_matches_case (c, v)); + assert (!case_is_shared (c)); + return &c->values[var_get_case_index (v)]; } -#endif /* DEBUGGING */ -#ifdef DEBUGGING /* Returns a pointer to the `union value' used for the element of C numbered IDX. - The caller must not modify the returned data. */ -const union value * -case_data (const struct ccase *c, size_t idx) + The caller is allowed to modify the returned data. + + Case C must not be shared. */ +union value * +case_data_rw_idx (struct ccase *c, size_t idx) { - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 0); - assert (idx < c->case_data->value_cnt); + assert (idx < c->proto->n_widths); + assert (!case_is_shared (c)); + return &c->values[idx]; +} - return &c->case_data->values[idx]; +/* Returns the numeric value of the `union value' in C for + variable V. + Case C must be drawn from V's dictionary. */ +double +case_num (const struct ccase *c, const struct variable *v) +{ + assert (variable_matches_case (c, v)); + return c->values[var_get_case_index (v)].f; } -#endif /* DEBUGGING */ -#ifdef DEBUGGING /* Returns the numeric value of the `union value' in C numbered IDX. */ double -case_num (const struct ccase *c, size_t idx) +case_num_idx (const struct ccase *c, size_t idx) { - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 0); - assert (idx < c->case_data->value_cnt); + assert (idx < c->proto->n_widths); + return c->values[idx].f; +} + +/* Returns the string value of the `union value' in C for + variable V. Case C must be drawn from V's dictionary. The + caller must not modify the return value. - return c->case_data->values[idx].f; + Like the strings embedded in all "union value"s, the return + value is not null-terminated. */ +const uint8_t * +case_str (const struct ccase *c, const struct variable *v) +{ + size_t idx = var_get_case_index (v); + assert (variable_matches_case (c, v)); + return value_str (&c->values[idx], caseproto_get_width (c->proto, idx)); } -#endif /* DEBUGGING */ -#ifdef DEBUGGING /* Returns the string value of the `union value' in C numbered - IDX. - (Note that the value is not null-terminated.) - The caller must not modify the return value. */ -const char * -case_str (const struct ccase *c, size_t idx) + IDX. The caller must not modify the return value. + + Like the strings embedded in all "union value"s, the return + value is not null-terminated. */ +const uint8_t * +case_str_idx (const struct ccase *c, size_t idx) { - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 0); - assert (idx < c->case_data->value_cnt); + assert (idx < c->proto->n_widths); + return value_str (&c->values[idx], caseproto_get_width (c->proto, idx)); +} - return c->case_data->values[idx].s; +/* Returns the string value of the `union value' in C for + variable V. Case C must be drawn from V's dictionary. The + caller may modify the return value. + + Case C must not be shared. + + Like the strings embedded in all "union value"s, the return + value is not null-terminated. */ +uint8_t * +case_str_rw (struct ccase *c, const struct variable *v) +{ + size_t idx = var_get_case_index (v); + assert (variable_matches_case (c, v)); + assert (!case_is_shared (c)); + return value_str_rw (&c->values[idx], caseproto_get_width (c->proto, idx)); } -#endif /* DEBUGGING */ -#ifdef DEBUGGING -/* Returns a pointer to the `union value' used for the - element of C numbered IDX. - The caller is allowed to modify the returned data. */ -union value * -case_data_rw (struct ccase *c, size_t idx) +/* Returns the string value of the `union value' in C numbered + IDX. The caller may modify the return value. + + Case C must not be shared. + + Like the strings embedded in all "union value"s, the return + value is not null-terminated. */ +uint8_t * +case_str_rw_idx (struct ccase *c, size_t idx) { - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 0); - assert (idx < c->case_data->value_cnt); - - if (c->case_data->ref_cnt > 1) - case_unshare (c); - return &c->case_data->values[idx]; + assert (idx < c->proto->n_widths); + assert (!case_is_shared (c)); + return value_str_rw (&c->values[idx], caseproto_get_width (c->proto, idx)); } -#endif /* DEBUGGING */ -/* Compares the values of the VAR_CNT variables in VP +/* Compares the values of the N_VARS variables in VP in cases A and B and returns a strcmp()-type result. */ int case_compare (const struct ccase *a, const struct ccase *b, - struct variable *const *vp, size_t var_cnt) + const struct variable *const *vp, size_t n_vars) { - return case_compare_2dict (a, b, vp, vp, var_cnt); + return case_compare_2dict (a, b, vp, vp, n_vars); } -/* Compares the values of the VAR_CNT variables in VAP in case CA - to the values of the VAR_CNT variables in VBP in CB +/* Compares the values of the N_VARS variables in VAP in case CA + to the values of the N_VARS variables in VBP in CB and returns a strcmp()-type result. */ int case_compare_2dict (const struct ccase *ca, const struct ccase *cb, - struct variable *const *vap, struct variable *const *vbp, - size_t var_cnt) + const struct variable *const *vap, + const struct variable *const *vbp, + size_t n_vars) { - for (; var_cnt-- > 0; vap++, vbp++) + int cmp = 0; + for (; !cmp && n_vars-- > 0; vap++, vbp++) { - const struct variable *va = *vap; - const struct variable *vb = *vbp; - - assert (va->type == vb->type); - assert (va->width == vb->width); - - if (va->width == 0) - { - double af = case_num (ca, va->fv); - double bf = case_num (cb, vb->fv); - - if (af != bf) - return af > bf ? 1 : -1; - } - else - { - const char *as = case_str (ca, va->fv); - const char *bs = case_str (cb, vb->fv); - int cmp = memcmp (as, bs, va->width); - - if (cmp != 0) - return cmp; - } + const union value *va = case_data (ca, *vap); + const union value *vb = case_data (cb, *vbp); + assert (var_get_width (*vap) == var_get_width (*vbp)); + cmp = value_compare_3way (va, vb, var_get_width (*vap)); } - return 0; + return cmp; } /* Returns a pointer to the array of `union value's used for C. The caller must *not* modify the returned data. - NOTE: This function breaks the case abstraction. It should - *not* be used often. Prefer the other case functions. */ + This function breaks the case abstraction. It should *not* be + commonly used. Prefer the other case functions. */ const union value * -case_data_all (const struct ccase *c) +case_data_all (const struct ccase *c) { - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 0); - - return c->case_data->values; + return c->values; } /* Returns a pointer to the array of `union value's used for C. The caller is allowed to modify the returned data. - NOTE: This function breaks the case abstraction. It should - *not* be used often. Prefer the other case functions. */ + Case C must not be shared. + + This function breaks the case abstraction. It should *not* be + commonly used. Prefer the other case functions. */ union value * -case_data_all_rw (struct ccase *c) +case_data_all_rw (struct ccase *c) +{ + assert (!case_is_shared (c)); + return c->values; +} + +/* Internal helper function for case_unshare. */ +struct ccase * +case_unshare__ (struct ccase *old) +{ + struct ccase *new = case_create (old->proto); + case_copy (new, 0, old, 0, caseproto_get_n_widths (new->proto)); + --old->ref_cnt; + return new; +} + +/* Internal helper function for case_unref. */ +void +case_unref__ (struct ccase *c) +{ + caseproto_destroy_values (c->proto, c->values); + caseproto_unref (c->proto); + free (c); +} + +/* Returns the number of bytes needed by a case for case + prototype PROTO. */ +static size_t +case_size (const struct caseproto *proto) { - assert (c != NULL); - assert (c->this == c); - assert (c->case_data != NULL); - assert (c->case_data->ref_cnt > 0); - - if (c->case_data->ref_cnt > 1) - case_unshare (c); - return c->case_data->values; + return (offsetof (struct ccase, values) + + caseproto_get_n_widths (proto) * sizeof (union value)); } + +/* Returns true if C contains a value at V's case index with the + same width as V; that is, if V may plausibly be used to read + or write data in C. + + Useful in assertions. */ +static bool UNUSED +variable_matches_case (const struct ccase *c, const struct variable *v) +{ + size_t case_idx = var_get_case_index (v); + return (case_idx < caseproto_get_n_widths (c->proto) + && caseproto_get_width (c->proto, case_idx) == var_get_width (v)); +} + +/* Internal helper function for case_copy(). */ +static void +copy_forward (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values) +{ + size_t i; + + for (i = 0; i < n_values; i++) + value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i], + caseproto_get_width (dst->proto, dst_idx + i)); +} + +/* Internal helper function for case_copy(). */ +static void +copy_backward (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values) +{ + size_t i; + + for (i = n_values; i-- != 0; ) + value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i], + caseproto_get_width (dst->proto, dst_idx + i)); +} +