-/* PSPP - computes sample statistics.
- Copyright (C) 2005 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2005, 2009, 2011, 2013 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include "missing-values.h"
+
+#include "data/missing-values.h"
+
#include <assert.h>
#include <stdlib.h>
-#include <libpspp/str.h>
+#include "data/variable.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+#include "libpspp/i18n.h"
+#include "libpspp/str.h"
+
+#include "gl/minmax.h"
+
+/* Types of user-missing values.
+ Invisible--use access functions defined below instead. */
+enum mv_type
+ {
+ MVT_NONE = 0, /* No user-missing values. */
+ MVT_1 = 1, /* One user-missing value. */
+ MVT_2 = 2, /* Two user-missing values. */
+ MVT_3 = 3, /* Three user-missing values. */
+ MVT_RANGE = 4, /* A range of user-missing values. */
+ MVT_RANGE_1 = 5 /* A range plus an individual value. */
+ };
/* Initializes MV as a set of missing values for a variable of
- the given WIDTH. Although only numeric variables and short
- string variables may have missing values, WIDTH may be any
- valid variable width. */
+ the given WIDTH. MV should be destroyed with mv_destroy when
+ it is no longer needed. */
void
-mv_init (struct missing_values *mv, int width)
+mv_init (struct missing_values *mv, int width)
{
+ int i;
+
assert (width >= 0 && width <= MAX_STRING);
- mv->type = MV_NONE;
+ mv->type = MVT_NONE;
mv->width = width;
+ for (i = 0; i < 3; i++)
+ value_init (&mv->values[i], width);
}
-void
-mv_set_type(struct missing_values *mv, enum mv_type type)
+/* Initializes MV as a set of missing values for a variable of
+ the given WIDTH. MV will be automatically destroyed along
+ with POOL; it must not be passed to mv_destroy for explicit
+ destruction. */
+void
+mv_init_pool (struct pool *pool, struct missing_values *mv, int width)
{
- mv->type = type;
+ int i;
+
+ assert (width >= 0 && width <= MAX_STRING);
+ mv->type = MVT_NONE;
+ mv->width = width;
+ for (i = 0; i < 3; i++)
+ value_init_pool (pool, &mv->values[i], width);
}
+/* Frees any storage allocated by mv_init for MV. */
+void
+mv_destroy (struct missing_values *mv)
+{
+ if (mv != NULL)
+ {
+ int i;
-/* Copies SRC to MV. */
+ for (i = 0; i < 3; i++)
+ value_destroy (&mv->values[i], mv->width);
+ }
+}
+
+/* Removes any missing values from MV. */
void
-mv_copy (struct missing_values *mv, const struct missing_values *src)
+mv_clear (struct missing_values *mv)
{
- assert(src);
+ mv->type = MVT_NONE;
+}
- *mv = *src;
+/* Initializes MV as a copy of SRC. */
+void
+mv_copy (struct missing_values *mv, const struct missing_values *src)
+{
+ int i;
+
+ mv_init (mv, src->width);
+ mv->type = src->type;
+ for (i = 0; i < 3; i++)
+ value_copy (&mv->values[i], &src->values[i], mv->width);
+}
+
+/* Returns true if VALUE, of the given WIDTH, may be added to a
+ missing value set also of the given WIDTH. This is normally
+ the case, but string missing values over MV_MAX_STRING bytes
+ long must consist solely of spaces after the first
+ MV_MAX_STRING bytes. */
+bool
+mv_is_acceptable (const union value *value, int width)
+{
+ int i;
+
+ for (i = MV_MAX_STRING; i < width; i++)
+ if (value->s[i] != ' ')
+ return false;
+ return true;
}
/* Returns true if MV is an empty set of missing values. */
bool
-mv_is_empty (const struct missing_values *mv)
+mv_is_empty (const struct missing_values *mv)
{
- return mv->type == MV_NONE;
+ return mv->type == MVT_NONE;
}
/* Returns the width of the missing values that MV may
/* Attempts to add individual value V to the set of missing
values MV. Returns true if successful, false if MV has no
- more room for missing values. (Long string variables never
- accept missing values.) */
+ more room for missing values or if V is not an acceptable
+ missing value. */
bool
mv_add_value (struct missing_values *mv, const union value *v)
{
- if (mv->width > MAX_SHORT_STRING)
+ if (!mv_is_acceptable (v, mv->width))
return false;
- switch (mv->type)
+
+ switch (mv->type)
{
- case MV_NONE:
- case MV_1:
- case MV_2:
- case MV_RANGE:
- mv->values[mv->type & 3] = *v;
+ case MVT_NONE:
+ case MVT_1:
+ case MVT_2:
+ case MVT_RANGE:
+ value_copy (&mv->values[mv->type & 3], v, mv->width);
mv->type++;
return true;
- case MV_3:
- case MV_RANGE_1:
+ case MVT_3:
+ case MVT_RANGE_1:
return false;
}
- abort ();
+ NOT_REACHED ();
}
-/* Attempts to add S to the set of string missing values MV. S
- must contain exactly as many characters as MV's width.
- Returns true if successful, false if MV has no more room for
- missing values. (Long string variables never accept missing
- values.) */
+/* Attempts to add S, which is LEN bytes long, to the set of string missing
+ values MV. Returns true if successful, false if MV has no more room for
+ missing values or if S is not an acceptable missing value. */
bool
-mv_add_str (struct missing_values *mv, const char s[])
+mv_add_str (struct missing_values *mv, const uint8_t s[], size_t len)
{
+ union value v;
+ bool ok;
+
assert (mv->width > 0);
- return mv_add_value (mv, (union value *) s);
+ while (len > mv->width)
+ if (s[--len] != ' ')
+ return false;
+
+ value_init (&v, mv->width);
+ buf_copy_rpad (CHAR_CAST (char *, v.s), mv->width,
+ CHAR_CAST (char *, s), len, ' ');
+ ok = mv_add_value (mv, &v);
+ value_destroy (&v, mv->width);
+
+ return ok;
}
/* Attempts to add D to the set of numeric missing values MV.
Returns true if successful, false if MV has no more room for
missing values. */
bool
-mv_add_num (struct missing_values *mv, double d)
+mv_add_num (struct missing_values *mv, double d)
{
+ union value v;
+ bool ok;
+
assert (mv->width == 0);
- return mv_add_value (mv, (union value *) &d);
+ value_init (&v, 0);
+ v.f = d;
+ ok = mv_add_value (mv, &v);
+ value_destroy (&v, 0);
+
+ return ok;
}
/* Attempts to add range [LOW, HIGH] to the set of numeric
missing values MV. Returns true if successful, false if MV
has no room for a range, or if LOW > HIGH. */
bool
-mv_add_num_range (struct missing_values *mv, double low, double high)
+mv_add_range (struct missing_values *mv, double low, double high)
{
assert (mv->width == 0);
- if (low > high)
- return false;
- switch (mv->type)
+ if (low <= high && (mv->type == MVT_NONE || mv->type == MVT_1))
{
- case MV_NONE:
- case MV_1:
mv->values[1].f = low;
mv->values[2].f = high;
mv->type |= 4;
return true;
-
- case MV_2:
- case MV_3:
- case MV_RANGE:
- case MV_RANGE_1:
- return false;
}
- abort ();
+ else
+ return false;
}
/* Returns true if MV contains an individual value,
bool
mv_has_value (const struct missing_values *mv)
{
- switch (mv->type)
- {
- case MV_1:
- case MV_2:
- case MV_3:
- case MV_RANGE_1:
- return true;
-
- case MV_NONE:
- case MV_RANGE:
- return false;
- }
- abort ();
+ return mv_n_values (mv) > 0;
}
-/* Removes one individual value from MV and stores it in *V.
+/* Removes one individual value from MV and stores it in V, which
+ must have been initialized as a value with the same width as MV.
MV must contain an individual value (as determined by
- mv_has_value()). */
+ mv_has_value()).
+
+ We remove the first value from MV, not the last, because the
+ common use for this function is in iterating through a set of
+ missing values. If we remove the last value then we'll output
+ the missing values in order opposite of that in which they
+ were added, so that a GET followed by a SAVE would reverse the
+ order of missing values in the system file, a weird effect.
+
+ When MV holds a range plus one individual value (MVT_RANGE_1),
+ the range occupies values[1] and values[2] and must stay there
+ after the pop, so the remaining elements are shifted down only
+ when MV holds no range. */
void
-mv_pop_value (struct missing_values *mv, union value *v)
+mv_pop_value (struct missing_values *mv, union value *v)
{
+ union value tmp;
+
assert (mv_has_value (mv));
+
+ value_copy (v, &mv->values[0], mv->width);
+ if (mv->type != MVT_RANGE_1)
+ {
+ /* Rotate the popped element to the end so that the remaining
+ individual values stay packed at the front and every
+ element stays initialized for later reuse or destruction. */
+ tmp = mv->values[0];
+ mv->values[0] = mv->values[1];
+ mv->values[1] = mv->values[2];
+ mv->values[2] = tmp;
+ }
mv->type--;
- *v = mv->values[mv->type & 3];
}
-/* Stores a value in *V.
- MV must contain an individual value (as determined by
- mv_has_value()).
- IDX is the zero based index of the value to get
-*/
-void
-mv_peek_value (const struct missing_values *mv, union value *v, int idx)
+/* Returns MV's discrete value with index IDX. The caller must
+ not modify or free this value, or access it after MV is
+ modified or freed.
+ IDX must be less than the number of discrete values in MV, as
+ reported by mv_n_values. */
+const union value *
+mv_get_value (const struct missing_values *mv, int idx)
{
- assert (idx >= 0 ) ;
- assert (idx < 3);
-
- assert (mv_has_value (mv));
- *v = mv->values[idx];
+ assert (idx >= 0 && idx < mv_n_values (mv));
+ return &mv->values[idx];
}
-void
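+/* Replaces MV's discrete value with index IDX by a copy of V,
+ which must have the same width as MV. Returns true if
+ successful, false if V is not an acceptable missing value (in
+ which case MV is left unchanged).
+ IDX must be less than the number of discrete values in MV, as
+ reported by mv_n_values. */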
+bool
mv_replace_value (struct missing_values *mv, const union value *v, int idx)
{
assert (idx >= 0) ;
assert (idx < mv_n_values(mv));
- mv->values[idx] = *v;
-}
-
+ if (!mv_is_acceptable (v, mv->width))
+ return false;
+ value_copy (&mv->values[idx], v, mv->width);
+ return true;
+}
-int
+/* Returns the number of individual (not part of a range) missing
+ values in MV. */
+int
mv_n_values (const struct missing_values *mv)
{
- assert(mv_has_value(mv));
return mv->type & 3;
}
/* Returns true if MV contains a numeric range,
false if MV is empty (or contains only individual values). */
bool
-mv_has_range (const struct missing_values *mv)
+mv_has_range (const struct missing_values *mv)
{
- switch (mv->type)
- {
- case MV_RANGE:
- case MV_RANGE_1:
- return true;
-
- case MV_NONE:
- case MV_1:
- case MV_2:
- case MV_3:
- return false;
- }
- abort ();
+ return mv->type == MVT_RANGE || mv->type == MVT_RANGE_1;
}
/* Removes the numeric range from MV and stores it in *LOW and
*HIGH. MV must contain a individual range (as determined by
mv_has_range()). */
void
-mv_pop_range (struct missing_values *mv, double *low, double *high)
+mv_pop_range (struct missing_values *mv, double *low, double *high)
{
assert (mv_has_range (mv));
*low = mv->values[1].f;
+ *high = mv->values[2].f;
mv->type &= 3;
}
-
/* Returns the numeric range from MV into *LOW and
*HIGH. MV must contain a individual range (as determined by
mv_has_range()). */
void
-mv_peek_range (const struct missing_values *mv, double *low, double *high)
+mv_get_range (const struct missing_values *mv, double *low, double *high)
{
assert (mv_has_range (mv));
*low = mv->values[1].f;
*high = mv->values[2].f;
}
-
/* Returns true if values[IDX] is in use when the `type' member
is set to TYPE (in struct missing_values),
false otherwise. */
static bool
-using_element (unsigned type, int idx)
+using_element (unsigned type, int idx)
{
assert (idx >= 0 && idx < 3);
-
- switch (type)
+
+ switch (type)
{
- case MV_NONE:
+ case MVT_NONE:
return false;
- case MV_1:
+ case MVT_1:
return idx < 1;
- case MV_2:
+ case MVT_2:
return idx < 2;
- case MV_3:
+ case MVT_3:
return true;
- case MV_RANGE:
+ case MVT_RANGE:
return idx > 0;
- case MV_RANGE_1:
+ case MVT_RANGE_1:
return true;
}
- abort ();
+ NOT_REACHED ();
}
-/* Returns true if S contains only spaces between indexes
- NEW_WIDTH (inclusive) and OLD_WIDTH (exclusive),
- false otherwise. */
-static bool
-can_resize_string (const char *s, int old_width, int new_width)
+/* Returns true if MV can be resized to the given WIDTH with
+ mv_resize(), false otherwise. Resizing is possible only when
+ each value in MV (if any) is resizable from MV's current width
+ to WIDTH, as determined by value_is_resizable. */
+bool
+mv_is_resizable (const struct missing_values *mv, int width)
{
int i;
- assert (new_width < old_width);
- for (i = new_width; i < old_width; i++)
- if (s[i] != ' ')
+ for (i = 0; i < 3; i++)
+ if (using_element (mv->type, i)
+ && !value_is_resizable (&mv->values[i], mv->width, width))
return false;
- return true;
-}
-/* Returns true if MV can be resized to the given WIDTH with
- mv_resize(), false otherwise. Resizing to the same width is
- always possible. Resizing to a long string WIDTH is only
- possible if MV is an empty set of missing values; otherwise,
- resizing to a larger WIDTH is always possible. Resizing to a
- shorter width is possible only when each missing value
- contains only spaces in the characters that will be
- trimmed. */
-bool
-mv_is_resizable (struct missing_values *mv, int width)
-{
- assert ((width == 0) == (mv->width == 0));
- if (width > MAX_SHORT_STRING && mv->type != MV_NONE)
- return false;
- else if (width >= mv->width)
- return true;
- else
- {
- int i;
-
- for (i = 0; i < 3; i++)
- if (using_element (mv->type, i)
- && !can_resize_string (mv->values[i].s, mv->width, width))
- return false;
- return true;
- }
+ return true;
}
/* Resizes MV to the given WIDTH. WIDTH must fit the constraints
- explained for mv_is_resizable(). */
+ explained for mv_is_resizable. */
void
-mv_resize (struct missing_values *mv, int width)
+mv_resize (struct missing_values *mv, int width)
{
+ int i;
+
assert (mv_is_resizable (mv, width));
- if (width > mv->width)
- {
- int i;
-
- for (i = 0; i < 3; i++)
- memset (mv->values[i].s + mv->width, ' ', width - mv->width);
- }
+ for (i = 0; i < 3; i++)
+ if (using_element (mv->type, i))
+ value_resize (&mv->values[i], mv->width, width);
+ else
+ {
+ value_destroy (&mv->values[i], mv->width);
+ value_init (&mv->values[i], width);
+ }
mv->width = width;
}
-/* Returns true if V is system missing or a missing value in MV,
- false otherwise. */
-bool
-mv_is_value_missing (const struct missing_values *mv, const union value *v)
-{
- return (mv->width == 0
- ? mv_is_num_missing (mv, v->f)
- : mv_is_str_missing (mv, v->s));
-}
-
-/* Returns true if D is system missing or a missing value in MV,
- false otherwise.
- MV must be a set of numeric missing values. */
-bool
-mv_is_num_missing (const struct missing_values *mv, double d)
-{
- assert (mv->width == 0);
- return d == SYSMIS || mv_is_num_user_missing (mv, d);
-}
-
-/* Returns true if S[] is a missing value in MV, false otherwise.
- MV must be a set of string missing values.
- S[] must contain exactly as many characters as MV's width. */
-bool
-mv_is_str_missing (const struct missing_values *mv, const char s[])
-{
- return mv_is_str_user_missing (mv, s);
-}
-
-/* Returns true if V is a missing value in MV, false otherwise. */
-bool
-mv_is_value_user_missing (const struct missing_values *mv,
- const union value *v)
-{
- return (mv->width == 0
- ? mv_is_num_user_missing (mv, v->f)
- : mv_is_str_user_missing (mv, v->s));
-}
-
/* Returns true if D is a missing value in MV, false otherwise.
MV must be a set of numeric missing values. */
-bool
-mv_is_num_user_missing (const struct missing_values *mv, double d)
+static bool
+is_num_user_missing (const struct missing_values *mv, double d)
{
const union value *v = mv->values;
assert (mv->width == 0);
- switch (mv->type)
+ switch (mv->type)
{
- case MV_NONE:
+ case MVT_NONE:
return false;
- case MV_1:
+ case MVT_1:
return v[0].f == d;
- case MV_2:
+ case MVT_2:
return v[0].f == d || v[1].f == d;
- case MV_3:
+ case MVT_3:
return v[0].f == d || v[1].f == d || v[2].f == d;
- case MV_RANGE:
+ case MVT_RANGE:
return v[1].f <= d && d <= v[2].f;
- case MV_RANGE_1:
+ case MVT_RANGE_1:
return v[0].f == d || (v[1].f <= d && d <= v[2].f);
}
- abort ();
+ NOT_REACHED ();
}
/* Returns true if S[] is a missing value in MV, false otherwise.
- MV must be a set of string missing values.
+ MV must be a set of string missing values.
S[] must contain exactly as many characters as MV's width. */
-bool
-mv_is_str_user_missing (const struct missing_values *mv,
- const char s[])
+static bool
+is_str_user_missing (const struct missing_values *mv, const uint8_t s[])
{
const union value *v = mv->values;
assert (mv->width > 0);
- switch (mv->type)
+ switch (mv->type)
{
- case MV_NONE:
+ case MVT_NONE:
return false;
- case MV_1:
+ case MVT_1:
return !memcmp (v[0].s, s, mv->width);
- case MV_2:
+ case MVT_2:
return (!memcmp (v[0].s, s, mv->width)
|| !memcmp (v[1].s, s, mv->width));
- case MV_3:
+ case MVT_3:
return (!memcmp (v[0].s, s, mv->width)
|| !memcmp (v[1].s, s, mv->width)
|| !memcmp (v[2].s, s, mv->width));
- case MV_RANGE:
- case MV_RANGE_1:
- abort ();
+ case MVT_RANGE:
+ case MVT_RANGE_1:
+ NOT_REACHED ();
}
- abort ();
+ NOT_REACHED ();
}
-/* Returns true if MV is a set of numeric missing values and V is
- the system missing value. */
-bool
-mv_is_value_system_missing (const struct missing_values *mv,
- const union value *v)
+/* Returns MV_USER if V is a user-missing value in MV, MV_SYSTEM if V is
+ system-missing (and MV is numeric), or 0 if V is not missing. */
+enum mv_class
+mv_is_value_missing (const struct missing_values *mv, const union value *v)
{
- return mv->width == 0 ? v->f == SYSMIS : false;
+ return (mv->width == 0
+ ? mv_is_num_missing (mv, v->f)
+ : mv_is_str_missing (mv, v->s));
+}
+
+/* Returns MV_USER if V is a user-missing value in MV, MV_SYSTEM if V is
+ system-missing, or 0 if V is not missing. MV must be a set of numeric
+ missing values. */
+enum mv_class
+mv_is_num_missing (const struct missing_values *mv, double d)
+{
+ assert (mv->width == 0);
+ return (d == SYSMIS ? MV_SYSTEM
+ : is_num_user_missing (mv, d) ? MV_USER
+ : 0);
+}
+
+/* Returns MV_USER if S[] is a user-missing value in MV, or 0 if S[] is not
+ missing. MV must be a set of string missing values. S[] must contain
+ exactly as many characters as MV's width. */
+enum mv_class
+mv_is_str_missing (const struct missing_values *mv, const uint8_t s[])
+{
+ assert (mv->width > 0);
+ return is_str_user_missing (mv, s) ? MV_USER : 0;
+}
+
+/* Like mv_is_value_missing(), this tests whether V is a missing value in MV.
+ It supports the uncommon case where V and MV might have different widths:
+ the caller must specify VW, the width of V. MV and VW must be both numeric
+ or both string.
+
+ Comparison of strings of different width is done by conceptually extending
+ both strings to infinite width by appending spaces.
+
+ When the widths are equal, returns whatever mv_is_value_missing() returns.
+ Otherwise returns MV_USER if V matches one of MV's values, or 0 if it
+ does not. */
+enum mv_class
+mv_is_value_missing_varwidth (const struct missing_values *mv,
+ const union value *v, int vw)
+{
+ int mvw = mv->width;
+ if (mvw == vw)
+ return mv_is_value_missing (mv, v);
+
+ /* Make sure they're both strings. */
+ assert (mvw && vw);
+ if (mv->type == MVT_NONE)
+ return 0;
+
+ for (int i = 0; i < mv->type; i++)
+ if (!buf_compare_rpad (CHAR_CAST_BUG (const char *, mv->values[i].s), mvw,
+ CHAR_CAST_BUG (const char *, v->s), vw))
+ return MV_USER;
+ return 0;
+}
+
+/* Formats the missing values in MV as a human-readable string.
+
+ If MV has a range, it is written first, as "LOWEST THRU y" when the
+ low end is LOWEST, as "x THRU HIGHEST" when the high end is HIGHEST,
+ and as "x THRU y" otherwise. Each individual value follows, separated
+ from what precedes it by "; ". Numbers are printed with DBL_DIG + 1
+ significant digits. String values are passed through recode_string()
+ with "UTF-8" and ENCODING (presumably converting from ENCODING to
+ UTF-8 -- confirm against recode_string's parameter order), limited to
+ the first MIN (width, MV_MAX_STRING) bytes, and enclosed in double
+ quotes.
+
+ Returns NULL if MV yields no text; otherwise returns the string taken
+ from the buffer with ds_steal_cstr() (presumably the caller must free
+ it -- confirm against ds_steal_cstr's contract). */
+char *
+mv_to_string (const struct missing_values *mv, const char *encoding)
+{
+ struct string s = DS_EMPTY_INITIALIZER;
+ if (mv_has_range (mv))
+ {
+ double x, y;
+ mv_get_range (mv, &x, &y);
+ if (x == LOWEST)
+ ds_put_format (&s, "LOWEST THRU %.*g", DBL_DIG + 1, y);
+ else if (y == HIGHEST)
+ ds_put_format (&s, "%.*g THRU HIGHEST", DBL_DIG + 1, x);
+ else
+ ds_put_format (&s, "%.*g THRU %.*g",
+ DBL_DIG + 1, x,
+ DBL_DIG + 1, y);
+ }
+ for (size_t j = 0; j < mv_n_values (mv); j++)
+ {
+ const union value *value = mv_get_value (mv, j);
+ if (!ds_is_empty (&s))
+ ds_put_cstr (&s, "; ");
+ if (!mv->width)
+ ds_put_format (&s, "%.*g", DBL_DIG + 1, value->f);
+ else
+ {
+ char *mvs = recode_string (
+ "UTF-8", encoding, CHAR_CAST (char *, value->s),
+ MIN (mv->width, MV_MAX_STRING));
+ ds_put_format (&s, "\"%s\"", mvs);
+ free (mvs);
+ }
+ }
+ return ds_is_empty (&s) ? NULL : ds_steal_cstr (&s);
+}