1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2005, 2009, 2011, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/missing-values.h"
24 #include "data/variable.h"
25 #include "libpspp/assertion.h"
26 #include "libpspp/cast.h"
27 #include "libpspp/i18n.h"
28 #include "libpspp/str.h"
30 #include "gl/minmax.h"
32 /* Types of user-missing values.
33 Invisible--use access functions defined below instead.

   The numeric values are significant.  The low two bits (type & 3)
   equal the number of individual values: 0 to 3 without a range, 0 or
   1 with a range.  A range adds 4 to the type.  mv_add_value depends on
   this when it indexes values[] with `mv->type & 3'. */
36 MVT_NONE = 0, /* No user-missing values. */
37 MVT_1 = 1, /* One user-missing value. */
38 MVT_2 = 2, /* Two user-missing values. */
39 MVT_3 = 3, /* Three user-missing values. */
40 MVT_RANGE = 4, /* A range of user-missing values. */
41 MVT_RANGE_1 = 5 /* A range plus an individual value. */
44 /* Initializes MV as a set of missing values for a variable of
45 the given WIDTH. MV should be destroyed with mv_destroy when
46 it is no longer needed. */
48 mv_init (struct missing_values *mv, int width)
/* WIDTH must lie in 0...MAX_STRING.  Elsewhere in this file width 0 is
   asserted for numeric operations and width > 0 for string ones. */
52 assert (width >= 0 && width <= MAX_STRING);
/* All three value slots are initialized at WIDTH up front, whether or
   not they will ever hold a missing value. */
55 for (i = 0; i < 3; i++)
56 value_init (&mv->values[i], width);
59 /* Initializes MV as a set of missing values for a variable of
60 the given WIDTH. MV will be automatically destroyed along
61 with POOL; it must not be passed to mv_destroy for explicit destruction. */
64 mv_init_pool (struct pool *pool, struct missing_values *mv, int width)
/* Same width constraint as mv_init. */
68 assert (width >= 0 && width <= MAX_STRING);
/* Mirrors the loop in mv_init, except that each of the three slots is
   set up with value_init_pool on POOL instead of value_init. */
71 for (i = 0; i < 3; i++)
72 value_init_pool (pool, &mv->values[i], width);
75 /* Frees any storage allocated by mv_init for MV. */
77 mv_destroy (struct missing_values *mv)
/* Destroys all three slots at the width recorded in MV.  This matches
   mv_init, which initializes all three regardless of how many are in
   use. */
83 for (i = 0; i < 3; i++)
84 value_destroy (&mv->values[i], mv->width);
88 /* Removes any missing values from MV. */
90 mv_clear (struct missing_values *mv)
95 /* Initializes MV as a copy of SRC. */
97 mv_copy (struct missing_values *mv, const struct missing_values *src)
/* MV is initialized here at SRC's width, so the caller presumably
   passes storage that is not currently initialized.
   NOTE(review): an already-initialized MV would be re-initialized
   without being destroyed first -- confirm against callers. */
101 mv_init (mv, src->width);
102 mv->type = src->type;
/* Every slot is copied, including any that SRC's type does not use. */
103 for (i = 0; i < 3; i++)
104 value_copy (&mv->values[i], &src->values[i], mv->width);
107 /* Returns true if VALUE, of the given WIDTH, may be added to a
108 missing value set also of the given WIDTH. This is normally
109 the case, but string missing values over MV_MAX_STRING bytes
110 long must consist solely of spaces after the first
111 MV_MAX_STRING bytes. */
113 mv_is_acceptable (const union value *value, int width)
/* Only bytes at offsets MV_MAX_STRING and beyond are inspected.  When
   WIDTH <= MV_MAX_STRING (which includes numeric width 0) the loop body
   never executes, so value->s is not touched at all. */
117 for (i = MV_MAX_STRING; i < width; i++)
118 if (value->s[i] != ' ')
123 /* Returns true if MV is an empty set of missing values. */
125 mv_is_empty (const struct missing_values *mv)
/* MVT_NONE is the only type with neither individual values nor a
   range. */
127 return mv->type == MVT_NONE;
130 /* Returns the width of the missing values that MV may contain. */
133 mv_get_width (const struct missing_values *mv)
138 /* Attempts to add individual value V to the set of missing
139 values MV. Returns true if successful, false if MV has no
140 more room for missing values or if V is not an acceptable missing value. */
143 mv_add_value (struct missing_values *mv, const union value *v)
/* V must pass mv_is_acceptable for MV's width before it can be
   stored. */
145 if (!mv_is_acceptable (v, mv->width))
/* mv->type & 3 is the number of individual values already present (see
   the MVT_* constants), which makes it the index of the first unused
   individual slot.  V is copied, so the caller keeps ownership of V. */
154 value_copy (&mv->values[mv->type & 3], v, mv->width);
165 /* Attempts to add S, which is LEN bytes long, to the set of string missing
166 values MV. Returns true if successful, false if MV has no more room for
167 missing values or if S is not an acceptable missing value. */
169 mv_add_str (struct missing_values *mv, const uint8_t s[], size_t len)
/* String missing values only. */
174 assert (mv->width > 0);
/* S may be longer than MV's width; this loop runs while LEN exceeds
   it.  NOTE(review): presumably the loop only strips trailing spaces
   and fails on any other excess byte -- confirm against the loop
   body. */
175 while (len > mv->width)
/* Build a temporary value of MV's width from S, with ' ' passed to
   buf_copy_rpad as the pad byte, then try to add it.  mv_add_value
   stores its own copy (value_copy), so the temporary is destroyed
   afterward regardless of the outcome. */
179 value_init (&v, mv->width);
180 buf_copy_rpad (CHAR_CAST (char *, v.s), mv->width,
181 CHAR_CAST (char *, s), len, ' ');
182 ok = mv_add_value (mv, &v);
183 value_destroy (&v, mv->width);
188 /* Attempts to add D to the set of numeric missing values MV.
189 Returns true if successful, false if MV has no more room for missing values. */
192 mv_add_num (struct missing_values *mv, double d)
/* Numeric missing values only (width 0). */
197 assert (mv->width == 0);
/* V is a temporary.  mv_add_value stores its own copy, so V is
   destroyed (at width 0) whether or not the add succeeded. */
200 ok = mv_add_value (mv, &v);
201 value_destroy (&v, 0);
206 /* Attempts to add range [LOW, HIGH] to the set of numeric
207 missing values MV. Returns true if successful, false if MV
208 has no room for a range, or if LOW > HIGH. */
210 mv_add_range (struct missing_values *mv, double low, double high)
/* Ranges exist only for numeric missing values (width 0). */
212 assert (mv->width == 0);
/* A range fits only when at most one individual value is present.  It
   occupies slots 1 and 2, which leaves just slot 0 for an individual
   value. */
213 if (low <= high && (mv->type == MVT_NONE || mv->type == MVT_1))
215 mv->values[1].f = low;
216 mv->values[2].f = high;
224 /* Returns true if MV contains an individual value,
225 false if MV is empty (or contains only a range). */
227 mv_has_value (const struct missing_values *mv)
/* mv_n_values counts individual values only, so a set that holds just
   a range yields false here. */
229 return mv_n_values (mv) > 0;
232 /* Removes one individual value from MV and stores it in V, which
233 must have been initialized as a value with the same width as MV.
234 MV must contain an individual value (as determined by mv_has_value()).
237 We remove the first value from MV, not the last, because the
238 common use for this function is in iterating through a set of
239 missing values. If we remove the last value then we'll output
240 the missing values in order opposite of that in which they
241 were added, so that a GET followed by a SAVE would reverse the
242 order of missing values in the system file, a weird effect. */
244 mv_pop_value (struct missing_values *mv, union value *v)
/* Popping from a set without individual values is a caller error. */
248 assert (mv_has_value (mv));
/* The caller receives a copy of the first value, in V. */
250 value_copy (v, &mv->values[0], mv->width);
/* Shift the remaining slots down by one.  This is plain union
   assignment rather than value_copy, so slot contents are transferred
   shallowly. */
252 mv->values[0] = mv->values[1];
253 mv->values[1] = mv->values[2];
258 /* Returns MV's discrete value with index IDX. The caller must
259 not modify or free this value, or access it after MV is modified or freed.
261 IDX must be less than the number of discrete values in MV, as
262 reported by mv_n_values. */
264 mv_get_value (const struct missing_values *mv, int idx)
/* IDX must name one of the individual values currently in MV. */
266 assert (idx >= 0 && idx < mv_n_values (mv));
/* The result points into MV itself rather than at a copy, which is why
   it stays valid only as long as MV is left alone. */
267 return &mv->values[idx];
270 /* Replaces MV's discrete value with index IDX by a copy of V,
271 which must have the same width as MV.
272 IDX must be less than the number of discrete values in MV, as
273 reported by mv_n_values. */
275 mv_replace_value (struct missing_values *mv, const union value *v, int idx)
/* IDX must name an individual value that MV already holds; this
   function replaces a value, it never appends one. */
278 assert (idx < mv_n_values(mv));
/* Same acceptability rule that mv_add_value applies. */
280 if (!mv_is_acceptable (v, mv->width))
/* Overwrite the slot in place with a copy of V. */
283 value_copy (&mv->values[idx], v, mv->width);
287 /* Returns the number of individual (not part of a range) missing values in MV. */
290 mv_n_values (const struct missing_values *mv)
296 /* Returns true if MV contains a numeric range,
297 false if MV is empty (or contains only individual values). */
299 mv_has_range (const struct missing_values *mv)
/* These are the two types that carry a range (values 4 and 5). */
301 return mv->type == MVT_RANGE || mv->type == MVT_RANGE_1;
304 /* Removes the numeric range from MV and stores it in *LOW and
305 *HIGH. MV must contain a range (as determined by mv_has_range()). */
308 mv_pop_range (struct missing_values *mv, double *low, double *high)
/* Popping a range from a set without one is a caller error. */
310 assert (mv_has_range (mv));
/* The range endpoints live in slot 1 (low) and slot 2 (high), where
   mv_add_range stored them. */
311 *low = mv->values[1].f;
312 *high = mv->values[2].f;
316 /* Stores the numeric range from MV into *LOW and
317 *HIGH. MV must contain a range (as determined by mv_has_range()). */
320 mv_get_range (const struct missing_values *mv, double *low, double *high)
/* Reading a range from a set without one is a caller error. */
322 assert (mv_has_range (mv));
/* The range endpoints live in slot 1 (low) and slot 2 (high), where
   mv_add_range stored them.  MV is const here and is only read. */
323 *low = mv->values[1].f;
324 *high = mv->values[2].f;
327 /* Returns true if values[IDX] is in use when the `type' member
328 is set to TYPE (in struct missing_values), false otherwise. */
331 using_element (unsigned type, int idx)
/* values[] has exactly three slots, indexed 0 through 2; every loop
   over the slots in this file runs for i < 3. */
333 assert (idx >= 0 && idx < 3);
353 /* Returns true if MV can be resized to the given WIDTH with
354 mv_resize(), false otherwise. Resizing is possible only when
355 each value in MV (if any) is resizable from MV's current width
356 to WIDTH, as determined by value_is_resizable. */
358 mv_is_resizable (const struct missing_values *mv, int width)
/* Only slots in use for MV's current type are checked.  Unused slots
   cannot block a resize: mv_resize re-creates them rather than
   resizing them. */
362 for (i = 0; i < 3; i++)
363 if (using_element (mv->type, i)
364 && !value_is_resizable (&mv->values[i], mv->width, width))
370 /* Resizes MV to the given WIDTH. WIDTH must fit the constraints
371 explained for mv_is_resizable. */
373 mv_resize (struct missing_values *mv, int width)
/* The caller is responsible for checking mv_is_resizable first. */
377 assert (mv_is_resizable (mv, width));
378 for (i = 0; i < 3; i++)
/* A slot that is in use is resized from the old width to WIDTH, so its
   contents carry over. */
379 if (using_element (mv->type, i))
380 value_resize (&mv->values[i], mv->width, width);
/* A slot that is not in use is destroyed at the old width and
   initialized afresh at WIDTH.  NOTE(review): presumably this is the
   `else' branch of the test above -- confirm. */
383 value_destroy (&mv->values[i], mv->width);
384 value_init (&mv->values[i], width);
389 /* Returns true if D is a missing value in MV, false otherwise.
390 MV must be a set of numeric missing values. */
392 is_num_user_missing (const struct missing_values *mv, double d)
394 const union value *v = mv->values;
395 assert (mv->width == 0);
/* The returns below differ by how many slots take part.  Individual
   values are compared for exact equality.  A range is tested
   inclusively at both ends, with the low bound in slot 1 and the high
   bound in slot 2. */
/* Two individual values (presumably the MVT_2 case). */
403 return v[0].f == d || v[1].f == d;
/* Three individual values (presumably the MVT_3 case). */
405 return v[0].f == d || v[1].f == d || v[2].f == d;
/* Range only (presumably the MVT_RANGE case). */
407 return v[1].f <= d && d <= v[2].f;
/* One individual value in slot 0 plus a range (presumably the
   MVT_RANGE_1 case). */
409 return v[0].f == d || (v[1].f <= d && d <= v[2].f);
414 /* Returns true if S[] is a missing value in MV, false otherwise.
415 MV must be a set of string missing values.
416 S[] must contain exactly as many characters as MV's width. */
418 is_str_user_missing (const struct missing_values *mv, const uint8_t s[])
420 const union value *v = mv->values;
421 assert (mv->width > 0);
/* Each comparison covers exactly mv->width bytes with memcmp, so S
   must supply at least that many bytes and needs no terminator.  The
   returns below differ only in how many slots are compared. */
/* One value (presumably the MVT_1 case). */
427 return !memcmp (v[0].s, s, mv->width);
/* Two values (presumably the MVT_2 case). */
429 return (!memcmp (v[0].s, s, mv->width)
430 || !memcmp (v[1].s, s, mv->width));
/* Three values (presumably the MVT_3 case). */
432 return (!memcmp (v[0].s, s, mv->width)
433 || !memcmp (v[1].s, s, mv->width)
434 || !memcmp (v[2].s, s, mv->width));
442 /* Returns true if V is a missing value in the given CLASS in MV, false otherwise. */
445 mv_is_value_missing (const struct missing_values *mv, const union value *v,
/* Dispatch on MV's width.  Width 0 means numeric, so V's `f' member is
   tested.  Any other width means string, so V's `s' member is
   tested. */
448 return (mv->width == 0
449 ? mv_is_num_missing (mv, v->f, class)
450 : mv_is_str_missing (mv, v->s, class));
453 /* Returns true if D is a missing value in the given CLASS in MV, false otherwise.
455 MV must be a set of numeric missing values. */
457 mv_is_num_missing (const struct missing_values *mv, double d,
460 assert (mv->width == 0);
/* CLASS is a bit mask.  SYSMIS counts only when MV_SYSTEM is set in
   it, and MV's user-missing values count only when MV_USER is set. */
461 return ((class & MV_SYSTEM && d == SYSMIS)
462 || (class & MV_USER && is_num_user_missing (mv, d)));
465 /* Returns true if S[] is a missing value in the given CLASS in MV, false otherwise.
467 MV must be a set of string missing values.
468 S[] must contain exactly as many characters as MV's width. */
470 mv_is_str_missing (const struct missing_values *mv, const uint8_t s[],
473 assert (mv->width > 0);
/* Only the MV_USER bit of CLASS is consulted.  This function makes no
   system-missing test for strings, so a CLASS without MV_USER always
   yields false. */
474 return class & MV_USER && is_str_user_missing (mv, s);
477 /* Like mv_is_value_missing(), this tests whether V is a missing value
478 in the given CLASS in MV. It supports the uncommon case where V
479 and MV might have different widths: the caller must specify VW, the
480 width of V. MV and VW must be both numeric or both string.
482 Comparison of strings of different width is done by conceptually
483 extending both strings to infinite width by appending spaces. */
485 mv_is_value_missing_varwidth (const struct missing_values *mv,
486 const union value *v, int vw,
/* The ordinary same-width test is reused here.  NOTE(review):
   presumably this return is guarded by a check that MV's width equals
   VW -- confirm. */
491 return mv_is_value_missing (mv, v, class);
493 /* Make sure they're both strings. */
/* From here on only user-missing string values can match, so there is
   nothing to test unless CLASS includes MV_USER and MV is
   non-empty. */
495 if (!(class & MV_USER) || mv->type == MVT_NONE)
/* Using mv->type as the loop bound is valid because MV is a string set
   at this point.  Ranges can be added only to numeric sets
   (mv_add_range asserts width 0), so the type is 1..3 and equals the
   number of stored values. */
498 for (int i = 0; i < mv->type; i++)
/* Each buffer is passed with its own width, and a zero result from
   buf_compare_rpad is treated as a match. */
499 if (!buf_compare_rpad (CHAR_CAST_BUG (const char *, mv->values[i].s), mvw,
500 CHAR_CAST_BUG (const char *, v->s), vw))
/* Formats the missing values in MV as text and returns the result, or
   NULL if nothing was written (MV is empty).

   The range, if any, comes first as "LOW THRU HIGH", with an endpoint
   spelled LOWEST or HIGHEST when it equals that sentinel.  Each
   individual value follows, separated from what precedes it by "; ".
   Numbers are printed with %.*g using DBL_DIG + 1 significant digits.
   String values are passed through recode_string along with "UTF-8"
   and ENCODING, then wrapped in double quotes.

   The returned string is taken from the internal buffer with
   ds_steal_cstr; presumably the caller owns it and must free it --
   confirm. */
506 mv_to_string (const struct missing_values *mv, const char *encoding)
508 struct string s = DS_EMPTY_INITIALIZER;
509 if (mv_has_range (mv))
512 mv_get_range (mv, &x, &y);
/* Open-ended ranges print the keyword in place of the sentinel
   number. */
514 ds_put_format (&s, "LOWEST THRU %.*g", DBL_DIG + 1, y);
515 else if (y == HIGHEST)
516 ds_put_format (&s, "%.*g THRU HIGHEST", DBL_DIG + 1, x);
518 ds_put_format (&s, "%.*g THRU %.*g",
522 for (size_t j = 0; j < mv_n_values (mv); j++)
524 const union value *value = mv_get_value (mv, j);
/* A separator is needed before every item except the first thing
   written, which may be the range above. */
525 if (!ds_is_empty (&s))
526 ds_put_cstr (&s, "; ");
528 ds_put_format (&s, "%.*g", DBL_DIG + 1, value->f);
/* At most MV_MAX_STRING bytes of a string value are shown.
   mv_is_acceptable only admits values whose bytes beyond that point
   are all spaces, so nothing significant is cut off. */
531 char *mvs = recode_string (
532 "UTF-8", encoding, CHAR_CAST (char *, value->s),
533 MIN (mv->width, MV_MAX_STRING));
534 ds_put_format (&s, "\"%s\"", mvs);
538 return ds_is_empty (&s) ? NULL : ds_steal_cstr (&s);