From 04d2c99833753252b724dd9d4f15cc3a80b6bec8 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 25 May 2009 20:07:19 -0700 Subject: [PATCH] Implement missing values for long string variables. --- NEWS | 4 +- doc/dev/concepts.texi | 121 ++++++++------- doc/dev/system-file-format.texi | 23 +-- doc/language.texi | 4 +- doc/variables.texi | 7 +- src/data/missing-values.c | 166 +++++++++++++++------ src/data/missing-values.h | 25 +++- src/data/por-file-reader.c | 26 ++-- src/data/por-file-writer.c | 24 +-- src/data/sys-file-reader.c | 23 +-- src/data/sys-file-writer.c | 24 +-- src/data/variable.c | 25 +--- src/data/variable.h | 2 - src/language/dictionary/apply-dictionary.c | 13 +- src/language/dictionary/missing-values.c | 29 ++-- src/language/dictionary/sys-file-info.c | 24 +-- src/ui/gui/missing-val-dialog.c | 11 +- src/ui/gui/psppire-var-sheet.c | 6 - src/ui/gui/text-data-import-dialog.c | 4 +- src/ui/gui/var-display.c | 8 +- tests/command/missing-values.sh | 21 ++- 21 files changed, 337 insertions(+), 253 deletions(-) diff --git a/NEWS b/NEWS index a02810cd..df10031e 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,5 @@ PSPP NEWS -- history of user-visible changes. -Time-stamp: <2009-05-24 16:40:48 blp> +Time-stamp: <2009-05-24 22:25:04 blp> Copyright (C) 1996-9, 2000, 2008, 2009 Free Software Foundation, Inc. See the end for copying conditions. @@ -11,6 +11,8 @@ Changes from 0.7.1 to 0.7.2: * Value labels for long string variables are now supported. + * Missing values for long string variables are now supported. + Changes from 0.7.0 to 0.7.1: * Added a perl module to facilitate reading and writing of pspp system diff --git a/doc/dev/concepts.texi b/doc/dev/concepts.texi index ac45416f..cc6e7522 100644 --- a/doc/dev/concepts.texi +++ b/doc/dev/concepts.texi @@ -743,28 +743,7 @@ variable, is most conveniently executed through functions on A @struct{missing_values} is essentially a set of @union{value}s that have a common value width (@pxref{Values}). 
For a set of missing values associated with a variable (the common case), the set's -width is the same as the variable's width. The contents of a set of -missing values is subject to some restrictions. Regardless of width, -a set of missing values is allowed to be empty. Otherwise, its -possible contents depend on its width: - -@table @asis -@item 0 (numeric values) -Up to three discrete numeric values, or a range of numeric values -(which includes both ends of the range), or a range plus one discrete -numeric value. - -@item 1@dots{}@t{MAX_SHORT_STRING} - 1 (short string values) -Up to three discrete string values (with the same width as the set). - -@item @t{MAX_SHORT_STRING}@dots{}@t{MAX_STRING} (long string values) -Always empty. -@end table - -These somewhat arbitrary restrictions are the same as those imposed by -SPSS. In PSPP we could easily eliminate these restrictions, but doing -so would also require us to extend the system file format in an -incompatible way, which we consider a bad tradeoff. +width is the same as the variable's width. Function prototypes and other declarations related to missing values are declared in @file{data/missing-values.h}. @@ -773,18 +752,37 @@ are declared in @file{data/missing-values.h}. Opaque type that represents a set of missing values. @end deftp +The contents of a set of missing values is subject to some +restrictions. Regardless of width, a set of missing values is allowed +to be empty. A set of numeric missing values may contain up to three +discrete numeric values, or a range of numeric values (which includes +both ends of the range), or a range plus one discrete numeric value. +A set of string missing values may contain up to three discrete string +values (with the same width as the set), but ranges are not supported. 
+ +In addition, values in string missing values wider than +@code{MV_MAX_STRING} bytes may contain non-space characters only in +their first @code{MV_MAX_STRING} bytes; all the bytes after the first +@code{MV_MAX_STRING} must be spaces. @xref{mv_is_acceptable}, for a +function that tests a value against these constraints. + +@deftypefn Macro int MV_MAX_STRING +Number of bytes in a string missing value that are not required to be +spaces. The current value is 8, a value which is fixed by the system +file format. In PSPP we could easily eliminate this restriction, but +doing so would also require us to extend the system file format in an +incompatible way, which we consider a bad tradeoff. +@end deftypefn + The most often useful functions for missing values are those for testing whether a given value is missing, described in the following section. Several other functions for creating, inspecting, and modifying @struct{missing_values} objects are described afterward, but -these functions are much more rarely useful. No function for -destroying a @struct{missing_values} is provided, because -@struct{missing_values} does not contain any pointers or other -references to resources that need deallocation. +these functions are much more rarely useful. @menu * Testing for Missing Values:: -* Initializing User-Missing Value Sets:: +* Creating and Destroying User-Missing Values:: * Changing User-Missing Value Set Width:: * Inspecting User-Missing Value Sets:: * Modifying User-Missing Value Sets:: @@ -836,8 +834,10 @@ missing. @end deftp @end deftypefun -@node Initializing User-Missing Value Sets -@subsection Initializing User-Missing Value Sets +@node Creating and Destroying User-Missing Values +@subsection Creation and Destruction + +These functions create and destroy @struct{missing_values} objects. @deftypefun void mv_init (struct missing_values *@var{mv}, int @var{width}) Initializes @var{mv} as a set of user-missing values. The set is @@ -845,6 +845,10 @@ initially empty. 
Any values added to it must have the specified @var{width}. @end deftypefun +@deftypefun void mv_destroy (struct missing_values *@var{mv}) +Destroys @var{mv}, which must not be referred to again. +@end deftypefun + @deftypefun void mv_copy (struct missing_values *@var{mv}, const struct missing_values *@var{old}) Initializes @var{mv} as a copy of the existing set of user-missing values @var{old}. @@ -874,11 +878,9 @@ the required width, may be used instead. Tests whether @var{mv}'s width may be changed to @var{new_width} using @func{mv_resize}. Returns true if it is allowed, false otherwise. -If @var{new_width} is a long string width, @var{mv} may be resized -only if it is empty. Otherwise, if @var{mv} contains any missing -values, then it may be resized only if each missing value may be -resized, as determined by @func{value_is_resizable} -(@pxref{value_is_resizable}). +If @var{mv} contains any missing values, then it may be resized only +if each missing value may be resized, as determined by +@func{value_is_resizable} (@pxref{value_is_resizable}). @end deftypefun @anchor{mv_resize} @@ -897,8 +899,8 @@ width. These functions inspect the properties and contents of @struct{missing_values} objects. -The first set of functions inspects the discrete values that numeric -and short string sets of user-missing values may contain: +The first set of functions inspects the discrete values that sets of +user-missing values may contain: @deftypefun bool mv_is_empty (const struct missing_values *@var{mv}) Returns true if @var{mv} contains no user-missing values, false if it @@ -923,11 +925,12 @@ values, that is, if @func{mv_n_values} would return nonzero for @var{mv}. @end deftypefun -@deftypefun void mv_get_value (const struct missing_values *@var{mv}, union value *@var{value}, int @var{index}) -Copies the discrete user-missing value in @var{mv} with the given -@var{index} into @var{value}. 
The index must be less than the number -of discrete user-missing values in @var{mv}, as reported by -@func{mv_n_values}. +@deftypefun {const union value *} mv_get_value (const struct missing_values *@var{mv}, int @var{index}) +Returns the discrete user-missing value in @var{mv} with the given +@var{index}. The caller must not modify or free the returned value or +refer to it after modifying or freeing @var{mv}. The index must be +less than the number of discrete user-missing values in @var{mv}, as +reported by @func{mv_n_values}. @end deftypefun The second set of functions inspects the single range of values that @@ -949,7 +952,7 @@ include a range. These functions modify the contents of @struct{missing_values} objects. -The first set of functions applies to all sets of user-missing values: +The next set of functions applies to all sets of user-missing values: @deftypefun bool mv_add_value (struct missing_values *@var{mv}, const union value *@var{value}) @deftypefunx bool mv_add_str (struct missing_values *@var{mv}, const char @var{value}[]) @@ -957,8 +960,8 @@ The first set of functions applies to all sets of user-missing values: Attempts to add the given discrete @var{value} to set of user-missing values @var{mv}. @var{value} must have the same width as @var{mv}. Returns true if @var{value} was successfully added, false if the set -could not accept any more discrete values. (Always returns false if -@var{mv} is a set of long string user-missing values.) +could not accept any more discrete values or if @var{value} is not an +acceptable user-missing value (see @func{mv_is_acceptable} below). These functions are equivalent, except for the form in which @var{value} is provided, so you may use whichever function is most @@ -970,10 +973,22 @@ Removes a discrete value from @var{mv} (which must contain at least one discrete value) and stores it in @var{value}. 
@end deftypefun -@deftypefun void mv_replace_value (struct missing_values *@var{mv}, const union value *@var{value}, int @var{index}) -Replaces the discrete value with the given @var{index} in @var{mv} -(which must contain at least @var{index} + 1 discrete values) with -@var{value}. +@deftypefun bool mv_replace_value (struct missing_values *@var{mv}, const union value *@var{value}, int @var{index}) +Attempts to replace the discrete value with the given @var{index} in +@var{mv} (which must contain at least @var{index} + 1 discrete values) +by @var{value}. Returns true if successful, false if @var{value} is +not an acceptable user-missing value (see @func{mv_is_acceptable} +below). +@end deftypefun + +@deftypefun bool mv_is_acceptable (const union value *@var{value}, int @var{width}) +@anchor{mv_is_acceptable} +Returns true if @var{value}, which must have the specified +@var{width}, may be added to a missing value set of the same +@var{width}, false if it cannot. As described above, all numeric +values and string values of width @code{MV_MAX_STRING} or less may be +added, but string values of greater width may be added only if bytes +beyond the first @code{MV_MAX_STRING} are all spaces. @end deftypefun The second set of functions applies only to numeric sets of @@ -1298,16 +1313,6 @@ Returns true if @var{var} is an alphanumeric (string) variable, false otherwise. @end deftypefun -@deftypefun bool var_is_short_string (const struct variable *@var{var}) -Returns true if @var{var} is a string variable of width -@code{MAX_SHORT_STRING} or less, false otherwise. -@end deftypefun - -@deftypefun bool var_is_long_string (const struct variable *@var{var}) -Returns true if @var{var} is a string variable of width greater than -@code{MAX_SHORT_STRING}, false otherwise.
-@end deftypefun - @node Variable Missing Values @subsection Variable Missing Values diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 0358961b..a404d0d6 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -289,15 +289,20 @@ length @code{label_len}, rounded up to the nearest multiple of 32 bits. The first @code{label_len} characters are the variable's variable label. @item flt64 missing_values[]; -This field is present only if @code{n_missing_values} is not 0. It has -the same number of elements as the absolute value of -@code{n_missing_values}. For discrete missing values, each element -represents one missing value. When a range is present, the first -element denotes the minimum value in the range, and the second element -denotes the maximum value in the range. When a range plus a value are -present, the third element denotes the additional discrete missing -value. HIGHEST and LOWEST are indicated as described in the chapter -introduction. +This field is present only if @code{n_missing_values} is nonzero. It +has the same number of 8-byte elements as the absolute value of +@code{n_missing_values}. Each element is interpreted as a number for +numeric variables (with HIGHEST and LOWEST indicated as described in +the chapter introduction). For string variables of width less than 8 +bytes, elements are right-padded with spaces; for string variables +wider than 8 bytes, only the first 8 bytes of each missing value are +specified, with the remainder implicitly all spaces. + +For discrete missing values, each element represents one missing +value. When a range is present, the first element denotes the minimum +value in the range, and the second element denotes the maximum value +in the range. When a range plus a value are present, the third +element denotes the additional discrete missing value. 
@end table The @code{print} and @code{write} members of sysfile_variable are output diff --git a/doc/language.texi b/doc/language.texi index 50e8f646..e23a5580 100644 --- a/doc/language.texi +++ b/doc/language.texi @@ -379,9 +379,7 @@ spaces. Variables, whether numeric or string, can have designated @dfn{user-missing values}. Every user-missing value is an actual value for that variable. However, most of the time user-missing values are -treated in the same way as the system-missing value. String variables -that are wider than a certain width, usually 8 characters (depending on -computer architecture), cannot have user-missing values. +treated in the same way as the system-missing value. For more information on missing values, see the following sections: @ref{Variables}, @ref{MISSING VALUES}, @ref{Expressions}. See also the diff --git a/doc/variables.texi b/doc/variables.texi index 8396b714..c9e270bf 100644 --- a/doc/variables.texi +++ b/doc/variables.texi @@ -212,9 +212,10 @@ As part of a range, LO or LOWEST may take the place of num1; HI or HIGHEST may take the place of num2. @end display -@cmd{MISSING VALUES} sets user-missing values for numeric and -short string variables. Long string variables may not have missing -values. +@cmd{MISSING VALUES} sets user-missing values for numeric and string +variables. Long string variables may have missing values, but +characters after the first 8 bytes of the missing value must be +spaces. Specify a list of variables, followed by a list of their user-missing values in parentheses. Up to three discrete values may be given, or, diff --git a/src/data/missing-values.c b/src/data/missing-values.c index 867e0f71..c1a74691 100644 --- a/src/data/missing-values.c +++ b/src/data/missing-values.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,12 +15,11 @@ along with this program. If not, see . */ #include -#include "missing-values.h" +#include #include #include -#include +#include #include -#include "variable.h" #include /* Types of user-missing values. @@ -36,15 +35,47 @@ enum mv_type }; /* Initializes MV as a set of missing values for a variable of - the given WIDTH. Although only numeric variables and short - string variables may have missing values, WIDTH may be any - valid variable width. */ + the given WIDTH. MV should be destroyed with mv_destroy when + it is no longer needed. */ void mv_init (struct missing_values *mv, int width) { + int i; + + assert (width >= 0 && width <= MAX_STRING); + mv->type = MVT_NONE; + mv->width = width; + for (i = 0; i < 3; i++) + value_init (&mv->values[i], width); +} + +/* Initializes MV as a set of missing values for a variable of + the given WIDTH. MV will be automatically destroyed along + with POOL; it must not be passed to mv_destroy for explicit + destruction. */ +void +mv_init_pool (struct pool *pool, struct missing_values *mv, int width) +{ + int i; + assert (width >= 0 && width <= MAX_STRING); mv->type = MVT_NONE; mv->width = width; + for (i = 0; i < 3; i++) + value_init_pool (pool, &mv->values[i], width); +} + +/* Frees any storage allocated by mv_init for MV. */ +void +mv_destroy (struct missing_values *mv) +{ + if (mv != NULL) + { + int i; + + for (i = 0; i < 3; i++) + value_destroy (&mv->values[i], mv->width); + } } /* Removes any missing values from MV. */ @@ -54,13 +85,32 @@ mv_clear (struct missing_values *mv) mv->type = MVT_NONE; } -/* Copies SRC to MV. */ +/* Initializes MV as a copy of SRC. 
*/ void mv_copy (struct missing_values *mv, const struct missing_values *src) { - assert(src); + int i; + + mv_init (mv, src->width); + mv->type = src->type; + for (i = 0; i < 3; i++) + value_copy (&mv->values[i], &src->values[i], mv->width); +} + +/* Returns true if VALUE, of the given WIDTH, may be added to a + missing value set also of the given WIDTH. This is normally + the case, but string missing values over MV_MAX_STRING bytes + long must consist solely of spaces after the first + MV_MAX_STRING bytes. */ +bool +mv_is_acceptable (const union value *value, int width) +{ + int i; - *mv = *src; + for (i = MV_MAX_STRING; i < width; i++) + if (value_str (value, width)[i] != ' ') + return false; + return true; } /* Returns true if MV is an empty set of missing values. */ @@ -80,20 +130,21 @@ mv_get_width (const struct missing_values *mv) /* Attempts to add individual value V to the set of missing values MV. Returns true if successful, false if MV has no - more room for missing values. (Long string variables never - accept missing values.) */ + more room for missing values or if V is not an acceptable + missing value. */ bool mv_add_value (struct missing_values *mv, const union value *v) { - if (mv->width > MAX_SHORT_STRING) + if (!mv_is_acceptable (v, mv->width)) return false; + switch (mv->type) { case MVT_NONE: case MVT_1: case MVT_2: case MVT_RANGE: - mv->values[mv->type & 3] = *v; + value_copy (&mv->values[mv->type & 3], v, mv->width); mv->type++; return true; @@ -107,13 +158,20 @@ mv_add_value (struct missing_values *mv, const union value *v) /* Attempts to add S to the set of string missing values MV. S must contain exactly as many characters as MV's width. Returns true if successful, false if MV has no more room for - missing values. (Long string variables never accept missing - values.) */ + missing values or if S is not an acceptable missing value. 
*/ bool mv_add_str (struct missing_values *mv, const char s[]) { + union value v; + bool ok; + assert (mv->width > 0); - return mv_add_value (mv, (union value *) s); + value_init (&v, mv->width); + memcpy (value_str_rw (&v, mv->width), s, mv->width); + ok = mv_add_value (mv, &v); + value_destroy (&v, mv->width); + + return ok; } /* Attempts to add D to the set of numeric missing values MV. @@ -122,8 +180,16 @@ mv_add_str (struct missing_values *mv, const char s[]) bool mv_add_num (struct missing_values *mv, double d) { + union value v; + bool ok; + assert (mv->width == 0); - return mv_add_value (mv, (union value *) &d); + value_init (&v, 0); + v.f = d; + ok = mv_add_value (mv, &v); + value_destroy (&v, 0); + + return ok; } /* Attempts to add range [LOW, HIGH] to the set of numeric @@ -152,7 +218,8 @@ mv_has_value (const struct missing_values *mv) return mv_n_values (mv) > 0; } -/* Removes one individual value from MV and stores it in *V. +/* Removes one individual value from MV and stores it in V, which + must have been initialized as a value with the same width as MV. MV must contain an individual value (as determined by mv_has_value()). @@ -165,32 +232,46 @@ mv_has_value (const struct missing_values *mv) void mv_pop_value (struct missing_values *mv, union value *v) { + union value tmp; + assert (mv_has_value (mv)); - *v = mv->values[0]; - remove_element (mv->values, mv->type & 3, sizeof *mv->values, 0); + value_copy (v, &mv->values[0], mv->width); + tmp = mv->values[0]; + mv->values[0] = mv->values[1]; + mv->values[1] = mv->values[2]; + mv->values[2] = tmp; mv->type--; } -/* Stores MV's value with index IDX in *V. +/* Returns MV's discrete value with index IDX. The caller must + not modify or free this value, or access it after MV is + modified or freed. IDX must be less than the number of discrete values in MV, as - reported by mv_n_values(MV). */ -void -mv_get_value (const struct missing_values *mv, union value *v, int idx) + reported by mv_n_values. 
*/ +const union value * +mv_get_value (const struct missing_values *mv, int idx) { assert (idx >= 0 && idx < mv_n_values (mv)); - *v = mv->values[idx]; + return &mv->values[idx]; } -void +/* Replaces MV's discrete value with index IDX by a copy of V, + which must have the same width as MV. + IDX must be less than the number of discrete values in MV, as + reported by mv_n_values. */ +bool mv_replace_value (struct missing_values *mv, const union value *v, int idx) { assert (idx >= 0) ; assert (idx < mv_n_values(mv)); - mv->values[idx] = *v; -} + if (!mv_is_acceptable (v, mv->width)) + return false; + value_copy (&mv->values[idx], v, mv->width); + return true; +} /* Returns the number of individual (not part of a range) missing values in MV. */ @@ -232,7 +313,6 @@ mv_get_range (const struct missing_values *mv, double *low, double *high) *high = mv->values[2].f; } - /* Returns true if values[IDX] is in use when the `type' member is set to TYPE (in struct missing_values), false otherwise. */ @@ -262,17 +342,12 @@ using_element (unsigned type, int idx) /* Returns true if MV can be resized to the given WIDTH with mv_resize(), false otherwise. Resizing is possible only when each value in MV (if any) is resizable from MV's current width - to WIDTH, as determined by value_is_resizable. In addition, - resizing must not produce a non-empty set of long string - missing values. */ + to WIDTH, as determined by value_is_resizable. 
*/ bool mv_is_resizable (const struct missing_values *mv, int width) { int i; - if (width > MAX_SHORT_STRING && mv->type != MVT_NONE) - return false; - for (i = 0; i < 3; i++) if (using_element (mv->type, i) && !value_is_resizable (&mv->values[i], mv->width, width)) @@ -292,6 +367,11 @@ mv_resize (struct missing_values *mv, int width) for (i = 0; i < 3; i++) if (using_element (mv->type, i)) value_resize (&mv->values[i], mv->width, width); + else + { + value_destroy (&mv->values[i], mv->width); + value_init (&mv->values[i], width); + } mv->width = width; } @@ -333,14 +413,14 @@ is_str_user_missing (const struct missing_values *mv, const char s[]) case MVT_NONE: return false; case MVT_1: - return !memcmp (v[0].short_string, s, mv->width); + return !memcmp (value_str (&v[0], mv->width), s, mv->width); case MVT_2: - return (!memcmp (v[0].short_string, s, mv->width) - || !memcmp (v[1].short_string, s, mv->width)); + return (!memcmp (value_str (&v[0], mv->width), s, mv->width) + || !memcmp (value_str (&v[1], mv->width), s, mv->width)); case MVT_3: - return (!memcmp (v[0].short_string, s, mv->width) - || !memcmp (v[1].short_string, s, mv->width) - || !memcmp (v[2].short_string, s, mv->width)); + return (!memcmp (value_str (&v[0], mv->width), s, mv->width) + || !memcmp (value_str (&v[1], mv->width), s, mv->width) + || !memcmp (value_str (&v[2], mv->width), s, mv->width)); case MVT_RANGE: case MVT_RANGE_1: NOT_REACHED (); @@ -356,7 +436,7 @@ mv_is_value_missing (const struct missing_values *mv, const union value *v, { return (mv->width == 0 ? 
mv_is_num_missing (mv, v->f, class) - : mv_is_str_missing (mv, v->short_string, class)); + : mv_is_str_missing (mv, value_str (v, mv->width), class)); } /* Returns true if D is a missing value in the given CLASS in MV, diff --git a/src/data/missing-values.h b/src/data/missing-values.h index b504f8c7..5576fc6c 100644 --- a/src/data/missing-values.h +++ b/src/data/missing-values.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,11 +24,11 @@ user-missing values, or a range of numeric values, or a range plus one discrete value. - - Short string variables may have up to 3 discrete string - user-missing values. - - - Long string variables may not have user-missing - values. */ + - String variables may have up to 3 discrete string + user-missing values. (However, for long string + variables all bytes after the first MV_MAX_STRING must + be spaces.) +*/ #ifndef DATA_MISSING_VALUES_H #define DATA_MISSING_VALUES_H 1 @@ -36,6 +36,12 @@ #include #include "data/value.h" +struct pool; + +/* Missing values for long string variables after the first + MV_MAX_STRING bytes must be all spaces. */ +#define MV_MAX_STRING 8 + /* Missing values. Opaque--use access functions defined below. */ struct missing_values @@ -63,6 +69,8 @@ bool mv_is_str_missing (const struct missing_values *, const char[], /* Initializing missing value sets. 
*/ void mv_init (struct missing_values *, int width); +void mv_init_pool (struct pool *pool, struct missing_values *, int width); +void mv_destroy (struct missing_values *); void mv_copy (struct missing_values *, const struct missing_values *); void mv_clear (struct missing_values *); @@ -71,13 +79,14 @@ bool mv_is_resizable (const struct missing_values *, int width); void mv_resize (struct missing_values *, int width); /* Basic property inspection. */ +bool mv_is_acceptable (const union value *, int width); bool mv_is_empty (const struct missing_values *); int mv_get_width (const struct missing_values *); /* Inspecting discrete values. */ int mv_n_values (const struct missing_values *); bool mv_has_value (const struct missing_values *); -void mv_get_value (const struct missing_values *, union value *, int idx); +const union value *mv_get_value (const struct missing_values *, int idx); /* Inspecting ranges. */ bool mv_has_range (const struct missing_values *); @@ -88,7 +97,7 @@ bool mv_add_value (struct missing_values *, const union value *); bool mv_add_str (struct missing_values *, const char[]); bool mv_add_num (struct missing_values *, double); void mv_pop_value (struct missing_values *, union value *); -void mv_replace_value (struct missing_values *, const union value *, int idx); +bool mv_replace_value (struct missing_values *, const union value *, int idx); /* Adding and modifying ranges. 
*/ bool mv_add_range (struct missing_values *, double low, double high); diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index c8e49289..96af30b1 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -43,6 +43,7 @@ #include #include +#include "minmax.h" #include "xalloc.h" #include "gettext.h" @@ -606,8 +607,7 @@ assign_default: return fmt_default_for_width (var_get_width (v)); } -static void parse_value (struct pfm_reader *, struct variable *, - union value *); +static void parse_value (struct pfm_reader *, int width, union value *); /* Read information on all the variables. */ static void @@ -686,7 +686,7 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) var_set_write_format (v, &write); /* Range missing values. */ - mv_init (&miss, var_get_width (v)); + mv_init (&miss, width); if (match (r, 'B')) { double x = read_float (r); @@ -701,13 +701,17 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) /* Single missing values. */ while (match (r, '8')) { + int mv_width = MIN (width, 8); union value value; - parse_value (r, v, &value); + + parse_value (r, mv_width, &value); + value_resize (&value, mv_width, width); mv_add_value (&miss, &value); - value_destroy (&value, var_get_width (v)); + value_destroy (&value, width); } var_set_missing_values (v, &miss); + mv_destroy (&miss); if (match (r, 'C')) { @@ -728,16 +732,16 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) } } -/* Parse a value for variable VV into value V. */ +/* Parse a value of width WIDTH into value V.
*/ static void -parse_value (struct pfm_reader *r, struct variable *vv, union value *v) +parse_value (struct pfm_reader *r, int width, union value *v) { - value_init (v, var_get_width (vv)); - if (var_is_alpha (vv)) + value_init (v, width); + if (width > 0) { char string[256]; read_string (r, string); - buf_copy_str_rpad (value_str_rw (v, 8), 8, string, ' '); + value_copy_str_rpad (v, width, string, ' '); } else v->f = read_float (r); @@ -780,7 +784,7 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict) char label[256]; int j; - parse_value (r, v[0], &val); + parse_value (r, var_get_width (v[0]), &val); read_string (r, label); /* Assign the value label to each variable. */ diff --git a/src/data/por-file-writer.c b/src/data/por-file-writer.c index 9ccc8fd7..7b1b2551 100644 --- a/src/data/por-file-writer.c +++ b/src/data/por-file-writer.c @@ -307,15 +307,15 @@ write_format (struct pfm_writer *w, struct fmt_spec f, int width) write_int (w, f.d); } -/* Write value V for variable VV to file H. */ +/* Write value V with width WIDTH to file H. */ static void -write_value (struct pfm_writer *w, const union value *v, struct variable *vv) +write_value (struct pfm_writer *w, const union value *v, int width) { - if (var_is_numeric (vv)) + if (width == 0) write_float (w, v->f); else { - int width = MIN (var_get_width (vv), MAX_POR_WIDTH); + width = MIN (width, MAX_POR_WIDTH); write_int (w, width); buf_write (w, value_str (v, width), width); } @@ -344,6 +344,7 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) struct variable *v = dict_get_var (dict, i); struct missing_values mv; int width = MIN (var_get_width (v), MAX_POR_WIDTH); + int j; buf_write (w, "7", 1); write_int (w, width); @@ -353,10 +354,12 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) /* Write missing values. 
*/ mv_copy (&mv, var_get_missing_values (v)); - while (mv_has_range (&mv)) + if (var_get_width (v) > 8) + mv_resize (&mv, 8); + if (mv_has_range (&mv)) { double x, y; - mv_pop_range (&mv, &x, &y); + mv_get_range (&mv, &x, &y); if (x == LOWEST) { buf_write (w, "9", 1); @@ -374,13 +377,12 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) write_float (w, y); } } - while (mv_has_value (&mv)) + for (j = 0; j < mv_n_values (&mv); j++) { - union value value; - mv_pop_value (&mv, &value); buf_write (w, "8", 1); - write_value (w, &value, v); + write_value (w, mv_get_value (&mv, j), mv_get_width (&mv)); } + mv_destroy (&mv); /* Write variable label. */ if (var_get_label (v) != NULL) @@ -417,7 +419,7 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict) for (i = 0; i < n_labels; i++) { const struct val_lab *vl = labels[i]; - write_value (w, val_lab_get_value (vl), v); + write_value (w, val_lab_get_value (vl), var_get_width (v)); write_string (w, val_lab_get_label (vl)); } free (labels); diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 5a840279..67767a9f 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -552,7 +552,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, struct missing_values mv; int i; - mv_init (&mv, var_get_width (var)); + mv_init_pool (r->pool, &mv, var_get_width (var)); if (var_is_numeric (var)) { if (missing_value_code < -3 || missing_value_code > 3 @@ -571,21 +571,24 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, } else { + int mv_width = MAX (width, 8); + union value value; + if (missing_value_code < 1 || missing_value_code > 3) sys_error (r, _("String missing value indicator field is not " "0, 1, 2, or 3.")); - if (var_is_long_string (var)) - sys_warn (r, _("Ignoring missing values on long string variable " - "%s, which PSPP does not yet support."), name); + + value_init (&value, mv_width); + value_set_missing (&value, 
mv_width); for (i = 0; i < missing_value_code; i++) { - char string[9]; - read_string (r, string, sizeof string); - mv_add_str (&mv, string); + char *s = value_str_rw (&value, mv_width); + read_bytes (r, s, 8); + mv_add_str (&mv, s); } + value_destroy (&value, mv_width); } - if (!var_is_long_string (var)) - var_set_missing_values (var, &mv); + var_set_missing_values (var, &mv); } /* Set formats. */ @@ -1203,7 +1206,7 @@ read_value_labels (struct sfm_reader *r, for (i = 0; i < var_cnt; i++) { var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r)); - if (var_is_long_string (var[i])) + if (var_get_width (var[i]) > 8) sys_error (r, _("Value labels may not be added to long string " "variables (e.g. %s) using records types 3 and 4."), var_get_name (var[i])); diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index c1ff9a83..13dc2de6 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -425,7 +425,7 @@ write_variable (struct sfm_writer *w, const struct variable *v) int width = var_get_width (v); int segment_cnt = sfm_width_to_segments (width); int seg0_width = sfm_segment_alloc_width (width, 0); - const struct missing_values *mv = var_get_missing_values (v); + struct missing_values mv; int i; /* Record type. */ @@ -440,7 +440,13 @@ write_variable (struct sfm_writer *w, const struct variable *v) /* Number of missing values. If there is a range, then the range counts as 2 missing values and causes the number to be negated. */ - write_int (w, mv_has_range (mv) ? -2 - mv_n_values (mv) : mv_n_values (mv)); + mv_copy (&mv, var_get_missing_values (v)); + if (mv_get_width (&mv) > 8) + mv_resize (&mv, 8); + if (mv_has_range (&mv)) + write_int (w, -2 - mv_n_values (&mv)); + else + write_int (w, mv_n_values (&mv)); /* Print and write formats. 
*/ write_format (w, *var_get_print_format (v), seg0_width); @@ -461,19 +467,15 @@ write_variable (struct sfm_writer *w, const struct variable *v) } /* Write the missing values, if any, range first. */ - if (mv_has_range (mv)) + if (mv_has_range (&mv)) { double x, y; - mv_get_range (mv, &x, &y); + mv_get_range (&mv, &x, &y); write_float (w, x); write_float (w, y); } - for (i = 0; i < mv_n_values (mv); i++) - { - union value value; - mv_get_value (mv, &value, i); - write_value (w, &value, seg0_width); - } + for (i = 0; i < mv_n_values (&mv); i++) + write_value (w, mv_get_value (&mv, i), mv_get_width (&mv)); write_variable_continuation_records (w, seg0_width); @@ -493,6 +495,8 @@ write_variable (struct sfm_writer *w, const struct variable *v) write_variable_continuation_records (w, seg_width); } + + mv_destroy (&mv); } /* Writes the value labels for variable V having system file diff --git a/src/data/variable.c b/src/data/variable.c index e4961b23..c0767d7c 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -183,6 +183,7 @@ var_destroy (struct variable *v) const struct vardict_info *vdi = var_get_vardict (v); assert (vdi->dict == NULL); } + mv_destroy (&v->miss); cat_stored_values_destroy (v->obs_vals); var_clear_short_names (v); var_clear_aux (v); @@ -390,7 +391,10 @@ var_set_width (struct variable *v, int new_width) if (mv_is_resizable (&v->miss, new_width)) mv_resize (&v->miss, new_width); else - mv_init (&v->miss, new_width); + { + mv_destroy (&v->miss); + mv_init (&v->miss, new_width); + } if (v->val_labs != NULL) { @@ -425,22 +429,6 @@ var_is_alpha (const struct variable *v) { return var_get_type (v) == VAL_STRING; } - -/* Returns true if variable V is a short string variable, false - otherwise. */ -bool -var_is_short_string (const struct variable *v) -{ - return v->width > 0 && v->width <= MAX_SHORT_STRING; -} - -/* Returns true if variable V is a long string variable, false - otherwise. 
*/ -bool -var_is_long_string (const struct variable *v) -{ - return v->width > MAX_SHORT_STRING; -} /* Returns variable V's missing values. */ const struct missing_values * @@ -459,11 +447,12 @@ var_set_missing_values (struct variable *v, const struct missing_values *miss) if (miss != NULL) { assert (mv_is_resizable (miss, v->width)); + mv_destroy (&v->miss); mv_copy (&v->miss, miss); mv_resize (&v->miss, v->width); } else - mv_init (&v->miss, v->width); + mv_clear (&v->miss); dict_var_changed (v); } diff --git a/src/data/variable.h b/src/data/variable.h index 2e6af0bb..2752aeb3 100644 --- a/src/data/variable.h +++ b/src/data/variable.h @@ -59,8 +59,6 @@ void var_set_width (struct variable *, int width); bool var_is_numeric (const struct variable *); bool var_is_alpha (const struct variable *); -bool var_is_short_string (const struct variable *); -bool var_is_long_string (const struct variable *); /* Variables' missing values. */ const struct missing_values *var_get_missing_values (const struct variable *); diff --git a/src/language/dictionary/apply-dictionary.c b/src/language/dictionary/apply-dictionary.c index 0e8eb6c8..ce5cc15d 100644 --- a/src/language/dictionary/apply-dictionary.c +++ b/src/language/dictionary/apply-dictionary.c @@ -96,16 +96,9 @@ cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds) if (var_has_missing_values (s)) { - if (!var_is_long_string (t)) - { - const struct missing_values *miss = var_get_missing_values (s); - if (mv_is_resizable (miss, var_get_width (t))) - var_set_missing_values (t, miss); - } - else - msg (SW, _("Cannot apply missing values from source file to " - "long string variable %s."), - var_get_name (s)); + const struct missing_values *miss = var_get_missing_values (s); + if (mv_is_resizable (miss, var_get_width (t))) + var_set_missing_values (t, miss); } if (var_is_numeric (s)) diff --git a/src/language/dictionary/missing-values.c b/src/language/dictionary/missing-values.c index 66e642eb..aa3ce698 100644 --- 
a/src/language/dictionary/missing-values.c +++ b/src/language/dictionary/missing-values.c @@ -98,32 +98,31 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) } else { - struct string value; - - mv_init (&mv, MAX_SHORT_STRING); + mv_init (&mv, MV_MAX_STRING); while (!lex_match (lexer, ')')) { + char value[MV_MAX_STRING]; + size_t length; + if (!lex_force_string (lexer)) { deferred_errors = true; break; } - ds_init_string (&value, lex_tokstr (lexer)); - - if (ds_length (&value) > MAX_SHORT_STRING) + length = ds_length (lex_tokstr (lexer)); + if (length > MV_MAX_STRING) { - ds_truncate (&value, MAX_SHORT_STRING); - msg (SE, _("Truncating missing value to short string " - "length (%d characters)."), - MAX_SHORT_STRING); + msg (SE, _("Truncating missing value to maximum " + "acceptable length (%d bytes)."), + MV_MAX_STRING); + length = MV_MAX_STRING; } - else - ds_rpad (&value, MAX_SHORT_STRING, ' '); + memset (value, ' ', MV_MAX_STRING); + memcpy (value, ds_data (lex_tokstr (lexer)), length); - if (!mv_add_str (&mv, ds_data (&value))) + if (!mv_add_str (&mv, value)) deferred_errors = true; - ds_destroy (&value); lex_get (lexer); lex_match (lexer, ','); @@ -142,6 +141,8 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) deferred_errors = true; } } + + mv_destroy (&mv); } lex_match (lexer, '/'); diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c index ed2f5d07..8f2ab7d7 100644 --- a/src/language/dictionary/sys-file-info.c +++ b/src/language/dictionary/sys-file-info.c @@ -582,18 +582,18 @@ describe_variable (const struct variable *v, struct tab_table *t, int r, /* Missing values if any. 
*/ if (flags & DF_MISSING_VALUES && var_has_missing_values (v)) { + const struct missing_values *mv = var_get_missing_values (v); char buf[128]; char *cp; - struct missing_values mv; int cnt = 0; + int i; cp = stpcpy (buf, _("Missing Values: ")); - mv_copy (&mv, var_get_missing_values (v)); - if (mv_has_range (&mv)) + if (mv_has_range (mv)) { double x, y; - mv_pop_range (&mv, &x, &y); + mv_get_range (mv, &x, &y); if (x == LOWEST) cp += sprintf (cp, "LOWEST THRU %g", y); else if (y == HIGHEST) @@ -602,24 +602,24 @@ describe_variable (const struct variable *v, struct tab_table *t, int r, cp += sprintf (cp, "%g THRU %g", x, y); cnt++; } - while (mv_has_value (&mv)) + for (i = 0; i < mv_n_values (mv); i++) { - union value value; - mv_pop_value (&mv, &value); + const union value *value = mv_get_value (mv, i); if (cnt++ > 0) cp += sprintf (cp, "; "); if (var_is_numeric (v)) - cp += sprintf (cp, "%g", value.f); + cp += sprintf (cp, "%g", value->f); else { + int width = var_get_width (v); + int mv_width = MIN (width, MV_MAX_STRING); + *cp++ = '"'; - memcpy (cp, value_str (&value, var_get_width (v)), - var_get_width (v)); - cp += var_get_width (v); + memcpy (cp, value_str (value, width), mv_width); + cp += mv_width; *cp++ = '"'; *cp = '\0'; } - value_destroy (&value, var_get_width (v)); } tab_joint_text (t, 1, r, 2, r, TAB_LEFT, buf); diff --git a/src/ui/gui/missing-val-dialog.c b/src/ui/gui/missing-val-dialog.c index 0c9831ae..8efbaf97 100644 --- a/src/ui/gui/missing-val-dialog.c +++ b/src/ui/gui/missing-val-dialog.c @@ -1,5 +1,5 @@ /* PSPPIRE - a graphical user interface for PSPP. 
- Copyright (C) 2005, 2006 Free Software Foundation + Copyright (C) 2005, 2006, 2009 Free Software Foundation This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -320,9 +320,7 @@ missing_val_dialog_show (struct missing_val_dialog *dialog) if ( mv_has_value (&dialog->mvl)) { gchar *text; - union value value; - mv_get_value (&dialog->mvl, &value, 0); - text = value_to_text (value, *write_spec); + text = value_to_text (*mv_get_value (&dialog->mvl, 0), *write_spec); gtk_entry_set_text (GTK_ENTRY (dialog->discrete), text); g_free (text); } @@ -342,10 +340,9 @@ missing_val_dialog_show (struct missing_val_dialog *dialog) if ( i < n) { gchar *text ; - union value value; - mv_get_value (&dialog->mvl, &value, i); - text = value_to_text (value, *write_spec); + text = value_to_text (*mv_get_value (&dialog->mvl, i), + *write_spec); gtk_entry_set_text (GTK_ENTRY (dialog->mv[i]), text); g_free (text); } diff --git a/src/ui/gui/psppire-var-sheet.c b/src/ui/gui/psppire-var-sheet.c index 7925268d..3ebea2ea 100644 --- a/src/ui/gui/psppire-var-sheet.c +++ b/src/ui/gui/psppire-var-sheet.c @@ -381,12 +381,6 @@ var_sheet_change_active_cell (PsppireVarSheet *vs, customEntry = PSPPIRE_CUSTOM_ENTRY (psppire_sheet_get_entry (sheet)); - if ( var_is_long_string (var)) - g_object_set (customEntry, - "editable", FALSE, - NULL); - - vs->missing_val_dialog->pv = psppire_var_store_get_var (var_store, row); diff --git a/src/ui/gui/text-data-import-dialog.c b/src/ui/gui/text-data-import-dialog.c index 70e69d13..dac8b4c1 100644 --- a/src/ui/gui/text-data-import-dialog.c +++ b/src/ui/gui/text-data-import-dialog.c @@ -310,11 +310,9 @@ apply_dict (const struct dictionary *dict, struct string *s) syntax_gen_pspp (s, "MISSING VALUES %ss (", name); for (j = 0; j < mv_n_values (mv); j++) { - union value value; if (j) ds_put_cstr (s, ", "); - mv_get_value (mv, &value, j); - syntax_gen_value (s, &value, width, format); + 
syntax_gen_value (s, mv_get_value (mv, j), width, format); } if (mv_has_range (mv)) diff --git a/src/ui/gui/var-display.c b/src/ui/gui/var-display.c index 50e0df4b..b615fe52 100644 --- a/src/ui/gui/var-display.c +++ b/src/ui/gui/var-display.c @@ -45,9 +45,7 @@ missing_values_to_string (const PsppireDict *dict, const struct variable *pv, GE gint i; for (i = 0 ; i < n; ++i ) { - union value v; - mv_get_value (miss, &v, i); - mv[i] = value_to_text (v, *fmt); + mv[i] = value_to_text (*mv_get_value (miss, i), *fmt); if ( i > 0 ) g_string_append (gstr, ", "); g_string_append (gstr, mv[i]); @@ -74,10 +72,8 @@ missing_values_to_string (const PsppireDict *dict, const struct variable *pv, GE if ( mv_has_value (miss)) { gchar *ss = 0; - union value v; - mv_get_value (miss, &v, 0); - ss = value_to_text (v, *fmt); + ss = value_to_text (*mv_get_value (miss, 0), *fmt); g_string_append (gstr, ", "); g_string_append (gstr, ss); diff --git a/tests/command/missing-values.sh b/tests/command/missing-values.sh index 6641b520..c01adf31 100755 --- a/tests/command/missing-values.sh +++ b/tests/command/missing-values.sh @@ -66,7 +66,8 @@ if [ $? -ne 0 ] ; then no_result ; fi activity="create program" cat > $TEMPDIR/missing-values.stat << foobar -DATA LIST NOTABLE/str1 1-5 (A) str2 6-8 (A) date1 9-19 (DATE) num1 20-25. +DATA LIST NOTABLE/str1 1-5 (A) str2 6-8 (A) date1 9-19 (DATE) num1 20-25 + longstr 26-36 (A). /* Valid: numeric missing values. MISSING VALUES date1 num1 (1). @@ -96,10 +97,13 @@ MISSING VALUES num1 (1 THRU HI, -1). MISSING VALUES num1 (1 THRU HIGHEST, -1). /* Valid: string missing values. -MISSING VALUES str1 str2 ('abc ','def'). +MISSING VALUES str1 str2 longstr ('abc ','def'). /* Invalid: too long for str2. -MISSING VALUES str1 str2 ('abcde'). +MISSING VALUES str1 str2 longstr ('abcde'). + +/* Invalid: long string missing value longer than 8 bytes. +MISSING VALUES longstr ('abcdefghijk'). /* Invalid: no string ranges. MISSING VALUES str1 ('a' THRU 'z'). 
@@ -120,11 +124,12 @@ $SUPERVISOR $PSPP --testing-mode --error-file=$TEMPDIR/errs $TEMPDIR/missing-val if [ $? -eq 0 ] ; then fail ; fi activity="compare error messages" -diff -w $TEMPDIR/errs - <