From 5c3291dc396b795696e94f47780308fd7ace6fc4 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 10 May 2009 22:23:00 -0700 Subject: [PATCH] Change "union value" to dynamically allocate long strings. Until now, a single "union value" could hold a numeric value or a short string value. A long string value (one longer than MAX_SHORT_STRING) required a number of contiguous "union value"s. This situation was inconvenient sometimes, because any occasion where a long string value might be required (even if it was unlikely) required using dynamic memory allocation. With this change, a value of any type, regardless of whether it is numeric or short or long string, occupies a single "union value". The internal representation of short and long strings is now different, however: long strings are now internally represented by a pointer to dynamically allocated memory. This means that "union value"s must now be initialized and uninitialized properly, to ensure that memory is properly allocated and freed behind the scenes. This change thus has a ripple effect on PSPP code that works with values. In particular, code that deals with cases is greatly changed, because a case now needs to know the type of each value that it contains. Thus, a new concept called a "case prototype", which represents the type and width of each value within a case, is introduced, and every place in PSPP that creates a case must now create a corresponding prototype to go with it. This is why this commit is so big. As part of writing up this commit, it became clear that some code was poor enough that it needed to be rewritten entirely. Therefore, CROSSTABS and T-TEST are almost completely modified by this commit. 
--- Smake | 1 + doc/data-io.texi | 8 +- doc/dev/concepts.texi | 268 +- doc/language.texi | 6 +- doc/statistics.texi | 4 +- doc/transformation.texi | 10 +- perl-module/PSPP.xs | 49 +- src/data/automake.mk | 5 +- src/data/case-map.c | 106 +- src/data/case-map.h | 2 +- src/data/case-matcher.c | 13 +- src/data/case-tmpfile.c | 203 +- src/data/case-tmpfile.h | 6 +- src/data/case.c | 360 ++- src/data/case.h | 36 +- src/data/casegrouper.c | 3 +- src/data/caseinit.c | 51 +- src/data/caseproto.c | 411 +++ src/data/caseproto.h | 209 ++ src/data/casereader-filter.c | 2 +- src/data/casereader-provider.h | 4 +- src/data/casereader-translator.c | 37 +- src/data/casereader.c | 45 +- src/data/casereader.h | 5 +- src/data/casewindow.c | 43 +- src/data/casewindow.h | 7 +- src/data/casewriter-provider.h | 2 +- src/data/casewriter-translator.c | 15 +- src/data/casewriter.c | 67 +- src/data/casewriter.h | 12 +- src/data/category.c | 4 +- src/data/data-in.c | 16 +- src/data/data-out.c | 33 +- src/data/datasheet.c | 939 +++--- src/data/datasheet.h | 30 +- src/data/dictionary.c | 110 +- src/data/dictionary.h | 5 +- src/data/gnumeric-reader.c | 29 +- src/data/lazy-casereader.c | 8 +- src/data/lazy-casereader.h | 4 +- src/data/missing-values.c | 17 +- src/data/por-file-reader.c | 42 +- src/data/por-file-writer.c | 24 +- src/data/procedure.c | 24 +- src/data/psql-reader.c | 21 +- src/data/scratch-writer.c | 6 +- src/data/settings.c | 15 +- src/data/settings.h | 6 +- src/data/sparse-cases.c | 354 --- src/data/sparse-cases.h | 66 - src/data/subcase.c | 67 +- src/data/subcase.h | 14 +- src/data/sys-file-private.c | 12 +- src/data/sys-file-private.h | 5 +- src/data/sys-file-reader.c | 23 +- src/data/sys-file-writer.c | 34 +- src/data/value-labels.c | 491 ++- src/data/value-labels.h | 57 +- src/data/value.c | 219 +- src/data/value.h | 166 +- src/data/vardict.h | 4 +- src/data/variable.c | 28 +- src/data/variable.h | 4 +- src/language/data-io/combine-files.c | 25 +- 
src/language/data-io/data-parser.c | 9 +- src/language/data-io/inpt-pgm.c | 10 +- src/language/dictionary/sys-file-info.c | 25 +- src/language/dictionary/value-labels.c | 16 +- src/language/dictionary/variable-display.c | 1 + src/language/expressions/evaluate.c | 14 +- src/language/stats/aggregate.c | 72 +- src/language/stats/binomial.c | 115 +- src/language/stats/chisquare.c | 80 +- src/language/stats/crosstabs.q | 3252 +++++++++----------- src/language/stats/examine.q | 70 +- src/language/stats/flip.c | 304 +- src/language/stats/freq.c | 11 +- src/language/stats/freq.h | 4 +- src/language/stats/frequencies.q | 133 +- src/language/stats/oneway.q | 40 +- src/language/stats/rank.q | 2 +- src/language/stats/reliability.q | 29 +- src/language/stats/sign.c | 5 +- src/language/stats/t-test.q | 2169 ++++++------- src/language/stats/wilcoxon.c | 44 +- src/language/tests/datasheet-check.c | 487 ++- src/language/xforms/compute.c | 5 +- src/language/xforms/recode.c | 90 +- src/libpspp/automake.mk | 4 + src/libpspp/str.c | 26 +- src/libpspp/str.h | 8 +- src/math/coefficient.c | 9 +- src/math/covariance-matrix.c | 26 +- src/math/group.c | 8 +- src/math/interaction.c | 34 +- src/math/interaction.h | 2 +- src/math/merge.c | 11 +- src/math/merge.h | 10 +- src/math/np.c | 24 +- src/math/sort.c | 28 +- src/math/sort.h | 8 +- src/math/ts/innovations.c | 6 +- src/ui/gui/executor.c | 6 +- src/ui/gui/find-dialog.c | 37 +- src/ui/gui/psppire-data-editor.c | 6 +- src/ui/gui/psppire-data-store.c | 232 +- src/ui/gui/psppire-data-store.h | 2 +- src/ui/gui/psppire-dict.c | 27 +- src/ui/gui/psppire-dict.h | 5 +- src/ui/gui/psppire-var-store.c | 12 +- src/ui/gui/text-data-import-dialog.c | 24 +- src/ui/gui/val-labs-dialog.c | 61 +- src/ui/gui/variable-info-dialog.c | 22 +- src/ui/syntax-gen.c | 2 +- tests/automake.mk | 1 - tests/bugs/crosstabs-crash.sh | 2 +- tests/bugs/crosstabs-crash2.sh | 6 +- tests/bugs/t-test-alpha.sh | 16 +- tests/command/longvars.sh | 2 +- 119 files changed, 6472 
insertions(+), 6074 deletions(-) create mode 100644 src/data/caseproto.c create mode 100644 src/data/caseproto.h delete mode 100644 src/data/sparse-cases.c delete mode 100644 src/data/sparse-cases.h diff --git a/Smake b/Smake index e5ff997a..78789d4f 100644 --- a/Smake +++ b/Smake @@ -86,6 +86,7 @@ GNULIB_MODULES = \ xalloc \ xalloc-die \ xmalloca \ + xmemdup0 \ xsize \ xstrndup \ xvasprintf diff --git a/doc/data-io.texi b/doc/data-io.texi index b7bfda9c..1bad334e 100644 --- a/doc/data-io.texi +++ b/doc/data-io.texi @@ -318,7 +318,7 @@ Defines the following variables: @itemize @bullet @item -@code{NAME}, a 10-character-wide long string variable, in columns 1 +@code{NAME}, a 10-character-wide string variable, in columns 1 through 10. @item @@ -359,15 +359,15 @@ Defines the following variables: @code{ID}, a numeric variable, in columns 1-5 of the first record. @item -@code{NAME}, a 30-character long string variable, in columns 7-36 of the +@code{NAME}, a 30-character string variable, in columns 7-36 of the first record. @item -@code{SURNAME}, a 30-character long string variable, in columns 38-67 of +@code{SURNAME}, a 30-character string variable, in columns 38-67 of the first record. @item -@code{MINITIAL}, a 1-character short string variable, in column 69 of +@code{MINITIAL}, a 1-character string variable, in column 69 of the first record. @item diff --git a/doc/dev/concepts.texi b/doc/dev/concepts.texi index 5876ce36..93605767 100644 --- a/doc/dev/concepts.texi +++ b/doc/dev/concepts.texi @@ -117,76 +117,88 @@ case when it processes it later. @subsection Runtime Typed Values When a value's type is only known at runtime, it is often represented -as a @union{value}, defined in @file{data/value.h}. @union{value} has -two members: a @code{double} named @samp{f} to store a numeric value -and an array of @code{char} named @samp{s} to a store a string value. -A @union{value} does not identify the type or width of the data it -contains. 
Code that works with @union{values}s must therefore have -external knowledge of its content, often through the type and width of -a @struct{variable} (@pxref{Variables}). - -@cindex MAX_SHORT_STRING -@cindex short string -@cindex long string -@cindex string value -The array of @code{char} in @union{value} has only a small, fixed -capacity of @code{MAX_SHORT_STRING} bytes. A value that -fits within this capacity is called a @dfn{short string}. Any wider -string value, which must be represented by more than one -@union{value}, is called a @dfn{long string}. - -@deftypefn Macro int MAX_SHORT_STRING -Maximum width of a short string value, never less than 8 bytes. It is -wider than 8 bytes on systems where @code{double} is either larger -than 8 bytes or has stricter alignment than 8 bytes. -@end deftypefn +as a @union{value}, defined in @file{data/value.h}. A @union{value} +does not identify the type or width of the data it contains. Code +that works with @union{values}s must therefore have external knowledge +of its content, often through the type and width of a +@struct{variable} (@pxref{Variables}). + +@union{value} has one member that clients are permitted to access +directly, a @code{double} named @samp{f} that stores the content of a +numeric @union{value}. It has other members that store the content of +string @union{value}, but client code should use accessor functions +instead of referring to these directly. + +PSPP provides some functions for working with @union{value}s. The +most useful are described below. To use these functions, recall that +a numeric value has a width of 0. -@deftypefn Macro int MIN_LONG_STRING -Minimum width of a long string value, that is, @code{MAX_SHORT_STRING -+ 1}. -@end deftypefn +@deftypefun void value_init (union value *@var{value}, int @var{width}) +Initializes @var{value} as a value of the given @var{width}. 
After +initialization, the data in @var{value} are indeterminate; the caller +is responsible for storing initial data in it. +@end deftypefun -Long string variables are slightly harder to work with than short -string values, because they cannot be conveniently and efficiently -allocated as block scope variables or structure members. The PSPP -language exposes this inconvenience to the user: there are many -circumstances in PSPP syntax where short strings are allowed but not -long strings. Short string variables, for example, may have -user-missing values, but long string variables may not (@pxref{Missing -Observations,,,pspp, PSPP Users Guide}). +@deftypefun void value_destroy (union value *@var{value}, int @var{width}) +Frees auxiliary storage associated with @var{value}, which must have +the given @var{width}. +@end deftypefun -PSPP provides a few functions for working with @union{value}s. The -most useful are described below. To use these functions, recall that -a numeric value has a width of 0. +@deftypefun bool value_needs_init (int @var{width}) +For some widths, @func{value_init} and @func{value_destroy} do not +actually do anything, because no additional storage is needed beyond +the size of @union{value}. This function returns true if @var{width} +is such a width, in which case there is no actual need to call those +functions. This can be a useful optimization if a large number of +@union{value}s of such a width are to be initialized or destroyed. -@deftypefun size_t value_cnt_from_width (int @var{width}) -Returns the number of consecutive @union{value}s that must be -allocated to store a value of the given @var{width}. For a numeric or -short string value, the return value is 1; for long string -variables, it is greater than 1. +This function returns false if @func{value_init} and +@func{value_destroy} are actually required for the given @var{width}. 
+@end deftypefun + +@deftypefun double value_num (const union value *@var{value}) +Returns the numeric value in @var{value}, which must have been +initialized as a numeric value. Equivalent to @code{@var{value}->f}. +@end deftypefun + +@deftypefun {const char *} value_str (const union value *@var{value}, int @var{width}) +@deftypefunx {char *} value_str_rw (union value *@var{value}, int @var{width}) +Returns the string value in @var{value}, which must have been +initialized with positive width @var{width}. The string returned is +not null-terminated. Only @var{width} bytes of returned data may be +accessed. + +The two different functions exist only for @code{const}-correctness. +Otherwise they are identical. + +It is important that @var{width} be the correct value that was passed +to @func{value_init}. Passing a smaller or larger value (e.g.@: +because that number of bytes will be accessed) will not always work +and should be avoided. @end deftypefun @deftypefun void value_copy (union value *@var{dst}, @ const union value *@var{src}, @ int @var{width}) -Copies a value of the given @var{width} from the @union{value} array -starting at @var{src} to the one starting at @var{dst}. The two -arrays must not overlap. +Copies the contents of @union{value} @var{src} to @var{dst}. Both +@var{dst} and @var{src} must have been initialized with the specified +@var{width}. @end deftypefun @deftypefun void value_set_missing (union value *@var{value}, int @var{width}) Sets @var{value} to @code{SYSMIS} if it is numeric or to all spaces if -it is alphanumeric, according to @var{width}. @var{value} must point -to the start of a @union{value} array of the given @var{width}. +it is alphanumeric, according to @var{width}. @var{value} must have +been initialized with the specified @var{width}. 
@end deftypefun @anchor{value_is_resizable} @deftypefun bool value_is_resizable (const union value *@var{value}, int @var{old_width}, int @var{new_width}) -Determines whether @var{value} may be resized from @var{old_width} to -@var{new_width}. Resizing is possible if the following criteria are -met. First, @var{old_width} and @var{new_width} must be both numeric -or both string widths. Second, if @var{new_width} is a short string -width and less than @var{old_width}, resizing is allowed only if bytes +Determines whether @var{value}, which must have been initialized with +the specified @var{old_width}, may be resized to @var{new_width}. +Resizing is possible if the following criteria are met. First, +@var{old_width} and @var{new_width} must be both numeric or both +string widths. Second, if @var{new_width} is a short string width and +less than @var{old_width}, resizing is allowed only if bytes @var{new_width} through @var{old_width} in @var{value} contain only spaces. @@ -196,9 +208,36 @@ These rules are part of those used by @func{mv_is_resizable} and @deftypefun void value_resize (union value *@var{value}, int @var{old_width}, int @var{new_width}) Resizes @var{value} from @var{old_width} to @var{new_width}, which -must be allowed by the rules stated above. This has an effect only if -@var{new_width} is greater than @var{old_width}, in which case the -bytes newly added to @var{value} are cleared to spaces. +must be allowed by the rules stated above. @var{value} must have been +initialized with the specified @var{old_width} before calling this +function. After resizing, @var{value} has width @var{new_width}. + +If @var{new_width} is greater than @var{old_width}, @var{value} will +be padded on the right with spaces to the new width. If +@var{new_width} is less than @var{old_width}, the rightmost bytes of +@var{value} are truncated. 
+@end deftypefun + +@deftypefun bool value_equal (const union value *@var{a}, const union value *@var{b}, int @var{width}) +Compares @var{a} and @var{b}, which must both have width +@var{width}. Returns true if their contents are the same, false if +they differ. +@end deftypefun + +@deftypefun int value_compare_3way (const union value *@var{a}, const union value *@var{b}, int @var{width}) +Compares @var{a} and @var{b}, which must both have width +@var{width}. Returns -1 if @var{a} is less than @var{b}, 0 if they +are equal, or 1 if @var{a} is greater than @var{b}. + +Numeric values are compared numerically, with @code{SYSMIS} comparing +less than any real number. String values are compared +lexicographically byte-by-byte. +@end deftypefun + +@deftypefun size_t value_hash (const union value *@var{value}, int @var{width}, unsigned int @var{basis}) +Computes and returns a hash of @var{value}, which must have the +specified @var{width}. The value in @var{basis} is folded into the +hash. @end deftypefun @node Input and Output Formats @@ -617,8 +656,9 @@ These functions provide the ability to convert data fields into @deftypefun bool data_in (struct substring @var{input}, enum legacy_encoding @var{legacy_encoding}, enum fmt_type @var{type}, int @var{implied_decimals}, int @var{first_column}, union value *@var{output}, int @var{width}) Parses @var{input} as a field containing data in the given format -@var{type}. The resulting value is stored in @var{output}, which has -the given @var{width}. For consistency, @var{width} must be 0 if +@var{type}. The resulting value is stored in @var{output}, which the +caller must have initialized with the given @var{width}. For +consistency, @var{width} must be 0 if @var{type} is a numeric format type and greater than 0 if @var{type} is a string format type. @@ -1088,75 +1128,65 @@ was removed, false otherwise. 
@subsection Iterating through Value Labels These functions allow iteration through the set of value labels -represented by a @struct{val_labs} object. They are usually used in -the context of a @code{for} loop: +represented by a @struct{val_labs} object. They may be used in the +context of a @code{for} loop: @example struct val_labs val_labs; -struct val_labs_iterator *i; -struct val_lab *vl; +const struct val_lab *vl; @dots{} -for (vl = val_labs_first (val_labs, &i); vl != NULL; - vl = val_labs_next (val_labs, &i)) +for (vl = val_labs_first (val_labs); vl != NULL; + vl = val_labs_next (val_labs, vl)) @{ @dots{}@r{do something with @code{vl}}@dots{} @} @end example -The value labels in a @struct{val_labs} must not be modified as it is -undergoing iteration. +Value labels should not be added or deleted from a @struct{val_labs} +as it is undergoing iteration. -@deftp {Structure} {struct val_lab} -Represents a value label for iteration purposes, with two -client-visible members: - -@table @code -@item union value value -Value being labeled, of the same width as the @struct{val_labs} being -iterated. - -@item const char *label -The label, as a null-terminated string. -@end table -@end deftp - -@deftp {Structure} {struct val_labs_iterator} -Opaque object that represents the current state of iteration through a -set of value value labels. Automatically destroyed by successful -completion of iteration. Must be destroyed manually in other -circumstances, by calling @func{val_labs_done}. -@end deftp - -@deftypefun {struct val_lab *} val_labs_first (const struct val_labs *@var{val_labs}, struct val_labs_iterator **@var{iterator}) -If @var{val_labs} contains at least one value label, starts an -iteration through @var{val_labs}, initializes @code{*@var{iterator}} -to point to a newly allocated iterator, and returns the first value -label in @var{val_labs}. If @var{val_labs} is empty, sets -@code{*@var{iterator}} to null and returns a null pointer. 
+@deftypefun {const struct val_lab *} val_labs_first (const struct val_labs *@var{val_labs}) +Returns the first value label in @var{val_labs}, if it contains at +least one value label, or a null pointer if it does not contain any +value labels. +@end deftypefun -This function creates iterators that traverse sets of value labels in -no particular order. +@deftypefun {const struct val_lab *} val_labs_next (const struct val_labs *@var{val_labs}, const struct val_lab *@var{vl}) +Returns the value label in @var{val_labs} following @var{vl}, if +@var{vl} is not the last value label in @var{val_labs}, or a null +pointer if there are no value labels following @var{vl}. @end deftypefun -@deftypefun {struct val_lab *} val_labs_first_sorted (const struct val_labs *@var{val_labs}, struct val_labs_iterator **@var{iterator}) -Same as @func{val_labs_first}, except that the created iterator -traverses the set of value labels in ascending order of value. +@deftypefun {const struct val_lab **} val_labs_sorted (const struct val_labs *@var{val_labs}) +Allocates and returns an array of pointers to value labels, which are +sorted in increasing order by value. The array has +@code{val_labs_count (@var{val_labs})} elements. The caller is +responsible for freeing the array with @func{free} (but must not free +any of the @struct{val_lab} elements that the array points to). @end deftypefun -@deftypefun {struct val_lab *} val_labs_next (const struct val_labs *@var{val_labs}, struct val_labs_iterator **@var{iterator}) -Advances an iterator created with @func{val_labs_first} or -@func{val_labs_first_sorted} to the next value label, which is -returned. If the set of value labels is exhausted, returns a null -pointer after freeing @code{*@var{iterator}} and setting it to a null -pointer. +The iteration functions above work with pointers to @struct{val_lab} +which is an opaque data structure that users of @struct{val_labs} must +not modify or free directly. 
The following functions work with +objects of this type: + +@deftypefun {const union value *} val_lab_get_value (const struct val_lab *@var{vl}) +Returns the value of value label @var{vl}. The caller must not modify +or free the returned value. (To achieve a similar result, remove the +value label with @func{val_labs_remove}, then add the new value with +@func{val_labs_add}.) + +The width of the returned value cannot be determined directly from +@var{vl}. It may be obtained by calling @func{val_labs_get_width} on +the @struct{val_labs} that @var{vl} is in. @end deftypefun -@deftypefun void val_labs_done (struct val_labs_iterator **@var{iterator}) -Frees @code{*@var{iterator}} and sets it to a null pointer. Does -not need to be called explicitly if @func{val_labs_next} returns a -null pointer, indicating that all value labels have been visited. +@deftypefun {const char *} val_lab_get_label (const struct val_lab *@var{vl}) +Returns the label in @var{vl} as a null-terminated string. The caller +must not modify or free the returned string. (Use +@func{val_labs_replace} to change a value label.) @end deftypefun @node Variables @@ -1290,12 +1320,6 @@ Returns true if @var{var} is a string variable of width greater than @code{MAX_SHORT_STRING}, false otherwise. @end deftypefun -@deftypefun size_t var_get_value_cnt (const struct variable *@var{var}) -Returns the number of @union{value}s needed to hold an instance of -variable @var{var}. @code{var_get_value_cnt (var)} is equivalent to -@code{value_cnt_from_width (var_get_width (var))}. -@end deftypefun - @node Variable Missing Values @subsection Variable Missing Values @@ -1313,8 +1337,8 @@ Tests whether @var{value} is a missing value of the given @var{class} for variable @var{var} and returns true if so, false otherwise. @func{var_is_num_missing} may only be applied to numeric variables; @func{var_is_str_missing} may only be applied to string variables. 
-For string variables, @var{value} must contain exactly as many -characters as @var{var}'s width. +@var{value} must have been initialized with the same width as +@var{var}. @code{var_is_@var{type}_missing (@var{var}, @var{value}, @var{class})} is equivalent to @code{mv_is_@var{type}_missing @@ -1339,7 +1363,7 @@ resizable to @var{var}'s width (@pxref{mv_resize}). The caller retains ownership of @var{miss}. @end deftypefun -b@deftypefun void var_clear_missing_values (struct variable *@var{var}) +@deftypefun void var_clear_missing_values (struct variable *@var{var}) Clears @var{var}'s missing values. Equivalent to @code{var_set_missing_values (@var{var}, NULL)}. @end deftypefun @@ -1360,11 +1384,13 @@ value: @deftypefun {const char *} var_lookup_value_label (const struct variable *@var{var}, const union value *@var{value}) Looks for a label for @var{value} in @var{var}'s set of value labels. -Returns the label if one exists, otherwise a null pointer. +@var{value} must have the same width as @var{var}. Returns the label +if one exists, otherwise a null pointer. @end deftypefun @deftypefun void var_append_value_name (const struct variable *@var{var}, const union value *@var{value}, struct string *@var{str}) Looks for a label for @var{value} in @var{var}'s set of value labels. +@var{value} must have the same width as @var{var}. If a label exists, it will be appended to the string pointed to by @var{str}. Otherwise, it formats @var{value} using @var{var}'s print format (@pxref{Input and Output Formats}) @@ -1406,7 +1432,8 @@ the variable (making a second copy): @deftypefun bool var_add_value_label (struct variable *@var{var}, const union value *@var{value}, const char *@var{label}) Attempts to add a copy of @var{label} as a label for @var{value} for -the given @var{var}. If @var{value} already has a label, then the old +the given @var{var}. @var{value} must have the same width as +@var{var}. If @var{value} already has a label, then the old label is retained. 
Returns true if a label is added, false if there was an existing label for @var{value} or if @var{var} is a long string variable. Either way, the caller retains ownership of @var{value} and @@ -1415,7 +1442,8 @@ variable. Either way, the caller retains ownership of @var{value} and @deftypefun void var_replace_value_label (struct variable *@var{var}, const union value *@var{value}, const char *@var{label}) Attempts to add a copy of @var{label} as a label for @var{value} for -the given @var{var}. If @var{value} already has a label, then +the given @var{var}. @var{value} must have the same width as +@var{var}. If @var{value} already has a label, then @var{label} replaces the old label. Either way, the caller retains ownership of @var{value} and @var{label}. diff --git a/doc/language.texi b/doc/language.texi index 3a7302cc..50e8f646 100644 --- a/doc/language.texi +++ b/doc/language.texi @@ -447,13 +447,9 @@ Numeric or string. @item Width (string variables only) String variables with a width of 8 characters or fewer are called @dfn{short string variables}. Short string variables -can be used in many procedures where @dfn{long string variables} (those +may be used in a few contexts where @dfn{long string variables} (those with widths greater than 8) are not allowed. -Certain systems may consider strings longer than 8 -characters to be short strings. Eight characters represents a minimum -figure for the maximum length of a short string. - @item Position Variables in the dictionary are arranged in a specific order. @cmd{DISPLAY} can be used to show this order: see @ref{DISPLAY}. diff --git a/doc/statistics.texi b/doc/statistics.texi index 5430c6bc..921ea857 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -348,9 +348,7 @@ is present, the VARIABLES subcommand must precede the TABLES subcommand. In general mode, numeric and string variables may be specified on -TABLES. Although long string variables are allowed, only their -initial short-string parts are used. 
In integer mode, only numeric -variables are allowed. +TABLES. In integer mode, only numeric variables are allowed. The MISSING subcommand determines the handling of user-missing values. When set to TABLE, the default, missing values are dropped on a table by diff --git a/doc/transformation.texi b/doc/transformation.texi index 5eedba64..fe08b9cd 100644 --- a/doc/transformation.texi +++ b/doc/transformation.texi @@ -85,7 +85,7 @@ variables. Each aggregation variable receives the results of applying the specified aggregation function to the corresponding source variable. The MEAN, MEDIAN, SD, and SUM aggregation functions may only be applied to numeric variables. All the rest may be applied to numeric -and short and long string variables. +and string variables. The available aggregation functions are as follows: @@ -239,7 +239,7 @@ COMPUTE vector(index) = expression. @cmd{COMPUTE} assigns the value of an expression to a target variable. For each case, the expression is evaluated and its value -assigned to the target variable. Numeric and short and long string +assigned to the target variable. Numeric and string variables may be assigned. When a string expression's width differs from the target variable's width, the string result of the expression is truncated or padded with spaces on the right as necessary. The @@ -290,7 +290,7 @@ one or more @dfn{test} variables for each case. The target variable values are always nonnegative integers. They are never missing. The target variable is assigned an F8.2 output format. -@xref{Input and Output Formats}. Any variables, including long and short +@xref{Input and Output Formats}. Any variables, including string variables, may be test variables. User-missing values of test variables are treated just like any other @@ -435,7 +435,7 @@ Specify a boolean-valued expression (@pxref{Expressions}) to be tested following the IF keyword. This expression is evaluated for each case. 
If the value is true, then the value of the expression is computed and assigned to the specified variable. If the value is false or missing, -nothing is done. Numeric and short and long string variables may be +nothing is done. Numeric and string variables may be assigned. When a string expression's width differs from the target variable's width, the string result of the expression is truncated or padded with spaces on the right as necessary. The expression and @@ -481,7 +481,7 @@ dest_value may take the following forms: @cmd{RECODE} translates data from one range of values to another, via flexible user-specified mappings. Data may be remapped -in-place or copied to new variables. Numeric, short string, and long +in-place or copied to new variables. Numeric and string data can be recoded. Specify the list of source variables, followed by one or more mapping diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs index 237ae95f..13386588 100644 --- a/perl-module/PSPP.xs +++ b/perl-module/PSPP.xs @@ -113,8 +113,9 @@ scalar_to_value (union value *val, SV *scalar, const struct variable *var) { STRLEN len; const char *p = SvPV (scalar, len); - memset (val->s, ' ', var_get_width (var)); - memcpy (val->s, p, len); + int width = var_get_width (var); + value_set_missing (val, width); + memcpy (value_str_rw (val, width), p, len); } } @@ -130,7 +131,10 @@ value_to_scalar (const union value *val, const struct variable *var) return newSVnv (val->f); } else - return newSVpvn (val->s, var_get_width (var)); + { + int width = var_get_width (var); + return newSVpvn (value_str (val, width), width); + } } @@ -142,12 +146,11 @@ var_set_input_format (struct variable *v, input_format ip_fmt) var_attach_aux (v, if_copy, var_dtor_free); } -static union value * -make_value_from_scalar (SV *val, const struct variable *var) +static void +make_value_from_scalar (union value *uv, SV *val, const struct variable *var) { - union value *uv = value_create (var_get_width (var)); + value_init (uv, 
var_get_width (var)); scalar_to_value (uv, val, var); - return uv; } @@ -171,12 +174,13 @@ format_value (val, var) CODE: SV *ret; const struct fmt_spec *fmt = var_get_print_format (var); - union value *uv = make_value_from_scalar (val, var); + union value uv; char *s; + make_value_from_scalar (&uv, val, var); s = malloc (fmt->w); memset (s, '\0', fmt->w); - data_out (uv, fmt, s); - free (uv); + data_out (&uv, fmt, s); + value_destroy (&uv, var_get_width (var)); ret = newSVpv (s, fmt->w); free (s); RETVAL = ret; @@ -189,9 +193,11 @@ value_is_missing (val, var) SV *val struct variable *var CODE: - union value *uv = make_value_from_scalar (val, var); - int ret = var_is_value_missing (var, uv, MV_ANY); - free (uv); + union value uv; + int ret; + make_value_from_scalar (&uv, val, var); + ret = var_is_value_missing (var, &uv, MV_ANY); + value_destroy (&uv, var_get_width (var)); RETVAL = ret; OUTPUT: RETVAL @@ -415,7 +421,7 @@ CODE: sv_setpv (errstr, "Cannot add label to a long string variable"); XSRETURN_IV (0); } - strncpy (the_value.s, SvPV_nolen(key), MAX_SHORT_STRING); + strncpy (the_value.short_string, SvPV_nolen(key), MAX_SHORT_STRING); } if (! 
var_add_value_label (var, &the_value, label) ) { @@ -486,20 +492,20 @@ get_value_labels (var) struct variable *var CODE: HV *labelhash = (HV *) sv_2mortal ((SV *) newHV()); - struct val_lab *vl; + const struct val_lab *vl; struct val_labs_iterator *viter = NULL; const struct val_labs *labels = var_get_value_labels (var); if ( labels ) { - for (vl = val_labs_first (labels, &viter); + for (vl = val_labs_first (labels); vl; - vl = val_labs_next (labels, &viter)) + vl = val_labs_next (labels, vl)) { SV *sv = value_to_scalar (&vl->value, var); STRLEN len; const char *s = SvPV (sv, len); - hv_store (labelhash, s, len, newSVpv (vl->label, 0), 0); + hv_store (labelhash, s, len, newSVpv (val_lab_get_label (vl), 0), 0); } } @@ -589,7 +595,7 @@ CODE: if ( av_len (av_case) >= dict_get_var_cnt (sfi->dict)) XSRETURN_UNDEF; - c = case_create (dict_get_next_value_idx (sfi->dict)); + c = case_create (dict_get_proto (sfi->dict)); dict_get_vars (sfi->dict, &vv, &nv, 1u << DC_ORDINARY | 1u << DC_SYSTEM); @@ -623,10 +629,7 @@ CODE: { const struct variable *v = vv[i++]; union value *val = case_data_rw (c, v); - if ( var_is_numeric (v)) - val->f = SYSMIS; - else - memset (val->s, ' ', var_get_width (v)); + value_set_missing (val, var_get_width (v)); } RETVAL = casewriter_write (sfi->writer, c); finish: diff --git a/src/data/automake.mk b/src/data/automake.mk index 4169a57e..a249f5ad 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -1,4 +1,3 @@ - noinst_LTLIBRARIES += src/data/libdata.la src_data_libdata_la_CPPFLAGS = $(LIBXML2_CFLAGS) $(PG_CFLAGS) $(AM_CPPFLAGS) @@ -18,6 +17,8 @@ src_data_libdata_la_SOURCES = \ src/data/case-map.h \ src/data/case-matcher.c \ src/data/case-matcher.h \ + src/data/caseproto.c \ + src/data/caseproto.h \ src/data/case.c \ src/data/casegrouper.c \ src/data/casegrouper.h \ @@ -86,8 +87,6 @@ src_data_libdata_la_SOURCES = \ src/data/settings.h \ src/data/short-names.c \ src/data/short-names.h \ - src/data/sparse-cases.c \ - src/data/sparse-cases.h 
\ src/data/subcase.c \ src/data/subcase.h \ src/data/sys-file-private.c \ diff --git a/src/data/case-map.c b/src/data/case-map.c index cff621f3..411b9f0d 100644 --- a/src/data/case-map.c +++ b/src/data/case-map.c @@ -33,43 +33,41 @@ /* A case map. */ struct case_map { - size_t value_cnt; /* Number of values in map. */ - int *map; /* For each destination index, the - corresponding source index. */ + struct caseproto *proto; /* Prototype for output cases. */ + int *map; /* For each destination index, the + corresponding source index. */ }; static struct ccase *translate_case (struct ccase *, void *map_); static bool destroy_case_map (void *map_); -/* Creates and returns an empty map. */ +/* Creates and returns an empty map that outputs cases matching + PROTO. */ static struct case_map * -create_case_map (size_t n) +create_case_map (const struct caseproto *proto) { + size_t n_values = caseproto_get_n_widths (proto); struct case_map *map; size_t i; map = xmalloc (sizeof *map); - map->value_cnt = n; - map->map = xnmalloc (n, sizeof *map->map); - for (i = 0; i < map->value_cnt; i++) + map->proto = caseproto_ref (proto); + map->map = xnmalloc (n_values, sizeof *map->map); + for (i = 0; i < n_values; i++) map->map[i] = -1; return map; } -/* Inserts into MAP a mapping of the CNT values starting at FROM - to the CNT values starting at TO. */ +/* Inserts into MAP a mapping of the value at index FROM in the + source case to the value at index TO in the destination + case. */ static void -insert_mapping (struct case_map *map, size_t from, size_t to, size_t cnt) +insert_mapping (struct case_map *map, size_t from, size_t to) { - size_t i; - - assert (to + cnt <= map->value_cnt); - for (i = 0; i < cnt; i++) - { - assert (map->map[to + i] == -1); - map->map[to + i] = from + i; - } + assert (to < caseproto_get_n_widths (map->proto)); + assert (map->map[to] == -1); + map->map[to] = from; } /* Destroys case map MAP. 
*/ @@ -78,6 +76,7 @@ case_map_destroy (struct case_map *map) { if (map != NULL) { + caseproto_unref (map->proto); free (map->map); free (map); } @@ -92,11 +91,12 @@ case_map_execute (const struct case_map *map, struct ccase *src) { if (map != NULL) { + size_t n_values = caseproto_get_n_widths (map->proto); struct ccase *dst; size_t dst_idx; - dst = case_create (map->value_cnt); - for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++) + dst = case_create (map->proto); + for (dst_idx = 0; dst_idx < n_values; dst_idx++) { int src_idx = map->map[dst_idx]; if (src_idx != -1) @@ -109,12 +109,12 @@ case_map_execute (const struct case_map *map, struct ccase *src) return src; } -/* Returns the number of `union value's in cases created by - MAP. */ -size_t -case_map_get_value_cnt (const struct case_map *map) +/* Returns the prototype for output cases created by MAP. The + caller must not unref the returned case prototype. */ +const struct caseproto * +case_map_get_proto (const struct case_map *map) { - return map->value_cnt; + return map->proto; } /* Creates and returns a new casereader whose cases are produced @@ -130,7 +130,7 @@ case_map_create_input_translator (struct case_map *map, struct casereader *subreader) { return casereader_create_translator (subreader, - case_map_get_value_cnt (map), + case_map_get_proto (map), translate_case, destroy_case_map, map); @@ -150,7 +150,7 @@ case_map_create_output_translator (struct case_map *map, struct casewriter *subwriter) { return casewriter_create_translator (subwriter, - case_map_get_value_cnt (map), + case_map_get_proto (map), translate_case, destroy_case_map, map); @@ -187,31 +187,25 @@ struct case_map * case_map_to_compact_dict (const struct dictionary *d, unsigned int exclude_classes) { - size_t var_cnt; + size_t n_vars = dict_get_var_cnt (d); + struct caseproto *proto; struct case_map *map; - size_t value_idx; + size_t n_values; size_t i; - assert ((exclude_classes & ~((1u << DC_ORDINARY) - | (1u << DC_SYSTEM) - | (1u << 
DC_SCRATCH))) == 0); + /* Create the case mapping. */ + proto = dict_get_compacted_proto (d, exclude_classes); + map = create_case_map (proto); + caseproto_unref (proto); - map = create_case_map (dict_count_values (d, exclude_classes)); - var_cnt = dict_get_var_cnt (d); - value_idx = 0; - for (i = 0; i < var_cnt; i++) + /* Add the values to the case mapping. */ + n_values = 0; + for (i = 0; i < n_vars; i++) { struct variable *v = dict_get_var (d, i); - enum dict_class class = dict_class_from_id (var_get_name (v)); - - if (!(exclude_classes & (1u << class))) - { - size_t value_cnt = var_get_value_cnt (v); - insert_mapping (map, var_get_case_index (v), value_idx, value_cnt); - value_idx += value_cnt; - } + if (!(exclude_classes & (1u << var_get_dict_class (v)))) + insert_mapping (map, var_get_case_index (v), n_values++); } - assert (value_idx == map->value_cnt); return map; } @@ -250,20 +244,20 @@ case_map_from_dict (const struct dictionary *d) { struct case_map *map; size_t var_cnt = dict_get_var_cnt (d); + size_t n_values; size_t i; bool identity_map = true; - map = create_case_map (dict_get_next_value_idx (d)); + map = create_case_map (dict_get_proto (d)); for (i = 0; i < var_cnt; i++) { struct variable *v = dict_get_var (d, i); - size_t value_cnt = var_get_value_cnt (v); - int *src_fv = (int *) var_detach_aux (v); + int *src_fv = var_detach_aux (v); if (var_get_case_index (v) != *src_fv) identity_map = false; - insert_mapping (map, *src_fv, var_get_case_index (v), value_cnt); + insert_mapping (map, *src_fv, var_get_case_index (v)); free (src_fv); } @@ -274,8 +268,9 @@ case_map_from_dict (const struct dictionary *d) return NULL; } - while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1) - map->value_cnt--; + n_values = caseproto_get_n_widths (map->proto); + while (n_values > 0 && caseproto_get_width (map->proto, n_values - 1) == -1) + map->proto = caseproto_remove_widths (map->proto, --n_values, 1); return map; } @@ -292,14 +287,13 @@ case_map_by_name 
(const struct dictionary *old, size_t var_cnt = dict_get_var_cnt (new); size_t i; - map = create_case_map (dict_get_next_value_idx (new)); + map = create_case_map (dict_get_proto (new)); for (i = 0; i < var_cnt; i++) { struct variable *nv = dict_get_var (new, i); struct variable *ov = dict_lookup_var_assert (old, var_get_name (nv)); assert (var_get_width (nv) == var_get_width (ov)); - insert_mapping (map, var_get_case_index (ov), var_get_case_index (nv), - var_get_value_cnt (ov)); + insert_mapping (map, var_get_case_index (ov), var_get_case_index (nv)); } return map; } @@ -310,6 +304,6 @@ void case_map_dump (const struct case_map *cm) { int i; - for (i = 0 ; i < cm->value_cnt; ++i ) + for (i = 0 ; i < caseproto_get_n_widths (cm->proto); ++i ) printf ("%d -> %d\n", i, cm->map[i]); } diff --git a/src/data/case-map.h b/src/data/case-map.h index 010dd487..cefbe766 100644 --- a/src/data/case-map.h +++ b/src/data/case-map.h @@ -36,7 +36,7 @@ struct case_map *case_map_create (void); void case_map_destroy (struct case_map *); struct ccase *case_map_execute (const struct case_map *, struct ccase *); -size_t case_map_get_value_cnt (const struct case_map *); +const struct caseproto *case_map_get_proto (const struct case_map *); struct casereader *case_map_create_input_translator (struct case_map *, struct casereader *); diff --git a/src/data/case-matcher.c b/src/data/case-matcher.c index a1251cb7..37cb4a60 100644 --- a/src/data/case-matcher.c +++ b/src/data/case-matcher.c @@ -68,8 +68,11 @@ case_matcher_add_input (struct case_matcher *cm, const struct subcase *by, struct case_matcher_input *input; if (cm->n_inputs == 0) - cm->by_values = xmalloc (subcase_get_n_values (by) - * sizeof *cm->by_values); + { + cm->by_values = xmalloc (sizeof *cm->by_values + * subcase_get_n_fields (by)); + caseproto_init_values (subcase_get_proto (by), cm->by_values); + } else assert (subcase_conformable (by, &cm->inputs[0].by_vars)); @@ -90,6 +93,12 @@ case_matcher_destroy (struct case_matcher 
*cm) { size_t i; + if (cm->by_values != NULL) + { + caseproto_destroy_values (subcase_get_proto (&cm->inputs[0].by_vars), + cm->by_values); + free (cm->by_values); + } for (i = 0; i < cm->n_inputs; i++) { struct case_matcher_input *input = &cm->inputs[i]; diff --git a/src/data/case-tmpfile.c b/src/data/case-tmpfile.c index ca3a6a56..57447866 100644 --- a/src/data/case-tmpfile.c +++ b/src/data/case-tmpfile.c @@ -24,41 +24,65 @@ #include #include +#include #include "error.h" #include "xalloc.h" -#include "gettext.h" -#define _(msgid) gettext (msgid) - /* A temporary file that stores an array of cases. */ struct case_tmpfile { struct taint *taint; /* Taint. */ - FILE *file; /* Underlying file. */ - size_t value_cnt; /* Number of `union value's per case. */ - - /* Current byte offset in file. We track this manually, - instead of using ftello, because in glibc ftello flushes - the stream buffer, making the common case of sequential - access to cases unreasonably slow. */ - off_t position; + struct caseproto *proto; /* Format of cases in the tmpfile. */ + size_t case_size; /* Number of bytes per case. */ + size_t *offsets; /* Offset to each value. */ + struct tmpfile *tmpfile; /* Temporary file. */ }; -/* Creates and returns a new case_tmpfile. */ +/* Returns the number of bytes needed to store a value with the + given WIDTH on disk. */ +static size_t +width_to_n_bytes (int width) +{ + return width == 0 ? sizeof (double) : width; +} + +/* Returns the address of the data in VALUE (for reading or + writing to/from disk). VALUE must have the given WIDTH. */ +static void * +value_to_data (const union value *value_, int width) +{ + union value *value = (union value *) value_; + assert (sizeof value->f == sizeof (double)); + if (width == 0) + return &value->f; + else + return value_str_rw (value, width); +} + +/* Creates and returns a new case_tmpfile that will store cases + that match case prototype PROTO. The caller retains + ownership of PROTO. 
*/ struct case_tmpfile * -case_tmpfile_create (size_t value_cnt) +case_tmpfile_create (const struct caseproto *proto) { - struct case_tmpfile *ctf = xmalloc (sizeof *ctf); + struct case_tmpfile *ctf; + size_t n_values; + size_t i; + + ctf = xmalloc (sizeof *ctf); ctf->taint = taint_create (); - ctf->file = tmpfile (); - if (ctf->file == NULL) + ctf->tmpfile = tmpfile_create (); + ctf->proto = caseproto_ref (proto); + ctf->case_size = 0; + n_values = caseproto_get_n_widths (proto); + ctf->offsets = xmalloc (n_values * sizeof *ctf->offsets); + for (i = 0; i < n_values; i++) { - error (0, errno, _("failed to create temporary file")); - taint_set_taint (ctf->taint); + size_t width = caseproto_get_width (proto, i); + ctf->offsets[i] = ctf->case_size; + ctf->case_size += width == -1 ? 0 : width == 0 ? sizeof (double) : width; } - ctf->value_cnt = value_cnt; - ctf->position = 0; return ctf; } @@ -73,8 +97,9 @@ case_tmpfile_destroy (struct case_tmpfile *ctf) if (ctf != NULL) { struct taint *taint = ctf->taint; - if (ctf->file != NULL) - fclose (ctf->file); + tmpfile_destroy (ctf->tmpfile); + caseproto_unref (ctf->proto); + free (ctf->offsets); free (ctf); ok = taint_destroy (taint); } @@ -104,98 +129,32 @@ case_tmpfile_get_taint (const struct case_tmpfile *ctf) return ctf->taint; } -/* Seeks CTF's underlying file to the start of `union value' - VALUE_IDX within case CASE_IDX. - Returns true if the seek is successful and CTF is not - otherwise tainted, false otherwise. 
*/ -static bool -do_seek (const struct case_tmpfile *ctf_, - casenumber case_idx, size_t value_idx) -{ - struct case_tmpfile *ctf = (struct case_tmpfile *) ctf_; - - if (!case_tmpfile_error (ctf)) - { - off_t value_ofs = value_idx + (off_t) ctf->value_cnt * case_idx; - off_t byte_ofs = sizeof (union value) * value_ofs; - - if (ctf->position == byte_ofs) - return true; - else if (fseeko (ctf->file, byte_ofs, SEEK_SET) == 0) - { - ctf->position = byte_ofs; - return true; - } - else - { - error (0, errno, _("seeking in temporary file")); - case_tmpfile_force_error (ctf); - } - } - - return false; -} - -/* Reads BYTES bytes from CTF's underlying file into BUFFER. - CTF must not be tainted upon entry into this function. - Returns true if successful, false upon an I/O error (in which - case CTF is marked tainted). */ -static bool -do_read (const struct case_tmpfile *ctf_, size_t bytes, void *buffer) -{ - struct case_tmpfile *ctf = (struct case_tmpfile *) ctf_; - - assert (!case_tmpfile_error (ctf)); - if (fread (buffer, bytes, 1, ctf->file) != 1) - { - case_tmpfile_force_error (ctf); - if (ferror (ctf->file)) - error (0, errno, _("reading temporary file")); - else if (feof (ctf->file)) - error (0, 0, _("unexpected end of file reading temporary file")); - else - NOT_REACHED (); - return false; - } - ctf->position += bytes; - return true; -} - -/* Writes BYTES bytes from BUFFER into CTF's underlying file. - CTF must not be tainted upon entry into this function. - Returns true if successful, false upon an I/O error (in which - case CTF is marked tainted). 
*/ -static bool -do_write (struct case_tmpfile *ctf, size_t bytes, const void *buffer) -{ - assert (!case_tmpfile_error (ctf)); - if (fwrite (buffer, bytes, 1, ctf->file) != 1) - { - case_tmpfile_force_error (ctf); - error (0, errno, _("writing to temporary file")); - return false; - } - ctf->position += bytes; - return true; -} - -/* Reads VALUE_CNT values into VALUES, from the case numbered - CASE_IDX starting START_VALUE values into that case. - Returns true if successful, false if CTF is tainted or an I/O - error occurs during the operation. +/* Reads N_VALUES values into VALUES, from the case numbered + CASE_IDX starting START_VALUE values into that case. Returns + true if successful, false if CTF is tainted or an I/O error + occurs during the operation. The results of this function are undefined if any of the values read have not been previously written to CTF. */ bool case_tmpfile_get_values (const struct case_tmpfile *ctf, casenumber case_idx, size_t start_value, - union value values[], size_t value_cnt) + union value values[], size_t n_values) { - assert (value_cnt <= ctf->value_cnt); - assert (value_cnt + start_value <= ctf->value_cnt); + off_t case_offset = (off_t) ctf->case_size * case_idx; + size_t i; - return (do_seek (ctf, case_idx, start_value) - && do_read (ctf, sizeof *values * value_cnt, values)); + assert (caseproto_range_is_valid (ctf->proto, start_value, n_values)); + for (i = start_value; i < start_value + n_values; i++) + { + int width = caseproto_get_width (ctf->proto, i); + if (width != -1 + && !tmpfile_read (ctf->tmpfile, case_offset + ctf->offsets[i], + width_to_n_bytes (width), + value_to_data (&values[i], width))) + return false; + } + return true; } /* Reads the case numbered CASE_IDX from CTF. 
@@ -207,9 +166,9 @@ case_tmpfile_get_values (const struct case_tmpfile *ctf, struct ccase * case_tmpfile_get_case (const struct case_tmpfile *ctf, casenumber case_idx) { - struct ccase *c = case_create (ctf->value_cnt); - if (case_tmpfile_get_values (ctf, case_idx, 0, - case_data_all_rw (c), ctf->value_cnt)) + struct ccase *c = case_create (ctf->proto); + if (case_tmpfile_get_values (ctf, case_idx, 0, case_data_all_rw (c), + caseproto_get_n_widths (ctf->proto))) return c; else { @@ -218,21 +177,29 @@ case_tmpfile_get_case (const struct case_tmpfile *ctf, casenumber case_idx) } } -/* Writes VALUE_CNT values from VALUES, into the case numbered +/* Writes N_VALUES values from VALUES, into the case numbered CASE_IDX starting START_VALUE values into that case. Returns true if successful, false if CTF is tainted or an I/O error occurs during the operation. */ bool case_tmpfile_put_values (struct case_tmpfile *ctf, casenumber case_idx, size_t start_value, - const union value values[], size_t value_cnt) - + const union value values[], size_t n_values) { - assert (value_cnt <= ctf->value_cnt); - assert (value_cnt + start_value <= ctf->value_cnt); + off_t case_offset = (off_t) ctf->case_size * case_idx; + size_t i; - return (do_seek (ctf, case_idx, start_value) - && do_write (ctf, sizeof *values * value_cnt, values)); + assert (caseproto_range_is_valid (ctf->proto, start_value, n_values)); + for (i = start_value; i < start_value + n_values; i++) + { + int width = caseproto_get_width (ctf->proto, i); + if (width != -1 + && !tmpfile_write (ctf->tmpfile, case_offset + ctf->offsets[i], + width_to_n_bytes (width), + value_to_data (values++, width))) + return false; + } + return true; } /* Writes C to CTF as the case numbered CASE_IDX. 
@@ -242,8 +209,8 @@ bool case_tmpfile_put_case (struct case_tmpfile *ctf, casenumber case_idx, struct ccase *c) { - bool ok = case_tmpfile_put_values (ctf, case_idx, 0, - case_data_all (c), ctf->value_cnt); + bool ok = case_tmpfile_put_values (ctf, case_idx, 0, case_data_all (c), + caseproto_get_n_widths (ctf->proto)); case_unref (c); return ok; } diff --git a/src/data/case-tmpfile.h b/src/data/case-tmpfile.h index a5916249..bbf736e3 100644 --- a/src/data/case-tmpfile.h +++ b/src/data/case-tmpfile.h @@ -31,14 +31,16 @@ #include -struct case_tmpfile *case_tmpfile_create (size_t value_cnt); +struct caseproto; + +struct case_tmpfile *case_tmpfile_create (const struct caseproto *); bool case_tmpfile_destroy (struct case_tmpfile *); bool case_tmpfile_error (const struct case_tmpfile *); void case_tmpfile_force_error (struct case_tmpfile *); const struct taint *case_tmpfile_get_taint (const struct case_tmpfile *); - bool case_tmpfile_get_values (const struct case_tmpfile *, +bool case_tmpfile_get_values (const struct case_tmpfile *, casenumber, size_t start_value, union value[], size_t value_cnt); struct ccase *case_tmpfile_get_case (const struct case_tmpfile *, casenumber); diff --git a/src/data/case.c b/src/data/case.c index 4432579e..a4a78dd0 100644 --- a/src/data/case.c +++ b/src/data/case.c @@ -18,44 +18,37 @@ #include -#include #include #include #include #include #include +#include #include #include "minmax.h" #include "xalloc.h" -/* Returns the number of bytes needed by a case with N_VALUES - values. */ -static size_t -case_size (size_t n_values) -{ - return offsetof (struct ccase, values) + n_values * sizeof (union value); -} - -/* Returns true if case C contains COUNT cases starting at index - OFS, false if any of those values are out of range for case - C. 
*/ -static inline bool UNUSED -range_is_valid (const struct ccase *c, size_t ofs, size_t count) -{ - return (count <= c->n_values - && ofs <= c->n_values - && ofs + count <= c->n_values); -} - -/* Creates and returns a new case that can store N_VALUES values. - The values have indeterminate contents until explicitly - written. */ +static size_t case_size (const struct caseproto *); +static bool variable_matches_case (const struct ccase *, + const struct variable *); +static void copy_forward (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values); +static void copy_backward (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values); + +/* Creates and returns a new case that stores data of the form + specified by PROTO. The data in the case have indeterminate + contents until explicitly written. + + The caller retains ownership of PROTO. */ struct ccase * -case_create (size_t n_values) +case_create (const struct caseproto *proto) { - struct ccase *c = case_try_create (n_values); + struct ccase *c = case_try_create (proto); if (c == NULL) xalloc_die (); return c; @@ -64,56 +57,119 @@ case_create (size_t n_values) /* Like case_create, but returns a null pointer if not enough memory is available. */ struct ccase * -case_try_create (size_t n_values) +case_try_create (const struct caseproto *proto) { - struct ccase *c = malloc (case_size (n_values)); - if (c) + struct ccase *c = malloc (case_size (proto)); + if (c != NULL) { - c->n_values = n_values; - c->ref_cnt = 1; + if (caseproto_try_init_values (proto, c->values)) + { + c->proto = caseproto_ref (proto); + c->ref_cnt = 1; + return c; + } + free (c); } - return c; + return NULL; } -/* Resizes case C, which must not be shared, to N_VALUES union - values. If N_VALUES is greater than the current size of case - C, then the newly added values have indeterminate content that - the caller is responsible for initializing. Returns the new - case. 
*/ +/* Creates and returns an unshared copy of case C. */ struct ccase * -case_resize (struct ccase *c, size_t n_values) +case_clone (const struct ccase *c) { + return case_unshare (case_ref (c)); +} + +/* Returns an estimate of the number of bytes of memory that + would be consumed in creating a case based on PROTO. The + estimate includes typical overhead from malloc() in addition + to the actual size of data. */ +size_t +case_get_cost (const struct caseproto *proto) +{ + /* FIXME: improve approximation? */ + return (1 + caseproto_get_n_widths (proto) + + 3 * caseproto_get_n_long_strings (proto)) * sizeof (union value); +} + +/* Changes the prototype for case C, which must not be shared. + The new PROTO must be conformable with C's current prototype + (as defined by caseproto_is_conformable). + + Any new values created by this function have indeterminate + content that the caller is responsible for initializing. + + The caller retains ownership of PROTO. + + Returns a new case that replaces C, which is freed. */ +struct ccase * +case_resize (struct ccase *c, const struct caseproto *new_proto) +{ + struct caseproto *old_proto = c->proto; + size_t old_n_widths = caseproto_get_n_widths (old_proto); + size_t new_n_widths = caseproto_get_n_widths (new_proto); + assert (!case_is_shared (c)); - if (n_values != c->n_values) + expensive_assert (caseproto_is_conformable (old_proto, new_proto)); + + if (old_n_widths != new_n_widths) { - c->n_values = n_values; - return xrealloc (c, case_size (n_values)); + if (new_n_widths < old_n_widths) + caseproto_reinit_values (old_proto, new_proto, c->values); + c = xrealloc (c, case_size (new_proto)); + if (new_n_widths > old_n_widths) + caseproto_reinit_values (old_proto, new_proto, c->values); + + caseproto_unref (old_proto); + c->proto = caseproto_ref (new_proto); } - else - return c; + + return c; } -/* case_unshare_and_resize(C, N) is equivalent to - case_resize(case_unshare(C), N), but it is faster if case C is - shared. 
+/* case_unshare_and_resize(C, PROTO) is equivalent to + case_resize(case_unshare(C), PROTO), but it is faster if case + C is shared. + + Any new values created by this function have indeterminate + content that the caller is responsible for initializing. - Returns the new case.*/ + The caller retains ownership of PROTO. + + Returns the new case that replaces C, which is freed. */ struct ccase * -case_unshare_and_resize (struct ccase *c, size_t n_values) +case_unshare_and_resize (struct ccase *c, const struct caseproto *proto) { if (!case_is_shared (c)) - return case_resize (c, n_values); + return case_resize (c, proto); else { - struct ccase *new = case_create (n_values); - case_copy (new, 0, c, 0, MIN (n_values, c->n_values)); + struct ccase *new = case_create (proto); + size_t old_n_values = caseproto_get_n_widths (c->proto); + size_t new_n_values = caseproto_get_n_widths (proto); + case_copy (new, 0, c, 0, MIN (old_n_values, new_n_values)); c->ref_cnt--; return new; } } +/* Sets all of the numeric values in case C to the system-missing + value, and all of the string values to spaces. */ +void +case_set_missing (struct ccase *c) +{ + size_t i; + + assert (!case_is_shared (c)); + for (i = 0; i < caseproto_get_n_widths (c->proto); i++) + value_set_missing (&c->values[i], caseproto_get_width (c->proto, i)); +} + /* Copies N_VALUES values from SRC (starting at SRC_IDX) to DST - (starting at DST_IDX). + (starting at DST_IDX). Each value that is copied into must + have the same width as the value that it is copied from. + + Properly handles overlapping ranges when DST == SRC. DST must not be shared. 
*/ void @@ -122,12 +178,29 @@ case_copy (struct ccase *dst, size_t dst_idx, size_t n_values) { assert (!case_is_shared (dst)); - assert (range_is_valid (dst, dst_idx, n_values)); - assert (range_is_valid (src, src_idx, n_values)); + assert (caseproto_range_is_valid (dst->proto, dst_idx, n_values)); + assert (caseproto_range_is_valid (src->proto, src_idx, n_values)); + assert (caseproto_equal (dst->proto, dst_idx, src->proto, src_idx, + n_values)); - if (dst != src || dst_idx != src_idx) - memmove (dst->values + dst_idx, src->values + src_idx, - sizeof *dst->values * n_values); + if (dst != src) + { + if (!dst->proto->n_long_strings || !src->proto->n_long_strings) + memcpy (&dst->values[dst_idx], &src->values[src_idx], + sizeof dst->values[0] * n_values); + else + copy_forward (dst, dst_idx, src, src_idx, n_values); + } + else if (dst_idx != src_idx) + { + if (!dst->proto->n_long_strings) + memmove (&dst->values[dst_idx], &src->values[src_idx], + sizeof dst->values[0] * n_values); + else if (dst_idx < src_idx) + copy_forward (dst, dst_idx, src, src_idx, n_values); + else /* dst_idx > src_idx */ + copy_backward (dst, dst_idx, src, src_idx, n_values); + } } /* Copies N_VALUES values out of case C to VALUES, starting at @@ -136,8 +209,13 @@ void case_copy_out (const struct ccase *c, size_t start_idx, union value *values, size_t n_values) { - assert (range_is_valid (c, start_idx, n_values)); - memcpy (values, c->values + start_idx, n_values * sizeof *values); + size_t i; + + assert (caseproto_range_is_valid (c->proto, start_idx, n_values)); + + for (i = 0; i < n_values; i++) + value_copy (&values[i], &c->values[start_idx + i], + caseproto_get_width (c->proto, start_idx + i)); } /* Copies N_VALUES values from VALUES into case C, starting at @@ -148,9 +226,14 @@ void case_copy_in (struct ccase *c, size_t start_idx, const union value *values, size_t n_values) { + size_t i; + assert (!case_is_shared (c)); - assert (range_is_valid (c, start_idx, n_values)); - memcpy 
(c->values + start_idx, values, n_values * sizeof *values); + assert (caseproto_range_is_valid (c->proto, start_idx, n_values)); + + for (i = 0; i < n_values; i++) + value_copy (&c->values[start_idx + i], &values[i], + caseproto_get_width (c->proto, start_idx + i)); } /* Returns a pointer to the `union value' used for the @@ -160,7 +243,8 @@ case_copy_in (struct ccase *c, const union value * case_data (const struct ccase *c, const struct variable *v) { - return case_data_idx (c, var_get_case_index (v)); + assert (variable_matches_case (c, v)); + return &c->values[var_get_case_index (v)]; } /* Returns a pointer to the `union value' used for the element of @@ -169,7 +253,7 @@ case_data (const struct ccase *c, const struct variable *v) const union value * case_data_idx (const struct ccase *c, size_t idx) { - assert (idx < c->n_values); + assert (idx < c->proto->n_widths); return &c->values[idx]; } @@ -181,7 +265,9 @@ case_data_idx (const struct ccase *c, size_t idx) union value * case_data_rw (struct ccase *c, const struct variable *v) { - return case_data_rw_idx (c, var_get_case_index (v)); + assert (variable_matches_case (c, v)); + assert (!case_is_shared (c)); + return &c->values[var_get_case_index (v)]; } /* Returns a pointer to the `union value' used for the @@ -192,8 +278,8 @@ case_data_rw (struct ccase *c, const struct variable *v) union value * case_data_rw_idx (struct ccase *c, size_t idx) { + assert (idx < c->proto->n_widths); assert (!case_is_shared (c)); - assert (idx < c->n_values); return &c->values[idx]; } @@ -203,7 +289,8 @@ case_data_rw_idx (struct ccase *c, size_t idx) double case_num (const struct ccase *c, const struct variable *v) { - return case_num_idx (c, var_get_case_index (v)); + assert (variable_matches_case (c, v)); + return c->values[var_get_case_index (v)].f; } /* Returns the numeric value of the `union value' in C numbered @@ -211,7 +298,7 @@ case_num (const struct ccase *c, const struct variable *v) double case_num_idx (const struct 
ccase *c, size_t idx) { - assert (idx < c->n_values); + assert (idx < c->proto->n_widths); return c->values[idx].f; } @@ -219,24 +306,58 @@ case_num_idx (const struct ccase *c, size_t idx) variable V. Case C must be drawn from V's dictionary. The caller must not modify the return value. - Like all "union value"s, the return value is not - null-terminated. */ + Like the strings embedded in all "union value"s, the return + value is not null-terminated. */ const char * case_str (const struct ccase *c, const struct variable *v) { - return case_str_idx (c, var_get_case_index (v)); + size_t idx = var_get_case_index (v); + assert (variable_matches_case (c, v)); + return value_str (&c->values[idx], caseproto_get_width (c->proto, idx)); } /* Returns the string value of the `union value' in C numbered IDX. The caller must not modify the return value. - Like all "union value"s, the return value is not - null-terminated. */ + Like the strings embedded in all "union value"s, the return + value is not null-terminated. */ const char * case_str_idx (const struct ccase *c, size_t idx) { - assert (idx < c->n_values); - return c->values[idx].s; + assert (idx < c->proto->n_widths); + return value_str (&c->values[idx], caseproto_get_width (c->proto, idx)); +} + +/* Returns the string value of the `union value' in C for + variable V. Case C must be drawn from V's dictionary. The + caller may modify the return value. + + Case C must not be shared. + + Like the strings embedded in all "union value"s, the return + value is not null-terminated. */ +char * +case_str_rw (struct ccase *c, const struct variable *v) +{ + size_t idx = var_get_case_index (v); + assert (variable_matches_case (c, v)); + assert (!case_is_shared (c)); + return value_str_rw (&c->values[idx], caseproto_get_width (c->proto, idx)); +} + +/* Returns the string value of the `union value' in C numbered + IDX. The caller may modify the return value. + + Case C must not be shared. 
+ + Like the strings embedded in all "union value"s, the return + value is not null-terminated. */ +char * +case_str_rw_idx (struct ccase *c, size_t idx) +{ + assert (idx < c->proto->n_widths); + assert (!case_is_shared (c)); + return value_str_rw (&c->values[idx], caseproto_get_width (c->proto, idx)); } /* Compares the values of the N_VARS variables in VP @@ -257,32 +378,15 @@ case_compare_2dict (const struct ccase *ca, const struct ccase *cb, const struct variable *const *vbp, size_t n_vars) { - for (; n_vars-- > 0; vap++, vbp++) + int cmp = 0; + for (; !cmp && n_vars-- > 0; vap++, vbp++) { - const struct variable *va = *vap; - const struct variable *vb = *vbp; - - assert (var_get_width (va) == var_get_width (vb)); - - if (var_get_width (va) == 0) - { - double af = case_num (ca, va); - double bf = case_num (cb, vb); - - if (af != bf) - return af > bf ? 1 : -1; - } - else - { - const char *as = case_str (ca, va); - const char *bs = case_str (cb, vb); - int cmp = memcmp (as, bs, var_get_width (va)); - - if (cmp != 0) - return cmp; - } + const union value *va = case_data (ca, *vap); + const union value *vb = case_data (cb, *vbp); + assert (var_get_width (*vap) == var_get_width (*vbp)); + cmp = value_compare_3way (va, vb, var_get_width (*vap)); } - return 0; + return cmp; } /* Returns a pointer to the array of `union value's used for C. @@ -314,8 +418,66 @@ case_data_all_rw (struct ccase *c) struct ccase * case_unshare__ (struct ccase *old) { - struct ccase *new = case_create (old->n_values); - memcpy (new->values, old->values, old->n_values * sizeof old->values[0]); + struct ccase *new = case_create (old->proto); + case_copy (new, 0, old, 0, caseproto_get_n_widths (new->proto)); --old->ref_cnt; return new; } + +/* Internal helper function for case_unref. 
*/ +void +case_unref__ (struct ccase *c) +{ + caseproto_destroy_values (c->proto, c->values); + caseproto_unref (c->proto); + free (c); +} + +/* Returns the number of bytes needed by a case for case + prototype PROTO. */ +static size_t +case_size (const struct caseproto *proto) +{ + return (offsetof (struct ccase, values) + + caseproto_get_n_widths (proto) * sizeof (union value)); +} + +/* Returns true if C contains a value at V's case index with the + same width as V; that is, if V may plausibly be used to read + or write data in C. + + Useful in assertions. */ +static bool UNUSED +variable_matches_case (const struct ccase *c, const struct variable *v) +{ + size_t case_idx = var_get_case_index (v); + return (case_idx < caseproto_get_n_widths (c->proto) + && caseproto_get_width (c->proto, case_idx) == var_get_width (v)); +} + +/* Internal helper function for case_copy(). */ +static void +copy_forward (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values) +{ + size_t i; + + for (i = 0; i < n_values; i++) + value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i], + caseproto_get_width (dst->proto, dst_idx + i)); +} + +/* Internal helper function for case_copy(). */ +static void +copy_backward (struct ccase *dst, size_t dst_idx, + const struct ccase *src, size_t src_idx, + size_t n_values) +{ + size_t i; + + for (i = n_values; i-- != 0; ) + value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i], + caseproto_get_width (dst->proto, dst_idx + i)); +} + diff --git a/src/data/case.h b/src/data/case.h index 6f537726..36feb15f 100644 --- a/src/data/case.h +++ b/src/data/case.h @@ -22,7 +22,7 @@ #include #include #include -#include "value.h" +#include struct variable; @@ -51,13 +51,14 @@ typedef long int casenumber; shared case. */ struct ccase { - size_t n_values; /* Number of values. */ + struct caseproto *proto; /* Case prototype. */ size_t ref_cnt; /* Reference count. */ union value values[1]; /* Values. 
*/ }; -struct ccase *case_create (size_t n_values) MALLOC_LIKE; -struct ccase *case_try_create (size_t n_values) MALLOC_LIKE; +struct ccase *case_create (const struct caseproto *) MALLOC_LIKE; +struct ccase *case_try_create (const struct caseproto *) MALLOC_LIKE; +struct ccase *case_clone (const struct ccase *) MALLOC_LIKE; static inline struct ccase *case_unshare (struct ccase *) WARN_UNUSED_RESULT; static inline struct ccase *case_ref (const struct ccase *); @@ -65,15 +66,21 @@ static inline void case_unref (struct ccase *); static inline bool case_is_shared (const struct ccase *); static inline size_t case_get_value_cnt (const struct ccase *); +static inline const struct caseproto *case_get_proto (const struct ccase *); -struct ccase *case_resize (struct ccase *, size_t new_cnt) WARN_UNUSED_RESULT; -struct ccase *case_unshare_and_resize (struct ccase *, size_t new_cnt) +size_t case_get_cost (const struct caseproto *); + +struct ccase *case_resize (struct ccase *, const struct caseproto *) + WARN_UNUSED_RESULT; +struct ccase *case_unshare_and_resize (struct ccase *, + const struct caseproto *) WARN_UNUSED_RESULT; +void case_set_missing (struct ccase *); + void case_copy (struct ccase *dst, size_t dst_idx, const struct ccase *src, size_t src_idx, size_t cnt); - void case_copy_out (const struct ccase *, size_t start_idx, union value *, size_t n_values); void case_copy_in (struct ccase *, @@ -89,6 +96,8 @@ double case_num_idx (const struct ccase *, size_t idx); const char *case_str (const struct ccase *, const struct variable *); const char *case_str_idx (const struct ccase *, size_t idx); +char *case_str_rw (struct ccase *, const struct variable *); +char *case_str_rw_idx (struct ccase *, size_t idx); int case_compare (const struct ccase *, const struct ccase *, const struct variable *const *, size_t n_vars); @@ -101,6 +110,7 @@ const union value *case_data_all (const struct ccase *); union value *case_data_all_rw (struct ccase *); struct ccase *case_unshare__ 
(struct ccase *); +void case_unref__ (struct ccase *); /* If C is a shared case, that is, if it has a reference count greater than 1, makes a new unshared copy and returns it, @@ -138,7 +148,7 @@ static inline void case_unref (struct ccase *c) { if (c != NULL && !--c->ref_cnt) - free (c); + case_unref__ (c); } /* Returns true if case C is shared. A case that is shared @@ -154,7 +164,15 @@ case_is_shared (const struct ccase *c) static inline size_t case_get_value_cnt (const struct ccase *c) { - return c->n_values; + return caseproto_get_n_widths (c->proto); +} + +/* Returns the prototype that describes the format of case C. + The caller must not unref the returned prototype. */ +static inline const struct caseproto * +case_get_proto (const struct ccase *c) +{ + return c->proto; } #endif /* data/case.h */ diff --git a/src/data/casegrouper.c b/src/data/casegrouper.c index 86788ba7..9e7ab157 100644 --- a/src/data/casegrouper.c +++ b/src/data/casegrouper.c @@ -89,7 +89,8 @@ casegrouper_get_next_group (struct casegrouper *grouper, return false; } - writer = autopaging_writer_create (casereader_get_value_cnt (grouper->reader)); + writer = autopaging_writer_create ( + casereader_get_proto (grouper->reader)); case_ref (group_case); casewriter_write (writer, group_case); diff --git a/src/data/caseinit.c b/src/data/caseinit.c index 4f7ece7e..572f9160 100644 --- a/src/data/caseinit.c +++ b/src/data/caseinit.c @@ -38,8 +38,9 @@ /* Binds a value with a place to put it. */ struct init_value { - union value value; size_t case_index; + int width; + union value value; }; /* A set of values to initialize in a case. 
*/ @@ -68,6 +69,10 @@ init_list_create (struct init_list *list) static void init_list_destroy (struct init_list *list) { + struct init_value *iv; + + for (iv = &list->values[0]; iv < &list->values[list->cnt]; iv++) + value_destroy (&iv->value, iv->width); free (list->values); } @@ -111,14 +116,13 @@ init_list_mark (struct init_list *list, const struct init_list *exclude, size_t i; assert (list != exclude); - list->values = xnrealloc (list->values, - list->cnt + dict_get_next_value_idx (d), + list->values = xnrealloc (list->values, list->cnt + dict_get_var_cnt (d), sizeof *list->values); for (i = 0; i < var_cnt; i++) { struct variable *v = dict_get_var (d, i); size_t case_index = var_get_case_index (v); - int offset; + struct init_value *iv; /* Only include the correct class. */ if (!(include & (var_get_leave (v) ? LEAVE_LEFT : LEAVE_REINIT))) @@ -128,19 +132,14 @@ init_list_mark (struct init_list *list, const struct init_list *exclude, if (exclude != NULL && init_list_includes (exclude, case_index)) continue; - offset = 0; - do - { - struct init_value *iv = &list->values[list->cnt++]; - iv->case_index = case_index++; - if (var_is_numeric (v)) - iv->value.f = var_get_leave (v) ? 0 : SYSMIS; - else - memset (iv->value.s, ' ', sizeof iv->value.s); - - offset += sizeof iv->value.s; - } - while (offset < var_get_width (v)); + iv = &list->values[list->cnt++]; + iv->case_index = case_index; + iv->width = var_get_width (v); + value_init (&iv->value, iv->width); + if (var_is_numeric (v) && var_get_leave (v)) + iv->value.f = 0; + else + value_set_missing (&iv->value, iv->width); } /* Drop duplicates. 
*/ @@ -153,13 +152,10 @@ init_list_mark (struct init_list *list, const struct init_list *exclude, static void init_list_init (const struct init_list *list, struct ccase *c) { - size_t i; + const struct init_value *iv; - for (i = 0; i < list->cnt; i++) - { - const struct init_value *value = &list->values[i]; - *case_data_rw_idx (c, value->case_index) = value->value; - } + for (iv = &list->values[0]; iv < &list->values[list->cnt]; iv++) + value_copy (case_data_rw_idx (c, iv->case_index), &iv->value, iv->width); } /* Updates the values in the initializer LIST from the data in @@ -167,13 +163,10 @@ init_list_init (const struct init_list *list, struct ccase *c) static void init_list_update (const struct init_list *list, const struct ccase *c) { - size_t i; + struct init_value *iv; - for (i = 0; i < list->cnt; i++) - { - struct init_value *value = &list->values[i]; - value->value = *case_data_idx (c, value->case_index); - } + for (iv = &list->values[0]; iv < &list->values[list->cnt]; iv++) + value_copy (&iv->value, case_data_idx (c, iv->case_index), iv->width); } /* A case initializer. */ diff --git a/src/data/caseproto.c b/src/data/caseproto.c new file mode 100644 index 00000000..1b6827db --- /dev/null +++ b/src/data/caseproto.c @@ -0,0 +1,411 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#include + +#include +#include +#include +#include +#include +#include + +#include "minmax.h" + +static struct caseproto *caseproto_unshare (struct caseproto *); +static bool try_init_long_strings (const struct caseproto *, + size_t first, size_t last, union value[]); +static void init_long_strings (const struct caseproto *, + size_t first, size_t last, union value[]); +static void destroy_long_strings (const struct caseproto *, + size_t first, size_t last, union value[]); +static size_t count_long_strings (const struct caseproto *, + size_t idx, size_t count); + +/* Returns the number of bytes to allocate for a struct caseproto + with room for N_WIDTHS elements in its widths[] array. */ +static inline size_t +caseproto_size (size_t n_widths) +{ + return (offsetof (struct caseproto, widths) + + n_widths * sizeof (((struct caseproto *) NULL)->widths[0])); +} + +/* Creates and returns a case prototype that initially has no + widths. */ +struct caseproto * +caseproto_create (void) +{ + enum { N_ALLOCATE = 4 }; + struct caseproto *proto = xmalloc (caseproto_size (N_ALLOCATE)); + proto->ref_cnt = 1; + proto->long_strings = NULL; + proto->n_long_strings = 0; + proto->n_widths = 0; + proto->allocated_widths = N_ALLOCATE; + return proto; +} + +static void +do_unref (void *proto_) +{ + struct caseproto *proto = proto_; + caseproto_unref (proto); +} + +/* Creates and returns a new reference to PROTO. When POOL is + destroyed, the new reference will be destroyed (unrefed). */ +struct caseproto * +caseproto_ref_pool (const struct caseproto *proto_, struct pool *pool) +{ + struct caseproto *proto = caseproto_ref (proto_); + pool_register (pool, do_unref, proto); + return proto; +} + +/* Returns a replacement for PROTO that is unshared and has + enough room for at least N_WIDTHS widths before additional + memory is needed. 
*/ +struct caseproto * +caseproto_reserve (struct caseproto *proto, size_t n_widths) +{ + proto = caseproto_unshare (proto); + if (n_widths > proto->allocated_widths) + { + proto->allocated_widths = MAX (proto->allocated_widths * 2, n_widths); + proto = xrealloc (proto, caseproto_size (proto->allocated_widths)); + } + return proto; +} + +/* Returns a replacement for PROTO with WIDTH appended. */ +struct caseproto * +caseproto_add_width (struct caseproto *proto, int width) +{ + assert (width >= -1 && width <= MAX_STRING); + + proto = caseproto_reserve (proto, proto->n_widths + 1); + proto->widths[proto->n_widths++] = width; + proto->n_long_strings += count_long_strings (proto, proto->n_widths - 1, 1); + + return proto; +} + +/* Returns a replacement for PROTO with the width at index IDX + replaced by WIDTH. IDX may be greater than the current number + of widths in PROTO, in which case any gap is filled in by + widths of -1. */ +struct caseproto * +caseproto_set_width (struct caseproto *proto, size_t idx, int width) +{ + assert (width >= -1 && width <= MAX_STRING); + + proto = caseproto_reserve (proto, idx + 1); + while (idx >= proto->n_widths) + proto->widths[proto->n_widths++] = -1; + proto->n_long_strings -= count_long_strings (proto, idx, 1); + proto->widths[idx] = width; + proto->n_long_strings += count_long_strings (proto, idx, 1); + + return proto; +} + +/* Returns a replacement for PROTO with WIDTH inserted just + before index BEFORE, or just after the last element if BEFORE + is the number of widths in PROTO. 
*/ +struct caseproto * +caseproto_insert_width (struct caseproto *proto, size_t before, int width) +{ + assert (before <= proto->n_widths); + + proto = caseproto_reserve (proto, proto->n_widths + 1); + proto->n_long_strings += value_needs_init (width); + insert_element (proto->widths, proto->n_widths, sizeof *proto->widths, + before); + proto->widths[before] = width; + proto->n_widths++; + + return proto; +} + +/* Returns a replacement for PROTO with CNT widths removed + starting at index IDX. */ +struct caseproto * +caseproto_remove_widths (struct caseproto *proto, size_t idx, size_t cnt) +{ + assert (caseproto_range_is_valid (proto, idx, cnt)); + + proto = caseproto_unshare (proto); + proto->n_long_strings -= count_long_strings (proto, idx, cnt); + remove_range (proto->widths, proto->n_widths, sizeof *proto->widths, + idx, cnt); + proto->n_widths -= cnt; + return proto; +} + +/* Returns a replacement for PROTO in which the CNT widths + starting at index OLD_START now start at index NEW_START, with + other widths shifting out of the way to make room. */ +struct caseproto * +caseproto_move_widths (struct caseproto *proto, + size_t old_start, size_t new_start, + size_t cnt) +{ + assert (caseproto_range_is_valid (proto, old_start, cnt)); + assert (caseproto_range_is_valid (proto, new_start, cnt)); + + proto = caseproto_unshare (proto); + move_range (proto->widths, proto->n_widths, sizeof *proto->widths, + old_start, new_start, cnt); + return proto; +} + +/* Returns true if PROTO contains COUNT widths starting at index + OFS, false if any of those widths are out of range for + PROTO. */ +bool +caseproto_range_is_valid (const struct caseproto *proto, + size_t ofs, size_t count) +{ + return (count <= proto->n_widths + && ofs <= proto->n_widths + && ofs + count <= proto->n_widths); +} + +/* Returns true if A and B have the same widths along their + common length. 
(When this is so, a case with prototype A may + be extended or truncated to have prototype B without having to + change any existing values, and vice versa.) */ +bool +caseproto_is_conformable (const struct caseproto *a, const struct caseproto *b) +{ + size_t min; + size_t i; + + min = MIN (a->n_widths, b->n_widths); + for (i = 0; i < min; i++) + if (a->widths[i] != b->widths[i]) + return false; + return true; +} + +/* Returns true if the N widths starting at A_START in A are the + same as the N widths starting at B_START in B, false if any of + the corresponding widths differ. */ +bool +caseproto_equal (const struct caseproto *a, size_t a_start, + const struct caseproto *b, size_t b_start, + size_t n) +{ + size_t i; + + assert (caseproto_range_is_valid (a, a_start, n)); + assert (caseproto_range_is_valid (b, b_start, n)); + for (i = 0; i < n; i++) + if (a->widths[a_start + i] != b->widths[b_start + i]) + return false; + return true; +} + +/* Returns true if an array of values that is to be used for + data of the format specified in PROTO needs to be initialized + by calling caseproto_init_values, false if that step may be + skipped because such an initialization would be a no-op anyhow. + + This optimization is useful only when a large number of + initializations of such arrays may be skipped as a group. */ +bool +caseproto_needs_init_values (const struct caseproto *proto) +{ + return proto->n_long_strings > 0; +} + +/* Initializes the values in VALUES as required by PROTO, by + calling value_init() on each value for which this is required. + The data in VALUES have indeterminate contents until + explicitly written. + + VALUES must have at least caseproto_get_n_widths(PROTO) + elements; only that many elements of VALUES are initialized. + + The caller retains ownership of PROTO. 
*/ +void +caseproto_init_values (const struct caseproto *proto, union value values[]) +{ + init_long_strings (proto, 0, proto->n_long_strings, values); +} + +/* Like caseproto_init_values, but returns false instead of + terminating if memory cannot be obtained. */ +bool +caseproto_try_init_values (const struct caseproto *proto, union value values[]) +{ + return try_init_long_strings (proto, 0, proto->n_long_strings, values); +} + +/* Initializes the data in VALUES that are in NEW but not in OLD, + destroys the data in VALUES that are in OLD but not NEW, and + does not modify the data in VALUES that are in both OLD and + NEW. VALUES must previously have been initialized as required + by OLD using e.g. caseproto_init_values. The data in VALUES + that are in NEW but not in OLD will have indeterminate + contents until explicitly written. + + OLD and NEW must be conformable for this operation, as + reported by caseproto_is_conformable. + + The caller retains ownership of OLD and NEW. */ +void +caseproto_reinit_values (const struct caseproto *old, + const struct caseproto *new, union value values[]) +{ + size_t old_n_long = old->n_long_strings; + size_t new_n_long = new->n_long_strings; + + expensive_assert (caseproto_is_conformable (old, new)); + + if (new_n_long > old_n_long) + init_long_strings (new, old_n_long, new_n_long, values); + else if (new_n_long < old_n_long) + destroy_long_strings (old, new_n_long, old_n_long, values); +} + +/* Frees the values in VALUES as required by PROTO, by calling + value_destroy() on each value for which this is required. The + values must previously have been initialized using + e.g. caseproto_init_values. + + The caller retains ownership of PROTO. */ +void +caseproto_destroy_values (const struct caseproto *proto, union value values[]) +{ + destroy_long_strings (proto, 0, proto->n_long_strings, values); +} + +/* Copies COUNT values, whose widths are given by widths in PROTO + starting with index IDX, from SRC to DST. 
The caller must + ensure that the values in SRC and DST were appropriately + initialized using e.g. caseproto_init_values. */ +void +caseproto_copy (const struct caseproto *proto, size_t idx, size_t count, + union value *dst, const union value *src) +{ + size_t i; + + assert (caseproto_range_is_valid (proto, idx, count)); + for (i = 0; i < count; i++) + value_copy (&dst[idx + i], &src[idx + i], proto->widths[idx + i]); +} + +void +caseproto_free__ (struct caseproto *proto) +{ + free (proto->long_strings); + free (proto); +} + +void +caseproto_refresh_long_string_cache__ (const struct caseproto *proto_) +{ + struct caseproto *proto = (struct caseproto *) proto_; + size_t n, i; + + assert (proto->long_strings == NULL); + assert (proto->n_long_strings > 0); + + proto->long_strings = xmalloc (proto->n_long_strings + * sizeof *proto->long_strings); + n = 0; + for (i = 0; i < proto->n_widths; i++) + if (proto->widths[i] >= MIN_LONG_STRING) + proto->long_strings[n++] = i; + assert (n == proto->n_long_strings); +} + +static struct caseproto * +caseproto_unshare (struct caseproto *old) +{ + struct caseproto *new; + if (old->ref_cnt > 1) + { + new = xmemdup (old, caseproto_size (old->allocated_widths)); + new->ref_cnt = 1; + --old->ref_cnt; + } + else + { + new = old; + free (new->long_strings); + } + new->long_strings = NULL; + return new; +} + +static bool +try_init_long_strings (const struct caseproto *proto, + size_t first, size_t last, union value values[]) +{ + size_t i; + + if (last > 0 && proto->long_strings == NULL) + caseproto_refresh_long_string_cache__ (proto); + + for (i = first; i < last; i++) + { + size_t idx = proto->long_strings[i]; + if (!value_try_init (&values[idx], proto->widths[idx])) + { + destroy_long_strings (proto, first, i, values); + return false; + } + } + return true; +} + +static void +init_long_strings (const struct caseproto *proto, + size_t first, size_t last, union value values[]) +{ + if (!try_init_long_strings (proto, first, last, 
values)) + xalloc_die (); +} + +static void +destroy_long_strings (const struct caseproto *proto, size_t first, size_t last, + union value values[]) +{ + size_t i; + + if (last > 0 && proto->long_strings == NULL) + caseproto_refresh_long_string_cache__ (proto); + + for (i = first; i < last; i++) + { + size_t idx = proto->long_strings[i]; + value_destroy (&values[idx], proto->widths[idx]); + } +} + +static size_t +count_long_strings (const struct caseproto *proto, size_t idx, size_t count) +{ + size_t n, i; + + n = 0; + for (i = 0; i < count; i++) + n += proto->widths[idx + i] >= MIN_LONG_STRING; + return n; +} diff --git a/src/data/caseproto.h b/src/data/caseproto.h new file mode 100644 index 00000000..ba091e6b --- /dev/null +++ b/src/data/caseproto.h @@ -0,0 +1,209 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef DATA_CASEPROTO_H +#define DATA_CASEPROTO_H 1 + +#include +#include +#include +#include +#include +#include + +/* Case prototype. + + A case prototype specifies the number and type of the values + in a case. It is essentially an array of integers, where the + array index is an index into a case and each element + represents the width of a value in a case. Valid widths are: + + * 0, indicating a numeric value. 
+ + * A positive integer between 1 and 32767, indicating the + size in bytes of a string value. + + * -1, indicating that the value at this index in the case + is not used at all. (This is rarely useful.) + + Case prototypes are reference counted. A newly created case + prototype has a single owner (the code that created it), + represented by an initial reference count of 1. Other code + that receives the case prototype may keep a virtual copy of it + by calling caseproto_ref, which increments the case + prototype's reference count. When this is done, the case + prototype becomes shared between its original owner and each + piece of code that incremented the reference count. + + Functions that modify case prototypes automatically unshare + them as necessary. All of these functions potentially move + the caseproto around in memory even when the case prototype is + not shared. Thus it is very important that every caller of a + function that modifies a case prototype thereafter uses the returned + caseproto instead of the one passed in as an argument. + + Only the case prototype code should refer to caseproto members + directly. Other code should use the provided helper + functions. */ +struct caseproto + { + size_t ref_cnt; /* Reference count. */ + + /* Tracking of long string widths. Lazily maintained: when + 'long_strings' is null and 'n_long_strings' is nonzero, + the former must be regenerated. */ + size_t *long_strings; /* Array of indexes of long string widths. */ + size_t n_long_strings; /* Number of long string widths. */ + + /* Widths. */ + size_t n_widths; /* Number of widths. */ + size_t allocated_widths; /* Space allocated for 'widths' array. */ + short int widths[1]; /* Width of each case value. */ + }; + +struct pool; + +/* Creation and destruction. 
*/ +struct caseproto *caseproto_create (void) MALLOC_LIKE; +static inline struct caseproto *caseproto_ref (const struct caseproto *); +struct caseproto *caseproto_ref_pool (const struct caseproto *, struct pool *); +static inline void caseproto_unref (struct caseproto *); + +/* Inspecting stored widths. */ +static inline int caseproto_get_width (const struct caseproto *, size_t idx); +static inline size_t caseproto_get_n_widths (const struct caseproto *); + +/* Adding and removing widths. */ +struct caseproto *caseproto_reserve (struct caseproto *, size_t n_widths) + WARN_UNUSED_RESULT; +struct caseproto *caseproto_add_width (struct caseproto *, int width) + WARN_UNUSED_RESULT; +struct caseproto *caseproto_set_width (struct caseproto *, + size_t idx, int width) + WARN_UNUSED_RESULT; +struct caseproto *caseproto_insert_width (struct caseproto *, + size_t before, int width) + WARN_UNUSED_RESULT; +struct caseproto *caseproto_remove_widths (struct caseproto *, + size_t idx, size_t cnt) + WARN_UNUSED_RESULT; +struct caseproto *caseproto_move_widths (struct caseproto *, + size_t old_start, size_t new_start, + size_t cnt) + WARN_UNUSED_RESULT; + +/* Working with "union value" arrays. */ +bool caseproto_needs_init_values (const struct caseproto *); +void caseproto_init_values (const struct caseproto *, union value[]); +bool caseproto_try_init_values (const struct caseproto *, union value[]); +void caseproto_reinit_values (const struct caseproto *old, + const struct caseproto *new, union value[]); +void caseproto_destroy_values (const struct caseproto *, union value[]); + +void caseproto_copy (const struct caseproto *, size_t idx, size_t count, + union value *dst, const union value *src); + +/* Inspecting the cache of long string widths. + + (These functions are useful for allocating cases, which + requires allocating a block of memory for each long string value + in the case.) 
*/ +static inline size_t caseproto_get_n_long_strings (const struct caseproto *); +static inline size_t caseproto_get_long_string_idx (const struct caseproto *, + size_t idx1); + +/* For use in assertions. */ +bool caseproto_range_is_valid (const struct caseproto *, + size_t ofs, size_t count); +bool caseproto_is_conformable (const struct caseproto *a, + const struct caseproto *b); +bool caseproto_equal (const struct caseproto *a, size_t a_start, + const struct caseproto *b, size_t b_start, + size_t n); + +/* Creation and destruction. */ + +void caseproto_free__ (struct caseproto *); + +/* Increments case prototype PROTO's reference count and returns + PROTO. Afterward, PROTO is shared among its reference count + holders. */ +static inline struct caseproto * +caseproto_ref (const struct caseproto *proto_) +{ + struct caseproto *proto = (struct caseproto *) proto_; + proto->ref_cnt++; + return proto; +} + +/* Decrements case prototype PROTO's reference count. Frees + PROTO if its reference count drops to 0. + + If PROTO is a null pointer, this function has no effect. */ +static inline void +caseproto_unref (struct caseproto *proto) +{ + if (proto != NULL && !--proto->ref_cnt) + caseproto_free__ (proto); +} + +/* Inspecting stored widths. */ + +/* Returns case prototype PROTO's width with the given IDX. IDX + must be less than caseproto_get_n_widths(PROTO). */ +static inline int +caseproto_get_width (const struct caseproto *proto, size_t idx) +{ + assert (idx < proto->n_widths); + return proto->widths[idx]; +} + +/* Returns the number of widths in case prototype PROTO. */ +static inline size_t +caseproto_get_n_widths (const struct caseproto *proto) +{ + return proto->n_widths; +} + +/* Inspecting the cache of long string widths. */ + +void caseproto_refresh_long_string_cache__ (const struct caseproto *); + +/* Returns the number of long string widths in PROTO; that is, + the number of widths in PROTO that are greater than or equal + to MIN_LONG_STRING. 
*/ +static inline size_t +caseproto_get_n_long_strings (const struct caseproto *proto) +{ + return proto->n_long_strings; +} + +/* Given long string width IDX1, returns a value IDX2 for which + caseproto_get_width(PROTO, IDX2) will return a value greater + than or equal to MIN_LONG_STRING. IDX1 must be less than + caseproto_get_n_long_strings(PROTO), and IDX2 will be less + than caseproto_get_n_widths(PROTO). */ +static inline size_t +caseproto_get_long_string_idx (const struct caseproto *proto, size_t idx1) +{ + if (proto->long_strings == NULL) + caseproto_refresh_long_string_cache__ (proto); + + assert (idx1 < proto->n_long_strings); + return proto->long_strings[idx1]; +} + +#endif /* data/caseproto.h */ diff --git a/src/data/casereader-filter.c b/src/data/casereader-filter.c index d387f5ea..5244202e 100644 --- a/src/data/casereader-filter.c +++ b/src/data/casereader-filter.c @@ -78,7 +78,7 @@ casereader_create_filter_func (struct casereader *subreader, filter->aux = aux; filter->exclude = exclude; reader = casereader_create_sequential ( - NULL, casereader_get_value_cnt (filter->subreader), CASENUMBER_MAX, + NULL, casereader_get_proto (filter->subreader), CASENUMBER_MAX, &casereader_filter_class, filter); taint_propagate (casereader_get_taint (filter->subreader), casereader_get_taint (reader)); diff --git a/src/data/casereader-provider.h b/src/data/casereader-provider.h index 31d8a6d4..b8d53500 100644 --- a/src/data/casereader-provider.h +++ b/src/data/casereader-provider.h @@ -112,7 +112,7 @@ struct casereader_class struct casereader * casereader_create_sequential (const struct taint *, - size_t value_cnt, casenumber case_cnt, + const struct caseproto *, casenumber case_cnt, const struct casereader_class *, void *); void *casereader_dynamic_cast (struct casereader *, const struct casereader_class *); @@ -160,7 +160,7 @@ struct casereader_random_class }; struct casereader * -casereader_create_random (size_t value_cnt, casenumber case_cnt, 
+casereader_create_random (const struct caseproto *, casenumber case_cnt, const struct casereader_random_class *, void *aux); #endif /* data/casereader-provider.h */ diff --git a/src/data/casereader-translator.c b/src/data/casereader-translator.c index d55e18e5..548c22fb 100644 --- a/src/data/casereader-translator.c +++ b/src/data/casereader-translator.c @@ -41,9 +41,11 @@ static const struct casereader_class casereader_translator_class; /* Creates and returns a new casereader whose cases are produced by reading from SUBREADER and passing through TRANSLATE, which - must return the translated case, with OUTPUT_VALUE_CNT values, - and populate it based on INPUT and auxiliary data AUX. - TRANSLATE must destroy its input case. + must return the translated case, and populate it based on + INPUT and auxiliary data AUX. TRANSLATE must destroy its + input case. + + The cases returned by TRANSLATE must match OUTPUT_PROTO. When the translating casereader is destroyed, DESTROY will be called to allow any state maintained by TRANSLATE to be freed. @@ -53,7 +55,7 @@ static const struct casereader_class casereader_translator_class; when the translating casereader is destroyed. 
*/ struct casereader * casereader_create_translator (struct casereader *subreader, - size_t output_value_cnt, + const struct caseproto *output_proto, struct ccase *(*translate) (struct ccase *input, void *aux), bool (*destroy) (void *aux), @@ -66,7 +68,7 @@ casereader_create_translator (struct casereader *subreader, ct->destroy = destroy; ct->aux = aux; reader = casereader_create_sequential ( - NULL, output_value_cnt, casereader_get_case_cnt (ct->subreader), + NULL, output_proto, casereader_get_case_cnt (ct->subreader), &casereader_translator_class, ct); taint_propagate (casereader_get_taint (ct->subreader), casereader_get_taint (reader)); @@ -108,7 +110,7 @@ static const struct casereader_class casereader_translator_class = struct casereader_append_numeric { - int value_ofs; + struct caseproto *proto; casenumber n; new_value_func *func; void *aux; @@ -135,12 +137,13 @@ casereader_create_append_numeric (struct casereader *subreader, void (*destroy) (void *aux)) { struct casereader_append_numeric *can = xmalloc (sizeof *can); - can->value_ofs = casereader_get_value_cnt (subreader); + can->proto = caseproto_ref (casereader_get_proto (subreader)); + can->proto = caseproto_add_width (can->proto, 0); can->n = 0; can->aux = aux; can->func = func; can->destroy = destroy; - return casereader_create_translator (subreader, can->value_ofs + 1, + return casereader_create_translator (subreader, can->proto, can_translate, can_destroy, can); } @@ -150,8 +153,8 @@ can_translate (struct ccase *c, void *can_) { struct casereader_append_numeric *can = can_; double new_value = can->func (c, can->n++, can->aux); - c = case_unshare_and_resize (c, can->value_ofs + 1); - case_data_rw_idx (c, can->value_ofs)->f = new_value; + c = case_unshare_and_resize (c, can->proto); + case_data_rw_idx (c, caseproto_get_n_widths (can->proto) - 1)->f = new_value; return c; } @@ -161,6 +164,7 @@ can_destroy (void *can_) struct casereader_append_numeric *can = can_; if (can->destroy) can->destroy 
(can->aux); + caseproto_unref (can->proto); free (can); return true; } @@ -211,7 +215,7 @@ struct casereader_append_rank casenumber n; const struct variable *var; const struct variable *weight; - int value_ofs; + struct caseproto *proto; casenumber n_common; double mean_rank; double cc; @@ -258,7 +262,8 @@ casereader_create_append_rank (struct casereader *subreader, ) { struct casereader_append_rank *car = xmalloc (sizeof *car); - car->value_ofs = casereader_get_value_cnt (subreader); + car->proto = caseproto_ref (casereader_get_proto (subreader)); + car->proto = caseproto_add_width (car->proto, 0); car->weight = w; car->var = v; car->n = 0; @@ -270,7 +275,7 @@ casereader_create_append_rank (struct casereader *subreader, car->err = err; car->prev_value = SYSMIS; - return casereader_create_translator (subreader, car->value_ofs + 1, + return casereader_create_translator (subreader, car->proto, car_translate, car_destroy, car); } @@ -280,6 +285,7 @@ car_destroy (void *car_) { struct casereader_append_rank *car = car_; casereader_destroy (car->clone); + caseproto_unref (car->proto); free (car); return true; } @@ -345,8 +351,9 @@ car_translate (struct ccase *input, void *car_) car->n++; - input = case_unshare_and_resize (input, car->value_ofs + 1); - case_data_rw_idx (input, car->value_ofs)->f = car->mean_rank ; + input = case_unshare_and_resize (input, car->proto); + case_data_rw_idx (input, caseproto_get_n_widths (car->proto) - 1)->f + = car->mean_rank; car->prev_value = value; return input; } diff --git a/src/data/casereader.c b/src/data/casereader.c index 3d27a919..a1550ac5 100644 --- a/src/data/casereader.c +++ b/src/data/casereader.c @@ -34,7 +34,7 @@ struct casereader { struct taint *taint; /* Corrupted? */ - size_t value_cnt; /* Values per case. */ + struct caseproto *proto; /* Format of contained cases. */ casenumber case_cnt; /* Number of cases, CASENUMBER_MAX if unknown. */ const struct casereader_class *class; /* Class. 
*/ @@ -74,7 +74,10 @@ casereader_read (struct casereader *reader) c = reader->class->read (reader, reader->aux); if (c != NULL) { - assert (case_get_value_cnt (c) >= reader->value_cnt); + size_t n_widths UNUSED = caseproto_get_n_widths (reader->proto); + assert (case_get_value_cnt (c) >= n_widths); + expensive_assert (caseproto_equal (case_get_proto (c), 0, + reader->proto, 0, n_widths)); return c; } } @@ -93,6 +96,7 @@ casereader_destroy (struct casereader *reader) { reader->class->destroy (reader, reader->aux); ok = taint_destroy (reader->taint); + caseproto_unref (reader->proto); free (reader); } return ok; @@ -282,11 +286,12 @@ casereader_count_cases (struct casereader *reader) return reader->case_cnt; } -/* Returns the number of struct values in each case in READER. */ -size_t -casereader_get_value_cnt (struct casereader *reader) +/* Returns the prototype for the cases in READER. The caller + must not unref the returned prototype. */ +const struct caseproto * +casereader_get_proto (const struct casereader *reader) { - return reader->value_cnt; + return reader->proto; } /* Copies all the cases in READER to WRITER, propagating errors @@ -320,8 +325,9 @@ casereader_transfer (struct casereader *reader, struct casewriter *writer) function, in which case the cloned casereader should have the same taint object as the original casereader.) - VALUE_CNT must be the number of struct values per case read - from the casereader. + PROTO must be the prototype for the cases that may be read + from the casereader. The caller retains its reference to + PROTO. CASE_CNT is an upper limit on the number of cases that casereader_read will return from the casereader in successive @@ -334,12 +340,13 @@ casereader_transfer (struct casereader *reader, struct casewriter *writer) functions, respectively. 
*/ struct casereader * casereader_create_sequential (const struct taint *taint, - size_t value_cnt, casenumber case_cnt, + const struct caseproto *proto, + casenumber case_cnt, const struct casereader_class *class, void *aux) { struct casereader *reader = xmalloc (sizeof *reader); reader->taint = taint != NULL ? taint_clone (taint) : taint_create (); - reader->value_cnt = value_cnt; + reader->proto = caseproto_ref (proto); reader->case_cnt = case_cnt; reader->class = class; reader->aux = aux; @@ -434,8 +441,9 @@ compare_random_readers_by_offset (const struct heap_node *a_, casereader_create_sequential is more appropriate for a data source that is naturally sequential. - VALUE_CNT must be the number of struct values per case read - from the casereader. + PROTO must be the prototype for the cases that may be read + from the casereader. The caller retains its reference to + PROTO. CASE_CNT is an upper limit on the number of cases that casereader_read will return from the casereader in successive @@ -447,7 +455,7 @@ compare_random_readers_by_offset (const struct heap_node *a_, member functions and auxiliary data to pass to those member functions, respectively. 
*/ struct casereader * -casereader_create_random (size_t value_cnt, casenumber case_cnt, +casereader_create_random (const struct caseproto *proto, casenumber case_cnt, const struct casereader_random_class *class, void *aux) { @@ -456,7 +464,7 @@ casereader_create_random (size_t value_cnt, casenumber case_cnt, shared->class = class; shared->aux = aux; shared->min_offset = 0; - return casereader_create_sequential (NULL, value_cnt, case_cnt, + return casereader_create_sequential (NULL, proto, case_cnt, &random_reader_casereader_class, make_random_reader (shared, 0)); } @@ -524,7 +532,7 @@ random_reader_clone (struct casereader *reader, void *br_) struct random_reader *br = br_; struct random_reader_shared *shared = br->shared; return casereader_create_sequential (casereader_get_taint (reader), - casereader_get_value_cnt (reader), + reader->proto, casereader_get_case_cnt (reader), &random_reader_casereader_class, make_random_reader (shared, @@ -588,12 +596,11 @@ static const struct casereader_random_class shim_class; static void insert_shim (struct casereader *reader) { - size_t value_cnt = casereader_get_value_cnt (reader); + const struct caseproto *proto = casereader_get_proto (reader); casenumber case_cnt = casereader_get_case_cnt (reader); struct shim *b = xmalloc (sizeof *b); - b->window = casewindow_create (value_cnt, settings_get_workspace_cases (value_cnt)); - b->subreader = casereader_create_random (value_cnt, case_cnt, - &shim_class, b); + b->window = casewindow_create (proto, settings_get_workspace_cases (proto)); + b->subreader = casereader_create_random (proto, case_cnt, &shim_class, b); casereader_swap (reader, b->subreader); taint_propagate (casewindow_get_taint (b->window), casereader_get_taint (reader)); diff --git a/src/data/casereader.h b/src/data/casereader.h index deab5641..d4f2966a 100644 --- a/src/data/casereader.h +++ b/src/data/casereader.h @@ -77,7 +77,7 @@ const struct taint *casereader_get_taint (const struct casereader *); casenumber 
casereader_get_case_cnt (struct casereader *); casenumber casereader_count_cases (struct casereader *); -size_t casereader_get_value_cnt (struct casereader *); +const struct caseproto *casereader_get_proto (const struct casereader *); void casereader_transfer (struct casereader *, struct casewriter *); @@ -105,7 +105,8 @@ casereader_create_counter (struct casereader *, casenumber *counter, casenumber initial_value); struct casereader * -casereader_create_translator (struct casereader *, size_t output_value_cnt, +casereader_create_translator (struct casereader *, + const struct caseproto *output_proto, struct ccase *(*translate) (struct ccase *, void *aux), bool (*destroy) (void *aux), diff --git a/src/data/casewindow.c b/src/data/casewindow.c index d2be9cfa..9b04b941 100644 --- a/src/data/casewindow.c +++ b/src/data/casewindow.c @@ -36,7 +36,7 @@ struct casewindow { /* Common data. */ - size_t value_cnt; /* Number of values per case. */ + struct caseproto *proto; /* Prototype of cases in window. */ casenumber max_in_core_cases; /* Max cases before dumping to disk. */ struct taint *taint; /* Taint status. */ @@ -48,7 +48,7 @@ struct casewindow /* Implementation of a casewindow. */ struct casewindow_class { - void *(*create) (struct taint *, size_t value_cnt); + void *(*create) (struct taint *, const struct caseproto *); void (*destroy) (void *aux); void (*push_head) (void *aux, struct ccase *); void (*pop_tail) (void *aux, casenumber cnt); @@ -63,28 +63,30 @@ static const struct casewindow_class casewindow_file_class; /* Creates and returns a new casewindow using the given parameters. */ static struct casewindow * -do_casewindow_create (struct taint *taint, - size_t value_cnt, casenumber max_in_core_cases) +do_casewindow_create (struct taint *taint, const struct caseproto *proto, + casenumber max_in_core_cases) { struct casewindow *cw = xmalloc (sizeof *cw); cw->class = (max_in_core_cases ? 
&casewindow_memory_class : &casewindow_file_class); - cw->aux = cw->class->create (taint, value_cnt); - cw->value_cnt = value_cnt; + cw->aux = cw->class->create (taint, proto); + cw->proto = caseproto_ref (proto); cw->max_in_core_cases = max_in_core_cases; cw->taint = taint; return cw; } -/* Creates and returns a new casewindow for cases with VALUE_CNT - values each. If the casewindow holds more than +/* Creates and returns a new casewindow for cases that take the + form specified by PROTO. If the casewindow holds more than MAX_IN_CORE_CASES cases at any time, its cases will be dumped - to disk; otherwise, its cases will be held in memory. */ + to disk; otherwise, its cases will be held in memory. + + The caller retains its reference to PROTO. */ struct casewindow * -casewindow_create (size_t value_cnt, casenumber max_in_core_cases) +casewindow_create (const struct caseproto *proto, casenumber max_in_core_cases) { - return do_casewindow_create (taint_create (), value_cnt, max_in_core_cases); + return do_casewindow_create (taint_create (), proto, max_in_core_cases); } /* Destroys casewindow CW. @@ -98,6 +100,7 @@ casewindow_destroy (struct casewindow *cw) { cw->class->destroy (cw->aux); ok = taint_destroy (cw->taint); + caseproto_unref (cw->proto); free (cw); } return ok; @@ -117,7 +120,7 @@ static void casewindow_to_disk (struct casewindow *old) { struct casewindow *new; - new = do_casewindow_create (taint_clone (old->taint), old->value_cnt, 0); + new = do_casewindow_create (taint_clone (old->taint), old->proto, 0); while (casewindow_get_case_cnt (old) > 0 && !casewindow_error (new)) { struct ccase *c = casewindow_get_case (old, 0); @@ -180,11 +183,12 @@ casewindow_get_case_cnt (const struct casewindow *cw) return cw->class->get_case_cnt (cw->aux); } -/* Returns the number of values per case in casewindow CW. */ -size_t -casewindow_get_value_cnt (const struct casewindow *cw) +/* Returns the case prototype for the cases in casewindow CW. 
+ The caller must not unref the returned prototype. */ +const struct caseproto * +casewindow_get_proto (const struct casewindow *cw) { - return cw->value_cnt; + return cw->proto; } /* Returns true if casewindow CW is tainted. @@ -218,7 +222,8 @@ struct casewindow_memory }; static void * -casewindow_memory_create (struct taint *taint UNUSED, size_t value_cnt UNUSED) +casewindow_memory_create (struct taint *taint UNUSED, + const struct caseproto *proto UNUSED) { struct casewindow_memory *cwm = xmalloc (sizeof *cwm); cwm->cases = deque_init (&cwm->deque, 4, sizeof *cwm->cases); @@ -285,10 +290,10 @@ struct casewindow_file }; static void * -casewindow_file_create (struct taint *taint, size_t value_cnt) +casewindow_file_create (struct taint *taint, const struct caseproto *proto) { struct casewindow_file *cwf = xmalloc (sizeof *cwf); - cwf->file = case_tmpfile_create (value_cnt); + cwf->file = case_tmpfile_create (proto); cwf->head = cwf->tail = 0; taint_propagate (case_tmpfile_get_taint (cwf->file), taint); return cwf; diff --git a/src/data/casewindow.h b/src/data/casewindow.h index f0a200f4..b303c85d 100644 --- a/src/data/casewindow.h +++ b/src/data/casewindow.h @@ -28,10 +28,11 @@ #ifndef DATA_CASEWINDOW_H #define DATA_CASEWINDOW_H 1 -#include #include -struct casewindow *casewindow_create (size_t value_cnt, +struct caseproto; + +struct casewindow *casewindow_create (const struct caseproto *, casenumber max_in_core_cases); bool casewindow_destroy (struct casewindow *); @@ -39,7 +40,7 @@ void casewindow_push_head (struct casewindow *, struct ccase *); void casewindow_pop_tail (struct casewindow *, casenumber cnt); struct ccase *casewindow_get_case (const struct casewindow *, casenumber case_idx); -size_t casewindow_get_value_cnt (const struct casewindow *); +const struct caseproto *casewindow_get_proto (const struct casewindow *); casenumber casewindow_get_case_cnt (const struct casewindow *); bool casewindow_error (const struct casewindow *); diff --git 
a/src/data/casewriter-provider.h b/src/data/casewriter-provider.h index 1680fe28..7231a1f3 100644 --- a/src/data/casewriter-provider.h +++ b/src/data/casewriter-provider.h @@ -57,7 +57,7 @@ struct casewriter_class struct casereader *(*convert_to_reader) (struct casewriter *, void *aux); }; -struct casewriter *casewriter_create (size_t value_cnt, +struct casewriter *casewriter_create (const struct caseproto *, const struct casewriter_class *, void *); #endif /* data/casewriter-provider.h */ diff --git a/src/data/casewriter-translator.c b/src/data/casewriter-translator.c index a19533d6..e80b9d68 100644 --- a/src/data/casewriter-translator.c +++ b/src/data/casewriter-translator.c @@ -37,11 +37,12 @@ struct casewriter_translator static const struct casewriter_class casewriter_translator_class; /* Creates and returns a new casewriter whose cases are passed - through TRANSLATE, which must return a case with - OUTPUT_VALUE_CNT values, based on INPUT and auxiliary data - AUX. (TRANSLATE may also return a null pointer, in which case - no case is written to the output.) The translated cases are - then written to SUBWRITER. + through TRANSLATE, based on INPUT and auxiliary data AUX. + (TRANSLATE may also return a null pointer, in which case no + case is written to the output.) The translated cases are then + written to SUBWRITER. + + The cases returned by TRANSLATE must match OUTPUT_PROTO. TRANSLATE takes ownership of each case passed to it. Thus, it should either unref each case and return a new case, or @@ -55,7 +56,7 @@ static const struct casewriter_class casewriter_translator_class; when the translating casewriter is destroyed. 
*/ struct casewriter * casewriter_create_translator (struct casewriter *subwriter, - size_t translated_value_cnt, + const struct caseproto *translated_proto, struct ccase *(*translate) (struct ccase *, void *aux), bool (*destroy) (void *aux), @@ -67,7 +68,7 @@ casewriter_create_translator (struct casewriter *subwriter, ct->translate = translate; ct->destroy = destroy; ct->aux = aux; - writer = casewriter_create (translated_value_cnt, + writer = casewriter_create (translated_proto, &casewriter_translator_class, ct); taint_propagate (casewriter_get_taint (ct->subwriter), casewriter_get_taint (writer)); diff --git a/src/data/casewriter.c b/src/data/casewriter.c index 56e6c291..f7760eca 100644 --- a/src/data/casewriter.c +++ b/src/data/casewriter.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -35,20 +36,24 @@ struct casewriter { struct taint *taint; - size_t value_cnt; + struct caseproto *proto; casenumber case_cnt; const struct casewriter_class *class; void *aux; }; -static struct casewriter *create_casewriter_window (size_t value_cnt, +static struct casewriter *create_casewriter_window (const struct caseproto *, casenumber max_in_core); -/* Writes case C to WRITER. */ +/* Writes case C to WRITER. Ownership of C is transferred to + WRITER. */ void casewriter_write (struct casewriter *writer, struct ccase *c) { - assert (case_get_value_cnt (c) >= writer->value_cnt); + size_t n_widths UNUSED = caseproto_get_n_widths (writer->proto); + assert (case_get_value_cnt (c) >= n_widths); + expensive_assert (caseproto_equal (case_get_proto (c), 0, + writer->proto, 0, n_widths)); writer->class->write (writer, writer->aux, c); } @@ -64,17 +69,18 @@ casewriter_destroy (struct casewriter *writer) { writer->class->destroy (writer, writer->aux); ok = taint_destroy (writer->taint); + caseproto_unref (writer->proto); free (writer); } return ok; } -/* Returns the number of `union value's in each case written to - WRITER. 
*/ -size_t -casewriter_get_value_cnt (const struct casewriter *writer) +/* Returns the prototype that cases written to WRITER must + follow. */ +const struct caseproto * +casewriter_get_proto (const struct casewriter *writer) { - return writer->value_cnt; + return writer->proto; } /* Destroys WRITER and in its place returns a casereader that can @@ -142,23 +148,24 @@ casewriter_get_taint (const struct casewriter *writer) } /* Creates and returns a new casewriter with the given CLASS and - auxiliary data AUX. The casewriter accepts cases with - VALUE_CNT `union value's. */ + auxiliary data AUX. The casewriter accepts cases that match + case prototype PROTO, of which the caller retains + ownership. */ struct casewriter * -casewriter_create (size_t value_cnt, +casewriter_create (const struct caseproto *proto, const struct casewriter_class *class, void *aux) { struct casewriter *writer = xmalloc (sizeof *writer); writer->taint = taint_create (); - writer->value_cnt = value_cnt; + writer->proto = caseproto_ref (proto); writer->case_cnt = 0; writer->class = class; writer->aux = aux; return writer; } -/* Returns a casewriter for cases with VALUE_CNT struct values - per case. The cases written to the casewriter will be kept in +/* Returns a casewriter for cases that match case prototype + PROTO. The cases written to the casewriter will be kept in memory, unless the amount of memory used grows too large, in which case they will be written to disk. @@ -167,33 +174,34 @@ casewriter_create (size_t value_cnt, This is usually the right kind of casewriter to use. */ struct casewriter * -autopaging_writer_create (size_t value_cnt) +autopaging_writer_create (const struct caseproto *proto) { - return create_casewriter_window (value_cnt, settings_get_workspace_cases (value_cnt)); + return create_casewriter_window (proto, + settings_get_workspace_cases (proto)); } -/* Returns a casewriter for cases with VALUE_CNT struct values - per case.
The cases written to the casewriter will be kept in +/* Returns a casewriter for cases that match case prototype + PROTO. The cases written to the casewriter will be kept in memory. A casewriter created with this function may be passed to casewriter_make_reader. */ struct casewriter * -mem_writer_create (size_t value_cnt) +mem_writer_create (const struct caseproto *proto) { - return create_casewriter_window (value_cnt, CASENUMBER_MAX); + return create_casewriter_window (proto, CASENUMBER_MAX); } -/* Returns a casewriter for cases with VALUE_CNT struct values - per case. The cases written to the casewriter will be written +/* Returns a casewriter for cases that match case prototype + PROTO. The cases written to the casewriter will be written to disk. A casewriter created with this function may be passed to casewriter_make_reader. */ struct casewriter * -tmpfile_writer_create (size_t value_cnt) +tmpfile_writer_create (const struct caseproto *proto) { - return create_casewriter_window (value_cnt, 0); + return create_casewriter_window (proto, 0); } static const struct casewriter_class casewriter_window_class; @@ -205,10 +213,11 @@ static const struct casereader_random_class casereader_window_class; memory until MAX_IN_CORE_CASES have been written, at which point they will be written to disk. 
*/ static struct casewriter * -create_casewriter_window (size_t value_cnt, casenumber max_in_core_cases) +create_casewriter_window (const struct caseproto *proto, + casenumber max_in_core_cases) { - struct casewindow *window = casewindow_create (value_cnt, max_in_core_cases); - struct casewriter *writer = casewriter_create (value_cnt, + struct casewindow *window = casewindow_create (proto, max_in_core_cases); + struct casewriter *writer = casewriter_create (proto, &casewriter_window_class, window); taint_propagate (casewindow_get_taint (window), @@ -241,7 +250,7 @@ casewriter_window_convert_to_reader (struct casewriter *writer UNUSED, { struct casewindow *window = window_; struct casereader *reader = - casereader_create_random (casewindow_get_value_cnt (window), + casereader_create_random (casewindow_get_proto (window), casewindow_get_case_cnt (window), &casereader_window_class, window); diff --git a/src/data/casewriter.h b/src/data/casewriter.h index 05ef7072..146cc654 100644 --- a/src/data/casewriter.h +++ b/src/data/casewriter.h @@ -18,7 +18,6 @@ #define DATA_CASEWRITER_H 1 #include -#include #include #include @@ -27,7 +26,7 @@ struct casewriter; void casewriter_write (struct casewriter *, struct ccase *); bool casewriter_destroy (struct casewriter *); -size_t casewriter_get_value_cnt (const struct casewriter *); +const struct caseproto *casewriter_get_proto (const struct casewriter *); struct casereader *casewriter_make_reader (struct casewriter *); @@ -37,12 +36,13 @@ bool casewriter_error (const struct casewriter *); void casewriter_force_error (struct casewriter *); const struct taint *casewriter_get_taint (const struct casewriter *); -struct casewriter *mem_writer_create (size_t value_cnt); -struct casewriter *tmpfile_writer_create (size_t value_cnt); -struct casewriter *autopaging_writer_create (size_t value_cnt); +struct casewriter *mem_writer_create (const struct caseproto *); +struct casewriter *tmpfile_writer_create (const struct caseproto *); +struct 
casewriter *autopaging_writer_create (const struct caseproto *); struct casewriter * -casewriter_create_translator (struct casewriter *, size_t translated_value_cnt, +casewriter_create_translator (struct casewriter *, + const struct caseproto *translated_proto, struct ccase *(*translate) (struct ccase *input, void *aux), bool (*destroy) (void *aux), diff --git a/src/data/category.c b/src/data/category.c index 3aaf5c55..968dd4c5 100644 --- a/src/data/category.c +++ b/src/data/category.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -106,7 +106,7 @@ cat_value_find (const struct variable *v, const union value *val) { candidate = obs_vals->vals + i; assert (candidate != NULL); - if (!compare_values_short (candidate, val, v)) + if (value_equal (candidate, val, var_get_width (v))) { return i; } diff --git a/src/data/data-in.c b/src/data/data-in.c index a6544afb..eda6d125 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -85,8 +85,9 @@ static int hexit_value (int c); /* Parses the characters in INPUT, which are encoded in the given ENCODING, according to FORMAT. Stores the parsed - representation in OUTPUT, which has the given WIDTH (0 for - a numeric field, otherwise the string width). + representation in OUTPUT, which the caller must have + initialized with the given WIDTH (0 for a numeric field, + otherwise the string width). 
If no decimal point is included in a numeric format, then IMPLIED_DECIMALS decimal places are implied. Specify 0 if no @@ -607,7 +608,7 @@ parse_A (struct data_in *i) { /* This is equivalent to buf_copy_rpad, except that we posibly do a character set recoding in the middle. */ - char *dst = i->output->s; + char *dst = value_str_rw (i->output, i->width); size_t dst_size = i->width; const char *src = ss_data (i->input); size_t src_size = ss_length (i->input); @@ -623,6 +624,7 @@ parse_A (struct data_in *i) static bool parse_AHEX (struct data_in *i) { + char *s = value_str_rw (i->output, i->width); size_t j; for (j = 0; ; j++) @@ -649,10 +651,10 @@ parse_AHEX (struct data_in *i) } if (j < i->width) - i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo); + s[j] = hexit_value (hi) * 16 + hexit_value (lo); } - memset (i->output->s + j, ' ', i->width - j); + memset (&s[j], ' ', i->width - j); return true; } @@ -1220,7 +1222,7 @@ static void default_result (struct data_in *i) { if (fmt_is_string (i->format)) - memset (i->output->s, ' ', i->width); + memset (value_str_rw (i->output, i->width), ' ', i->width); else i->output->f = settings_get_blanks (); } diff --git a/src/data/data-out.c b/src/data/data-out.c index 86688a67..e7800a8f 100644 --- a/src/data/data-out.c +++ b/src/data/data-out.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -83,11 +83,8 @@ static void output_binary_integer (uint64_t, int bytes, enum integer_format, char *); static void output_hex (const void *, size_t bytes, char *); -/* Converts the INPUT value into printable form in the exactly - FORMAT->W characters in OUTPUT according to format - specification FORMAT. 
The output is recoded from native form - into the given legacy character ENCODING. No null terminator - is appended to the buffer. */ +/* Same as data_out, and additionally recodes the output from + native form into the given legacy character ENCODING. */ void data_out_legacy (const union value *input, enum legacy_encoding encoding, const struct fmt_spec *format, char *output) @@ -106,12 +103,18 @@ data_out_legacy (const union value *input, enum legacy_encoding encoding, legacy_recode (LEGACY_NATIVE, output, encoding, output, format->w); } -/* Same as data_out_legacy with ENCODING set to LEGACY_NATIVE. */ +/* Converts the INPUT value into printable form in exactly + FORMAT->W characters in OUTPUT according to format + specification FORMAT. No null terminator is appended to the + buffer. + + INPUT must be the correct width for FORMAT, that is, its + width must be fmt_var_width(FORMAT). */ void -data_out (const union value *value, const struct fmt_spec *format, +data_out (const union value *input, const struct fmt_spec *format, char *output) { - return data_out_legacy (value, LEGACY_NATIVE, format, output); + return data_out_legacy (input, LEGACY_NATIVE, format, output); } @@ -415,7 +418,7 @@ output_date (const union value *input, const struct fmt_spec *format, } } - buf_copy_lpad (output, format->w, tmp, p - tmp); + buf_copy_lpad (output, format->w, tmp, p - tmp, ' '); return; overflow: @@ -439,7 +442,7 @@ output_WKDAY (const union value *input, const struct fmt_spec *format, }; if (input->f >= 1 && input->f < 8) - buf_copy_str_rpad (output, format->w, weekdays[(int) input->f - 1]); + buf_copy_str_rpad (output, format->w, weekdays[(int) input->f - 1], ' '); else { if (input->f != SYSMIS) @@ -460,7 +463,7 @@ output_MONTH (const union value *input, const struct fmt_spec *format, }; if (input->f >= 1 && input->f < 13) - buf_copy_str_rpad (output, format->w, months[(int) input->f - 1]); + buf_copy_str_rpad (output, format->w, months[(int) input->f - 1], ' '); else {
if (input->f != SYSMIS) @@ -474,7 +477,7 @@ static void output_A (const union value *input, const struct fmt_spec *format, char *output) { - memcpy (output, input->s, format->w); + memcpy (output, value_str (input, format->w), format->w); } /* Outputs AHEX format. */ @@ -482,7 +485,7 @@ static void output_AHEX (const union value *input, const struct fmt_spec *format, char *output) { - output_hex (input->s, format->w / 2, output); + output_hex (value_str (input, format->w), format->w / 2, output); } /* Decimal and scientific formatting. */ @@ -934,7 +937,7 @@ output_infinite (double number, const struct fmt_spec *format, char *output) else s = "Unknown"; - buf_copy_str_lpad (output, format->w, s); + buf_copy_str_lpad (output, format->w, s, ' '); } else output_overflow (format, output); diff --git a/src/data/datasheet.c b/src/data/datasheet.c index 64dea786..4a483685 100644 --- a/src/data/datasheet.c +++ b/src/data/datasheet.c @@ -25,11 +25,13 @@ #include #include #include -#include +#include #include #include +#include #include #include +#include #include #include @@ -37,6 +39,8 @@ #include "md4.h" #include "xalloc.h" +struct column; + static struct axis *axis_create (void); static struct axis *axis_clone (const struct axis *); static void axis_destroy (struct axis *); @@ -65,27 +69,25 @@ static void axis_move (struct axis *, unsigned long int new_start, unsigned long int cnt); -static struct source *source_create_empty (size_t column_cnt); +static struct source *source_create_empty (size_t n_bytes); static struct source *source_create_casereader (struct casereader *); static struct source *source_clone (const struct source *); static void source_destroy (struct source *); -static casenumber source_get_backing_row_cnt (const struct source *); -static size_t source_get_column_cnt (const struct source *); - -static bool source_read (const struct source *, - casenumber row, size_t column, - union value[], size_t value_cnt); -static bool source_write (struct source *, - 
casenumber row, size_t column, - const union value[], size_t value_cnt); -static bool source_write_columns (struct source *, size_t start_column, - const union value[], size_t value_cnt); -static bool source_has_backing (const struct source *); -static void source_increase_use (struct source *, size_t delta); -static void source_decrease_use (struct source *, size_t delta); +static casenumber source_get_backing_n_rows (const struct source *); + +static int source_allocate_column (struct source *, int width); +static void source_release_column (struct source *, int ofs, int width); static bool source_in_use (const struct source *); +static bool source_read (const struct column *, casenumber row, union value *); +static bool source_write (const struct column *, casenumber row, + const union value *); +static bool source_write_column (struct column *, const union value *); +static bool source_has_backing (const struct source *); + +static int get_source_index (const struct datasheet *ds, const struct source *source); + /* A datasheet is internally composed from a set of data files, called "sources". The sources that make up a datasheet must have the same number of rows (cases), but their numbers of @@ -99,37 +101,47 @@ static bool source_in_use (const struct source *); to the column mapping. Each source in a datasheet can be a casereader or a - sparse_cases. Casereaders are read-only, so when sources made - from casereaders need to be modified, it is done "virtually" - through being overlaid by a sparse_cases. */ + sparse_xarray. Casereaders are read-only, so when sources + made from casereaders need to be modified, it is done + "virtually" through being overlaid by a sparse_xarray. */ +struct source + { + struct range_set *avail; /* Free bytes are set to 1s. */ + struct sparse_xarray *data; /* Data at top level, atop the backing. */ + struct casereader *backing; /* Backing casereader (or null). */ + casenumber backing_rows; /* Number of rows in backing (if backed). 
*/ + size_t n_used; /* Number of columns in use (if backed). */ + }; + +/* A logical column. */ +struct column + { + struct source *source; /* Source of the underlying physical column. */ + int value_ofs; /* If 'source' has a backing casereader, + column's value offset in its cases. */ + int byte_ofs; /* Byte offset in source's sparse_xarray. */ + int width; /* 0=numeric, otherwise string width. */ + }; /* A datasheet. */ struct datasheet -{ - /* Mappings from logical to physical columns/rows. */ - struct axis *columns; - struct axis *rows; - - /* Mapping from physical columns to "source_info"s. */ - struct range_map sources; + { + /* Data sources. */ + struct source **sources; /* Sources, in no particular order. */ + size_t n_sources; /* Number of sources. */ - /* Minimum number of columns to put in a new source when we - need new columns and none are free. We double it whenever - we add a new source to keep the number of file descriptors - needed by the datasheet to a minimum, reducing the - likelihood of running out. */ - unsigned column_min_alloc; + /* Columns. */ + struct caseproto *proto; /* Prototype for rows (initialized lazily). */ + struct column *columns; /* Logical to physical column mapping. */ + size_t n_columns; /* Number of logical columns. */ + unsigned column_min_alloc; /* Min. # of columns to put in a new source. */ - /* Indicates corrupted data in the datasheet. */ - struct taint *taint; -}; + /* Rows. */ + struct axis *rows; /* Logical to physical row mapping. */ -/* Maps from a range of physical columns to a source. */ -struct source_info -{ - struct range_map_node column_range; - struct source *source; -}; + /* Tainting. */ + struct taint *taint; /* Indicates corrupted data. */ + }; /* Is this operation a read or a write?
*/ enum rw_op @@ -138,13 +150,51 @@ enum rw_op OP_WRITE }; -static void free_source_info (struct datasheet *, struct source_info *); -static struct source_info *source_info_from_range_map ( - struct range_map_node *); +static void allocate_column (struct datasheet *, int width, struct column *); +static void release_source (struct datasheet *, struct source *); static bool rw_case (struct datasheet *ds, enum rw_op op, - casenumber lrow, size_t start_column, size_t column_cnt, + casenumber lrow, size_t start_column, size_t n_columns, union value data[]); +/* Returns the number of bytes needed to store a value with the + given WIDTH on disk. */ +static size_t +width_to_n_bytes (int width) +{ + return width == 0 ? sizeof (double) : width; +} + +/* Returns the address of the data in VALUE (for reading or + writing to/from disk). VALUE must have the given WIDTH. */ +static void * +value_to_data (const union value *value_, int width) +{ + union value *value = (union value *) value_; + assert (sizeof value->f == sizeof (double)); + if (width == 0) + return &value->f; + else + return value_str_rw (value, width); +} + +/* Returns the number of bytes needed to store all the values in + PROTO on disk. */ +static size_t +caseproto_to_n_bytes (const struct caseproto *proto) +{ + size_t n_bytes; + size_t i; + + n_bytes = 0; + for (i = 0; i < caseproto_get_n_widths (proto); i++) + { + int width = caseproto_get_width (proto, i); + if (width >= 0) + n_bytes += width_to_n_bytes (width); + } + return n_bytes; +} + /* Creates and returns a new datasheet. If READER is nonnull, then the datasheet initially contains @@ -152,42 +202,50 @@ static bool rw_case (struct datasheet *ds, enum rw_op op, struct datasheet * datasheet_create (struct casereader *reader) { - /* Create datasheet. 
*/ struct datasheet *ds = xmalloc (sizeof *ds); - ds->columns = axis_create (); + ds->sources = NULL; + ds->n_sources = 0; + ds->proto = NULL; + ds->columns = NULL; + ds->n_columns = 0; + ds->column_min_alloc = 8; ds->rows = axis_create (); - range_map_init (&ds->sources); - ds->column_min_alloc = 1; ds->taint = taint_create (); - /* Add backing. */ if (reader != NULL) { - size_t column_cnt; - casenumber row_cnt; - struct source_info *si; - - si = xmalloc (sizeof *si); - si->source = source_create_casereader (reader); - column_cnt = source_get_column_cnt (si->source); - row_cnt = source_get_backing_row_cnt (si->source); - source_increase_use (si->source, column_cnt); - - if ( column_cnt > 0 ) - { - unsigned long int column_start; - column_start = axis_extend (ds->columns, column_cnt); - axis_insert (ds->columns, 0, column_start, column_cnt); - range_map_insert (&ds->sources, column_start, column_cnt, - &si->column_range); - } - - if ( row_cnt > 0 ) - { - unsigned long int row_start; - row_start = axis_extend (ds->rows, row_cnt); - axis_insert (ds->rows, 0, row_start, row_cnt); - } + casenumber n_rows; + size_t byte_ofs; + size_t i; + + taint_propagate (casereader_get_taint (reader), ds->taint); + + ds->proto = caseproto_ref (casereader_get_proto (reader)); + + ds->sources = xmalloc (sizeof *ds->sources); + ds->sources[0] = source_create_casereader (reader); + ds->n_sources = 1; + + ds->n_columns = caseproto_get_n_widths (ds->proto); + ds->columns = xnmalloc (ds->n_columns, sizeof *ds->columns); + byte_ofs = 0; + for (i = 0; i < ds->n_columns; i++) + { + struct column *column = &ds->columns[i]; + int width = caseproto_get_width (ds->proto, i); + column->source = ds->sources[0]; + column->width = width; + if (width >= 0) + { + column->value_ofs = i; + column->byte_ofs = byte_ofs; + byte_ofs += width_to_n_bytes (column->width); + } + } + + n_rows = source_get_backing_n_rows (ds->sources[0]); + if (n_rows > 0) + axis_insert (ds->rows, 0, axis_extend (ds->rows, n_rows), 
n_rows); } return ds; @@ -197,21 +255,47 @@ datasheet_create (struct casereader *reader) void datasheet_destroy (struct datasheet *ds) { + size_t i; + if (ds == NULL) return; - axis_destroy (ds->columns); + for (i = 0; i < ds->n_sources; i++) + source_destroy (ds->sources[i]); + free (ds->sources); + caseproto_unref (ds->proto); + free (ds->columns); axis_destroy (ds->rows); - while (!range_map_is_empty (&ds->sources)) - { - struct range_map_node *r = range_map_first (&ds->sources); - struct source_info *si = source_info_from_range_map (r); - free_source_info (ds, si); - } taint_destroy (ds->taint); free (ds); } +/* Returns the prototype for the cases in DS. The caller must + not unref the returned prototype. */ +const struct caseproto * +datasheet_get_proto (const struct datasheet *ds_) +{ + struct datasheet *ds = (struct datasheet *) ds_; + if (ds->proto == NULL) + { + size_t i; + + ds->proto = caseproto_create (); + for (i = 0; i < ds->n_columns; i++) + ds->proto = caseproto_add_width (ds->proto, ds->columns[i].width); + } + return ds->proto; +} + +/* Returns the width of the given COLUMN within DS. + COLUMN must be less than the number of columns in DS. */ +int +datasheet_get_column_width (const struct datasheet *ds, size_t column) +{ + assert (column < datasheet_get_n_columns (ds)); + return ds->columns[column].width; +} + /* Moves datasheet DS to a new location in memory, and returns the new location. Afterward, the datasheet must not be accessed at its former location. @@ -252,141 +336,209 @@ datasheet_get_taint (const struct datasheet *ds) /* Returns the number of rows in DS. */ casenumber -datasheet_get_row_cnt (const struct datasheet *ds) +datasheet_get_n_rows (const struct datasheet *ds) { return axis_get_size (ds->rows); } /* Returns the number of columns in DS. 
*/ size_t -datasheet_get_column_cnt (const struct datasheet *ds) +datasheet_get_n_columns (const struct datasheet *ds) { - return axis_get_size (ds->columns); + return ds->n_columns; } -/* Inserts CNT columns into datasheet DS just before column - BEFORE. Initializes the contents of each row in the inserted - columns to the CNT values in INIT_VALUES. +/* Inserts a column of the given WIDTH into datasheet DS just + before column BEFORE. Initializes the contents of each row in + the inserted column to VALUE (which must have width WIDTH). Returns true if successful, false on failure. In case of failure, the datasheet is unchanged. */ bool -datasheet_insert_columns (struct datasheet *ds, - const union value init_values[], size_t cnt, - size_t before) +datasheet_insert_column (struct datasheet *ds, + const union value *value, int width, size_t before) { - size_t added = 0; - while (cnt > 0) - { - unsigned long first_phy; /* First allocated physical column. */ - unsigned long phy_cnt; /* Number of allocated physical columns. */ + struct column *col; - /* Allocate physical columns from the pool of available - columns. */ - if (!axis_allocate (ds->columns, cnt, &first_phy, &phy_cnt)) - { - /* No columns were available. Create a new source and - extend the axis to make some new ones available. 
*/ - struct source_info *si; - - phy_cnt = MAX (cnt, ds->column_min_alloc); - first_phy = axis_extend (ds->columns, phy_cnt); - ds->column_min_alloc = MIN (65536, ds->column_min_alloc * 2); - - si = xmalloc (sizeof *si); - si->source = source_create_empty (phy_cnt); - range_map_insert (&ds->sources, first_phy, phy_cnt, - &si->column_range); - if (phy_cnt > cnt) - { - axis_make_available (ds->columns, first_phy + cnt, - phy_cnt - cnt); - phy_cnt = cnt; - } - } + ds->columns = xnrealloc (ds->columns, + ds->n_columns + 1, sizeof *ds->columns); + insert_element (ds->columns, ds->n_columns, sizeof *ds->columns, before); + col = &ds->columns[before]; + ds->n_columns++; - /* Initialize the columns and insert them into the columns - axis. */ - while (phy_cnt > 0) - { - struct range_map_node *r; /* Range map holding FIRST_PHY column. */ - struct source_info *s; /* Source containing FIRST_PHY column. */ - size_t source_avail; /* Number of phys columns available. */ - size_t source_cnt; /* Number of phys columns to use. */ - - /* Figure out how many columns we can and want to take - starting at FIRST_PHY, and then insert them into the - columns axis. */ - r = range_map_lookup (&ds->sources, first_phy); - s = source_info_from_range_map (r); - source_avail = range_map_node_get_end (r) - first_phy; - source_cnt = MIN (phy_cnt, source_avail); - axis_insert (ds->columns, before, first_phy, source_cnt); - - /* Initialize the data for those columns in the - source. */ - if (!source_write_columns (s->source, - first_phy - range_map_node_get_start (r), - init_values, source_cnt)) - { - datasheet_delete_columns (ds, before - added, - source_cnt + added); - taint_set_taint (ds->taint); - return false; - } - source_increase_use (s->source, source_cnt); - - /* Advance. 
*/ - phy_cnt -= source_cnt; - first_phy += source_cnt; - init_values += source_cnt; - cnt -= source_cnt; - before += source_cnt; - added += source_cnt; - } + allocate_column (ds, width, col); + + if (width >= 0 && !source_write_column (col, value)) + { + datasheet_delete_columns (ds, before, 1); + taint_set_taint (ds->taint); + return false; } + return true; } -/* Deletes the CNT columns in DS starting from column START. */ +/* Deletes the N columns in DS starting from column START. */ void -datasheet_delete_columns (struct datasheet *ds, size_t start, size_t cnt) +datasheet_delete_columns (struct datasheet *ds, size_t start, size_t n) { - size_t lcol; - - assert ( start + cnt <= axis_get_size (ds->columns) ); - - /* Free up columns for reuse. */ - for (lcol = start; lcol < start + cnt; lcol++) + if (n > 0) { - size_t pcol = axis_map (ds->columns, lcol); - struct range_map_node *r = range_map_lookup (&ds->sources, pcol); - struct source_info *si = source_info_from_range_map (r); + size_t i; - source_decrease_use (si->source, 1); - if (source_has_backing (si->source)) + for (i = start; i < start + n; i++) { - if (!source_in_use (si->source)) - free_source_info (ds, si); + struct column *column = &ds->columns[i]; + struct source *source = column->source; + source_release_column (source, column->byte_ofs, column->width); + release_source (ds, source); } - else - axis_make_available (ds->columns, pcol, 1); - } - /* Remove columns from logical-to-physical mapping. */ - axis_remove (ds->columns, start, cnt); + remove_range (ds->columns, ds->n_columns, sizeof *ds->columns, start, n); + ds->n_columns -= n; + + caseproto_unref (ds->proto); + ds->proto = NULL; + } } -/* Moves the CNT columns in DS starting at position OLD_START so +/* Moves the N columns in DS starting at position OLD_START so that they then start at position NEW_START. 
Equivalent to deleting the column rows, then inserting them at what becomes - position NEW_START after the deletion.*/ + position NEW_START after the deletion. */ void datasheet_move_columns (struct datasheet *ds, size_t old_start, size_t new_start, - size_t cnt) + size_t n) +{ + move_range (ds->columns, ds->n_columns, sizeof *ds->columns, + old_start, new_start, n); + + caseproto_unref (ds->proto); + ds->proto = NULL; +} + +struct resize_datasheet_value_aux + { + union value src_value; + size_t src_ofs; + int src_width; + + void (*resize_cb) (const union value *, union value *, void *aux); + void *resize_cb_aux; + + union value dst_value; + size_t dst_ofs; + int dst_width; + }; + +static bool +resize_datasheet_value (const void *src, void *dst, void *aux_) { - axis_move (ds->columns, old_start, new_start, cnt); + struct resize_datasheet_value_aux *aux = aux_; + + memcpy (value_to_data (&aux->src_value, aux->src_width), + (uint8_t *) src + aux->src_ofs, + width_to_n_bytes (aux->src_width)); + + aux->resize_cb (&aux->src_value, &aux->dst_value, aux->resize_cb_aux); + + memcpy ((uint8_t *) dst + aux->dst_ofs, + value_to_data (&aux->dst_value, aux->dst_width), + width_to_n_bytes (aux->dst_width)); + + return true; +} + +bool +datasheet_resize_column (struct datasheet *ds, size_t column, int new_width, + void (*resize_cb) (const union value *, + union value *, void *aux), + void *resize_cb_aux) +{ + /* XXX needs a test. */ + struct column old_col; + struct column *col; + int old_width; + + assert (column < datasheet_get_n_columns (ds)); + + col = &ds->columns[column]; + old_col = *col; + old_width = old_col.width; + + if (old_width == new_width) + { + /* FIXME: for consistency, we should call resize_cb() on + each row. 
*/ + } + else if (new_width == -1) + { + datasheet_delete_columns (ds, column, 1); + datasheet_insert_column (ds, NULL, -1, column); + } + else if (old_width == -1) + { + union value value; + value_init (&value, new_width); + value_set_missing (&value, new_width); + if (resize_cb != NULL) + resize_cb (NULL, &value, resize_cb_aux); + datasheet_delete_columns (ds, column, 1); + datasheet_insert_column (ds, &value, new_width, column); + value_destroy (&value, new_width); + } + else if (source_has_backing (col->source)) + { + unsigned long int n_rows = axis_get_size (ds->rows); + union value src, dst; + size_t row; + + source_release_column (col->source, col->byte_ofs, col->width); + allocate_column (ds, new_width, col); + + value_init (&src, old_width); + value_init (&dst, new_width); + for (row = 0; row < n_rows; row++) + { + if (!source_read (&old_col, row, &src)) + { + /* FIXME: back out col changes. */ + return false; + } + resize_cb (&src, &dst, resize_cb_aux); + if (!source_write (col, row, &dst)) + { + /* FIXME: back out col changes. 
*/ + return false; + } + } + + release_source (ds, old_col.source); + } + else + { + struct resize_datasheet_value_aux aux; + + source_release_column (col->source, col->byte_ofs, col->width); + allocate_column (ds, new_width, col); + + value_init (&aux.src_value, old_col.width); + aux.src_ofs = old_col.byte_ofs; + aux.src_width = old_col.width; + aux.resize_cb = resize_cb; + aux.resize_cb_aux = resize_cb_aux; + value_init (&aux.dst_value, new_width); + aux.dst_ofs = col->byte_ofs; + aux.dst_width = new_width; + sparse_xarray_copy (old_col.source->data, col->source->data, + resize_datasheet_value, &aux); + value_destroy (&aux.src_value, old_width); + value_destroy (&aux.dst_value, new_width); + + release_source (ds, old_col.source); + } + return true; } /* Retrieves and returns the contents of the given ROW in @@ -396,10 +548,10 @@ datasheet_move_columns (struct datasheet *ds, struct ccase * datasheet_get_row (const struct datasheet *ds, casenumber row) { - size_t column_cnt = datasheet_get_column_cnt (ds); - struct ccase *c = case_create (column_cnt); + size_t n_columns = datasheet_get_n_columns (ds); + struct ccase *c = case_create (datasheet_get_proto (ds)); if (rw_case ((struct datasheet *) ds, OP_READ, - row, 0, column_cnt, case_data_all_rw (c))) + row, 0, n_columns, case_data_all_rw (c))) return c; else { @@ -415,35 +567,36 @@ datasheet_get_row (const struct datasheet *ds, casenumber row) bool datasheet_put_row (struct datasheet *ds, casenumber row, struct ccase *c) { - size_t column_cnt = datasheet_get_column_cnt (ds); - bool ok = rw_case (ds, OP_WRITE, row, 0, column_cnt, + size_t n_columns = datasheet_get_n_columns (ds); + bool ok = rw_case (ds, OP_WRITE, row, 0, n_columns, (union value *) case_data_all (c)); case_unref (c); return ok; } -/* Stores the values of the WIDTH columns in DS in the given ROW - starting at COLUMN in DS into VALUES. Returns true if +/* Stores the values of COLUMN in DS in the given ROW in DS into + VALUE. 
The caller must have already initialized VALUE as a + value of the appropriate width (as returned by + datasheet_get_column_width (DS, COLUMN)). Returns true if successful, false on I/O error. */ bool -datasheet_get_value (const struct datasheet *ds, casenumber row, size_t column, - union value *value, int width) +datasheet_get_value (const struct datasheet *ds, casenumber row, + size_t column, union value *value) { - assert ( row >= 0 ); - return rw_case ((struct datasheet *) ds, - OP_READ, row, column, value_cnt_from_width (width), value); + assert (row >= 0); + return rw_case ((struct datasheet *) ds, OP_READ, row, column, 1, value); } -/* Stores the WIDTH given VALUES into the given ROW in DS - starting at COLUMN. Returns true if successful, false on I/O - error. On failure, the given ROW might be partially modified - or corrupted. */ +/* Stores VALUE into DS in the given ROW and COLUMN. VALUE must + have the correct width for COLUMN (as returned by + datasheet_get_column_width (DS, COLUMN)). Returns true if + successful, false on I/O error. On failure, ROW might be + partially modified or corrupted. 
*/ bool -datasheet_put_value (struct datasheet *ds, casenumber row, size_t column, - const union value *value, int width) +datasheet_put_value (struct datasheet *ds, casenumber row, + size_t column, const union value *value) { - return rw_case (ds, OP_WRITE, row, column, value_cnt_from_width (width), - (union value *) value); + return rw_case (ds, OP_WRITE, row, column, 1, (union value *) value); } /* Inserts the CNT cases at C into datasheet DS just before row @@ -535,8 +688,8 @@ datasheet_make_reader (struct datasheet *ds) { struct casereader *reader; ds = datasheet_rename (ds); - reader = casereader_create_random (datasheet_get_column_cnt (ds), - datasheet_get_row_cnt (ds), + reader = casereader_create_random (datasheet_get_proto (ds), + datasheet_get_n_rows (ds), &datasheet_reader_class, ds); taint_propagate (datasheet_get_taint (ds), casereader_get_taint (reader)); return reader; @@ -548,7 +701,7 @@ datasheet_reader_read (struct casereader *reader UNUSED, void *ds_, casenumber case_idx) { struct datasheet *ds = ds_; - if (case_idx < datasheet_get_row_cnt (ds)) + if (case_idx < datasheet_get_n_rows (ds)) { struct ccase *c = datasheet_get_row (ds, case_idx); if (c == NULL) @@ -584,50 +737,93 @@ static const struct casereader_random_class datasheet_reader_class = datasheet_reader_advance, }; -/* Removes SI from DS's set of sources and destroys its - source. 
*/ static void -free_source_info (struct datasheet *ds, struct source_info *si) +allocate_column (struct datasheet *ds, int width, struct column *column) { - range_map_delete (&ds->sources, &si->column_range); - source_destroy (si->source); - free (si); + caseproto_unref (ds->proto); + ds->proto = NULL; + + column->value_ofs = -1; + column->width = width; + if (width >= 0) + { + int n_bytes; + size_t i; + + n_bytes = width_to_n_bytes (width); + for (i = 0; i < ds->n_sources; i++) + { + column->source = ds->sources[i]; + column->byte_ofs = source_allocate_column (column->source, n_bytes); + if (column->byte_ofs >= 0) + return; + } + + column->source = source_create_empty (MAX (n_bytes, + ds->column_min_alloc)); + ds->sources = xnrealloc (ds->sources, + ds->n_sources + 1, sizeof *ds->sources); + ds->sources[ds->n_sources++] = column->source; + + ds->column_min_alloc = MIN (65536, ds->column_min_alloc * 2); + + column->byte_ofs = source_allocate_column (column->source, n_bytes); + assert (column->byte_ofs >= 0); + } + else + { + column->source = NULL; + column->byte_ofs = -1; + } } -static struct source_info * -source_info_from_range_map (struct range_map_node *node) +static void +release_source (struct datasheet *ds, struct source *source) { - return range_map_data (node, struct source_info, column_range); + if (source_has_backing (source) && !source_in_use (source)) + { + /* Since only the first source to be added ever + has a backing, this source must have index + 0. */ + assert (source == ds->sources[0]); + ds->sources[0] = ds->sources[--ds->n_sources]; + source_destroy (source); + } } /* Reads (if OP is OP_READ) or writes (if op is OP_WRITE) the - COLUMN_CNT columns starting from column START_COLUMN in row - LROW to/from the COLUMN_CNT values in DATA. */ + N_COLUMNS columns starting from column START_COLUMN in row + LROW to/from the N_COLUMNS values in DATA. 
*/ static bool rw_case (struct datasheet *ds, enum rw_op op, - casenumber lrow, size_t start_column, size_t column_cnt, + casenumber lrow, size_t start_column, size_t n_columns, union value data[]) { casenumber prow; - size_t lcol; + size_t i; - assert (lrow < datasheet_get_row_cnt (ds)); - assert (column_cnt <= datasheet_get_column_cnt (ds)); - assert (start_column + column_cnt <= datasheet_get_column_cnt (ds)); + assert (lrow < datasheet_get_n_rows (ds)); + assert (n_columns <= datasheet_get_n_columns (ds)); + assert (start_column + n_columns <= datasheet_get_n_columns (ds)); prow = axis_map (ds->rows, lrow); - for (lcol = start_column; lcol < start_column + column_cnt; lcol++, data++) + for (i = 0; i < n_columns; i++) { - size_t pcol = axis_map (ds->columns, lcol); - struct range_map_node *r = range_map_lookup (&ds->sources, pcol); - struct source_info *s = source_info_from_range_map (r); - size_t pcol_ofs = pcol - range_map_node_get_start (r); - if (!(op == OP_READ - ? source_read (s->source, prow, pcol_ofs, data, 1) - : source_write (s->source, prow, pcol_ofs, data, 1))) + struct column *c = &ds->columns[start_column + i]; + if (c->width >= 0) { - taint_set_taint (ds->taint); - return false; + bool ok; + + if (op == OP_READ) + ok = source_read (c, prow, &data[i]); + else + ok = source_write (c, prow, &data[i]); + + if (!ok) + { + taint_set_taint (ds->taint); + return false; + } } } return true; @@ -640,9 +836,7 @@ rw_case (struct datasheet *ds, enum rw_op op, axis_map and axis_get_size functions inspect this mapping, and the axis_insert, axis_remove, and axis_move functions modify it. Second, it tracks the set of ordinates that are unused - and available for reuse. (Not all unused ordinates are - available for reuse: in particular, unused columns that are - backed by a casereader are never reused.) The axis_allocate, + and available for reuse. The axis_allocate, axis_make_available, and axis_extend functions affect the set of available ordinates. 
*/ struct axis @@ -1018,24 +1212,21 @@ check_axis_merged (const struct axis *axis UNUSED) } /* A source. */ -struct source -{ - size_t columns_used; /* Number of columns in use by client. */ - struct sparse_cases *data; /* Data at top level, atop the backing. */ - struct casereader *backing; /* Backing casereader (or null). */ - casenumber backing_rows; /* Number of rows in backing (if nonnull). */ -}; -/* Creates and returns an empty, unbacked source with COLUMN_CNT - columns and an initial "columns_used" of 0. */ +/* Creates and returns an empty, unbacked source with N_BYTES + bytes per case, none of which are initially in use. */ static struct source * -source_create_empty (size_t column_cnt) +source_create_empty (size_t n_bytes) { struct source *source = xmalloc (sizeof *source); - source->columns_used = 0; - source->data = sparse_cases_create (column_cnt); + size_t row_size = n_bytes + 4 * sizeof (void *); + size_t max_memory_rows = settings_get_workspace () / row_size; + source->avail = range_set_create (); + range_set_insert (source->avail, 0, n_bytes); + source->data = sparse_xarray_create (n_bytes, MAX (max_memory_rows, 4)); source->backing = NULL; source->backing_rows = 0; + source->n_used = 0; return source; } @@ -1044,10 +1235,22 @@ source_create_empty (size_t column_cnt) static struct source * source_create_casereader (struct casereader *reader) { - struct source *source - = source_create_empty (casereader_get_value_cnt (reader)); + const struct caseproto *proto = casereader_get_proto (reader); + size_t n_bytes = caseproto_to_n_bytes (proto); + struct source *source = source_create_empty (n_bytes); + size_t n_columns; + size_t i; + + range_set_delete (source->avail, 0, n_bytes); source->backing = reader; source->backing_rows = casereader_count_cases (reader); + + source->n_used = 0; + n_columns = caseproto_get_n_widths (proto); + for (i = 0; i < n_columns; i++) + if (caseproto_get_width (proto, i) >= 0) + source->n_used++; + return source; } @@ 
-1060,10 +1263,11 @@ static struct source * source_clone (const struct source *old) { struct source *new = xmalloc (sizeof *new); - new->columns_used = old->columns_used; - new->data = sparse_cases_clone (old->data); + new->avail = range_set_clone (old->avail, NULL); + new->data = sparse_xarray_clone (old->data); new->backing = old->backing != NULL ? casereader_clone (old->backing) : NULL; new->backing_rows = old->backing_rows; + new->n_used = old->n_used; if (new->data == NULL) { source_destroy (new); @@ -1072,23 +1276,28 @@ source_clone (const struct source *old) return new; } -/* Increases the columns_used count of SOURCE by DELTA. - The new value must not exceed SOURCE's number of columns. */ -static void -source_increase_use (struct source *source, size_t delta) +static int +source_allocate_column (struct source *source, int width) { - source->columns_used += delta; - assert (source->columns_used <= sparse_cases_get_value_cnt (source->data)); + unsigned long int start; + int n_bytes; + + assert (width >= 0); + n_bytes = width_to_n_bytes (width); + if (source->backing == NULL + && range_set_allocate_fully (source->avail, n_bytes, &start)) + return start; + else + return -1; } -/* Decreases the columns_used count of SOURCE by DELTA. - This must not attempt to decrease the columns_used count below - zero. */ static void -source_decrease_use (struct source *source, size_t delta) +source_release_column (struct source *source, int ofs, int width) { - assert (delta <= source->columns_used); - source->columns_used -= delta; + assert (width >= 0); + range_set_insert (source->avail, ofs, width_to_n_bytes (width)); + if (source->backing != NULL) + source->n_used--; } /* Returns true if SOURCE has any columns in use, @@ -1096,7 +1305,7 @@ source_decrease_use (struct source *source, size_t delta) static bool source_in_use (const struct source *source) { - return source->columns_used > 0; + return source->n_used > 0; } /* Destroys SOURCE and its data and backing, if any. 
*/ @@ -1105,7 +1314,8 @@ source_destroy (struct source *source) { if (source != NULL) { - sparse_cases_destroy (source->data); + range_set_destroy (source->avail); + sparse_xarray_destroy (source->data); casereader_destroy (source->backing); free (source); } @@ -1114,88 +1324,99 @@ source_destroy (struct source *source) /* Returns the number of rows in SOURCE's backing casereader (SOURCE must have a backing casereader). */ static casenumber -source_get_backing_row_cnt (const struct source *source) +source_get_backing_n_rows (const struct source *source) { assert (source_has_backing (source)); return source->backing_rows; } -/* Returns the number of columns in SOURCE. */ -static size_t -source_get_column_cnt (const struct source *source) -{ - return sparse_cases_get_value_cnt (source->data); -} +/* Reads the given COLUMN from SOURCE in the given ROW, into + VALUE. Returns true if successful, false on I/O error. -/* Reads VALUE_CNT columns from SOURCE in the given ROW, starting - from COLUMN, into VALUES. Returns true if successful, false - on I/O error. */ + The caller must have initialized VALUE with the proper + width. 
*/ static bool -source_read (const struct source *source, - casenumber row, size_t column, - union value values[], size_t value_cnt) +source_read (const struct column *column, casenumber row, union value *value) { - if (source->backing == NULL || sparse_cases_contains_row (source->data, row)) - return sparse_cases_read (source->data, row, column, values, value_cnt); + struct source *source = column->source; + + assert (column->width >= 0); + if (source->backing == NULL + || sparse_xarray_contains_row (source->data, row)) + return sparse_xarray_read (source->data, row, column->byte_ofs, + width_to_n_bytes (column->width), + value_to_data (value, column->width)); else { struct ccase *c = casereader_peek (source->backing, row); bool ok = c != NULL; if (ok) { - case_copy_out (c, column, values, value_cnt); + value_copy (value, case_data_idx (c, column->value_ofs), + column->width); case_unref (c); } return ok; } } -/* Writes the VALUE_CNT values in VALUES to SOURCE in the given - ROW, starting at ROW. Returns true if successful, false on - I/O error. On error, the row's data may be completely or - partially corrupted, both inside and outside the region to be - written. 
*/ static bool -source_write (struct source *source, - casenumber row, size_t column, - const union value values[], size_t value_cnt) +copy_case_into_source (struct source *source, struct ccase *c, casenumber row) { - size_t column_cnt = sparse_cases_get_value_cnt (source->data); - bool ok; + const struct caseproto *proto = casereader_get_proto (source->backing); + size_t n_widths = caseproto_get_n_widths (proto); + size_t ofs; + size_t i; - if (source->backing == NULL - || (column == 0 && value_cnt == column_cnt) - || sparse_cases_contains_row (source->data, row)) - ok = sparse_cases_write (source->data, row, column, values, value_cnt); - else + ofs = 0; + for (i = 0; i < n_widths; i++) { - struct ccase *c; - - if (row < source->backing_rows) - c = case_unshare (casereader_peek (source->backing, row)); - else + int width = caseproto_get_width (proto, i); + if (width >= 0) { - /* It's not one of the backed rows. Ideally, this - should never happen: we'd always be writing the full - contents of new, unbacked rows in a single call to - this function, so that the first case above would - trigger. But that's a little difficult at higher - levels, so that we in fact usually write the full - contents of new, unbacked rows in multiple calls to - this function. Make this work. */ - c = case_create (column_cnt); + int n_bytes = width_to_n_bytes (width); + if (!sparse_xarray_write (source->data, row, ofs, n_bytes, + value_to_data (case_data_idx (c, i), + width))) + return false; + ofs += n_bytes; } - ok = c != NULL; + } + return true; +} - if (ok) - { - case_copy_in (c, column, values, value_cnt); - ok = sparse_cases_write (source->data, row, 0, - case_data_all (c), column_cnt); - case_unref (c); - } +/* Writes VALUE to SOURCE in the given ROW and COLUMN. Returns + true if successful, false on I/O error. On error, the row's + data may be completely or partially corrupted, both inside and + outside the region to be written. 
*/ +static bool +source_write (const struct column *column, casenumber row, + const union value *value) +{ + struct source *source = column->source; + struct casereader *backing = source->backing; + + assert (column->width >= 0); + if (backing != NULL + && !sparse_xarray_contains_row (source->data, row) + && row < source->backing_rows) + { + struct ccase *c; + bool ok; + + c = casereader_peek (backing, row); + if (c == NULL) + return false; + + ok = copy_case_into_source (source, c, row); + case_unref (c); + if (!ok) + return false; } - return ok; + + return sparse_xarray_write (source->data, row, column->byte_ofs, + width_to_n_bytes (column->width), + value_to_data (value, column->width)); } /* Within SOURCE, which must not have a backing casereader, @@ -1205,17 +1426,20 @@ source_write (struct source *source, false if an I/O error occurs. We don't support backing != NULL because (1) it's harder and - (2) source_write_columns is only called by - datasheet_insert_columns, which doesn't reuse columns from + (2) this function is only called by + datasheet_insert_column, which doesn't reuse columns from sources that are backed by casereaders. */ static bool -source_write_columns (struct source *source, size_t start_column, - const union value values[], size_t value_cnt) +source_write_column (struct column *column, const union value *value) { - assert (source->backing == NULL); + int width = column->width; + + assert (column->source->backing == NULL); + assert (width >= 0); - return sparse_cases_write_columns (source->data, start_column, - values, value_cnt); + return sparse_xarray_write_columns (column->source->data, column->byte_ofs, + width_to_n_bytes (width), + value_to_data (value, width)); } /* Returns true if SOURCE has a backing casereader, false @@ -1228,11 +1452,16 @@ source_has_backing (const struct source *source) /* Datasheet model checker test driver. */ -/* Maximum size of datasheet supported for model checking - purposes. 
*/ -#define MAX_ROWS 5 -#define MAX_COLS 5 +static int +get_source_index (const struct datasheet *ds, const struct source *source) +{ + size_t i; + for (i = 0; i < ds->n_sources; i++) + if (ds->sources[i] == source) + return i; + NOT_REACHED (); +} /* Clones the structure and contents of ODS into a new datasheet, and returns the new datasheet. */ @@ -1240,28 +1469,30 @@ struct datasheet * clone_datasheet (const struct datasheet *ods) { struct datasheet *ds; - struct range_map_node *r; + size_t i; ds = xmalloc (sizeof *ds); - ds->columns = axis_clone (ods->columns); - ds->rows = axis_clone (ods->rows); - range_map_init (&ds->sources); - for (r = range_map_first (&ods->sources); r != NULL; - r = range_map_next (&ods->sources, r)) - { - const struct source_info *osi = source_info_from_range_map (r); - struct source_info *si = xmalloc (sizeof *si); - si->source = source_clone (osi->source); - range_map_insert (&ds->sources, range_map_node_get_start (r), - range_map_node_get_width (r), &si->column_range); - } + + ds->sources = xmalloc (ods->n_sources * sizeof *ds->sources); + for (i = 0; i < ods->n_sources; i++) + ds->sources[i] = source_clone (ods->sources[i]); + ds->n_sources = ods->n_sources; + + ds->proto = ods->proto != NULL ? caseproto_ref (ods->proto) : NULL; + ds->columns = xmemdup (ods->columns, ods->n_columns * sizeof *ods->columns); + for (i = 0; i < ods->n_columns; i++) + ds->columns[i].source + = ds->sources[get_source_index (ods, ods->columns[i].source)]; + ds->n_columns = ods->n_columns; ds->column_min_alloc = ods->column_min_alloc; + + ds->rows = axis_clone (ods->rows); + ds->taint = taint_create (); return ds; } - /* Hashes the structure of datasheet DS and returns the hash. We use MD4 because it is much faster than MD5 or SHA-1 but its collision resistance is just as good. 
*/ @@ -1270,22 +1501,20 @@ hash_datasheet (const struct datasheet *ds) { unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))]; struct md4_ctx ctx; - struct range_map_node *r; + size_t i; md4_init_ctx (&ctx); - axis_hash (ds->columns, &ctx); - axis_hash (ds->rows, &ctx); - for (r = range_map_first (&ds->sources); r != NULL; - r = range_map_next (&ds->sources, r)) + for (i = 0; i < ds->n_columns; i++) { - unsigned long int start = range_map_node_get_start (r); - unsigned long int end = range_map_node_get_end (r); - md4_process_bytes (&start, sizeof start, &ctx); - md4_process_bytes (&end, sizeof end, &ctx); + const struct column *column = &ds->columns[i]; + int source_n_bytes = sparse_xarray_get_n_columns (column->source->data); + md4_process_bytes (&source_n_bytes, sizeof source_n_bytes, &ctx); + /*md4_process_bytes (&column->byte_ofs, sizeof column->byte_ofs, &ctx);*/ + md4_process_bytes (&column->value_ofs, sizeof column->value_ofs, &ctx); + md4_process_bytes (&column->width, sizeof column->width, &ctx); } - md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc, - &ctx); + axis_hash (ds->rows, &ctx); + md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc, &ctx); md4_finish_ctx (&ctx, hash); return hash[0]; } - diff --git a/src/data/datasheet.h b/src/data/datasheet.h index 860f236e..0508896a 100644 --- a/src/data/datasheet.h +++ b/src/data/datasheet.h @@ -20,17 +20,21 @@ #include #include +struct caseproto; struct casereader; -/* A datasheet is a 2-d array of data that may be stored in - memory or on disk. It efficiently supports data storage and - retrieval, as well as adding, removing, and rearranging both - rows and columns. */ +/* A datasheet is a 2-d array of "union value"s that may be + stored in memory or on disk. It efficiently supports data + storage and retrieval, as well as adding, removing, and + rearranging both rows and columns. 
*/ struct datasheet *datasheet_create (struct casereader *); void datasheet_destroy (struct datasheet *); struct datasheet *datasheet_rename (struct datasheet *); +const struct caseproto *datasheet_get_proto (const struct datasheet *); +int datasheet_get_column_width (const struct datasheet *, size_t column); + bool datasheet_error (const struct datasheet *); void datasheet_force_error (struct datasheet *); const struct taint *datasheet_get_taint (const struct datasheet *); @@ -38,17 +42,20 @@ const struct taint *datasheet_get_taint (const struct datasheet *); struct casereader *datasheet_make_reader (struct datasheet *); /* Columns. */ -size_t datasheet_get_column_cnt (const struct datasheet *); -bool datasheet_insert_columns (struct datasheet *, - const union value[], size_t cnt, - size_t before); +size_t datasheet_get_n_columns (const struct datasheet *); +bool datasheet_insert_column (struct datasheet *, + const union value *, int width, size_t before); void datasheet_delete_columns (struct datasheet *, size_t start, size_t cnt); void datasheet_move_columns (struct datasheet *, size_t old_start, size_t new_start, size_t cnt); +bool datasheet_resize_column (struct datasheet *, size_t column, int new_width, + void (*resize_cb) (const union value *, + union value *, void *aux), + void *aux); /* Rows. 
*/ -casenumber datasheet_get_row_cnt (const struct datasheet *); +casenumber datasheet_get_n_rows (const struct datasheet *); bool datasheet_insert_rows (struct datasheet *, casenumber before, struct ccase *[], casenumber cnt); @@ -62,11 +69,12 @@ void datasheet_move_rows (struct datasheet *, struct ccase *datasheet_get_row (const struct datasheet *, casenumber); bool datasheet_put_row (struct datasheet *, casenumber, struct ccase *); bool datasheet_get_value (const struct datasheet *, casenumber, size_t column, - union value *, int width); + union value *); bool datasheet_put_value (struct datasheet *, casenumber, size_t column, - const union value *, int width); + const union value *); unsigned int hash_datasheet (const struct datasheet *ds); struct datasheet *clone_datasheet (const struct datasheet *ds); + #endif /* data/datasheet.h */ diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 2dd1dfc4..67af049b 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -51,6 +51,8 @@ struct dictionary { struct variable **var; /* Variables. */ size_t var_cnt, var_cap; /* Number of variables, capacity. */ + struct caseproto *proto; /* Prototype for dictionary cases + (updated lazily). */ struct hsh_table *name_tab; /* Variable index by name. */ int next_value_idx; /* Index of next `union value' to allocate. */ const struct variable **split; /* SPLIT FILE vars. */ @@ -98,6 +100,14 @@ dict_set_change_callback (struct dictionary *d, d->changed_data = data; } +/* Discards dictionary D's caseproto. (It will be regenerated + lazily, on demand.) */ +static void +invalidate_proto (struct dictionary *d) +{ + caseproto_unref (d->proto); + d->proto = NULL; +} /* Print a representation of dictionary D to stdout, for debugging purposes. 
*/ @@ -237,6 +247,7 @@ dict_clear (struct dictionary *d) free (d->var); d->var = NULL; d->var_cnt = d->var_cap = 0; + invalidate_proto (d); hsh_clear (d->name_tab); d->next_value_idx = 0; dict_set_split_vars (d, NULL, 0); @@ -372,7 +383,8 @@ add_var (struct dictionary *d, struct variable *v) if ( d->callbacks && d->callbacks->var_added ) d->callbacks->var_added (d, var_get_dict_index (v), d->cb_data); - d->next_value_idx += var_get_value_cnt (v); + d->next_value_idx++; + invalidate_proto (d); return v; } @@ -539,7 +551,7 @@ dict_delete_var (struct dictionary *d, struct variable *v) { int dict_index = var_get_dict_index (v); const int case_index = var_get_case_index (v); - const int val_cnt = var_get_value_cnt (v); + const int width = var_get_width (v); assert (dict_contains_var (d, v)); @@ -572,8 +584,10 @@ dict_delete_var (struct dictionary *d, struct variable *v) var_destroy (v); if ( d->changed ) d->changed (d, d->changed_data); + + invalidate_proto (d); if (d->callbacks && d->callbacks->var_deleted ) - d->callbacks->var_deleted (d, dict_index, case_index, val_cnt, d->cb_data); + d->callbacks->var_deleted (d, dict_index, case_index, width, d->cb_data); } /* Deletes the COUNT variables listed in VARS from D. This is @@ -998,6 +1012,25 @@ dict_set_case_limit (struct dictionary *d, casenumber case_limit) d->case_limit = case_limit; } +/* Returns the prototype used for cases created by dictionary D. */ +const struct caseproto * +dict_get_proto (const struct dictionary *d_) +{ + struct dictionary *d = (struct dictionary *) d_; + if (d->proto == NULL) + { + size_t i; + + d->proto = caseproto_create (); + d->proto = caseproto_reserve (d->proto, d->var_cnt); + for (i = 0; i < d->var_cnt; i++) + d->proto = caseproto_set_width (d->proto, + var_get_case_index (d->var[i]), + var_get_width (d->var[i])); + } + return d->proto; +} + /* Returns the case index of the next value to be added to D. 
This value is the number of `union value's that need to be allocated to store a case for dictionary D. */ @@ -1030,37 +1063,11 @@ dict_compact_values (struct dictionary *d) for (i = 0; i < d->var_cnt; i++) { struct variable *v = d->var[i]; - set_var_case_index (v, d->next_value_idx); - d->next_value_idx += var_get_value_cnt (v); + set_var_case_index (v, d->next_value_idx++); } + invalidate_proto (d); } -/* - Reassigns case indices for D, increasing each index above START by - the value PADDING. -*/ -static void -dict_pad_values (struct dictionary *d, int start, int padding) -{ - size_t i; - - if ( padding <= 0 ) - return; - - for (i = 0; i < d->var_cnt; ++i) - { - struct variable *v = d->var[i]; - - int index = var_get_case_index (v); - - if ( index >= start) - set_var_case_index (v, index + padding); - } - - d->next_value_idx += padding; -} - - /* Returns the number of values occupied by the variables in dictionary D. All variables are considered if EXCLUDE_CLASSES is 0, or it may contain one or more of (1u << DC_ORDINARY), @@ -1086,10 +1093,38 @@ dict_count_values (const struct dictionary *d, unsigned int exclude_classes) { enum dict_class class = var_get_dict_class (d->var[i]); if (!(exclude_classes & (1u << class))) - cnt += var_get_value_cnt (d->var[i]); + cnt++; } return cnt; } + +/* Returns the case prototype that would result after deleting + all variables from D that are not in one of the + EXCLUDE_CLASSES and compacting the dictionary with + dict_compact(). + + The caller must unref the returned caseproto when it is no + longer needed. 
*/ +struct caseproto * +dict_get_compacted_proto (const struct dictionary *d, + unsigned int exclude_classes) +{ + struct caseproto *proto; + size_t i; + + assert ((exclude_classes & ~((1u << DC_ORDINARY) + | (1u << DC_SYSTEM) + | (1u << DC_SCRATCH))) == 0); + + proto = caseproto_create (); + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + if (!(exclude_classes & (1u << var_get_dict_class (v)))) + proto = caseproto_add_width (proto, var_get_width (v)); + } + return proto; +} /* Returns the SPLIT FILE vars (see cmd_split_file()). Call dict_get_split_cnt() to determine how many SPLIT FILE vars @@ -1228,7 +1263,7 @@ dict_add_document_line (struct dictionary *d, const char *line) msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH); } buf_copy_str_rpad (ds_put_uninit (&d->documents, DOC_LINE_LENGTH), - DOC_LINE_LENGTH, line); + DOC_LINE_LENGTH, line, ' '); } /* Returns the number of document lines in dictionary D. */ @@ -1382,7 +1417,7 @@ dict_var_changed (const struct variable *v) /* Called from variable.c to notify the dictionary that the variable's width has changed */ void -dict_var_resized (const struct variable *v, int delta) +dict_var_resized (const struct variable *v, int old_width) { if ( var_has_vardict (v)) { @@ -1391,11 +1426,12 @@ dict_var_resized (const struct variable *v, int delta) d = vdi->dict; - dict_pad_values (d, var_get_case_index(v) + 1, delta); - if (d->changed) d->changed (d, d->changed_data); + + invalidate_proto (d); if ( d->callbacks && d->callbacks->var_resized ) - d->callbacks->var_resized (d, var_get_dict_index (v), delta, d->cb_data); + d->callbacks->var_resized (d, var_get_dict_index (v), old_width, + d->cb_data); } } diff --git a/src/data/dictionary.h b/src/data/dictionary.h index 4efb953c..02fd5cd4 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004, 2007 Free Software Foundation, Inc. 
+ Copyright (C) 2004, 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -99,6 +99,7 @@ casenumber dict_get_case_limit (const struct dictionary *); void dict_set_case_limit (struct dictionary *, casenumber); /* Size of cases for this dictionary. */ +const struct caseproto *dict_get_proto (const struct dictionary *); int dict_get_next_value_idx (const struct dictionary *); size_t dict_get_case_size (const struct dictionary *); @@ -107,6 +108,8 @@ size_t dict_get_case_size (const struct dictionary *); size_t dict_count_values (const struct dictionary *, unsigned int exclude_classes); void dict_compact_values (struct dictionary *); +struct caseproto *dict_get_compacted_proto (const struct dictionary *, + unsigned int exclude_classes); /* SPLIT FILE variables. */ const struct variable *const *dict_get_split_vars (const struct dictionary *); diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index 2e92e3f5..c3797175 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -19,6 +19,9 @@ #include #include +#include + +#include "minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -168,8 +171,7 @@ struct gnumeric_reader int stop_row; int stop_col; - - size_t value_cnt; + struct caseproto *proto; struct dictionary *dict; struct ccase *first_case; bool used_first_case; @@ -194,6 +196,8 @@ gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) if ( ! 
r->used_first_case ) case_unref (r->first_case); + caseproto_unref (r->proto); + free (r); } @@ -324,7 +328,7 @@ convert_xml_string_to_value (struct ccase *c, const struct variable *var, if ( var_is_alpha (var)) { - memcpy (v->s, text, n_bytes); + memcpy (value_str_rw (v, var_get_width (var)), text, n_bytes); } else { @@ -498,8 +502,6 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) *dict = r->dict = dict_create (); dict_set_encoding (r->dict, (const char *) xmlTextReaderConstEncoding (r->xtr)); - - r->value_cnt = 0; for (i = 0 ; i < n_var_specs ; ++i ) { @@ -510,8 +512,6 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) if ( var_spec[i].width == -1 ) var_spec[i].width = MAX_SHORT_STRING; - r->value_cnt += value_cnt_from_width (var_spec[i].width); - if ( ! dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart, name)) { @@ -532,9 +532,9 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) goto error; } - r->first_case = case_create (r->value_cnt); - memset (case_data_rw_idx (r->first_case, 0)->s, - ' ', MAX_SHORT_STRING * r->value_cnt); + r->proto = caseproto_ref (dict_get_proto (r->dict)); + r->first_case = case_create (r->proto); + case_set_missing (r->first_case); for ( i = 0 ; i < n_var_specs ; ++i ) { @@ -554,7 +554,7 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) return casereader_create_sequential (NULL, - r->value_cnt, + r->proto, n_cases, &gnm_file_casereader_class, r); @@ -592,9 +592,8 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_) return r->first_case; } - c = case_create (r->value_cnt); - - memset (case_data_rw_idx (c, 0)->s, ' ', MAX_SHORT_STRING * r->value_cnt); + c = case_create (r->proto); + case_set_missing (c); while ((r->state == STATE_CELL || r->state == STATE_CELLS_START ) && r->row == current_row && (ret = xmlTextReaderRead (r->xtr))) @@ -605,7 +604,7 @@ gnm_file_casereader_read 
(struct casereader *reader UNUSED, void *r_) r->col > r->stop_col)) continue; - if ( r->col - r->start_col >= r->value_cnt) + if ( r->col - r->start_col >= caseproto_get_n_widths (r->proto)) continue; if ( r->stop_row != -1 && r->row > r->stop_row) diff --git a/src/data/lazy-casereader.c b/src/data/lazy-casereader.c index 65d10aea..44897d16 100644 --- a/src/data/lazy-casereader.c +++ b/src/data/lazy-casereader.c @@ -43,8 +43,8 @@ static const struct casereader_class lazy_casereader_class; to a "serial number" that uniquely identifies the new lazy casereader, for use with lazy_casereader_destroy. - VALUE_CNT must be the number of struct values per case read - from the casereader. + PROTO must be the format of the cases to be read from the + casereader. CASE_CNT is an upper limit on the number of cases that casereader_read will return from the casereader in successive @@ -52,7 +52,7 @@ static const struct casereader_class lazy_casereader_class; data source or CASENUMBER_MAX if the number of cases cannot be predicted in advance. */ struct casereader * -lazy_casereader_create (size_t value_cnt, casenumber case_cnt, +lazy_casereader_create (const struct caseproto *proto, casenumber case_cnt, struct casereader *(*callback) (void *aux), void *aux, unsigned long int *serial) { @@ -63,7 +63,7 @@ lazy_casereader_create (size_t value_cnt, casenumber case_cnt, *serial = lc->serial = next_serial++; lc->callback = callback; lc->aux = aux; - return casereader_create_sequential (NULL, value_cnt, case_cnt, + return casereader_create_sequential (NULL, proto, case_cnt, &lazy_casereader_class, lc); } diff --git a/src/data/lazy-casereader.h b/src/data/lazy-casereader.h index c83f39db..561d98eb 100644 --- a/src/data/lazy-casereader.h +++ b/src/data/lazy-casereader.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2007, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ #include #include -struct casereader *lazy_casereader_create (size_t value_cnt, +struct casereader *lazy_casereader_create (const struct caseproto *, casenumber case_cnt, struct casereader *(*) (void *aux), void *aux, diff --git a/src/data/missing-values.c b/src/data/missing-values.c index 07ebb77a..867e0f71 100644 --- a/src/data/missing-values.c +++ b/src/data/missing-values.c @@ -324,8 +324,7 @@ is_num_user_missing (const struct missing_values *mv, double d) MV must be a set of string missing values. S[] must contain exactly as many characters as MV's width. */ static bool -is_str_user_missing (const struct missing_values *mv, - const char s[]) +is_str_user_missing (const struct missing_values *mv, const char s[]) { const union value *v = mv->values; assert (mv->width > 0); @@ -334,14 +333,14 @@ is_str_user_missing (const struct missing_values *mv, case MVT_NONE: return false; case MVT_1: - return !memcmp (v[0].s, s, mv->width); + return !memcmp (v[0].short_string, s, mv->width); case MVT_2: - return (!memcmp (v[0].s, s, mv->width) - || !memcmp (v[1].s, s, mv->width)); + return (!memcmp (v[0].short_string, s, mv->width) + || !memcmp (v[1].short_string, s, mv->width)); case MVT_3: - return (!memcmp (v[0].s, s, mv->width) - || !memcmp (v[1].s, s, mv->width) - || !memcmp (v[2].s, s, mv->width)); + return (!memcmp (v[0].short_string, s, mv->width) + || !memcmp (v[1].short_string, s, mv->width) + || !memcmp (v[2].short_string, s, mv->width)); case MVT_RANGE: case MVT_RANGE_1: NOT_REACHED (); @@ -357,7 +356,7 @@ mv_is_value_missing (const struct missing_values *mv, const union value *v, { return (mv->width == 0 ? 
mv_is_num_missing (mv, v->f, class) - : mv_is_str_missing (mv, v->s, class)); + : mv_is_str_missing (mv, v->short_string, class)); } /* Returns true if D is a missing value in the given CLASS in MV, diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index 7e65c28b..00ca4196 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -74,8 +74,7 @@ struct pfm_reader char *trans; /* 256-byte character set translation table. */ int var_cnt; /* Number of variables. */ int weight_index; /* 0-based index of weight variable, or -1. */ - int *widths; /* Variable widths, 0 for numeric. */ - size_t value_cnt; /* Number of `value's per case. */ + struct caseproto *proto; /* Format of output cases. */ bool ok; /* Set false on I/O error. */ }; @@ -256,8 +255,7 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->weight_index = -1; r->trans = NULL; r->var_cnt = 0; - r->widths = NULL; - r->value_cnt = 0; + r->proto = NULL; r->ok = true; if (setjmp (r->bail_out)) goto error; @@ -296,8 +294,8 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict, if (!match (r, 'F')) error (r, _("Data record expected.")); - r->value_cnt = dict_get_next_value_idx (*dict); - return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX, + r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool); + return casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, &por_file_casereader_class, r); error: @@ -608,7 +606,8 @@ assign_default: return fmt_default_for_width (var_get_width (v)); } -static union value parse_value (struct pfm_reader *, struct variable *); +static void parse_value (struct pfm_reader *, struct variable *, + union value *); /* Read information on all the variables. 
*/ static void @@ -623,7 +622,6 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) r->var_cnt = read_int (r); if (r->var_cnt <= 0) error (r, _("Invalid number of variables %d."), r->var_cnt); - r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths); /* Purpose of this value is unknown. It is typically 161. */ read_int (r); @@ -652,7 +650,6 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) width = read_int (r); if (width < 0) error (r, _("Invalid variable width %d."), width); - r->widths[i] = width; read_string (r, name); for (j = 0; j < 6; j++) @@ -704,8 +701,10 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) /* Single missing values. */ while (match (r, '8')) { - union value value = parse_value (r, v); + union value value; + parse_value (r, v, &value); mv_add_value (&miss, &value); + value_destroy (&value, var_get_width (v)); } var_set_missing_values (v, &miss); @@ -730,21 +729,18 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) } /* Parse a value for variable VV into value V. */ -static union value -parse_value (struct pfm_reader *r, struct variable *vv) +static void +parse_value (struct pfm_reader *r, struct variable *vv, union value *v) { - union value v; - + value_init (v, var_get_width (vv)); if (var_is_alpha (vv)) { char string[256]; read_string (r, string); - buf_copy_str_rpad (v.s, 8, string); + buf_copy_str_rpad (value_str_rw (v, 8), 8, string, ' '); } else - v.f = read_float (r); - - return v; + v->f = read_float (r); } /* Parse a value label record and return success. */ @@ -784,7 +780,7 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict) char label[256]; int j; - val = parse_value (r, v[0]); + parse_value (r, v[0], &val); read_string (r, label); /* Assign the value label to each variable. 
*/ @@ -795,6 +791,8 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict) if (!var_is_long_string (var)) var_replace_value_label (var, &val, label); } + + value_destroy (&val, var_get_width (v[0])); } } @@ -824,7 +822,7 @@ por_file_casereader_read (struct casereader *reader, void *r_) size_t i; size_t idx; - c = case_create (casereader_get_value_cnt (reader)); + c = case_create (r->proto); setjmp (r->bail_out); if (!r->ok) { @@ -843,7 +841,7 @@ por_file_casereader_read (struct casereader *reader, void *r_) idx = 0; for (i = 0; i < r->var_cnt; i++) { - int width = r->widths[i]; + int width = caseproto_get_width (r->proto, i); if (width == 0) { @@ -854,7 +852,7 @@ por_file_casereader_read (struct casereader *reader, void *r_) { char string[256]; read_string (r, string); - buf_copy_str_rpad (case_data_rw_idx (c, idx)->s, width, string); + buf_copy_str_rpad (case_str_rw_idx (c, idx), width, string, ' '); idx += DIV_RND_UP (width, MAX_SHORT_STRING); } } diff --git a/src/data/por-file-writer.c b/src/data/por-file-writer.c index 8de293c5..9ccc8fd7 100644 --- a/src/data/por-file-writer.c +++ b/src/data/por-file-writer.c @@ -45,6 +45,7 @@ #include #include +#include "minmax.h" #include "xalloc.h" #include "gettext.h" @@ -172,7 +173,7 @@ pfm_open_writer (struct file_handle *fh, struct dictionary *dict, buf_write (w, "F", 1); if (ferror (w->file)) goto error; - return casewriter_create (dict_get_next_value_idx (dict), + return casewriter_create (dict_get_proto (dict), &por_file_casewriter_class, w); error: @@ -308,7 +309,7 @@ write_format (struct pfm_writer *w, struct fmt_spec f, int width) /* Write value V for variable VV to file H. 
*/ static void -write_value (struct pfm_writer *w, union value *v, struct variable *vv) +write_value (struct pfm_writer *w, const union value *v, struct variable *vv) { if (var_is_numeric (vv)) write_float (w, v->f); @@ -316,7 +317,7 @@ write_value (struct pfm_writer *w, union value *v, struct variable *vv) { int width = MIN (var_get_width (vv), MAX_POR_WIDTH); write_int (w, width); - buf_write (w, v->s, width); + buf_write (w, value_str (v, width), width); } } @@ -398,12 +399,12 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict) for (i = 0; i < dict_get_var_cnt (dict); i++) { - struct val_labs_iterator *j; struct variable *v = dict_get_var (dict, i); const struct val_labs *val_labs = var_get_value_labels (v); - struct val_lab *vl; + size_t n_labels = val_labs_count (val_labs); + const struct val_lab **labels; - if (val_labs == NULL) + if (n_labels == 0) continue; buf_write (w, "D", 1); @@ -411,12 +412,15 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict) write_string (w, var_get_short_name (v, 0)); write_int (w, val_labs_count (val_labs)); - for (vl = val_labs_first_sorted (val_labs, &j); vl != NULL; - vl = val_labs_next (val_labs, &j)) + n_labels = val_labs_count (val_labs); + labels = val_labs_sorted (val_labs); + for (i = 0; i < n_labels; i++) { - write_value (w, &vl->value, v); - write_string (w, vl->label); + const struct val_lab *vl = labels[i]; + write_value (w, val_lab_get_value (vl), v); + write_string (w, val_lab_get_label (vl)); } + free (labels); } } diff --git a/src/data/procedure.c b/src/data/procedure.c index fbb9a757..b762214d 100644 --- a/src/data/procedure.c +++ b/src/data/procedure.c @@ -38,6 +38,7 @@ #include #include +#include "minmax.h" #include "xalloc.h" struct dataset { @@ -186,11 +187,19 @@ proc_open (struct dataset *ds) { struct dictionary *pd = ds->permanent_dict; size_t compacted_value_cnt = dict_count_values (pd, 1u << DC_SCRATCH); - bool should_compact = compacted_value_cnt < 
dict_get_next_value_idx (pd); - ds->compactor = (should_compact - ? case_map_to_compact_dict (pd, 1u << DC_SCRATCH) - : NULL); - ds->sink = autopaging_writer_create (compacted_value_cnt); + if (compacted_value_cnt < dict_get_next_value_idx (pd)) + { + struct caseproto *compacted_proto; + compacted_proto = dict_get_compacted_proto (pd, 1u << DC_SCRATCH); + ds->compactor = case_map_to_compact_dict (pd, 1u << DC_SCRATCH); + ds->sink = autopaging_writer_create (compacted_proto); + caseproto_unref (compacted_proto); + } + else + { + ds->compactor = NULL; + ds->sink = autopaging_writer_create (dict_get_proto (pd)); + } } else { @@ -208,8 +217,7 @@ proc_open (struct dataset *ds) /* FIXME: use taint in dataset in place of `ok'? */ /* FIXME: for trivial cases we can just return a clone of ds->source? */ - return casereader_create_sequential (NULL, - dict_get_next_value_idx (ds->dict), + return casereader_create_sequential (NULL, dict_get_proto (ds->dict), CASENUMBER_MAX, &proc_casereader_class, ds); } @@ -245,7 +253,7 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_) c = casereader_read (ds->source); if (c == NULL) return NULL; - c = case_unshare_and_resize (c, dict_get_next_value_idx (ds->dict)); + c = case_unshare_and_resize (c, dict_get_proto (ds->dict)); caseinit_init_vars (ds->caseinit, c); /* Execute permanent transformations. 
*/ diff --git a/src/data/psql-reader.c b/src/data/psql-reader.c index 85e777a9..741bf36e 100644 --- a/src/data/psql-reader.c +++ b/src/data/psql-reader.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "psql-reader.h" @@ -28,8 +29,11 @@ #include "calendar.h" #include +#include #include +#include "minmax.h" + #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) (msgid) @@ -95,7 +99,7 @@ struct psql_reader double postgres_epoch; - size_t value_cnt; + struct caseproto *proto; struct dictionary *dict; /* An array of ints, which maps psql column numbers into @@ -175,8 +179,6 @@ create_var (struct psql_reader *r, const struct fmt_spec *fmt, struct variable *var; char name[VAR_NAME_LEN + 1]; - r->value_cnt += value_cnt_from_width (width); - if ( ! dict_make_unique_var_name (r->dict, suggested_name, &vx, name)) { msg (ME, _("Cannot create variable name from %s"), suggested_name); @@ -357,7 +359,7 @@ psql_open_reader (struct psql_read_info *info, struct dictionary **dict) n_tuples = PQntuples (qres); n_fields = PQnfields (qres); - r->value_cnt = 0; + r->proto = NULL; r->vmap = NULL; r->vmapsize = 0; @@ -528,10 +530,11 @@ psql_open_reader (struct psql_read_info *info, struct dictionary **dict) ds_put_format (&r->fetch_cmd, "FETCH FORWARD %d FROM pspp", r->cache_size); reload_cache (r); + r->proto = caseproto_ref (dict_get_proto (*dict)); return casereader_create_sequential (NULL, - r->value_cnt, + r->proto, n_cases, &psql_casereader_class, r); @@ -554,6 +557,7 @@ psql_casereader_destroy (struct casereader *reader UNUSED, void *r_) free (r->vmap); if (r->res) PQclear (r->res); PQfinish (r->conn); + caseproto_unref (r->proto); free (r); } @@ -588,8 +592,8 @@ set_value (struct psql_reader *r) if ( r->tuple >= PQntuples (r->res)) return NULL; - c = case_create (r->value_cnt); - memset (case_data_rw_idx (c, 0)->s, ' ', MAX_SHORT_STRING * r->value_cnt); + c = case_create (r->proto); + case_set_missing (c); for (i = 0 ; i < n_vars ; ++i ) 
@@ -831,7 +835,8 @@ set_value (struct psql_reader *r) case VARCHAROID: case BPCHAROID: case BYTEAOID: - memcpy (val->s, (char *) vptr, MIN (length, var_width)); + memcpy (value_str_rw (val, var_width), (char *) vptr, + MIN (length, var_width)); break; case NUMERICOID: diff --git a/src/data/scratch-writer.c b/src/data/scratch-writer.c index 952860cd..631305fe 100644 --- a/src/data/scratch-writer.c +++ b/src/data/scratch-writer.c @@ -59,7 +59,6 @@ scratch_writer_open (struct file_handle *fh, struct scratch_writer *writer; struct casewriter *casewriter; struct fh_lock *lock; - size_t dict_value_cnt; /* Get exclusive write access to handle. */ /* TRANSLATORS: this fragment will be interpolated into @@ -83,10 +82,9 @@ scratch_writer_open (struct file_handle *fh, } else writer->compactor = NULL; - dict_value_cnt = dict_get_next_value_idx (writer->dict); - writer->subwriter = autopaging_writer_create (dict_value_cnt); + writer->subwriter = autopaging_writer_create (dict_get_proto (writer->dict)); - casewriter = casewriter_create (dict_value_cnt, + casewriter = casewriter_create (dict_get_proto (writer->dict), &scratch_writer_casewriter_class, writer); taint_propagate (casewriter_get_taint (writer->subwriter), casewriter_get_taint (casewriter)); diff --git a/src/data/settings.c b/src/data/settings.c index f9c65fc8..c86ac9cc 100644 --- a/src/data/settings.c +++ b/src/data/settings.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,11 +22,13 @@ #include "format.h" #include "value.h" #include "xalloc.h" +#include +#include #include #include -#include #include "error.h" +#include "minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -515,13 +517,12 @@ settings_get_workspace (void) } /* Approximate maximum number of cases to allocate in-core, given - that each case contains VALUE_CNT values. */ + that each case has the format given in PROTO. */ size_t -settings_get_workspace_cases (size_t value_cnt) +settings_get_workspace_cases (const struct caseproto *proto) { - size_t case_size = sizeof (union value) * value_cnt + 4 * sizeof (void *); - size_t case_cnt = MAX (settings_get_workspace () / case_size, 4); - return case_cnt; + size_t n_cases = settings_get_workspace () / case_get_cost (proto); + return MAX (n_cases, 4); } /* Set approximate maximum amount of memory to use for cases, in diff --git a/src/data/settings.h b/src/data/settings.h index 2f2bbe99..3de1715f 100644 --- a/src/data/settings.h +++ b/src/data/settings.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,9 +23,9 @@ #include #include +struct caseproto; struct settings; - void settings_init (int *, int *); void settings_done (void); @@ -107,7 +107,7 @@ char settings_get_endcmd (void); void settings_set_endcmd (char); size_t settings_get_workspace (void); -size_t settings_get_workspace_cases (size_t value_cnt); +size_t settings_get_workspace_cases (const struct caseproto *); void settings_set_workspace (size_t); const struct fmt_spec *settings_get_format (void); diff --git a/src/data/sparse-cases.c b/src/data/sparse-cases.c deleted file mode 100644 index 7abe4291..00000000 --- a/src/data/sparse-cases.c +++ /dev/null @@ -1,354 +0,0 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "xalloc.h" - -/* A sparse array of cases. */ -struct sparse_cases - { - size_t column_cnt; /* Number of values per case. */ - union value *default_columns; /* Defaults for unwritten cases. */ - casenumber max_memory_cases; /* Max cases before dumping to disk. */ - struct sparse_array *memory; /* Backing, if stored in memory. 
*/ - struct case_tmpfile *disk; /* Backing, if stored on disk. */ - struct range_set *disk_cases; /* Allocated cases, if on disk. */ - }; - -/* Creates and returns a new sparse array of cases with - COLUMN_CNT values per case. */ -struct sparse_cases * -sparse_cases_create (size_t column_cnt) -{ - struct sparse_cases *sc = xmalloc (sizeof *sc); - sc->column_cnt = column_cnt; - sc->default_columns = NULL; - sc->max_memory_cases = settings_get_workspace_cases (column_cnt); - sc->memory = sparse_array_create (sizeof (struct ccase *)); - sc->disk = NULL; - sc->disk_cases = NULL; - return sc; -} - -/* Creates and returns a new sparse array of cases that contains - the same data as OLD. */ -struct sparse_cases * -sparse_cases_clone (const struct sparse_cases *old) -{ - struct sparse_cases *new = xmalloc (sizeof *new); - - new->column_cnt = old->column_cnt; - - if (old->default_columns != NULL) - new->default_columns - = xmemdup (old->default_columns, - old->column_cnt * sizeof *old->default_columns); - else - new->default_columns = NULL; - - new->max_memory_cases = old->max_memory_cases; - - if (old->memory != NULL) - { - unsigned long int idx; - struct ccase **cp; - - new->memory = sparse_array_create (sizeof (struct ccase *)); - for (cp = sparse_array_first (old->memory, &idx); cp != NULL; - cp = sparse_array_next (old->memory, idx, &idx)) - { - struct ccase **ncp = sparse_array_insert (new->memory, idx); - *ncp = case_ref (*cp); - } - } - else - new->memory = NULL; - - if (old->disk != NULL) - { - const struct range_set_node *node; - - new->disk = case_tmpfile_create (old->column_cnt); - new->disk_cases = range_set_create (); - for (node = range_set_first (old->disk_cases); node != NULL; - node = range_set_next (old->disk_cases, node)) - { - unsigned long int start = range_set_node_get_start (node); - unsigned long int end = range_set_node_get_end (node); - unsigned long int idx; - - for (idx = start; idx < end; idx++) - { - struct ccase *c = case_tmpfile_get_case 
(old->disk, idx); - if (c == NULL || !case_tmpfile_put_case (new->disk, idx, c)) - { - sparse_cases_destroy (new); - return NULL; - } - } - } - } - else - { - new->disk = NULL; - new->disk_cases = NULL; - } - - return new; -} - -/* Destroys sparse array of cases SC. */ -void -sparse_cases_destroy (struct sparse_cases *sc) -{ - if (sc != NULL) - { - if (sc->memory != NULL) - { - unsigned long int idx; - struct ccase **cp; - for (cp = sparse_array_first (sc->memory, &idx); cp != NULL; - cp = sparse_array_next (sc->memory, idx, &idx)) - case_unref (*cp); - sparse_array_destroy (sc->memory); - } - free (sc->default_columns); - case_tmpfile_destroy (sc->disk); - range_set_destroy (sc->disk_cases); - free (sc); - } -} - -/* Returns the number of `union value's in each case in SC. */ -size_t -sparse_cases_get_value_cnt (const struct sparse_cases *sc) -{ - return sc->column_cnt; -} - -/* Dumps the cases in SC, which must currently be stored in - memory, to disk. Returns true if successful, false on I/O - error. */ -static bool -dump_sparse_cases_to_disk (struct sparse_cases *sc) -{ - unsigned long int idx; - struct ccase **cp; - - assert (sc->memory != NULL); - assert (sc->disk == NULL); - - sc->disk = case_tmpfile_create (sc->column_cnt); - sc->disk_cases = range_set_create (); - - for (cp = sparse_array_first (sc->memory, &idx); cp != NULL; - cp = sparse_array_next (sc->memory, idx, &idx)) - { - if (!case_tmpfile_put_case (sc->disk, idx, *cp)) - { - case_tmpfile_destroy (sc->disk); - sc->disk = NULL; - range_set_destroy (sc->disk_cases); - sc->disk_cases = NULL; - return false; - } - range_set_insert (sc->disk_cases, idx, 1); - } - sparse_array_destroy (sc->memory); - sc->memory = NULL; - return true; -} - -/* Returns true if any data has ever been written to ROW in SC, - false otherwise. */ -bool -sparse_cases_contains_row (const struct sparse_cases *sc, casenumber row) -{ - return (sc->memory != NULL - ? 
sparse_array_get (sc->memory, row) != NULL - : range_set_contains (sc->disk_cases, row)); -} - -/* Reads columns COLUMNS...(COLUMNS + VALUE_CNT), exclusive, in - the given ROW in SC, into the VALUE_CNT values in VALUES. - Returns true if successful, false on I/O error. */ -bool -sparse_cases_read (struct sparse_cases *sc, casenumber row, size_t column, - union value values[], size_t value_cnt) -{ - assert (value_cnt <= sc->column_cnt); - assert (column + value_cnt <= sc->column_cnt); - - if (sparse_cases_contains_row (sc, row)) - { - struct ccase *c; - if (sc->memory != NULL) - { - struct ccase **cp = sparse_array_get (sc->memory, row); - c = case_ref (*cp); - } - else - { - c = case_tmpfile_get_case (sc->disk, row); - if (c == NULL) - return false; - } - case_copy_out (c, column, values, value_cnt); - case_unref (c); - } - else - { - assert (sc->default_columns != NULL); - memcpy (values, sc->default_columns + column, - sizeof *values * value_cnt); - } - - return true; -} - -/* Implements sparse_cases_write for an on-disk sparse_cases. */ -static bool -write_disk_case (struct sparse_cases *sc, casenumber row, size_t column, - const union value values[], size_t value_cnt) -{ - struct ccase *c; - bool ok; - - /* Get current case data. */ - if (column == 0 && value_cnt == sc->column_cnt) - c = case_create (sc->column_cnt); - else - { - c = case_tmpfile_get_case (sc->disk, row); - if (c == NULL) - return false; - } - - /* Copy in new data. */ - case_copy_in (c, column, values, value_cnt); - - /* Write new case. */ - ok = case_tmpfile_put_case (sc->disk, row, c); - if (ok) - range_set_insert (sc->disk_cases, row, 1); - - return ok; -} - -/* Writes the VALUE_CNT values in VALUES into columns - COLUMNS...(COLUMNS + VALUE_CNT), exclusive, in the given ROW - in SC. - Returns true if successful, false on I/O error. 
*/ -bool -sparse_cases_write (struct sparse_cases *sc, casenumber row, size_t column, - const union value values[], size_t value_cnt) -{ - if (sc->memory != NULL) - { - struct ccase *c, **cp; - cp = sparse_array_get (sc->memory, row); - if (cp != NULL) - c = *cp = case_unshare (*cp); - else - { - if (sparse_array_count (sc->memory) >= sc->max_memory_cases) - { - if (!dump_sparse_cases_to_disk (sc)) - return false; - return write_disk_case (sc, row, column, values, value_cnt); - } - - cp = sparse_array_insert (sc->memory, row); - c = *cp = case_create (sc->column_cnt); - if (sc->default_columns != NULL - && (column != 0 || value_cnt != sc->column_cnt)) - case_copy_in (c, 0, sc->default_columns, sc->column_cnt); - } - case_copy_in (c, column, values, value_cnt); - return true; - } - else - return write_disk_case (sc, row, column, values, value_cnt); -} - -/* Writes the VALUE_CNT values in VALUES to columns - START_COLUMN...(START_COLUMN + VALUE_CNT), exclusive, in every - row in SC, even those rows that have not yet been written. - Returns true if successful, false on I/O error. - - The runtime of this function is linear in the number of rows - in SC that have already been written. */ -bool -sparse_cases_write_columns (struct sparse_cases *sc, size_t start_column, - const union value values[], size_t value_cnt) -{ - assert (value_cnt <= sc->column_cnt); - assert (start_column + value_cnt <= sc->column_cnt); - - /* Set defaults. */ - if (sc->default_columns == NULL) - sc->default_columns = xnmalloc (sc->column_cnt, - sizeof *sc->default_columns); - memcpy (sc->default_columns + start_column, values, - value_cnt * sizeof *sc->default_columns); - - /* Set individual rows. 
*/ - if (sc->memory != NULL) - { - struct ccase **cp; - unsigned long int idx; - - for (cp = sparse_array_first (sc->memory, &idx); cp != NULL; - cp = sparse_array_next (sc->memory, idx, &idx)) - { - *cp = case_unshare (*cp); - case_copy_in (*cp, start_column, values, value_cnt); - } - } - else - { - const struct range_set_node *node; - - for (node = range_set_first (sc->disk_cases); node != NULL; - node = range_set_next (sc->disk_cases, node)) - { - unsigned long int start = range_set_node_get_start (node); - unsigned long int end = range_set_node_get_end (node); - unsigned long int row; - - for (row = start; row < end; row++) - case_tmpfile_put_values (sc->disk, row, - start_column, values, value_cnt); - } - - if (case_tmpfile_error (sc->disk)) - return false; - } - return true; -} diff --git a/src/data/sparse-cases.h b/src/data/sparse-cases.h deleted file mode 100644 index 139863ff..00000000 --- a/src/data/sparse-cases.h +++ /dev/null @@ -1,66 +0,0 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2007 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Sparse array of cases. - - Implements a 2-d sparse array in which each row represents a - case, each column represents a variable, and each intersection - contains a `union value'. Data in the array may be accessed - randomly by column and row. 
When the number of cases stored - in the array is small, the data is stored in memory in memory; - when it is large, the data is stored in a temporary file. - - The sparse_cases_write_columns function provides a somewhat - unusual ability: to write a given value to every row in a - column or set of columns. This overwrites any values - previously written into those columns. For rows that have - never been written, this function sets "default" values that - later writes can override. - - The array keeps track of which row have been written. If - sparse_cases_write_columns has been used, reading from a row - that has never been written yields the default values; - otherwise, reading from such a row in an error. It is - permissible to write to only some columns in a row and leave - the rest of the row's data undefined (or, if - sparse_cases_write_columns has been used, at the default - values). The array does not keep track of which columns in a - row have never been written, but reading values that have - never been written or set as defaults yields undefined - behavior. 
*/ - -#ifndef DATA_SPARSE_CASES_H -#define DATA_SPARSE_CASES_H 1 - -#include -#include -#include - -struct sparse_cases *sparse_cases_create (size_t value_cnt); -struct sparse_cases *sparse_cases_clone (const struct sparse_cases *); -void sparse_cases_destroy (struct sparse_cases *); - -size_t sparse_cases_get_value_cnt (const struct sparse_cases *); - -bool sparse_cases_contains_row (const struct sparse_cases *, casenumber row); -bool sparse_cases_read (struct sparse_cases *, casenumber row, size_t column, - union value[], size_t value_cnt); -bool sparse_cases_write (struct sparse_cases *, casenumber row, size_t column, - const union value[], size_t value_cnt); -bool sparse_cases_write_columns (struct sparse_cases *, size_t start_column, - const union value[], size_t value_cnt); - -#endif /* data/sparse-cases.h */ diff --git a/src/data/subcase.c b/src/data/subcase.c index d4b13783..be586096 100644 --- a/src/data/subcase.c +++ b/src/data/subcase.c @@ -23,13 +23,15 @@ #include "xalloc.h" +static void invalidate_proto (struct subcase *sc); + /* Initializes SC as a subcase that contains no fields. */ void subcase_init_empty (struct subcase *sc) { sc->fields = NULL; sc->n_fields = 0; - sc->n_values = 0; + sc->proto = NULL; } /* Initializes SC as a subcase with fields extracted from the @@ -42,14 +44,13 @@ subcase_init_vars (struct subcase *sc, sc->fields = xnmalloc (n_vars, sizeof *sc->fields); sc->n_fields = n_vars; - sc->n_values = 0; + sc->proto = NULL; for (i = 0; i < n_vars; i++) { struct subcase_field *field = &sc->fields[i]; field->case_index = var_get_case_index (vars[i]); field->width = var_get_width (vars[i]); field->direction = SC_ASCEND; - sc->n_values += value_cnt_from_width (field->width); } } @@ -68,7 +69,7 @@ void subcase_clear (struct subcase *sc) { sc->n_fields = 0; - sc->n_values = 0; + invalidate_proto (sc); } /* Initializes SC with the same fields as ORIG. 
*/ @@ -77,7 +78,7 @@ subcase_clone (struct subcase *sc, const struct subcase *orig) { sc->fields = xmemdup (orig->fields, orig->n_fields * sizeof *orig->fields); sc->n_fields = orig->n_fields; - sc->n_values = orig->n_values; + sc->proto = orig->proto ? caseproto_ref (orig->proto) : NULL; } /* Frees the memory owned by SC (but not SC itself). */ @@ -85,6 +86,7 @@ void subcase_destroy (struct subcase *sc) { free (sc->fields); + caseproto_unref (sc->proto); } /* Add a field for VAR to SC, with DIRECTION as the sort order. @@ -107,10 +109,28 @@ subcase_add_var (struct subcase *sc, const struct variable *var, field->case_index = case_index; field->width = var_get_width (var); field->direction = direction; - sc->n_values += value_cnt_from_width (field->width); + invalidate_proto (sc); return true; } +/* Obtains a caseproto for a case described by SC. The caller + must not modify or unref the returned case prototype. */ +const struct caseproto * +subcase_get_proto (const struct subcase *sc_) +{ + struct subcase *sc = (struct subcase *) sc_; + + if (sc->proto == NULL) + { + size_t i; + + sc->proto = caseproto_create (); + for (i = 0; i < sc->n_fields; i++) + sc->proto = caseproto_add_width (sc->proto, sc->fields[i].width); + } + return sc->proto; +} + /* Returns true if and only if A and B are conformable, which means that they have the same number of fields and that each corresponding field in A and B have the same width. */ @@ -121,7 +141,7 @@ subcase_conformable (const struct subcase *a, const struct subcase *b) if (a == b) return true; - if (a->n_values != b->n_values || a->n_fields != b->n_fields) + if (a->n_fields != b->n_fields) return false; for (i = 0; i < a->n_fields; i++) if (a->fields[i].width != b->fields[i].width) @@ -130,7 +150,7 @@ subcase_conformable (const struct subcase *a, const struct subcase *b) } /* Copies the fields represented by SC from C into VALUES. 
- VALUES must have space for at least subcase_get_n_values(SC) + VALUES must have space for at least subcase_get_n_fields(SC) array elements. */ void subcase_extract (const struct subcase *sc, const struct ccase *c, @@ -141,13 +161,13 @@ subcase_extract (const struct subcase *sc, const struct ccase *c, for (i = 0; i < sc->n_fields; i++) { const struct subcase_field *field = &sc->fields[i]; - value_copy (values, case_data_idx (c, field->case_index), field->width); - values += value_cnt_from_width (field->width); + union value *value = &values[i]; + value_copy (value, case_data_idx (c, field->case_index), field->width); } } /* Copies the data in VALUES into the fields in C represented by - SC. VALUES must have at least subcase_get_n_values(SC) array + SC. VALUES must have at least subcase_get_n_fields(SC) array elements, and C must be large enough to contain all the fields in SC. */ void @@ -159,9 +179,9 @@ subcase_inject (const struct subcase *sc, for (i = 0; i < sc->n_fields; i++) { const struct subcase_field *field = &sc->fields[i]; - value_copy (case_data_rw_idx (c, field->case_index), values, + const union value *value = &values[i]; + value_copy (case_data_rw_idx (c, field->case_index), value, field->width); - values += value_cnt_from_width (field->width); } } @@ -228,11 +248,11 @@ subcase_compare_3way_xc (const struct subcase *sc, for (i = 0; i < sc->n_fields; i++) { const struct subcase_field *field = &sc->fields[i]; - int cmp = value_compare_3way (a, case_data_idx (b, field->case_index), + int cmp = value_compare_3way (&a[i], + case_data_idx (b, field->case_index), field->width); if (cmp != 0) return field->direction == SC_ASCEND ? 
cmp : -cmp; - a += value_cnt_from_width (field->width); } return 0; } @@ -261,16 +281,9 @@ subcase_compare_3way_xx (const struct subcase *sc, for (i = 0; i < sc->n_fields; i++) { const struct subcase_field *field = &sc->fields[i]; - size_t n_values; - int cmp; - - cmp = value_compare_3way (a, b, field->width); + int cmp = value_compare_3way (a++, b++, field->width); if (cmp != 0) return field->direction == SC_ASCEND ? cmp : -cmp; - - n_values = value_cnt_from_width (field->width); - a += n_values; - b += n_values; } return 0; } @@ -318,3 +331,11 @@ subcase_equal_xx (const struct subcase *sc, return subcase_compare_3way_xx (sc, a, b) == 0; } +/* Discards SC's case prototype. (It will be recreated if needed + again later.) */ +static void +invalidate_proto (struct subcase *sc) +{ + caseproto_unref (sc->proto); + sc->proto = NULL; +} diff --git a/src/data/subcase.h b/src/data/subcase.h index d50d0748..050cf17d 100644 --- a/src/data/subcase.h +++ b/src/data/subcase.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2008 Free Software Foundation, Inc. + Copyright (C) 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,7 +44,8 @@ struct subcase { struct subcase_field *fields; size_t n_fields; - size_t n_values; + + struct caseproto *proto; /* Created lazily. 
*/ }; void subcase_init_empty (struct subcase *); @@ -59,9 +60,10 @@ void subcase_destroy (struct subcase *); bool subcase_add_var (struct subcase *, const struct variable *, enum subcase_direction); +const struct caseproto *subcase_get_proto (const struct subcase *); + static inline bool subcase_is_empty (const struct subcase *); static inline size_t subcase_get_n_fields (const struct subcase *); -static inline size_t subcase_get_n_values (const struct subcase *); static inline enum subcase_direction subcase_get_direction ( const struct subcase *, size_t idx); @@ -110,10 +112,4 @@ subcase_get_n_fields (const struct subcase *sc) return sc->n_fields; } -static inline size_t -subcase_get_n_values (const struct subcase *sc) -{ - return sc->n_values; -} - #endif /* data/subcase.h */ diff --git a/src/data/sys-file-private.c b/src/data/sys-file-private.c index 2a2979f9..dd50ea8b 100644 --- a/src/data/sys-file-private.c +++ b/src/data/sys-file-private.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,7 @@ #include #include #include +#include #include "minmax.h" #include "xalloc.h" @@ -228,7 +229,8 @@ sfm_dictionary_to_sfm_vars (const struct dictionary *dict, if (used_bytes != 0) { sv = &(*sfm_vars)[(*sfm_var_cnt)++]; - sv->width = width == 0 ? 0 : used_bytes; + sv->var_width = width; + sv->segment_width = width == 0 ? 0 : used_bytes; sv->case_index = var_get_case_index (dv); sv->offset = sfm_segment_offset (width, j); sv->padding = padding; @@ -236,13 +238,11 @@ sfm_dictionary_to_sfm_vars (const struct dictionary *dict, else { /* Segment is all padding. Just add it to the - previous segment. 
(Otherwise we'd have an - ambiguity whether ->width of 0 indicates a - numeric variable or an all-padding segment.) */ + previous segment. */ sv = &(*sfm_vars)[*sfm_var_cnt - 1]; sv->padding += padding; } - assert ((sv->width + sv->padding) % 8 == 0); + assert ((sv->segment_width + sv->padding) % 8 == 0); } } diff --git a/src/data/sys-file-private.h b/src/data/sys-file-private.h index 9d5f52f0..e839cd6a 100644 --- a/src/data/sys-file-private.h +++ b/src/data/sys-file-private.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,7 +38,8 @@ struct dictionary; /* A variable in a system file. */ struct sfm_var { - int width; /* Value width (0=numeric, else string). */ + int var_width; /* Variable width (0 to 32767). */ + int segment_width; /* Segment width (0 to 255). */ int case_index; /* Index into case. */ /* The following members are interesting only for string diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index eff114a5..983ae502 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -71,7 +71,7 @@ struct sfm_reader struct fh_lock *lock; /* Mutual exclusion for file handle. */ FILE *file; /* File stream. */ bool error; /* I/O or corruption error? */ - size_t value_cnt; /* Number of "union value"s in struct case. */ + struct caseproto *proto; /* Format of output cases. */ /* File format. */ enum integer_format integer_format; /* On-disk integer format. */ @@ -319,11 +319,11 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, dictionary and may destroy or modify its variables. 
*/ sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt); pool_register (r->pool, free, r->sfm_vars); + r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool); pool_free (r->pool, var_by_value_idx); - r->value_cnt = dict_get_next_value_idx (*dict); return casereader_create_sequential - (NULL, r->value_cnt, + (NULL, r->proto, r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt, &sys_file_casereader_class, r); @@ -1138,6 +1138,7 @@ read_value_labels (struct sfm_reader *r, struct variable **var = NULL; /* Associated variables. */ int var_cnt; /* Number of associated variables. */ + int max_width; /* Maximum width of string variables. */ int i; @@ -1196,12 +1197,14 @@ read_value_labels (struct sfm_reader *r, /* Read the list of variables. */ var = pool_nalloc (subpool, var_cnt, sizeof *var); + max_width = 0; for (i = 0; i < var_cnt; i++) { var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r)); if (var_is_long_string (var[i])) sys_error (r, _("Value labels are not allowed on long string " "variables (%s)."), var_get_name (var[i])); + max_width = MAX (max_width, var_get_width (var[i])); } /* Type check the variables. 
*/ @@ -1220,9 +1223,10 @@ read_value_labels (struct sfm_reader *r, { struct label *label = labels + i; + value_init_pool (subpool, &label->value, max_width); if (var_is_alpha (var[0])) - buf_copy_rpad (label->value.s, sizeof label->value.s, - label->raw_value, sizeof label->raw_value); + buf_copy_rpad (value_str_rw (&label->value, max_width), max_width, + label->raw_value, sizeof label->raw_value, ' '); else label->value.f = float_get_double (r->float_format, label->raw_value); } @@ -1244,7 +1248,7 @@ read_value_labels (struct sfm_reader *r, label->value.f, var_get_name (v)); else sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), - var_get_width (v), label->value.s, + max_width, value_str (&label->value, max_width), var_get_name (v)); } } @@ -1370,7 +1374,7 @@ sys_file_casereader_read (struct casereader *reader, void *r_) if (r->error) return NULL; - c = case_create (r->value_cnt); + c = case_create (r->proto); if (setjmp (r->bail_out)) { casereader_force_error (reader); @@ -1383,14 +1387,15 @@ sys_file_casereader_read (struct casereader *reader, void *r_) struct sfm_var *sv = &r->sfm_vars[i]; union value *v = case_data_rw_idx (c, sv->case_index); - if (sv->width == 0) + if (sv->var_width == 0) { if (!read_case_number (r, &v->f)) goto eof; } else { - if (!read_case_string (r, v->s + sv->offset, sv->width)) + char *s = value_str_rw (v, sv->var_width); + if (!read_case_string (r, s + sv->offset, sv->segment_width)) goto eof; if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8))) partial_record (r); diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index 292ec9c5..4907ad73 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -261,8 +261,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, return NULL; } - return casewriter_create (dict_get_next_value_idx (d), - &sys_file_casewriter_class, w); + return casewriter_create (dict_get_proto (d), &sys_file_casewriter_class, w); error: close_writer (w); @@ 
-498,26 +497,31 @@ static void write_value_labels (struct sfm_writer *w, struct variable *v, int idx) { const struct val_labs *val_labs; - struct val_labs_iterator *i; - struct val_lab *vl; + const struct val_lab **labels; + size_t n_labels; + size_t i; val_labs = var_get_value_labels (v); - if (val_labs == NULL) + n_labels = val_labs_count (val_labs); + if (n_labels == 0) return; /* Value label record. */ write_int (w, 3); /* Record type. */ write_int (w, val_labs_count (val_labs)); - for (vl = val_labs_first_sorted (val_labs, &i); vl != NULL; - vl = val_labs_next (val_labs, &i)) + labels = val_labs_sorted (val_labs); + for (i = 0; i < n_labels; i++) { - uint8_t len = MIN (strlen (vl->label), 255); + const struct val_lab *vl = labels[i]; + const char *label = val_lab_get_label (vl); + uint8_t len = MIN (strlen (label), 255); - write_value (w, &vl->value, var_get_width (v)); + write_value (w, val_lab_get_value (vl), var_get_width (v)); write_bytes (w, &len, 1); - write_bytes (w, vl->label, len); + write_bytes (w, label, len); write_zeros (w, REM_RND_UP (len + 1, 8)); } + free (labels); /* Value label variable record. */ write_int (w, 4); /* Record type. */ @@ -868,12 +872,12 @@ write_case_uncompressed (struct sfm_writer *w, const struct ccase *c) { struct sfm_var *v = &w->sfm_vars[i]; - if (v->width == 0) + if (v->var_width == 0) write_float (w, case_num_idx (c, v->case_index)); else { write_bytes (w, case_str_idx (c, v->case_index) + v->offset, - v->width); + v->segment_width); write_spaces (w, v->padding); } } @@ -889,7 +893,7 @@ write_case_compressed (struct sfm_writer *w, const struct ccase *c) { struct sfm_var *v = &w->sfm_vars[i]; - if (v->width == 0) + if (v->var_width == 0) { double d = case_num_idx (c, v->case_index); if (d == SYSMIS) @@ -913,7 +917,7 @@ write_case_compressed (struct sfm_writer *w, const struct ccase *c) multiple of 8, by ensuring that the final partial oct (8 byte unit) is treated as padded with spaces on the right. 
*/ - for (width = v->width; width > 0; width -= 8, offset += 8) + for (width = v->segment_width; width > 0; width -= 8, offset += 8) { const void *data = case_str_idx (c, v->case_index) + offset; int chunk_size = MIN (width, 8); @@ -1036,7 +1040,7 @@ write_value (struct sfm_writer *w, const union value *value, int width) write_float (w, value->f); else { - write_bytes (w, value->s, width); + write_bytes (w, value_str (value, width), width); write_zeros (w, 8 - width); } } diff --git a/src/data/value-labels.c b/src/data/value-labels.c index 9f6113b3..c8061f7b 100644 --- a/src/data/value-labels.c +++ b/src/data/value-labels.c @@ -23,61 +23,52 @@ #include #include #include +#include #include -#include +#include +#include #include #include #include "xalloc.h" -static hsh_compare_func compare_int_val_lab; -static hsh_hash_func hash_int_val_lab; -static hsh_free_func free_int_val_lab; - -struct atom; static struct atom *atom_create (const char *string); static void atom_destroy (struct atom *); -static char *atom_to_string (const struct atom *); +static const char *atom_to_string (const struct atom *); -/* A set of value labels. */ -struct val_labs - { - int width; /* 0=numeric, otherwise string width. */ - struct hsh_table *labels; /* Hash table of `struct int_val_lab's. */ - }; +/* Returns the label in VL. The caller must not modify or free + the returned value. */ +const char * +val_lab_get_label (const struct val_lab *vl) +{ + return atom_to_string (vl->label); +} /* Creates and returns a new, empty set of value labels with the - given WIDTH. To actually add any value labels, WIDTH must be - a numeric or short string width. */ + given WIDTH. 
*/ struct val_labs * val_labs_create (int width) { - struct val_labs *vls; - - assert (width >= 0); - - vls = xmalloc (sizeof *vls); + struct val_labs *vls = xmalloc (sizeof *vls); vls->width = width; - vls->labels = NULL; + hmap_init (&vls->labels); return vls; } /* Creates and returns a new set of value labels identical to - VLS. */ + VLS. Returns a null pointer if VLS is null. */ struct val_labs * val_labs_clone (const struct val_labs *vls) { struct val_labs *copy; - struct val_labs_iterator *i; - struct val_lab *vl; + struct val_lab *label; if (vls == NULL) return NULL; copy = val_labs_create (vls->width); - for (vl = val_labs_first (vls, &i); vl != NULL; - vl = val_labs_next (vls, &i)) - val_labs_add (copy, vl->value, vl->label); + HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels) + val_labs_add (copy, &label->value, atom_to_string (label->label)); return copy; } @@ -86,32 +77,28 @@ val_labs_clone (const struct val_labs *vls) bool val_labs_can_set_width (const struct val_labs *vls, int new_width) { - struct val_labs_iterator *i; - struct val_lab *lab; + struct val_lab *label; - for (lab = val_labs_first (vls, &i); lab != NULL; - lab = val_labs_next (vls, &i)) - if (!value_is_resizable (&lab->value, vls->width, new_width)) - { - val_labs_done (&i); - return false; - } + HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels) + if (!value_is_resizable (&label->value, vls->width, new_width)) + return false; return true; } /* Changes the width of VLS to NEW_WIDTH. The original and new - width must be both numeric or both string. If the new width - is a long string width, then any value labels in VLS are - deleted. */ + width must be both numeric or both string. 
*/ void val_labs_set_width (struct val_labs *vls, int new_width) { assert (val_labs_can_set_width (vls, new_width)); - + if (value_needs_resize (vls->width, new_width)) + { + struct val_lab *label; + HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels) + value_resize (&label->value, vls->width, new_width); + } vls->width = new_width; - if (new_width > MAX_SHORT_STRING) - val_labs_clear (vls); } /* Destroys VLS. */ @@ -120,7 +107,8 @@ val_labs_destroy (struct val_labs *vls) { if (vls != NULL) { - hsh_destroy (vls->labels); + val_labs_clear (vls); + hmap_destroy (&vls->labels); free (vls); } } @@ -129,328 +117,205 @@ val_labs_destroy (struct val_labs *vls) void val_labs_clear (struct val_labs *vls) { - assert (vls != NULL); + struct val_lab *label, *next; - hsh_destroy (vls->labels); - vls->labels = NULL; + HMAP_FOR_EACH_SAFE (label, next, struct val_lab, node, &vls->labels) + { + hmap_delete (&vls->labels, &label->node); + value_destroy (&label->value, vls->width); + atom_destroy (label->label); + free (label); + } } -/* Returns the number of value labels in VLS. */ +/* Returns the number of value labels in VLS. + Returns 0 if VLS is null. */ size_t val_labs_count (const struct val_labs *vls) { - return vls == NULL || vls->labels == NULL ? 0 : hsh_count (vls->labels); + return vls == NULL ? 0 : hmap_count (&vls->labels); } -/* One value label in internal format. */ -struct int_val_lab - { - union value value; /* The value being labeled. */ - struct atom *label; /* A ref-counted string. */ - }; - -/* Creates and returns an int_val_lab based on VALUE and - LABEL. 
*/ -static struct int_val_lab * -create_int_val_lab (struct val_labs *vls, union value value, const char *label) +static void +do_add_val_lab (struct val_labs *vls, const union value *value, + const char *label) { - struct int_val_lab *ivl; - - assert (label != NULL); - assert (vls->width <= MAX_SHORT_STRING); - - ivl = xmalloc (sizeof *ivl); - ivl->value = value; - if (vls->width > 0) - memset (ivl->value.s + vls->width, ' ', MAX_SHORT_STRING - vls->width); - ivl->label = atom_create (label); - - return ivl; + struct val_lab *lab = xmalloc (sizeof *lab); + value_init (&lab->value, vls->width); + value_copy (&lab->value, value, vls->width); + lab->label = atom_create (label); + hmap_insert (&vls->labels, &lab->node, value_hash (value, vls->width, 0)); } -/* If VLS does not already contain a value label for VALUE (and - VLS represents a numeric or short string set of value labels), - adds LABEL for it and returns true. Otherwise, returns - false. */ +/* If VLS does not already contain a value label for VALUE, adds + LABEL for it and returns true. Otherwise, returns false. */ bool -val_labs_add (struct val_labs *vls, union value value, const char *label) +val_labs_add (struct val_labs *vls, const union value *value, + const char *label) { - assert (label != NULL); - if (vls->width < MIN_LONG_STRING) + const struct val_lab *lab = val_labs_lookup (vls, value); + if (lab == NULL) { - struct int_val_lab *ivl; - void **vlpp; - - if (vls->labels == NULL) - vls->labels = hsh_create (8, compare_int_val_lab, hash_int_val_lab, - free_int_val_lab, vls); - - ivl = create_int_val_lab (vls, value, label); - vlpp = hsh_probe (vls->labels, ivl); - if (*vlpp == NULL) - { - *vlpp = ivl; - return true; - } - free_int_val_lab (ivl, vls); + do_add_val_lab (vls, value, label); + return true; } - return false; + else + return false; } /* Sets LABEL as the value label for VALUE in VLS, replacing any - existing label for VALUE. Has no effect if VLS has a long - string width. 
*/ + existing label for VALUE. */ void -val_labs_replace (struct val_labs *vls, union value value, const char *label) +val_labs_replace (struct val_labs *vls, const union value *value, + const char *label) { - if (vls->width < MIN_LONG_STRING) + struct val_lab *vl = (struct val_lab *) val_labs_lookup (vls, value); + if (vl != NULL) { - if (vls->labels != NULL) - { - struct int_val_lab *new = create_int_val_lab (vls, value, label); - struct int_val_lab *old = hsh_replace (vls->labels, new); - if (old != NULL) - free_int_val_lab (old, vls); - } - else - val_labs_add (vls, value, label); + atom_destroy (vl->label); + vl->label = atom_create (label); } + else + do_add_val_lab (vls, value, label); } -/* Removes any value label for VALUE within VLS. Returns true - if a value label was removed. */ -bool -val_labs_remove (struct val_labs *vls, union value value) +/* Removes LABEL from VLS. */ +void +val_labs_remove (struct val_labs *vls, const struct val_lab *label_) { - if (vls->width < MIN_LONG_STRING && vls->labels != NULL) - { - struct int_val_lab *ivl = create_int_val_lab (vls, value, ""); - int deleted = hsh_delete (vls->labels, ivl); - free (ivl); - return deleted; - } - else - return false; + struct val_lab *label = (struct val_lab *) label_; + hmap_delete (&vls->labels, &label->node); + value_destroy (&label->value, vls->width); + atom_destroy (label->label); + free (label); } /* Searches VLS for a value label for VALUE. If successful, - returns the label; otherwise, returns a null pointer. If - VLS's width is greater than MAX_SHORT_STRING, always returns a - null pointer. */ -char * -val_labs_find (const struct val_labs *vls, union value value) + returns the string used as the label; otherwise, returns a + null pointer. Returns a null pointer if VLS is null. 
*/ +const char * +val_labs_find (const struct val_labs *vls, const union value *value) { - if (vls != NULL - && vls->width <= MAX_SHORT_STRING - && vls->labels != NULL) - { - struct int_val_lab ivl, *vlp; - - ivl.value = value; - vlp = hsh_find (vls->labels, &ivl); - if (vlp != NULL) - return atom_to_string (vlp->label); - } - return NULL; + const struct val_lab *label = val_labs_lookup (vls, value); + return label ? atom_to_string (label->label) : NULL; } - -/* A value labels iterator. */ -struct val_labs_iterator - { - void **labels; /* The labels, in order. */ - void **lp; /* Current label. */ - struct val_lab vl; /* Structure presented to caller. */ - }; -/* Sets up *IP for iterating through the value labels in VLS in - no particular order. Returns the first value label or a null - pointer if VLS is empty. If the return value is non-null, - then val_labs_next() may be used to continue iterating or - val_labs_done() to free up the iterator. Otherwise, neither - function may be called for *IP. */ -struct val_lab * -val_labs_first (const struct val_labs *vls, struct val_labs_iterator **ip) +/* Searches VLS for a value label for VALUE. If successful, + returns the value label; otherwise, returns a null pointer. + Returns a null pointer if VLS is null. 
*/ +const struct val_lab * +val_labs_lookup (const struct val_labs *vls, const union value *value) { - struct val_labs_iterator *i; - - assert (vls != NULL); - assert (ip != NULL); - - if (vls->labels == NULL || vls->width > MAX_SHORT_STRING) + if (vls != NULL) { - *ip = NULL; - return NULL; + struct val_lab *label; + HMAP_FOR_EACH_WITH_HASH (label, struct val_lab, node, + value_hash (value, vls->width, 0), &vls->labels) + if (value_equal (&label->value, value, vls->width)) + return label; } - - i = *ip = xmalloc (sizeof *i); - i->labels = hsh_data_copy (vls->labels); - i->lp = i->labels; - return val_labs_next (vls, ip); + return NULL; } - -/* Sets up *IP for iterating through the value labels in VLS in - sorted order of values. Returns the first value label or a - null pointer if VLS is empty. If the return value is - non-null, then val_labs_next() may be used to continue - iterating or val_labs_done() to free up the iterator. - Otherwise, neither function may be called for *IP. */ -struct val_lab * -val_labs_first_sorted (const struct val_labs *vls, - struct val_labs_iterator **ip) + +/* Returns the first value label in VLS, in arbitrary order, or a + null pointer if VLS is empty or if VLS is a null pointer. If + the return value is non-null, then val_labs_next() may be used + to continue iterating. */ +const struct val_lab * +val_labs_first (const struct val_labs *vls) { - struct val_labs_iterator *i; - - assert (vls != NULL); - assert (ip != NULL); - - if (vls->labels == NULL || vls->width > MAX_SHORT_STRING) - { - *ip = NULL; - return NULL; - } - - i = *ip = xmalloc (sizeof *i); - i->lp = i->labels = hsh_sort_copy (vls->labels); - return val_labs_next (vls, ip); + return vls ? HMAP_FIRST (struct val_lab, node, &vls->labels) : NULL; } /* Returns the next value label in an iteration begun by - val_labs_first() or val_labs_first_sorted(). 
If the return - value is non-null, then val_labs_next() may be used to - continue iterating or val_labs_done() to free up the iterator. - Otherwise, neither function may be called for *IP. */ -struct val_lab * -val_labs_next (const struct val_labs *vls, struct val_labs_iterator **ip) + val_labs_first(). If the return value is non-null, then + val_labs_next() may be used to continue iterating. */ +const struct val_lab * +val_labs_next (const struct val_labs *vls, const struct val_lab *label) { - struct val_labs_iterator *i; - struct int_val_lab *ivl; - - assert (vls != NULL); - assert (vls->width <= MAX_SHORT_STRING); - assert (ip != NULL); - assert (*ip != NULL); - - i = *ip; - ivl = *i->lp++; - if (ivl != NULL) - { - i->vl.value = ivl->value; - i->vl.label = atom_to_string (ivl->label); - return &i->vl; - } - else - { - free (i->labels); - free (i); - *ip = NULL; - return NULL; - } + return HMAP_NEXT (label, struct val_lab, node, &vls->labels); } -/* Discards the state for an incomplete iteration begun by - val_labs_first() or val_labs_first_sorted(). */ -void -val_labs_done (struct val_labs_iterator **ip) -{ - if (*ip != NULL) - { - struct val_labs_iterator *i = *ip; - free (i->labels); - free (i); - *ip = NULL; - } -} - -/* Compares two value labels and returns a strcmp()-type result. */ -int -compare_int_val_lab (const void *a_, const void *b_, const void *vls_) +static int +compare_labels_by_value_3way (const void *a_, const void *b_, const void *vls_) { - const struct int_val_lab *a = a_; - const struct int_val_lab *b = b_; + const struct val_lab *const *a = a_; + const struct val_lab *const *b = b_; const struct val_labs *vls = vls_; - - if (vls->width == 0) - return a->value.f < b->value.f ? -1 : a->value.f > b->value.f; - else - return memcmp (a->value.s, b->value.s, vls->width); + return value_compare_3way (&(*a)->value, &(*b)->value, vls->width); } -/* Hash a value label. 
*/ -unsigned -hash_int_val_lab (const void *vl_, const void *vls_) +/* Allocates and returns an array of pointers to value labels + that is sorted in increasing order by value. The array has + val_labs_count(VLS) elements. The caller is responsible for + freeing the array. */ +const struct val_lab ** +val_labs_sorted (const struct val_labs *vls) { - const struct int_val_lab *vl = vl_; - const struct val_labs *vls = vls_; - - if (vls->width == 0) - return hash_double (vl->value.f, 0); + if (vls != NULL) + { + const struct val_lab *label; + const struct val_lab **labels; + size_t i; + + labels = xmalloc (val_labs_count (vls) * sizeof *labels); + i = 0; + HMAP_FOR_EACH (label, struct val_lab, node, &vls->labels) + labels[i++] = label; + assert (i == val_labs_count (vls)); + sort (labels, val_labs_count (vls), sizeof *labels, + compare_labels_by_value_3way, vls); + return labels; + } else - return hash_bytes (vl->value.s, vls->width, 0); -} - -/* Free a value label. */ -void -free_int_val_lab (void *vl_, const void *vls_ UNUSED) -{ - struct int_val_lab *vl = vl_; - - atom_destroy (vl->label); - free (vl); + return NULL; } -/* Atoms. */ +/* Atoms: reference-counted constant strings. */ /* An atom. */ struct atom { + struct hmap_node node; /* Hash map node. */ char *string; /* String value. */ unsigned ref_count; /* Number of references. */ }; -static hsh_compare_func compare_atoms; -static hsh_hash_func hash_atom; -static hsh_free_func free_atom; - /* Hash table of atoms. */ -static struct hsh_table *atoms; +static struct hmap atoms = HMAP_INITIALIZER (atoms); -static void -destroy_atoms (void) -{ - hsh_destroy (atoms); -} +static void free_atom (struct atom *atom); +static void free_all_atoms (void); /* Creates and returns an atom for STRING. 
*/ static struct atom * atom_create (const char *string) { - struct atom a; - void **app; + static bool initialized; + struct atom *atom; + size_t hash; assert (string != NULL); - if (atoms == NULL) + if (!initialized) { - atoms = hsh_create (8, compare_atoms, hash_atom, free_atom, NULL); - atexit (destroy_atoms); + initialized = true; + atexit (free_all_atoms); } - a.string = (char *) string; - app = hsh_probe (atoms, &a); - if (*app != NULL) - { - struct atom *ap = *app; - ap->ref_count++; - return ap; - } - else - { - struct atom *ap = xmalloc (sizeof *ap); - ap->string = xstrdup (string); - ap->ref_count = 1; - *app = ap; - return ap; - } + hash = hash_string (string, 0); + HMAP_FOR_EACH_WITH_HASH (atom, struct atom, node, hash, &atoms) + if (!strcmp (atom->string, string)) + { + atom->ref_count++; + return atom; + } + + atom = xmalloc (sizeof *atom); + atom->string = xstrdup (string); + atom->ref_count = 1; + hmap_insert (&atoms, &atom->node, hash); + return atom; } /* Destroys ATOM. */ @@ -462,44 +327,32 @@ atom_destroy (struct atom *atom) assert (atom->ref_count > 0); atom->ref_count--; if (atom->ref_count == 0) - hsh_force_delete (atoms, atom); + { + hmap_delete (&atoms, &atom->node); + free_atom (atom); + } } } /* Returns the string associated with ATOM. */ -static char * +static const char * atom_to_string (const struct atom *atom) { - assert (atom != NULL); - return atom->string; } -/* A hsh_compare_func that compares A and B. */ -static int -compare_atoms (const void *a_, const void *b_, const void *aux UNUSED) -{ - const struct atom *a = a_; - const struct atom *b = b_; - - return strcmp (a->string, b->string); -} - -/* A hsh_hash_func that hashes ATOM. */ -static unsigned -hash_atom (const void *atom_, const void *aux UNUSED) +static void +free_atom (struct atom *atom) { - const struct atom *atom = atom_; - - return hash_string (atom->string, 0); + free (atom->string); + free (atom); } -/* A hsh_free_func that destroys ATOM. 
*/ static void -free_atom (void *atom_, const void *aux UNUSED) +free_all_atoms (void) { - struct atom *atom = atom_; + struct atom *atom, *next; - free (atom->string); - free (atom); + HMAP_FOR_EACH_SAFE (atom, next, struct atom, node, &atoms) + free_atom (atom); } diff --git a/src/data/value-labels.h b/src/data/value-labels.h index fb7ec22b..53d13a38 100644 --- a/src/data/value-labels.h +++ b/src/data/value-labels.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,12 +28,37 @@ #include #include #include +#include -/* One value label. */ +/* One value label. + + A value label is normally part of a struct val_labs (see + below). */ struct val_lab { - union value value; - const char *label; + struct hmap_node node; /* Node in hash map. */ + union value value; /* The value being labeled. */ + struct atom *label; /* A ref-counted string. */ + }; + +/* Returns the value in VL. The caller must not modify or free + the returned value. + + The width of the returned value cannot be determined directly + from VL. It may be obtained by calling val_labs_get_width on + the val_labs struct that VL is in. */ +static inline const union value *val_lab_get_value (const struct val_lab *vl) +{ + return &vl->value; +} + +const char *val_lab_get_label (const struct val_lab *); + +/* A set of value labels. */ +struct val_labs + { + int width; /* 0=numeric, otherwise string width. */ + struct hmap labels; /* Hash table of `struct val_lab's. */ }; /* Creating and destroying sets of value labels. 
*/ @@ -41,28 +66,28 @@ struct val_labs *val_labs_create (int width); struct val_labs *val_labs_clone (const struct val_labs *); void val_labs_clear (struct val_labs *); void val_labs_destroy (struct val_labs *); +size_t val_labs_count (const struct val_labs *); /* Looking up value labels. */ -char *val_labs_find (const struct val_labs *, union value); +const char *val_labs_find (const struct val_labs *, const union value *); +const struct val_lab *val_labs_lookup (const struct val_labs *, + const union value *); /* Basic properties. */ size_t val_labs_count (const struct val_labs *); +int val_labs_get_width (const struct val_labs *); bool val_labs_can_set_width (const struct val_labs *, int new_width); void val_labs_set_width (struct val_labs *, int new_width); /* Adding value labels. */ -bool val_labs_add (struct val_labs *, union value, const char *); -void val_labs_replace (struct val_labs *, union value, const char *); -bool val_labs_remove (struct val_labs *, union value); +bool val_labs_add (struct val_labs *, const union value *, const char *); +void val_labs_replace (struct val_labs *, const union value *, const char *); +void val_labs_remove (struct val_labs *, const struct val_lab *); /* Iterating through value labels. 
*/ -struct val_labs_iterator; -struct val_lab *val_labs_first (const struct val_labs *, - struct val_labs_iterator **); -struct val_lab *val_labs_first_sorted (const struct val_labs *, - struct val_labs_iterator **); -struct val_lab *val_labs_next (const struct val_labs *, - struct val_labs_iterator **); -void val_labs_done (struct val_labs_iterator **); +const struct val_lab *val_labs_first (const struct val_labs *); +const struct val_lab *val_labs_next (const struct val_labs *, + const struct val_lab *); +const struct val_lab **val_labs_sorted (const struct val_labs *); #endif /* data/value-labels.h */ diff --git a/src/data/value.c b/src/data/value.c index 180f1b6d..2341f029 100644 --- a/src/data/value.c +++ b/src/data/value.c @@ -18,66 +18,72 @@ #include #include +#include #include +#include #include -#include "variable.h" +#include "minmax.h" #include "xalloc.h" -/* Duplicate a value. - The caller is responsible for freeing the returned value. */ -union value * -value_dup (const union value *val, int width) -{ - return xmemdup (val, MAX (width, sizeof *val)); -} +/* Copies the contents of string value SRC with width SRC_WIDTH + to string value DST with width DST_WIDTH. If SRC_WIDTH is + greater than DST_WIDTH, then only the first DST_WIDTH bytes + are copied; if DST_WIDTH is greater than SRC_WIDTH, then DST + is padded on the right with PAD bytes. + SRC and DST must be string values; that is, SRC_WIDTH and + DST_WIDTH must both be positive. -/* Create a value of specified width. - The caller is responsible for freeing the returned value. */ -union value * -value_create (int width) + It is important that SRC_WIDTH and DST_WIDTH be the actual + widths with which SRC and DST were initialized. Passing, + e.g., smaller values in order to copy only a prefix of SRC or + modify only a prefix of DST will not work in every case. 
*/ +void +value_copy_rpad (union value *dst, int dst_width, + const union value *src, int src_width, + char pad) { - return xnmalloc (value_cnt_from_width (width), sizeof (union value)); + buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, + value_str (src, src_width), src_width, + pad); } +/* Copies the contents of null-terminated string SRC to string + value DST with width DST_WIDTH. If SRC is more than DST_WIDTH + bytes long, then only the first DST_WIDTH bytes are copied; if + DST_WIDTH is greater than the length of SRC, then DST is + padded on the right with PAD bytes. -/* Compares A and B, which both have the given WIDTH, and returns - a strcmp()-type result. - Only the short string portion of longer strings are - compared. */ -int -compare_values_short (const void *a_, const void *b_, const void *var_) -{ - const union value *a = a_; - const union value *b = b_; - const struct variable *var = var_; - int width = var_get_width (var); - return value_compare_3way (a, b, MIN (width, MAX_SHORT_STRING)); -} - + DST must be a string value; that is, DST_WIDTH must be + positive. -/* Create a hash of V, which has the given WIDTH. - Only the short string portion of a longer string is hashed. */ -unsigned -hash_value_short (const void *v_, const void *var_) + It is important that DST_WIDTH be the actual width with which + DST was initialized. Passing, e.g., a smaller value in order + to modify only a prefix of DST will not work in every case. */ +void +value_copy_str_rpad (union value *dst, int dst_width, const char *src, + char pad) { - const union value *v = v_; - const struct variable *var = var_; - int width = var_get_width (var); - return width == 0 ? hash_double (v->f, 0) : hash_bytes (v->s, width, 0); + value_copy_buf_rpad (dst, dst_width, src, strlen (src), pad); } +/* Copies the SRC_LEN bytes at SRC to string value DST with width + DST_WIDTH. 
If SRC_LEN is greater than DST_WIDTH, then only + the first DST_WIDTH bytes are copied; if DST_WIDTH is greater + than SRC_LEN, then DST is padded on the right with PAD bytes. -/* Copies SRC to DST, given that they both contain data of the - given WIDTH. */ + DST must be a string value; that is, DST_WIDTH must be + positive. + + It is important that DST_WIDTH be the actual width with which + DST was initialized. Passing, e.g., a smaller value in order + to modify only a prefix of DST will not work in every case. */ void -value_copy (union value *dst, const union value *src, int width) +value_copy_buf_rpad (union value *dst, int dst_width, + const char *src, size_t src_len, char pad) { - if (width == 0) - dst->f = src->f; - else - memcpy (dst->s, src->s, width); + buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, src, src_len, pad); } /* Sets V to the system-missing value for data of the given @@ -88,7 +94,37 @@ value_set_missing (union value *v, int width) if (width == 0) v->f = SYSMIS; else - memset (v->s, ' ', width); + memset (value_str_rw (v, width), ' ', width); +} + +/* Compares A and B, which both have the given WIDTH, and returns + a strcmp()-type result. */ +int +value_compare_3way (const union value *a, const union value *b, int width) +{ + return (width == 0 + ? (a->f < b->f ? -1 : a->f > b->f) + : memcmp (value_str (a, width), value_str (b, width), width)); +} + +/* Returns true if A and B, which must both have the given WIDTH, + have equal contents, false if their contents differ. */ +bool +value_equal (const union value *a, const union value *b, int width) +{ + return (width == 0 + ? a->f == b->f + : !memcmp (value_str (a, width), value_str (b, width), width)); +} + +/* Returns a hash of the data in VALUE, which must have the given + WIDTH, folding BASIS into the hash value calculation. */ +unsigned int +value_hash (const union value *value, int width, unsigned int basis) +{ + return (width == 0 + ? 
hash_double (value->f, basis) + : hash_bytes (value_str (value, width), width, basis)); } /* Tests whether VALUE may be resized from OLD_WIDTH to @@ -100,14 +136,20 @@ value_set_missing (union value *v, int width) bool value_is_resizable (const union value *value, int old_width, int new_width) { - int i; - - if (val_type_from_width (old_width) != val_type_from_width (new_width)) + if (old_width == new_width) + return true; + else if (val_type_from_width (old_width) != val_type_from_width (new_width)) return false; - for (i = new_width; i < old_width; i++) - if (value->s[i] != ' ') - return false; - return true; + else + { + const char *str = value_str (value, old_width); + int i; + + for (i = new_width; i < old_width; i++) + if (str[i] != ' ') + return false; + return true; + } } /* Resizes VALUE from OLD_WIDTH to NEW_WIDTH. The arguments must @@ -116,16 +158,75 @@ void value_resize (union value *value, int old_width, int new_width) { assert (value_is_resizable (value, old_width, new_width)); - if (new_width > old_width) - memset (&value->s[old_width], ' ', new_width - old_width); + if (new_width != old_width) + { + union value tmp; + value_init (&tmp, new_width); + value_copy_rpad (&tmp, new_width, value, old_width, ' '); + value_destroy (value, old_width); + *value = tmp; + } } -/* Compares A and B, which both have the given WIDTH, and returns - a strcmp()-type result. */ -int -value_compare_3way (const union value *a, const union value *b, int width) +/* Returns true if resizing a value from OLD_WIDTH to NEW_WIDTH + actually changes anything, false otherwise. If false is + returned, calls to value_resize() with the specified + parameters may be omitted without any ill effects. + + This is generally useful only if many values can skip being + resized from OLD_WIDTH to NEW_WIDTH. Otherwise you might as + well just call value_resize directly. */ +bool +value_needs_resize (int old_width, int new_width) { - return (width == 0 - ? (a->f < b->f ? 
-1 : a->f > b->f) - : memcmp (a->s, b->s, width)); + assert (val_type_from_width (old_width) == val_type_from_width (new_width)); + + /* We need to call value_resize if either the new width is + longer than the old width (in which case the new characters + must be set to spaces) or if either width is a long string. + (We could omit resizing if both the old and new widths were + long and the new width was shorter, but we choose to resize + anyway in hopes of saving memory.) */ + return (old_width != new_width + && (new_width > old_width + || old_width >= MIN_LONG_STRING + || new_width >= MIN_LONG_STRING)); +} + +/* Same as value_init, except that memory for VALUE (if + necessary) is allocated from POOL and will be freed + automatically when POOL is destroyed. + + VALUE must not be freed manually by calling value_destroy. If + it needs to be resized, it must be done using + value_resize_pool instead of value_resize. */ +void +value_init_pool (struct pool *pool, union value *value, int width) +{ + if (width > MAX_SHORT_STRING) + value->long_string = pool_alloc_unaligned (pool, width); +} + +/* Same as value_resize, except that VALUE must have been + allocated from POOL using value_init_pool. + + This function causes some memory in POOL to be wasted in some + cases (until the pool is freed), so it should only be done if + this is acceptable. 
*/ +void +value_resize_pool (struct pool *pool, union value *value, + int old_width, int new_width) +{ + assert (value_is_resizable (value, old_width, new_width)); + if (new_width > old_width) + { + if (new_width > MAX_SHORT_STRING) + { + char *new_long_string = pool_alloc_unaligned (pool, new_width); + memcpy (new_long_string, value_str (value, old_width), old_width); + value->long_string = new_long_string; + } + memset (value_str_rw (value, new_width) + old_width, ' ', + new_width - old_width); + } } diff --git a/src/data/value.h b/src/data/value.h index 97c24050..8f174633 100644 --- a/src/data/value.h +++ b/src/data/value.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,47 +17,163 @@ #ifndef DATA_VALUE_H #define DATA_VALUE_H 1 -#include +#include #include -#include -#include "minmax.h" - -/* "Short" strings, which are generally those no more than 8 - characters wide, can participate in more operations than - longer strings. */ -#define MAX_SHORT_STRING (MAX (ROUND_UP (SIZEOF_DOUBLE, 2), 8)) +#include +#include +#include "xalloc.h" + +#define MAX_SHORT_STRING 8 #define MIN_LONG_STRING (MAX_SHORT_STRING + 1) -/* A numeric or short string value. - Multiple consecutive values represent a long string. */ +/* A numeric or string value. + + The client is responsible for keeping track of the value's + width. + + This structure is semi-opaque: + + - If the value is a number, clients may access the 'f' + member directly. + + - Clients should not access other members directly. 
+*/ union value { double f; - char s[MAX_SHORT_STRING]; + char short_string[MAX_SHORT_STRING]; + char *long_string; }; -union value *value_dup (const union value *, int width); -union value *value_create (int width); +static inline void value_init (union value *, int width); +static inline bool value_needs_init (int width); +static inline bool value_try_init (union value *, int width); +static inline void value_destroy (union value *, int width); + +static inline double value_num (const union value *); +static inline const char *value_str (const union value *, int width); +static inline char *value_str_rw (union value *, int width); int compare_values (const void *, const void *, const void *var); unsigned hash_value (const void *, const void *var); -int compare_values_short (const void *, const void *, const void *var); -unsigned hash_value_short (const void *, const void *var); - -static inline size_t value_cnt_from_width (int width); -void value_copy (union value *, const union value *, int width); +static inline void value_copy (union value *, const union value *, int width); +void value_copy_rpad (union value *, int dst_width, + const union value *, int src_width, + char pad); +void value_copy_str_rpad (union value *, int dst_width, const char *, + char pad); +void value_copy_buf_rpad (union value *dst, int dst_width, + const char *src, size_t src_len, char pad); void value_set_missing (union value *, int width); +int value_compare_3way (const union value *, const union value *, int width); +bool value_equal (const union value *, const union value *, int width); +size_t value_hash (const union value *, int width, unsigned int basis); + bool value_is_resizable (const union value *, int old_width, int new_width); +bool value_needs_resize (int old_width, int new_width); void value_resize (union value *, int old_width, int new_width); -int value_compare_3way (const union value *, const union value *, int width); -/* Number of "union value"s required for a variable 
of the given - WIDTH. */ -static inline size_t -value_cnt_from_width (int width) +struct pool; +void value_init_pool (struct pool *, union value *, int width); +void value_resize_pool (struct pool *, union value *, + int old_width, int new_width); + +/* Initializes V as a value of the given WIDTH, where 0 + represents a numeric value and a positive integer represents a + string value WIDTH bytes long. + + A WIDTH of -1 is ignored. + + The contents of value V are indeterminate after + initialization. */ +static inline void +value_init (union value *v, int width) +{ + if (width > MAX_SHORT_STRING) + v->long_string = xmalloc (width); +} + +/* Returns true if a value of the given WIDTH actually needs to + have the value_init and value_destroy functions called, false + if those functions are no-ops for values of the given WIDTH. + + Using this function is only a valuable optimization if a large + number of values of the given WIDTH are to be initialized. */ +static inline bool +value_needs_init (int width) +{ + return width > MAX_SHORT_STRING; +} + +/* Same as value_init, except that failure to allocate memory + causes it to return false instead of terminating the + program. On success, returns true. */ +static inline bool +value_try_init (union value *v, int width) +{ + if (width > MAX_SHORT_STRING) + { + v->long_string = malloc (width); + return v->long_string != NULL; + } + else + return true; +} + +/* Frees any memory allocated by value_init for V, which must + have the given WIDTH. */ +static inline void +value_destroy (union value *v, int width) +{ + if (width > MAX_SHORT_STRING) + free (v->long_string); +} + +/* Returns the numeric value in V, which must have width 0. */ +static inline double +value_num (const union value *v) +{ + return v->f; +} + +/* Returns the string value in V, which must have width WIDTH. + + The returned value is not null-terminated. + + It is important that WIDTH be the actual value that was passed + to value_init. 
Passing, e.g., a smaller value because only + that number of bytes will be accessed will not always work. */ +static inline const char * +value_str (const union value *v, int width) +{ + assert (width > 0); + return (width >= MIN_LONG_STRING ? v->long_string : v->short_string); +} + +/* Returns the string value in V, which must have width WIDTH. + + The returned value is not null-terminated. + + It is important that WIDTH be the actual value that was passed + to value_init. Passing, e.g., a smaller value because only + that number of bytes will be accessed will not always work. */ +static inline char * +value_str_rw (union value *v, int width) +{ + assert (width > 0); + return (width >= MIN_LONG_STRING ? v->long_string : v->short_string); +} + +/* Copies SRC to DST, given that they both contain data of the + given WIDTH. */ +static inline void +value_copy (union value *dst, const union value *src, int width) { - return width == 0 ? 1 : DIV_RND_UP (width, MAX_SHORT_STRING); + if (width <= MAX_SHORT_STRING) + *dst = *src; + else if (dst != src) + memcpy (dst->long_string, src->long_string, width); } #endif /* data/value.h */ diff --git a/src/data/vardict.h b/src/data/vardict.h index dfde1bff..b4552cf1 100644 --- a/src/data/vardict.h +++ b/src/data/vardict.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -40,7 +40,7 @@ void var_clear_vardict (struct variable *); /* Called by variable.c, defined in dictionary.c. 
*/ void dict_var_changed (const struct variable *v); -void dict_var_resized (const struct variable *v, int delta); +void dict_var_resized (const struct variable *v, int old_width); void dict_var_display_width_changed (const struct variable *v); #endif /* data/vardict.h */ diff --git a/src/data/variable.c b/src/data/variable.c index 505ae79d..730317b7 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -36,6 +36,7 @@ #include #include +#include "minmax.h" #include "xalloc.h" #include "gettext.h" @@ -383,6 +384,9 @@ var_set_width (struct variable *v, int new_width) { const int old_width = v->width; + if (old_width == new_width) + return; + if (mv_is_resizable (&v->miss, new_width)) mv_resize (&v->miss, new_width); else @@ -403,15 +407,7 @@ var_set_width (struct variable *v, int new_width) fmt_resize (&v->write, new_width); v->width = new_width; - - { - const int old_val_count = value_cnt_from_width (old_width); - const int new_val_count = value_cnt_from_width (new_width); - - if ( old_val_count != new_val_count) - dict_var_resized (v, new_val_count - old_val_count); - } - + dict_var_resized (v, old_width); dict_var_changed (v); } @@ -445,14 +441,6 @@ var_is_long_string (const struct variable *v) { return v->width > MAX_SHORT_STRING; } - -/* Returns the number of "union value"s need to store a value of - variable V. */ -size_t -var_get_value_cnt (const struct variable *v) -{ - return value_cnt_from_width (v->width); -} /* Returns variable V's missing values. 
*/ const struct missing_values * @@ -575,7 +563,7 @@ var_add_value_label (struct variable *v, const union value *value, const char *label) { alloc_value_labels (v); - return val_labs_add (v->val_labs, *value, label); + return val_labs_add (v->val_labs, value, label); } /* Adds or replaces a value label with the given VALUE and LABEL @@ -586,7 +574,7 @@ var_replace_value_label (struct variable *v, const union value *value, const char *label) { alloc_value_labels (v); - val_labs_replace (v->val_labs, *value, label); + val_labs_replace (v->val_labs, value, label); } /* Removes V's value labels, if any. */ @@ -601,7 +589,7 @@ var_clear_value_labels (struct variable *v) const char * var_lookup_value_label (const struct variable *v, const union value *value) { - return val_labs_find (v->val_labs, *value); + return val_labs_find (v->val_labs, value); } /* Append STR with a string representing VALUE for variable V. diff --git a/src/data/variable.h b/src/data/variable.h index ecfa6b76..2e6af0bb 100644 --- a/src/data/variable.h +++ b/src/data/variable.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,8 +62,6 @@ bool var_is_alpha (const struct variable *); bool var_is_short_string (const struct variable *); bool var_is_long_string (const struct variable *); -size_t var_get_value_cnt (const struct variable *); - /* Variables' missing values. 
*/ const struct missing_values *var_get_missing_values (const struct variable *); void var_set_missing_values (struct variable *, const struct missing_values *); diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c index 39ecedd1..64405180 100644 --- a/src/language/data-io/combine-files.c +++ b/src/language/data-io/combine-files.c @@ -289,7 +289,7 @@ combine_files (enum comb_command_type command, struct comb_file *file = &proc.files[i]; size_t j; - for (j = 0; j < subcase_get_n_values (&proc.by_vars); j++) + for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++) { const char *name = var_get_name (by_vars[j]); struct variable *var = dict_lookup_var (file->dict, name); @@ -423,7 +423,7 @@ combine_files (enum comb_command_type command, } } - proc.output = autopaging_writer_create (dict_get_next_value_idx (proc.dict)); + proc.output = autopaging_writer_create (dict_get_proto (proc.dict)); taint = taint_clone (casewriter_get_taint (proc.output)); /* Set up case matcher. 
*/ @@ -640,9 +640,14 @@ free_comb_proc (struct comb_proc *proc) dict_destroy (proc->dict); casewriter_destroy (proc->output); case_matcher_destroy (proc->matcher); + if (proc->prev_BY) + { + caseproto_destroy_values (subcase_get_proto (&proc->by_vars), + proc->prev_BY); + free (proc->prev_BY); + } subcase_destroy (&proc->by_vars); case_unref (proc->buffered_case); - free (proc->prev_BY); } static bool scan_table (struct comb_file *, union value by[]); @@ -789,7 +794,7 @@ create_output_case (const struct comb_proc *proc) struct ccase *output; size_t i; - output = case_create (dict_get_next_value_idx (proc->dict)); + output = case_create (dict_get_proto (proc->dict)); for (i = 0; i < n_vars; i++) { struct variable *v = dict_get_var (proc->dict, i); @@ -861,11 +866,15 @@ output_case (struct comb_proc *proc, struct ccase *output, union value by[]) if (new_BY) { - size_t n = (subcase_get_n_values (&proc->by_vars) - * sizeof (union value)); + size_t n_values = subcase_get_n_fields (&proc->by_vars); + const struct caseproto *proto = subcase_get_proto (&proc->by_vars); if (proc->prev_BY == NULL) - proc->prev_BY = xmalloc (n); - memcpy (proc->prev_BY, by, n); + { + proc->prev_BY = xmalloc (n_values * sizeof *proc->prev_BY); + caseproto_init_values (proto, proc->prev_BY); + } + caseproto_copy (subcase_get_proto (&proc->by_vars), 0, n_values, + proc->prev_BY, by); } } } diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index 1b9eb899..2f503423 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -721,7 +721,7 @@ struct data_parser_casereader { struct data_parser *parser; /* Parser. */ struct dfm_reader *reader; /* Data file reader. */ - size_t value_cnt; /* Number of `union value's in case. */ + struct caseproto *proto; /* Format of cases. 
*/ }; static const struct casereader_class data_parser_casereader_class; @@ -742,8 +742,8 @@ data_parser_make_active_file (struct data_parser *parser, struct dataset *ds, r = xmalloc (sizeof *r); r->parser = parser; r->reader = reader; - r->value_cnt = dict_get_next_value_idx (dict); - casereader = casereader_create_sequential (NULL, r->value_cnt, + r->proto = caseproto_ref (dict_get_proto (dict)); + casereader = casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, &data_parser_casereader_class, r); proc_set_active_file (ds, casereader, dict); @@ -753,7 +753,7 @@ static struct ccase * data_parser_casereader_read (struct casereader *reader UNUSED, void *r_) { struct data_parser_casereader *r = r_; - struct ccase *c = case_create (r->value_cnt); + struct ccase *c = case_create (r->proto); if (data_parser_parse (r->parser, r->reader, c)) return c; else @@ -771,6 +771,7 @@ data_parser_casereader_destroy (struct casereader *reader UNUSED, void *r_) casereader_force_error (reader); data_parser_destroy (r->parser); dfm_close_reader (r->reader); + caseproto_unref (r->proto); free (r); } diff --git a/src/language/data-io/inpt-pgm.c b/src/language/data-io/inpt-pgm.c index c2e5dc0f..46228998 100644 --- a/src/language/data-io/inpt-pgm.c +++ b/src/language/data-io/inpt-pgm.c @@ -70,7 +70,7 @@ struct input_program_pgm casenumber case_nr; /* Incremented by END CASE transformation. */ struct caseinit *init; - size_t value_cnt; + struct caseproto *proto; }; static void destroy_input_program (struct input_program_pgm *); @@ -111,6 +111,7 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds) inp = xmalloc (sizeof *inp); inp->trns_chain = NULL; inp->init = NULL; + inp->proto = NULL; inside_input_program = true; for (;;) @@ -153,10 +154,10 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds) /* Figure out how to initialize each input case. 
*/ inp->init = caseinit_create (); caseinit_mark_for_init (inp->init, dataset_dict (ds)); - inp->value_cnt = dict_get_next_value_idx (dataset_dict (ds)); + inp->proto = caseproto_ref (dict_get_proto (dataset_dict (ds))); proc_set_active_file_data ( - ds, casereader_create_sequential (NULL, inp->value_cnt, CASENUMBER_MAX, + ds, casereader_create_sequential (NULL, inp->proto, CASENUMBER_MAX, &input_program_casereader_class, inp)); return CMD_SUCCESS; @@ -187,7 +188,7 @@ static struct ccase * input_program_casereader_read (struct casereader *reader UNUSED, void *inp_) { struct input_program_pgm *inp = inp_; - struct ccase *c = case_create (inp->value_cnt); + struct ccase *c = case_create (inp->proto); do { @@ -217,6 +218,7 @@ destroy_input_program (struct input_program_pgm *pgm) { trns_chain_destroy (pgm->trns_chain); caseinit_destroy (pgm->init); + caseproto_unref (pgm->proto); free (pgm); } } diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c index ce25f78a..ed2f5d07 100644 --- a/src/language/dictionary/sys-file-info.c +++ b/src/language/dictionary/sys-file-info.c @@ -613,11 +613,13 @@ describe_variable (const struct variable *v, struct tab_table *t, int r, else { *cp++ = '"'; - memcpy (cp, value.s, var_get_width (v)); + memcpy (cp, value_str (&value, var_get_width (v)), + var_get_width (v)); cp += var_get_width (v); *cp++ = '"'; *cp = '\0'; } + value_destroy (&value, var_get_width (v)); } tab_joint_text (t, 1, r, 2, r, TAB_LEFT, buf); @@ -628,9 +630,10 @@ describe_variable (const struct variable *v, struct tab_table *t, int r, if (flags & DF_VALUE_LABELS && var_has_value_labels (v)) { const struct val_labs *val_labs = var_get_value_labels (v); - struct val_labs_iterator *i; - struct val_lab *vl; + size_t n_labels = val_labs_count (val_labs); + const struct val_lab **labels; int orig_r = r; + size_t i; #if 0 tab_text (t, 1, r, TAB_LEFT, _("Value")); @@ -639,23 +642,27 @@ describe_variable (const struct variable *v, struct 
tab_table *t, int r, #endif tab_hline (t, TAL_1, 1, 2, r); - for (vl = val_labs_first_sorted (val_labs, &i); vl != NULL; - vl = val_labs_next (val_labs, &i)) + + labels = val_labs_sorted (val_labs); + for (i = 0; i < n_labels; i++) { - char buf[128]; + const struct val_lab *vl = labels[i]; + char buf[MAX_STRING + 1]; if (var_is_alpha (v)) { - memcpy (buf, vl->value.s, var_get_width (v)); - buf[var_get_width (v)] = 0; + int width = var_get_width (v); + memcpy (buf, value_str (&vl->value, width), width); + buf[width] = 0; } else sprintf (buf, "%g", vl->value.f); tab_text (t, 1, r, TAB_NONE, buf); - tab_text (t, 2, r, TAB_LEFT, vl->label); + tab_text (t, 2, r, TAB_LEFT, val_lab_get_label (vl)); r++; } + free (labels); tab_vline (t, TAL_1, 2, orig_r, r - 1); } diff --git a/src/language/dictionary/value-labels.c b/src/language/dictionary/value-labels.c index 39f544ec..6f9652f5 100644 --- a/src/language/dictionary/value-labels.c +++ b/src/language/dictionary/value-labels.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -147,6 +147,7 @@ get_label (struct lexer *lexer, struct variable **vars, size_t var_cnt) { union value value; struct string label; + int width; size_t i; /* Set value. 
*/ @@ -157,7 +158,10 @@ get_label (struct lexer *lexer, struct variable **vars, size_t var_cnt) lex_error (lexer, _("expecting string")); return 0; } - buf_copy_str_rpad (value.s, MAX_SHORT_STRING, ds_cstr (lex_tokstr (lexer))); + width = MAX_SHORT_STRING; + value_init (&value, width); + buf_copy_str_rpad (value_str_rw (&value, width), width, + ds_cstr (lex_tokstr (lexer)), ' '); } else { @@ -166,6 +170,8 @@ get_label (struct lexer *lexer, struct variable **vars, size_t var_cnt) lex_error (lexer, _("expecting number")); return 0; } + width = 0; + value_init (&value, width); value.f = lex_tokval (lexer); } lex_get (lexer); @@ -173,7 +179,10 @@ get_label (struct lexer *lexer, struct variable **vars, size_t var_cnt) /* Set label. */ if (!lex_force_string (lexer)) - return 0; + { + value_destroy (&value, width); + return 0; + } ds_init_string (&label, lex_tokstr (lexer)); @@ -187,6 +196,7 @@ get_label (struct lexer *lexer, struct variable **vars, size_t var_cnt) var_replace_value_label (vars[i], &value, ds_cstr (&label)); ds_destroy (&label); + value_destroy (&value, width); lex_get (lexer); lex_match (lexer, ','); diff --git a/src/language/dictionary/variable-display.c b/src/language/dictionary/variable-display.c index 277db48e..83df065b 100644 --- a/src/language/dictionary/variable-display.c +++ b/src/language/dictionary/variable-display.c @@ -27,6 +27,7 @@ #include #include +#include "minmax.h" #include "xalloc.h" #include "gettext.h" diff --git a/src/language/expressions/evaluate.c b/src/language/expressions/evaluate.c index 91b9c842..d5f5f387 100644 --- a/src/language/expressions/evaluate.c +++ b/src/language/expressions/evaluate.c @@ -15,13 +15,13 @@ along with this program. If not, see . 
*/ #include -#include "private.h" +#include "evaluate.h" #include #include #include -#include "helpers.h" -#include "evaluate.h" +#include +#include #include #include "xalloc.h" @@ -98,7 +98,7 @@ expr_evaluate_str (struct expression *e, const struct ccase *c, int case_idx, assert ((dst == NULL) == (dst_size == 0)); expr_evaluate (e, c, case_idx, &s); - buf_copy_rpad (dst, dst_size, s.string, s.length); + buf_copy_rpad (dst, dst_size, s.string, s.length, ' '); } #include @@ -170,14 +170,14 @@ cmd_debug_evaluate (struct lexer *lexer, struct dataset *dsother UNUSED) } if (c == NULL) - c = case_create (dict_get_next_value_idx (d)); + c = case_create (dict_get_proto (d)); else - c = case_resize (c, dict_get_next_value_idx (d)); + c = case_unshare_and_resize (c, dict_get_proto (d)); if (lex_is_number (lexer)) case_data_rw (c, v)->f = lex_tokval (lexer); else - memcpy (case_data_rw (c, v)->s, ds_data (lex_tokstr (lexer)), + memcpy (case_str_rw (c, v), ds_data (lex_tokstr (lexer)), var_get_width (v)); lex_get (lexer); diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c index c2173109..0d181bd4 100644 --- a/src/language/stats/aggregate.c +++ b/src/language/stats/aggregate.c @@ -274,7 +274,7 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) so TEMPORARY is moot. 
*/ proc_cancel_temporary_transformations (ds); proc_discard_output (ds); - output = autopaging_writer_create (dict_get_next_value_idx (agr.dict)); + output = autopaging_writer_create (dict_get_proto (agr.dict)); } else { @@ -769,7 +769,9 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) case MEDIAN: { double wv ; - struct ccase *cout = case_create (2); + struct ccase *cout; + + cout = case_create (casewriter_get_proto (iter->writer)); case_data_rw (cout, iter->subject)->f = case_data (input, iter->src)->f; @@ -791,8 +793,8 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) iter->int1 = 1; break; case MAX | FSTRING: - if (memcmp (iter->string, v->s, src_width) < 0) - memcpy (iter->string, v->s, src_width); + if (memcmp (iter->string, value_str (v, src_width), src_width) < 0) + memcpy (iter->string, value_str (v, src_width), src_width); iter->int1 = 1; break; case MIN: @@ -800,8 +802,8 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) iter->int1 = 1; break; case MIN | FSTRING: - if (memcmp (iter->string, v->s, src_width) > 0) - memcpy (iter->string, v->s, src_width); + if (memcmp (iter->string, value_str (v, src_width), src_width) > 0) + memcpy (iter->string, value_str (v, src_width), src_width); iter->int1 = 1; break; case FGT: @@ -812,7 +814,8 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) break; case FGT | FSTRING: case PGT | FSTRING: - if (memcmp (iter->arg[0].c, v->s, src_width) < 0) + if (memcmp (iter->arg[0].c, + value_str (v, src_width), src_width) < 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; @@ -824,7 +827,8 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) break; case FLT | FSTRING: case PLT | FSTRING: - if (memcmp (iter->arg[0].c, v->s, src_width) > 0) + if (memcmp (iter->arg[0].c, + value_str (v, src_width), src_width) > 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; @@ -836,8 +840,10 
@@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) break; case FIN | FSTRING: case PIN | FSTRING: - if (memcmp (iter->arg[0].c, v->s, src_width) <= 0 - && memcmp (iter->arg[1].c, v->s, src_width) >= 0) + if (memcmp (iter->arg[0].c, + value_str (v, src_width), src_width) <= 0 + && memcmp (iter->arg[1].c, + value_str (v, src_width), src_width) >= 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; @@ -849,8 +855,10 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) break; case FOUT | FSTRING: case POUT | FSTRING: - if (memcmp (iter->arg[0].c, v->s, src_width) > 0 - || memcmp (iter->arg[1].c, v->s, src_width) < 0) + if (memcmp (iter->arg[0].c, + value_str (v, src_width), src_width) > 0 + || memcmp (iter->arg[1].c, + value_str (v, src_width), src_width) < 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; @@ -872,7 +880,7 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) case FIRST | FSTRING: if (iter->int1 == 0) { - memcpy (iter->string, v->s, src_width); + memcpy (iter->string, value_str (v, src_width), src_width); iter->int1 = 1; } break; @@ -881,7 +889,7 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) iter->int1 = 1; break; case LAST | FSTRING: - memcpy (iter->string, v->s, src_width); + memcpy (iter->string, value_str (v, src_width), src_width); iter->int1 = 1; break; case NMISS: @@ -913,7 +921,7 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) static void dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) { - struct ccase *c = case_create (dict_get_next_value_idx (agr->dict)); + struct ccase *c = case_create (dict_get_proto (agr->dict)); { int value_idx = 0; @@ -922,11 +930,10 @@ dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) for (i = 0; i < agr->break_var_cnt; i++) { const struct variable *v = agr->break_vars[i]; - size_t value_cnt = var_get_value_cnt (v); - memcpy 
(case_data_rw_idx (c, value_idx), - case_data (agr->break_case, v), - sizeof (union value) * value_cnt); - value_idx += value_cnt; + value_copy (case_data_rw_idx (c, value_idx), + case_data (agr->break_case, v), + var_get_width (v)); + value_idx++; } } @@ -936,19 +943,14 @@ dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) for (i = agr->agr_vars; i; i = i->next) { union value *v = case_data_rw (c, i->dest); - + int width = var_get_width (i->dest); if (agr->missing == COLUMNWISE && i->saw_missing && (i->function & FUNC) != N && (i->function & FUNC) != NU && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS) { - if (var_is_alpha (i->dest)) - memset (v->s, ' ', var_get_width (i->dest)); - else - v->f = SYSMIS; - + value_set_missing (v, width); casewriter_destroy (i->writer); - continue; } @@ -999,9 +1001,9 @@ dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) case MAX | FSTRING: case MIN | FSTRING: if (i->int1) - memcpy (v->s, i->string, var_get_width (i->dest)); + memcpy (value_str_rw (v, width), i->string, width); else - memset (v->s, ' ', var_get_width (i->dest)); + value_set_missing (v, width); break; case FGT: case FGT | FSTRING: @@ -1038,9 +1040,9 @@ dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) case FIRST | FSTRING: case LAST | FSTRING: if (i->int1) - memcpy (v->s, i->string, var_get_width (i->dest)); + memcpy (value_str_rw (v, width), i->string, width); else - memset (v->s, ' ', var_get_width (i->dest)); + value_set_missing (v, width); break; case N_NO_VARS: v->f = i->dbl[0]; @@ -1095,8 +1097,13 @@ initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input) break; case MEDIAN: { + struct caseproto *proto; struct subcase ordering; + proto = caseproto_create (); + proto = caseproto_add_width (proto, 0); + proto = caseproto_add_width (proto, 0); + if ( ! 
iter->subject) iter->subject = var_create_internal (0); @@ -1104,8 +1111,9 @@ initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input) iter->weight = var_create_internal (1); subcase_init_var (&ordering, iter->subject, SC_ASCEND); - iter->writer = sort_create_writer (&ordering, 2); + iter->writer = sort_create_writer (&ordering, proto); subcase_destroy (&ordering); + caseproto_unref (proto); iter->cc = 0; } diff --git a/src/language/stats/binomial.c b/src/language/stats/binomial.c index 94b41193..b44c5a6f 100644 --- a/src/language/stats/binomial.c +++ b/src/language/stats/binomial.c @@ -101,7 +101,7 @@ do_binomial (const struct dictionary *dict, const struct one_sample_test *ost = (const struct one_sample_test *) bst; struct ccase *c; - while ((c = casereader_read(input)) != NULL) + for (; (c = casereader_read (input)) != NULL; case_unref (c)) { int v; double w = dict_get_case_weight (dict, c, &warn); @@ -109,41 +109,38 @@ do_binomial (const struct dictionary *dict, for (v = 0 ; v < ost->n_vars ; ++v ) { const struct variable *var = ost->vars[v]; - const union value *value = case_data (c, var); - int width = var_get_width (var); + double value = case_num (c, var); - if (var_is_value_missing (var, value, exclude)) + if (var_is_num_missing (var, value, exclude)) continue; if (bst->cutpoint != SYSMIS) { - if ( compare_values_short (cat1[v].value, value, var) >= 0 ) + if ( cat1[v].value.f >= value ) cat1[v].count += w; else cat2[v].count += w; } else { - if ( NULL == cat1[v].value ) + if ( SYSMIS == cat1[v].value.f ) { - cat1[v].value = value_dup (value, width); + cat1[v].value.f = value; cat1[v].count = w; } - else if ( 0 == compare_values_short (cat1[v].value, value, var)) + else if ( cat1[v].value.f == value ) cat1[v].count += w; - else if ( NULL == cat2[v].value ) + else if ( SYSMIS == cat2[v].value.f ) { - cat2[v].value = value_dup (value, width); + cat2[v].value.f = value; cat2[v].count = w; } - else if ( 0 == compare_values_short 
(cat2[v].value, value, var)) + else if ( cat2[v].value.f == value ) cat2[v].count += w; else if ( bst->category1 == SYSMIS) msg (ME, _("Variable %s is not dichotomous"), var_get_name (var)); } } - - case_unref (c); } return casereader_destroy (input); } @@ -163,38 +160,28 @@ binomial_execute (const struct dataset *ds, const struct binomial_test *bst = (const struct binomial_test *) test; const struct one_sample_test *ost = (const struct one_sample_test*) test; - struct freq_mutable *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars); - struct freq_mutable *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars); + struct freq_mutable *cat[2]; + int i; assert ((bst->category1 == SYSMIS) == (bst->category2 == SYSMIS) || bst->cutpoint != SYSMIS); - if ( bst->cutpoint != SYSMIS ) - { - int i; - union value v; - v.f = bst->cutpoint; - for (i = 0; i < ost->n_vars; i++) - cat1[i].value = value_dup (&v, 0); - } - else if ( bst->category1 != SYSMIS ) + for (i = 0; i < 2; i++) { - int i; - union value v; - v.f = bst->category1; - for (i = 0; i < ost->n_vars; i++) - cat1[i].value = value_dup (&v, 0); - } - - if ( bst->category2 != SYSMIS ) - { - int i; - union value v; - v.f = bst->category2; - for (i = 0; i < ost->n_vars; i++) - cat2[i].value = value_dup (&v, 0); + double value; + if (i == 0) + value = bst->cutpoint != SYSMIS ? bst->cutpoint : bst->category1; + else + value = bst->category2; + + cat[i] = xnmalloc (ost->n_vars, sizeof *cat[i]); + for (v = 0; v < ost->n_vars; v++) + { + cat[i][v].value.f = value; + cat[i][v].count = 0; + } } - if (do_binomial (dict, input, bst, cat1, cat2, exclude)) + if (do_binomial (dataset_dict (ds), input, bst, cat[0], cat[1], exclude)) { const struct variable *wvar = dict_get_weight (dict); const struct fmt_spec *wfmt = wvar ? 
@@ -214,22 +201,21 @@ binomial_execute (const struct dataset *ds, for (v = 0 ; v < ost->n_vars; ++v) { double n_total, sig; - struct string catstr1; - struct string catstr2; + struct string catstr[2]; const struct variable *var = ost->vars[v]; - ds_init_empty (&catstr1); - ds_init_empty (&catstr2); + ds_init_empty (&catstr[0]); + ds_init_empty (&catstr[1]); if ( bst->cutpoint != SYSMIS) { - ds_put_format (&catstr1, "<= %g", bst->cutpoint); - } - else - { - var_append_value_name (var, cat1[v].value, &catstr1); - var_append_value_name (var, cat2[v].value, &catstr2); + ds_put_format (&catstr[0], "<= %g", bst->cutpoint); } + else + { + var_append_value_name (var, &cat[0][v].value, &catstr[0]); + var_append_value_name (var, &cat[1][v].value, &catstr[1]); + } tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3); @@ -243,31 +229,31 @@ binomial_execute (const struct dataset *ds, tab_double (table, 5, 1 + v * 3, TAB_NONE, bst->p, NULL); /* Category labels */ - tab_text (table, 2, 1 + v * 3, TAB_NONE, ds_cstr (&catstr1)); - tab_text (table, 2, 2 + v * 3, TAB_NONE, ds_cstr (&catstr2)); + tab_text (table, 2, 1 + v * 3, TAB_NONE, ds_cstr (&catstr[0])); + tab_text (table, 2, 2 + v * 3, TAB_NONE, ds_cstr (&catstr[1])); /* Observed N */ - tab_double (table, 3, 1 + v * 3, TAB_NONE, cat1[v].count, wfmt); - tab_double (table, 3, 2 + v * 3, TAB_NONE, cat2[v].count, wfmt); + tab_double (table, 3, 1 + v * 3, TAB_NONE, cat[0][v].count, wfmt); + tab_double (table, 3, 2 + v * 3, TAB_NONE, cat[1][v].count, wfmt); - n_total = cat1[v].count + cat2[v].count; + n_total = cat[0][v].count + cat[1][v].count; tab_double (table, 3, 3 + v * 3, TAB_NONE, n_total, wfmt); /* Observed Proportions */ tab_double (table, 4, 1 + v * 3, TAB_NONE, - cat1[v].count / n_total, NULL); + cat[0][v].count / n_total, NULL); tab_double (table, 4, 2 + v * 3, TAB_NONE, - cat2[v].count / n_total, NULL); + cat[1][v].count / n_total, NULL); tab_double (table, 4, 3 + v * 3, TAB_NONE, - (cat1[v].count + cat2[v].count) / 
n_total, NULL); + (cat[0][v].count + cat[1][v].count) / n_total, NULL); /* Significance */ - sig = calculate_binomial (cat1[v].count, cat2[v].count, bst->p); + sig = calculate_binomial (cat[0][v].count, cat[1][v].count, bst->p); tab_double (table, 6, 1 + v * 3, TAB_NONE, sig, NULL); - ds_destroy (&catstr1); - ds_destroy (&catstr2); + ds_destroy (&catstr[0]); + ds_destroy (&catstr[1]); } tab_text (table, 2, 0, TAB_CENTER, _("Category")); @@ -283,11 +269,6 @@ binomial_execute (const struct dataset *ds, tab_submit (table); } - for (v = 0; v < ost->n_vars; v++) - { - free (cat1[v].value); - free (cat2[v].value); - } - free (cat1); - free (cat2); + for (i = 0; i < 2; i++) + free (cat[i]); } diff --git a/src/language/stats/chisquare.c b/src/language/stats/chisquare.c index 7354f8e4..4593df41 100644 --- a/src/language/stats/chisquare.c +++ b/src/language/stats/chisquare.c @@ -68,47 +68,22 @@ create_freq_hash_with_range (const struct dictionary *dict, /* Populate the hash with zero entries */ for (i_d = trunc (lo); i_d <= trunc (hi); i_d += 1.0 ) { - union value the_value; struct freq_mutable *fr = xmalloc (sizeof (*fr)); - - the_value.f = i_d; - - fr->value = value_dup (&the_value, 0); + value_init (&fr->value, 0); + fr->value.f = i_d; fr->count = 0; - hsh_insert (freq_hash, fr); } - while ((c = casereader_read (input)) != NULL) + for (; (c = casereader_read (input)) != NULL; case_unref (c)) { - union value obs_value; - struct freq **existing_fr; - struct freq *fr = xmalloc(sizeof (*fr)); - fr->value = case_data (c, var); - - fr->count = dict_get_case_weight (dict, c, &warn); - - obs_value.f = trunc (fr->value->f); - - if ( obs_value.f < lo || obs_value.f > hi) - { - free (fr); - case_unref (c); - continue; - } - - fr->value = &obs_value; - - existing_fr = (struct freq **) hsh_probe (freq_hash, fr); - - /* This must exist in the hash, because we previously populated it - with zero counts */ - assert (*existing_fr); - - (*existing_fr)->count += fr->count; - free (fr); - - 
case_unref (c); + struct freq_mutable fr; + fr.value.f = trunc (case_num (c, var)); + if (fr.value.f >= lo && fr.value.f <= hi) + { + struct freq_mutable *existing_fr = hsh_force_find (freq_hash, &fr); + existing_fr->count += dict_get_case_weight (dict, c, &warn); + } } if (casereader_destroy (input)) return freq_hash; @@ -130,6 +105,7 @@ create_freq_hash (const struct dictionary *dict, struct casereader *input, const struct variable *var) { + int width = var_get_width (var); bool warn = true; struct ccase *c; @@ -140,23 +116,25 @@ create_freq_hash (const struct dictionary *dict, for (; (c = casereader_read (input)) != NULL; case_unref (c)) { - struct freq **existing_fr; - struct freq *fr = xmalloc(sizeof (*fr)); - fr->value = case_data (c, var); + struct freq_mutable fr; + void **p; - fr->count = dict_get_case_weight (dict, c, &warn); + fr.value = *case_data (c, var); + fr.count = dict_get_case_weight (dict, c, &warn); - existing_fr = (struct freq **) hsh_probe (freq_hash, fr); - if ( *existing_fr) - { - (*existing_fr)->count += fr->count; - free (fr); - } + p = hsh_probe (freq_hash, &fr); + if (*p == NULL) + { + struct freq_mutable *new_fr = *p = xmalloc (sizeof *new_fr); + value_init (&new_fr->value, width); + value_copy (&new_fr->value, &fr.value, width); + new_fr->count = fr.count; + } else - { - *existing_fr = fr; - fr->value = value_dup (fr->value, var_get_width (var)); - } + { + struct freq *existing_fr = *p; + existing_fr->count += fr.count; + } } if (casereader_destroy (input)) return freq_hash; @@ -371,7 +349,7 @@ chisquare_execute (const struct dataset *ds, { struct string str; double exp; - const union value *observed_value = ff[i]->value; + const union value *observed_value = &ff[i]->value; ds_init_empty (&str); var_append_value_name (ost->vars[v], observed_value, &str); @@ -444,7 +422,7 @@ chisquare_execute (const struct dataset *ds, struct string str; double exp; - const union value *observed_value = ff[i]->value; + const union value *observed_value 
= &ff[i]->value; ds_init_empty (&str); var_append_value_name (ost->vars[v], observed_value, &str); diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index b71524a1..46b5e413 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include #include #include @@ -59,7 +61,7 @@ #include "minmax.h" #include "xalloc.h" -#include "xmalloca.h" +#include "xsize.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -79,8 +81,8 @@ tabl:!tables/notables, box:!box/nobox, pivot:!pivot/nopivot; - +cells[cl_]=count,none,expected,row,column,total,residual,sresidual, - asresidual,all; + +cells[cl_]=count,expected,row,column,total,residual,sresidual, + asresidual,all,none; +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d, kappa,eta,corr,all. */ @@ -99,24 +101,99 @@ /* A single table entry for general mode. */ struct table_entry { - int table; /* Flattened table number. */ - union - { - double freq; /* Frequency count. */ - double *data; /* Crosstabulation table for integer mode. */ - } - u; + struct hmap_node node; /* Entry in hash table. */ + double freq; /* Frequency count. */ union value values[1]; /* Values. */ }; -/* A crosstabulation. */ +static size_t +table_entry_size (size_t n_values) +{ + return (offsetof (struct table_entry, values) + + n_values * sizeof (union value)); +} + +/* Indexes into the 'vars' member of struct pivot_table and + struct crosstab member. */ +enum + { + ROW_VAR = 0, /* Row variable. */ + COL_VAR = 1 /* Column variable. */ + /* Higher indexes cause multiple tables to be output. */ + }; + +/* A crosstabulation of 2 or more variables. */ +struct pivot_table + { + struct fmt_spec weight_format; /* Format for weight variable. */ + double missing; /* Weight of missing cases. */ + + /* Variables (2 or more). */ + int n_vars; + const struct variable **vars; + + /* Constants (0 or more). 
*/ + int n_consts; + const struct variable **const_vars; + union value *const_values; + + /* Data. */ + struct hmap data; + struct table_entry **entries; + size_t n_entries; + + /* Column values, number of columns. */ + union value *cols; + int n_cols; + + /* Row values, number of rows. */ + union value *rows; + int n_rows; + + /* Number of statistically interesting columns/rows + (columns/rows with data in them). */ + int ns_cols, ns_rows; + + /* Matrix contents. */ + double *mat; /* Matrix proper. */ + double *row_tot; /* Row totals. */ + double *col_tot; /* Column totals. */ + double total; /* Grand total. */ + }; + +/* A crosstabulation of exactly 2 variables, conditional on zero + or more other variables having given values. */ struct crosstab { - int nvar; /* Number of variables. */ - double missing; /* Missing cases count. */ - int ofs; /* Integer mode: Offset into sorted_tab[]. */ - const struct variable *vars[2]; /* At least two variables; sorted by - larger indices first. */ + /* Case counts. */ + double missing; + + /* Variables. */ + int n_vars; /* Number of variables (at least 2). */ + const struct variable **vars; + union value *values; /* Values of variables beyond 2. */ + + /* Data. */ + struct table_entry **entries; + size_t n_entries; + + /* Column values, number of columns. */ + union value *cols; + int n_cols; + + /* Row values, number of rows. */ + union value *rows; + int n_rows; + + /* Number of statistically interesting columns/rows + (columns/rows with data in them). */ + int ns_cols, ns_rows; + + /* Matrix contents. */ + double *mat; /* Matrix proper. */ + double *row_tot; /* Row totals. */ + double *col_tot; /* Column totals. */ + double total; /* Grand total. */ }; /* Integer mode variable info. */ @@ -133,173 +210,146 @@ get_var_range (const struct variable *v) return var_get_aux (v); } -/* Indexes into crosstab.v. */ -enum - { - ROW_VAR = 0, - COL_VAR = 1 - }; - -/* General mode crosstabulation table. 
*/ -static struct hsh_table *gen_tab; /* Hash table. */ -static int n_sorted_tab; /* Number of entries in sorted_tab. */ -static struct table_entry **sorted_tab; /* Sorted table. */ - -/* Variables specifies on VARIABLES. */ -static const struct variable **variables; -static size_t variables_cnt; - -/* TABLES. */ -static struct crosstab **xtab; -static int nxtab; - -/* Integer or general mode? */ -enum +struct crosstabs_proc { - INTEGER, - GENERAL + enum { INTEGER, GENERAL } mode; + enum mv_class exclude; + bool pivot; + bool bad_warn; + struct fmt_spec weight_format; + + /* Variables specified on VARIABLES. */ + const struct variable **variables; + size_t n_variables; + + /* TABLES. */ + struct pivot_table *pivots; + int n_pivots; + + /* CELLS. */ + int n_cells; /* Number of cells requested. */ + unsigned int cells; /* Bit k is 1 if cell k is requested. */ + int a_cells[CRS_CL_count]; /* 0...n_cells-1 are the requested cells. */ + + /* STATISTICS. */ + unsigned int statistics; /* Bit k is 1 if statistic k is requested. */ }; -static int mode; - -/* CELLS. */ -static int num_cells; /* Number of cells requested. */ -static int cells[8]; /* Cells requested. */ - -/* WRITE. */ -static int write_style; /* One of WR_* that specifies the WRITE style. */ - -/* Command parsing info. */ -static struct cmd_crosstabs cmd; - -/* Pools. */ -static struct pool *pl_tc; /* For table cells. */ -static struct pool *pl_col; /* For column data. 
*/ -static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds); -static void precalc (struct casereader *, const struct dataset *); -static void calc_general (const struct ccase *, const struct dataset *); -static void calc_integer (const struct ccase *, const struct dataset *); -static void postcalc (const struct dataset *); +static void +init_proc (struct crosstabs_proc *proc, struct dataset *ds) +{ + const struct variable *wv = dict_get_weight (dataset_dict (ds)); + proc->bad_warn = true; + proc->variables = NULL; + proc->n_variables = 0; + proc->pivots = NULL; + proc->n_pivots = 0; + proc->weight_format = wv ? *var_get_print_format (wv) : F_8_0; +} -static void submit (struct tab_table *); +static void +free_proc (struct crosstabs_proc *proc UNUSED) +{ + /* XXX */ +} -static void format_short (char *s, const struct fmt_spec *fp, - const union value *v); +static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds, + struct crosstabs_proc *); +static bool should_tabulate_case (const struct pivot_table *, + const struct ccase *, enum mv_class exclude); +static void tabulate_general_case (struct pivot_table *, const struct ccase *, + double weight); +static void tabulate_integer_case (struct pivot_table *, const struct ccase *, + double weight); +static void postcalc (struct crosstabs_proc *); +static void submit (struct crosstabs_proc *, struct pivot_table *, + struct tab_table *); /* Parse and execute CROSSTABS, then clean up. */ int cmd_crosstabs (struct lexer *lexer, struct dataset *ds) { - int result = internal_cmd_crosstabs (lexer, ds); - int i; - - free (variables); - pool_destroy (pl_tc); - pool_destroy (pl_col); + struct crosstabs_proc proc; + int result; - for (i = 0; i < nxtab; i++) - free (xtab[i]); - free (xtab); + init_proc (&proc, ds); + result = internal_cmd_crosstabs (lexer, ds, &proc); + free_proc (&proc); return result; } /* Parses and executes the CROSSTABS procedure. 
*/ static int -internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds) +internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds, + struct crosstabs_proc *proc) { struct casegrouper *grouper; struct casereader *input, *group; + struct cmd_crosstabs cmd; + struct pivot_table *pt; bool ok; int i; - variables = NULL; - variables_cnt = 0; - xtab = NULL; - nxtab = 0; - pl_tc = pool_create (); - pl_col = pool_create (); - - if (!parse_crosstabs (lexer, ds, &cmd, NULL)) + if (!parse_crosstabs (lexer, ds, &cmd, proc)) return CMD_FAILURE; - mode = variables ? INTEGER : GENERAL; + proc->mode = proc->n_variables ? INTEGER : GENERAL; /* CELLS. */ if (!cmd.sbc_cells) - { - cmd.a_cells[CRS_CL_COUNT] = 1; - } + proc->cells = 1u << CRS_CL_COUNT; + else if (cmd.a_cells[CRS_CL_ALL]) + proc->cells = UINT_MAX; else { - int count = 0; - + proc->cells = 0; for (i = 0; i < CRS_CL_count; i++) if (cmd.a_cells[i]) - count++; - if (count == 0) - { - cmd.a_cells[CRS_CL_COUNT] = 1; - cmd.a_cells[CRS_CL_ROW] = 1; - cmd.a_cells[CRS_CL_COLUMN] = 1; - cmd.a_cells[CRS_CL_TOTAL] = 1; - } - if (cmd.a_cells[CRS_CL_ALL]) - { - for (i = 0; i < CRS_CL_count; i++) - cmd.a_cells[i] = 1; - cmd.a_cells[CRS_CL_ALL] = 0; - } - cmd.a_cells[CRS_CL_NONE] = 0; + proc->cells |= 1u << i; + if (proc->cells == 0) + proc->cells = ((1u << CRS_CL_COUNT) + | (1u << CRS_CL_ROW) + | (1u << CRS_CL_COLUMN) + | (1u << CRS_CL_TOTAL)); } - for (num_cells = i = 0; i < CRS_CL_count; i++) - if (cmd.a_cells[i]) - cells[num_cells++] = i; + proc->cells &= ((1u << CRS_CL_count) - 1); + proc->cells &= ~((1u << CRS_CL_NONE) | (1u << CRS_CL_ALL)); + proc->n_cells = 0; + for (i = 0; i < CRS_CL_count; i++) + if (proc->cells & (1u << i)) + proc->a_cells[proc->n_cells++] = i; /* STATISTICS. 
*/ - if (cmd.sbc_statistics) + if (cmd.a_statistics[CRS_ST_ALL]) + proc->statistics = UINT_MAX; + else if (cmd.sbc_statistics) { int i; - int count = 0; + proc->statistics = 0; for (i = 0; i < CRS_ST_count; i++) if (cmd.a_statistics[i]) - count++; - if (count == 0) - cmd.a_statistics[CRS_ST_CHISQ] = 1; - if (cmd.a_statistics[CRS_ST_ALL]) - for (i = 0; i < CRS_ST_count; i++) - cmd.a_statistics[i] = 1; + proc->statistics |= 1u << i; + if (proc->statistics == 0) + proc->statistics |= 1u << CRS_ST_CHISQ; } + else + proc->statistics = 0; /* MISSING. */ - if (cmd.miss == CRS_REPORT && mode == GENERAL) + proc->exclude = (cmd.miss == CRS_TABLE ? MV_ANY + : cmd.miss == CRS_INCLUDE ? MV_SYSTEM + : MV_NEVER); + if (proc->mode == GENERAL && proc->exclude == MV_NEVER) { msg (SE, _("Missing mode REPORT not allowed in general mode. " "Assuming MISSING=TABLE.")); - cmd.miss = CRS_TABLE; + proc->exclude = MV_ANY; } - /* WRITE. */ - if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS]) - cmd.a_write[CRS_WR_ALL] = 0; - if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL) - { - msg (SE, _("Write mode ALL not allowed in general mode. " - "Assuming WRITE=CELLS.")); - cmd.a_write[CRS_WR_CELLS] = 1; - } - if (cmd.sbc_write - && (cmd.a_write[CRS_WR_NONE] - + cmd.a_write[CRS_WR_ALL] - + cmd.a_write[CRS_WR_CELLS] == 0)) - cmd.a_write[CRS_WR_CELLS] = 1; - if (cmd.a_write[CRS_WR_CELLS]) - write_style = CRS_WR_CELLS; - else if (cmd.a_write[CRS_WR_ALL]) - write_style = CRS_WR_ALL; - else - write_style = CRS_WR_NONE; + /* PIVOT. */ + proc->pivot = cmd.pivot == CRS_PIVOT; input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds), NULL, NULL); @@ -308,18 +358,34 @@ internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds) { struct ccase *c; - precalc (group, ds); - - for (; (c = casereader_read (group)) != NULL; case_unref (c)) + /* Output SPLIT FILE variables. 
*/ + c = casereader_peek (group, 0); + if (c != NULL) { - if (mode == GENERAL) - calc_general (c, ds); - else - calc_integer (c, ds); + output_split_file_values (ds, c); + case_unref (c); } + + /* Tabulate. */ + for (; (c = casereader_read (group)) != NULL; case_unref (c)) + for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + { + double weight = dict_get_case_weight (dataset_dict (ds), c, + &proc->bad_warn); + if (should_tabulate_case (pt, c, proc->exclude)) + { + if (proc->mode == GENERAL) + tabulate_general_case (pt, c, weight); + else + tabulate_integer_case (pt, c, weight); + } + else + pt->missing += weight; + } casereader_destroy (group); - postcalc (ds); + /* Output. */ + postcalc (proc); } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; @@ -329,14 +395,18 @@ internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds) /* Parses the TABLES subcommand. */ static int -crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED) +crs_custom_tables (struct lexer *lexer, struct dataset *ds, + struct cmd_crosstabs *cmd UNUSED, void *proc_) { + struct crosstabs_proc *proc = proc_; struct const_var_set *var_set; int n_by; const struct variable ***by = NULL; + int *by_iter; size_t *by_nvar = NULL; size_t nx = 1; - int success = 0; + bool ok = false; + int i; /* Ensure that this is a TABLES subcommand. 
*/ if (!lex_match_id (lexer, "TABLES") @@ -346,8 +416,9 @@ crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs return 2; lex_match (lexer, '='); - if (variables != NULL) - var_set = const_var_set_create_from_array (variables, variables_cnt); + if (proc->variables != NULL) + var_set = const_var_set_create_from_array (proc->variables, + proc->n_variables); else var_set = const_var_set_create_from_dict (dataset_dict (ds)); assert (var_set != NULL); @@ -357,7 +428,7 @@ crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs by = xnrealloc (by, n_by + 1, sizeof *by); by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar); if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by], - PV_NO_DUPLICATE | PV_NO_SCRATCH)) + PV_NO_DUPLICATE | PV_NO_SCRATCH)) goto done; if (xalloc_oversized (nx, by_nvar[n_by])) { @@ -379,64 +450,57 @@ crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs } } - { - int *by_iter = xcalloc (n_by, sizeof *by_iter); - int i; - - xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab); - for (i = 0; i < nx; i++) - { - struct crosstab *x; - - x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2)); - x->nvar = n_by; - x->missing = 0.; - - { - int i; - - for (i = 0; i < n_by; i++) - x->vars[i] = by[i][by_iter[i]]; - } - - { - int i; - - for (i = n_by - 1; i >= 0; i--) - { - if (++by_iter[i] < by_nvar[i]) - break; - by_iter[i] = 0; - } - } - - xtab[nxtab++] = x; - } - free (by_iter); - } - success = 1; + by_iter = xcalloc (n_by, sizeof *by_iter); + proc->pivots = xnrealloc (proc->pivots, + proc->n_pivots + nx, sizeof *proc->pivots); + for (i = 0; i < nx; i++) + { + struct pivot_table *pt = &proc->pivots[proc->n_pivots++]; + int j; + + pt->weight_format = proc->weight_format; + pt->missing = 0.; + pt->n_vars = n_by; + pt->vars = xmalloc (n_by * sizeof *pt->vars); + pt->n_consts = 0; + pt->const_vars = NULL; + pt->const_values = NULL; + hmap_init 
(&pt->data); + pt->entries = NULL; + pt->n_entries = 0; + + for (j = 0; j < n_by; j++) + pt->vars[j] = by[j][by_iter[j]]; + + for (j = n_by - 1; j >= 0; j--) + { + if (++by_iter[j] < by_nvar[j]) + break; + by_iter[j] = 0; + } + } + free (by_iter); + ok = true; - done: +done: /* All return paths lead here. */ - { - int i; - - for (i = 0; i < n_by; i++) - free (by[i]); - free (by); - free (by_nvar); - } + for (i = 0; i < n_by; i++) + free (by[i]); + free (by); + free (by_nvar); const_var_set_destroy (var_set); - return success; + return ok; } /* Parses the VARIABLES subcommand. */ static int -crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED) +crs_custom_variables (struct lexer *lexer, struct dataset *ds, + struct cmd_crosstabs *cmd UNUSED, void *proc_) { - if (nxtab) + struct crosstabs_proc *proc = proc_; + if (proc->n_pivots) { msg (SE, _("VARIABLES must be specified before TABLES.")); return 0; @@ -446,15 +510,15 @@ crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosst for (;;) { - size_t orig_nv = variables_cnt; + size_t orig_nv = proc->n_variables; size_t i; long min, max; if (!parse_variables_const (lexer, dataset_dict (ds), - &variables, &variables_cnt, - (PV_APPEND | PV_NUMERIC - | PV_NO_DUPLICATE | PV_NO_SCRATCH))) + &proc->variables, &proc->n_variables, + (PV_APPEND | PV_NUMERIC + | PV_NO_DUPLICATE | PV_NO_SCRATCH))) return 0; if (lex_token (lexer) != '(') @@ -489,13 +553,13 @@ crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosst } lex_get (lexer); - for (i = orig_nv; i < variables_cnt; i++) + for (i = orig_nv; i < proc->n_variables; i++) { struct var_range *vr = xmalloc (sizeof *vr); vr->min = min; vr->max = max + 1.; vr->count = max - min + 1; - var_attach_aux (variables[i], vr, var_dtor_free); + var_attach_aux (proc->variables[i], vr, var_dtor_free); } if (lex_token (lexer) == '/') @@ -505,361 +569,277 @@ crs_custom_variables (struct 
lexer *lexer, struct dataset *ds, struct cmd_crosst return 1; lossage: - free (variables); - variables = NULL; + free (proc->variables); + proc->variables = NULL; + proc->n_variables = 0; return 0; } /* Data file processing. */ -static int compare_table_entry (const void *, const void *, const void *); -static unsigned hash_table_entry (const void *, const void *); +static bool +should_tabulate_case (const struct pivot_table *pt, const struct ccase *c, + enum mv_class exclude) +{ + int j; + for (j = 0; j < pt->n_vars; j++) + { + const struct variable *var = pt->vars[j]; + struct var_range *range = get_var_range (var); + + if (var_is_value_missing (var, case_data (c, var), exclude)) + return false; + + if (range != NULL) + { + double num = case_num (c, var); + if (num < range->min || num >= range->max) /* max is exclusive */ + return false; + } + } + return true; +} -/* Set up the crosstabulation tables for processing. */ static void -precalc (struct casereader *input, const struct dataset *ds) +tabulate_integer_case (struct pivot_table *pt, const struct ccase *c, + double weight) { - struct ccase *c; + struct table_entry *te; + size_t hash; + int j; - c = casereader_peek (input, 0); - if (c != NULL) + hash = 0; + for (j = 0; j < pt->n_vars; j++) { - output_split_file_values (ds, c); - case_unref (c); + /* Throw away fractional parts of values. 
*/ + hash = hash_int (case_num (c, pt->vars[j]), hash); } - if (mode == GENERAL) - { - gen_tab = hsh_create (512, compare_table_entry, hash_table_entry, - NULL, NULL); - } - else + HMAP_FOR_EACH_WITH_HASH (te, struct table_entry, node, hash, &pt->data) { - int i; - - sorted_tab = NULL; - n_sorted_tab = 0; + for (j = 0; j < pt->n_vars; j++) + if ((int) case_num (c, pt->vars[j]) != (int) te->values[j].f) + goto no_match; - for (i = 0; i < nxtab; i++) - { - struct crosstab *x = xtab[i]; - int count = 1; - int *v; - int j; - - x->ofs = n_sorted_tab; - - for (j = 2; j < x->nvar; j++) - count *= get_var_range (x->vars[j - 2])->count; - - sorted_tab = xnrealloc (sorted_tab, - n_sorted_tab + count, sizeof *sorted_tab); - v = xmalloca (sizeof *v * x->nvar); - for (j = 2; j < x->nvar; j++) - v[j] = get_var_range (x->vars[j])->min; - for (j = 0; j < count; j++) - { - struct table_entry *te; - int k; - - te = sorted_tab[n_sorted_tab++] - = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1)); - te->table = i; - - { - int row_cnt = get_var_range (x->vars[0])->count; - int col_cnt = get_var_range (x->vars[1])->count; - const int mat_size = row_cnt * col_cnt; - int m; - - te->u.data = xnmalloc (mat_size, sizeof *te->u.data); - for (m = 0; m < mat_size; m++) - te->u.data[m] = 0.; - } - - for (k = 2; k < x->nvar; k++) - te->values[k].f = v[k]; - for (k = 2; k < x->nvar; k++) - { - struct var_range *vr = get_var_range (x->vars[k]); - if (++v[k] >= vr->max) - v[k] = vr->min; - else - break; - } - } - freea (v); - } + /* Found an existing entry. */ + te->freq += weight; + return; - sorted_tab = xnrealloc (sorted_tab, - n_sorted_tab + 1, sizeof *sorted_tab); - sorted_tab[n_sorted_tab] = NULL; + no_match: ; } + /* No existing entry. Create a new one. 
*/ + te = xmalloc (table_entry_size (pt->n_vars)); + te->freq = weight; + for (j = 0; j < pt->n_vars; j++) + te->values[j].f = (int) case_num (c, pt->vars[j]); + hmap_insert (&pt->data, &te->node, hash); } -/* Form crosstabulations for general mode. */ static void -calc_general (const struct ccase *c, const struct dataset *ds) +tabulate_general_case (struct pivot_table *pt, const struct ccase *c, + double weight) { - /* Missing values to exclude. */ - enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY - : cmd.miss == CRS_INCLUDE ? MV_SYSTEM - : MV_NEVER); - - /* Case weight. */ - double weight = dict_get_case_weight (dataset_dict (ds), c, NULL); - - /* Flattened current table index. */ - int t; + struct table_entry *te; + size_t hash; + int j; - for (t = 0; t < nxtab; t++) + hash = 0; + for (j = 0; j < pt->n_vars; j++) { - struct crosstab *x = xtab[t]; - const size_t entry_size = (sizeof (struct table_entry) - + sizeof (union value) * (x->nvar - 1)); - struct table_entry *te = xmalloca (entry_size); - - /* Construct table entry for the current record and table. */ - te->table = t; - { - int j; - - assert (x != NULL); - for (j = 0; j < x->nvar; j++) - { - const union value *v = case_data (c, x->vars[j]); - if (var_is_value_missing (x->vars[j], v, exclude)) - { - x->missing += weight; - goto next_crosstab; - } - - if (var_is_numeric (x->vars[j])) - te->values[j].f = case_num (c, x->vars[j]); - else - { - size_t n = var_get_width (x->vars[j]); - if (n > MAX_SHORT_STRING) - n = MAX_SHORT_STRING; - memcpy (te->values[j].s, case_str (c, x->vars[j]), n); - - /* Necessary in order to simplify comparisons. */ - memset (&te->values[j].s[var_get_width (x->vars[j])], 0, - sizeof (union value) - n); - } - } - } + const struct variable *var = pt->vars[j]; + hash = value_hash (case_data (c, var), var_get_width (var), hash); + } - /* Add record to hash table. 
*/ - { - struct table_entry **tepp - = (struct table_entry **) hsh_probe (gen_tab, te); - if (*tepp == NULL) - { - struct table_entry *tep = pool_alloc (pl_tc, entry_size); + HMAP_FOR_EACH_WITH_HASH (te, struct table_entry, node, hash, &pt->data) + { + for (j = 0; j < pt->n_vars; j++) + { + const struct variable *var = pt->vars[j]; + if (!value_equal (case_data (c, var), &te->values[j], + var_get_width (var))) + goto no_match; + } - te->u.freq = weight; - memcpy (tep, te, entry_size); + /* Found an existing entry. */ + te->freq += weight; + return; - *tepp = tep; - } - else - (*tepp)->u.freq += weight; - } + no_match: ; + } - next_crosstab: - freea (te); + /* No existing entry. Create a new one. */ + te = xmalloc (table_entry_size (pt->n_vars)); + te->freq = weight; + for (j = 0; j < pt->n_vars; j++) + { + const struct variable *var = pt->vars[j]; + int width = var_get_width (var); + value_init (&te->values[j], width); + value_copy (&te->values[j], case_data (c, var), width); } + hmap_insert (&pt->data, &te->node, hash); } + +/* Post-data reading calculations. */ + +static int compare_table_entry_vars_3way (const struct table_entry *a, + const struct table_entry *b, + const struct pivot_table *pt, + int idx0, int idx1); +static int compare_table_entry_3way (const void *ap_, const void *bp_, + const void *pt_); +static void enum_var_values (const struct pivot_table *, int var_idx, + union value **valuesp, int *n_values); +static void output_pivot_table (struct crosstabs_proc *, + struct pivot_table *); +static void make_pivot_table_subset (struct pivot_table *pt, + size_t row0, size_t row1, + struct pivot_table *subset); +static void make_summary_table (struct crosstabs_proc *); +static bool find_crosstab (struct pivot_table *, size_t *row0p, size_t *row1p); static void -calc_integer (const struct ccase *c, const struct dataset *ds) +postcalc (struct crosstabs_proc *proc) { - bool bad_warn = true; - - /* Case weight. 
*/ - double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn); + struct pivot_table *pt; - /* Flattened current table index. */ - int t; - - for (t = 0; t < nxtab; t++) + /* Convert hash tables into sorted arrays of entries. */ + for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) { - struct crosstab *x = xtab[t]; - int i, fact, ofs; - - fact = i = 1; - ofs = x->ofs; - for (i = 0; i < x->nvar; i++) - { - const struct variable *const v = x->vars[i]; - struct var_range *vr = get_var_range (v); - double value = case_num (c, v); - - /* Note that the first test also rules out SYSMIS. */ - if ((value < vr->min || value >= vr->max) - || (cmd.miss == CRS_TABLE - && var_is_num_missing (v, value, MV_USER))) - { - x->missing += weight; - goto next_crosstab; - } + struct table_entry *e; + size_t i; - if (i > 1) - { - ofs += fact * ((int) value - vr->min); - fact *= vr->count; - } - } + pt->n_entries = hmap_count (&pt->data); + pt->entries = xnmalloc (pt->n_entries, sizeof *pt->entries); + i = 0; + HMAP_FOR_EACH (e, struct table_entry, node, &pt->data) + pt->entries[i++] = e; + hmap_destroy (&pt->data); - { - const struct variable *row_var = x->vars[ROW_VAR]; - const int row = case_num (c, row_var) - get_var_range (row_var)->min; + sort (pt->entries, pt->n_entries, sizeof *pt->entries, + compare_table_entry_3way, pt); + } - const struct variable *col_var = x->vars[COL_VAR]; - const int col = case_num (c, col_var) - get_var_range (col_var)->min; + make_summary_table (proc); - const int col_dim = get_var_range (col_var)->count; + /* Output each pivot table. 
*/ + for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + { + if (proc->pivot || pt->n_vars == 2) + output_pivot_table (proc, pt); + else + { + size_t row0 = 0, row1 = 0; + while (find_crosstab (pt, &row0, &row1)) + { + struct pivot_table subset; + make_pivot_table_subset (pt, row0, row1, &subset); + output_pivot_table (proc, &subset); + } + } + } - sorted_tab[ofs]->u.data[col + row * col_dim] += weight; - } + /* XXX clear output and prepare for next split file. */ +} - next_crosstab: ; +static void +make_pivot_table_subset (struct pivot_table *pt, size_t row0, size_t row1, + struct pivot_table *subset) +{ + *subset = *pt; + if (pt->n_vars > 2) + { + assert (pt->n_consts == 0); + subset->missing = pt->missing; + subset->n_vars = 2; + subset->vars = pt->vars; + subset->n_consts = pt->n_vars - 2; + subset->const_vars = pt->vars + 2; + subset->const_values = &pt->entries[row0]->values[2]; } + subset->entries = &pt->entries[row0]; + subset->n_entries = row1 - row0; } -/* Compare the table_entry's at A and B and return a strcmp()-type - result. 
*/ static int -compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED) +compare_table_entry_var_3way (const struct table_entry *a, + const struct table_entry *b, + const struct pivot_table *pt, + int idx) { - const struct table_entry *a = a_; - const struct table_entry *b = b_; - - if (a->table > b->table) - return 1; - else if (a->table < b->table) - return -1; - - { - const struct crosstab *x = xtab[a->table]; - int i; - - for (i = x->nvar - 1; i >= 0; i--) - if (var_is_numeric (x->vars[i])) - { - const double diffnum = a->values[i].f - b->values[i].f; - if (diffnum < 0) - return -1; - else if (diffnum > 0) - return 1; - } - else - { - const int diffstr = strncmp (a->values[i].s, b->values[i].s, - var_get_width (x->vars[i])); - if (diffstr) - return diffstr; - } - } - - return 0; + return value_compare_3way (&a->values[idx], &b->values[idx], + var_get_width (pt->vars[idx])); } -/* Calculate a hash value from table_entry A. */ -static unsigned -hash_table_entry (const void *a_, const void *aux UNUSED) +static int +compare_table_entry_vars_3way (const struct table_entry *a, + const struct table_entry *b, + const struct pivot_table *pt, + int idx0, int idx1) { - const struct table_entry *a = a_; - unsigned long hash; int i; - hash = a->table; - for (i = 0; i < xtab[a->table]->nvar; i++) - hash = hash_bytes (&a->values[i], sizeof a->values[i], hash); - - return hash; + for (i = idx1 - 1; i >= idx0; i--) + { + int cmp = compare_table_entry_var_3way (a, b, pt, i); + if (cmp != 0) + return cmp; + } + return 0; } - -/* Post-data reading calculations. 
*/ -static struct table_entry **find_pivot_extent (struct table_entry **, - int *cnt, int pivot); -static void enum_var_values (struct table_entry **entries, int entry_cnt, - int var_idx, - union value **values, int *value_cnt); -static void output_pivot_table (struct table_entry **, struct table_entry **, - const struct dictionary *, - double **, double **, double **, - int *, int *, int *); -static void make_summary_table (const struct dictionary *); +/* Compare the struct table_entry at *AP to the one at *BP and + return a strcmp()-type result. */ +static int +compare_table_entry_3way (const void *ap_, const void *bp_, const void *pt_) +{ + const struct table_entry *const *ap = ap_; + const struct table_entry *const *bp = bp_; + const struct table_entry *a = *ap; + const struct table_entry *b = *bp; + const struct pivot_table *pt = pt_; + int cmp; + + cmp = compare_table_entry_vars_3way (a, b, pt, 2, pt->n_vars); + if (cmp != 0) + return cmp; + + cmp = compare_table_entry_var_3way (a, b, pt, ROW_VAR); + if (cmp != 0) + return cmp; + + return compare_table_entry_var_3way (a, b, pt, COL_VAR); +} -static void -postcalc (const struct dataset *ds) +static int +find_first_difference (const struct pivot_table *pt, size_t row) { - if (mode == GENERAL) + if (row == 0) + return pt->n_vars - 1; + else { - n_sorted_tab = hsh_count (gen_tab); - sorted_tab = (struct table_entry **) hsh_sort (gen_tab); - } + const struct table_entry *a = pt->entries[row]; + const struct table_entry *b = pt->entries[row - 1]; + int col; - make_summary_table (dataset_dict (ds)); - - /* Identify all the individual crosstabulation tables, and deal with - them. */ - { - struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */ - int pc = n_sorted_tab; /* Pivot count. 
*/ - - double *mat = NULL, *row_tot = NULL, *col_tot = NULL; - int maxrows = 0, maxcols = 0, maxcells = 0; - - for (;;) - { - pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT); - if (pe == NULL) - break; - - output_pivot_table (pb, pe, dataset_dict (ds), - &mat, &row_tot, &col_tot, - &maxrows, &maxcols, &maxcells); - - pb = pe; - } - free (mat); - free (row_tot); - free (col_tot); - } - - hsh_destroy (gen_tab); - if (mode == INTEGER) - { - int i; - for (i = 0; i < n_sorted_tab; i++) - { - free (sorted_tab[i]->u.data); - free (sorted_tab[i]); - } - free (sorted_tab); + for (col = pt->n_vars - 1; col >= 0; col--) + if (compare_table_entry_var_3way (a, b, pt, col)) + return col; + NOT_REACHED (); } } -static void insert_summary (struct tab_table *, int tab_index, - const struct dictionary *, - double valid); - /* Output a table summarizing the cases processed. */ static void -make_summary_table (const struct dictionary *dict) +make_summary_table (struct crosstabs_proc *proc) { struct tab_table *summary; + struct pivot_table *pt; + struct string name; + int i; - struct table_entry **pb = sorted_tab, **pe; - int pc = n_sorted_tab; - int cur_tab = 0; - - summary = tab_create (7, 3 + nxtab, 1); + summary = tab_create (7, 3 + proc->n_pivots, 1); tab_title (summary, _("Summary.")); tab_headers (summary, 1, 0, 3, 0); tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases")); @@ -870,638 +850,492 @@ make_summary_table (const struct dictionary *dict) tab_hline (summary, TAL_1, 1, 6, 2); tab_vline (summary, TAL_1, 3, 1, 1); tab_vline (summary, TAL_1, 5, 1, 1); - { - int i; - - for (i = 0; i < 3; i++) - { - tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N")); - tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent")); - } - } + for (i = 0; i < 3; i++) + { + tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N")); + tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent")); + } tab_offset (summary, 0, 3); - for (;;) + ds_init_empty (&name); + for (pt = &proc->pivots[0]; pt 
< &proc->pivots[proc->n_pivots]; pt++) { double valid; + double n[3]; + size_t i; - pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT); - if (pe == NULL) - break; - - while (cur_tab < (*pb)->table) - insert_summary (summary, cur_tab++, dict, 0.); + tab_hline (summary, TAL_1, 0, 6, 0); - if (mode == GENERAL) - for (valid = 0.; pb < pe; pb++) - valid += (*pb)->u.freq; - else - { - const struct crosstab *const x = xtab[(*pb)->table]; - const int n_cols = get_var_range (x->vars[COL_VAR])->count; - const int n_rows = get_var_range (x->vars[ROW_VAR])->count; - const int count = n_cols * n_rows; + ds_clear (&name); + for (i = 0; i < pt->n_vars; i++) + { + if (i > 0) + ds_put_cstr (&name, " * "); + ds_put_cstr (&name, var_to_string (pt->vars[i])); + } + tab_text (summary, 0, 0, TAB_LEFT, ds_cstr (&name)); - for (valid = 0.; pb < pe; pb++) - { - const double *data = (*pb)->u.data; - int i; + valid = 0.; + for (i = 0; i < pt->n_entries; i++) + valid += pt->entries[i]->freq; - for (i = 0; i < count; i++) - valid += *data++; - } - } - insert_summary (summary, cur_tab++, dict, valid); + n[0] = valid; + n[1] = pt->missing; + n[2] = n[0] + n[1]; + for (i = 0; i < 3; i++) + { + tab_double (summary, i * 2 + 1, 0, TAB_RIGHT, n[i], + &proc->weight_format); + tab_text (summary, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%", + n[i] / n[2] * 100.); + } - pb = pe; + tab_next_row (summary); } + ds_destroy (&name); - while (cur_tab < nxtab) - insert_summary (summary, cur_tab++, dict, 0.); - - submit (summary); -} - -/* Inserts a line into T describing the crosstabulation at index - TAB_INDEX, which has VALID valid observations. */ -static void -insert_summary (struct tab_table *t, int tab_index, - const struct dictionary *dict, - double valid) -{ - struct crosstab *x = xtab[tab_index]; - - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; - - tab_hline (t, TAL_1, 0, 6, 0); - - /* Crosstabulation name. 
*/ - { - char *buf = xmalloca (128 * x->nvar); - char *cp = buf; - int i; - - for (i = 0; i < x->nvar; i++) - { - if (i > 0) - cp = stpcpy (cp, " * "); - - cp = stpcpy (cp, var_to_string (x->vars[i])); - } - tab_text (t, 0, 0, TAB_LEFT, buf); - - freea (buf); - } - - /* Counts and percentages. */ - { - double n[3]; - int i; - - n[0] = valid; - n[1] = x->missing; - n[2] = n[0] + n[1]; - - - for (i = 0; i < 3; i++) - { - tab_double (t, i * 2 + 1, 0, TAB_RIGHT, n[i], wfmt); - tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%", - n[i] / n[2] * 100.); - } - } - - tab_next_row (t); + submit (proc, NULL, summary); } /* Output. */ -/* Tables. */ -static struct tab_table *table; /* Crosstabulation table. */ -static struct tab_table *chisq; /* Chi-square table. */ -static struct tab_table *sym; /* Symmetric measures table. */ -static struct tab_table *risk; /* Risk estimate table. */ -static struct tab_table *direct; /* Directional measures table. */ - -/* Statistics. */ -static int chisq_fisher; /* Did any rows include Fisher's exact test? */ - -/* Column values, number of columns. */ -static union value *cols; -static int n_cols; - -/* Row values, number of rows. */ -static union value *rows; -static int n_rows; - -/* Number of statistically interesting columns/rows (columns/rows with - data in them). */ -static int ns_cols, ns_rows; - -/* Crosstabulation. */ -static const struct crosstab *x; - -/* Number of variables from the crosstabulation to consider. This is - either x->nvar, if pivoting is on, or 2, if pivoting is off. */ -static int nvar; - -/* Matrix contents. */ -static double *mat; /* Matrix proper. */ -static double *row_tot; /* Row totals. */ -static double *col_tot; /* Column totals. */ -static double W; /* Grand total. 
*/ - -static void display_dimensions (struct tab_table *, int first_difference, - struct table_entry *); -static void display_crosstabulation (void); -static void display_chisq (const struct dictionary *); -static void display_symmetric (const struct dictionary *); -static void display_risk (const struct dictionary *); -static void display_directional (void); -static void crosstabs_dim (struct tab_table *, struct outp_driver *, void *); -static void table_value_missing (struct tab_table *table, int c, int r, +static struct tab_table *create_crosstab_table (struct crosstabs_proc *, + struct pivot_table *); +static struct tab_table *create_chisq_table (struct pivot_table *); +static struct tab_table *create_sym_table (struct pivot_table *); +static struct tab_table *create_risk_table (struct pivot_table *); +static struct tab_table *create_direct_table (struct pivot_table *); +static void display_dimensions (struct crosstabs_proc *, struct pivot_table *, + struct tab_table *, int first_difference); +static void display_crosstabulation (struct crosstabs_proc *, + struct pivot_table *, + struct tab_table *); +static void display_chisq (struct pivot_table *, struct tab_table *, + bool *showed_fisher); +static void display_symmetric (struct crosstabs_proc *, struct pivot_table *, + struct tab_table *); +static void display_risk (struct pivot_table *, struct tab_table *); +static void display_directional (struct crosstabs_proc *, struct pivot_table *, + struct tab_table *); +static void crosstabs_dim (struct tab_table *, struct outp_driver *, + void *proc); +static void table_value_missing (struct crosstabs_proc *proc, + struct tab_table *table, int c, int r, unsigned char opt, const union value *v, const struct variable *var); -static void delete_missing (void); +static void delete_missing (struct pivot_table *); +static void build_matrix (struct pivot_table *); /* Output pivot table beginning at PB and continuing until PE, exclusive. 
For efficiency, *MATP is a pointer to a matrix that can hold *MAXROWS entries. */ static void -output_pivot_table (struct table_entry **pb, struct table_entry **pe, - const struct dictionary *dict, - double **matp, double **row_totp, double **col_totp, - int *maxrows, int *maxcols, int *maxcells) +output_pivot_table (struct crosstabs_proc *proc, struct pivot_table *pt) { - /* Subtable. */ - struct table_entry **tb = pb, **te; /* Table begin, table end. */ - int tc = pe - pb; /* Table count. */ - - /* Table entry for header comparison. */ - struct table_entry *cmp = NULL; - - x = xtab[(*pb)->table]; - enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols); - - nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2; - - /* Crosstabulation table initialization. */ - if (num_cells) - { - table = tab_create (nvar + n_cols, - (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1); - tab_headers (table, nvar - 1, 0, 2, 0); - - /* First header line. */ - tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0, - TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR])); - - tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1); - - /* Second header line. */ - { - int i; - - for (i = 2; i < nvar; i++) - tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1, - TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i])); - tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE, - var_get_name (x->vars[ROW_VAR])); - for (i = 0; i < n_cols; i++) - table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i], - x->vars[COL_VAR]); - tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total")); - } - - tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2); - tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1); - - /* Title. 
*/ - { - char *title = xmalloca (x->nvar * 64 + 128); - char *cp = title; - int i; - - if (cmd.pivot == CRS_PIVOT) - for (i = 0; i < nvar; i++) - { - if (i) - cp = stpcpy (cp, " by "); - cp = stpcpy (cp, var_get_name (x->vars[i])); - } - else - { - cp = spprintf (cp, "%s by %s for", - var_get_name (x->vars[0]), - var_get_name (x->vars[1])); - for (i = 2; i < nvar; i++) - { - char buf[64], *bufp; - - if (i > 2) - *cp++ = ','; - *cp++ = ' '; - cp = stpcpy (cp, var_get_name (x->vars[i])); - *cp++ = '='; - format_short (buf, var_get_print_format (x->vars[i]), - &(*pb)->values[i]); - for (bufp = buf; isspace ((unsigned char) *bufp); bufp++) - ; - cp = stpcpy (cp, bufp); - } - } - - cp = stpcpy (cp, " ["); - for (i = 0; i < num_cells; i++) - { - struct tuple - { - int value; - const char *name; - }; - - static const struct tuple cell_names[] = - { - {CRS_CL_COUNT, N_("count")}, - {CRS_CL_ROW, N_("row %")}, - {CRS_CL_COLUMN, N_("column %")}, - {CRS_CL_TOTAL, N_("total %")}, - {CRS_CL_EXPECTED, N_("expected")}, - {CRS_CL_RESIDUAL, N_("residual")}, - {CRS_CL_SRESIDUAL, N_("std. resid.")}, - {CRS_CL_ASRESIDUAL, N_("adj. resid.")}, - {-1, NULL}, - }; - - const struct tuple *t; - - for (t = cell_names; t->value != cells[i]; t++) - assert (t->value != -1); - if (i) - cp = stpcpy (cp, ", "); - cp = stpcpy (cp, gettext (t->name)); - } - strcpy (cp, "]."); - - tab_title (table, "%s", title); - freea (title); - } - - tab_offset (table, 0, 2); - } - else - table = NULL; - - /* Chi-square table initialization. 
*/ - if (cmd.a_statistics[CRS_ST_CHISQ]) - { - chisq = tab_create (6 + (nvar - 2), - (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1); - tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0); - - tab_title (chisq, _("Chi-square tests.")); - - tab_offset (chisq, nvar - 2, 0); - tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); - tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value")); - tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df")); - tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE, - _("Asymp. Sig. (2-sided)")); - tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE, - _("Exact. Sig. (2-sided)")); - tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE, - _("Exact. Sig. (1-sided)")); - chisq_fisher = 0; - tab_offset (chisq, 0, 1); - } - else - chisq = NULL; - - /* Symmetric measures. */ - if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC] - || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU] - || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR] - || cmd.a_statistics[CRS_ST_KAPPA]) - { - sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1); - tab_headers (sym, 2 + (nvar - 2), 0, 1, 0); - tab_title (sym, _("Symmetric measures.")); - - tab_offset (sym, nvar - 2, 0); - tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category")); - tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); - tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value")); - tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error")); - tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T")); - tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig.")); - tab_offset (sym, 0, 1); - } - else - sym = NULL; - - /* Risk estimate. 
*/ - if (cmd.a_statistics[CRS_ST_RISK]) - { - risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1); - tab_headers (risk, 1 + nvar - 2, 0, 2, 0); - tab_title (risk, _("Risk estimate.")); - - tab_offset (risk, nvar - 2, 0); - tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, - _("95%% Confidence Interval")); - tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic")); - tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value")); - tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower")); - tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper")); - tab_hline (risk, TAL_1, 2, 3, 1); - tab_vline (risk, TAL_1, 2, 0, 1); - tab_offset (risk, 0, 2); - } - else - risk = NULL; - - /* Directional measures. */ - if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC] - || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA]) - { - direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1); - tab_headers (direct, 3 + (nvar - 2), 0, 1, 0); - tab_title (direct, _("Directional measures.")); - - tab_offset (direct, nvar - 2, 0); - tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category")); - tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); - tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type")); - tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value")); - tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error")); - tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T")); - tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig.")); - tab_offset (direct, 0, 1); - } - else - direct = NULL; - - for (;;) - { - /* Find pivot subtable if applicable. */ - te = find_pivot_extent (tb, &tc, 0); - if (te == NULL) - break; + struct tab_table *table = NULL; /* Crosstabulation table. */ + struct tab_table *chisq = NULL; /* Chi-square table. */ + bool showed_fisher = false; + struct tab_table *sym = NULL; /* Symmetric measures table. 
*/ + struct tab_table *risk = NULL; /* Risk estimate table. */ + struct tab_table *direct = NULL; /* Directional measures table. */ + size_t row0, row1; + + enum_var_values (pt, COL_VAR, &pt->cols, &pt->n_cols); + + if (proc->cells) + table = create_crosstab_table (proc, pt); + if (proc->statistics & (1u << CRS_ST_CHISQ)) + chisq = create_chisq_table (pt); + if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC) + | (1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU) + | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_CORR) + | (1u << CRS_ST_KAPPA))) + sym = create_sym_table (pt); + if (proc->statistics & (1u << CRS_ST_RISK)) + risk = create_risk_table (pt); + if (proc->statistics & ((1u << CRS_ST_LAMBDA) | (1u << CRS_ST_UC) + | (1u << CRS_ST_D) | (1u << CRS_ST_ETA))) + direct = create_direct_table (pt); + + row0 = row1 = 0; + while (find_crosstab (pt, &row0, &row1)) + { + struct pivot_table x; + int first_difference; + + make_pivot_table_subset (pt, row0, row1, &x); /* Find all the row variable values. */ - enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows); + enum_var_values (&x, ROW_VAR, &x.rows, &x.n_rows); - /* Allocate memory space for the column and row totals. */ - if (n_rows > *maxrows) - { - *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp); - row_tot = *row_totp; - *maxrows = n_rows; - } - if (n_cols > *maxcols) - { - *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp); - col_tot = *col_totp; - *maxcols = n_cols; - } + if (size_overflow_p (xtimes (xtimes (x.n_rows, x.n_cols), + sizeof (double)))) + xalloc_die (); + x.row_tot = xmalloc (x.n_rows * sizeof *x.row_tot); + x.col_tot = xmalloc (x.n_cols * sizeof *x.col_tot); + x.mat = xmalloc (x.n_rows * x.n_cols * sizeof *x.mat); /* Allocate table space for the matrix. 
*/ - if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table)) + if (table + && tab_row (table) + (x.n_rows + 1) * proc->n_cells > tab_nr (table)) tab_realloc (table, -1, - MAX (tab_nr (table) + (n_rows + 1) * num_cells, - tab_nr (table) * (pe - pb) / (te - tb))); + MAX (tab_nr (table) + (x.n_rows + 1) * proc->n_cells, + tab_nr (table) * pt->n_entries / x.n_entries)); - if (mode == GENERAL) - { - /* Allocate memory space for the matrix. */ - if (n_cols * n_rows > *maxcells) - { - *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp); - *maxcells = n_cols * n_rows; - } + build_matrix (&x); - mat = *matp; + /* Find the first variable that differs from the last subtable. */ + first_difference = find_first_difference (pt, row0); + if (table) + { + display_dimensions (proc, &x, table, first_difference); + display_crosstabulation (proc, &x, table); + } - /* Build the matrix and calculate column totals. */ - { - union value *cur_col = cols; - union value *cur_row = rows; - double *mp = mat; - double *cp = col_tot; - struct table_entry **p; - - *cp = 0.; - for (p = &tb[0]; p < te; p++) - { - for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col); - cur_row = rows) - { - *++cp = 0.; - for (; cur_row < &rows[n_rows]; cur_row++) - { - *mp = 0.; - mp += n_cols; - } - cur_col++; - mp = &mat[cur_col - cols]; - } + if (proc->exclude == MV_NEVER) + delete_missing (&x); - for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row); - cur_row++) - { - *mp = 0.; - mp += n_cols; - } + /* Compute the statistics from the subtable X: only X has the matrix and totals filled in by build_matrix() (and adjusted by delete_missing()), whereas PT is the enclosing table. */ + if (chisq) + { + display_dimensions (proc, &x, chisq, first_difference); + display_chisq (&x, chisq, &showed_fisher); + } + if (sym) + { + display_dimensions (proc, &x, sym, first_difference); + display_symmetric (proc, &x, sym); + } + if (risk) + { + display_dimensions (proc, &x, risk, first_difference); + display_risk (&x, risk); + } + if (direct) + { + display_dimensions (proc, &x, direct, first_difference); + display_directional (proc, &x, direct); + } - *cp += *mp 
= (*p)->u.freq; - mp += n_cols; - cur_row++; - } + /* Free the arrays allocated for this subtable earlier in the iteration. XXX Free any other data in x. */ + free (x.mat); + free (x.col_tot); + free (x.row_tot); + free (x.rows); + } - /* Zero out the rest of the matrix. */ - for (; cur_row < &rows[n_rows]; cur_row++) - { - *mp = 0.; - mp += n_cols; - } - cur_col++; - if (cur_col < &cols[n_cols]) - { - const int rem_cols = n_cols - (cur_col - cols); - int c, r; + submit (proc, NULL, table); - for (c = 0; c < rem_cols; c++) - *++cp = 0.; - mp = &mat[cur_col - cols]; - for (r = 0; r < n_rows; r++) - { - for (c = 0; c < rem_cols; c++) - *mp++ = 0.; - mp += n_cols - rem_cols; - } - } - } - } - else - { - int r, c; - double *tp = col_tot; + if (chisq) + { + if (!showed_fisher) + tab_resize (chisq, 4 + (pt->n_vars - 2), -1); + submit (proc, pt, chisq); + } - assert (mode == INTEGER); - mat = (*tb)->u.data; - ns_cols = n_cols; + submit (proc, pt, sym); + submit (proc, pt, risk); + submit (proc, pt, direct); - /* Calculate column totals. */ - for (c = 0; c < n_cols; c++) - { - double cum = 0.; - double *cp = &mat[c]; + free (pt->cols); +} - for (r = 0; r < n_rows; r++) - cum += cp[r * n_cols]; - *tp++ = cum; - } - } +static void +build_matrix (struct pivot_table *x) +{ + const int col_var_width = var_get_width (x->vars[COL_VAR]); + const int row_var_width = var_get_width (x->vars[ROW_VAR]); + int col, row; + double *mp; + struct table_entry **p; - { - double *cp; + mp = x->mat; + col = row = 0; + for (p = x->entries; p < &x->entries[x->n_entries]; p++) + { + const struct table_entry *te = *p; - for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++) - ns_cols += *cp != 0.; - } + while (!value_equal (&x->rows[row], &te->values[ROW_VAR], row_var_width)) + { + for (; col < x->n_cols; col++) + *mp++ = 0.0; + col = 0; + row++; + } - /* Calculate row totals. 
*/ - { - double *mp = mat; - double *rp = row_tot; - int r, c; + while (!value_equal (&x->cols[col], &te->values[COL_VAR], col_var_width)) + { + *mp++ = 0.0; + col++; + } - for (ns_rows = 0, r = n_rows; r--; ) - { - double cum = 0.; - for (c = n_cols; c--; ) - cum += *mp++; - *rp++ = cum; - if (cum != 0.) - ns_rows++; - } - } + *mp++ = te->freq; + if (++col >= x->n_cols) + { + col = 0; + row++; + } + } + while (mp < &x->mat[x->n_cols * x->n_rows]) + *mp++ = 0.0; + assert (mp == &x->mat[x->n_cols * x->n_rows]); + + /* Column totals, row totals, ns_rows. */ + mp = x->mat; + for (col = 0; col < x->n_cols; col++) + x->col_tot[col] = 0.0; + for (row = 0; row < x->n_rows; row++) + x->row_tot[row] = 0.0; + x->ns_rows = 0; + for (row = 0; row < x->n_rows; row++) + { + bool row_is_empty = true; + for (col = 0; col < x->n_cols; col++) + { + if (*mp != 0.0) + { + row_is_empty = false; + x->col_tot[col] += *mp; + x->row_tot[row] += *mp; + } + mp++; + } + if (!row_is_empty) + x->ns_rows++; + } + assert (mp == &x->mat[x->n_cols * x->n_rows]); - /* Calculate grand total. */ - { - double *tp; - double cum = 0.; - int n; + /* ns_cols. */ + x->ns_cols = 0; + for (col = 0; col < x->n_cols; col++) + for (row = 0; row < x->n_rows; row++) + if (x->mat[col + row * x->n_cols] != 0.0) + { + x->ns_cols++; + break; + } - if (n_rows < n_cols) - tp = row_tot, n = n_rows; - else - tp = col_tot, n = n_cols; - while (n--) - cum += *tp++; - W = cum; - } + /* Grand total. */ + x->total = 0.0; + for (col = 0; col < x->n_cols; col++) + x->total += x->col_tot[col]; +} - /* Find the first variable that differs from the last subtable, - then display the values of the dimensioning variables for - each table that needs it. 
*/ - { - int first_difference = nvar - 1; +static struct tab_table * +create_crosstab_table (struct crosstabs_proc *proc, struct pivot_table *pt) +{ + struct tuple + { + int value; + const char *name; + }; + static const struct tuple names[] = + { + {CRS_CL_COUNT, N_("count")}, + {CRS_CL_ROW, N_("row %")}, + {CRS_CL_COLUMN, N_("column %")}, + {CRS_CL_TOTAL, N_("total %")}, + {CRS_CL_EXPECTED, N_("expected")}, + {CRS_CL_RESIDUAL, N_("residual")}, + {CRS_CL_SRESIDUAL, N_("std. resid.")}, + {CRS_CL_ASRESIDUAL, N_("adj. resid.")}, + }; + const int n_names = sizeof names / sizeof *names; + const struct tuple *t; - if (tb != pb) - for (; ; first_difference--) - { - assert (first_difference >= 2); - if (memcmp (&cmp->values[first_difference], - &(*tb)->values[first_difference], - sizeof *cmp->values)) - break; - } - cmp = *tb; - - if (table) - display_dimensions (table, first_difference, *tb); - if (chisq) - display_dimensions (chisq, first_difference, *tb); - if (sym) - display_dimensions (sym, first_difference, *tb); - if (risk) - display_dimensions (risk, first_difference, *tb); - if (direct) - display_dimensions (direct, first_difference, *tb); + struct tab_table *table; + struct string title; + int i; + + table = tab_create (pt->n_consts + 1 + pt->n_cols + 1, + (pt->n_entries / pt->n_cols) * 3 / 2 * proc->n_cells + 10, + true); + tab_headers (table, pt->n_consts + 1, 0, 2, 0); + + /* First header line. */ + tab_joint_text (table, pt->n_consts + 1, 0, + (pt->n_consts + 1) + (pt->n_cols - 1), 0, + TAB_CENTER | TAT_TITLE, var_get_name (pt->vars[COL_VAR])); + + tab_hline (table, TAL_1, pt->n_consts + 1, + pt->n_consts + 2 + pt->n_cols - 2, 1); + + /* Second header line. 
*/ + for (i = 2; i < pt->n_consts + 2; i++) + tab_joint_text (table, pt->n_consts + 2 - i - 1, 0, + pt->n_consts + 2 - i - 1, 1, + TAB_RIGHT | TAT_TITLE, var_to_string (pt->vars[i])); + tab_text (table, pt->n_consts + 2 - 2, 1, TAB_RIGHT | TAT_TITLE, + var_get_name (pt->vars[ROW_VAR])); + for (i = 0; i < pt->n_cols; i++) + table_value_missing (proc, table, pt->n_consts + 2 + i - 1, 1, TAB_RIGHT, + &pt->cols[i], pt->vars[COL_VAR]); + tab_text (table, pt->n_consts + 2 + pt->n_cols - 1, 1, TAB_CENTER, _("Total")); + + tab_hline (table, TAL_1, 0, pt->n_consts + 2 + pt->n_cols - 1, 2); + tab_vline (table, TAL_1, pt->n_consts + 2 + pt->n_cols - 1, 0, 1); + + /* Title. */ + ds_init_empty (&title); + for (i = 0; i < pt->n_consts + 2; i++) + { + if (i) + ds_put_cstr (&title, " * "); + ds_put_cstr (&title, var_get_name (pt->vars[i])); + } + for (i = 0; i < pt->n_consts; i++) + { + const struct variable *var = pt->const_vars[i]; + ds_put_format (&title, ", %s=", var_get_name (var)); + data_out (&pt->const_values[i], var_get_print_format (var), + ds_put_uninit (&title, var_get_width (var))); + /* XXX remove any leading space in what was just inserted. 
*/ + } + + ds_put_cstr (&title, " ["); + i = 0; + for (t = names; t < &names[n_names]; t++) + if (proc->cells & (1u << t->value)) + { + if (i++) + ds_put_cstr (&title, ", "); + ds_put_cstr (&title, gettext (t->name)); } + ds_put_cstr (&title, "]."); - if (table) - display_crosstabulation (); - if (cmd.miss == CRS_REPORT) - delete_missing (); - if (chisq) - display_chisq (dict); - if (sym) - display_symmetric (dict); - if (risk) - display_risk (dict); - if (direct) - display_directional (); + tab_title (table, "%s", ds_cstr (&title)); + ds_destroy (&title); - tb = te; - free (rows); - } + tab_offset (table, 0, 2); + return table; +} - submit (table); +/* Chi-square tests. Creates the table for PT, writes its title and column headings, and returns it. */ +static struct tab_table * +create_chisq_table (struct pivot_table *pt) +{ + struct tab_table *chisq; + + chisq = tab_create (6 + (pt->n_vars - 2), + pt->n_entries / pt->n_cols * 3 / 2 * N_CHISQ + 10, + 1); + tab_headers (chisq, 1 + (pt->n_vars - 2), 0, 1, 0); + + tab_title (chisq, _("Chi-square tests.")); + + tab_offset (chisq, pt->n_vars - 2, 0); + tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); + tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value")); + tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df")); + tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE, + _("Asymp. Sig. (2-sided)")); + tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE, + _("Exact. Sig. (2-sided)")); + tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE, + _("Exact. Sig. (1-sided)")); + tab_offset (chisq, 0, 1); + + return chisq; +} - if (chisq) - { - if (!chisq_fisher) - tab_resize (chisq, 4 + (nvar - 2), -1); - submit (chisq); - } +/* Symmetric measures. 
*/ +static struct tab_table * +create_sym_table (struct pivot_table *pt) +{ + struct tab_table *sym; + + sym = tab_create (6 + (pt->n_vars - 2), + pt->n_entries / pt->n_cols * 7 + 10, 1); + tab_headers (sym, 2 + (pt->n_vars - 2), 0, 1, 0); + tab_title (sym, _("Symmetric measures.")); + + tab_offset (sym, pt->n_vars - 2, 0); + tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category")); + tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); + tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value")); + tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error")); + tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T")); + tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig.")); + tab_offset (sym, 0, 1); + + return sym; +} - submit (sym); - submit (risk); - submit (direct); +/* Risk estimate. */ +static struct tab_table * +create_risk_table (struct pivot_table *pt) +{ + struct tab_table *risk; + + risk = tab_create (4 + (pt->n_vars - 2), pt->n_entries / pt->n_cols * 4 + 10, + 1); + tab_headers (risk, 1 + pt->n_vars - 2, 0, 2, 0); + tab_title (risk, _("Risk estimate.")); + + tab_offset (risk, pt->n_vars - 2, 0); + tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, + _("95%% Confidence Interval")); + tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic")); + tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value")); + tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower")); + tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper")); + tab_hline (risk, TAL_1, 2, 3, 1); + tab_vline (risk, TAL_1, 2, 0, 1); + tab_offset (risk, 0, 2); + + return risk; +} - free (cols); +/* Directional measures. 
*/ +static struct tab_table * +create_direct_table (struct pivot_table *pt) +{ + struct tab_table *direct; + + direct = tab_create (7 + (pt->n_vars - 2), + pt->n_entries / pt->n_cols * 7 + 10, 1); + tab_headers (direct, 3 + (pt->n_vars - 2), 0, 1, 0); + tab_title (direct, _("Directional measures.")); + + tab_offset (direct, pt->n_vars - 2, 0); + tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category")); + tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); + tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type")); + tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value")); + tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error")); + tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T")); + tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig.")); + tab_offset (direct, 0, 1); + + return direct; } + /* Delete missing rows and columns for statistical analysis when /MISSING=REPORT. */ static void -delete_missing (void) +delete_missing (struct pivot_table *pt) { - { - int r; - - for (r = 0; r < n_rows; r++) - if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER)) - { - int c; - - for (c = 0; c < n_cols; c++) - mat[c + r * n_cols] = 0.; - ns_rows--; - } - } + int r, c; - { - int c; + for (r = 0; r < pt->n_rows; r++) + if (var_is_num_missing (pt->vars[ROW_VAR], pt->rows[r].f, MV_USER)) + { + for (c = 0; c < pt->n_cols; c++) + pt->mat[c + r * pt->n_cols] = 0.; + pt->ns_rows--; + } - for (c = 0; c < n_cols; c++) - if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)) - { - int r; - for (r = 0; r < n_rows; r++) - mat[c + r * n_cols] = 0.; - ns_cols--; - } - } + for (c = 0; c < pt->n_cols; c++) + if (var_is_num_missing (pt->vars[COL_VAR], pt->cols[c].f, MV_USER)) + { + for (r = 0; r < pt->n_rows; r++) + pt->mat[c + r * pt->n_cols] = 0.; + pt->ns_cols--; + } } /* Prepare table T for submission, and submit it. 
*/ static void -submit (struct tab_table *t) +submit (struct crosstabs_proc *proc, struct pivot_table *pt, + struct tab_table *t) { int i; @@ -1515,30 +1349,31 @@ submit (struct tab_table *t) return; } tab_offset (t, 0, 0); - if (t != table) - for (i = 2; i < nvar; i++) - tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE, - var_to_string (x->vars[i])); + if (pt != NULL) + for (i = 2; i < pt->n_vars; i++) + tab_text (t, pt->n_vars - i - 1, 0, TAB_RIGHT | TAT_TITLE, + var_to_string (pt->vars[i])); tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1); tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1, tab_nr (t) - 1); tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1, tab_nr (t) - 1); tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1); - tab_dim (t, crosstabs_dim, NULL); + tab_dim (t, crosstabs_dim, proc); tab_submit (t); } /* Sets the widths of all the columns and heights of all the rows in table T for driver D. */ static void -crosstabs_dim (struct tab_table *t, struct outp_driver *d, void *aux UNUSED) +crosstabs_dim (struct tab_table *t, struct outp_driver *d, void *proc_) { + struct crosstabs_proc *proc = proc_; int i; /* Width of a numerical column. */ int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL); - if (cmd.miss == CRS_REPORT) + if (proc->exclude == MV_NEVER) c += outp_string_width (d, "M", OUTP_PROPORTIONAL); /* Set width for header columns. */ @@ -1569,142 +1404,93 @@ crosstabs_dim (struct tab_table *t, struct outp_driver *d, void *aux UNUSED) t->h[i] = tab_natural_height (t, d, i); } -static struct table_entry **find_pivot_extent_general (struct table_entry **tp, - int *cnt, int pivot); -static struct table_entry **find_pivot_extent_integer (struct table_entry **tp, - int *cnt, int pivot); - -/* Calls find_pivot_extent_general or find_pivot_extent_integer, as - appropriate. 
*/ -static struct table_entry ** -find_pivot_extent (struct table_entry **tp, int *cnt, int pivot) -{ - return (mode == GENERAL - ? find_pivot_extent_general (tp, cnt, pivot) - : find_pivot_extent_integer (tp, cnt, pivot)); -} - -/* Find the extent of a region in TP that contains one table. If - PIVOT != 0 that means a set of table entries with identical table - number; otherwise they also have to have the same values for every - dimension after the row and column dimensions. The table that is - searched starts at TP and has length CNT. Returns the first entry - after the last one in the table; sets *CNT to the number of - remaining values. If there are no entries in TP at all, returns - NULL. A yucky interface, admittedly, but it works. */ -static struct table_entry ** -find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot) +static bool +find_crosstab (struct pivot_table *pt, size_t *row0p, size_t *row1p) { - struct table_entry *fp = *tp; - struct crosstab *x; - - if (*cnt == 0) - return NULL; - x = xtab[(*tp)->table]; - for (;;) - { - tp++; - if (--*cnt == 0) - break; - assert (*cnt > 0); - - if ((*tp)->table != fp->table) - break; - if (pivot) - continue; - - if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2))) - break; - } - - return tp; -} + size_t row0 = *row1p; + size_t row1; -/* Integer mode correspondent to find_pivot_extent_general(). This - could be optimized somewhat, but I just don't give a crap about - CROSSTABS performance in integer mode, which is just a - CROSSTABS wart as far as I'm concerned. + if (row0 >= pt->n_entries) + return false; - That said, feel free to send optimization patches to me. 
*/ -static struct table_entry ** -find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot) -{ - struct table_entry *fp = *tp; - struct crosstab *x; - - if (*cnt == 0) - return NULL; - x = xtab[(*tp)->table]; - for (;;) + for (row1 = row0 + 1; row1 < pt->n_entries; row1++) { - tp++; - if (--*cnt == 0) - break; - assert (*cnt > 0); - - if ((*tp)->table != fp->table) - break; - if (pivot) - continue; - - if (memcmp (&(*tp)->values[2], &fp->values[2], - sizeof (union value) * (x->nvar - 2))) - break; + struct table_entry *a = pt->entries[row0]; + struct table_entry *b = pt->entries[row1]; + if (compare_table_entry_vars_3way (a, b, pt, 2, pt->n_vars) != 0) + break; } - - return tp; + *row0p = row0; + *row1p = row1; + return true; } /* Compares `union value's A_ and B_ and returns a strcmp()-like result. WIDTH_ points to an int which is either 0 for a numeric value or a string width for a string value. */ static int -compare_value (const void *a_, const void *b_, const void *width_) +compare_value_3way (const void *a_, const void *b_, const void *width_) { const union value *a = a_; const union value *b = b_; - const int *pwidth = width_; - const int width = *pwidth; + const int *width = width_; - if (width == 0) - return (a->f < b->f) ? -1 : (a->f > b->f); - else - return strncmp (a->s, b->s, width); + return value_compare_3way (a, b, *width); } /* Given an array of ENTRY_CNT table_entry structures starting at ENTRIES, creates a sorted list of the values that the variable with index VAR_IDX takes on. The values are returned as a - malloc()'darray stored in *VALUES, with the number of values + malloc()'d array stored in *VALUES, with the number of values stored in *VALUE_CNT. 
*/ static void -enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx, - union value **values, int *value_cnt) +enum_var_values (const struct pivot_table *pt, int var_idx, + union value **valuesp, int *n_values) { - const struct variable *v = xtab[(*entries)->table]->vars[var_idx]; + const struct variable *var = pt->vars[var_idx]; + struct var_range *range = get_var_range (var); + union value *values; + size_t i; - if (mode == GENERAL) + if (range) { - int width = MIN (var_get_width (v), MAX_SHORT_STRING); - int i; - - *values = xnmalloc (entry_cnt, sizeof **values); - for (i = 0; i < entry_cnt; i++) - (*values)[i] = entries[i]->values[var_idx]; - *value_cnt = sort_unique (*values, entry_cnt, sizeof **values, - compare_value, &width); + values = *valuesp = xnmalloc (range->count, sizeof *values); + *n_values = range->count; + for (i = 0; i < range->count; i++) + values[i].f = range->min + i; } else { - struct var_range *vr = get_var_range (v); - int i; + int width = var_get_width (var); + struct hmapx_node *node; + const union value *iter; + struct hmapx set; + + hmapx_init (&set); + for (i = 0; i < pt->n_entries; i++) + { + const struct table_entry *te = pt->entries[i]; + const union value *value = &te->values[var_idx]; + size_t hash = value_hash (value, width, 0); + + HMAPX_FOR_EACH_WITH_HASH (iter, node, hash, &set) + if (value_equal (iter, value, width)) + goto next_entry; + + hmapx_insert (&set, (union value *) value, hash); + + next_entry: ; + } - assert (mode == INTEGER); - *values = xnmalloc (vr->count, sizeof **values); - for (i = 0; i < vr->count; i++) - (*values)[i].f = i + vr->min; - *value_cnt = vr->count; + *n_values = hmapx_count (&set); + values = *valuesp = xnmalloc (*n_values, sizeof *values); + i = 0; + HMAPX_FOR_EACH (iter, node, &set) + values[i++] = *iter; + hmapx_destroy (&set); + + sort (values, *n_values, sizeof *values, compare_value_3way, &width); } } @@ -1712,7 +1498,8 @@ enum_var_values (struct table_entry **entries, 
int entry_cnt, int var_idx, from V, displayed with print format spec from variable VAR. When in REPORT missing-value mode, missing values have an M appended. */ static void -table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, +table_value_missing (struct crosstabs_proc *proc, + struct tab_table *table, int c, int r, unsigned char opt, const union value *v, const struct variable *var) { struct substring s; @@ -1726,9 +1513,9 @@ table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, } s.string = tab_alloc (table, print->w); - format_short (s.string, print, v); - s.length = strlen (s.string); - if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER)) + data_out (v, print, s.string); + s.length = print->w; + if (proc->exclude == MV_NEVER && var_is_num_missing (var, v->f, MV_USER)) s.string[s.length++] = 'M'; while (s.length && *s.string == ' ') { @@ -1739,19 +1526,20 @@ table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, } /* Draws a line across TABLE at the current row to indicate the most - major dimension variable with index FIRST_DIFFERENCE out of NVAR + major dimension variable with index FIRST_DIFFERENCE out of N_VARS that changed, and puts the values that changed into the table. TB - and X must be the corresponding table_entry and crosstab, + and PT must be the corresponding table_entry and crosstab, respectively. 
*/ static void -display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb) +display_dimensions (struct crosstabs_proc *proc, struct pivot_table *pt, + struct tab_table *table, int first_difference) { - tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0); + tab_hline (table, TAL_1, pt->n_vars - first_difference - 1, tab_nc (table) - 1, 0); for (; first_difference >= 2; first_difference--) - table_value_missing (table, nvar - first_difference - 1, 0, - TAB_RIGHT, &tb->values[first_difference], - x->vars[first_difference]); + table_value_missing (proc, table, pt->n_vars - first_difference - 1, 0, + TAB_RIGHT, &pt->entries[0]->values[first_difference], + pt->vars[first_difference]); } /* Put VALUE into cell (C,R) of TABLE, suffixed with character @@ -1784,208 +1572,196 @@ format_cell_entry (struct tab_table *table, int c, int r, double value, /* Displays the crosstabulation table. */ static void -display_crosstabulation (void) +display_crosstabulation (struct crosstabs_proc *proc, struct pivot_table *pt, + struct tab_table *table) { - { - int r; + int last_row; + int r, c, i; + double *mp; - for (r = 0; r < n_rows; r++) - table_value_missing (table, nvar - 2, r * num_cells, - TAB_RIGHT, &rows[r], x->vars[ROW_VAR]); - } - tab_text (table, nvar - 2, n_rows * num_cells, + for (r = 0; r < pt->n_rows; r++) + table_value_missing (proc, table, pt->n_vars - 2, r * proc->n_cells, + TAB_RIGHT, &pt->rows[r], pt->vars[ROW_VAR]); + + tab_text (table, pt->n_vars - 2, pt->n_rows * proc->n_cells, TAB_LEFT, _("Total")); /* Put in the actual cells. 
*/ - { - double *mp = mat; - int r, c, i; - - tab_offset (table, nvar - 1, -1); - for (r = 0; r < n_rows; r++) - { - if (num_cells > 1) - tab_hline (table, TAL_1, -1, n_cols, 0); - for (c = 0; c < n_cols; c++) - { - bool mark_missing = false; - double expected_value = row_tot[r] * col_tot[c] / W; - if (cmd.miss == CRS_REPORT - && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER) - || var_is_num_missing (x->vars[ROW_VAR], rows[r].f, - MV_USER))) - mark_missing = true; - for (i = 0; i < num_cells; i++) - { - double v; - int suffix = 0; - - switch (cells[i]) - { - case CRS_CL_COUNT: - v = *mp; - break; - case CRS_CL_ROW: - v = *mp / row_tot[r] * 100.; - suffix = '%'; - break; - case CRS_CL_COLUMN: - v = *mp / col_tot[c] * 100.; - suffix = '%'; - break; - case CRS_CL_TOTAL: - v = *mp / W * 100.; - suffix = '%'; - break; - case CRS_CL_EXPECTED: - v = expected_value; - break; - case CRS_CL_RESIDUAL: - v = *mp - expected_value; - break; - case CRS_CL_SRESIDUAL: - v = (*mp - expected_value) / sqrt (expected_value); - break; - case CRS_CL_ASRESIDUAL: - v = ((*mp - expected_value) - / sqrt (expected_value - * (1. - row_tot[r] / W) - * (1. 
- col_tot[c] / W))); - break; - default: - NOT_REACHED (); - } - - format_cell_entry (table, c, i, v, suffix, mark_missing); - } + mp = pt->mat; + tab_offset (table, pt->n_vars - 1, -1); + for (r = 0; r < pt->n_rows; r++) + { + if (proc->n_cells > 1) + tab_hline (table, TAL_1, -1, pt->n_cols, 0); + for (c = 0; c < pt->n_cols; c++) + { + bool mark_missing = false; + double expected_value = pt->row_tot[r] * pt->col_tot[c] / pt->total; + if (proc->exclude == MV_NEVER + && (var_is_num_missing (pt->vars[COL_VAR], pt->cols[c].f, MV_USER) + || var_is_num_missing (pt->vars[ROW_VAR], pt->rows[r].f, + MV_USER))) + mark_missing = true; + for (i = 0; i < proc->n_cells; i++) + { + double v; + int suffix = 0; + + switch (proc->a_cells[i]) + { + case CRS_CL_COUNT: + v = *mp; + break; + case CRS_CL_ROW: + v = *mp / pt->row_tot[r] * 100.; + suffix = '%'; + break; + case CRS_CL_COLUMN: + v = *mp / pt->col_tot[c] * 100.; + suffix = '%'; + break; + case CRS_CL_TOTAL: + v = *mp / pt->total * 100.; + suffix = '%'; + break; + case CRS_CL_EXPECTED: + v = expected_value; + break; + case CRS_CL_RESIDUAL: + v = *mp - expected_value; + break; + case CRS_CL_SRESIDUAL: + v = (*mp - expected_value) / sqrt (expected_value); + break; + case CRS_CL_ASRESIDUAL: + v = ((*mp - expected_value) + / sqrt (expected_value + * (1. - pt->row_tot[r] / pt->total) + * (1. - pt->col_tot[c] / pt->total))); + break; + default: + NOT_REACHED (); + } + format_cell_entry (table, c, i, v, suffix, mark_missing); + } - mp++; - } + mp++; + } - tab_offset (table, -1, tab_row (table) + num_cells); - } - } + tab_offset (table, -1, tab_row (table) + proc->n_cells); + } /* Row totals. 
*/ - { - int r, i; - - tab_offset (table, -1, tab_row (table) - num_cells * n_rows); - for (r = 0; r < n_rows; r++) - { - bool mark_missing = false; - - if (cmd.miss == CRS_REPORT - && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER)) - mark_missing = true; - - for (i = 0; i < num_cells; i++) - { - char suffix = 0; - double v; + tab_offset (table, -1, tab_row (table) - proc->n_cells * pt->n_rows); + for (r = 0; r < pt->n_rows; r++) + { + bool mark_missing = false; - switch (cells[i]) - { - case CRS_CL_COUNT: - v = row_tot[r]; - break; - case CRS_CL_ROW: - v = 100.0; - suffix = '%'; - break; - case CRS_CL_COLUMN: - v = row_tot[r] / W * 100.; - suffix = '%'; - break; - case CRS_CL_TOTAL: - v = row_tot[r] / W * 100.; - suffix = '%'; - break; - case CRS_CL_EXPECTED: - case CRS_CL_RESIDUAL: - case CRS_CL_SRESIDUAL: - case CRS_CL_ASRESIDUAL: - v = 0.; - break; - default: - NOT_REACHED (); - } + if (proc->exclude == MV_NEVER + && var_is_num_missing (pt->vars[ROW_VAR], pt->rows[r].f, MV_USER)) + mark_missing = true; - format_cell_entry (table, n_cols, 0, v, suffix, mark_missing); - tab_next_row (table); - } - } - } + for (i = 0; i < proc->n_cells; i++) + { + char suffix = 0; + double v; + + switch (proc->a_cells[i]) + { + case CRS_CL_COUNT: + v = pt->row_tot[r]; + break; + case CRS_CL_ROW: + v = 100.0; + suffix = '%'; + break; + case CRS_CL_COLUMN: + v = pt->row_tot[r] / pt->total * 100.; + suffix = '%'; + break; + case CRS_CL_TOTAL: + v = pt->row_tot[r] / pt->total * 100.; + suffix = '%'; + break; + case CRS_CL_EXPECTED: + case CRS_CL_RESIDUAL: + case CRS_CL_SRESIDUAL: + case CRS_CL_ASRESIDUAL: + v = 0.; + break; + default: + NOT_REACHED (); + } + + format_cell_entry (table, pt->n_cols, 0, v, suffix, mark_missing); + tab_next_row (table); + } + } /* Column totals, grand total. */ - { - int c; - int last_row = 0; - - if (num_cells > 1) - tab_hline (table, TAL_1, -1, n_cols, 0); - for (c = 0; c <= n_cols; c++) - { - double ct = c < n_cols ? 
col_tot[c] : W; - bool mark_missing = false; - int i; - - if (cmd.miss == CRS_REPORT && c < n_cols - && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)) - mark_missing = true; - - for (i = 0; i < num_cells; i++) - { - char suffix = 0; - double v; - - switch (cells[i]) - { - case CRS_CL_COUNT: - v = ct; - break; - case CRS_CL_ROW: - v = ct / W * 100.; - suffix = '%'; - break; - case CRS_CL_COLUMN: - v = 100.; - suffix = '%'; - break; - case CRS_CL_TOTAL: - v = ct / W * 100.; - suffix = '%'; - break; - case CRS_CL_EXPECTED: - case CRS_CL_RESIDUAL: - case CRS_CL_SRESIDUAL: - case CRS_CL_ASRESIDUAL: - continue; - default: - NOT_REACHED (); - } + last_row = 0; + if (proc->n_cells > 1) + tab_hline (table, TAL_1, -1, pt->n_cols, 0); + for (c = 0; c <= pt->n_cols; c++) + { + double ct = c < pt->n_cols ? pt->col_tot[c] : pt->total; + bool mark_missing = false; + int i; - format_cell_entry (table, c, i, v, suffix, mark_missing); - } - last_row = i; - } + if (proc->exclude == MV_NEVER && c < pt->n_cols + && var_is_num_missing (pt->vars[COL_VAR], pt->cols[c].f, MV_USER)) + mark_missing = true; - tab_offset (table, -1, tab_row (table) + last_row); - } + for (i = 0; i < proc->n_cells; i++) + { + char suffix = 0; + double v; + + switch (proc->a_cells[i]) + { + case CRS_CL_COUNT: + v = ct; + break; + case CRS_CL_ROW: + v = ct / pt->total * 100.; + suffix = '%'; + break; + case CRS_CL_COLUMN: + v = 100.; + suffix = '%'; + break; + case CRS_CL_TOTAL: + v = ct / pt->total * 100.; + suffix = '%'; + break; + case CRS_CL_EXPECTED: + case CRS_CL_RESIDUAL: + case CRS_CL_SRESIDUAL: + case CRS_CL_ASRESIDUAL: + continue; + default: + NOT_REACHED (); + } + + format_cell_entry (table, c, i, v, suffix, mark_missing); + } + last_row = i; + } + tab_offset (table, -1, tab_row (table) + last_row); tab_offset (table, 0, -1); } -static void calc_r (double *X, double *Y, double *, double *, double *); -static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *); +static void 
calc_r (struct pivot_table *, + double *PT, double *Y, double *, double *, double *); +static void calc_chisq (struct pivot_table *, + double[N_CHISQ], int[N_CHISQ], double *, double *); /* Display chi-square statistics. */ static void -display_chisq (const struct dictionary *dict) +display_chisq (struct pivot_table *pt, struct tab_table *chisq, + bool *showed_fisher) { - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; - static const char *chisq_stats[N_CHISQ] = { N_("Pearson Chi-Square"), @@ -2001,9 +1777,9 @@ display_chisq (const struct dictionary *dict) int i; - calc_chisq (chisq_v, df, &fisher1, &fisher2); + calc_chisq (pt, chisq_v, df, &fisher1, &fisher2); - tab_offset (chisq, nvar - 2, -1); + tab_offset (chisq, pt->n_vars - 2, -1); for (i = 0; i < N_CHISQ; i++) { @@ -2016,13 +1792,13 @@ display_chisq (const struct dictionary *dict) if (i != 2) { tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL); - tab_double (chisq, 2, 0, TAB_RIGHT, df[i], wfmt); + tab_double (chisq, 2, 0, TAB_RIGHT, df[i], &pt->weight_format); tab_double (chisq, 3, 0, TAB_RIGHT, gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL); } else { - chisq_fisher = 1; + *showed_fisher = true; tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL); tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL); } @@ -2030,22 +1806,22 @@ display_chisq (const struct dictionary *dict) } tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_double (chisq, 1, 0, TAB_RIGHT, W, wfmt); + tab_double (chisq, 1, 0, TAB_RIGHT, pt->total, &pt->weight_format); tab_next_row (chisq); tab_offset (chisq, 0, -1); } -static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC], - double[N_SYMMETRIC]); +static int calc_symmetric (struct crosstabs_proc *, struct pivot_table *, + double[N_SYMMETRIC], double[N_SYMMETRIC], + double[N_SYMMETRIC], + double[3], double[3], double[3]); /* Display symmetric measures. 
*/ static void -display_symmetric (const struct dictionary *dict) +display_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, + struct tab_table *sym) { - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; - static const char *categories[] = { N_("Nominal by Nominal"), @@ -2074,12 +1850,14 @@ display_symmetric (const struct dictionary *dict) int last_cat = -1; double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC]; + double somers_d_v[3], somers_d_ase[3], somers_d_t[3]; int i; - if (!calc_symmetric (sym_v, sym_ase, sym_t)) + if (!calc_symmetric (proc, pt, sym_v, sym_ase, sym_t, + somers_d_v, somers_d_ase, somers_d_t)) return; - tab_offset (sym, nvar - 2, -1); + tab_offset (sym, pt->n_vars - 2, -1); for (i = 0; i < N_SYMMETRIC; i++) { @@ -2103,80 +1881,85 @@ display_symmetric (const struct dictionary *dict) } tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_double (sym, 2, 0, TAB_RIGHT, W, wfmt); + tab_double (sym, 2, 0, TAB_RIGHT, pt->total, &pt->weight_format); tab_next_row (sym); tab_offset (sym, 0, -1); } -static int calc_risk (double[], double[], double[], union value *); +static int calc_risk (struct pivot_table *, + double[], double[], double[], union value *); /* Display risk estimate. */ static void -display_risk (const struct dictionary *dict) +display_risk (struct pivot_table *pt, struct tab_table *risk) { - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : & F_8_0; - char buf[256]; double risk_v[3], lower[3], upper[3]; union value c[2]; int i; - if (!calc_risk (risk_v, upper, lower, c)) + if (!calc_risk (pt, risk_v, upper, lower, c)) return; - tab_offset (risk, nvar - 2, -1); + tab_offset (risk, pt->n_vars - 2, -1); for (i = 0; i < 3; i++) { + const struct variable *cv = pt->vars[COL_VAR]; + const struct variable *rv = pt->vars[ROW_VAR]; + int cvw = var_get_width (cv); + int rvw = var_get_width (rv); + if (risk_v[i] == SYSMIS) continue; switch (i) { case 0: - if (var_is_numeric (x->vars[COL_VAR])) + if (var_is_numeric (cv)) sprintf (buf, _("Odds Ratio for %s (%g / %g)"), - var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f); + var_get_name (cv), c[0].f, c[1].f); else sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"), - var_get_name (x->vars[COL_VAR]), - var_get_width (x->vars[COL_VAR]), c[0].s, - var_get_width (x->vars[COL_VAR]), c[1].s); + var_get_name (cv), + cvw, value_str (&c[0], cvw), + cvw, value_str (&c[1], cvw)); break; case 1: case 2: - if (var_is_numeric (x->vars[ROW_VAR])) + if (var_is_numeric (rv)) sprintf (buf, _("For cohort %s = %g"), - var_get_name (x->vars[ROW_VAR]), rows[i - 1].f); + var_get_name (rv), pt->rows[i - 1].f); else sprintf (buf, _("For cohort %s = %.*s"), - var_get_name (x->vars[ROW_VAR]), - var_get_width (x->vars[ROW_VAR]), rows[i - 1].s); + var_get_name (rv), + rvw, value_str (&pt->rows[i - 1], rvw)); break; } tab_text (risk, 0, 0, TAB_LEFT, buf); tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL); - tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL); - tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL); + tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL); + tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL); tab_next_row (risk); } tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_double (risk, 1, 0, TAB_RIGHT, W, wfmt); + tab_double (risk, 1, 0, TAB_RIGHT, pt->total, &pt->weight_format); tab_next_row (risk); tab_offset (risk, 0, -1); } -static 
int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL], +static int calc_directional (struct crosstabs_proc *, struct pivot_table *, + double[N_DIRECTIONAL], double[N_DIRECTIONAL], double[N_DIRECTIONAL]); /* Display directional measures. */ static void -display_directional (void) +display_directional (struct crosstabs_proc *proc, struct pivot_table *pt, + struct tab_table *direct) { static const char *categories[] = { @@ -2241,10 +2024,10 @@ display_directional (void) int i; - if (!calc_directional (direct_v, direct_ase, direct_t)) + if (!calc_directional (proc, pt, direct_v, direct_ase, direct_t)) return; - tab_offset (direct, nvar - 2, -1); + tab_offset (direct, pt->n_vars - 2, -1); for (i = 0; i < N_DIRECTIONAL; i++) { @@ -2268,9 +2051,9 @@ display_directional (void) if (k == 0) string = NULL; else if (k == 1) - string = var_get_name (x->vars[0]); + string = var_get_name (pt->vars[0]); else - string = var_get_name (x->vars[1]); + string = var_get_name (pt->vars[1]); tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF, gettext (stats_names[j][k]), string); @@ -2293,14 +2076,14 @@ display_directional (void) /* Statistical calculations. */ /* Returns the value of the gamma (factorial) function for an integer - argument X. */ + argument PT. 
*/ static double -gamma_int (double x) +gamma_int (double pt) { double r = 1; int i; - for (i = 2; i < x; i++) + for (i = 2; i < pt; i++) r *= i; return r; } @@ -2331,7 +2114,7 @@ swap (int *a, int *b) static void calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2) { - int x; + int pt; if (MIN (c, d) < MIN (a, b)) swap (&a, &c), swap (&b, &d); @@ -2346,19 +2129,20 @@ calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2) } *fisher1 = 0.; - for (x = 0; x <= a; x++) - *fisher1 += Pr (a - x, b + x, c + x, d - x); + for (pt = 0; pt <= a; pt++) + *fisher1 += Pr (a - pt, b + pt, c + pt, d - pt); *fisher2 = *fisher1; - for (x = 1; x <= b; x++) - *fisher2 += Pr (a + x, b - x, c - x, d + x); + for (pt = 1; pt <= b; pt++) + *fisher2 += Pr (a + pt, b - pt, c - pt, d + pt); } /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS columns with values COLS and N_ROWS rows with values ROWS. Values - in the matrix sum to W. */ + in the matrix sum to pt->total. 
*/ static void -calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ], +calc_chisq (struct pivot_table *pt, + double chisq[N_CHISQ], int df[N_CHISQ], double *fisher1, double *fisher2) { int r, c; @@ -2367,19 +2151,19 @@ calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ], chisq[2] = chisq[3] = chisq[4] = SYSMIS; *fisher1 = *fisher2 = SYSMIS; - df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1); + df[0] = df[1] = (pt->ns_cols - 1) * (pt->ns_rows - 1); - if (ns_rows <= 1 || ns_cols <= 1) + if (pt->ns_rows <= 1 || pt->ns_cols <= 1) { chisq[0] = chisq[1] = SYSMIS; return; } - for (r = 0; r < n_rows; r++) - for (c = 0; c < n_cols; c++) + for (r = 0; r < pt->n_rows; r++) + for (c = 0; c < pt->n_cols; c++) { - const double expected = row_tot[r] * col_tot[c] / W; - const double freq = mat[n_cols * r + c]; + const double expected = pt->row_tot[r] * pt->col_tot[c] / pt->total; + const double freq = pt->mat[pt->n_cols * r + c]; const double residual = freq - expected; chisq[0] += residual * residual / expected; @@ -2396,7 +2180,7 @@ calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ], chisq[1] = SYSMIS; /* Calculate Yates and Fisher exact test. */ - if (ns_cols == 2 && ns_rows == 2) + if (pt->ns_cols == 2 && pt->ns_rows == 2) { double f11, f12, f21, f22; @@ -2404,8 +2188,8 @@ calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ], int nz_cols[2]; int i, j; - for (i = j = 0; i < n_cols; i++) - if (col_tot[i] != 0.) + for (i = j = 0; i < pt->n_cols; i++) + if (pt->col_tot[i] != 0.) { nz_cols[j++] = i; if (j == 2) @@ -2414,18 +2198,18 @@ calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ], assert (j == 2); - f11 = mat[nz_cols[0]]; - f12 = mat[nz_cols[1]]; - f21 = mat[nz_cols[0] + n_cols]; - f22 = mat[nz_cols[1] + n_cols]; + f11 = pt->mat[nz_cols[0]]; + f12 = pt->mat[nz_cols[1]]; + f21 = pt->mat[nz_cols[0] + pt->n_cols]; + f22 = pt->mat[nz_cols[1] + pt->n_cols]; } /* Yates. 
*/ { - const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W; + const double pt_ = fabs (f11 * f22 - f12 * f21) - 0.5 * pt->total; - if (x > 0.) - chisq[3] = (W * x * x + if (pt_ > 0.) + chisq[3] = (pt->total * pow2 (pt_) / (f11 + f12) / (f21 + f22) / (f11 + f21) / (f12 + f22)); else @@ -2440,21 +2224,22 @@ calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ], } /* Calculate Mantel-Haenszel. */ - if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR])) + if (var_is_numeric (pt->vars[ROW_VAR]) && var_is_numeric (pt->vars[COL_VAR])) { double r, ase_0, ase_1; - calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1); + calc_r (pt, (double *) pt->rows, (double *) pt->cols, &r, &ase_0, &ase_1); - chisq[4] = (W - 1.) * r * r; + chisq[4] = (pt->total - 1.) * r * r; df[4] = 1; } } /* Calculate the value of Pearson's r. r is stored into R, ase_1 into ASE_1, and ase_0 into ASE_0. The row and column values must be - passed in X and Y. */ + passed in PT and Y. */ static void -calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1) +calc_r (struct pivot_table *pt, + double *PT, double *Y, double *r, double *ase_0, double *ase_1) { double SX, SY, S, T; double Xbar, Ybar; @@ -2463,52 +2248,52 @@ calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1) double sum_Yc, sum_Y2c; int i, j; - for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++) - for (j = 0; j < n_cols; j++) + for (sum_X2Y2f = sum_XYf = 0., i = 0; i < pt->n_rows; i++) + for (j = 0; j < pt->n_cols; j++) { - double fij = mat[j + i * n_cols]; - double product = X[i] * Y[j]; + double fij = pt->mat[j + i * pt->n_cols]; + double product = PT[i] * Y[j]; double temp = fij * product; sum_XYf += temp; sum_X2Y2f += temp * product; } - for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++) + for (sum_Xr = sum_X2r = 0., i = 0; i < pt->n_rows; i++) { - sum_Xr += X[i] * row_tot[i]; - sum_X2r += pow2 (X[i]) * row_tot[i]; + sum_Xr += PT[i] * pt->row_tot[i]; + sum_X2r += pow2 (PT[i]) 
* pt->row_tot[i]; } - Xbar = sum_Xr / W; + Xbar = sum_Xr / pt->total; - for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++) + for (sum_Yc = sum_Y2c = 0., i = 0; i < pt->n_cols; i++) { - sum_Yc += Y[i] * col_tot[i]; - sum_Y2c += Y[i] * Y[i] * col_tot[i]; + sum_Yc += Y[i] * pt->col_tot[i]; + sum_Y2c += Y[i] * Y[i] * pt->col_tot[i]; } - Ybar = sum_Yc / W; + Ybar = sum_Yc / pt->total; - S = sum_XYf - sum_Xr * sum_Yc / W; - SX = sum_X2r - pow2 (sum_Xr) / W; - SY = sum_Y2c - pow2 (sum_Yc) / W; + S = sum_XYf - sum_Xr * sum_Yc / pt->total; + SX = sum_X2r - pow2 (sum_Xr) / pt->total; + SY = sum_Y2c - pow2 (sum_Yc) / pt->total; T = sqrt (SX * SY); *r = S / T; - *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / W) / (sum_X2r * sum_Y2c)); + *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / pt->total) / (sum_X2r * sum_Y2c)); { double s, c, y, t; - for (s = c = 0., i = 0; i < n_rows; i++) - for (j = 0; j < n_cols; j++) + for (s = c = 0., i = 0; i < pt->n_rows; i++) + for (j = 0; j < pt->n_cols; j++) { double Xresid, Yresid; double temp; - Xresid = X[i] - Xbar; + Xresid = PT[i] - Xbar; Yresid = Y[j] - Ybar; temp = (T * Xresid * Yresid - ((S / (2. * T)) * (Xresid * Xresid * SY + Yresid * Yresid * SX))); - y = mat[j + i * n_cols] * temp * temp - c; + y = pt->mat[j + i * pt->n_cols] * temp * temp - c; t = s + y; c = (t - s) - y; s = t; @@ -2517,88 +2302,73 @@ calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1) } } -static double somers_d_v[3]; -static double somers_d_ase[3]; -static double somers_d_t[3]; - /* Calculate symmetric statistics and their asymptotic standard errors. Returns 0 if none could be calculated. 
*/ static int -calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], - double t[N_SYMMETRIC]) +calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, + double v[N_SYMMETRIC], double ase[N_SYMMETRIC], + double t[N_SYMMETRIC], + double somers_d_v[3], double somers_d_ase[3], + double somers_d_t[3]) { - int q = MIN (ns_rows, ns_cols); + int q, i; + q = MIN (pt->ns_rows, pt->ns_cols); if (q <= 1) return 0; - { - int i; - - if (v) - for (i = 0; i < N_SYMMETRIC; i++) - v[i] = ase[i] = t[i] = SYSMIS; - } + for (i = 0; i < N_SYMMETRIC; i++) + v[i] = ase[i] = t[i] = SYSMIS; /* Phi, Cramer's V, contingency coefficient. */ - if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]) + if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC))) { double Xp = 0.; /* Pearson chi-square. */ + int r, c; - { - int r, c; - - for (r = 0; r < n_rows; r++) - for (c = 0; c < n_cols; c++) - { - const double expected = row_tot[r] * col_tot[c] / W; - const double freq = mat[n_cols * r + c]; - const double residual = freq - expected; + for (r = 0; r < pt->n_rows; r++) + for (c = 0; c < pt->n_cols; c++) + { + const double expected = pt->row_tot[r] * pt->col_tot[c] / pt->total; + const double freq = pt->mat[pt->n_cols * r + c]; + const double residual = freq - expected; - Xp += residual * residual / expected; - } - } + Xp += residual * residual / expected; + } - if (cmd.a_statistics[CRS_ST_PHI]) + if (proc->statistics & (1u << CRS_ST_PHI)) { - v[0] = sqrt (Xp / W); - v[1] = sqrt (Xp / (W * (q - 1))); + v[0] = sqrt (Xp / pt->total); + v[1] = sqrt (Xp / (pt->total * (q - 1))); } - if (cmd.a_statistics[CRS_ST_CC]) - v[2] = sqrt (Xp / (Xp + W)); + if (proc->statistics & (1u << CRS_ST_CC)) + v[2] = sqrt (Xp / (Xp + pt->total)); } - if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU] - || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D]) + if (proc->statistics & ((1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU) + | (1u << CRS_ST_GAMMA) | (1u << 
CRS_ST_D))) { double *cum; double Dr, Dc; double P, Q; double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum; double btau_var; + int r, c; - { - int r, c; - - Dr = Dc = W * W; - for (r = 0; r < n_rows; r++) - Dr -= pow2 (row_tot[r]); - for (c = 0; c < n_cols; c++) - Dc -= pow2 (col_tot[c]); - } - - { - int r, c; + Dr = Dc = pow2 (pt->total); + for (r = 0; r < pt->n_rows; r++) + Dr -= pow2 (pt->row_tot[r]); + for (c = 0; c < pt->n_cols; c++) + Dc -= pow2 (pt->col_tot[c]); - cum = xnmalloc (n_cols * n_rows, sizeof *cum); - for (c = 0; c < n_cols; c++) - { - double ct = 0.; + cum = xnmalloc (pt->n_cols * pt->n_rows, sizeof *cum); + for (c = 0; c < pt->n_cols; c++) + { + double ct = 0.; - for (r = 0; r < n_rows; r++) - cum[c + r * n_cols] = ct += mat[c + r * n_cols]; - } - } + for (r = 0; r < pt->n_rows; r++) + cum[c + r * pt->n_cols] = ct += pt->mat[c + r * pt->n_cols]; + } /* P and Q. */ { @@ -2606,44 +2376,44 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], double Cij, Dij; P = Q = 0.; - for (i = 0; i < n_rows; i++) + for (i = 0; i < pt->n_rows; i++) { Cij = Dij = 0.; - for (j = 1; j < n_cols; j++) - Cij += col_tot[j] - cum[j + i * n_cols]; + for (j = 1; j < pt->n_cols; j++) + Cij += pt->col_tot[j] - cum[j + i * pt->n_cols]; if (i > 0) - for (j = 1; j < n_cols; j++) - Dij += cum[j + (i - 1) * n_cols]; + for (j = 1; j < pt->n_cols; j++) + Dij += cum[j + (i - 1) * pt->n_cols]; for (j = 0;;) { - double fij = mat[j + i * n_cols]; + double fij = pt->mat[j + i * pt->n_cols]; P += fij * Cij; Q += fij * Dij; - if (++j == n_cols) + if (++j == pt->n_cols) break; - assert (j < n_cols); + assert (j < pt->n_cols); - Cij -= col_tot[j] - cum[j + i * n_cols]; - Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols]; + Cij -= pt->col_tot[j] - cum[j + i * pt->n_cols]; + Dij += pt->col_tot[j - 1] - cum[j - 1 + i * pt->n_cols]; if (i > 0) { - Cij += cum[j - 1 + (i - 1) * n_cols]; - Dij -= cum[j + (i - 1) * n_cols]; + Cij += cum[j - 1 + (i - 1) * pt->n_cols]; + Dij -= 
cum[j + (i - 1) * pt->n_cols]; } } } } - if (cmd.a_statistics[CRS_ST_BTAU]) + if (proc->statistics & (1u << CRS_ST_BTAU)) v[3] = (P - Q) / sqrt (Dr * Dc); - if (cmd.a_statistics[CRS_ST_CTAU]) - v[4] = (q * (P - Q)) / ((W * W) * (q - 1)); - if (cmd.a_statistics[CRS_ST_GAMMA]) + if (proc->statistics & (1u << CRS_ST_CTAU)) + v[4] = (q * (P - Q)) / (pow2 (pt->total) * (q - 1)); + if (proc->statistics & (1u << CRS_ST_GAMMA)) v[5] = (P - Q) / (P + Q); /* ASE for tau-b, tau-c, gamma. Calculations could be @@ -2653,26 +2423,26 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], double Cij, Dij; btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.; - for (i = 0; i < n_rows; i++) + for (i = 0; i < pt->n_rows; i++) { Cij = Dij = 0.; - for (j = 1; j < n_cols; j++) - Cij += col_tot[j] - cum[j + i * n_cols]; + for (j = 1; j < pt->n_cols; j++) + Cij += pt->col_tot[j] - cum[j + i * pt->n_cols]; if (i > 0) - for (j = 1; j < n_cols; j++) - Dij += cum[j + (i - 1) * n_cols]; + for (j = 1; j < pt->n_cols; j++) + Dij += cum[j + (i - 1) * pt->n_cols]; for (j = 0;;) { - double fij = mat[j + i * n_cols]; + double fij = pt->mat[j + i * pt->n_cols]; - if (cmd.a_statistics[CRS_ST_BTAU]) + if (proc->statistics & (1u << CRS_ST_BTAU)) { const double temp = (2. 
* sqrt (Dr * Dc) * (Cij - Dij) - + v[3] * (row_tot[i] * Dc - + col_tot[j] * Dr)); + + v[3] * (pt->row_tot[i] * Dc + + pt->col_tot[j] * Dr)); btau_cum += fij * temp * temp; } @@ -2681,84 +2451,84 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], ctau_cum += fij * temp * temp; } - if (cmd.a_statistics[CRS_ST_GAMMA]) + if (proc->statistics & (1u << CRS_ST_GAMMA)) { const double temp = Q * Cij - P * Dij; gamma_cum += fij * temp * temp; } - if (cmd.a_statistics[CRS_ST_D]) + if (proc->statistics & (1u << CRS_ST_D)) { d_yx_cum += fij * pow2 (Dr * (Cij - Dij) - - (P - Q) * (W - row_tot[i])); + - (P - Q) * (pt->total - pt->row_tot[i])); d_xy_cum += fij * pow2 (Dc * (Dij - Cij) - - (Q - P) * (W - col_tot[j])); + - (Q - P) * (pt->total - pt->col_tot[j])); } - if (++j == n_cols) + if (++j == pt->n_cols) break; - assert (j < n_cols); + assert (j < pt->n_cols); - Cij -= col_tot[j] - cum[j + i * n_cols]; - Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols]; + Cij -= pt->col_tot[j] - cum[j + i * pt->n_cols]; + Dij += pt->col_tot[j - 1] - cum[j - 1 + i * pt->n_cols]; if (i > 0) { - Cij += cum[j - 1 + (i - 1) * n_cols]; - Dij -= cum[j + (i - 1) * n_cols]; + Cij += cum[j - 1 + (i - 1) * pt->n_cols]; + Dij -= cum[j + (i - 1) * pt->n_cols]; } } } } btau_var = ((btau_cum - - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc)))) + - (pt->total * pow2 (pt->total * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc)))) / pow2 (Dr * Dc)); - if (cmd.a_statistics[CRS_ST_BTAU]) + if (proc->statistics & (1u << CRS_ST_BTAU)) { ase[3] = sqrt (btau_var); - t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W) + t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / pt->total) / (Dr * Dc))); } - if (cmd.a_statistics[CRS_ST_CTAU]) + if (proc->statistics & (1u << CRS_ST_CTAU)) { - ase[4] = ((2 * q / ((q - 1) * W * W)) - * sqrt (ctau_cum - (P - Q) * (P - Q) / W)); + ase[4] = ((2 * q / ((q - 1) * pow2 (pt->total))) + * sqrt (ctau_cum - (P - Q) * (P - Q) / pt->total)); t[4] = v[4] / ase[4]; } - 
if (cmd.a_statistics[CRS_ST_GAMMA]) + if (proc->statistics & (1u << CRS_ST_GAMMA)) { ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum)); t[5] = v[5] / (2. / (P + Q) - * sqrt (ctau_cum - (P - Q) * (P - Q) / W)); + * sqrt (ctau_cum - (P - Q) * (P - Q) / pt->total)); } - if (cmd.a_statistics[CRS_ST_D]) + if (proc->statistics & (1u << CRS_ST_D)) { somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr)); somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc); somers_d_t[0] = (somers_d_v[0] / (4 / (Dc + Dr) - * sqrt (ctau_cum - pow2 (P - Q) / W))); + * sqrt (ctau_cum - pow2 (P - Q) / pt->total))); somers_d_v[1] = (P - Q) / Dc; somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum); somers_d_t[1] = (somers_d_v[1] / (2. / Dc - * sqrt (ctau_cum - pow2 (P - Q) / W))); + * sqrt (ctau_cum - pow2 (P - Q) / pt->total))); somers_d_v[2] = (P - Q) / Dr; somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum); somers_d_t[2] = (somers_d_v[2] / (2. / Dr - * sqrt (ctau_cum - pow2 (P - Q) / W))); + * sqrt (ctau_cum - pow2 (P - Q) / pt->total))); } free (cum); } /* Spearman correlation, Pearson's r. */ - if (cmd.a_statistics[CRS_ST_CORR]) + if (proc->statistics & (1u << CRS_ST_CORR)) { - double *R = xmalloca (sizeof *R * n_rows); - double *C = xmalloca (sizeof *C * n_cols); + double *R = xmalloc (sizeof *R * pt->n_rows); + double *C = xmalloc (sizeof *C * pt->n_cols); { double y, t, c = 0., s = 0.; @@ -2766,14 +2536,14 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], for (;;) { - R[i] = s + (row_tot[i] + 1.) / 2.; - y = row_tot[i] - c; + R[i] = s + (pt->row_tot[i] + 1.) / 2.; + y = pt->row_tot[i] - c; t = s + y; c = (t - s) - y; s = t; - if (++i == n_rows) + if (++i == pt->n_rows) break; - assert (i < n_rows); + assert (i < pt->n_rows); } } @@ -2783,73 +2553,73 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], for (;;) { - C[j] = s + (col_tot[j] + 1.) / 2; - y = col_tot[j] - c; + C[j] = s + (pt->col_tot[j] + 1.) 
/ 2; + y = pt->col_tot[j] - c; t = s + y; c = (t - s) - y; s = t; - if (++j == n_cols) + if (++j == pt->n_cols) break; - assert (j < n_cols); + assert (j < pt->n_cols); } } - calc_r (R, C, &v[6], &t[6], &ase[6]); + calc_r (pt, R, C, &v[6], &t[6], &ase[6]); t[6] = v[6] / t[6]; - freea (R); - freea (C); + free (R); + free (C); - calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]); + calc_r (pt, (double *) pt->rows, (double *) pt->cols, &v[7], &t[7], &ase[7]); t[7] = v[7] / t[7]; } /* Cohen's kappa. */ - if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols) + if (proc->statistics & (1u << CRS_ST_KAPPA) && pt->ns_rows == pt->ns_cols) { double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci; int i, j; for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0; - i < ns_rows; i++, j++) + i < pt->ns_rows; i++, j++) { double prod, sum; - while (col_tot[j] == 0.) + while (pt->col_tot[j] == 0.) j++; - prod = row_tot[i] * col_tot[j]; - sum = row_tot[i] + col_tot[j]; + prod = pt->row_tot[i] * pt->col_tot[j]; + sum = pt->row_tot[i] + pt->col_tot[j]; - sum_fii += mat[j + i * n_cols]; + sum_fii += pt->mat[j + i * pt->n_cols]; sum_rici += prod; - sum_fiiri_ci += mat[j + i * n_cols] * sum; + sum_fiiri_ci += pt->mat[j + i * pt->n_cols] * sum; sum_riciri_ci += prod * sum; } - for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++) - for (j = 0; j < ns_cols; j++) + for (sum_fijri_ci2 = 0., i = 0; i < pt->ns_rows; i++) + for (j = 0; j < pt->ns_cols; j++) { - double sum = row_tot[i] + col_tot[j]; - sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum; + double sum = pt->row_tot[i] + pt->col_tot[j]; + sum_fijri_ci2 += pt->mat[j + i * pt->n_cols] * sum * sum; } - v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici); + v[8] = (pt->total * sum_fii - sum_rici) / (pow2 (pt->total) - sum_rici); - ase[8] = sqrt ((W * W * sum_rici + ase[8] = sqrt ((pow2 (pt->total) * sum_rici + sum_rici * sum_rici - - W * sum_riciri_ci) - / (W * (W * W - sum_rici) * (W * W - 
sum_rici))); + - pt->total * sum_riciri_ci) + / (pt->total * (pow2 (pt->total) - sum_rici) * (pow2 (pt->total) - sum_rici))); #if 0 - t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii)) - / pow2 (W * W - sum_rici)) - + ((2. * (W - sum_fii) + t[8] = v[8] / sqrt (pt->total * (((sum_fii * (pt->total - sum_fii)) + / pow2 (pow2 (pt->total) - sum_rici)) + + ((2. * (pt->total - sum_fii) * (2. * sum_fii * sum_rici - - W * sum_fiiri_ci)) - / cube (W * W - sum_rici)) - + (pow2 (W - sum_fii) - * (W * sum_fijri_ci2 - 4. + - pt->total * sum_fiiri_ci)) + / cube (pow2 (pt->total) - sum_rici)) + + (pow2 (pt->total - sum_fii) + * (pt->total * sum_fijri_ci2 - 4. * sum_rici * sum_rici) - / pow4 (W * W - sum_rici)))); + / pow4 (pow2 (pt->total) - sum_rici)))); #else t[8] = v[8] / ase[8]; #endif @@ -2860,7 +2630,8 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], /* Calculate risk estimate. */ static int -calc_risk (double *value, double *upper, double *lower, union value *c) +calc_risk (struct pivot_table *pt, + double *value, double *upper, double *lower, union value *c) { double f11, f12, f21, f22; double v; @@ -2872,15 +2643,15 @@ calc_risk (double *value, double *upper, double *lower, union value *c) value[i] = upper[i] = lower[i] = SYSMIS; } - if (ns_rows != 2 || ns_cols != 2) + if (pt->ns_rows != 2 || pt->ns_cols != 2) return 0; { int nz_cols[2]; int i, j; - for (i = j = 0; i < n_cols; i++) - if (col_tot[i] != 0.) + for (i = j = 0; i < pt->n_cols; i++) + if (pt->col_tot[i] != 0.) 
{ nz_cols[j++] = i; if (j == 2) @@ -2889,13 +2660,13 @@ calc_risk (double *value, double *upper, double *lower, union value *c) assert (j == 2); - f11 = mat[nz_cols[0]]; - f12 = mat[nz_cols[1]]; - f21 = mat[nz_cols[0] + n_cols]; - f22 = mat[nz_cols[1] + n_cols]; + f11 = pt->mat[nz_cols[0]]; + f12 = pt->mat[nz_cols[1]]; + f21 = pt->mat[nz_cols[0] + pt->n_cols]; + f22 = pt->mat[nz_cols[1] + pt->n_cols]; - c[0] = cols[nz_cols[0]]; - c[1] = cols[nz_cols[1]]; + c[0] = pt->cols[nz_cols[0]]; + c[1] = pt->cols[nz_cols[1]]; } value[0] = (f11 * f22) / (f12 * f21); @@ -2920,7 +2691,8 @@ calc_risk (double *value, double *upper, double *lower, union value *c) /* Calculate directional measures. */ static int -calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], +calc_directional (struct crosstabs_proc *proc, struct pivot_table *pt, + double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], double t[N_DIRECTIONAL]) { { @@ -2931,27 +2703,27 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], } /* Lambda. */ - if (cmd.a_statistics[CRS_ST_LAMBDA]) + if (proc->statistics & (1u << CRS_ST_LAMBDA)) { - double *fim = xnmalloc (n_rows, sizeof *fim); - int *fim_index = xnmalloc (n_rows, sizeof *fim_index); - double *fmj = xnmalloc (n_cols, sizeof *fmj); - int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index); + double *fim = xnmalloc (pt->n_rows, sizeof *fim); + int *fim_index = xnmalloc (pt->n_rows, sizeof *fim_index); + double *fmj = xnmalloc (pt->n_cols, sizeof *fmj); + int *fmj_index = xnmalloc (pt->n_cols, sizeof *fmj_index); double sum_fim, sum_fmj; double rm, cm; int rm_index, cm_index; int i, j; /* Find maximum for each row and their sum. 
*/ - for (sum_fim = 0., i = 0; i < n_rows; i++) + for (sum_fim = 0., i = 0; i < pt->n_rows; i++) { - double max = mat[i * n_cols]; + double max = pt->mat[i * pt->n_cols]; int index = 0; - for (j = 1; j < n_cols; j++) - if (mat[j + i * n_cols] > max) + for (j = 1; j < pt->n_cols; j++) + if (pt->mat[j + i * pt->n_cols] > max) { - max = mat[j + i * n_cols]; + max = pt->mat[j + i * pt->n_cols]; index = j; } @@ -2960,15 +2732,15 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], } /* Find maximum for each column. */ - for (sum_fmj = 0., j = 0; j < n_cols; j++) + for (sum_fmj = 0., j = 0; j < pt->n_cols; j++) { - double max = mat[j]; + double max = pt->mat[j]; int index = 0; - for (i = 1; i < n_rows; i++) - if (mat[j + i * n_cols] > max) + for (i = 1; i < pt->n_rows; i++) + if (pt->mat[j + i * pt->n_cols] > max) { - max = mat[j + i * n_cols]; + max = pt->mat[j + i * pt->n_cols]; index = i; } @@ -2977,83 +2749,83 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], } /* Find maximum row total. */ - rm = row_tot[0]; + rm = pt->row_tot[0]; rm_index = 0; - for (i = 1; i < n_rows; i++) - if (row_tot[i] > rm) + for (i = 1; i < pt->n_rows; i++) + if (pt->row_tot[i] > rm) { - rm = row_tot[i]; + rm = pt->row_tot[i]; rm_index = i; } /* Find maximum column total. */ - cm = col_tot[0]; + cm = pt->col_tot[0]; cm_index = 0; - for (j = 1; j < n_cols; j++) - if (col_tot[j] > cm) + for (j = 1; j < pt->n_cols; j++) + if (pt->col_tot[j] > cm) { - cm = col_tot[j]; + cm = pt->col_tot[j]; cm_index = j; } - v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm); - v[1] = (sum_fmj - rm) / (W - rm); - v[2] = (sum_fim - cm) / (W - cm); + v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * pt->total - rm - cm); + v[1] = (sum_fmj - rm) / (pt->total - rm); + v[2] = (sum_fim - cm) / (pt->total - cm); - /* ASE1 for Y given X. */ + /* ASE1 for Y given PT. 
*/ { double accum; - for (accum = 0., i = 0; i < n_rows; i++) - for (j = 0; j < n_cols; j++) + for (accum = 0., i = 0; i < pt->n_rows; i++) + for (j = 0; j < pt->n_cols; j++) { const int deltaj = j == cm_index; - accum += (mat[j + i * n_cols] + accum += (pt->mat[j + i * pt->n_cols] * pow2 ((j == fim_index[i]) - deltaj + v[0] * deltaj)); } - ase[2] = sqrt (accum - W * v[0]) / (W - cm); + ase[2] = sqrt (accum - pt->total * v[0]) / (pt->total - cm); } - /* ASE0 for Y given X. */ + /* ASE0 for Y given PT. */ { double accum; - for (accum = 0., i = 0; i < n_rows; i++) + for (accum = 0., i = 0; i < pt->n_rows; i++) if (cm_index != fim_index[i]) - accum += (mat[i * n_cols + fim_index[i]] - + mat[i * n_cols + cm_index]); - t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm)); + accum += (pt->mat[i * pt->n_cols + fim_index[i]] + + pt->mat[i * pt->n_cols + cm_index]); + t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / pt->total) / (pt->total - cm)); } - /* ASE1 for X given Y. */ + /* ASE1 for PT given Y. */ { double accum; - for (accum = 0., i = 0; i < n_rows; i++) - for (j = 0; j < n_cols; j++) + for (accum = 0., i = 0; i < pt->n_rows; i++) + for (j = 0; j < pt->n_cols; j++) { const int deltaj = i == rm_index; - accum += (mat[j + i * n_cols] + accum += (pt->mat[j + i * pt->n_cols] * pow2 ((i == fmj_index[j]) - deltaj + v[0] * deltaj)); } - ase[1] = sqrt (accum - W * v[0]) / (W - rm); + ase[1] = sqrt (accum - pt->total * v[0]) / (pt->total - rm); } - /* ASE0 for X given Y. */ + /* ASE0 for PT given Y. 
*/ { double accum; - for (accum = 0., j = 0; j < n_cols; j++) + for (accum = 0., j = 0; j < pt->n_cols; j++) if (rm_index != fmj_index[j]) - accum += (mat[j + n_cols * fmj_index[j]] - + mat[j + n_cols * rm_index]); - t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm)); + accum += (pt->mat[j + pt->n_cols * fmj_index[j]] + + pt->mat[j + pt->n_cols * rm_index]); + t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / pt->total) / (pt->total - rm)); } /* Symmetric ASE0 and ASE1. */ @@ -3061,18 +2833,18 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], double accum0; double accum1; - for (accum0 = accum1 = 0., i = 0; i < n_rows; i++) - for (j = 0; j < n_cols; j++) + for (accum0 = accum1 = 0., i = 0; i < pt->n_rows; i++) + for (j = 0; j < pt->n_cols; j++) { int temp0 = (fmj_index[j] == i) + (fim_index[i] == j); int temp1 = (i == rm_index) + (j == cm_index); - accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1); - accum1 += (mat[j + i * n_cols] + accum0 += pt->mat[j + i * pt->n_cols] * pow2 (temp0 - temp1); + accum1 += (pt->mat[j + i * pt->n_cols] * pow2 (temp0 + (v[0] - 1.) * temp1)); } - ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm); - t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W)) - / (2. * W - rm - cm)); + ase[0] = sqrt (accum1 - 4. * pt->total * v[0] * v[0]) / (2. * pt->total - rm - cm); + t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / pt->total)) + / (2. 
* pt->total - rm - cm)); } free (fim); @@ -3084,123 +2856,131 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], double sum_fij2_ri, sum_fij2_ci; double sum_ri2, sum_cj2; - for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++) - for (j = 0; j < n_cols; j++) + for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < pt->n_rows; i++) + for (j = 0; j < pt->n_cols; j++) { - double temp = pow2 (mat[j + i * n_cols]); - sum_fij2_ri += temp / row_tot[i]; - sum_fij2_ci += temp / col_tot[j]; + double temp = pow2 (pt->mat[j + i * pt->n_cols]); + sum_fij2_ri += temp / pt->row_tot[i]; + sum_fij2_ci += temp / pt->col_tot[j]; } - for (sum_ri2 = 0., i = 0; i < n_rows; i++) - sum_ri2 += pow2 (row_tot[i]); + for (sum_ri2 = 0., i = 0; i < pt->n_rows; i++) + sum_ri2 += pow2 (pt->row_tot[i]); - for (sum_cj2 = 0., j = 0; j < n_cols; j++) - sum_cj2 += pow2 (col_tot[j]); + for (sum_cj2 = 0., j = 0; j < pt->n_cols; j++) + sum_cj2 += pow2 (pt->col_tot[j]); - v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2); - v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2); + v[3] = (pt->total * sum_fij2_ci - sum_ri2) / (pow2 (pt->total) - sum_ri2); + v[4] = (pt->total * sum_fij2_ri - sum_cj2) / (pow2 (pt->total) - sum_cj2); } } - if (cmd.a_statistics[CRS_ST_UC]) + if (proc->statistics & (1u << CRS_ST_UC)) { double UX, UY, UXY, P; double ase1_yx, ase1_xy, ase1_sym; int i, j; - for (UX = 0., i = 0; i < n_rows; i++) - if (row_tot[i] > 0.) - UX -= row_tot[i] / W * log (row_tot[i] / W); + for (UX = 0., i = 0; i < pt->n_rows; i++) + if (pt->row_tot[i] > 0.) + UX -= pt->row_tot[i] / pt->total * log (pt->row_tot[i] / pt->total); - for (UY = 0., j = 0; j < n_cols; j++) - if (col_tot[j] > 0.) - UY -= col_tot[j] / W * log (col_tot[j] / W); + for (UY = 0., j = 0; j < pt->n_cols; j++) + if (pt->col_tot[j] > 0.) 
+ UY -= pt->col_tot[j] / pt->total * log (pt->col_tot[j] / pt->total); - for (UXY = P = 0., i = 0; i < n_rows; i++) - for (j = 0; j < n_cols; j++) + for (UXY = P = 0., i = 0; i < pt->n_rows; i++) + for (j = 0; j < pt->n_cols; j++) { - double entry = mat[j + i * n_cols]; + double entry = pt->mat[j + i * pt->n_cols]; if (entry <= 0.) continue; - P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry))); - UXY -= entry / W * log (entry / W); + P += entry * pow2 (log (pt->col_tot[j] * pt->row_tot[i] / (pt->total * entry))); + UXY -= entry / pt->total * log (entry / pt->total); } - for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++) - for (j = 0; j < n_cols; j++) + for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < pt->n_rows; i++) + for (j = 0; j < pt->n_cols; j++) { - double entry = mat[j + i * n_cols]; + double entry = pt->mat[j + i * pt->n_cols]; if (entry <= 0.) continue; - ase1_yx += entry * pow2 (UY * log (entry / row_tot[i]) - + (UX - UXY) * log (col_tot[j] / W)); - ase1_xy += entry * pow2 (UX * log (entry / col_tot[j]) - + (UY - UXY) * log (row_tot[i] / W)); + ase1_yx += entry * pow2 (UY * log (entry / pt->row_tot[i]) + + (UX - UXY) * log (pt->col_tot[j] / pt->total)); + ase1_xy += entry * pow2 (UX * log (entry / pt->col_tot[j]) + + (UY - UXY) * log (pt->row_tot[i] / pt->total)); ase1_sym += entry * pow2 ((UXY - * log (row_tot[i] * col_tot[j] / (W * W))) - - (UX + UY) * log (entry / W)); + * log (pt->row_tot[i] * pt->col_tot[j] / pow2 (pt->total))) + - (UX + UY) * log (entry / pt->total)); } v[5] = 2. * ((UX + UY - UXY) / (UX + UY)); - ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym); - t[5] = v[5] / ((2. / (W * (UX + UY))) - * sqrt (P - pow2 (UX + UY - UXY) / W)); + ase[5] = (2. / (pt->total * pow2 (UX + UY))) * sqrt (ase1_sym); + t[5] = v[5] / ((2. 
/ (pt->total * (UX + UY))) + * sqrt (P - pow2 (UX + UY - UXY) / pt->total)); v[6] = (UX + UY - UXY) / UX; - ase[6] = sqrt (ase1_xy) / (W * UX * UX); - t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX)); + ase[6] = sqrt (ase1_xy) / (pt->total * UX * UX); + t[6] = v[6] / (sqrt (P - pt->total * pow2 (UX + UY - UXY)) / (pt->total * UX)); v[7] = (UX + UY - UXY) / UY; - ase[7] = sqrt (ase1_yx) / (W * UY * UY); - t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY)); + ase[7] = sqrt (ase1_yx) / (pt->total * UY * UY); + t[7] = v[7] / (sqrt (P - pt->total * pow2 (UX + UY - UXY)) / (pt->total * UY)); } /* Somers' D. */ - if (cmd.a_statistics[CRS_ST_D]) + if (proc->statistics & (1u << CRS_ST_D)) { - int i; + double v_dummy[N_SYMMETRIC]; + double ase_dummy[N_SYMMETRIC]; + double t_dummy[N_SYMMETRIC]; + double somers_d_v[3]; + double somers_d_ase[3]; + double somers_d_t[3]; - if (!sym) - calc_symmetric (NULL, NULL, NULL); - for (i = 0; i < 3; i++) - { - v[8 + i] = somers_d_v[i]; - ase[8 + i] = somers_d_ase[i]; - t[8 + i] = somers_d_t[i]; - } + if (calc_symmetric (proc, pt, v_dummy, ase_dummy, t_dummy, + somers_d_v, somers_d_ase, somers_d_t)) + { + int i; + for (i = 0; i < 3; i++) + { + v[8 + i] = somers_d_v[i]; + ase[8 + i] = somers_d_ase[i]; + t[8 + i] = somers_d_t[i]; + } + } } /* Eta. 
*/ - if (cmd.a_statistics[CRS_ST_ETA]) + if (proc->statistics & (1u << CRS_ST_ETA)) { { double sum_Xr, sum_X2r; double SX, SXW; int i, j; - for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++) + for (sum_Xr = sum_X2r = 0., i = 0; i < pt->n_rows; i++) { - sum_Xr += rows[i].f * row_tot[i]; - sum_X2r += pow2 (rows[i].f) * row_tot[i]; + sum_Xr += pt->rows[i].f * pt->row_tot[i]; + sum_X2r += pow2 (pt->rows[i].f) * pt->row_tot[i]; } - SX = sum_X2r - pow2 (sum_Xr) / W; + SX = sum_X2r - pow2 (sum_Xr) / pt->total; - for (SXW = 0., j = 0; j < n_cols; j++) + for (SXW = 0., j = 0; j < pt->n_cols; j++) { double cum; - for (cum = 0., i = 0; i < n_rows; i++) + for (cum = 0., i = 0; i < pt->n_rows; i++) { - SXW += pow2 (rows[i].f) * mat[j + i * n_cols]; - cum += rows[i].f * mat[j + i * n_cols]; + SXW += pow2 (pt->rows[i].f) * pt->mat[j + i * pt->n_cols]; + cum += pt->rows[i].f * pt->mat[j + i * pt->n_cols]; } - SXW -= cum * cum / col_tot[j]; + SXW -= cum * cum / pt->col_tot[j]; } v[11] = sqrt (1. - SXW / SX); } @@ -3210,24 +2990,24 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], double SY, SYW; int i, j; - for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++) + for (sum_Yc = sum_Y2c = 0., i = 0; i < pt->n_cols; i++) { - sum_Yc += cols[i].f * col_tot[i]; - sum_Y2c += pow2 (cols[i].f) * col_tot[i]; + sum_Yc += pt->cols[i].f * pt->col_tot[i]; + sum_Y2c += pow2 (pt->cols[i].f) * pt->col_tot[i]; } - SY = sum_Y2c - sum_Yc * sum_Yc / W; + SY = sum_Y2c - sum_Yc * sum_Yc / pt->total; - for (SYW = 0., i = 0; i < n_rows; i++) + for (SYW = 0., i = 0; i < pt->n_rows; i++) { double cum; - for (cum = 0., j = 0; j < n_cols; j++) + for (cum = 0., j = 0; j < pt->n_cols; j++) { - SYW += pow2 (cols[j].f) * mat[j + i * n_cols]; - cum += cols[j].f * mat[j + i * n_cols]; + SYW += pow2 (pt->cols[j].f) * pt->mat[j + i * pt->n_cols]; + cum += pt->cols[j].f * pt->mat[j + i * pt->n_cols]; } - SYW -= cum * cum / row_tot[i]; + SYW -= cum * cum / pt->row_tot[i]; } v[12] = sqrt (1. 
- SYW / SY); } @@ -3236,34 +3016,6 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], return 1; } -/* A wrapper around data_out() that limits string output to short - string width and null terminates the result. */ -static void -format_short (char *s, const struct fmt_spec *fp, const union value *v) -{ - struct fmt_spec fmt_subst; - - /* Limit to short string width. */ - if (fmt_is_string (fp->type)) - { - fmt_subst = *fp; - - assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX); - if (fmt_subst.type == FMT_A) - fmt_subst.w = MIN (8, fmt_subst.w); - else - fmt_subst.w = MIN (16, fmt_subst.w); - - fp = &fmt_subst; - } - - /* Format. */ - data_out (v, fp, s); - - /* Null terminate. */ - s[fp->w] = '\0'; -} - /* Local Variables: mode: c diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index 4a7f1f03..a829112e 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -143,7 +143,7 @@ struct factor_result { struct ll ll; - union value *value[2]; + union value value[2]; /* An array of factor metrics, one for each variable */ struct factor_metrics *metrics; @@ -171,6 +171,7 @@ factor_destroy (struct xfactor *fctr) int v; struct factor_result *result = ll_data (ll, struct factor_result, ll); + int i; for (v = 0; v < n_dependent_vars; ++v) { @@ -189,8 +190,10 @@ factor_destroy (struct xfactor *fctr) casereader_destroy (result->metrics[v].up_reader); } - free (result->value[0]); - free (result->value[1]); + for (i = 0; i < 2; i++) + if (fctr->indep_var[i]) + value_destroy (&result->value[i], + var_get_width (fctr->indep_var[i])); free (result->metrics); ll = ll_next (ll); free (result); @@ -600,7 +603,7 @@ show_boxplot_variables (const struct variable **dependent_var, #if 0 ds_put_format (&title, "%s = ", var_get_name (fctr->indep_var[0])); - var_append_value_name (fctr->indep_var[0], result->value[0], &title); + var_append_value_name (fctr->indep_var[0], &result->value[0], &title); #endif 
chart_write_title (ch, ds_cstr (&title)); @@ -877,6 +880,11 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, int v; int n_extrema = 1; struct factor_result *result = xzalloc (sizeof (*result)); + int i; + + for (i = 0; i < 2; i++) + if (factor->indep_var[i]) + value_init (&result->value[i], var_get_width (factor->indep_var[i])); result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics)); @@ -888,16 +896,10 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, if (c != NULL) { if ( level > 0) - { - result->value[0] = - value_dup (case_data (c, factor->indep_var[0]), - var_get_width (factor->indep_var[0])); - - if ( level > 1) - result->value[1] = - value_dup (case_data (c, factor->indep_var[1]), - var_get_width (factor->indep_var[1])); - } + for (i = 0; i < 2; i++) + if (factor->indep_var[i]) + value_copy (&result->value[i], case_data (c, factor->indep_var[i]), + var_get_width (factor->indep_var[i])); case_unref (c); } @@ -921,7 +923,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, struct subcase up_ordering; subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND); writer = sort_create_writer (&up_ordering, - casereader_get_value_cnt (reader)); + casereader_get_proto (reader)); subcase_destroy (&up_ordering); } else @@ -929,15 +931,15 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, /* but in this case, sorting is unnecessary, so an ordinary casewriter is sufficient */ writer = - autopaging_writer_create (casereader_get_value_cnt (reader)); + autopaging_writer_create (casereader_get_proto (reader)); } /* Sort or just iterate, whilst calculating moments etc */ while ((c = casereader_read (input)) != NULL) { - const casenumber loc = - case_data_idx (c, casereader_get_value_cnt (reader) - 1)->f; + int n_vals = caseproto_get_n_widths (casereader_get_proto (reader)); + const casenumber loc = case_data_idx (c, n_vals - 1)->f; 
const double weight = wv ? case_data (c, wv)->f : 1.0; const union value *value = case_data (c, dependent_vars[v]); @@ -1092,12 +1094,12 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, for (v = 0; v < n_dependent_vars; ++v) { struct factor_metrics *metric = &result->metrics[v]; + int n_vals = caseproto_get_n_widths (casereader_get_proto ( + metric->up_reader)); metric->box_whisker = box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges, - cmd->v_id, - casereader_get_value_cnt (metric->up_reader) - - 1); + cmd->v_id, n_vals - 1); order_stats_accumulate ((struct order_stats **) &metric->box_whisker, 1, @@ -1318,7 +1320,7 @@ show_summary (const struct variable **dependent_var, int n_dep_var, { int j = 0; struct ll *ll; - union value *last_value = NULL; + const union value *last_value = NULL; if ( v > 0 ) tab_hline (tbl, TAL_1, 0, n_cols -1 , @@ -1344,15 +1346,15 @@ show_summary (const struct variable **dependent_var, int n_dep_var, { if ( last_value == NULL || - compare_values_short (last_value, result->value[0], - fctr->indep_var[0])) + !value_equal (last_value, &result->value[0], + var_get_width (fctr->indep_var[0]))) { struct string str; - last_value = result->value[0]; + last_value = &result->value[0]; ds_init_empty (&str); - var_append_value_name (fctr->indep_var[0], result->value[0], + var_append_value_name (fctr->indep_var[0], &result->value[0], &str); tab_text (tbl, 1, @@ -1376,7 +1378,7 @@ show_summary (const struct variable **dependent_var, int n_dep_var, ds_init_empty (&str); var_append_value_name (fctr->indep_var[1], - result->value[1], &str); + &result->value[1], &str); tab_text (tbl, 2, heading_rows + j + @@ -1539,7 +1541,7 @@ show_descriptives (const struct variable **dependent_var, struct string vstr; ds_init_empty (&vstr); var_append_value_name (fctr->indep_var[0], - result->value[0], &vstr); + &result->value[0], &vstr); tab_text (tbl, 1, heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, @@ -1920,7 
+1922,7 @@ show_extremes (const struct variable **dependent_var, struct string vstr; ds_init_empty (&vstr); var_append_value_name (fctr->indep_var[0], - result->value[0], &vstr); + &result->value[0], &vstr); tab_text (tbl, 1, heading_rows + row_var_start + row_result_start, @@ -2052,7 +2054,7 @@ show_percentiles (const struct variable **dependent_var, struct string vstr; ds_init_empty (&vstr); var_append_value_name (fctr->indep_var[0], - result->value[0], &vstr); + &result->value[0], &vstr); tab_text (tbl, 1, heading_rows + row_var_start + i * PERCENTILE_ROWS, @@ -2155,13 +2157,13 @@ factor_to_string_concise (const struct xfactor *fctr, { if (fctr->indep_var[0]) { - var_append_value_name (fctr->indep_var[0], result->value[0], str); + var_append_value_name (fctr->indep_var[0], &result->value[0], str); if ( fctr->indep_var[1] ) { ds_put_cstr (str, ","); - var_append_value_name (fctr->indep_var[1], result->value[1], str); + var_append_value_name (fctr->indep_var[1], &result->value[1], str); ds_put_cstr (str, ")"); } @@ -2179,14 +2181,14 @@ factor_to_string (const struct xfactor *fctr, { ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0])); - var_append_value_name (fctr->indep_var[0], result->value[0], str); + var_append_value_name (fctr->indep_var[0], &result->value[0], str); if ( fctr->indep_var[1] ) { ds_put_cstr (str, ","); ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1])); - var_append_value_name (fctr->indep_var[1], result->value[1], str); + var_append_value_name (fctr->indep_var[1], &result->value[1], str); } ds_put_cstr (str, ")"); } diff --git a/src/language/stats/flip.c b/src/language/stats/flip.c index 7583374b..3ca2413f 100644 --- a/src/language/stats/flip.c +++ b/src/language/stats/flip.c @@ -49,26 +49,27 @@ #define _(msgid) gettext (msgid) /* List of variable names. 
*/ -struct varname +struct var_names { - struct varname *next; - char name[SHORT_NAME_LEN + 1]; + const char **names; + size_t n_names, allocated_names; }; +static void var_names_init (struct var_names *); +static void var_names_add (struct pool *, struct var_names *, const char *); + /* Represents a FLIP input program. */ struct flip_pgm { struct pool *pool; /* Pool containing FLIP data. */ - const struct variable **var; /* Variables to transpose. */ - size_t var_cnt; /* Number of elements in `var'. */ - int case_cnt; /* Pre-flip case count. */ + size_t n_vars; /* Pre-flip number of variables. */ + int n_cases; /* Pre-flip number of cases. */ - struct variable *new_names; /* Variable containing new variable names. */ - struct varname *new_names_head; /* First new variable. */ - struct varname *new_names_tail; /* Last new variable. */ + struct variable *new_names_var; /* Variable with new variable names. */ + struct var_names old_names; /* Variable names before FLIP. */ + struct var_names new_names; /* Variable names after FLIP. */ FILE *file; /* Temporary file containing data. */ - union value *input_buf; /* Input buffer for temporary file. */ size_t cases_read; /* Number of cases already read. */ bool error; /* Error reading temporary file? */ }; @@ -77,17 +78,16 @@ static const struct casereader_class flip_casereader_class; static void destroy_flip_pgm (struct flip_pgm *); static bool flip_file (struct flip_pgm *); -static bool build_dictionary (struct dictionary *, struct flip_pgm *); -static bool write_flip_case (struct flip_pgm *, const struct ccase *); +static void make_new_var (struct dictionary *, const char *name); /* Parses and executes FLIP. 
*/ int cmd_flip (struct lexer *lexer, struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); + const struct variable **vars; struct flip_pgm *flip; struct casereader *input, *reader; - union value *output_buf; struct ccase *c; size_t i; bool ok; @@ -97,14 +97,12 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) "Temporary transformations will be made permanent.")); flip = pool_create_container (struct flip_pgm, pool); - flip->var = NULL; - flip->var_cnt = 0; - flip->case_cnt = 0; - flip->new_names = NULL; - flip->new_names_head = NULL; - flip->new_names_tail = NULL; + flip->n_vars = 0; + flip->n_cases = 0; + flip->new_names_var = NULL; + var_names_init (&flip->old_names); + var_names_init (&flip->new_names); flip->file = NULL; - flip->input_buf = NULL; flip->cases_read = 0; flip->error = false; @@ -112,39 +110,37 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) if (lex_match_id (lexer, "VARIABLES")) { lex_match (lexer, '='); - if (!parse_variables_const (lexer, dict, &flip->var, &flip->var_cnt, + if (!parse_variables_const (lexer, dict, &vars, &flip->n_vars, PV_NO_DUPLICATE)) goto error; lex_match (lexer, '/'); } else - dict_get_vars (dict, &flip->var, &flip->var_cnt, DC_SYSTEM); - pool_register (flip->pool, free, flip->var); + dict_get_vars (dict, &vars, &flip->n_vars, DC_SYSTEM); + pool_register (flip->pool, free, vars); lex_match (lexer, '/'); if (lex_match_id (lexer, "NEWNAMES")) { lex_match (lexer, '='); - flip->new_names = parse_variable (lexer, dict); - if (!flip->new_names) + flip->new_names_var = parse_variable (lexer, dict); + if (!flip->new_names_var) goto error; } else - flip->new_names = dict_lookup_var (dict, "CASE_LBL"); + flip->new_names_var = dict_lookup_var (dict, "CASE_LBL"); - if (flip->new_names) + if (flip->new_names_var) { - for (i = 0; i < flip->var_cnt; i++) - if (flip->var[i] == flip->new_names) + for (i = 0; i < flip->n_vars; i++) + if (vars[i] == flip->new_names_var) { - remove_element (flip->var, flip->var_cnt, 
sizeof *flip->var, i); - flip->var_cnt--; + remove_element (vars, flip->n_vars, sizeof *vars, i); + flip->n_vars--; break; } } - output_buf = pool_nalloc (flip->pool, flip->var_cnt, sizeof *output_buf); - flip->file = pool_tmpfile (flip->pool); if (flip->file == NULL) { @@ -152,18 +148,11 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) goto error; } - /* Write variable names as first case. */ - for (i = 0; i < flip->var_cnt; i++) - buf_copy_str_rpad (output_buf[i].s, MAX_SHORT_STRING, - var_get_name (flip->var[i])); - if (fwrite (output_buf, sizeof *output_buf, - flip->var_cnt, flip->file) != (size_t) flip->var_cnt) - { - msg (SE, _("Error writing FLIP file: %s."), strerror (errno)); - goto error; - } - - flip->case_cnt = 1; + /* Save old variable names for use as values of CASE_LBL + variable in flipped file. */ + for (i = 0; i < flip->n_vars; i++) + var_names_add (flip->pool, &flip->old_names, + pool_strdup (flip->pool, var_get_name (vars[i]))); /* Read the active file into a flip_sink. */ proc_discard_output (ds); @@ -171,7 +160,33 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) input = proc_open (ds); while ((c = casereader_read (input)) != NULL) { - write_flip_case (flip, c); + flip->n_cases++; + for (i = 0; i < flip->n_vars; i++) + { + const struct variable *v = vars[i]; + double out = var_is_numeric (v) ? case_num (c, v) : SYSMIS; + fwrite (&out, sizeof out, 1, flip->file); + } + if (flip->new_names_var != NULL) + { + const union value *value = case_data (c, flip->new_names_var); + const char *name; + if (var_is_numeric (flip->new_names_var)) + { + double f = value->f; + name = (f == SYSMIS ? "VSYSMIS" + : f < INT_MIN ? "VNEGINF" + : f > INT_MAX ? 
"VPOSINF" + : pool_asprintf (flip->pool, "V%d", (int) f)); + } + else + { + int width = var_get_width (flip->new_names_var); + name = pool_strdup0 (flip->pool, + value_str (value, width), width); + } + var_names_add (flip->pool, &flip->new_names, name); + } case_unref (c); } ok = casereader_destroy (input); @@ -186,15 +201,20 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) /* Flip the dictionary. */ dict_clear (dict); - if (!build_dictionary (dict, flip)) - { - proc_discard_active_file (ds); - goto error; - } + dict_create_var_assert (dict, "CASE_LBL", 8); + for (i = 0; i < flip->n_cases; i++) + if (flip->new_names.n_names) + make_new_var (dict, flip->new_names.names[i]); + else + { + char s[VAR_NAME_LEN + 1]; + sprintf (s, "VAR%03d", i); + dict_create_var_assert (dict, s, 0); + } /* Set up flipped data for reading. */ - reader = casereader_create_sequential (NULL, dict_get_next_value_idx (dict), - flip->var_cnt, + reader = casereader_create_sequential (NULL, dict_get_proto (dict), + flip->n_vars, &flip_casereader_class, flip); proc_set_active_file_data (ds, reader); return lex_end_of_command (lexer); @@ -213,10 +233,11 @@ destroy_flip_pgm (struct flip_pgm *flip) } /* Make a new variable with base name NAME, which is bowdlerized and - mangled until acceptable, and returns success. */ -static int -make_new_var (struct dictionary *dict, char name[]) + mangled until acceptable. */ +static void +make_new_var (struct dictionary *dict, const char *name_) { + char *name = xstrdup (name_); char *cp; /* Trim trailing spaces. */ @@ -225,7 +246,7 @@ make_new_var (struct dictionary *dict, char name[]) *--cp = '\0'; /* Fix invalid characters. 
*/ - for (cp = name; *cp && cp < name + SHORT_NAME_LEN; cp++) + for (cp = name; *cp && cp < name + VAR_NAME_LEN; cp++) if (cp == name) { if (!lex_is_id1 (*cp) || *cp == '$') @@ -239,115 +260,24 @@ make_new_var (struct dictionary *dict, char name[]) *cp = '\0'; str_uppercase (name); - if (dict_create_var (dict, name, 0)) - return 1; - - /* Add numeric extensions until acceptable. */ - { - const int len = (int) strlen (name); - char n[SHORT_NAME_LEN + 1]; - int i; - - for (i = 1; i < 10000000; i++) - { - int ofs = MIN (7 - intlog10 (i), len); - memcpy (n, name, ofs); - sprintf (&n[ofs], "%d", i); - - if (dict_create_var (dict, n, 0)) - return 1; - } - } - - msg (SE, _("Could not create acceptable variant for variable %s."), name); - return 0; -} - -/* Make a new dictionary for all the new variable names. */ -static bool -build_dictionary (struct dictionary *dict, struct flip_pgm *flip) -{ - dict_create_var_assert (dict, "CASE_LBL", 8); - - if (flip->new_names_head == NULL) + /* Use the mangled name, if it is available, or add numeric + extensions until we find one that is. */ + if (!dict_create_var (dict, name, 0)) { + int len = strlen (name); int i; - - if (flip->case_cnt > 99999) - { - msg (SE, _("Cannot create more than 99999 variable names.")); - return false; - } - - for (i = 0; i < flip->case_cnt - 1; i++) - { - struct variable *v; - char s[SHORT_NAME_LEN + 1]; - - sprintf (s, "VAR%03d", i); - v = dict_create_var_assert (dict, s, 0); - } - } - else - { - struct varname *v; - - for (v = flip->new_names_head; v; v = v->next) - if (!make_new_var (dict, v->name)) - return false; - } - - return true; -} - -/* Writes case C to the FLIP sink. - Returns true if successful, false if an I/O error occurred. 
*/ -static bool -write_flip_case (struct flip_pgm *flip, const struct ccase *c) -{ - size_t i; - - flip->case_cnt++; - - if (flip->new_names != NULL) - { - struct varname *v = pool_alloc (flip->pool, sizeof *v); - v->next = NULL; - if (var_is_numeric (flip->new_names)) + for (i = 1; ; i++) { - double f = case_num (c, flip->new_names); - - if (f == SYSMIS) - strcpy (v->name, "VSYSMIS"); - else if (f < INT_MIN) - strcpy (v->name, "VNEGINF"); - else if (f > INT_MAX) - strcpy (v->name, "VPOSINF"); - else - snprintf (v->name, sizeof v->name, "V%d", (int) f); - } - else - { - int width = MIN (var_get_width (flip->new_names), MAX_SHORT_STRING); - memcpy (v->name, case_str (c, flip->new_names), width); - v->name[width] = 0; - } + char n[VAR_NAME_LEN + 1]; + int ofs = MIN (VAR_NAME_LEN - 1 - intlog10 (i), len); + memcpy (n, name, ofs); + sprintf (&n[ofs], "%d", i); - if (flip->new_names_head == NULL) - flip->new_names_head = v; - else - flip->new_names_tail->next = v; - flip->new_names_tail = v; - } - - /* Write to external file. */ - for (i = 0; i < flip->var_cnt; i++) - { - const struct variable *v = flip->var[i]; - double out = var_is_numeric (v) ? case_num (c, v) : SYSMIS; - fwrite (&out, sizeof out, 1, flip->file); + if (dict_create_var (dict, n, 0)) + break; + } } - return true; + free (name); } /* Transposes the external file into a new file. */ @@ -357,14 +287,14 @@ flip_file (struct flip_pgm *flip) size_t case_bytes; size_t case_capacity; size_t case_idx; - union value *input_buf, *output_buf; + double *input_buf, *output_buf; FILE *input_file, *output_file; /* Allocate memory for many cases. 
*/ - case_bytes = flip->var_cnt * sizeof *input_buf; + case_bytes = flip->n_vars * sizeof *input_buf; case_capacity = settings_get_workspace () / case_bytes; - if (case_capacity > flip->case_cnt * 2) - case_capacity = flip->case_cnt * 2; + if (case_capacity > flip->n_cases * 2) + case_capacity = flip->n_cases * 2; if (case_capacity < 2) case_capacity = 2; for (;;) @@ -386,7 +316,7 @@ flip_file (struct flip_pgm *flip) /* Use half the allocated memory for input_buf, half for output_buf. */ case_capacity /= 2; - output_buf = input_buf + flip->var_cnt * case_capacity; + output_buf = input_buf + flip->n_vars * case_capacity; input_file = flip->file; if (fseek (input_file, 0, SEEK_SET) != 0) @@ -402,9 +332,9 @@ flip_file (struct flip_pgm *flip) return false; } - for (case_idx = 0; case_idx < flip->case_cnt; ) + for (case_idx = 0; case_idx < flip->n_cases; ) { - unsigned long read_cases = MIN (flip->case_cnt - case_idx, + unsigned long read_cases = MIN (flip->n_cases - case_idx, case_capacity); size_t i; @@ -417,16 +347,16 @@ flip_file (struct flip_pgm *flip) return false; } - for (i = 0; i < flip->var_cnt; i++) + for (i = 0; i < flip->n_vars; i++) { unsigned long j; for (j = 0; j < read_cases; j++) - output_buf[j] = input_buf[i + j * flip->var_cnt]; + output_buf[j] = input_buf[i + j * flip->n_vars]; if (fseeko (output_file, sizeof *input_buf * (case_idx - + (off_t) i * flip->case_cnt), + + (off_t) i * flip->n_cases), SEEK_SET) != 0) { msg (SE, _("Error seeking FLIP source file: %s."), @@ -467,17 +397,19 @@ flip_file (struct flip_pgm *flip) /* Reads and returns one case. Returns a null pointer at end of file or if an I/O error occurred. 
*/ static struct ccase * -flip_casereader_read (struct casereader *reader UNUSED, void *flip_) +flip_casereader_read (struct casereader *reader, void *flip_) { struct flip_pgm *flip = flip_; struct ccase *c; size_t i; - if (flip->error || flip->cases_read >= flip->var_cnt) - return NULL; + if (flip->error || flip->cases_read >= flip->n_vars) + return false; - c = case_create (flip->case_cnt); - for (i = 0; i < flip->case_cnt; i++) + c = case_create (casereader_get_proto (reader)); + value_copy_str_rpad (case_data_rw_idx (c, 0), 8, + flip->old_names.names[flip->cases_read], ' '); + for (i = 0; i < flip->n_cases; i++) { double in; if (fread (&in, sizeof in, 1, flip->file) != 1) @@ -493,7 +425,7 @@ flip_casereader_read (struct casereader *reader UNUSED, void *flip_) flip->error = true; return NULL; } - case_data_rw_idx (c, i)->f = in; + case_data_rw_idx (c, i + 1)->f = in; } flip->cases_read++; @@ -520,3 +452,21 @@ static const struct casereader_class flip_casereader_class = NULL, NULL, }; + +static void +var_names_init (struct var_names *vn) +{ + vn->names = NULL; + vn->n_names = 0; + vn->allocated_names = 0; +} + +static void +var_names_add (struct pool *pool, struct var_names *vn, const char *name) +{ + if (vn->n_names >= vn->allocated_names) + vn->names = pool_2nrealloc (pool, vn->names, &vn->allocated_names, + sizeof *vn->names); + vn->names[vn->n_names++] = name; +} + diff --git a/src/language/stats/freq.c b/src/language/stats/freq.c index 95285747..1a0ecd49 100644 --- a/src/language/stats/freq.c +++ b/src/language/stats/freq.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,7 +30,7 @@ compare_freq ( const void *_f1, const void *_f2, const void *_var) const struct freq *f2 = _f2; const struct variable *var = _var; - return compare_values_short (f1->value, f2->value, var ); + return value_compare_3way (&f1->value, &f2->value, var_get_width (var)); } unsigned int @@ -38,15 +38,16 @@ hash_freq (const void *_f, const void *var) { const struct freq *f = _f; - return hash_value_short (f->value, var); + return value_hash (&f->value, var_get_width (var), 0); } /* Free function to be used on FR whose value parameter has been copied */ void -free_freq_mutable_hash (void *fr, const void *var UNUSED) +free_freq_mutable_hash (void *fr, const void *var_) { + const struct variable *var = var_; struct freq_mutable *freq = fr; - free (freq->value); + value_destroy (&freq->value, var_get_width (var)); free (freq); } diff --git a/src/language/stats/freq.h b/src/language/stats/freq.h index 98af8a6f..b06a36f6 100644 --- a/src/language/stats/freq.h +++ b/src/language/stats/freq.h @@ -21,14 +21,14 @@ union value ; /* Frequency table entry. */ struct freq { - const union value *value; /* The value. */ + const union value value; /* The value. */ double count; /* The number of occurrences of the value. */ }; /* Non const version of frequency table entry. */ struct freq_mutable { - union value *value; /* The value. */ + union value value; /* The value. */ double count; /* The number of occurrences of the value. */ }; diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 82670e3e..8295f51a 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -205,10 +205,10 @@ static struct pool *syntax_pool; /* For syntax-related data. */ struct freq_tab { struct hsh_table *data; /* Undifferentiated data. */ - struct freq *valid; /* Valid freqs. 
*/ + struct freq_mutable *valid; /* Valid freqs. */ int n_valid; /* Number of total freqs. */ - struct freq *missing; /* Missing freqs. */ + struct freq_mutable *missing; /* Missing freqs. */ int n_missing; /* Number of missing freqs. */ /* Statistics. */ @@ -230,11 +230,7 @@ struct var_freqs /* Statistics. */ double stat[frq_n_stats]; - /* Width and format for analysis and display. - This is normally the same as "width" and "print" in struct - variable, but in SPSS-compatible mode only the first - MAX_SHORT_STRING bytes of long string variables are - included. */ + /* Variable attributes. */ int width; struct fmt_spec print; }; @@ -510,21 +506,20 @@ calc (const struct ccase *c, const struct dataset *ds) struct var_freqs *vf = get_var_freqs (v); struct freq_tab *ft = &vf->tab; - struct freq target; - struct freq **fpp; + struct freq_mutable target; + struct freq_mutable **fpp; - target.value = (union value *) val; - fpp = (struct freq **) hsh_probe (ft->data, &target); + target.value = *val; + fpp = (struct freq_mutable **) hsh_probe (ft->data, &target); if (*fpp != NULL) (*fpp)->count += weight; else { - struct freq *fp = pool_alloc (data_pool, sizeof *fp); + struct freq_mutable *fp = pool_alloc (data_pool, sizeof *fp); fp->count = weight; - fp->value = pool_clone (data_pool, - val, - MAX (MAX_SHORT_STRING, vf->width)); + value_init_pool (data_pool, &fp->value, vf->width); + value_copy (&fp->value, val, vf->width); *fpp = fp; } } @@ -656,15 +651,15 @@ get_freq_comparator (int frq_sort, enum val_type val_type) } } -/* Returns true iff the value in struct freq F is non-missing +/* Returns true iff the value in struct freq_mutable F is non-missing for variable V. 
*/ static bool not_missing (const void *f_, const void *v_) { - const struct freq *f = f_; + const struct freq_mutable *f = f_; const struct variable *v = v_; - return !var_is_value_missing (v, f->value, MV_ANY); + return !var_is_value_missing (v, &f->value, MV_ANY); } /* Summarizes the frequency table data for variable V. */ @@ -675,7 +670,7 @@ postprocess_freq_tab (const struct variable *v) struct freq_tab *ft; size_t count; void *const *data; - struct freq *freqs, *f; + struct freq_mutable *freqs, *f; size_t i; ft = &get_var_freqs (v)->tab; @@ -689,7 +684,7 @@ postprocess_freq_tab (const struct variable *v) freqs = xnmalloc (count, sizeof *freqs); for (i = 0; i < count; i++) { - struct freq *f = data[i]; + struct freq_mutable *f = data[i]; freqs[i] = *f; } @@ -764,12 +759,6 @@ frq_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_freque vf->groups = NULL; vf->width = var_get_width (v); vf->print = *var_get_print_format (v); - if (vf->width > MAX_SHORT_STRING && settings_get_algorithm () == COMPATIBLE) - { - enum fmt_type type = var_get_print_format (v)->type; - vf->width = MAX_SHORT_STRING; - vf->print.w = MAX_SHORT_STRING * (type == FMT_AHEX ? 
2 : 1); - } } return 1; } @@ -891,12 +880,12 @@ add_percentile (double x) static int compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; - if (a->value[0].f > b->value[0].f) + if (a->value.f > b->value.f) return 1; - else if (a->value[0].f < b->value[0].f) + else if (a->value.f < b->value.f) return -1; else return 0; @@ -906,12 +895,12 @@ compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) static int compare_value_alpha_a (const void *a_, const void *b_, const void *v_) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - return memcmp (a->value[0].s, b->value[0].s, vf->width); + return value_compare_3way (&a->value, &b->value, vf->width); } /* Descending numeric compare of values. 
*/ @@ -933,17 +922,17 @@ compare_value_alpha_d (const void *a, const void *b, const void *v) static int compare_freq_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; if (a->count > b->count) return 1; else if (a->count < b->count) return -1; - if (a->value[0].f > b->value[0].f) + if (a->value.f > b->value.f) return 1; - else if (a->value[0].f < b->value[0].f) + else if (a->value.f < b->value.f) return -1; else return 0; @@ -954,8 +943,8 @@ compare_freq_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) static int compare_freq_alpha_a (const void *a_, const void *b_, const void *v_) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); @@ -964,7 +953,7 @@ compare_freq_alpha_a (const void *a_, const void *b_, const void *v_) else if (a->count < b->count) return -1; else - return memcmp (a->value[0].s, b->value[0].s, vf->width); + return value_compare_3way (&a->value, &b->value, vf->width); } /* Descending numeric compare of frequency; @@ -972,17 +961,17 @@ compare_freq_alpha_a (const void *a_, const void *b_, const void *v_) static int compare_freq_numeric_d (const void *a_, const void *b_, const void *aux UNUSED) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; if (a->count > b->count) return -1; else if (a->count < b->count) return 1; - if (a->value[0].f > b->value[0].f) + if (a->value.f > b->value.f) return 1; - else if (a->value[0].f < b->value[0].f) + else if (a->value.f < b->value.f) return -1; else return 0; @@ -993,8 +982,8 @@ compare_freq_numeric_d (const void *a_, const void *b_, const void *aux UNUSED) static int compare_freq_alpha_d (const void *a_, const 
void *b_, const void *v_) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); @@ -1003,7 +992,7 @@ compare_freq_alpha_d (const void *a_, const void *b_, const void *v_) else if (a->count < b->count) return 1; else - return memcmp (a->value[0].s, b->value[0].s, vf->width); + return value_compare_3way (&a->value, &b->value, vf->width); } /* Frequency table display. */ @@ -1038,7 +1027,7 @@ dump_full (const struct variable *v, const struct variable *wv) int n_categories; struct var_freqs *vf; struct freq_tab *ft; - struct freq *f; + struct freq_mutable *f; struct tab_table *t; int r; double cum_total = 0.0; @@ -1097,12 +1086,12 @@ dump_full (const struct variable *v, const struct variable *wv) if (lab) { - const char *label = var_lookup_value_label (v, &f->value[0]); + const char *label = var_lookup_value_label (v, &f->value); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); + tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print); tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); tab_double (t, 2 + lab, r, TAB_NONE, percent, NULL); tab_double (t, 3 + lab, r, TAB_NONE, valid_percent, NULL); @@ -1115,12 +1104,12 @@ dump_full (const struct variable *v, const struct variable *wv) if (lab) { - const char *label = var_lookup_value_label (v, &f->value[0]); + const char *label = var_lookup_value_label (v, &f->value); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); + tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print); tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); tab_double (t, 2 + lab, r, TAB_NONE, f->count / ft->total_cases * 100.0, NULL); @@ -1170,7 +1159,7 @@ dump_condensed (const struct variable *v, const struct variable *wv) int n_categories; struct 
var_freqs *vf; struct freq_tab *ft; - struct freq *f; + struct freq_mutable *f; struct tab_table *t; int r; double cum_total = 0.0; @@ -1196,7 +1185,7 @@ dump_condensed (const struct variable *v, const struct variable *wv) percent = f->count / ft->total_cases * 100.0; cum_total += f->count / ft->valid_cases * 100.0; - tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); + tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print); tab_double (t, 1, r, TAB_NONE, f->count, wfmt); tab_double (t, 2, r, TAB_NONE, percent, NULL); tab_double (t, 3, r, TAB_NONE, cum_total, NULL); @@ -1204,7 +1193,7 @@ dump_condensed (const struct variable *v, const struct variable *wv) } for (; f < &ft->valid[n_categories]; f++) { - tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); + tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print); tab_double (t, 1, r, TAB_NONE, f->count, wfmt); tab_double (t, 2, r, TAB_NONE, f->count / ft->total_cases * 100.0, NULL); @@ -1230,7 +1219,7 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) struct freq_tab *ft = &get_var_freqs (v)->tab; double W = ft->valid_cases; struct moments *m; - struct freq *f=0; + struct freq_mutable *f=0; int most_often; double X_mode; @@ -1266,7 +1255,7 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) if ( percentiles[i].flag ) { - percentiles[i].x2 = f->value[0].f; + percentiles[i].x2 = f->value.f; percentiles[i].x1 = prev_value; percentiles[i].flag2 = 1; continue; @@ -1276,7 +1265,7 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) { if ( f->count > 1 && rank - (f->count - 1) > tp ) { - percentiles[i].x2 = percentiles[i].x1 = f->value[0].f; + percentiles[i].x2 = percentiles[i].x1 = f->value.f; percentiles[i].flag2 = 1; } else @@ -1287,14 +1276,14 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) continue; } } - prev_value = f->value[0].f; + prev_value = f->value.f; } for (i = 0; i < n_percentiles; i++) { /* Catches the case when p == 100% */ if ( ! 
percentiles[i].flag2 ) - percentiles[i].x1 = percentiles[i].x2 = f->value[0].f; + percentiles[i].x1 = percentiles[i].x2 = f->value.f; /* printf("percentile %d (p==%.2f); X1 = %g; X2 = %g\n", @@ -1330,7 +1319,7 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) if (most_often < f->count) { most_often = f->count; - X_mode = f->value[0].f; + X_mode = f->value.f; } else if (most_often == f->count) { @@ -1343,16 +1332,16 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) /* Calculate moments. */ m = moments_create (MOMENT_KURTOSIS); for (f = ft->valid; f < ft->missing; f++) - moments_pass_one (m, f->value[0].f, f->count); + moments_pass_one (m, f->value.f, f->count); for (f = ft->valid; f < ft->missing; f++) - moments_pass_two (m, f->value[0].f, f->count); + moments_pass_two (m, f->value.f, f->count); moments_calculate (m, NULL, &d[frq_mean], &d[frq_variance], &d[frq_skew], &d[frq_kurt]); moments_destroy (m); /* Formulas below are taken from _SPSS Statistical Algorithms_. 
*/ - d[frq_min] = ft->valid[0].value[0].f; - d[frq_max] = ft->valid[ft->n_valid - 1].value[0].f; + d[frq_min] = ft->valid[0].value.f; + d[frq_max] = ft->valid[ft->n_valid - 1].value.f; d[frq_mode] = X_mode; d[frq_range] = d[frq_max] - d[frq_min]; d[frq_sum] = d[frq_mean] * W; @@ -1450,16 +1439,16 @@ freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) struct hsh_iterator hi; struct hsh_table *fh = ft->data; - struct freq *frq; + struct freq_mutable *frq; /* Find out the extremes of the x value */ for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { - if (var_is_value_missing(var, frq->value, MV_ANY)) + if (var_is_value_missing(var, &frq->value, MV_ANY)) continue; - if ( frq->value[0].f < x_min ) x_min = frq->value[0].f ; - if ( frq->value[0].f > x_max ) x_max = frq->value[0].f ; + if ( frq->value.f < x_min ) x_min = frq->value.f ; + if ( frq->value.f > x_max ) x_max = frq->value.f ; } hist = histogram_create (bins, x_min, x_max); @@ -1467,7 +1456,7 @@ freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) for( i = 0 ; i < ft->n_valid ; ++i ) { frq = &ft->valid[i]; - histogram_add ((struct histogram *)hist, frq->value[0].f, frq->count); + histogram_add ((struct histogram *)hist, frq->value.f, frq->count); } return (struct histogram *)hist; @@ -1498,10 +1487,10 @@ freq_tab_to_slice_array(const struct freq_tab *frq_tab, for (i = 0 ; i < *n_slices ; ++i ) { - const struct freq *frq = &frq_tab->valid[i]; + const struct freq_mutable *frq = &frq_tab->valid[i]; ds_init_empty (&slices[i].label); - var_append_value_name (var, frq->value, &slices[i].label); + var_append_value_name (var, &frq->value, &slices[i].label); slices[i].magnetude = frq->count; } diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.q index 8794ad5c..2c1c19e8 100644 --- a/src/language/stats/oneway.q +++ b/src/language/stats/oneway.q @@ -571,7 +571,6 @@ show_contrast_coeffs (short *bad_contrast) { int n_cols = 2 + 
ostensible_number_of_groups; int n_rows = 2 + cmd.sbc_contrast; - union value *group_value; int count = 0; void *const *group_values; @@ -618,13 +617,16 @@ show_contrast_coeffs (short *bad_contrast) count < hsh_count (global_group_hash); ++count) { + double *group_value_p; + union value group_value; int i; struct string vstr; - group_value = group_values[count]; ds_init_empty (&vstr); - var_append_value_name (indep_var, group_value, &vstr); + group_value_p = group_values[count]; + group_value.f = *group_value_p; + var_append_value_name (indep_var, &group_value, &vstr); tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, ds_cstr (&vstr)); @@ -878,10 +880,25 @@ precalc (struct cmd_oneway *cmd UNUSED) } } +static int +compare_double_3way (const void *a_, const void *b_, const void *aux UNUSED) +{ + const double *a = a_; + const double *b = b_; + return *a < *b ? -1 : *a > *b; +} + +static unsigned +do_hash_double (const void *value_, const void *aux UNUSED) +{ + const double *value = value_; + return hash_double (*value, 0); +} + static void -free_value (void *value_, const void *aux UNUSED) +free_double (void *value_, const void *aux UNUSED) { - union value *value = value_; + double *value = value_; free (value); } @@ -908,9 +925,9 @@ run_oneway (struct cmd_oneway *cmd, taint = taint_clone (casereader_get_taint (input)); global_group_hash = hsh_create (4, - compare_values_short, - hash_value_short, - free_value, + compare_double_3way, + do_hash_double, + free_double, indep_var); precalc (cmd); @@ -931,9 +948,12 @@ run_oneway (struct cmd_oneway *cmd, const double weight = dict_get_case_weight (dict, c, NULL); const union value *indep_val = case_data (c, indep_var); - void **p = hsh_probe (global_group_hash, indep_val); + void **p = hsh_probe (global_group_hash, &indep_val->f); if (*p == NULL) - *p = value_dup (indep_val, var_get_width (indep_var)); + { + double *value = *p = xmalloc (sizeof *value); + *value = indep_val->f; + } for (i = 0; i < n_vars; ++i) { diff --git 
a/src/language/stats/rank.q b/src/language/stats/rank.q index 3bbd39d1..8ae4076d 100644 --- a/src/language/stats/rank.q +++ b/src/language/stats/rank.q @@ -249,7 +249,7 @@ rank_cmd (struct dataset *ds, const struct subcase *sc, proc_discard_output (ds); split_grouper = casegrouper_create_splits (proc_open (ds), d); - output = autopaging_writer_create (dict_get_next_value_idx (d)); + output = autopaging_writer_create (dict_get_proto (d)); while (casegrouper_get_next_group (split_grouper, &split_group)) { diff --git a/src/language/stats/reliability.q b/src/language/stats/reliability.q index 0e8165a4..166a4dfa 100644 --- a/src/language/stats/reliability.q +++ b/src/language/stats/reliability.q @@ -16,26 +16,27 @@ #include -#include "xalloc.h" -#include "xmalloca.h" +#include -#include "gettext.h" -#define _(msgid) gettext (msgid) -#define N_(msgid) msgid - -#include +#include +#include +#include #include #include -#include -#include -#include -#include - +#include #include - +#include +#include #include #include +#include "xalloc.h" +#include "xmalloca.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + /* (headers) */ /* (specification) @@ -335,7 +336,7 @@ run_reliability (struct casereader *input, struct dataset *ds, struct cronbach *s = &rel->sc[si]; - s->totals_idx = casereader_get_value_cnt (input); + s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input)); input = casereader_create_append_numeric (input, append_sum, s, NULL); diff --git a/src/language/stats/sign.c b/src/language/stats/sign.c index 2ef60899..a5a27212 100644 --- a/src/language/stats/sign.c +++ b/src/language/stats/sign.c @@ -17,8 +17,6 @@ #include #include "sign.h" -#include - #include #include #include @@ -31,6 +29,9 @@ #include #include +#include "minmax.h" +#include "xalloc.h" + #include "gettext.h" #define _(msgid) gettext (msgid) diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q index 3affde20..a3f4cf8f 100644 --- 
a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,7 @@ #include #include "xalloc.h" +#include "xmemdup0.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -54,529 +56,378 @@ /* (specification) "T-TEST" (tts_): - +groups=custom; - testval=double; - +variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); - +pairs=custom; - missing=miss:!analysis/listwise, - incl:include/!exclude; - +format=fmt:!labels/nolabels; - criteria=:cin(d:criteria,"%s > 0. && %s < 1."). + +groups=custom; + testval=double; + +variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); + +pairs=custom; + missing=miss:!analysis/listwise, + incl:include/!exclude; + +format=fmt:!labels/nolabels; + criteria=:cin(d:criteria,"%s > 0. && %s < 1."). */ /* (declarations) */ /* (functions) */ - -/* Variable for the GROUPS subcommand, if given. */ -static struct variable *indep_var; - enum comparison { - CMP_LE = -2, - CMP_EQ = 0, + CMP_LE, + CMP_EQ, }; -struct group_properties -{ - /* The comparison criterion */ - enum comparison criterion; - - /* The independent variable */ - struct variable *indep_var; - - union { - /* The value of the independent variable at which groups are determined to - belong to one group or the other */ - double critical_value; - - - /* The values of the independent variable for each group */ - union value g_value[2]; - } v ; - -}; - - -static struct group_properties gp ; - - - -/* PAIRS: Number of pairs to be compared ; each pair. */ -static int n_pairs = 0 ; +/* A pair of variables to be compared. 
*/ struct pair -{ - /* The variables comprising the pair */ - const struct variable *v[2]; - - /* The number of valid variable pairs */ - double n; - - /* The sum of the members */ - double sum[2]; - - /* sum of squares of the members */ - double ssq[2]; - - /* Std deviation of the members */ - double std_dev[2]; - - - /* Sample Std deviation of the members */ - double s_std_dev[2]; - - /* The means of the members */ - double mean[2]; - - /* The correlation coefficient between the variables */ - double correlation; - - /* The sum of the differences */ - double sum_of_diffs; - - /* The sum of the products */ - double sum_of_prod; - - /* The mean of the differences */ - double mean_diff; - - /* The sum of the squares of the differences */ - double ssq_diffs; + { + const struct variable *v[2]; /* The paired variables. */ + double n; /* The number of valid variable pairs */ + double sum[2]; /* The sum of the members */ + double ssq[2]; /* sum of squares of the members */ + double std_dev[2]; /* Std deviation of the members */ + double s_std_dev[2]; /* Sample Std deviation of the members */ + double mean[2]; /* The means of the members */ + double correlation; /* Correlation coefficient between the variables. */ + double sum_of_diffs; /* The sum of the differences */ + double sum_of_prod; /* The sum of the products */ + double mean_diff; /* The mean of the differences */ + double ssq_diffs; /* The sum of the squares of the differences */ + double std_dev_diff; /* The std deviation of the differences */ + }; - /* The std deviation of the differences */ - double std_dev_diff; +/* Which mode was T-TEST invoked */ +enum t_test_mode { + T_1_SAMPLE, /* One-sample tests. */ + T_IND_SAMPLES, /* Independent-sample tests. */ + T_PAIRED /* Paired-sample tests. */ }; -static struct pair *pairs=0; - -static int parse_value (struct lexer *lexer, union value * v, enum val_type); +/* Total state of a T-TEST procedure. 
*/ +struct t_test_proc + { + enum t_test_mode mode; /* Mode that T-TEST was invoked in. */ + double criteria; /* Confidence interval in (0, 1). */ + enum mv_class exclude; /* Classes of missing values to exclude. */ + bool listwise_missing; /* Drop whole case if one missing var? */ + struct fmt_spec weight_format; /* Format of weight variable. */ + + /* Dependent variables. */ + const struct variable **vars; + size_t n_vars; + + /* For mode == T_1_SAMPLE. */ + double testval; + + /* For mode == T_PAIRED only. */ + struct pair *pairs; + size_t n_pairs; + + /* For mode == T_IND_SAMPLES only. */ + struct variable *indep_var; /* Independent variable. */ + enum comparison criterion; /* Type of comparison. */ + double critical_value; /* CMP_LE only: Grouping threshold value. */ + union value g_value[2]; /* CMP_EQ only: Per-group indep var values. */ + }; -/* Structures and Functions for the Statistics Summary Box */ -struct ssbox; -typedef void populate_ssbox_func (struct ssbox *ssb, - const struct dictionary *, - struct cmd_t_test *cmd); -typedef void finalize_ssbox_func (struct ssbox *ssb); +static int parse_value (struct lexer *, union value *, int width); +/* Statistics Summary Box */ struct ssbox -{ - struct tab_table *t; - - populate_ssbox_func *populate; - finalize_ssbox_func *finalize; - -}; - -/* Create a ssbox */ -void ssbox_create (struct ssbox *ssb, struct cmd_t_test *cmd, int mode); - -/* Populate a ssbox according to cmd */ -void ssbox_populate (struct ssbox *ssb, const struct dictionary *dict, - struct cmd_t_test *cmd); - -/* Submit and destroy a ssbox */ -void ssbox_finalize (struct ssbox *ssb); - -/* A function to create, populate and submit the Paired Samples Correlation - box */ -static void pscbox (const struct dictionary *); + { + struct tab_table *t; + void (*populate) (struct ssbox *, struct t_test_proc *); + void (*finalize) (struct ssbox *); + }; +static void ssbox_create (struct ssbox *, struct t_test_proc *); +static void ssbox_populate (struct 
ssbox *, struct t_test_proc *); +static void ssbox_finalize (struct ssbox *); -/* Structures and Functions for the Test Results Box */ -struct trbox; +/* Paired Samples Correlation box */ +static void pscbox (struct t_test_proc *); -typedef void populate_trbox_func (struct trbox *trb, - const struct dictionary *dict, - struct cmd_t_test *cmd); -typedef void finalize_trbox_func (struct trbox *trb); +/* Test Results Box. */ struct trbox { struct tab_table *t; - populate_trbox_func *populate; - finalize_trbox_func *finalize; -}; - -/* Create a trbox */ -void trbox_create (struct trbox *trb, struct cmd_t_test *cmd, int mode); - -/* Populate a ssbox according to cmd */ -static void trbox_populate (struct trbox *trb, const struct dictionary *dict, - struct cmd_t_test *cmd); - -/* Submit and destroy a ssbox */ -void trbox_finalize (struct trbox *trb); - -/* Which mode was T-TEST invoked */ -enum { - T_1_SAMPLE = 0 , - T_IND_SAMPLES, - T_PAIRED -}; - - -static int common_calc (const struct dictionary *dict, - const struct ccase *, void *, - enum mv_class); -static void common_precalc (struct cmd_t_test *); -static void common_postcalc (struct cmd_t_test *); - -static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, enum mv_class); -static void one_sample_precalc (struct cmd_t_test *); -static void one_sample_postcalc (struct cmd_t_test *); - -static int paired_calc (const struct dictionary *dict, const struct ccase *, - struct cmd_t_test*, enum mv_class); -static void paired_precalc (struct cmd_t_test *); -static void paired_postcalc (struct cmd_t_test *); - -static void group_precalc (struct cmd_t_test *); -static int group_calc (const struct dictionary *dict, const struct ccase *, - struct cmd_t_test *, enum mv_class); -static void group_postcalc (struct cmd_t_test *); - + void (*populate) (struct trbox *, struct t_test_proc *); + void (*finalize) (struct trbox *); + }; -static void calculate (struct cmd_t_test *, - struct casereader *, - 
const struct dataset *); +static void trbox_create (struct trbox *, struct t_test_proc *); +static void trbox_populate (struct trbox *, struct t_test_proc *); +static void trbox_finalize (struct trbox *); -static int mode; +static void calculate (struct t_test_proc *, struct casereader *, + const struct dataset *); static int compare_group_binary (const struct group_statistics *a, - const struct group_statistics *b, - const struct group_properties *p); - - -static unsigned hash_group_binary (const struct group_statistics *g, - const struct group_properties *p); - - + const struct group_statistics *b, + const struct t_test_proc *); +static unsigned hash_group_binary (const struct group_statistics *g, + const struct t_test_proc *p); int cmd_t_test (struct lexer *lexer, struct dataset *ds) { struct cmd_t_test cmd; + struct t_test_proc proc; struct casegrouper *grouper; struct casereader *group; - bool ok; - - if ( !parse_t_test (lexer, ds, &cmd, NULL) ) - return CMD_FAILURE; + struct variable *wv; + bool ok = false; - if (! cmd.sbc_criteria) - cmd.criteria=0.95; + proc.pairs = NULL; + proc.n_pairs = 0; + proc.vars = NULL; + proc.indep_var = NULL; + if (!parse_t_test (lexer, ds, &cmd, &proc)) + goto parse_failed; - { - int m=0; - if (cmd.sbc_testval) ++m; - if (cmd.sbc_groups) ++m; - if (cmd.sbc_pairs) ++m; - - if ( m != 1) - { - msg (SE, - _ ("TESTVAL, GROUPS and PAIRS subcommands are mutually exclusive.") - ); - free_t_test (&cmd); - return CMD_FAILURE; - } - } - - if (cmd.sbc_testval) - mode=T_1_SAMPLE; - else if (cmd.sbc_groups) - mode=T_IND_SAMPLES; - else - mode=T_PAIRED; + wv = dict_get_weight (dataset_dict (ds)); + proc.weight_format = wv ? 
*var_get_print_format (wv) : F_8_0; - if ( mode == T_PAIRED) + if ((cmd.sbc_testval != 0) + (cmd.sbc_groups != 0) + (cmd.sbc_pairs != 0) + != 1) { - if (cmd.sbc_variables) - { - msg (SE, _ ("VARIABLES subcommand is not appropriate with PAIRS")); - free_t_test (&cmd); - return CMD_FAILURE; - } - else - { - /* Iterate through the pairs and put each variable that is a - member of a pair into cmd.v_variables */ + msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands " + "must be specified.")); + goto done; + } - int i; - struct hsh_iterator hi; - struct const_hsh_table *hash; - const struct variable *v; + proc.mode = (cmd.sbc_testval ? T_1_SAMPLE + : cmd.sbc_groups ? T_IND_SAMPLES + : T_PAIRED); + proc.criteria = cmd.sbc_criteria ? cmd.criteria : 0.95; + proc.exclude = cmd.incl != TTS_INCLUDE ? MV_ANY : MV_SYSTEM; + proc.listwise_missing = cmd.miss == TTS_LISTWISE; - hash = const_hsh_create (n_pairs, compare_vars_by_name, hash_var_by_name, - 0, 0); + if (proc.mode == T_1_SAMPLE) + proc.testval = cmd.n_testval[0]; - for (i=0; i < n_pairs; ++i) - { - const_hsh_insert (hash, pairs[i].v[0]); - const_hsh_insert (hash, pairs[i].v[1]); - } + if (proc.mode == T_PAIRED) + { + size_t i, j; - assert (cmd.n_variables == 0); - cmd.n_variables = const_hsh_count (hash); - - cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables, - sizeof *cmd.v_variables); - /* Iterate through the hash */ - for (i=0,v = const_hsh_first (hash, &hi); - v != 0; - v = const_hsh_next (hash, &hi) ) - cmd.v_variables[i++]=v; - const_hsh_destroy (hash); + if (cmd.sbc_variables) + { + msg (SE, _("VARIABLES subcommand may not be used with PAIRS.")); + goto done; } + + /* Fill proc.vars with the unique variables from pairs. 
*/ + proc.n_vars = proc.n_pairs * 2; + proc.vars = xmalloc (sizeof *proc.vars * proc.n_vars); + for (i = j = 0; i < proc.n_pairs; i++) + { + proc.vars[j++] = proc.pairs[i].v[0]; + proc.vars[j++] = proc.pairs[i].v[1]; + } + proc.n_vars = sort_unique (proc.vars, proc.n_vars, sizeof *proc.vars, + compare_var_ptrs_by_name, NULL); } - else if ( !cmd.sbc_variables) + else { - msg (SE, _ ("One or more VARIABLES must be specified.")); - free_t_test (&cmd); - return CMD_FAILURE; + if (!cmd.n_variables) + { + msg (SE, _("One or more VARIABLES must be specified.")); + goto done; + } + proc.n_vars = cmd.n_variables; + proc.vars = cmd.v_variables; + cmd.v_variables = NULL; } /* Data pass. */ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); while (casegrouper_get_next_group (grouper, &group)) - calculate (&cmd, group, ds); + calculate (&proc, group, ds); ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; - n_pairs=0; - free (pairs); - pairs=0; - - if ( mode == T_IND_SAMPLES) + if (proc.mode == T_IND_SAMPLES) { int v; /* Destroy any group statistics we created */ - for (v = 0 ; v < cmd.n_variables ; ++v ) + for (v = 0; v < proc.n_vars; v++) { - struct group_proc *grpp = group_proc_get (cmd.v_variables[v]); + struct group_proc *grpp = group_proc_get (proc.vars[v]); hsh_destroy (grpp->group_hash); } } +done: free_t_test (&cmd); - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; +parse_failed: + if (proc.indep_var != NULL) + { + int width = var_get_width (proc.indep_var); + value_destroy (&proc.g_value[0], width); + value_destroy (&proc.g_value[1], width); + } + free (proc.vars); + free (proc.pairs); + return ok ? 
CMD_SUCCESS : CMD_FAILURE; } static int -tts_custom_groups (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED, - void *aux UNUSED) +tts_custom_groups (struct lexer *lexer, struct dataset *ds, + struct cmd_t_test *cmd UNUSED, void *proc_) { - int n_group_values=0; + struct t_test_proc *proc = proc_; + int n_values; + int width; lex_match (lexer, '='); - indep_var = parse_variable (lexer, dataset_dict (ds)); - if (!indep_var) + proc->indep_var = parse_variable (lexer, dataset_dict (ds)); + if (proc->indep_var == NULL) { lex_error (lexer, "expecting variable name in GROUPS subcommand"); return 0; } - - if (var_is_long_string (indep_var)) - { - msg (SE, _ ("Long string variable %s is not valid here."), - var_get_name (indep_var)); - return 0; - } + width = var_get_width (proc->indep_var); + value_init (&proc->g_value[0], width); + value_init (&proc->g_value[1], width); if (!lex_match (lexer, '(')) + n_values = 0; + else { - if (var_is_numeric (indep_var)) - { - gp.v.g_value[0].f = 1; - gp.v.g_value[1].f = 2; - - gp.criterion = CMP_EQ; - - n_group_values = 2; - - return 1; - } + if (!parse_value (lexer, &proc->g_value[0], width)) + return 0; + lex_match (lexer, ','); + if (lex_match (lexer, ')')) + n_values = 1; else - { - msg (SE, _ ("When applying GROUPS to a string variable, two " - "values must be specified.")); - return 0; - } + { + if (!parse_value (lexer, &proc->g_value[1], width) + || !lex_force_match (lexer, ')')) + return 0; + n_values = 2; + } } - if (!parse_value (lexer, &gp.v.g_value[0], var_get_width (indep_var))) - return 0; - - lex_match (lexer, ','); - if (lex_match (lexer, ')')) + if (var_is_numeric (proc->indep_var)) { - if (var_is_alpha (indep_var)) + proc->criterion = n_values == 1 ? 
CMP_LE : CMP_EQ; + if (n_values == 1) + proc->critical_value = proc->g_value[0].f; + else if (n_values == 0) { - msg (SE, _ ("When applying GROUPS to a string variable, two " - "values must be specified.")); - return 0; + proc->g_value[0].f = 1; + proc->g_value[1].f = 2; } - gp.criterion = CMP_LE; - gp.v.critical_value = gp.v.g_value[0].f; - - n_group_values = 1; - return 1; } - - if (!parse_value (lexer, &gp.v.g_value[1], var_get_width (indep_var))) - return 0; - - n_group_values = 2; - if (!lex_force_match (lexer, ')')) - return 0; - - if ( n_group_values == 2 ) - gp.criterion = CMP_EQ ; else - gp.criterion = CMP_LE ; - - - if ( var_is_alpha (indep_var)) { - buf_copy_rpad (gp.v.g_value [0].s, var_get_width (indep_var), - gp.v.g_value [0].s, strlen (gp.v.g_value[0].s)); - - buf_copy_rpad (gp.v.g_value [1].s, var_get_width (indep_var), - gp.v.g_value [1].s, strlen (gp.v.g_value[1].s)); + proc->criterion = CMP_EQ; + if (n_values != 2) + { + msg (SE, _("When applying GROUPS to a string variable, two " + "values must be specified.")); + return 0; + } } - return 1; } +static void +add_pair (struct t_test_proc *proc, + const struct variable *v0, const struct variable *v1) +{ + struct pair *p = &proc->pairs[proc->n_pairs++]; + p->v[0] = v0; + p->v[1] = v1; +} static int -tts_custom_pairs (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED, void *aux UNUSED) +tts_custom_pairs (struct lexer *lexer, struct dataset *ds, + struct cmd_t_test *cmd UNUSED, void *proc_) { - const struct variable **vars; - size_t n_vars; - size_t n_pairs_local; + struct t_test_proc *proc = proc_; - size_t n_before_WITH; - size_t n_after_WITH = SIZE_MAX; - int paired ; /* Was the PAIRED keyword given ? 
*/ + const struct variable **vars1 = NULL; + size_t n_vars1 = 0; - lex_match (lexer, '='); + const struct variable **vars2 = NULL; + size_t n_vars2 = 0; - n_vars=0; - if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars, - PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) - { - free (vars); - return 0; - } - assert (n_vars); + bool paired = false; - n_before_WITH = 0; - if (lex_match (lexer, T_WITH)) - { - n_before_WITH = n_vars; - if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars, - PV_DUPLICATE | PV_APPEND - | PV_NUMERIC | PV_NO_SCRATCH)) - { - free (vars); - return 0; - } - n_after_WITH = n_vars - n_before_WITH; - } - - paired = (lex_match (lexer, '(') && lex_match_id (lexer, "PAIRED") && lex_match (lexer, ')')); - - /* Determine the number of pairs needed */ - if (paired) - { - if (n_before_WITH != n_after_WITH) - { - free (vars); - msg (SE, _ ("PAIRED was specified but the number of variables " - "preceding WITH (%zu) did not match the number " - "following (%zu)."), - n_before_WITH, n_after_WITH); - return 0; - } - n_pairs_local = n_before_WITH; - } - else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ - { - n_pairs_local = n_before_WITH * n_after_WITH ; - } - else /* Neither WITH nor PAIRED keyword given */ - { - if (n_vars < 2) - { - free (vars); - msg (SE, _ ("At least two variables must be specified " - "on PAIRS.")); - return 0; - } - - /* how many ways can you pick 2 from n_vars ? 
*/ - n_pairs_local = n_vars * (n_vars - 1) / 2; - } + size_t n_total_pairs; + size_t i, j; + lex_match (lexer, '='); - /* Allocate storage for the pairs */ - pairs = xnrealloc (pairs, n_pairs + n_pairs_local, sizeof *pairs); + if (!parse_variables_const (lexer, dataset_dict (ds), &vars1, &n_vars1, + PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) + return 0; - /* Populate the pairs with the appropriate variables */ - if ( paired ) + if (lex_match (lexer, T_WITH)) { - int i; - - assert (n_pairs_local == n_vars / 2); - for (i = 0; i < n_pairs_local; ++i) - { - pairs[i].v[n_pairs] = vars[i]; - pairs[i].v[n_pairs + 1] = vars[i + n_pairs_local]; - } + if (!parse_variables_const (lexer, dataset_dict (ds), &vars2, &n_vars2, + PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) + { + free (vars1); + return 0; + } + + if (lex_match (lexer, '(') + && lex_match_id (lexer, "PAIRED") + && lex_match (lexer, ')')) + { + paired = true; + if (n_vars1 != n_vars2) + { + msg (SE, _("PAIRED was specified but the number of variables " + "preceding WITH (%zu) did not match the number " + "following (%zu)."), + n_vars1, n_vars2); + free (vars1); + free (vars2); + return 0; + } + } } - else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ + else { - int i,j; - size_t p = n_pairs; - - for (i=0 ; i < n_before_WITH ; ++i ) + if (n_vars1 < 2) { - for (j=0 ; j < n_after_WITH ; ++j) - { - pairs[p].v[0] = vars[i]; - pairs[p].v[1] = vars[j+n_before_WITH]; - ++p; - } + free (vars1); + msg (SE, _("At least two variables must be specified on PAIRS.")); + return 0; } } - else /* Neither WITH nor PAIRED given */ - { - size_t i,j; - size_t p=n_pairs; - for (i=0 ; i < n_vars ; ++i ) - { - for (j=i+1 ; j < n_vars ; ++j) - { - pairs[p].v[0] = vars[i]; - pairs[p].v[1] = vars[j]; - ++p; - } - } - } + /* Allocate storage for the new pairs. */ + n_total_pairs = proc->n_pairs + (paired ? n_vars1 + : n_vars2 > 0 ? 
n_vars1 * n_vars2 + : n_vars1 * (n_vars1 - 1) / 2); + proc->pairs = xnrealloc (proc->pairs, n_total_pairs, sizeof *proc->pairs); - n_pairs+=n_pairs_local; + /* Populate the pairs with the appropriate variables. */ + if (paired) + for (i = 0; i < n_vars1; i++) + add_pair (proc, vars1[i], vars2[i]); + else if (n_vars2 > 0) + for (i = 0; i < n_vars1; i++) + for (j = 0; j < n_vars2; j++) + add_pair (proc, vars1[i], vars2[j]); + else + for (i = 0; i < n_vars1; i++) + for (j = i + 1; j < n_vars1; j++) + add_pair (proc, vars1[i], vars1[j]); + assert (proc->n_pairs == n_total_pairs); - free (vars); + free (vars1); + free (vars2); return 1; } /* Parses the current token (numeric or string, depending on type) - value v and returns success. */ + value v and returns success. */ static int -parse_value (struct lexer *lexer, union value * v, enum val_type type) +parse_value (struct lexer *lexer, union value *v, int width) { - if (type == VAL_NUMERIC) + if (width == 0) { if (!lex_force_num (lexer)) return 0; @@ -586,462 +437,365 @@ parse_value (struct lexer *lexer, union value * v, enum val_type type) { if (!lex_force_string (lexer)) return 0; - memset (v->s, ' ', MAX_SHORT_STRING); - strncpy (v->s, ds_cstr (lex_tokstr (lexer)), ds_length (lex_tokstr (lexer))); + value_copy_str_rpad (v, width, ds_cstr (lex_tokstr (lexer)), ' '); } lex_get (lexer); return 1; } + +/* Implementation of the SSBOX object. 
*/ +static void ssbox_base_init (struct ssbox *, int cols, int rows); +static void ssbox_base_finalize (struct ssbox *); +static void ssbox_one_sample_init (struct ssbox *, struct t_test_proc *); +static void ssbox_independent_samples_init (struct ssbox *, struct t_test_proc *); +static void ssbox_paired_init (struct ssbox *, struct t_test_proc *); -/* Implementation of the SSBOX object */ - -void ssbox_base_init (struct ssbox *this, int cols,int rows); - -void ssbox_base_finalize (struct ssbox *ssb); - -void ssbox_one_sample_init (struct ssbox *this, - struct cmd_t_test *cmd ); - -void ssbox_independent_samples_init (struct ssbox *this, - struct cmd_t_test *cmd); - -void ssbox_paired_init (struct ssbox *this, - struct cmd_t_test *cmd); - - -/* Factory to create an ssbox */ -void -ssbox_create (struct ssbox *ssb, struct cmd_t_test *cmd, int mode) +/* Factory to create an ssbox. */ +static void +ssbox_create (struct ssbox *ssb, struct t_test_proc *proc) { - switch (mode) - { - case T_1_SAMPLE: - ssbox_one_sample_init (ssb,cmd); - break; - case T_IND_SAMPLES: - ssbox_independent_samples_init (ssb,cmd); - break; - case T_PAIRED: - ssbox_paired_init (ssb,cmd); - break; - default: - NOT_REACHED (); - } + switch (proc->mode) + { + case T_1_SAMPLE: + ssbox_one_sample_init (ssb, proc); + break; + case T_IND_SAMPLES: + ssbox_independent_samples_init (ssb, proc); + break; + case T_PAIRED: + ssbox_paired_init (ssb, proc); + break; + default: + NOT_REACHED (); + } } - - /* Despatcher for the populate method */ -void -ssbox_populate (struct ssbox *ssb, const struct dictionary *dict, - struct cmd_t_test *cmd) +static void +ssbox_populate (struct ssbox *ssb, struct t_test_proc *proc) { - ssb->populate (ssb, dict, cmd); + ssb->populate (ssb, proc); } - /* Despatcher for finalize */ -void +static void ssbox_finalize (struct ssbox *ssb) { ssb->finalize (ssb); } - /* Submit the box and clear up */ -void +static void ssbox_base_finalize (struct ssbox *ssb) { tab_submit (ssb->t); } - - 
/* Initialize a ssbox struct */ -void -ssbox_base_init (struct ssbox *this, int cols,int rows) +static void +ssbox_base_init (struct ssbox *this, int cols, int rows) { this->finalize = ssbox_base_finalize; this->t = tab_create (cols, rows, 0); tab_columns (this->t, SOM_COL_DOWN, 1); - tab_headers (this->t,0,0,1,0); - tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 ); - tab_hline (this->t, TAL_2,0,cols-1,1); + tab_headers (this->t, 0, 0, 1, 0); + tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); + tab_hline (this->t, TAL_2, 0, cols- 1, 1); tab_dim (this->t, tab_natural_dimensions, NULL); } + +/* ssbox implementations. */ -void ssbox_one_sample_populate (struct ssbox *ssb, - const struct dictionary *, - struct cmd_t_test *cmd); +static void ssbox_one_sample_populate (struct ssbox *, struct t_test_proc *); +static void ssbox_independent_samples_populate (struct ssbox *, + struct t_test_proc *); +static void ssbox_paired_populate (struct ssbox *, struct t_test_proc *); /* Initialize the one_sample ssbox */ -void -ssbox_one_sample_init (struct ssbox *this, - struct cmd_t_test *cmd ) +static void +ssbox_one_sample_init (struct ssbox *this, struct t_test_proc *proc) { - const int hsize=5; - const int vsize=cmd->n_variables+1; + const int hsize = 5; + const int vsize = proc->n_vars + 1; this->populate = ssbox_one_sample_populate; - ssbox_base_init (this, hsize,vsize); - tab_title (this->t, _ ("One-Sample Statistics")); - tab_vline (this->t, TAL_2, 1,0,vsize - 1); - tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("Mean")); - tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation")); - tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("SE. 
Mean")); + ssbox_base_init (this, hsize, vsize); + tab_title (this->t, _("One-Sample Statistics")); + tab_vline (this->t, TAL_2, 1, 0, vsize - 1); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); } -static void ssbox_independent_samples_populate (struct ssbox *ssb, - const struct dictionary *, - struct cmd_t_test *cmd); - /* Initialize the independent samples ssbox */ -void -ssbox_independent_samples_init (struct ssbox *this, - struct cmd_t_test *cmd) +static void +ssbox_independent_samples_init (struct ssbox *this, struct t_test_proc *proc) { int hsize=6; - int vsize = cmd->n_variables*2 +1; + int vsize = proc->n_vars * 2 + 1; this->populate = ssbox_independent_samples_populate; - ssbox_base_init (this, hsize,vsize); - tab_vline (this->t, TAL_GAP, 1, 0,vsize - 1); - tab_title (this->t, _ ("Group Statistics")); - tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, var_get_name (indep_var)); - tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("Mean")); - tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation")); - tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _ ("SE. Mean")); + ssbox_base_init (this, hsize, vsize); + tab_vline (this->t, TAL_GAP, 1, 0, vsize - 1); + tab_title (this->t, _("Group Statistics")); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, + var_get_name (proc->indep_var)); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. 
Mean")); } - /* Populate the ssbox for independent samples */ static void ssbox_independent_samples_populate (struct ssbox *ssb, - const struct dictionary *dict, - struct cmd_t_test *cmd) + struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - char *val_lab[2] = {NULL, NULL}; + char *val_lab[2]; double indep_value[2]; - char prefix[2][3]={"",""}; + char prefix[2][3]; - if ( var_is_numeric (indep_var) ) + for (i = 0; i < 2; i++) { - const char *s; + union value *value = &proc->g_value[i]; + int width = var_get_width (proc->indep_var); - s = var_lookup_value_label (indep_var, &gp.v.g_value[0]); - val_lab[0] = s ? xstrdup (s) : NULL; + indep_value[i] = (proc->criterion == CMP_LE ? proc->critical_value + : value->f); - s = var_lookup_value_label (indep_var, &gp.v.g_value[1]); - val_lab[1] = s ? xstrdup (s) : NULL; - } - else - { - val_lab[0] = xcalloc (sizeof (char), MAX_SHORT_STRING + 1); - val_lab[1] = xcalloc (sizeof (char), MAX_SHORT_STRING + 1); - memcpy (val_lab[0], gp.v.g_value[0].s, MAX_SHORT_STRING); - memcpy (val_lab[1], gp.v.g_value[1].s, MAX_SHORT_STRING); + if (val_type_from_width (width) == VAL_NUMERIC) + { + const char *s = var_lookup_value_label (proc->indep_var, value); + val_lab[i] = s ? 
xstrdup (s) : xasprintf ("%g", indep_value[i]); + } + else + val_lab[i] = xmemdup0 (value_str (value, width), width); } - if (gp.criterion == CMP_LE ) + if (proc->criterion == CMP_LE) { - strcpy (prefix[0],">="); - strcpy (prefix[1],"<"); - indep_value[0] = gp.v.critical_value; - indep_value[1] = gp.v.critical_value; + strcpy (prefix[0], ">="); + strcpy (prefix[1], "<"); } else { - indep_value[0] = gp.v.g_value[0].f; - indep_value[1] = gp.v.g_value[1].f; + strcpy (prefix[0], ""); + strcpy (prefix[1], ""); } - assert (ssb->t); - - for (i=0; i < cmd->n_variables; ++i) + for (i = 0; i < proc->n_vars; i++) { - const struct variable *var = cmd->v_variables[i]; + const struct variable *var = proc->vars[i]; struct hsh_table *grp_hash = group_proc_get (var)->group_hash; int count=0; - tab_text (ssb->t, 0, i*2+1, TAB_LEFT, - var_get_name (cmd->v_variables[i])); - - if (val_lab[0]) - tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, - "%s%s", prefix[0], val_lab[0]); - else - tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, - "%s%g", prefix[0], indep_value[0]); - - - if (val_lab[1]) - tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, - "%s%s", prefix[1], val_lab[1]); - else - tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, - "%s%g", prefix[1], indep_value[1]); - + tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT, + var_get_name (proc->vars[i])); + tab_text (ssb->t, 1, i * 2 + 1, TAB_LEFT | TAT_PRINTF, + "%s%s", prefix[0], val_lab[0]); + tab_text (ssb->t, 1, i * 2 + 1+ 1, TAB_LEFT | TAT_PRINTF, + "%s%s", prefix[1], val_lab[1]); /* Fill in the group statistics */ - for ( count = 0 ; count < 2 ; ++count ) + for (count = 0; count < 2; count++) { union value search_val; - struct group_statistics *gs; - if ( gp.criterion == CMP_LE ) - { - if ( count == 0 ) - { - /* >= case */ - search_val.f = gp.v.critical_value + 1.0; - } - else - { - /* less than ( < ) case */ - search_val.f = gp.v.critical_value - 1.0; - } - } + if (proc->criterion == CMP_LE) + search_val.f = 
proc->critical_value + (count == 0 ? 1.0 : -1.0); else - { - search_val = gp.v.g_value[count]; - } + search_val = proc->g_value[count]; - gs = hsh_find (grp_hash, (void *) &search_val); + gs = hsh_find (grp_hash, &search_val); assert (gs); - tab_double (ssb->t, 2, i*2+count+1, TAB_RIGHT, gs->n, wfmt); - tab_double (ssb->t, 3, i*2+count+1, TAB_RIGHT, gs->mean, NULL); - tab_double (ssb->t, 4, i*2+count+1, TAB_RIGHT, gs->std_dev, NULL); - tab_double (ssb->t, 5, i*2+count+1, TAB_RIGHT, gs->se_mean, NULL); + tab_double (ssb->t, 2, i * 2 + count+ 1, TAB_RIGHT, gs->n, + &proc->weight_format); + tab_double (ssb->t, 3, i * 2 + count+ 1, TAB_RIGHT, gs->mean, NULL); + tab_double (ssb->t, 4, i * 2 + count+ 1, TAB_RIGHT, gs->std_dev, + NULL); + tab_double (ssb->t, 5, i * 2 + count+ 1, TAB_RIGHT, gs->se_mean, + NULL); } } free (val_lab[0]); free (val_lab[1]); } - -static void ssbox_paired_populate (struct ssbox *ssb, - const struct dictionary *dict, - struct cmd_t_test *cmd); - /* Initialize the paired values ssbox */ -void -ssbox_paired_init (struct ssbox *this, struct cmd_t_test *cmd UNUSED) +static void +ssbox_paired_init (struct ssbox *this, struct t_test_proc *proc) { - int hsize=6; - - int vsize = n_pairs*2+1; + int hsize = 6; + int vsize = proc->n_pairs * 2 + 1; this->populate = ssbox_paired_populate; - ssbox_base_init (this, hsize,vsize); - tab_title (this->t, _ ("Paired Sample Statistics")); - tab_vline (this->t,TAL_GAP,1,0,vsize-1); - tab_vline (this->t,TAL_2,2,0,vsize-1); - tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("Mean")); - tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation")); - tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _ ("SE. 
Mean")); + ssbox_base_init (this, hsize, vsize); + tab_title (this->t, _("Paired Sample Statistics")); + tab_vline (this->t, TAL_GAP, 1, 0, vsize - 1); + tab_vline (this->t, TAL_2, 2, 0, vsize - 1); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); } - /* Populate the ssbox for paired values */ -void -ssbox_paired_populate (struct ssbox *ssb, const struct dictionary *dict, - struct cmd_t_test *cmd UNUSED) +static void +ssbox_paired_populate (struct ssbox *ssb, struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - assert (ssb->t); - - for (i=0; i < n_pairs; ++i) + for (i = 0; i < proc->n_pairs; i++) { + struct pair *p = &proc->pairs[i]; int j; - tab_text (ssb->t, 0, i*2+1, TAB_LEFT | TAT_PRINTF , _ ("Pair %d"),i); - - for (j=0 ; j < 2 ; ++j) + tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT | TAT_PRINTF, _("Pair %d"), i); + for (j=0; j < 2; j++) { - struct group_statistics *gs; - - gs = &group_proc_get (pairs[i].v[j])->ugs; - /* Titles */ - - tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, - var_get_name (pairs[i].v[j])); + tab_text (ssb->t, 1, i * 2 + j + 1, TAB_LEFT, + var_get_name (p->v[j])); /* Values */ - tab_double (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], NULL); - tab_double (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, wfmt); - tab_double (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], NULL); - tab_double (ssb->t,5, i*2+j+1, TAB_RIGHT, - pairs[i].std_dev[j]/sqrt (pairs[i].n), NULL); - + tab_double (ssb->t, 2, i * 2 + j + 1, TAB_RIGHT, p->mean[j], NULL); + tab_double (ssb->t, 3, i * 2 + j + 1, TAB_RIGHT, p->n, + &proc->weight_format); + tab_double (ssb->t, 4, i * 2 + j + 1, TAB_RIGHT, p->std_dev[j], + NULL); + tab_double (ssb->t, 5, i * 2 + j + 1, 
TAB_RIGHT, + p->std_dev[j] /sqrt (p->n), NULL); } } } /* Populate the one sample ssbox */ -void -ssbox_one_sample_populate (struct ssbox *ssb, const struct dictionary *dict, - struct cmd_t_test *cmd) +static void +ssbox_one_sample_populate (struct ssbox *ssb, struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - assert (ssb->t); - - for (i=0; i < cmd->n_variables; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; - tab_text (ssb->t, 0, i+1, TAB_LEFT, var_get_name (cmd->v_variables[i])); - tab_double (ssb->t,1, i+1, TAB_RIGHT, gs->n, wfmt); - tab_double (ssb->t,2, i+1, TAB_RIGHT, gs->mean, NULL); - tab_double (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, NULL); - tab_double (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, NULL); + tab_text (ssb->t, 0, i + 1, TAB_LEFT, var_get_name (proc->vars[i])); + tab_double (ssb->t, 1, i + 1, TAB_RIGHT, gs->n, &proc->weight_format); + tab_double (ssb->t, 2, i + 1, TAB_RIGHT, gs->mean, NULL); + tab_double (ssb->t, 3, i + 1, TAB_RIGHT, gs->std_dev, NULL); + tab_double (ssb->t, 4, i + 1, TAB_RIGHT, gs->se_mean, NULL); } } - - - + /* Implementation of the Test Results box struct */ -void trbox_base_init (struct trbox *self,size_t n_vars, int cols); -void trbox_base_finalize (struct trbox *trb); - -void trbox_independent_samples_init (struct trbox *trb, - struct cmd_t_test *cmd ); - -static void trbox_independent_samples_populate (struct trbox *trb, - const struct dictionary *dict, - struct cmd_t_test *cmd); - -void trbox_one_sample_init (struct trbox *self, - struct cmd_t_test *cmd ); - -static void trbox_one_sample_populate (struct trbox *trb, - const struct dictionary *, - struct cmd_t_test *cmd); - -void trbox_paired_init (struct trbox *self, - struct cmd_t_test *cmd ); - -static void 
trbox_paired_populate (struct trbox *trb, - const struct dictionary *, - struct cmd_t_test *cmd); - - +static void trbox_base_init (struct trbox *, size_t n_vars, int cols); +static void trbox_base_finalize (struct trbox *); +static void trbox_independent_samples_init (struct trbox *, + struct t_test_proc *); +static void trbox_independent_samples_populate (struct trbox *, + struct t_test_proc *); +static void trbox_one_sample_init (struct trbox *, struct t_test_proc *); +static void trbox_one_sample_populate (struct trbox *, struct t_test_proc *); +static void trbox_paired_init (struct trbox *, struct t_test_proc *); +static void trbox_paired_populate (struct trbox *, struct t_test_proc *); /* Create a trbox according to mode*/ -void -trbox_create (struct trbox *trb, - struct cmd_t_test *cmd, int mode) +static void +trbox_create (struct trbox *trb, struct t_test_proc *proc) { - switch (mode) - { - case T_1_SAMPLE: - trbox_one_sample_init (trb,cmd); - break; - case T_IND_SAMPLES: - trbox_independent_samples_init (trb,cmd); - break; - case T_PAIRED: - trbox_paired_init (trb,cmd); - break; - default: - NOT_REACHED (); - } + switch (proc->mode) + { + case T_1_SAMPLE: + trbox_one_sample_init (trb, proc); + break; + case T_IND_SAMPLES: + trbox_independent_samples_init (trb, proc); + break; + case T_PAIRED: + trbox_paired_init (trb, proc); + break; + default: + NOT_REACHED (); + } } -/* Populate a trbox according to cmd */ +/* Populate a trbox according to proc */ static void -trbox_populate (struct trbox *trb, const struct dictionary *dict, - struct cmd_t_test *cmd) +trbox_populate (struct trbox *trb, struct t_test_proc *proc) { - trb->populate (trb, dict, cmd); + trb->populate (trb, proc); } /* Submit and destroy a trbox */ -void +static void trbox_finalize (struct trbox *trb) { trb->finalize (trb); } /* Initialize the independent samples trbox */ -void +static void trbox_independent_samples_init (struct trbox *self, - struct cmd_t_test *cmd UNUSED) + struct 
t_test_proc *proc) { - const int hsize=11; - const int vsize=cmd->n_variables*2+3; + const int hsize = 11; + const int vsize = proc->n_vars * 2 + 3; assert (self); self->populate = trbox_independent_samples_populate; - trbox_base_init (self,cmd->n_variables*2,hsize); - tab_title (self->t,_ ("Independent Samples Test")); - tab_hline (self->t,TAL_1,2,hsize-1,1); - tab_vline (self->t,TAL_2,2,0,vsize-1); - tab_vline (self->t,TAL_1,4,0,vsize-1); - tab_box (self->t,-1,-1,-1,TAL_1, 2,1,hsize-2,vsize-1); - tab_hline (self->t,TAL_1, hsize-2,hsize-1,2); - tab_box (self->t,-1,-1,-1,TAL_1, hsize-2,2,hsize-1,vsize-1); + trbox_base_init (self, proc->n_vars * 2, hsize); + tab_title (self->t, _("Independent Samples Test")); + tab_hline (self->t, TAL_1, 2, hsize - 1, 1); + tab_vline (self->t, TAL_2, 2, 0, vsize - 1); + tab_vline (self->t, TAL_1, 4, 0, vsize - 1); + tab_box (self->t, -1, -1, -1, TAL_1, 2, 1, hsize - 2, vsize - 1); + tab_hline (self->t, TAL_1, hsize - 2, hsize - 1, 2); + tab_box (self->t, -1, -1, -1, TAL_1, hsize - 2, 2, hsize - 1, vsize - 1); tab_joint_text (self->t, 2, 0, 3, 0, - TAB_CENTER,_ ("Levene's Test for Equality of Variances")); - tab_joint_text (self->t, 4,0,hsize-1,0, - TAB_CENTER,_ ("t-test for Equality of Means")); - - tab_text (self->t,2,2, TAB_CENTER | TAT_TITLE,_ ("F")); - tab_text (self->t,3,2, TAB_CENTER | TAT_TITLE,_ ("Sig.")); - tab_text (self->t,4,2, TAB_CENTER | TAT_TITLE,_ ("t")); - tab_text (self->t,5,2, TAB_CENTER | TAT_TITLE,_ ("df")); - tab_text (self->t,6,2, TAB_CENTER | TAT_TITLE,_ ("Sig. (2-tailed)")); - tab_text (self->t,7,2, TAB_CENTER | TAT_TITLE,_ ("Mean Difference")); - tab_text (self->t,8,2, TAB_CENTER | TAT_TITLE,_ ("Std. 
Error Difference")); - tab_text (self->t,9,2, TAB_CENTER | TAT_TITLE,_ ("Lower")); - tab_text (self->t,10,2, TAB_CENTER | TAT_TITLE,_ ("Upper")); + TAB_CENTER, _("Levene's Test for Equality of Variances")); + tab_joint_text (self->t, 4, 0, hsize- 1, 0, + TAB_CENTER, _("t-test for Equality of Means")); + + tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("F")); + tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig.")); + tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("t")); + tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); + tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference")); + tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Difference")); + tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (self->t, 10, 2, TAB_CENTER | TAT_TITLE, _("Upper")); tab_joint_text (self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF, - _ ("%g%% Confidence Interval of the Difference"), - cmd->criteria*100.0); - + _("%g%% Confidence Interval of the Difference"), + proc->criteria * 100.0); } /* Populate the independent samples trbox */ static void trbox_independent_samples_populate (struct trbox *self, - const struct dictionary *dict UNUSED, - struct cmd_t_test *cmd) + struct t_test_proc *proc) { int i; - assert (self); - for (i=0; i < cmd->n_variables; ++i) + for (i = 0; i < proc->n_vars; i++) { - double p,q; + double p, q; double t; double df; @@ -1052,789 +806,645 @@ trbox_independent_samples_populate (struct trbox *self, double std_err_diff; double mean_diff; - const struct variable *var = cmd->v_variables[i]; + double se2; + + const struct variable *var = proc->vars[i]; struct group_proc *grp_data = group_proc_get (var); struct hsh_table *grp_hash = grp_data->group_hash; - struct group_statistics *gs0 ; - struct group_statistics *gs1 ; + struct group_statistics *gs0; + struct group_statistics *gs1; union value search_val; - if ( gp.criterion 
== CMP_LE ) - search_val.f = gp.v.critical_value - 1.0; + if (proc->criterion == CMP_LE) + search_val.f = proc->critical_value - 1.0; else - search_val = gp.v.g_value[0]; + search_val = proc->g_value[0]; - gs0 = hsh_find (grp_hash, (void *) &search_val); + gs0 = hsh_find (grp_hash, &search_val); assert (gs0); - if ( gp.criterion == CMP_LE ) - search_val.f = gp.v.critical_value + 1.0; + if (proc->criterion == CMP_LE) + search_val.f = proc->critical_value + 1.0; else - search_val = gp.v.g_value[1]; + search_val = proc->g_value[1]; - gs1 = hsh_find (grp_hash, (void *) &search_val); + gs1 = hsh_find (grp_hash, &search_val); assert (gs1); - tab_text (self->t, 0, i*2+3, TAB_LEFT, var_get_name (cmd->v_variables[i])); - - tab_text (self->t, 1, i*2+3, TAB_LEFT, _ ("Equal variances assumed")); - - - tab_double (self->t, 2, i*2+3, TAB_CENTER, grp_data->levene, NULL); + tab_text (self->t, 0, i * 2 + 3, TAB_LEFT, var_get_name (proc->vars[i])); + tab_text (self->t, 1, i * 2 + 3, TAB_LEFT, _("Equal variances assumed")); + tab_double (self->t, 2, i * 2 + 3, TAB_CENTER, grp_data->levene, NULL); /* Now work out the significance of the Levene test */ - df1 = 1; df2 = grp_data->ugs.n - 2; + df1 = 1; + df2 = grp_data->ugs.n - 2; q = gsl_cdf_fdist_Q (grp_data->levene, df1, df2); + tab_double (self->t, 3, i * 2 + 3, TAB_CENTER, q, NULL); - tab_double (self->t, 3, i*2+3, TAB_CENTER, q, NULL); - - df = gs0->n + gs1->n - 2.0 ; - tab_double (self->t, 5, i*2+3, TAB_RIGHT, df, NULL); + df = gs0->n + gs1->n - 2.0; + tab_double (self->t, 5, i * 2 + 3, TAB_RIGHT, df, NULL); - pooled_variance = ( (gs0->n )*pow2 (gs0->s_std_dev) - + - (gs1->n )*pow2 (gs1->s_std_dev) - ) / df ; + pooled_variance = (gs0->n * pow2 (gs0->s_std_dev) + + gs1->n *pow2 (gs1->s_std_dev)) / df ; - t = (gs0->mean - gs1->mean) / sqrt (pooled_variance) ; - t /= sqrt ((gs0->n + gs1->n)/ (gs0->n*gs1->n)); + t = (gs0->mean - gs1->mean) / sqrt (pooled_variance); + t /= sqrt ((gs0->n + gs1->n) / (gs0->n * gs1->n)); - tab_double 
(self->t, 4, i*2+3, TAB_RIGHT, t, NULL); + tab_double (self->t, 4, i * 2 + 3, TAB_RIGHT, t, NULL); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); - tab_double (self->t, 6, i*2+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); + tab_double (self->t, 6, i * 2 + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), + NULL); mean_diff = gs0->mean - gs1->mean; - tab_double (self->t, 7, i*2+3, TAB_RIGHT, mean_diff, NULL); + tab_double (self->t, 7, i * 2 + 3, TAB_RIGHT, mean_diff, NULL); - std_err_diff = sqrt ( pow2 (gs0->se_mean) + pow2 (gs1->se_mean)); - tab_double (self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, NULL); - + std_err_diff = sqrt (pow2 (gs0->se_mean) + pow2 (gs1->se_mean)); + tab_double (self->t, 8, i * 2 + 3, TAB_RIGHT, std_err_diff, NULL); /* Now work out the confidence interval */ - q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + q = (1 - proc->criteria)/2.0; /* 2-tailed test */ - t = gsl_cdf_tdist_Qinv (q,df); - tab_double (self->t, 9, i*2+3, TAB_RIGHT, - mean_diff - t * std_err_diff, NULL); + t = gsl_cdf_tdist_Qinv (q, df); + tab_double (self->t, 9, i * 2 + 3, TAB_RIGHT, + mean_diff - t * std_err_diff, NULL); - tab_double (self->t, 10, i*2+3, TAB_RIGHT, - mean_diff + t * std_err_diff, NULL); + tab_double (self->t, 10, i * 2 + 3, TAB_RIGHT, + mean_diff + t * std_err_diff, NULL); - { - double se2; /* Now for the \sigma_1 != \sigma_2 case */ - tab_text (self->t, 1, i*2+3+1, - TAB_LEFT, _ ("Equal variances not assumed")); - + tab_text (self->t, 1, i * 2 + 3 + 1, + TAB_LEFT, _("Equal variances not assumed")); - se2 = (pow2 (gs0->s_std_dev)/ (gs0->n -1) ) + - (pow2 (gs1->s_std_dev)/ (gs1->n -1) ); + se2 = ((pow2 (gs0->s_std_dev) / (gs0->n - 1)) + + (pow2 (gs1->s_std_dev) / (gs1->n - 1))); - t = mean_diff / sqrt (se2) ; - tab_double (self->t, 4, i*2+3+1, TAB_RIGHT, t, NULL); + t = mean_diff / sqrt (se2); + tab_double (self->t, 4, i * 2 + 3 + 1, TAB_RIGHT, t, NULL); - df = pow2 (se2) / ( - (pow2 (pow2 (gs0->s_std_dev)/ (gs0->n - 1 )) - / (gs0->n -1 ) - ) - + - (pow2 (pow2 
(gs1->s_std_dev)/ (gs1->n - 1 )) - / (gs1->n -1 ) - ) - ) ; - - tab_double (self->t, 5, i*2+3+1, TAB_RIGHT, df, NULL); + df = pow2 (se2) / ((pow2 (pow2 (gs0->s_std_dev) / (gs0->n - 1)) + / (gs0->n - 1)) + + (pow2 (pow2 (gs1->s_std_dev) / (gs1->n - 1)) + / (gs1->n - 1))); + tab_double (self->t, 5, i * 2 + 3 + 1, TAB_RIGHT, df, NULL); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); - tab_double (self->t, 6, i*2+3+1, TAB_RIGHT, 2.0* (t>0?q:p), NULL); + tab_double (self->t, 6, i * 2 + 3 + 1, TAB_RIGHT, 2.0 * (t > 0 ? q : p), + NULL); /* Now work out the confidence interval */ - q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + q = (1 - proc->criteria) / 2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_double (self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, NULL); - - - tab_double (self->t, 8, i*2+3+1, TAB_RIGHT, std_err_diff, NULL); - - - tab_double (self->t, 9, i*2+3+1, TAB_RIGHT, - mean_diff - t * std_err_diff, NULL); - - tab_double (self->t, 10, i*2+3+1, TAB_RIGHT, - mean_diff + t * std_err_diff, NULL); - } + tab_double (self->t, 7, i * 2 + 3 + 1, TAB_RIGHT, mean_diff, NULL); + tab_double (self->t, 8, i * 2 + 3 + 1, TAB_RIGHT, std_err_diff, NULL); + tab_double (self->t, 9, i * 2 + 3 + 1, TAB_RIGHT, + mean_diff - t * std_err_diff, NULL); + tab_double (self->t, 10, i * 2 + 3 + 1, TAB_RIGHT, + mean_diff + t * std_err_diff, NULL); } } /* Initialize the paired samples trbox */ -void -trbox_paired_init (struct trbox *self, - struct cmd_t_test *cmd UNUSED) +static void +trbox_paired_init (struct trbox *self, struct t_test_proc *proc) { - const int hsize=10; - const int vsize=n_pairs+3; + const int vsize=proc->n_pairs+ 3; self->populate = trbox_paired_populate; - trbox_base_init (self,n_pairs,hsize); - tab_title (self->t, _ ("Paired Samples Test")); - tab_hline (self->t,TAL_1,2,6,1); - tab_vline (self->t,TAL_2,2,0,vsize - 1); - tab_joint_text (self->t,2,0,6,0,TAB_CENTER,_ ("Paired Differences")); - tab_box (self->t,-1,-1,-1,TAL_1, 2,1,6,vsize-1); - 
tab_box (self->t,-1,-1,-1,TAL_1, 6,0,hsize-1,vsize-1); - tab_hline (self->t,TAL_1,5,6, 2); - tab_vline (self->t,TAL_GAP,6,0,1); + trbox_base_init (self, proc->n_pairs, hsize); + tab_title (self->t, _("Paired Samples Test")); + tab_hline (self->t, TAL_1, 2, 6, 1); + tab_vline (self->t, TAL_2, 2, 0, vsize - 1); + tab_joint_text (self->t, 2, 0, 6, 0, TAB_CENTER, _("Paired Differences")); + tab_box (self->t, -1, -1, -1, TAL_1, 2, 1, 6, vsize - 1); + tab_box (self->t, -1, -1, -1, TAL_1, 6, 0, hsize - 1, vsize - 1); + tab_hline (self->t, TAL_1, 5, 6, 2); + tab_vline (self->t, TAL_GAP, 6, 0, 1); tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, - _ ("%g%% Confidence Interval of the Difference"), - cmd->criteria*100.0); - - tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _ ("Mean")); - tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation")); - tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _ ("Std. Error Mean")); - tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _ ("Lower")); - tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _ ("Upper")); - tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _ ("t")); - tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _ ("df")); - tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _ ("Sig. (2-tailed)")); + _("%g%% Confidence Interval of the Difference"), + proc->criteria*100.0); + + tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean")); + tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper")); + tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("t")); + tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. 
(2-tailed)")); } /* Populate the paired samples trbox */ static void trbox_paired_populate (struct trbox *trb, - const struct dictionary *dict, - struct cmd_t_test *cmd UNUSED) + struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - for (i=0; i < n_pairs; ++i) + for (i = 0; i < proc->n_pairs; i++) { - double p,q; + struct pair *pair = &proc->pairs[i]; + double p, q; double se_mean; - double n = pairs[i].n; + double n = pair->n; double t; double df = n - 1; - tab_text (trb->t, 0, i+3, TAB_LEFT | TAT_PRINTF, _ ("Pair %d"),i); - - tab_text (trb->t, 1, i+3, TAB_LEFT | TAT_PRINTF, "%s - %s", - var_get_name (pairs[i].v[0]), - var_get_name (pairs[i].v[1])); - - tab_double (trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, NULL); - - tab_double (trb->t, 3, i+3, TAB_RIGHT, pairs[i].std_dev_diff, NULL); + tab_text (trb->t, 0, i + 3, TAB_LEFT | TAT_PRINTF, _("Pair %d"), i); + tab_text (trb->t, 1, i + 3, TAB_LEFT | TAT_PRINTF, "%s - %s", + var_get_name (pair->v[0]), + var_get_name (pair->v[1])); + tab_double (trb->t, 2, i + 3, TAB_RIGHT, pair->mean_diff, NULL); + tab_double (trb->t, 3, i + 3, TAB_RIGHT, pair->std_dev_diff, NULL); /* SE Mean */ - se_mean = pairs[i].std_dev_diff / sqrt (n) ; - tab_double (trb->t, 4, i+3, TAB_RIGHT, se_mean, NULL); + se_mean = pair->std_dev_diff / sqrt (n); + tab_double (trb->t, 4, i + 3, TAB_RIGHT, se_mean, NULL); /* Now work out the confidence interval */ - q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + q = (1 - proc->criteria) / 2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_double (trb->t, 5, i+3, TAB_RIGHT, - pairs[i].mean_diff - t * se_mean , NULL); - - tab_double (trb->t, 6, i+3, TAB_RIGHT, - pairs[i].mean_diff + t * se_mean , NULL); + tab_double (trb->t, 5, i + 3, TAB_RIGHT, + pair->mean_diff - t * se_mean, NULL); + tab_double (trb->t, 6, i + 3, TAB_RIGHT, + pair->mean_diff + t * se_mean, NULL); - t = 
(pairs[i].mean[0] - pairs[i].mean[1]) - / sqrt ( - ( pow2 (pairs[i].s_std_dev[0]) + pow2 (pairs[i].s_std_dev[1]) - - 2 * pairs[i].correlation * - pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] ) - / (n - 1) - ); + t = ((pair->mean[0] - pair->mean[1]) + / sqrt ((pow2 (pair->s_std_dev[0]) + pow2 (pair->s_std_dev[1]) + - (2 * pair->correlation + * pair->s_std_dev[0] * pair->s_std_dev[1])) + / (n - 1))); - tab_double (trb->t, 7, i+3, TAB_RIGHT, t, NULL); + tab_double (trb->t, 7, i + 3, TAB_RIGHT, t, NULL); /* Degrees of freedom */ - tab_double (trb->t, 8, i+3, TAB_RIGHT, df, wfmt); - - p = gsl_cdf_tdist_P (t,df); - q = gsl_cdf_tdist_P (t,df); + tab_double (trb->t, 8, i + 3, TAB_RIGHT, df, &proc->weight_format); - tab_double (trb->t, 9, i+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); + p = gsl_cdf_tdist_P (t, df); + q = gsl_cdf_tdist_P (t, df); + tab_double (trb->t, 9, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL); } } /* Initialize the one sample trbox */ -void -trbox_one_sample_init (struct trbox *self, struct cmd_t_test *cmd ) +static void +trbox_one_sample_init (struct trbox *self, struct t_test_proc *proc) { - const int hsize=7; - const int vsize=cmd->n_variables+3; + const int hsize = 7; + const int vsize = proc->n_vars + 3; self->populate = trbox_one_sample_populate; - trbox_base_init (self, cmd->n_variables,hsize); - tab_title (self->t, _ ("One-Sample Test")); + trbox_base_init (self, proc->n_vars, hsize); + tab_title (self->t, _("One-Sample Test")); tab_hline (self->t, TAL_1, 1, hsize - 1, 1); tab_vline (self->t, TAL_2, 1, 0, vsize - 1); - tab_joint_text (self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF, - _ ("Test Value = %f"), cmd->n_testval[0]); + tab_joint_text (self->t, 1, 0, hsize - 1, 0, TAB_CENTER | TAT_PRINTF, + _("Test Value = %f"), proc->testval); - tab_box (self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1); + tab_box (self->t, -1, -1, -1, TAL_1, 1, 1, hsize - 1, vsize - 1); - tab_joint_text (self->t,5,1,6,1,TAB_CENTER | TAT_PRINTF, - _ ("%g%% Confidence 
Interval of the Difference"), - cmd->criteria*100.0); - - tab_vline (self->t,TAL_GAP,6,1,1); - tab_hline (self->t,TAL_1,5,6,2); - tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _ ("t")); - tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _ ("df")); - tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _ ("Sig. (2-tailed)")); - tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _ ("Mean Difference")); - tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _ ("Lower")); - tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _ ("Upper")); + tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, + _("%g%% Confidence Interval of the Difference"), + proc->criteria * 100.0); + tab_vline (self->t, TAL_GAP, 6, 1, 1); + tab_hline (self->t, TAL_1, 5, 6, 2); + tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _("t")); + tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); + tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference")); + tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper")); } - /* Populate the one sample trbox */ static void -trbox_one_sample_populate (struct trbox *trb, - const struct dictionary *dict, - struct cmd_t_test *cmd) +trbox_one_sample_populate (struct trbox *trb, struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : &F_8_0; - assert (trb->t); - for (i=0; i < cmd->n_variables; ++i) + for (i = 0; i < proc->n_vars; i++) { double t; - double p,q; + double p, q; double df; - struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; - + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; - tab_text (trb->t, 0, i+3, TAB_LEFT, var_get_name (cmd->v_variables[i])); + tab_text (trb->t, 0, i + 3, TAB_LEFT, var_get_name (proc->vars[i])); - t = (gs->mean - cmd->n_testval[0] ) * sqrt (gs->n) / gs->std_dev ; + t = (gs->mean - proc->testval) * sqrt (gs->n) / gs->std_dev; - tab_double (trb->t, 1, i+3, TAB_RIGHT, t, NULL); + tab_double (trb->t, 1, i + 3, TAB_RIGHT, t, NULL); /* degrees of freedom */ df = gs->n - 1; - tab_double (trb->t, 2, i+3, TAB_RIGHT, df, wfmt); + tab_double (trb->t, 2, i + 3, TAB_RIGHT, df, &proc->weight_format); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); /* Multiply by 2 to get 2-tailed significance, makeing sure we've got the correct tail*/ - tab_double (trb->t, 3, i+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); - - tab_double (trb->t, 4, i+3, TAB_RIGHT, gs->mean_diff, NULL); + tab_double (trb->t, 3, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? 
q : p), NULL); + tab_double (trb->t, 4, i + 3, TAB_RIGHT, gs->mean_diff, NULL); - q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + q = (1 - proc->criteria) / 2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_double (trb->t, 5, i+3, TAB_RIGHT, + tab_double (trb->t, 5, i + 3, TAB_RIGHT, gs->mean_diff - t * gs->se_mean, NULL); - - tab_double (trb->t, 6, i+3, TAB_RIGHT, + tab_double (trb->t, 6, i + 3, TAB_RIGHT, gs->mean_diff + t * gs->se_mean, NULL); } } /* Base initializer for the generalized trbox */ -void +static void trbox_base_init (struct trbox *self, size_t data_rows, int cols) { const size_t rows = 3 + data_rows; self->finalize = trbox_base_finalize; self->t = tab_create (cols, rows, 0); - tab_headers (self->t,0,0,3,0); - tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols -1, rows -1); - tab_hline (self->t, TAL_2,0,cols-1,3); + tab_headers (self->t, 0, 0, 3, 0); + tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1); + tab_hline (self->t, TAL_2, 0, cols- 1, 3); tab_dim (self->t, tab_natural_dimensions, NULL); } - /* Base finalizer for the trbox */ -void +static void trbox_base_finalize (struct trbox *trb) { tab_submit (trb->t); } - -/* Create , populate and submit the Paired Samples Correlation box */ +/* Create, populate and submit the Paired Samples Correlation box */ static void -pscbox (const struct dictionary *dict) +pscbox (struct t_test_proc *proc) { - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : &F_8_0; - - const int rows = 1 + n_pairs; - const int cols = 5; + const int rows=1+proc->n_pairs; + const int cols=5; int i; struct tab_table *table; - table = tab_create (cols,rows,0); + table = tab_create (cols, rows, 0); tab_columns (table, SOM_COL_DOWN, 1); - tab_headers (table,0,0,1,0); - tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 ); + tab_headers (table, 0, 0, 1, 0); + tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); tab_hline (table, TAL_2, 0, cols - 1, 1); tab_vline (table, TAL_2, 2, 0, rows - 1); tab_dim (table, tab_natural_dimensions, NULL); - tab_title (table, _ ("Paired Samples Correlations")); + tab_title (table, _("Paired Samples Correlations")); /* column headings */ - tab_text (table, 2,0, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (table, 3,0, TAB_CENTER | TAT_TITLE, _ ("Correlation")); - tab_text (table, 4,0, TAB_CENTER | TAT_TITLE, _ ("Sig.")); + tab_text (table, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (table, 3, 0, TAB_CENTER | TAT_TITLE, _("Correlation")); + tab_text (table, 4, 0, TAB_CENTER | TAT_TITLE, _("Sig.")); - for (i=0; i < n_pairs; ++i) + for (i = 0; i < proc->n_pairs; i++) { - double p,q; - - double df = pairs[i].n -2; - - double correlation_t = - pairs[i].correlation * sqrt (df) / - sqrt (1 - pow2 (pairs[i].correlation)); - + struct pair *pair = &proc->pairs[i]; + double p, q; + double df = pair->n -2; + double correlation_t = (pair->correlation * sqrt (df) / + sqrt (1 - pow2 (pair->correlation))); /* row headings */ - tab_text (table, 0,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _ ("Pair %d"), i); - - tab_text (table, 1,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _ ("%s & %s"), - var_get_name (pairs[i].v[0]), - var_get_name (pairs[i].v[1])); - + tab_text (table, 0, i + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, + _("Pair %d"), i); + tab_text (table, 1, i + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, + _("%s & %s"), + var_get_name (pair->v[0]), + var_get_name 
(pair->v[1])); /* row data */ - tab_double (table, 2, i+1, TAB_RIGHT, pairs[i].n, wfmt); - tab_double (table, 3, i+1, TAB_RIGHT, pairs[i].correlation, NULL); + tab_double (table, 2, i + 1, TAB_RIGHT, pair->n, &proc->weight_format); + tab_double (table, 3, i + 1, TAB_RIGHT, pair->correlation, NULL); p = gsl_cdf_tdist_P (correlation_t, df); q = gsl_cdf_tdist_Q (correlation_t, df); - - tab_double (table, 4, i+1, TAB_RIGHT, 2.0* (correlation_t>0?q:p), NULL); + tab_double (table, 4, i + 1, TAB_RIGHT, + 2.0 * (correlation_t > 0 ? q : p), NULL); } tab_submit (table); } - - - - + /* Calculation Implementation */ -/* Per case calculations common to all variants of the T test */ -static int +/* Calculations common to all variants of the T test. */ +static void common_calc (const struct dictionary *dict, - const struct ccase *c, - void *_cmd, - enum mv_class exclude) + struct t_test_proc *proc, + struct casereader *reader) { + struct ccase *c; int i; - struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd; - - double weight = dict_get_case_weight (dict, c, NULL); - - /* Listwise has to be implicit if the independent variable is missing ?? 
*/ - if ( cmd->sbc_groups ) + for (i = 0; i < proc->n_vars; i++) { - if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude)) - return 0; - } - - for (i = 0; i < cmd->n_variables ; ++i) - { - const struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v); - - if (!var_is_value_missing (v, val, exclude)) - { - struct group_statistics *gs; - gs = &group_proc_get (v)->ugs; - - gs->n += weight; - gs->sum += weight * val->f; - gs->ssq += weight * pow2 (val->f); - } + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; + gs->sum = 0; + gs->n = 0; + gs->ssq = 0; + gs->sum_diff = 0; } - return 0; -} - -/* Pre calculations common to all variants of the T test */ -static void -common_precalc ( struct cmd_t_test *cmd ) -{ - int i=0; - for (i=0; i< cmd->n_variables ; ++i) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { - struct group_statistics *gs; - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - gs->sum=0; - gs->n=0; - gs->ssq=0; - gs->sum_diff=0; + double weight = dict_get_case_weight (dict, c, NULL); + + /* Listwise has to be implicit if the independent variable + is missing ?? 
*/ + if (proc->mode == T_IND_SAMPLES) + { + if (var_is_value_missing (proc->indep_var, + case_data (c, proc->indep_var), + proc->exclude)) + continue; + } + + for (i = 0; i < proc->n_vars; i++) + { + const struct variable *v = proc->vars[i]; + const union value *val = case_data (c, v); + + if (!var_is_value_missing (v, val, proc->exclude)) + { + struct group_statistics *gs; + gs = &group_proc_get (v)->ugs; + + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2 (val->f); + } + } } -} + casereader_destroy (reader); -/* Post calculations common to all variants of the T test */ -void -common_postcalc (struct cmd_t_test *cmd) -{ - int i=0; - - for (i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_statistics *gs; - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - gs->mean=gs->sum / gs->n; - gs->s_std_dev= sqrt ( - ( (gs->ssq / gs->n ) - pow2 (gs->mean)) - ) ; - - gs->std_dev= sqrt ( - gs->n/ (gs->n-1) * - ( (gs->ssq / gs->n ) - pow2 (gs->mean)) - ) ; + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; + gs->mean = gs->sum / gs->n; + gs->s_std_dev = sqrt (((gs->ssq / gs->n) - pow2 (gs->mean))); + gs->std_dev = sqrt (gs->n / (gs->n- 1) + * ((gs->ssq / gs->n) - pow2 (gs->mean))); gs->se_mean = gs->std_dev / sqrt (gs->n); - gs->mean_diff= gs->sum_diff / gs->n; + gs->mean_diff = gs->sum_diff / gs->n; } } -/* Per case calculations for one sample t test */ +/* Calculations for one sample T test. 
*/ static int -one_sample_calc (const struct dictionary *dict, - const struct ccase *c, void *cmd_, - enum mv_class exclude) +one_sample_calc (const struct dictionary *dict, struct t_test_proc *proc, + struct casereader *reader) { + struct ccase *c; int i; - struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; - - double weight = dict_get_case_weight (dict, c, NULL); - - - for (i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_statistics *gs; - const struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v); - - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - if (!var_is_value_missing (v, val, exclude)) - gs->sum_diff += weight * (val->f - cmd->n_testval[0]); + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; + gs->sum_diff = 0; } - return 0; -} - -/* Pre calculations for one sample t test */ -static void -one_sample_precalc ( struct cmd_t_test *cmd ) -{ - int i=0; - - for (i=0; i< cmd->n_variables ; ++i) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { - struct group_statistics *gs; - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - gs->sum_diff=0; + double weight = dict_get_case_weight (dict, c, NULL); + for (i = 0; i < proc->n_vars; i++) + { + const struct variable *v = proc->vars[i]; + struct group_statistics *gs = &group_proc_get (v)->ugs; + const union value *val = case_data (c, v); + if (!var_is_value_missing (v, val, proc->exclude)) + gs->sum_diff += weight * (val->f - proc->testval); + } } -} -/* Post calculations for one sample t test */ -static void -one_sample_postcalc (struct cmd_t_test *cmd) -{ - int i=0; - - for (i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_statistics *gs; - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - gs->mean_diff = gs->sum_diff / gs->n ; + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; + gs->mean_diff = gs->sum_diff / gs->n; } -} - - -static void 
-paired_precalc (struct cmd_t_test *cmd UNUSED) -{ - int i; - - for (i=0; i < n_pairs ; ++i ) - { - pairs[i].n = 0; - pairs[i].sum[0] = 0; pairs[i].sum[1] = 0; - pairs[i].ssq[0] = 0; pairs[i].ssq[1] = 0; - pairs[i].sum_of_prod = 0; - pairs[i].correlation = 0; - pairs[i].sum_of_diffs = 0; - pairs[i].ssq_diffs = 0; - } + casereader_destroy (reader); + return 0; } - static int -paired_calc (const struct dictionary *dict, const struct ccase *c, - struct cmd_t_test *cmd UNUSED, enum mv_class exclude) +paired_calc (const struct dictionary *dict, struct t_test_proc *proc, + struct casereader *reader) { + struct ccase *c; int i; - double weight = dict_get_case_weight (dict, c, NULL); - - for (i=0; i < n_pairs ; ++i ) + for (i = 0; i < proc->n_pairs; i++) { - const struct variable *v0 = pairs[i].v[0]; - const struct variable *v1 = pairs[i].v[1]; - - const union value *val0 = case_data (c, v0); - const union value *val1 = case_data (c, v1); - - if (!var_is_value_missing (v0, val0, exclude) && - !var_is_value_missing (v1, val1, exclude)) - { - pairs[i].n += weight; - pairs[i].sum[0] += weight * val0->f; - pairs[i].sum[1] += weight * val1->f; - - pairs[i].ssq[0] += weight * pow2 (val0->f); - pairs[i].ssq[1] += weight * pow2 (val1->f); - - pairs[i].sum_of_prod += weight * val0->f * val1->f ; - - pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ; - pairs[i].ssq_diffs += weight * pow2 (val0->f - val1->f); - } + struct pair *pair = &proc->pairs[i]; + pair->n = 0; + pair->sum[0] = pair->sum[1] = 0; + pair->ssq[0] = pair->ssq[1] = 0; + pair->sum_of_prod = 0; + pair->correlation = 0; + pair->sum_of_diffs = 0; + pair->ssq_diffs = 0; } - return 0; -} - -static void -paired_postcalc (struct cmd_t_test *cmd UNUSED) -{ - int i; + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) + { + double weight = dict_get_case_weight (dict, c, NULL); + for (i = 0; i < proc->n_pairs; i++) + { + struct pair *pair = &proc->pairs[i]; + const struct variable *v0 = pair->v[0]; + const 
struct variable *v1 = pair->v[1]; + + const union value *val0 = case_data (c, v0); + const union value *val1 = case_data (c, v1); + + if (!var_is_value_missing (v0, val0, proc->exclude) + && !var_is_value_missing (v1, val1, proc->exclude)) + { + pair->n += weight; + pair->sum[0] += weight * val0->f; + pair->sum[1] += weight * val1->f; + pair->ssq[0] += weight * pow2 (val0->f); + pair->ssq[1] += weight * pow2 (val1->f); + pair->sum_of_prod += weight * val0->f * val1->f; + pair->sum_of_diffs += weight * (val0->f - val1->f); + pair->ssq_diffs += weight * pow2 (val0->f - val1->f); + } + } + } - for (i=0; i < n_pairs ; ++i ) + for (i = 0; i < proc->n_pairs; i++) { + struct pair *pair = &proc->pairs[i]; + const double n = pair->n; int j; - const double n = pairs[i].n; - for (j=0; j < 2 ; ++j) + for (j=0; j < 2; j++) { - pairs[i].mean[j] = pairs[i].sum[j] / n ; - pairs[i].s_std_dev[j] = sqrt ((pairs[i].ssq[j] / n - - pow2 (pairs[i].mean[j])) - ); - - pairs[i].std_dev[j] = sqrt (n/ (n-1)* (pairs[i].ssq[j] / n - - pow2 (pairs[i].mean[j])) - ); + pair->mean[j] = pair->sum[j] / n; + pair->s_std_dev[j] = sqrt ((pair->ssq[j] / n + - pow2 (pair->mean[j]))); + pair->std_dev[j] = sqrt (n / (n- 1) * (pair->ssq[j] / n + - pow2 (pair->mean[j]))); } - pairs[i].correlation = pairs[i].sum_of_prod / pairs[i].n - - pairs[i].mean[0] * pairs[i].mean[1] ; + pair->correlation = (pair->sum_of_prod / pair->n + - pair->mean[0] * pair->mean[1]); /* correlation now actually contains the covariance */ + pair->correlation /= pair->std_dev[0] * pair->std_dev[1]; + pair->correlation *= pair->n / (pair->n - 1); - pairs[i].correlation /= pairs[i].std_dev[0] * pairs[i].std_dev[1]; - pairs[i].correlation *= pairs[i].n / ( pairs[i].n - 1 ); - - pairs[i].mean_diff = pairs[i].sum_of_diffs / n ; - - pairs[i].std_dev_diff = sqrt ( n / (n - 1) * ( - ( pairs[i].ssq_diffs / n ) - - - pow2 (pairs[i].mean_diff ) - ) ); + pair->mean_diff = pair->sum_of_diffs / n; + pair->std_dev_diff = sqrt (n / (n - 1) * 
((pair->ssq_diffs / n) + - pow2 (pair->mean_diff))); } + + casereader_destroy (reader); + return 0; } -static void -group_precalc (struct cmd_t_test *cmd ) +static int +group_calc (const struct dictionary *dict, struct t_test_proc *proc, + struct casereader *reader) { + struct ccase *c; int i; - int j; - for (i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_proc *ttpr = group_proc_get (cmd->v_variables[i]); + struct group_proc *ttpr = group_proc_get (proc->vars[i]); + int j; /* There's always 2 groups for a T - TEST */ ttpr->n_groups = 2; - - gp.indep_var = indep_var; - ttpr->group_hash = hsh_create (2, - (hsh_compare_func *) compare_group_binary, - (hsh_hash_func *) hash_group_binary, - (hsh_free_func *) free_group, - (void *) &gp ); + (hsh_compare_func *) compare_group_binary, + (hsh_hash_func *) hash_group_binary, + (hsh_free_func *) free_group, + proc); - for (j=0 ; j < 2 ; ++j) + for (j = 0; j < 2; j++) { struct group_statistics *gs = xmalloc (sizeof *gs); - gs->sum = 0; gs->n = 0; gs->ssq = 0; - - if ( gp.criterion == CMP_EQ ) - { - gs->id = gp.v.g_value[j]; - } + if (proc->criterion == CMP_EQ) + gs->id = proc->g_value[j]; else { - if ( j == 0 ) - gs->id.f = gp.v.critical_value - 1.0 ; + if (j == 0) + gs->id.f = proc->critical_value - 1.0; else - gs->id.f = gp.v.critical_value + 1.0 ; + gs->id.f = proc->critical_value + 1.0; } - hsh_insert ( ttpr->group_hash, (void *) gs ); + hsh_insert (ttpr->group_hash, gs); } } -} - -static int -group_calc (const struct dictionary *dict, - const struct ccase *c, struct cmd_t_test *cmd, - enum mv_class exclude) -{ - int i; - - const double weight = dict_get_case_weight (dict, c, NULL); - - const union value *gv; - - if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude)) - return 0; - - gv = case_data (c, indep_var); - - for (i=0; i< cmd->n_variables ; ++i) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { - const struct variable *var = 
cmd->v_variables[i]; - const union value *val = case_data (c, var); - struct hsh_table *grp_hash = group_proc_get (var)->group_hash; - struct group_statistics *gs; - - gs = hsh_find (grp_hash, (void *) gv); - - /* If the independent variable doesn't match either of the values - for this case then move on to the next case */ - if ( ! gs ) - return 0; - - if (!var_is_value_missing (var, val, exclude)) - { - gs->n += weight; - gs->sum += weight * val->f; - gs->ssq += weight * pow2 (val->f); - } + const double weight = dict_get_case_weight (dict, c, NULL); + const union value *gv; + + if (var_is_value_missing (proc->indep_var, + case_data (c, proc->indep_var), proc->exclude)) + continue; + + gv = case_data (c, proc->indep_var); + for (i = 0; i < proc->n_vars; i++) + { + const struct variable *var = proc->vars[i]; + const union value *val = case_data (c, var); + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; + struct group_statistics *gs = hsh_find (grp_hash, gv); + + /* If the independent variable doesn't match either of the values + for this case then move on to the next case. 
*/ + if (gs == NULL) + break; + + if (!var_is_value_missing (var, val, proc->exclude)) + { + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2 (val->f); + } + } } - return 0; -} - - -static void -group_postcalc ( struct cmd_t_test *cmd ) -{ - int i; - - for (i = 0; i < cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - const struct variable *var = cmd->v_variables[i]; + const struct variable *var = proc->vars[i]; struct hsh_table *grp_hash = group_proc_get (var)->group_hash; struct hsh_iterator g; struct group_statistics *gs; - int count=0; + int count = 0; - for (gs = hsh_first (grp_hash,&g); - gs != 0; - gs = hsh_next (grp_hash,&g)) + for (gs = hsh_first (grp_hash, &g); gs != NULL; + gs = hsh_next (grp_hash, &g)) { gs->mean = gs->sum / gs->n; - - gs->s_std_dev= sqrt ( - ( (gs->ssq / gs->n ) - pow2 (gs->mean)) - ) ; - - gs->std_dev= sqrt ( - gs->n/ (gs->n-1) * - ( (gs->ssq / gs->n ) - pow2 (gs->mean)) - ) ; - + gs->s_std_dev = sqrt (((gs->ssq / gs->n) - pow2 (gs->mean))); + gs->std_dev = sqrt (gs->n / (gs->n- 1) + * ((gs->ssq / gs->n) - pow2 (gs->mean))); gs->se_mean = gs->std_dev / sqrt (gs->n); - count ++; + count++; } assert (count == 2); } -} + casereader_destroy (reader); + return 0; +} static void -calculate (struct cmd_t_test *cmd, - struct casereader *input, const struct dataset *ds) +calculate (struct t_test_proc *proc, + struct casereader *input, const struct dataset *ds) { const struct dictionary *dict = dataset_dict (ds); struct ssbox stat_summary_box; struct trbox test_results_box; - - struct casereader *pass1, *pass2, *pass3; struct taint *taint; struct ccase *c; - enum mv_class exclude = cmd->miss != TTS_INCLUDE ? 
MV_ANY : MV_SYSTEM; - c = casereader_peek (input, 0); if (c == NULL) { @@ -1844,94 +1454,90 @@ calculate (struct cmd_t_test *cmd, output_split_file_values (ds, c); case_unref (c); - if ( cmd->miss == TTS_LISTWISE ) + if (proc->listwise_missing) input = casereader_create_filter_missing (input, - cmd->v_variables, - cmd->n_variables, - exclude, NULL, NULL); - + proc->vars, + proc->n_vars, + proc->exclude, NULL, NULL); input = casereader_create_filter_weight (input, dict, NULL, NULL); - taint = taint_clone (casereader_get_taint (input)); - casereader_split (input, &pass1, &pass2); - common_precalc (cmd); - for (; (c = casereader_read (pass1)) != NULL; case_unref (c)) - common_calc (dict, c, cmd, exclude); - casereader_destroy (pass1); - common_postcalc (cmd); - - switch (mode) + common_calc (dict, proc, casereader_clone (input)); + switch (proc->mode) { case T_1_SAMPLE: - one_sample_precalc (cmd); - for (; (c = casereader_read (pass2)) != NULL; case_unref (c)) - one_sample_calc (dict, c, cmd, exclude); - one_sample_postcalc (cmd); + one_sample_calc (dict, proc, input); break; case T_PAIRED: - paired_precalc (cmd); - for (; (c = casereader_read (pass2)) != NULL; case_unref (c)) - paired_calc (dict, c, cmd, exclude); - paired_postcalc (cmd); + paired_calc (dict, proc, input); break; case T_IND_SAMPLES: - pass3 = casereader_clone (pass2); - - group_precalc (cmd); - for (; (c = casereader_read (pass2)) != NULL; case_unref (c)) - group_calc (dict, c, cmd, exclude); - group_postcalc (cmd); - - levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables, - exclude); + group_calc (dict, proc, casereader_clone (input)); + levene (dict, input, proc->indep_var, proc->n_vars, proc->vars, + proc->exclude); break; + default: + NOT_REACHED (); } - casereader_destroy (pass2); if (!taint_has_tainted_successor (taint)) { - ssbox_create (&stat_summary_box,cmd,mode); - ssbox_populate (&stat_summary_box, dict, cmd); + ssbox_create (&stat_summary_box, proc); + ssbox_populate 
(&stat_summary_box, proc); ssbox_finalize (&stat_summary_box); - if ( mode == T_PAIRED ) - pscbox (dict); + if (proc->mode == T_PAIRED) + pscbox (proc); - trbox_create (&test_results_box, cmd, mode); - trbox_populate (&test_results_box, dict, cmd); + trbox_create (&test_results_box, proc); + trbox_populate (&test_results_box, proc); trbox_finalize (&test_results_box); } taint_destroy (taint); } -short which_group (const struct group_statistics *g, - const struct group_properties *p); +/* return 0 if G belongs to group 0, + 1 if it belongs to group 1, + 2 if it belongs to neither group */ +static int +which_group (const struct group_statistics *g, + const struct t_test_proc *proc) +{ + int width = var_get_width (proc->indep_var); + + if (0 == value_compare_3way (&g->id, &proc->g_value[0], width)) + return 0; + + if (0 == value_compare_3way (&g->id, &proc->g_value[1], width)) + return 1; + + return 2; +} /* Return -1 if the id of a is less than b; +1 if greater than and 0 if equal */ static int compare_group_binary (const struct group_statistics *a, - const struct group_statistics *b, - const struct group_properties *p) + const struct group_statistics *b, + const struct t_test_proc *proc) { - short flag_a; - short flag_b; + int flag_a; + int flag_b; - if ( p->criterion == CMP_LE ) + if (proc->criterion == CMP_LE) { - flag_a = ( a->id.f < p->v.critical_value ) ; - flag_b = ( b->id.f < p->v.critical_value ) ; + flag_a = (a->id.f < proc->critical_value); + flag_b = (b->id.f < proc->critical_value); } else { - flag_a = which_group (a, p); - flag_b = which_group (b, p); + flag_a = which_group (a, proc); + flag_b = which_group (b, proc); } - if (flag_a < flag_b ) - return -1; + if (flag_a < flag_b) + return - 1; return (flag_a > flag_b); } @@ -1941,38 +1547,11 @@ compare_group_binary (const struct group_statistics *a, static unsigned hash_group_binary (const struct group_statistics *g, - const struct group_properties *p) -{ - short flag = -1; - - if ( p->criterion == 
CMP_LE ) - { - flag = ( g->id.f < p->v.critical_value ) ; - } - else if ( p->criterion == CMP_EQ) - { - flag = which_group (g,p); - } - else - NOT_REACHED (); - - return flag; -} - -/* return 0 if G belongs to group 0, - 1 if it belongs to group 1, - 2 if it belongs to neither group */ -short -which_group (const struct group_statistics *g, - const struct group_properties *p) + const struct t_test_proc *proc) { - if ( 0 == compare_values_short (&g->id, &p->v.g_value[0], p->indep_var)) - return 0; - - if ( 0 == compare_values_short (&g->id, &p->v.g_value[1], p->indep_var)) - return 1; - - return 2; + return (proc->criterion == CMP_LE + ? g->id.f < proc->critical_value + : which_group (g, proc)); } /* diff --git a/src/language/stats/wilcoxon.c b/src/language/stats/wilcoxon.c index ebc2f1e2..37d2b191 100644 --- a/src/language/stats/wilcoxon.c +++ b/src/language/stats/wilcoxon.c @@ -17,23 +17,33 @@ #include + #include "wilcoxon.h" -#include + +#include +#include +#include +#include + #include #include +#include +#include +#include #include -#include +#include +#include +#include #include -#include -#include -#include -#include +#include +#include #include -#include -#include +#include #include -#include -#include +#include + +#include "minmax.h" +#include "xalloc.h" static double append_difference (const struct ccase *c, casenumber n UNUSED, void *aux) @@ -79,10 +89,17 @@ wilcoxon_execute (const struct dataset *ds, struct wilcoxon_state *ws = xcalloc (sizeof (*ws), t2s->n_pairs); const struct variable *weight = dict_get_weight (dict); struct variable *weightx = var_create_internal (WEIGHT_IDX); + struct caseproto *proto; input = casereader_create_filter_weight (input, dict, &warn, NULL); + proto = caseproto_create (); + proto = caseproto_add_width (proto, 0); + proto = caseproto_add_width (proto, 0); + if (weight != NULL) + proto = caseproto_add_width (proto, 0); + for (i = 0 ; i < t2s->n_pairs; ++i ) { struct casereader *r = casereader_clone (input); @@ -91,8 +108,6 @@ 
wilcoxon_execute (const struct dataset *ds, struct subcase ordering; variable_pair *vp = &t2s->pairs[i]; - const int reader_width = weight ? 3 : 2; - ws[i].sign = var_create_internal (0); ws[i].absdiff = var_create_internal (1); @@ -101,12 +116,12 @@ wilcoxon_execute (const struct dataset *ds, NULL, NULL); subcase_init_var (&ordering, ws[i].absdiff, SC_ASCEND); - writer = sort_create_writer (&ordering, reader_width); + writer = sort_create_writer (&ordering, proto); subcase_destroy (&ordering); for (; (c = casereader_read (r)) != NULL; case_unref (c)) { - struct ccase *output = case_create (reader_width); + struct ccase *output = case_create (proto); double d = append_difference (c, 0, vp); if (d > 0) @@ -140,6 +155,7 @@ wilcoxon_execute (const struct dataset *ds, casereader_destroy (r); ws[i].reader = casewriter_make_reader (writer); } + caseproto_unref (proto); for (i = 0 ; i < t2s->n_pairs; ++i ) { diff --git a/src/language/tests/datasheet-check.c b/src/language/tests/datasheet-check.c index 53b62c31..27fb27fc 100644 --- a/src/language/tests/datasheet-check.c +++ b/src/language/tests/datasheet-check.c @@ -26,12 +26,13 @@ #include #include #include -#include #include #include +#include #include #include #include +#include #include #include @@ -55,34 +56,65 @@ lazy_callback (void *ds_) #define MAX_COLS 5 -/* Checks that READER contains the ROW_CNT rows and COLUMN_CNT +static bool +check_caseproto (struct mc *mc, const struct caseproto *benchmark, + const struct caseproto *test, const char *test_name) +{ + size_t n_columns = caseproto_get_n_widths (benchmark); + size_t col; + bool ok; + + if (n_columns != caseproto_get_n_widths (test)) + { + mc_error (mc, "%s column count (%zu) does not match expected (%zu)", + test_name, caseproto_get_n_widths (test), n_columns); + return false; + } + + ok = true; + for (col = 0; col < n_columns; col++) + { + int benchmark_width = caseproto_get_width (benchmark, col); + int test_width = caseproto_get_width (test, col); + if 
(benchmark_width != test_width) + { + mc_error (mc, "%s column %zu width (%d) differs from expected (%d)", + test_name, col, test_width, benchmark_width); + ok = false; + } + } + return ok; +} + +/* Checks that READER contains the N_ROWS rows and N_COLUMNS columns of data in ARRAY, reporting any errors via MC. */ static void check_datasheet_casereader (struct mc *mc, struct casereader *reader, - double array[MAX_ROWS][MAX_COLS], - size_t row_cnt, size_t column_cnt) + union value array[MAX_ROWS][MAX_COLS], + size_t n_rows, const struct caseproto *proto) { - if (casereader_get_case_cnt (reader) != row_cnt) + size_t n_columns = caseproto_get_n_widths (proto); + + if (!check_caseproto (mc, proto, casereader_get_proto (reader), + "casereader")) + return; + else if (casereader_get_case_cnt (reader) != n_rows) { if (casereader_get_case_cnt (reader) == CASENUMBER_MAX - && casereader_count_cases (reader) == row_cnt) + && casereader_count_cases (reader) == n_rows) mc_error (mc, "datasheet casereader has unknown case count"); else mc_error (mc, "casereader row count (%lu) does not match " "expected (%zu)", (unsigned long int) casereader_get_case_cnt (reader), - row_cnt); + n_rows); } - else if (casereader_get_value_cnt (reader) != column_cnt) - mc_error (mc, "casereader column count (%zu) does not match " - "expected (%zu)", - casereader_get_value_cnt (reader), column_cnt); else { struct ccase *c; size_t row; - for (row = 0; row < row_cnt; row++) + for (row = 0; row < n_rows; row++) { size_t col; @@ -90,40 +122,54 @@ check_datasheet_casereader (struct mc *mc, struct casereader *reader, if (c == NULL) { mc_error (mc, "casereader_read failed reading row %zu of %zu " - "(%zu columns)", row, row_cnt, column_cnt); + "(%zu columns)", row, n_rows, n_columns); return; } - for (col = 0; col < column_cnt; col++) - if (case_num_idx (c, col) != array[row][col]) - mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: " - "%g != %g", - row, col, row_cnt, column_cnt, - case_num_idx (c, col), 
array[row][col]); + for (col = 0; col < n_columns; col++) + { + int width = caseproto_get_width (proto, col); + if (!value_equal (case_data_idx (c, col), &array[row][col], + width)) + { + if (width == 0) + mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: " + "%g != %g", + row, col, n_rows, n_columns, + case_num_idx (c, col), array[row][col].f); + else + mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: " + "'%.*s' != '%.*s'", + row, col, n_rows, n_columns, + width, case_str_idx (c, col), + width, value_str (&array[row][col], width)); + } + } case_unref (c); } c = casereader_read (reader); if (c != NULL) - mc_error (mc, "casereader has extra cases (expected %zu)", row_cnt); + mc_error (mc, "casereader has extra cases (expected %zu)", n_rows); } } -/* Checks that datasheet DS contains has ROW_CNT rows, COLUMN_CNT +/* Checks that datasheet DS contains has N_ROWS rows, N_COLUMNS columns, and the same contents as ARRAY, reporting any mismatches via mc_error. Then, adds DS to MC as a new state. */ static void check_datasheet (struct mc *mc, struct datasheet *ds, - double array[MAX_ROWS][MAX_COLS], - size_t row_cnt, size_t column_cnt) + union value array[MAX_ROWS][MAX_COLS], + size_t n_rows, const struct caseproto *proto) { + size_t n_columns = caseproto_get_n_widths (proto); struct datasheet *ds2; struct casereader *reader; unsigned long int serial = 0; - assert (row_cnt < MAX_ROWS); - assert (column_cnt < MAX_COLS); + assert (n_rows < MAX_ROWS); + assert (n_columns < MAX_COLS); /* If it is a duplicate hash, discard the state before checking its consistency, to save time. */ @@ -134,33 +180,93 @@ check_datasheet (struct mc *mc, struct datasheet *ds, } /* Check contents of datasheet via datasheet functions. */ - if (row_cnt != datasheet_get_row_cnt (ds)) + if (!check_caseproto (mc, proto, datasheet_get_proto (ds), "datasheet")) + { + /* check_caseproto emitted errors already. 
*/ + } + else if (n_rows != datasheet_get_n_rows (ds)) mc_error (mc, "row count (%lu) does not match expected (%zu)", - (unsigned long int) datasheet_get_row_cnt (ds), row_cnt); - else if (column_cnt != datasheet_get_column_cnt (ds)) - mc_error (mc, "column count (%zu) does not match expected (%zu)", - datasheet_get_column_cnt (ds), column_cnt); + (unsigned long int) datasheet_get_n_rows (ds), n_rows); else { size_t row, col; + bool difference = false; - for (row = 0; row < row_cnt; row++) - for (col = 0; col < column_cnt; col++) + for (row = 0; row < n_rows; row++) + for (col = 0; col < n_columns; col++) { + int width = caseproto_get_width (proto, col); + union value *av = &array[row][col]; union value v; - if (!datasheet_get_value (ds, row, col, &v, 1)) + + value_init (&v, width); + if (!datasheet_get_value (ds, row, col, &v)) NOT_REACHED (); - if (v.f != array[row][col]) - mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: %g != %g", - row, col, row_cnt, column_cnt, v.f, array[row][col]); + if (!value_equal (&v, av, width)) + { + if (width == 0) + mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: " + "%g != %g", row, col, n_rows, n_columns, + v.f, av->f); + else + mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: " + "'%.*s' != '%.*s'", + row, col, n_rows, n_columns, + width, value_str (&v, width), + width, value_str (av, width)); + difference = true; + } + value_destroy (&v, width); } + + if (difference) + { + struct string s; + + mc_error (mc, "expected:"); + ds_init_empty (&s); + for (row = 0; row < n_rows; row++) + { + ds_clear (&s); + ds_put_format (&s, "row %zu:", row); + for (col = 0; col < n_columns; col++) + { + const union value *v = &array[row][col]; + int width = caseproto_get_width (proto, col); + if (width == 0) + ds_put_format (&s, " %g", v->f); + else + ds_put_format (&s, " '%.*s'", width, value_str (v, width)); + } + mc_error (mc, "%s", ds_cstr (&s)); + } + + mc_error (mc, "actual:"); + ds_init_empty (&s); + for (row = 0; row < n_rows; 
row++) + { + ds_clear (&s); + ds_put_format (&s, "row %zu:", row); + for (col = 0; col < n_columns; col++) + { + union value v; + value_init (&v, 0); + if (!datasheet_get_value (ds, row, col, &v)) + NOT_REACHED (); + ds_put_format (&s, " %g", v.f); + } + mc_error (mc, "%s", ds_cstr (&s)); + } + + ds_destroy (&s); + } } /* Check that datasheet contents are correct when read through casereader. */ ds2 = clone_datasheet (ds); reader = datasheet_make_reader (ds2); - check_datasheet_casereader (mc, reader, array, row_cnt, column_cnt); + check_datasheet_casereader (mc, reader, array, n_rows, proto); casereader_destroy (reader); /* Check that datasheet contents are correct when read through @@ -168,18 +274,18 @@ check_datasheet (struct mc *mc, struct datasheet *ds, valuable because otherwise there is no non-GUI code that uses the lazy_casereader. */ ds2 = clone_datasheet (ds); - reader = lazy_casereader_create (column_cnt, row_cnt, + reader = lazy_casereader_create (datasheet_get_proto (ds2), n_rows, lazy_callback, ds2, &serial); - check_datasheet_casereader (mc, reader, array, row_cnt, column_cnt); + check_datasheet_casereader (mc, reader, array, n_rows, proto); if (lazy_casereader_destroy (reader, serial)) { /* Lazy casereader was never instantiated. This will only happen if there are no rows (because in that case casereader_read never gets called). */ datasheet_destroy (ds2); - if (row_cnt != 0) + if (n_rows != 0) mc_error (mc, "lazy casereader not instantiated, but should " - "have been (size %zu,%zu)", row_cnt, column_cnt); + "have been (size %zu,%zu)", n_rows, n_columns); } else { @@ -188,9 +294,9 @@ check_datasheet (struct mc *mc, struct datasheet *ds, (casereader_read in this case) was performed on the lazy casereader. 
*/ casereader_destroy (reader); - if (row_cnt == 0) + if (n_rows == 0) mc_error (mc, "lazy casereader instantiated, but should not " - "have been (size %zu,%zu)", row_cnt, column_cnt); + "have been (size %zu,%zu)", n_rows, n_columns); } mc_add_state (mc, ds); @@ -198,32 +304,78 @@ check_datasheet (struct mc *mc, struct datasheet *ds, /* Extracts the contents of DS into DATA. */ static void -extract_data (const struct datasheet *ds, double data[MAX_ROWS][MAX_COLS]) +extract_data (const struct datasheet *ds, union value data[MAX_ROWS][MAX_COLS]) { - size_t column_cnt = datasheet_get_column_cnt (ds); - size_t row_cnt = datasheet_get_row_cnt (ds); + const struct caseproto *proto = datasheet_get_proto (ds); + size_t n_columns = datasheet_get_n_columns (ds); + size_t n_rows = datasheet_get_n_rows (ds); size_t row, col; - assert (row_cnt < MAX_ROWS); - assert (column_cnt < MAX_COLS); - for (row = 0; row < row_cnt; row++) - for (col = 0; col < column_cnt; col++) + assert (n_rows < MAX_ROWS); + assert (n_columns < MAX_COLS); + for (row = 0; row < n_rows; row++) + for (col = 0; col < n_columns; col++) { - union value v; - if (!datasheet_get_value (ds, row, col, &v, 1)) + int width = caseproto_get_width (proto, col); + union value *v = &data[row][col]; + value_init (v, width); + if (!datasheet_get_value (ds, row, col, v)) NOT_REACHED (); - data[row][col] = v.f; } } +/* Copies the contents of ODATA into DATA. Each of the N_ROWS + rows of ODATA and DATA must have prototype PROTO. 
*/ +static void +clone_data (size_t n_rows, const struct caseproto *proto, + union value odata[MAX_ROWS][MAX_COLS], + union value data[MAX_ROWS][MAX_COLS]) +{ + size_t n_columns = caseproto_get_n_widths (proto); + size_t row, col; + + assert (n_rows < MAX_ROWS); + assert (n_columns < MAX_COLS); + for (row = 0; row < n_rows; row++) + for (col = 0; col < n_columns; col++) + { + int width = caseproto_get_width (proto, col); + const union value *ov = &odata[row][col]; + union value *v = &data[row][col]; + value_init (v, width); + value_copy (v, ov, width); + } +} + +static void +release_data (size_t n_rows, const struct caseproto *proto, + union value data[MAX_ROWS][MAX_COLS]) +{ + size_t n_columns = caseproto_get_n_widths (proto); + size_t row, col; + + assert (n_rows < MAX_ROWS); + assert (n_columns < MAX_COLS); + for (col = 0; col < n_columns; col++) + { + int width = caseproto_get_width (proto, col); + if (value_needs_init (width)) + for (row = 0; row < n_rows; row++) + value_destroy (&data[row][col], width); + } +} + /* Clones the structure and contents of ODS into *DS, and the contents of ODATA into DATA. */ static void -clone_model (const struct datasheet *ods, double odata[MAX_ROWS][MAX_COLS], - struct datasheet **ds, double data[MAX_ROWS][MAX_COLS]) +clone_model (const struct datasheet *ods, + union value odata[MAX_ROWS][MAX_COLS], + struct datasheet **ds, + union value data[MAX_ROWS][MAX_COLS]) { *ds = clone_datasheet (ods); - memcpy (data, odata, MAX_ROWS * MAX_COLS * sizeof **data); + clone_data (datasheet_get_n_rows (ods), datasheet_get_proto (ods), + odata, data); } /* "init" function for struct mc_class. */ @@ -238,34 +390,41 @@ datasheet_mc_init (struct mc *mc) /* Create unbacked datasheet. */ ds = datasheet_create (NULL); mc_name_operation (mc, "empty datasheet"); - check_datasheet (mc, ds, NULL, 0, 0); + check_datasheet (mc, ds, NULL, 0, caseproto_create ()); } else { /* Create datasheet with backing. 
*/ struct casewriter *writer; struct casereader *reader; - double data[MAX_ROWS][MAX_COLS]; - int row; + union value data[MAX_ROWS][MAX_COLS]; + struct caseproto *proto; + int row, col; assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS); assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS); - writer = mem_writer_create (params->backing_cols); + /* XXX support different backing column widths */ + proto = caseproto_create (); + for (col = 0; col < params->backing_cols; col++) + proto = caseproto_add_width (proto, 0); + + writer = mem_writer_create (proto); for (row = 0; row < params->backing_rows; row++) { struct ccase *c; - int col; - c = case_create (params->backing_cols); + c = case_create (proto); for (col = 0; col < params->backing_cols; col++) { double value = params->next_value++; - data[row][col] = value; + data[row][col].f = value; case_data_rw_idx (c, col)->f = value; } casewriter_write (writer, c); } + caseproto_unref (proto); + reader = casewriter_make_reader (writer); assert (reader != NULL); @@ -273,7 +432,24 @@ datasheet_mc_init (struct mc *mc) mc_name_operation (mc, "datasheet with (%d,%d) backing", params->backing_rows, params->backing_cols); check_datasheet (mc, ds, data, - params->backing_rows, params->backing_cols); + params->backing_rows, proto); + } +} + +static void +value_from_param (union value *value, int width, int idx) +{ + if (width == 0) + value->f = idx; + else + { + unsigned int hash = hash_int (idx, 0); + char *string = value_str_rw (value, width); + int offset; + + assert (width < 32); + for (offset = 0; offset < width; offset++) + string[offset] = "ABCDEFGHIJ"[(hash >> offset) % 10]; } } @@ -283,92 +459,119 @@ datasheet_mc_mutate (struct mc *mc, const void *ods_) { struct datasheet_test_params *params = mc_get_aux (mc); + static const int widths[] = {0, 1, 11}; + const size_t n_widths = sizeof widths / sizeof *widths; + const struct datasheet *ods = ods_; - double odata[MAX_ROWS][MAX_COLS]; - 
double data[MAX_ROWS][MAX_COLS]; - size_t column_cnt = datasheet_get_column_cnt (ods); - size_t row_cnt = datasheet_get_row_cnt (ods); - size_t pos, new_pos, cnt; + union value odata[MAX_ROWS][MAX_COLS]; + union value data[MAX_ROWS][MAX_COLS]; + const struct caseproto *oproto = datasheet_get_proto (ods); + size_t n_columns = datasheet_get_n_columns (ods); + size_t n_rows = datasheet_get_n_rows (ods); + size_t pos, new_pos, cnt, width_idx; extract_data (ods, odata); - /* Insert all possible numbers of columns in all possible - positions. */ - for (pos = 0; pos <= column_cnt; pos++) - for (cnt = 0; cnt <= params->max_cols - column_cnt; cnt++) - if (mc_include_state (mc)) - { - struct datasheet *ds; - union value new[MAX_COLS]; - size_t i, j; - - mc_name_operation (mc, "insert %zu columns at %zu", cnt, pos); - clone_model (ods, odata, &ds, data); - - for (i = 0; i < cnt; i++) - new[i].f = params->next_value++; - - if (!datasheet_insert_columns (ds, new, cnt, pos)) - mc_error (mc, "datasheet_insert_columns failed"); + /* Insert a column in each possible position. 
*/ + if (n_columns < params->max_cols) + for (pos = 0; pos <= n_columns; pos++) + for (width_idx = 0; width_idx < n_widths; width_idx++) + if (mc_include_state (mc)) + { + int width = widths[width_idx]; + struct caseproto *proto; + struct datasheet *ds; + union value new; + size_t i; - for (i = 0; i < row_cnt; i++) - { - insert_range (&data[i][0], column_cnt, sizeof data[i][0], - pos, cnt); - for (j = 0; j < cnt; j++) - data[i][pos + j] = new[j].f; - } + mc_name_operation (mc, "insert column at %zu " + "(from %zu to %zu columns)", + pos, n_columns, n_columns + 1); + clone_model (ods, odata, &ds, data); - check_datasheet (mc, ds, data, row_cnt, column_cnt + cnt); - } + value_init (&new, width); + value_from_param (&new, width, params->next_value++); + if (!datasheet_insert_column (ds, &new, width, pos)) + mc_error (mc, "datasheet_insert_column failed"); + proto = caseproto_insert_width (caseproto_ref (oproto), + pos, width); + + for (i = 0; i < n_rows; i++) + { + insert_element (&data[i][0], n_columns, sizeof data[i][0], + pos); + value_init (&data[i][pos], width); + value_copy (&data[i][pos], &new, width); + } + value_destroy (&new, width); + + check_datasheet (mc, ds, data, n_rows, proto); + release_data (n_rows, proto, data); + caseproto_unref (proto); + } /* Delete all possible numbers of columns from all possible positions. 
*/ - for (pos = 0; pos < column_cnt; pos++) - for (cnt = 0; cnt < column_cnt - pos; cnt++) + for (pos = 0; pos < n_columns; pos++) + for (cnt = 0; cnt < n_columns - pos; cnt++) if (mc_include_state (mc)) { + struct caseproto *proto; struct datasheet *ds; - size_t i; + size_t i, j; - mc_name_operation (mc, "delete %zu columns at %zu", cnt, pos); + mc_name_operation (mc, "delete %zu columns at %zu " + "(from %zu to %zu columns)", + cnt, pos, n_columns, n_columns - cnt); clone_model (ods, odata, &ds, data); datasheet_delete_columns (ds, pos, cnt); + proto = caseproto_remove_widths (caseproto_ref (oproto), pos, cnt); - for (i = 0; i < row_cnt; i++) - remove_range (&data[i], column_cnt, sizeof *data[i], pos, cnt); + for (i = 0; i < n_rows; i++) + { + for (j = pos; j < pos + cnt; j++) + value_destroy (&data[i][j], caseproto_get_width (oproto, j)); + remove_range (&data[i], n_columns, sizeof *data[i], pos, cnt); + } - check_datasheet (mc, ds, data, row_cnt, column_cnt - cnt); + check_datasheet (mc, ds, data, n_rows, proto); + release_data (n_rows, proto, data); + caseproto_unref (proto); } /* Move all possible numbers of columns from all possible existing positions to all possible new positions. 
*/ - for (pos = 0; pos < column_cnt; pos++) - for (cnt = 0; cnt < column_cnt - pos; cnt++) - for (new_pos = 0; new_pos < column_cnt - cnt; new_pos++) + for (pos = 0; pos < n_columns; pos++) + for (cnt = 0; cnt < n_columns - pos; cnt++) + for (new_pos = 0; new_pos < n_columns - cnt; new_pos++) if (mc_include_state (mc)) { + struct caseproto *proto; struct datasheet *ds; size_t i; clone_model (ods, odata, &ds, data); - mc_name_operation (mc, "move %zu columns from %zu to %zu", - cnt, pos, new_pos); + mc_name_operation (mc, "move %zu columns (of %zu) from %zu to %zu", + cnt, n_columns, pos, new_pos); datasheet_move_columns (ds, pos, new_pos, cnt); - for (i = 0; i < row_cnt; i++) - move_range (&data[i], column_cnt, sizeof data[i][0], + for (i = 0; i < n_rows; i++) + move_range (&data[i], n_columns, sizeof data[i][0], pos, new_pos, cnt); + proto = caseproto_move_widths (caseproto_ref (oproto), + pos, new_pos, cnt); - check_datasheet (mc, ds, data, row_cnt, column_cnt); + check_datasheet (mc, ds, data, n_rows, proto); + release_data (n_rows, proto, data); + caseproto_unref (proto); } /* Insert all possible numbers of rows in all possible positions. 
*/ - for (pos = 0; pos <= row_cnt; pos++) - for (cnt = 0; cnt <= params->max_rows - row_cnt; cnt++) + for (pos = 0; pos <= n_rows; pos++) + for (cnt = 0; cnt <= params->max_rows - n_rows; cnt++) if (mc_include_state (mc)) { struct datasheet *ds; @@ -376,64 +579,80 @@ datasheet_mc_mutate (struct mc *mc, const void *ods_) size_t i, j; clone_model (ods, odata, &ds, data); - mc_name_operation (mc, "insert %zu rows at %zu", cnt, pos); + mc_name_operation (mc, "insert %zu rows at %zu " + "(from %zu to %zu rows)", + cnt, pos, n_rows, n_rows + cnt); for (i = 0; i < cnt; i++) { - c[i] = case_create (column_cnt); - for (j = 0; j < column_cnt; j++) - case_data_rw_idx (c[i], j)->f = params->next_value++; + c[i] = case_create (oproto); + for (j = 0; j < n_columns; j++) + value_from_param (case_data_rw_idx (c[i], j), + caseproto_get_width (oproto, j), + params->next_value++); } - insert_range (data, row_cnt, sizeof data[pos], pos, cnt); + insert_range (data, n_rows, sizeof data[pos], pos, cnt); for (i = 0; i < cnt; i++) - for (j = 0; j < column_cnt; j++) - data[i + pos][j] = case_num_idx (c[i], j); + for (j = 0; j < n_columns; j++) + { + int width = caseproto_get_width (oproto, j); + value_init (&data[i + pos][j], width); + value_copy (&data[i + pos][j], case_data_idx (c[i], j), width); + } if (!datasheet_insert_rows (ds, pos, c, cnt)) mc_error (mc, "datasheet_insert_rows failed"); - check_datasheet (mc, ds, data, row_cnt + cnt, column_cnt); + check_datasheet (mc, ds, data, n_rows + cnt, oproto); + release_data (n_rows + cnt, oproto, data); } /* Delete all possible numbers of rows from all possible positions. 
*/ - for (pos = 0; pos < row_cnt; pos++) - for (cnt = 0; cnt < row_cnt - pos; cnt++) + for (pos = 0; pos < n_rows; pos++) + for (cnt = 0; cnt < n_rows - pos; cnt++) if (mc_include_state (mc)) { struct datasheet *ds; clone_model (ods, odata, &ds, data); - mc_name_operation (mc, "delete %zu rows at %zu", cnt, pos); + mc_name_operation (mc, "delete %zu rows at %zu " + "(from %zu to %zu rows)", + cnt, pos, n_rows, n_rows - cnt); datasheet_delete_rows (ds, pos, cnt); - remove_range (&data[0], row_cnt, sizeof data[0], pos, cnt); + release_data (cnt, oproto, &data[pos]); + remove_range (&data[0], n_rows, sizeof data[0], pos, cnt); - check_datasheet (mc, ds, data, row_cnt - cnt, column_cnt); + check_datasheet (mc, ds, data, n_rows - cnt, oproto); + release_data (n_rows - cnt, oproto, data); } /* Move all possible numbers of rows from all possible existing positions to all possible new positions. */ - for (pos = 0; pos < row_cnt; pos++) - for (cnt = 0; cnt < row_cnt - pos; cnt++) - for (new_pos = 0; new_pos < row_cnt - cnt; new_pos++) + for (pos = 0; pos < n_rows; pos++) + for (cnt = 0; cnt < n_rows - pos; cnt++) + for (new_pos = 0; new_pos < n_rows - cnt; new_pos++) if (mc_include_state (mc)) { struct datasheet *ds; clone_model (ods, odata, &ds, data); - mc_name_operation (mc, "move %zu rows from %zu to %zu", - cnt, pos, new_pos); + mc_name_operation (mc, "move %zu rows (of %zu) from %zu to %zu", + cnt, n_rows, pos, new_pos); datasheet_move_rows (ds, pos, new_pos, cnt); - move_range (&data[0], row_cnt, sizeof data[0], + move_range (&data[0], n_rows, sizeof data[0], pos, new_pos, cnt); - check_datasheet (mc, ds, data, row_cnt, column_cnt); + check_datasheet (mc, ds, data, n_rows, oproto); + release_data (n_rows, oproto, data); } + + release_data (n_rows, oproto, odata); } /* "destroy" function for struct mc_class. */ @@ -452,7 +671,7 @@ datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_) Returns the results of the model checking run. 
*/ struct mc_results * -datasheet_test (struct mc_options *options, void *params_) +datasheet_test (struct mc_options *options UNUSED, void *params_ UNUSED) { struct datasheet_test_params *params = params_; static const struct mc_class datasheet_mc_class = diff --git a/src/language/xforms/compute.c b/src/language/xforms/compute.c index 9f65491e..09fc2155 100644 --- a/src/language/xforms/compute.c +++ b/src/language/xforms/compute.c @@ -175,7 +175,7 @@ compute_str (void *compute_, struct ccase **c, casenumber case_num) { *c = case_unshare (*c); expr_evaluate_str (compute->rvalue, *c, case_num, - case_data_rw (*c, compute->variable)->s, + case_str_rw (*c, compute->variable), compute->width); } @@ -216,8 +216,7 @@ compute_str_vec (void *compute_, struct ccase **c, casenumber case_num) vr = vector_get_var (compute->vector, rindx - 1); *c = case_unshare (*c); expr_evaluate_str (compute->rvalue, *c, case_num, - case_data_rw (*c, vr)->s, - var_get_width (vr)); + case_str_rw (*c, vr), var_get_width (vr)); } return TRNS_CONTINUE; diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c index e2074823..57a742c9 100644 --- a/src/language/xforms/recode.c +++ b/src/language/xforms/recode.c @@ -55,25 +55,18 @@ enum map_in_type MAP_CONVERT /* "123" => 123. */ }; -/* A value involved in a RECODE mapping. */ -union recode_value - { - double f; /* Numeric. */ - char *c; /* Short or long string. */ - }; - /* Describes input values to be mapped. */ struct map_in { enum map_in_type type; /* One of MAP_*. */ - union recode_value x, y; /* Source values. */ + union value x, y; /* Source values. */ }; /* Describes the value used as output from a mapping. */ struct map_out { bool copy_input; /* If true, copy input to output. */ - union recode_value value; /* If copy_input false, recoded value. */ + union value value; /* If copy_input false, recoded value. */ int width; /* If copy_input false, output value width. */ }; @@ -103,6 +96,8 @@ struct recode_trns /* Mappings. 
*/ struct mapping *mappings; /* Value mappings. */ size_t map_cnt; /* Number of mappings. */ + int max_src_width; /* Maximum width of src_vars[*]. */ + int max_dst_width; /* Maximum width of any map_out in mappings. */ }; static bool parse_src_vars (struct lexer *, struct recode_trns *, const struct dictionary *dict); @@ -193,18 +188,17 @@ parse_src_vars (struct lexer *lexer, static bool parse_mappings (struct lexer *lexer, struct recode_trns *trns) { - size_t max_src_width; size_t map_allocated; bool have_dst_type; size_t i; /* Find length of longest source variable. */ - max_src_width = var_get_width (trns->src_vars[0]); + trns->max_src_width = var_get_width (trns->src_vars[0]); for (i = 1; i < trns->var_cnt; i++) { size_t var_width = var_get_width (trns->src_vars[i]); - if (var_width > max_src_width) - max_src_width = var_width; + if (var_width > trns->max_src_width) + trns->max_src_width = var_width; } /* Parse the mappings in parentheses. */ @@ -232,7 +226,7 @@ parse_mappings (struct lexer *lexer, struct recode_trns *trns) struct map_in in; if (!parse_map_in (lexer, &in, trns->pool, - trns->src_type, max_src_width)) + trns->src_type, trns->max_src_width)) return false; add_mapping (trns, &map_allocated, &in); lex_match (lexer, ','); @@ -296,9 +290,7 @@ parse_map_in (struct lexer *lexer, struct map_in *in, struct pool *pool, { if (lex_match_id (lexer, "ELSE")) - { set_map_in_generic (in, MAP_ELSE); - } else if (src_type == VAL_NUMERIC) { if (lex_match_id (lexer, "MISSING")) @@ -376,8 +368,9 @@ set_map_in_str (struct map_in *in, struct pool *pool, const struct string *string, size_t width) { in->type = MAP_SINGLE; - in->x.c = pool_alloc_unaligned (pool, width); - buf_copy_rpad (in->x.c, width, ds_data (string), ds_length (string)); + value_init_pool (pool, &in->x, width); + value_copy_buf_rpad (&in->x, width, + ds_data (string), ds_length (string), ' '); } /* Parses a mapping output value into OUT, allocating memory from @@ -427,9 +420,17 @@ set_map_out_str 
(struct map_out *out, struct pool *pool, const char *string = ds_data (value); size_t length = ds_length (value); + if (length == 0) + { + /* A length of 0 will yield a numeric value, which is not + what we want. */ + string = " "; + length = 1; + } + out->copy_input = false; - out->value.c = pool_alloc_unaligned (pool, length); - memcpy (out->value.c, string, length); + value_init_pool (pool, &out->value, length); + memcpy (value_str_rw (&out->value, length), string, length); out->width = length; } @@ -514,26 +515,22 @@ parse_dst_vars (struct lexer *lexer, struct recode_trns *trns, static void enlarge_dst_widths (struct recode_trns *trns) { - size_t max_dst_width; size_t i; - max_dst_width = 0; + trns->max_dst_width = 0; for (i = 0; i < trns->var_cnt; i++) { const struct variable *v = trns->dst_vars[i]; - if (var_get_width (v) > max_dst_width) - max_dst_width = var_get_width (v); + if (var_get_width (v) > trns->max_dst_width) + trns->max_dst_width = var_get_width (v); } for (i = 0; i < trns->map_cnt; i++) { struct map_out *out = &trns->mappings[i].out; - if (!out->copy_input && out->width < max_dst_width) - { - char *s = pool_alloc_unaligned (trns->pool, max_dst_width + 1); - buf_copy_rpad (s, max_dst_width + 1, out->value.c, out->width); - out->value.c = s; - } + if (!out->copy_input) + value_resize_pool (trns->pool, &out->value, + out->width, trns->max_dst_width); } } @@ -601,7 +598,8 @@ find_src_numeric (struct recode_trns *trns, double value, const struct variable /* Returns the output mapping in TRNS for an input of VALUE with the given WIDTH, or a null pointer if there is no mapping. 
*/ static const struct map_out * -find_src_string (struct recode_trns *trns, const char *value, const struct variable *src_var) +find_src_string (struct recode_trns *trns, const char *value, + const struct variable *src_var) { struct mapping *m; int width = var_get_width (src_var); @@ -615,7 +613,8 @@ find_src_string (struct recode_trns *trns, const char *value, const struct varia switch (in->type) { case MAP_SINGLE: - match = !memcmp (value, in->x.c, width); + match = !memcmp (value, value_str (&in->x, trns->max_src_width), + width); break; case MAP_ELSE: match = true; @@ -657,36 +656,39 @@ recode_trns_proc (void *trns_, struct ccase **c, casenumber case_idx UNUSED) { const struct variable *src_var = trns->src_vars[i]; const struct variable *dst_var = trns->dst_vars[i]; - - const union value *src_data = case_data (*c, src_var); - union value *dst_data = case_data_rw (*c, dst_var); - const struct map_out *out; if (trns->src_type == VAL_NUMERIC) - out = find_src_numeric (trns, src_data->f, src_var); + out = find_src_numeric (trns, case_num (*c, src_var), src_var); else - out = find_src_string (trns, src_data->s, src_var); + out = find_src_string (trns, case_str (*c, src_var), src_var); if (trns->dst_type == VAL_NUMERIC) { + double *dst = &case_data_rw (*c, dst_var)->f; if (out != NULL) - dst_data->f = !out->copy_input ? out->value.f : src_data->f; + *dst = !out->copy_input ? 
out->value.f : case_num (*c, src_var); else if (trns->src_vars != trns->dst_vars) - dst_data->f = SYSMIS; + *dst = SYSMIS; } else { + char *dst = case_str_rw (*c, dst_var); if (out != NULL) { if (!out->copy_input) - memcpy (dst_data->s, out->value.c, var_get_width (dst_var)); + memcpy (dst, value_str (&out->value, trns->max_dst_width), + var_get_width (dst_var)); else if (trns->src_vars != trns->dst_vars) - buf_copy_rpad (dst_data->s, var_get_width (dst_var), - src_data->s, var_get_width (src_var)); + { + union value *dst_data = case_data_rw (*c, dst_var); + const union value *src_data = case_data (*c, src_var); + value_copy_rpad (dst_data, var_get_width (dst_var), + src_data, var_get_width (src_var), ' '); + } } else if (trns->src_vars != trns->dst_vars) - memset (dst_data->s, ' ', var_get_width (dst_var)); + memset (dst, ' ', var_get_width (dst_var)); } } diff --git a/src/libpspp/automake.mk b/src/libpspp/automake.mk index 3e086471..5e434206 100644 --- a/src/libpspp/automake.mk +++ b/src/libpspp/automake.mk @@ -61,12 +61,16 @@ src_libpspp_libpspp_la_SOURCES = \ src/libpspp/range-set.h \ src/libpspp/sparse-array.c \ src/libpspp/sparse-array.h \ + src/libpspp/sparse-xarray.c \ + src/libpspp/sparse-xarray.h \ src/libpspp/start-date.c \ src/libpspp/start-date.h \ src/libpspp/str.c \ src/libpspp/str.h \ src/libpspp/taint.c \ src/libpspp/taint.h \ + src/libpspp/tmpfile.c \ + src/libpspp/tmpfile.h \ src/libpspp/tower.c \ src/libpspp/tower.h \ src/libpspp/verbose-msg.c \ diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 3cff0492..44d325e0 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -125,9 +125,9 @@ str_compare_rpad (const char *a, const char *b) /* Copies string SRC to buffer DST, of size DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the right with - spaces as needed. */ + copies of PAD as needed. 
*/ void -buf_copy_str_rpad (char *dst, size_t dst_size, const char *src) +buf_copy_str_rpad (char *dst, size_t dst_size, const char *src, char pad) { size_t src_len = strlen (src); if (src_len >= dst_size) @@ -135,15 +135,15 @@ buf_copy_str_rpad (char *dst, size_t dst_size, const char *src) else { memcpy (dst, src, src_len); - memset (&dst[src_len], ' ', dst_size - src_len); + memset (&dst[src_len], pad, dst_size - src_len); } } /* Copies string SRC to buffer DST, of size DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the left with - spaces as needed. */ + copies of PAD as needed. */ void -buf_copy_str_lpad (char *dst, size_t dst_size, const char *src) +buf_copy_str_lpad (char *dst, size_t dst_size, const char *src, char pad) { size_t src_len = strlen (src); if (src_len >= dst_size) @@ -151,40 +151,42 @@ buf_copy_str_lpad (char *dst, size_t dst_size, const char *src) else { size_t pad_cnt = dst_size - src_len; - memset (&dst[0], ' ', pad_cnt); + memset (&dst[0], pad, pad_cnt); memcpy (dst + pad_cnt, src, src_len); } } /* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the left with - spaces as needed. */ + copies of PAD as needed. */ void buf_copy_lpad (char *dst, size_t dst_size, - const char *src, size_t src_size) + const char *src, size_t src_size, + char pad) { if (src_size >= dst_size) memmove (dst, src, dst_size); else { - memset (dst, ' ', dst_size - src_size); + memset (dst, pad, dst_size - src_size); memmove (&dst[dst_size - src_size], src, src_size); } } /* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the right with - spaces as needed. */ + copies of PAD as needed. 
*/ void buf_copy_rpad (char *dst, size_t dst_size, - const char *src, size_t src_size) + const char *src, size_t src_size, + char pad) { if (src_size >= dst_size) memmove (dst, src, dst_size); else { memmove (dst, src, src_size); - memset (&dst[src_size], ' ', dst_size - src_size); + memset (&dst[src_size], pad, dst_size - src_size); } } diff --git a/src/libpspp/str.h b/src/libpspp/str.h index ca990dbc..b057c2bd 100644 --- a/src/libpspp/str.h +++ b/src/libpspp/str.h @@ -34,10 +34,10 @@ void buf_reverse (char *, size_t); char *buf_find_reverse (const char *, size_t, const char *, size_t); int buf_compare_case (const char *, const char *, size_t); int buf_compare_rpad (const char *, size_t, const char *, size_t); -void buf_copy_lpad (char *, size_t, const char *, size_t); -void buf_copy_rpad (char *, size_t, const char *, size_t); -void buf_copy_str_lpad (char *, size_t, const char *); -void buf_copy_str_rpad (char *, size_t, const char *); +void buf_copy_lpad (char *, size_t, const char *, size_t, char pad); +void buf_copy_rpad (char *, size_t, const char *, size_t, char pad); +void buf_copy_str_lpad (char *, size_t, const char *, char pad); +void buf_copy_str_rpad (char *, size_t, const char *, char pad); int str_compare_rpad (const char *, const char *); void str_copy_rpad (char *, size_t, const char *); diff --git a/src/math/coefficient.c b/src/math/coefficient.c index 1f157433..f78895f8 100644 --- a/src/math/coefficient.c +++ b/src/math/coefficient.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -185,9 +185,12 @@ pspp_coeff_var_to_coeff (const struct variable *v, struct pspp_coeff **coefs, */ if (val != NULL) { + int width = var_get_width (v); + j = i; - while (j < n_coef && compare_values_short (pspp_coeff_get_value (coefs[j], v), - val, v) != 0) + while (j < n_coef + && value_compare_3way (pspp_coeff_get_value (coefs[j], v), + val, width) != 0) { j++; } diff --git a/src/math/covariance-matrix.c b/src/math/covariance-matrix.c index 275d7cfe..2a00a56d 100644 --- a/src/math/covariance-matrix.c +++ b/src/math/covariance-matrix.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2008 Free Software Foundation, Inc. + Copyright (C) 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -148,7 +148,7 @@ covariance_matrix_init (size_t n_variables, return result; } static size_t -get_n_rows (size_t n_variables, size_t *v_variables[]) +get_n_rows (size_t n_variables, const struct variable *v_variables[]) { size_t i; size_t result = 0; @@ -243,6 +243,7 @@ static void column_iterate (struct design_matrix *cov, const struct variable *v, double ssize, double x, const union value *val1, size_t row) { + int width = var_get_width (v); size_t col; size_t i; double y; @@ -255,7 +256,7 @@ column_iterate (struct design_matrix *cov, const struct variable *v, col += i; y = -1.0 * cat_get_category_count (i, v) / ssize; tmp_val = cat_subscript_to_value (i, v); - if (!compare_values_short (tmp_val, val1, v)) + if (!value_equal (tmp_val, val1, width)) { y += -1.0; } @@ -290,7 +291,7 @@ covariance_pass_two (struct design_matrix *cov, double mean1, double mean2, row += i; x = -1.0 * cat_get_category_count (i, v1) / ssize; tmp_val = cat_subscript_to_value (i, v1); - if (!compare_values_short 
(tmp_val, val1, v1)) + if (!value_equal (tmp_val, val1, var_get_width (v1))) { x += 1.0; } @@ -370,10 +371,9 @@ covariance_accumulator_hash (const void *h, const void *aux) } if (var_is_alpha (v_max) && var_is_alpha (v_min)) { - unsigned tmp = hash_bytes (val_max, var_get_width (v_max), 0); - tmp ^= hash_bytes (val_min, var_get_width (v_min), 0); - tmp += *n_vars * (*n_vars + 1 + idx_max) + idx_min; - return (size_t) tmp; + unsigned hash = value_hash (val_max, var_get_width (v_max), 0); + hash = value_hash (val_min, var_get_width (v_min), hash); + return hash_int (*n_vars * (*n_vars + 1 + idx_max) + idx_min, hash); } return -1u; } @@ -412,15 +412,15 @@ ordered_match_nodes (const struct covariance_accumulator *c, const struct variab result = result|m; if (var_is_alpha (v1)) { - result |= compare_values_short (val1, c->val1, v1); + result |= value_compare_3way (val1, c->val1, var_get_width (v1)); if (var_is_alpha (v2)) { - result |= compare_values_short (val2, c->val2, v2); + result |= value_compare_3way (val2, c->val2, var_get_width (v2)); } } else if (var_is_alpha (v2)) { - result |= compare_values_short (val2, c->val2, v2); + result |= value_compare_3way (val2, c->val2, var_get_width (v2)); } return result; } @@ -474,7 +474,7 @@ hash_numeric_alpha (const struct variable *v1, const struct variable *v2, if (var_is_numeric (v1) && var_is_alpha (v2)) { result = n_vars * ((n_vars + 1) + var_get_dict_index (v1)) - + var_get_dict_index (v2) + hash_value_short (val, v2); + + var_get_dict_index (v2) + value_hash (val, var_get_width (v2), 0); } else if (var_is_alpha (v1) && var_is_numeric (v2)) { @@ -843,7 +843,7 @@ get_sum (const struct covariance_matrix *cov, size_t i) static void update_ssize (struct design_matrix *dm, size_t i, size_t j, struct covariance_accumulator *ca) { - struct variable *var; + const struct variable *var; double tmp; var = design_matrix_col_to_var (dm, i); if (var_get_dict_index (ca->v1) == var_get_dict_index (var)) diff --git a/src/math/group.c 
b/src/math/group.c index b5c9e56c..3a483671 100644 --- a/src/math/group.c +++ b/src/math/group.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,7 +35,7 @@ compare_group (const void *a_, { const struct group_statistics *a = a_; const struct group_statistics *b = b_; - return compare_values_short (&a->id, &b->id, var); + return value_compare_3way (&a->id, &b->id, var_get_width (var)); } @@ -44,9 +44,9 @@ unsigned int hash_group (const void *g_, const void *var) { unsigned id_hash; - const struct group_statistics *g = g_;; + const struct group_statistics *g = g_; - id_hash = hash_value_short (&g->id, var); + id_hash = value_hash (&g->id, var_get_width (var), 0); return id_hash; } diff --git a/src/math/interaction.c b/src/math/interaction.c index 05a8bf46..c8795ed5 100644 --- a/src/math/interaction.c +++ b/src/math/interaction.c @@ -50,10 +50,10 @@ struct interaction_variable struct interaction_value { const struct interaction_variable *intr; - union value *val; /* Concatenation of the string values in this - interaction's value, or the product of a bunch - of numeric values for a purely numeric - interaction. + union value val; /* Concatenation of the string values in this + interaction's value, or the product of a bunch + of numeric values for a purely numeric + interaction. */ double f; /* Product of the numerical values in this interaction's value. 
*/ }; @@ -146,10 +146,16 @@ interaction_value_create (const struct interaction_variable *var, const union va if (var != NULL) { + int val_width; + char *val; + result = xmalloc (sizeof (*result)); result->intr = var; n_vars = interaction_get_n_vars (var); - result->val = value_create (n_vars * MAX_SHORT_STRING + 1); + val_width = n_vars * MAX_SHORT_STRING + 1; + value_init (&result->val, val_width); + val = value_str_rw (&result->val, val_width); + val[0] = '\0'; result->f = 1.0; for (i = 0; i < n_vars; i++) { @@ -157,7 +163,7 @@ interaction_value_create (const struct interaction_variable *var, const union va if (var_is_value_missing (member, vals[i], MV_ANY)) { - value_set_missing (result->val, MAX_SHORT_STRING); + value_set_missing (&result->val, MAX_SHORT_STRING); result->f = SYSMIS; break; } @@ -165,7 +171,8 @@ interaction_value_create (const struct interaction_variable *var, const union va { if (var_is_alpha (var->members[i])) { - strncat (result->val->s, vals[i]->s, MAX_SHORT_STRING); + int w = var_get_width (var->members[i]); + strncat (val, value_str (vals[i], w), MAX_SHORT_STRING); } else if (var_is_numeric (var->members[i])) { @@ -188,17 +195,17 @@ interaction_value_create (const struct interaction_variable *var, const union va avoid the error, we set result->f to 1.0 for numeric interactions. 
*/ - result->val->f = result->f; + result->val.f = result->f; result->f = 1.0; } } return result; } -union value * +const union value * interaction_value_get (const struct interaction_value *val) { - return val->val; + return &val->val; } /* @@ -220,7 +227,10 @@ interaction_value_destroy (struct interaction_value *val) { if (val != NULL) { - free (val->val); + size_t n_vars = interaction_get_n_vars (val->intr); + int val_width = n_vars * MAX_SHORT_STRING + 1; + + value_destroy (&val->val, val_width); free (val); } } @@ -234,7 +244,7 @@ interaction_case_data (const struct ccase *ccase, const struct variable *var, { size_t i; size_t n_vars; - const struct interaction_variable *iv; + const struct interaction_variable *iv = NULL; const struct variable *intr; const struct variable *member; const union value **vals = NULL; diff --git a/src/math/interaction.h b/src/math/interaction.h index 73b440be..66025b66 100644 --- a/src/math/interaction.h +++ b/src/math/interaction.h @@ -27,7 +27,7 @@ struct interaction_value * interaction_value_create (const struct interaction_va void interaction_value_destroy (struct interaction_value *); size_t interaction_variable_get_n_vars (const struct interaction_variable *); double interaction_value_get_nonzero_entry (const struct interaction_value *); -union value * interaction_value_get (const struct interaction_value *); +const union value *interaction_value_get (const struct interaction_value *); const struct variable * interaction_variable_get_var (const struct interaction_variable *); size_t interaction_get_n_numeric (const struct interaction_variable *); size_t interaction_get_n_alpha (const struct interaction_variable *); diff --git a/src/math/merge.c b/src/math/merge.c index 2ff57c6e..af44855c 100644 --- a/src/math/merge.c +++ b/src/math/merge.c @@ -44,18 +44,18 @@ struct merge struct subcase ordering; struct merge_input inputs[MAX_MERGE_ORDER]; size_t input_cnt; - size_t value_cnt; + struct caseproto *proto; }; static void do_merge 
(struct merge *m); struct merge * -merge_create (const struct subcase *ordering, size_t value_cnt) +merge_create (const struct subcase *ordering, const struct caseproto *proto) { struct merge *m = xmalloc (sizeof *m); subcase_clone (&m->ordering, ordering); m->input_cnt = 0; - m->value_cnt = value_cnt; + m->proto = caseproto_ref (proto); return m; } @@ -69,6 +69,7 @@ merge_destroy (struct merge *m) subcase_destroy (&m->ordering); for (i = 0; i < m->input_cnt; i++) casereader_destroy (m->inputs[i].reader); + caseproto_unref (m->proto); free (m); } } @@ -97,7 +98,7 @@ merge_make_reader (struct merge *m) } else if (m->input_cnt == 0) { - struct casewriter *writer = mem_writer_create (m->value_cnt); + struct casewriter *writer = mem_writer_create (m->proto); r = casewriter_make_reader (writer); } else @@ -131,7 +132,7 @@ do_merge (struct merge *m) assert (m->input_cnt > 1); - w = tmpfile_writer_create (m->value_cnt); + w = tmpfile_writer_create (m->proto); for (i = 0; i < m->input_cnt; i++) taint_propagate (casereader_get_taint (m->inputs[i].reader), casewriter_get_taint (w)); diff --git a/src/math/merge.h b/src/math/merge.h index 8bd3384a..5fdb6fc0 100644 --- a/src/math/merge.h +++ b/src/math/merge.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2007, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,13 +17,11 @@ #ifndef MATH_MERGE_H #define MATH_MERGE_H 1 -#include -#include - -struct subcase; +struct caseproto; struct casereader; +struct subcase; -struct merge *merge_create (const struct subcase *, size_t); +struct merge *merge_create (const struct subcase *, const struct caseproto *); void merge_destroy (struct merge *); void merge_append (struct merge *, struct casereader *); struct casereader *merge_make_reader (struct merge *); diff --git a/src/math/np.c b/src/math/np.c index 99584a51..e61bf58e 100644 --- a/src/math/np.c +++ b/src/math/np.c @@ -16,14 +16,18 @@ #include #include "np.h" -#include -#include -#include -#include + #include -#include +#include +#include + #include #include +#include +#include +#include + +#include "xalloc.h" static void destroy (struct statistic *stat) @@ -56,7 +60,7 @@ acc (struct statistic *s, const struct ccase *cx UNUSED, maximize (&np->y_max, y); minimize (&np->y_min, y); - cp = case_create (n_NP_IDX); + cp = case_create (casewriter_get_proto (np->writer)); case_data_rw_idx (cp, NP_IDX_Y)->f = y; case_data_rw_idx (cp, NP_IDX_NS)->f = ns; case_data_rw_idx (cp, NP_IDX_DNS)->f = dns; @@ -72,6 +76,8 @@ np_create (const struct moments1 *m) struct np *np = xzalloc (sizeof (*np)); struct statistic *stat = (struct statistic *) np; struct order_stats *os = (struct order_stats *) np; + struct caseproto *proto; + int i; np->prev_cc = 0; @@ -82,7 +88,11 @@ np_create (const struct moments1 *m) np->y_min = np->ns_min = np->dns_min = DBL_MAX; np->y_max = np->ns_max = np->dns_max = -DBL_MAX; - np->writer = autopaging_writer_create (n_NP_IDX); + proto = caseproto_create (); + for (i = 0; i < n_NP_IDX; i++) + proto = caseproto_add_width (proto, 0); + np->writer = autopaging_writer_create (proto); + caseproto_unref (proto); os->k = 0; stat->destroy = destroy; diff --git a/src/math/sort.c 
b/src/math/sort.c index 67aa32d2..8719d877 100644 --- a/src/math/sort.c +++ b/src/math/sort.c @@ -41,7 +41,7 @@ int max_buffers = INT_MAX; struct sort_writer { - size_t value_cnt; + struct caseproto *proto; struct subcase ordering; struct merge *merge; struct pqueue *pqueue; @@ -53,7 +53,8 @@ struct sort_writer static struct casewriter_class sort_casewriter_class; -static struct pqueue *pqueue_create (const struct subcase *, size_t); +static struct pqueue *pqueue_create (const struct subcase *, + const struct caseproto *); static void pqueue_destroy (struct pqueue *); static bool pqueue_is_full (const struct pqueue *); static bool pqueue_is_empty (const struct pqueue *); @@ -63,20 +64,21 @@ static struct ccase *pqueue_pop (struct pqueue *, casenumber *); static void output_record (struct sort_writer *); struct casewriter * -sort_create_writer (const struct subcase *ordering, size_t value_cnt) +sort_create_writer (const struct subcase *ordering, + const struct caseproto *proto) { struct sort_writer *sort; sort = xmalloc (sizeof *sort); - sort->value_cnt = value_cnt; + sort->proto = caseproto_ref (proto); subcase_clone (&sort->ordering, ordering); - sort->merge = merge_create (ordering, value_cnt); - sort->pqueue = pqueue_create (ordering, value_cnt); + sort->merge = merge_create (ordering, proto); + sort->pqueue = pqueue_create (ordering, proto); sort->run = NULL; sort->run_id = 0; sort->run_end = NULL; - return casewriter_create (value_cnt, &sort_casewriter_class, sort); + return casewriter_create (proto, &sort_casewriter_class, sort); } static void @@ -105,6 +107,7 @@ sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_) pqueue_destroy (sort->pqueue); casewriter_destroy (sort->run); case_unref (sort->run_end); + caseproto_unref (sort->proto); free (sort); } @@ -117,7 +120,7 @@ sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_) if (sort->run == NULL && sort->run_id == 0) { /* In-core sort. 
*/ - sort->run = mem_writer_create (casewriter_get_value_cnt (writer)); + sort->run = mem_writer_create (sort->proto); sort->run_id = 1; } while (!pqueue_is_empty (sort->pqueue)) @@ -149,7 +152,7 @@ output_record (struct sort_writer *sort) } if (sort->run == NULL) { - sort->run = tmpfile_writer_create (sort->value_cnt); + sort->run = tmpfile_writer_create (sort->proto); sort->run_id = min_run_id; } @@ -171,7 +174,7 @@ struct casereader * sort_execute (struct casereader *input, const struct subcase *ordering) { struct casewriter *output = - sort_create_writer (ordering, casereader_get_value_cnt (input)); + sort_create_writer (ordering, casereader_get_proto (input)); casereader_transfer (input, output); return casewriter_make_reader (output); } @@ -211,14 +214,13 @@ static int compare_pqueue_records_minheap (const void *a, const void *b, const void *pq_); static struct pqueue * -pqueue_create (const struct subcase *ordering, size_t value_cnt) +pqueue_create (const struct subcase *ordering, const struct caseproto *proto) { struct pqueue *pq; pq = xmalloc (sizeof *pq); subcase_clone (&pq->ordering, ordering); - pq->record_cap - = settings_get_workspace_cases (value_cnt); + pq->record_cap = settings_get_workspace_cases (proto); if (pq->record_cap > max_buffers) pq->record_cap = max_buffers; else if (pq->record_cap < min_buffers) diff --git a/src/math/sort.h b/src/math/sort.h index 1948d0ab..96ac32cc 100644 --- a/src/math/sort.h +++ b/src/math/sort.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,17 +17,15 @@ #ifndef MATH_SORT_H #define MATH_SORT_H 1 -#include -#include - struct subcase; +struct caseproto; struct variable; extern int min_buffers ; extern int max_buffers ; struct casewriter *sort_create_writer (const struct subcase *, - size_t value_cnt); + const struct caseproto *); struct casereader *sort_execute (struct casereader *, const struct subcase *); struct casereader *sort_execute_1var (struct casereader *, const struct variable *); diff --git a/src/math/ts/innovations.c b/src/math/ts/innovations.c index b9a7cf22..3ab2f3ed 100644 --- a/src/math/ts/innovations.c +++ b/src/math/ts/innovations.c @@ -26,10 +26,14 @@ */ #include + +#include +#include + #include #include -#include #include +#include #include #include diff --git a/src/ui/gui/executor.c b/src/ui/gui/executor.c index 711e9563..9336f7ce 100644 --- a/src/ui/gui/executor.c +++ b/src/ui/gui/executor.c @@ -45,7 +45,7 @@ execute_syntax (struct getl_interface *sss) gboolean retval = TRUE; struct casereader *reader; - size_t value_cnt; + const struct caseproto *proto; casenumber case_cnt; unsigned long int lazy_serial; @@ -62,9 +62,9 @@ execute_syntax (struct getl_interface *sss) needed. If the data store casereader is never needed, then it is reused the next time syntax is run, without wrapping it in another layer. 
*/ - value_cnt = psppire_data_store_get_value_count (the_data_store); + proto = psppire_data_store_get_proto (the_data_store); case_cnt = psppire_data_store_get_case_count (the_data_store); - reader = lazy_casereader_create (value_cnt, case_cnt, + reader = lazy_casereader_create (proto, case_cnt, create_casereader_from_data_store, the_data_store, &lazy_serial); proc_set_active_file_data (the_dataset, reader); diff --git a/src/ui/gui/find-dialog.c b/src/ui/gui/find-dialog.c index 5a82b6c9..86d29583 100644 --- a/src/ui/gui/find-dialog.c +++ b/src/ui/gui/find-dialog.c @@ -1,5 +1,5 @@ /* PSPPIRE - a graphical user interface for PSPP. - Copyright (C) 2007 Free Software Foundation + Copyright (C) 2007, 2009 Free Software Foundation This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -285,7 +285,7 @@ forward (casenumber *i, struct datasheet *data UNUSED) static void forward_wrap (casenumber *i, struct datasheet *data) { - if ( ++*i >= datasheet_get_row_cnt (data) ) *i = 0; + if ( ++*i >= datasheet_get_n_rows (data) ) *i = 0; } static void @@ -299,7 +299,7 @@ static void backward_wrap (casenumber *i, struct datasheet *data) { if ( --*i < 0 ) - *i = datasheet_get_row_cnt (data) - 1; + *i = datasheet_get_n_rows (data) - 1; } @@ -344,7 +344,7 @@ cm1c (casenumber current, struct datasheet *data) static casenumber last (casenumber current, struct datasheet *data) { - return datasheet_get_row_cnt (data) ; + return datasheet_get_n_rows (data) ; } static casenumber @@ -439,7 +439,7 @@ struct comparator struct value_comparator { struct comparator parent; - union value *pattern; + union value pattern; }; /* A comparator which matches string values or parts thereof */ @@ -462,7 +462,7 @@ value_compare (const struct comparator *cmptr, const union value *v) { const struct value_comparator *vc = (const struct value_comparator *) cmptr; - return 0 == value_compare_3way (v, vc->pattern, var_get_width 
(cmptr->var)); + return 0 == value_compare_3way (v, &vc->pattern, var_get_width (cmptr->var)); } @@ -495,8 +495,8 @@ string_value_compare (const struct comparator *cmptr, const struct string_comparator *ssc = (const struct string_comparator *) cmptr; - const char *text = val->s; int width = var_get_width (cmptr->var); + const char *text = value_str (val, width); assert ( ! (cmptr->flags & STR_CMP_LABELS)); @@ -528,7 +528,7 @@ regexp_value_compare (const struct comparator *cmptr, /* We must remove trailing whitespace, otherwise $ will not match where one would expect */ - text = g_strndup (val->s, width); + text = g_strndup (value_str (val, width), width); g_strchomp (text); retval = (0 == regexec (&rec->re, text, 0, 0, 0)); @@ -570,10 +570,10 @@ regexp_destroy (struct comparator *cmptr) } static void -value_destroy (struct comparator *cmptr) +cmptr_value_destroy (struct comparator *cmptr) { struct value_comparator *vc = (struct value_comparator *) cmptr; - free (vc->pattern); + value_destroy (&vc->pattern, var_get_width (cmptr->var)); } @@ -588,19 +588,20 @@ value_comparator_create (const struct variable *var, const char *target) cmptr->flags = 0; cmptr->var = var; cmptr->compare = value_compare ; - cmptr->destroy = value_destroy; + cmptr->destroy = cmptr_value_destroy; width = var_get_width (var); fmt = var_get_write_format (var); - vc->pattern = value_create (width); + value_init (&vc->pattern, width); if ( ! data_in (ss_cstr (target), LEGACY_NATIVE, fmt->type, 0, 0, 0, - vc->pattern, width) ) + &vc->pattern, width) ) { + value_destroy (&vc->pattern, width); free (vc); return NULL; } @@ -739,12 +740,13 @@ find_value (const struct find_dialog *fd, casenumber current_row, flags |= STR_CMP_LABELS; { - union value *val = value_create (width); + union value val; casenumber i; const struct casenum_iterator *ip = get_iteration_params (fd); struct comparator *cmptr = comparator_factory (var, target_string, flags); + value_init (&val, width); if ( ! 
cmptr) goto finish; @@ -752,10 +754,9 @@ find_value (const struct find_dialog *fd, casenumber current_row, i != ip->end (current_row, fd->data); ip->next (&i, fd->data)) { - datasheet_get_value (fd->data, i, var_get_case_index (var), - val, width); + datasheet_get_value (fd->data, i, var_get_case_index (var), &val); - if ( comparator_compare (cmptr, val)) + if ( comparator_compare (cmptr, &val)) { *row = i; break; @@ -764,6 +765,6 @@ find_value (const struct find_dialog *fd, casenumber current_row, finish: comparator_destroy (cmptr); - free (val); + value_destroy (&val, width); } } diff --git a/src/ui/gui/psppire-data-editor.c b/src/ui/gui/psppire-data-editor.c index 990a134c..7c3fca68 100644 --- a/src/ui/gui/psppire-data-editor.c +++ b/src/ui/gui/psppire-data-editor.c @@ -1622,7 +1622,7 @@ data_sheet_set_clip (PsppireSheet *sheet) /* Construct clip data. */ map = case_map_by_name (ds->dict->dict, clip_dict); - writer = autopaging_writer_create (dict_get_next_value_idx (clip_dict)); + writer = autopaging_writer_create (dict_get_proto (clip_dict)); for (i = range.row0; i <= range.rowi ; ++i ) { struct ccase *old = psppire_data_store_get_case (ds, i); @@ -1669,7 +1669,7 @@ clip_to_text (void) casenumber r; GString *string; - const size_t val_cnt = casereader_get_value_cnt (clip_datasheet); + const size_t val_cnt = caseproto_get_n_widths (casereader_get_proto (clip_datasheet)); const casenumber case_cnt = casereader_get_case_cnt (clip_datasheet); const size_t var_cnt = dict_get_var_cnt (clip_dict); @@ -1711,7 +1711,7 @@ clip_to_html (void) casenumber r; GString *string; - const size_t val_cnt = casereader_get_value_cnt (clip_datasheet); + const size_t val_cnt = caseproto_get_n_widths (casereader_get_proto (clip_datasheet)); const casenumber case_cnt = casereader_get_case_cnt (clip_datasheet); const size_t var_cnt = dict_get_var_cnt (clip_dict); diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c index 45fa8248..8808fd32 100644 --- 
a/src/ui/gui/psppire-data-store.c +++ b/src/ui/gui/psppire-data-store.c @@ -187,18 +187,18 @@ psppire_data_store_class_init (PsppireDataStoreClass *class) static gboolean -psppire_data_store_insert_values (PsppireDataStore *ds, - gint n_values, gint where); +psppire_data_store_insert_value (PsppireDataStore *ds, + gint width, gint where); -static union value * +static bool psppire_data_store_get_value (const PsppireDataStore *ds, casenumber casenum, size_t idx, - union value *value, int width); + union value *value); static gboolean psppire_data_store_set_value (PsppireDataStore *ds, casenumber casenum, - gint idx, union value *v, gint width); + gint idx, union value *v); @@ -214,7 +214,7 @@ psppire_data_store_get_var_count (const PsppireSheetModel *model) casenumber psppire_data_store_get_case_count (const PsppireDataStore *store) { - return datasheet_get_row_cnt (store->datasheet); + return datasheet_get_n_rows (store->datasheet); } size_t @@ -223,6 +223,12 @@ psppire_data_store_get_value_count (const PsppireDataStore *store) return psppire_dict_get_value_cnt (store->dict); } +const struct caseproto * +psppire_data_store_get_proto (const PsppireDataStore *store) +{ + return psppire_dict_get_proto (store->dict); +} + static casenumber psppire_data_store_get_case_count_wrapper (const PsppireSheetModel *model) { @@ -296,13 +302,15 @@ psppire_data_store_sheet_model_init (PsppireSheetModelIface *iface) */ static void delete_variable_callback (GObject *obj, gint dict_index, - gint case_index, gint val_cnt, + gint case_index, gint width, gpointer data) { PsppireDataStore *store = PSPPIRE_DATA_STORE (data); psppire_sheet_model_columns_deleted (PSPPIRE_SHEET_MODEL (store), dict_index, 1); + datasheet_delete_columns (store->datasheet, case_index, 1); + datasheet_insert_column (store->datasheet, NULL, -1, case_index); #if AXIS_TRANSITION @@ -329,6 +337,7 @@ variable_changed_callback (GObject *obj, gint var_num, gpointer data) static void insert_variable_callback (GObject 
*obj, gint var_num, gpointer data) { + struct variable *variable; PsppireDataStore *store; gint posn; @@ -336,21 +345,10 @@ insert_variable_callback (GObject *obj, gint var_num, gpointer data) store = PSPPIRE_DATA_STORE (data); - if ( var_num > 0 ) - { - struct variable *variable = - psppire_dict_get_variable (store->dict, var_num); - - g_assert (variable != NULL); - - posn = var_get_case_index (variable); - } - else - { - posn = 0; - } - - psppire_data_store_insert_values (store, 1, posn); + variable = psppire_dict_get_variable (store->dict, var_num); + posn = var_get_case_index (variable); + printf ("insert var_num=%d, posn=%d\n", var_num, posn); + psppire_data_store_insert_value (store, var_get_width (variable), posn); #if AXIS_TRANSITION @@ -361,21 +359,51 @@ insert_variable_callback (GObject *obj, gint var_num, gpointer data) psppire_sheet_model_columns_inserted (PSPPIRE_SHEET_MODEL (store), var_num, 1); } +struct resize_datum_aux + { + int old_width; + int new_width; + }; + + +void +resize_datum (const union value *old, union value *new, void *aux_) +{ + struct resize_datum_aux *aux = aux_; + + if (aux->new_width == 0) + { + /* FIXME: try to parse string as number. */ + new->f = SYSMIS; + } + else if (aux->old_width == 0) + { + /* FIXME: format number as string. 
*/ + value_set_missing (new, aux->new_width); + } + else + value_copy_rpad (new, aux->new_width, old, aux->old_width, ' '); +} static void dict_size_change_callback (GObject *obj, - gint posn, gint adjustment, gpointer data) + gint var_num, gint old_width, gpointer data) { PsppireDataStore *store = PSPPIRE_DATA_STORE (data); + struct variable *variable; + int posn; - const struct variable *v = psppire_dict_get_variable (store->dict, posn); - - const gint new_val_width = value_cnt_from_width (var_get_width (v)); + variable = psppire_dict_get_variable (store->dict, var_num); + posn = var_get_case_index (variable); - if ( adjustment > 0 ) - psppire_data_store_insert_values (store, adjustment, - new_val_width - adjustment + - var_get_case_index(v)); + if (old_width != var_get_width (variable)) + { + struct resize_datum_aux aux; + aux.old_width = old_width; + aux.new_width = var_get_width (variable); + datasheet_resize_column (store->datasheet, posn, aux.new_width, + resize_datum, &aux); + } } @@ -530,28 +558,16 @@ gboolean psppire_data_store_insert_new_case (PsppireDataStore *ds, casenumber posn) { gboolean result; - gint val_cnt, v; + const struct caseproto *proto; struct ccase *cc; g_return_val_if_fail (ds, FALSE); - val_cnt = datasheet_get_column_cnt (ds->datasheet) ; - - g_return_val_if_fail (val_cnt > 0, FALSE); - + proto = datasheet_get_proto (ds->datasheet); + g_return_val_if_fail (caseproto_get_n_widths (proto) > 0, FALSE); g_return_val_if_fail (posn <= psppire_data_store_get_case_count (ds), FALSE); - cc = case_create (val_cnt); - - memset ( case_data_rw_idx (cc, 0), 0, val_cnt * MAX_SHORT_STRING); - - for (v = 0 ; v < psppire_dict_get_var_cnt (ds->dict) ; ++v) - { - const struct variable *pv = psppire_dict_get_variable (ds->dict, v); - if ( var_is_alpha (pv)) - continue; - - case_data_rw (cc, pv)->f = SYSMIS; - } + cc = case_create (proto); + case_set_missing (cc); result = psppire_data_store_insert_case (ds, cc, posn); @@ -568,7 +584,8 @@ 
psppire_data_store_get_string (PsppireDataStore *store, glong row, glong column) char *text; const struct fmt_spec *fp ; const struct variable *pv ; - union value *v ; + union value v; + int width; GString *s; g_return_val_if_fail (store->dict, NULL); @@ -585,20 +602,20 @@ psppire_data_store_get_string (PsppireDataStore *store, glong row, glong column) g_assert (pv); idx = var_get_case_index (pv); + width = var_get_width (pv); g_assert (idx >= 0); - v = psppire_data_store_get_value (store, row, idx, NULL, - var_get_width (pv)); - - g_return_val_if_fail (v, NULL); + value_init (&v, width); + if (!psppire_data_store_get_value (store, row, idx, &v)) + return NULL; if ( store->show_labels) { - const gchar *label = var_lookup_value_label (pv, v); + const gchar *label = var_lookup_value_label (pv, &v); if (label) { - free (v); + value_destroy (&v, width); return recode_string (UTF8, psppire_dict_encoding (store->dict), label, -1); } @@ -616,7 +633,7 @@ psppire_data_store_get_string (PsppireDataStore *store, glong row, glong column) /* Converts binary value V into printable form in the exactly FP->W character in buffer S according to format specification FP. No null terminator is appended to the buffer. 
*/ - data_out (v, fp, s->str); + data_out (&v, fp, s->str); text = recode_string (UTF8, psppire_dict_encoding (store->dict), s->str, fp->w); @@ -624,7 +641,7 @@ psppire_data_store_get_string (PsppireDataStore *store, glong row, glong column) g_strchomp (text); - free (v); + value_destroy (&v, width); return text; } @@ -636,20 +653,20 @@ psppire_data_store_clear_datum (PsppireSheetModel *model, PsppireDataStore *store = PSPPIRE_DATA_STORE (model); union value v; + int width; const struct variable *pv = psppire_dict_get_variable (store->dict, col); const gint index = var_get_case_index (pv) ; - if ( var_is_numeric (pv)) - v.f = SYSMIS; - else - memcpy (v.s, "", MAX_SHORT_STRING); - - psppire_data_store_set_value (store, row, index, &v, - var_get_width (pv)); + width = var_is_numeric (pv) ? 0 : MAX_SHORT_STRING; + value_init (&v, width); + value_set_missing (&v, width); + psppire_data_store_set_value (store, row, index, &v); + value_destroy (&v, width); psppire_sheet_model_range_changed (model, row, col, row, col); + return TRUE; } @@ -907,53 +924,38 @@ psppire_data_store_insert_case (PsppireDataStore *ds, } -/* Copies the IDXth value from case CASENUM into VALUE. - If VALUE is null, then memory is allocated is allocated with - malloc. Returns the value if successful, NULL on failure. */ -static union value * +/* Copies the IDXth value from case CASENUM into VALUE, which + must be of the correct width for IDX. + Returns true if successful, false on failure. 
*/ +static bool psppire_data_store_get_value (const PsppireDataStore *ds, casenumber casenum, size_t idx, - union value *value, int width) + union value *value) { - bool allocated; - g_return_val_if_fail (ds, false); g_return_val_if_fail (ds->datasheet, false); + g_return_val_if_fail (idx < datasheet_get_n_columns (ds->datasheet), false); - g_return_val_if_fail (idx < datasheet_get_column_cnt (ds->datasheet), false); - - if (value == NULL) - { - value = xnmalloc (value_cnt_from_width (width), sizeof *value); - allocated = true; - } - else - allocated = false; - if (!datasheet_get_value (ds->datasheet, casenum, idx, value, width)) - { - if (allocated) - free (value); - value = NULL; - } - return value; + return datasheet_get_value (ds->datasheet, casenum, idx, value); } /* Set the IDXth value of case C to V. + V must be the correct width for IDX. Returns true if successful, false on I/O error. */ static gboolean psppire_data_store_set_value (PsppireDataStore *ds, casenumber casenum, - gint idx, union value *v, gint width) + gint idx, union value *v) { bool ok; g_return_val_if_fail (ds, FALSE); g_return_val_if_fail (ds->datasheet, FALSE); - g_return_val_if_fail (idx < datasheet_get_column_cnt (ds->datasheet), FALSE); + g_return_val_if_fail (idx < datasheet_get_n_columns (ds->datasheet), FALSE); - ok = datasheet_put_value (ds->datasheet, casenum, idx, v, width); + ok = datasheet_put_value (ds->datasheet, casenum, idx, v); if (ok) g_signal_emit (ds, signals [CASE_CHANGED], 0, casenum); @@ -968,22 +970,24 @@ static gboolean psppire_data_store_data_in (PsppireDataStore *ds, casenumber casenum, gint idx, struct substring input, const struct fmt_spec *fmt) { - union value *value = NULL; + union value value; int width; bool ok; g_return_val_if_fail (ds, FALSE); g_return_val_if_fail (ds->datasheet, FALSE); - g_return_val_if_fail (idx < datasheet_get_column_cnt (ds->datasheet), FALSE); + g_return_val_if_fail (idx < datasheet_get_n_columns (ds->datasheet), FALSE); width = 
fmt_var_width (fmt); - value = xmalloca (value_cnt_from_width (width) * sizeof *value); - ok = (datasheet_get_value (ds->datasheet, casenum, idx, value, width) - && data_in (input, LEGACY_NATIVE, fmt->type, 0, 0, 0, value, width) - && datasheet_put_value (ds->datasheet, casenum, idx, value, width)); - - freea (value); + g_return_val_if_fail (caseproto_get_width ( + datasheet_get_proto (ds->datasheet), idx) == width, + FALSE); + value_init (&value, width); + ok = (datasheet_get_value (ds->datasheet, casenum, idx, &value) + && data_in (input, LEGACY_NATIVE, fmt->type, 0, 0, 0, &value, width) + && datasheet_put_value (ds->datasheet, casenum, idx, &value)); + value_destroy (&value, width); if (ok) g_signal_emit (ds, signals [CASE_CHANGED], 0, casenum); @@ -991,28 +995,31 @@ psppire_data_store_data_in (PsppireDataStore *ds, casenumber casenum, gint idx, return ok; } -/* Resize the cases in the casefile, by inserting N_VALUES into every - one of them at the position immediately preceeding WHERE. +/* Resize the cases in the casefile, by inserting a value of the + given WIDTH into every one of them at the position immediately + preceding WHERE. */ static gboolean -psppire_data_store_insert_values (PsppireDataStore *ds, - gint n_values, gint where) +psppire_data_store_insert_value (PsppireDataStore *ds, + gint width, gint where) { - g_return_val_if_fail (ds, FALSE); + union value value; - if ( n_values == 0 ) - return FALSE; + g_return_val_if_fail (ds, FALSE); - g_assert (n_values > 0); + g_assert (width >= 0); if ( ! 
ds->datasheet ) ds->datasheet = datasheet_create (NULL); - { - union value *values = xcalloc (n_values, sizeof *values); - datasheet_insert_columns (ds->datasheet, values, n_values, where); - free (values); - } + value_init (&value, width); + if (width == 0) + value.f = 0; + else + value_set_missing (&value, width); + + printf("insert column width=%d\n", width); + datasheet_insert_column (ds->datasheet, &value, width, where); return TRUE; } @@ -1027,7 +1034,7 @@ get_row_overstrike (const PsppireSheetModel *model, gint row) const struct variable *filter = dict_get_filter (dict); - if ( row < 0 || row >= datasheet_get_row_cnt (ds->datasheet)) + if ( row < 0 || row >= datasheet_get_n_rows (ds->datasheet)) return FALSE; if ( ! filter) @@ -1035,9 +1042,10 @@ get_row_overstrike (const PsppireSheetModel *model, gint row) g_assert (var_is_numeric (filter)); + value_init (&val, 0); if ( ! datasheet_get_value (ds->datasheet, row, var_get_case_index (filter), - &val, 0) ) + &val) ) return FALSE; diff --git a/src/ui/gui/psppire-data-store.h b/src/ui/gui/psppire-data-store.h index bbb44ee2..610b6b45 100644 --- a/src/ui/gui/psppire-data-store.h +++ b/src/ui/gui/psppire-data-store.h @@ -123,7 +123,7 @@ gboolean psppire_data_store_filtered (PsppireDataStore *ds, casenumber psppire_data_store_get_case_count (const PsppireDataStore *ds); size_t psppire_data_store_get_value_count (const PsppireDataStore *ds); - +const struct caseproto *psppire_data_store_get_proto (const PsppireDataStore *); diff --git a/src/ui/gui/psppire-dict.c b/src/ui/gui/psppire-dict.c index afc7d570..8ff92a48 100644 --- a/src/ui/gui/psppire-dict.c +++ b/src/ui/gui/psppire-dict.c @@ -1,5 +1,5 @@ /* PSPPIRE - a graphical user interface for PSPP. 
- Copyright (C) 2004, 2006, 2007 Free Software Foundation + Copyright (C) 2004, 2006, 2007, 2009 Free Software Foundation This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -243,11 +243,10 @@ addcb (struct dictionary *d, int idx, void *pd) } static void -delcb (struct dictionary *d, int dict_idx, int case_idx, int value_cnt, - void *pd) +delcb (struct dictionary *d, int dict_idx, int case_idx, int width, void *pd) { g_signal_emit (pd, signals [VARIABLE_DELETED], 0, - dict_idx, case_idx, value_cnt ); + dict_idx, case_idx, width ); } static void @@ -257,9 +256,9 @@ mutcb (struct dictionary *d, int idx, void *pd) } static void -resize_cb (struct dictionary *d, int idx, int delta, void *pd) +resize_cb (struct dictionary *d, int idx, int old_width, void *pd) { - g_signal_emit (pd, signals [VARIABLE_RESIZED], 0, idx, delta); + g_signal_emit (pd, signals [VARIABLE_RESIZED], 0, idx, old_width); } static void @@ -473,6 +472,17 @@ psppire_dict_get_value_cnt (const PsppireDict *d) } +/* Returns the prototype for the cases that match the dictionary */ +const struct caseproto * +psppire_dict_get_proto (const PsppireDict *d) +{ + g_return_val_if_fail (d, NULL); + g_return_val_if_fail (d->dict, NULL); + + return dict_get_proto (d->dict); +} + + /* Return a variable by name. 
Return NULL if it doesn't exist */ @@ -531,17 +541,14 @@ void psppire_dict_resize_variable (PsppireDict *d, const struct variable *pv, gint old_size, gint new_size) { - gint fv; g_return_if_fail (d); g_return_if_fail (d->dict); if ( old_size == new_size ) return ; - fv = var_get_case_index (pv); - g_signal_emit (d, signals [VARIABLE_RESIZED], 0, - fv + old_size, + var_get_dict_index (pv), new_size - old_size ); } diff --git a/src/ui/gui/psppire-dict.h b/src/ui/gui/psppire-dict.h index 54f3e39a..79874932 100644 --- a/src/ui/gui/psppire-dict.h +++ b/src/ui/gui/psppire-dict.h @@ -1,5 +1,5 @@ /* PSPPIRE - a graphical user interface for PSPP. - Copyright (C) 2004 Free Software Foundation + Copyright (C) 2004, 2009 Free Software Foundation This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -72,6 +72,9 @@ gint psppire_dict_get_var_cnt (const PsppireDict *d); /* Return the number of `union value's in the dictionary */ size_t psppire_dict_get_value_cnt (const PsppireDict *d); +/* Returns the prototype for the cases that match the dictionary */ +const struct caseproto *psppire_dict_get_proto (const PsppireDict *d); + /* Return a variable by name. Return NULL if it doesn't exist */ diff --git a/src/ui/gui/psppire-var-store.c b/src/ui/gui/psppire-var-store.c index 092de58c..8c400890 100644 --- a/src/ui/gui/psppire-var-store.c +++ b/src/ui/gui/psppire-var-store.c @@ -1,5 +1,5 @@ /* PSPPIRE - a graphical user interface for PSPP. 
- Copyright (C) 2006 Free Software Foundation + Copyright (C) 2006, 2009 Free Software Foundation This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -688,20 +688,20 @@ text_for_column (PsppireVarStore *vs, gchar *ss; GString *gstr = g_string_sized_new (10); const struct val_labs *vls = var_get_value_labels (pv); - struct val_labs_iterator *ip = 0; - struct val_lab *vl = val_labs_first_sorted (vls, &ip); + const struct val_lab **labels = val_labs_sorted (vls); + const struct val_lab *vl = labels[0]; + free (labels); g_assert (vl); { gchar *const vstr = value_to_text (vl->value, *write_spec); - g_string_printf (gstr, "{%s,\"%s\"}_", vstr, vl->label); + g_string_printf (gstr, "{%s,\"%s\"}_", + vstr, val_lab_get_label (vl)); g_free (vstr); } - val_labs_done (&ip); - ss = recode_string (UTF8, psppire_dict_encoding (dict), gstr->str, gstr->len); g_string_free (gstr, TRUE); diff --git a/src/ui/gui/text-data-import-dialog.c b/src/ui/gui/text-data-import-dialog.c index c674f17d..70e69d13 100644 --- a/src/ui/gui/text-data-import-dialog.c +++ b/src/ui/gui/text-data-import-dialog.c @@ -330,18 +330,20 @@ apply_dict (const struct dictionary *dict, struct string *s) if (var_has_value_labels (var)) { const struct val_labs *vls = var_get_value_labels (var); - struct val_labs_iterator *iter; - struct val_lab *vl; + const struct val_lab **labels = val_labs_sorted (vls); + size_t n_labels = val_labs_count (vls); + size_t i; syntax_gen_pspp (s, "VALUE LABELS %ss", name); - for (vl = val_labs_first_sorted (vls, &iter); vl != NULL; - vl = val_labs_next (vls, &iter)) + for (i = 0; i < n_labels; i++) { + const struct val_lab *vl = labels[i]; ds_put_cstr (s, "\n "); syntax_gen_value (s, &vl->value, width, format); ds_put_char (s, ' '); - syntax_gen_string (s, ss_cstr (vl->label)); + syntax_gen_string (s, ss_cstr (val_lab_get_label (vl))); } + free (labels); ds_put_cstr (s, ".\n"); } if (var_has_label 
(var)) @@ -1732,7 +1734,7 @@ parse_field (struct import_assistant *ia, char **outputp, char **tooltipp) { struct substring field; - union value *val; + union value val; struct variable *var; const struct fmt_spec *in; struct fmt_spec out; @@ -1741,7 +1743,7 @@ parse_field (struct import_assistant *ia, field = ia->separators.columns[column].contents[row]; var = dict_get_var (ia->formats.dict, column); - val = value_create (var_get_width (var)); + value_init (&val, var_get_width (var)); in = var_get_print_format (var); out = fmt_for_output_from_input (in); tooltip = NULL; @@ -1749,7 +1751,7 @@ parse_field (struct import_assistant *ia, { msg_disable (); if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, 0, - val, var_get_width (var))) + &val, var_get_width (var))) { char fmt_string[FMT_STRING_LEN_MAX + 1]; fmt_to_string (in, fmt_string); @@ -1764,16 +1766,16 @@ parse_field (struct import_assistant *ia, { tooltip = xstrdup (_("This input line has too few separators " "to fill in this field.")); - value_set_missing (val, var_get_width (var)); + value_set_missing (&val, var_get_width (var)); } if (outputp != NULL) { char *output = xmalloc (out.w + 1); - data_out (val, &out, output); + data_out (&val, &out, output); output[out.w] = '\0'; *outputp = output; } - free (val); + value_destroy (&val, var_get_width (var)); ok = tooltip == NULL; if (tooltipp != NULL) diff --git a/src/ui/gui/val-labs-dialog.c b/src/ui/gui/val-labs-dialog.c index 42dd0b34..0af80591 100644 --- a/src/ui/gui/val-labs-dialog.c +++ b/src/ui/gui/val-labs-dialog.c @@ -75,7 +75,7 @@ on_label_entry_change (GtkEntry *entry, gpointer data) *var_get_write_format (dialog->pv)); - if ( val_labs_find (dialog->labs, v) ) + if (val_labs_find (dialog->labs, &v)) { gtk_widget_set_sensitive (dialog->change_button, TRUE); gtk_widget_set_sensitive (dialog->add_button, FALSE); @@ -134,7 +134,7 @@ select_treeview_from_value (GtkTreeView *treeview, union value *val) static void on_value_entry_change (GtkEntry *entry, 
gpointer data) { - char *s; + const char *s; struct val_labs_dialog *dialog = data; @@ -151,7 +151,7 @@ on_value_entry_change (GtkEntry *entry, gpointer data) gtk_entry_set_text (GTK_ENTRY (dialog->label_entry),""); - if ( (s = val_labs_find (dialog->labs, v)) ) + if ( (s = val_labs_find (dialog->labs, &v)) ) { gtk_entry_set_text (GTK_ENTRY (dialog->label_entry), s); gtk_widget_set_sensitive (dialog->add_button, FALSE); @@ -227,14 +227,15 @@ on_delete (GtkWidget *w, GdkEvent *e, gpointer data) /* Return the value-label pair currently selected in the dialog box */ -static struct val_lab * -get_selected_tuple (struct val_labs_dialog *dialog) +static void +get_selected_tuple (struct val_labs_dialog *dialog, + union value *valuep, const char **label) { GtkTreeView *treeview = GTK_TREE_VIEW (dialog->treeview); - static struct val_lab vl; GtkTreeIter iter ; GValue the_value = {0}; + union value value; GtkTreeSelection* sel = gtk_tree_view_get_selection (treeview); @@ -244,12 +245,13 @@ get_selected_tuple (struct val_labs_dialog *dialog) gtk_tree_model_get_value (model, &iter, 1, &the_value); - vl.value.f = g_value_get_double (&the_value); + value.f = g_value_get_double (&the_value); g_value_unset (&the_value); - vl.label = val_labs_find (dialog->labs, vl.value); - - return &vl; + if (valuep != NULL) + *valuep = value; + if (label != NULL) + *label = val_labs_find (dialog->labs, &value); } @@ -268,7 +270,7 @@ on_change (GtkWidget *w, gpointer data) text_to_value (val_text, &v, *var_get_write_format (dialog->pv)); - val_labs_replace (dialog->labs, v, + val_labs_replace (dialog->labs, &v, gtk_entry_get_text (GTK_ENTRY (dialog->label_entry))); gtk_widget_set_sensitive (dialog->change_button, FALSE); @@ -293,7 +295,7 @@ on_add (GtkWidget *w, gpointer data) *var_get_write_format (dialog->pv)); - if ( ! val_labs_add (dialog->labs, v, + if ( ! 
val_labs_add (dialog->labs, &v, gtk_entry_get_text ( GTK_ENTRY (dialog->label_entry)) ) ) return FALSE; @@ -312,9 +314,13 @@ on_remove (GtkWidget *w, gpointer data) { struct val_labs_dialog *dialog = data; - struct val_lab *vl = get_selected_tuple (dialog); + union value value; + const struct val_lab *vl; - val_labs_remove (dialog->labs, vl->value); + get_selected_tuple (dialog, &value, NULL); + vl = val_labs_lookup (dialog->labs, &value); + if (vl != NULL) + val_labs_remove (dialog->labs, vl); repopulate_dialog (dialog); gtk_widget_grab_focus (dialog->value_entry); @@ -334,14 +340,17 @@ on_select_row (GtkTreeView *treeview, gpointer data) gchar *labeltext; struct val_labs_dialog *dialog = data; - struct val_lab * vl = get_selected_tuple (dialog); + union value value; + const char *label; - gchar *const text = value_to_text (vl->value, - *var_get_write_format (dialog->pv)); + gchar *text; PsppireVarStore *var_store = PSPPIRE_VAR_STORE (psppire_sheet_get_model (dialog->vs)); + get_selected_tuple (dialog, &value, &label); + text = value_to_text (value, *var_get_write_format (dialog->pv)); + g_signal_handler_block (GTK_ENTRY (dialog->value_entry), dialog->value_handler_id); @@ -356,7 +365,7 @@ on_select_row (GtkTreeView *treeview, gpointer data) labeltext = recode_string (UTF8, psppire_dict_encoding (var_store->dict), - vl->label, -1); + label, -1); gtk_entry_set_text (GTK_ENTRY (dialog->label_entry), labeltext); @@ -466,8 +475,9 @@ val_labs_dialog_set_target_variable (struct val_labs_dialog *dialog, static void repopulate_dialog (struct val_labs_dialog *dialog) { - struct val_labs_iterator *vli = 0; - struct val_lab *vl; + const struct val_lab **labels; + size_t n_labels; + size_t i; GtkTreeIter iter; @@ -491,11 +501,11 @@ repopulate_dialog (struct val_labs_dialog *dialog) g_signal_handler_unblock (GTK_ENTRY (dialog->label_entry), dialog->change_handler_id); - - for (vl = val_labs_first_sorted (dialog->labs, &vli); - vl; - vl = val_labs_next (dialog->labs, &vli)) + 
labels = val_labs_sorted (dialog->labs); + n_labels = val_labs_count (dialog->labs); + for (i = 0; i < n_labels; i++) { + const struct val_lab *vl = labels[i]; gchar *const vstr = value_to_text (vl->value, @@ -504,7 +514,7 @@ repopulate_dialog (struct val_labs_dialog *dialog) gchar *labeltext = recode_string (UTF8, psppire_dict_encoding (var_store->dict), - vl->label, -1); + val_lab_get_label (vl), -1); gchar *const text = g_strdup_printf ("%s = \"%s\"", vstr, labeltext); @@ -519,6 +529,7 @@ repopulate_dialog (struct val_labs_dialog *dialog) g_free (text); g_free (vstr); } + free (labels); gtk_tree_view_set_model (GTK_TREE_VIEW (dialog->treeview), GTK_TREE_MODEL (list_store)); diff --git a/src/ui/gui/variable-info-dialog.c b/src/ui/gui/variable-info-dialog.c index 3b3367f7..fa9d5101 100644 --- a/src/ui/gui/variable-info-dialog.c +++ b/src/ui/gui/variable-info-dialog.c @@ -1,5 +1,5 @@ /* PSPPIRE - a graphical user interface for PSPP. - Copyright (C) 2007 Free Software Foundation + Copyright (C) 2007, 2009 Free Software Foundation This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -116,29 +116,31 @@ populate_text (PsppireDictView *treeview, gpointer data) /* Value Labels */ if ( var_has_value_labels (var)) { - struct val_labs_iterator *vli = 0; - struct val_lab *vl; - const struct val_labs *labs = var_get_value_labels (var); + const struct val_labs *vls = var_get_value_labels (var); + const struct val_lab **labels; + size_t n_labels; + size_t i; g_string_append (gstring, "\n"); g_string_append (gstring, _("Value Labels:\n")); - for (vl = val_labs_first_sorted (labs, &vli); - vl; - vl = val_labs_next (labs, &vli)) - { + labels = val_labs_sorted (vls); + n_labels = val_labs_count (vls); + for (i = 0; i < n_labels; i++) + { + const struct val_lab *vl = labels[i]; gchar *const vstr = value_to_text (vl->value, *var_get_print_format (var)); - text = recode_string (UTF8, 
psppire_dict_encoding (dict), - vl->label, -1); + val_lab_get_label (vl), -1); g_string_append_printf (gstring, _("%s %s\n"), vstr, text); g_free (text); g_free (vstr); } + free (labels); } gtk_text_buffer_set_text (textbuffer, gstring->str, gstring->len); diff --git a/src/ui/syntax-gen.c b/src/ui/syntax-gen.c index 84415c3e..bf1ee12f 100644 --- a/src/ui/syntax-gen.c +++ b/src/ui/syntax-gen.c @@ -194,7 +194,7 @@ syntax_gen_value (struct string *output, const union value *value, int width, if (width == 0) syntax_gen_number (output, value->f, format); else - syntax_gen_string (output, ss_buffer (value->s, width)); + syntax_gen_string (output, ss_buffer (value_str (value, width), width)); } /* Appends THRU to OUTPUT. If LOW is LOWEST, then diff --git a/tests/automake.mk b/tests/automake.mk index 533744e7..0dbfcc60 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -32,7 +32,6 @@ dist_TESTS = \ tests/command/get-data-txt-examples.sh \ tests/command/get-data-txt-importcases.sh \ tests/command/import-export.sh \ - tests/command/input-program.sh \ tests/command/insert.sh \ tests/command/lag.sh \ tests/command/line-ends.sh \ diff --git a/tests/bugs/crosstabs-crash.sh b/tests/bugs/crosstabs-crash.sh index eea89304..4454319c 100755 --- a/tests/bugs/crosstabs-crash.sh +++ b/tests/bugs/crosstabs-crash.sh @@ -95,7 +95,7 @@ diff -b -w $TEMPDIR/pspp.list - << EOF #---------------#--------+--------+--------+--------+--------+--------# #X * Y # 1| 100.0%| 0| 0.0%| 1| 100.0%# #===============#========#========#========#========#========#========# -2.2 CROSSTABS. X by Y [count]. +2.2 CROSSTABS. X * Y [count]. 
#===============#==============================================================#========# # # Y | # # #--------+--------+--------+--------+--------+--------+--------+ # diff --git a/tests/bugs/crosstabs-crash2.sh b/tests/bugs/crosstabs-crash2.sh index 060fa234..b0db128a 100755 --- a/tests/bugs/crosstabs-crash2.sh +++ b/tests/bugs/crosstabs-crash2.sh @@ -99,11 +99,13 @@ $TEMPDIR/crosstabs-crash2.sh.sps:6: warning: BEGIN DATA: Missing value(s) for al #---------------#--------+--------+--------+--------+--------+--------# #x * y # 4| 66.7%| 2| 33.3%| 6| 100.0%# #===============#========#========#========#========#========#========# -2.2 CROSSTABS. x by y [count]. +2.2 CROSSTABS. x * y [count]. #===============#===================================#========# # # y | # # #--------+--------+--------+--------+ # -# x#one unit|three lo|two dual|zero non| Total # +# x# one| three| two| zero| Total # +# #unity |lots |duality |none | # +# # | | | | # #---------------#--------+--------+--------+--------+--------# # 1.00# 1.0| .0| .0| 1.0| 2.0# # 2.00# .0| .0| 1.0| .0| 1.0# diff --git a/tests/bugs/t-test-alpha.sh b/tests/bugs/t-test-alpha.sh index f4fc1026..7a614b09 100755 --- a/tests/bugs/t-test-alpha.sh +++ b/tests/bugs/t-test-alpha.sh @@ -98,14 +98,14 @@ diff -b $TEMPDIR/pspp.list - <