Docs

author Ben Pfaff <blp@cs.stanford.edu>

Mon, 15 Aug 2022 05:22:43 +0000 (22:22 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Mon, 15 Aug 2022 05:22:43 +0000 (22:22 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 15 Aug 2022 05:22:43 +0000 (22:22 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 15 Aug 2022 05:22:43 +0000 (22:22 -0700)
diff --git a/doc/pspp-figures/ctables18.sps b/doc/pspp-figures/ctables18.sps

index 71248785cc4070dd8611c6710862cef06cc98663..52938dc9e5ace41ea15077659cafe102ae943fc3 100644 (file)
--- a/doc/pspp-figures/ctables18.sps
+++ b/doc/pspp-figures/ctables18.sps
@@ -1,4 +1,4 @@
-DATA LIST LIST NOTABLE/x y z.
+DATA LIST LIST NOTABLE/x (F8.0) y z (F8.2).
  BEGIN DATA.
  1  . 40
  1 10 50
diff --git a/doc/pspp-figures/ctables19.sps b/doc/pspp-figures/ctables19.sps

index ada8823bcce67b1871f7b44f7c0f3a81b0362b26..ee9187af8fc40788c4bb8dc973ce6c834af8b272 100644 (file)
--- a/doc/pspp-figures/ctables19.sps
+++ b/doc/pspp-figures/ctables19.sps
@@ -1,4 +1,4 @@
-DATA LIST LIST NOTABLE/x y z.
+DATA LIST LIST NOTABLE/x (F8.0) y z (F8.2).
  BEGIN DATA.
  1  . 40
  1 10 50
diff --git a/doc/pspp-figures/ctables20.sps b/doc/pspp-figures/ctables20.sps

index 6814a274e5e20ea7d8b7b556db70971abe2ddf88..fe0d6fc039c3b6c49a1d4170ac4d4ba185974af5 100644 (file)
--- a/doc/pspp-figures/ctables20.sps
+++ b/doc/pspp-figures/ctables20.sps
@@ -1,4 +1,4 @@
-DATA LIST LIST NOTABLE/x y z.
+DATA LIST LIST NOTABLE/x (F8.0) y z (F8.2).
  BEGIN DATA.
  1  . 40
  1 10 50
diff --git a/doc/pspp-figures/ctables21.sps b/doc/pspp-figures/ctables21.sps

index 8fb18d8b2b3394e42075f935b716f276ac50b8c7..1efa83e11633b504f6f5d031f67fb6aa792117e6 100644 (file)
--- a/doc/pspp-figures/ctables21.sps
+++ b/doc/pspp-figures/ctables21.sps
@@ -1,4 +1,4 @@
-DATA LIST LIST NOTABLE/x y z.
+DATA LIST LIST NOTABLE/x (F8.0) y z (F8.2).
  BEGIN DATA.
  1  . 40
  1 10 50
diff --git a/doc/pspp-figures/ctables9.sps b/doc/pspp-figures/ctables9.sps

index 133d0d7d01fa3bd811ad20fd3e2f4d461e096f5b..e0494444bbadf3c2ca11a27fe08d6e0714ae2438 100644 (file)
--- a/doc/pspp-figures/ctables9.sps
+++ b/doc/pspp-figures/ctables9.sps
@@ -1,2 +1,4 @@
  GET FILE='nhtsa.sav'.
-CTABLES /TABLE qn20 [C] BY qns3a.
+CTABLES
+    /TABLE qn20 BY qns3a
+    /TABLE qn20 [C] BY qns3a.
diff --git a/doc/statistics.texi b/doc/statistics.texi

index 2e133dd651ad3ded4525ef91f655ba6c1abd1344..1b0a294218608be25f11f92e67b935472d8b9824 100644 (file)
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -1027,7 +1027,8 @@ An axis expression that names a categorical variable divides the data
  into cells according to the values of that variable.  When all the
  variables named on @code{TABLE} are categorical, by default each cell
  displays the number of cases that it contains, so specifying a single
-variable yields a frequency table:
+variable yields a frequency table, much like the output of the
+@code{FREQUENCIES} command (@pxref{FREQUENCIES}):
  
  @example
  CTABLES /TABLE=AgeGroup.
@@ -1036,7 +1037,8 @@ CTABLES /TABLE=AgeGroup.
  
  @noindent
  Specifying a row and a column categorical variable yields a
-crosstabulation:
+crosstabulation, much like the output of the @code{CROSSTABS} command
+(@pxref{CROSSTABS}):
  
  @example
  CTABLES /TABLE=AgeGroup BY qns3a.
@@ -1121,15 +1123,24 @@ decide whether to treat it as categorical or scalar.  Variables
  assigned the nominal or ordinal measurement level are treated as
  categorical, and scalar variables are treated as scalar.
  
-Use the @code{VARIABLE LEVEL} command to change a variable's
-measurement level (@pxref{VARIABLE LEVEL}).  To treat a variable as
-categorical or scalar only for one use on @code{CTABLES}, add
-@samp{[C]} or @samp{[S]}, respectively, after the variable name.  The
-following example shows how to analyze the scalar variable @code{qn20}
-as categorical:
+When @pspp{} reads data from a file in an external format, such as a
+text file, variables' measurement levels are often unknown.  If
+@code{CTABLES} runs when a variable has an unknown measurement level,
+it makes an initial pass through the data to guess measurement levels
+using the rules described earlier in this manual (@pxref{Measurement
+Level}).  Use the @code{VARIABLE LEVEL} command to set or change a
+variable's measurement level (@pxref{VARIABLE LEVEL}).
+
+To treat a variable as categorical or scalar only for one use on
+@code{CTABLES}, add @samp{[C]} or @samp{[S]}, respectively, after the
+variable name.  The following example shows the output when variable
+@code{qn20} is analyzed as scalar (the default for its measurement
+level) and as categorical:
  
  @example
-CTABLES /TABLE qn20 [C] BY qns3a.
+CTABLES
+    /TABLE qn20 BY qns3a
+    /TABLE qn20 [C] BY qns3a.
  @end example
  @psppoutput {ctables9}
  
@@ -1144,15 +1155,20 @@ sets.
  @node CTABLES Data Summarization
  @subsection Data Summarization
  
+@c TODO Summary function default formats
+
  The @code{CTABLES} command allows the user to control how the data are
-summarized with summary specifications, which are enclosed in square
-brackets following a variable name on the @code{TABLE} subcommand.
-When all the variables are categorical, summary specifications can be
-given for the innermost nested variables on any one axis.  When a
-scalar variable is present, only the scalar variable may have summary
-specifications.  The following example includes a summary
-specification for column and row percentages for categorical
-variables, and mean and median for a scalar variable:
+summarized with @dfn{summary specifications}, syntax that lists one or
+more summary function names, optionally separated by commas, and which
+are enclosed in square brackets following a variable name on the
+@code{TABLE} subcommand.  When all the variables are categorical,
+summary specifications can be given for the innermost nested variables
+on any one axis.  When a scalar variable is present, only the scalar
+variable may have summary specifications.
+
+The following example includes a summary specification for column and
+row percentages for categorical variables, and mean and median for a
+scalar variable:
  
  @example
  CTABLES
@@ -1500,6 +1516,8 @@ column variable category labels, respectively, to the layer axis.
  Only one axis's labels may be moved, whether to the opposite axis or
  to the layer axis.
  
+@c TODO Moving category labels for stacked variables
+
  @subsubheading Effect on Summary Statistics
  
  @code{CLABELS} primarily affects the appearance of tables, not the
@@ -1550,7 +1568,7 @@ variables.  @code{CATEGORIES} applies to the table produced by the
  
  @code{CATEGORIES} does not apply to scalar variables.
  
-@t{VARIABLES} is required.  List the variables for the subcommand
+@t{VARIABLES} is required and must list the variables for the subcommand
  to affect.
  
  There are two way to specify the Categories to include and their sort
@@ -1597,7 +1615,7 @@ A computed category name (@pxref{CTABLES Computed Categories}).
  
  Additional forms, described later, allow for subtotals.
  If multiple elements of the list cover a given category, the last one
-in the list is considered to be a match.
+in the list takes precedence.
  
  @item Implicit categories.
  Without an explicit list of categories, @pspp{} sorts
@@ -1606,13 +1624,15 @@ categories automatically.
  The @code{KEY} setting specifies the sort key.  By default, or with
  @code{KEY=VALUE}, categories are sorted by default.  Categories may
  also be sorted by value label, with @code{KEY=LABEL}, or by the value
-of a summary function, e.g.@: @code{KEY=COUNT}.  For summary
-functions, a variable name may be specified in parentheses, e.g.@:
-@code{KEY=MAXIUM(qnd1)}, and this is required for functions that apply
-only to scalar variables.  The @code{PTILE} function also requires a
-percentage argument, e.g.@: @code{KEY=PTILE(qnd1, 90)}.  Only summary
-functions used in the table may be used, except that @code{COUNT} is
-always allowed.
+of a summary function, e.g.@: @code{KEY=COUNT}.
+@ignore  @c Not yet implemented
+For summary functions, a variable name may be specified in
+parentheses, e.g.@: @code{KEY=MAXIMUM(qnd1)}, and this is required for
+functions that apply only to scalar variables.  The @code{PTILE}
+function also requires a percentage argument, e.g.@:
+@code{KEY=PTILE(qnd1, 90)}.  Only summary functions used in the table
+may be used, except that @code{COUNT} is always allowed.
+@end ignore
  
  By default, or with @code{ORDER=A}, categories are sorted in ascending
  order.  Specify @code{ORDER=D} to sort in descending order.
@@ -1625,9 +1645,10 @@ user-missing values.  The system-missing value is always excluded.
  @subsubheading Totals and Subtotals
  
  @code{CATEGORIES} also controls display of totals and subtotals.
-Totals are not displayed by default, or with @code{TOTAL=NO}.  Specify
-@code{TOTAL=YES} to display a total.  By default, the total is labeled
-``Total''; use @code{LABEL="@i{label}"} to override it.
+Totals are not displayed with @code{TOTAL=NO}, which is also the
+default.  Specify @code{TOTAL=YES} to display a total.  By default,
+the total is labeled ``Total''; use @code{LABEL="@i{label}"} to
+override it.
  
  Subtotals are also not displayed by default.  To add one or more
  subtotals, use an explicit category list and insert @code{SUBTOTAL} or
@@ -1638,16 +1659,18 @@ categories that make up the subtotal.  Either way, the default label
  is ``Subtotal'', use @code{SUBTOTAL="@i{label}"} or
  @code{HSUBTOTAL="@i{label}"} to specify a custom label.
  
-By default, or with @code{POSITION=AFTER}, totals come after the last
-category and subtotals apply to categories that precede them.  With
-@code{POSITION=BEFORE}, totals come before the first category and
-subtotals apply to categories that follow them.
+By default, or with @code{POSITION=AFTER}, totals are displayed in the
+output after the last category and subtotals apply to categories that
+precede them.  With @code{POSITION=BEFORE}, totals come before the
+first category and subtotals apply to categories that follow them.
  
  Only categorical variables may have totals and subtotals.  Scalar
  variables may be ``totaled'' indirectly by enabling totals and
  subtotals on a categorical variable within which the scalar variable is
  summarized.
  
+@c TODO Specifying summaries for totals and subtotals
+
  @subsubheading Categories Without Values
  
  Some categories might not be included in the data set being analyzed.
@@ -1657,9 +1680,10 @@ younger'' age group.  By default, or with @code{EMPTY=INCLUDE},
  them, specify @code{EMPTY=EXCLUDE}.
  
  For implicit categories, empty categories potentially include all the
-values with labels for a given variable; for explicit categories, they
-include all the values listed individually and all labeled values
-covered by ranges or @code{MISSING} or @code{OTHERNM}.
+values with value labels for a given variable; for explicit
+categories, they include all the values listed individually and all
+values with value labels that are covered by ranges or @code{MISSING}
+or @code{OTHERNM}.
  
  @node CTABLES Titles
  @subsection Titles
@@ -1671,11 +1695,14 @@ covered by ranges or @code{MISSING} or @code{OTHERNM}.
      [@t{CORNER=}@i{string}@dots{}]
  @end display
  
+@c TODO Describe substitution variables
+
  The @code{TITLES} subcommand sets the title, caption, and corner text
  for the table output for the previous @code{TABLE} subcommand.  The
  title appears above the table, the caption below the table, and the
  corner text appears in the table's upper left corner.  By default, the
  title is ``Custom Tables'' and the caption and corner text are empty.
+With some table output styles, the corner text is not displayed.
  
  @node CTABLES Table Formatting
  @subsection Table Formatting
@@ -1694,13 +1721,13 @@ The @code{FORMAT} subcommand, which must precede the first
  tables.  @code{FORMAT} and all of its settings are optional.
  
  Use @code{MINCOLWIDTH} and @code{MAXCOLWIDTH} to control the minimum
-or maximum width of columns in output tables.  By default, or with
+or maximum width of columns in output tables.  By default, with
  @code{DEFAULT}, column width varies based on content.  Otherwise,
  specify a number for either or both of these settings.  If both are
-specified, @code{MAXCOLWIDTH} must be bigger than @code{MINCOLWIDTH}.
-The default unit, or with @code{UNITS=POINTS}, is points (1/72 inch),
-but specify @code{UNITS=INCHES} to use inches or @code{UNITS=CM} for
-centimeters.
+specified, @code{MAXCOLWIDTH} must be greater than or equal to
+@code{MINCOLWIDTH}.  The default unit, or with @code{UNITS=POINTS}, is
+points (1/72 inch); specify @code{UNITS=INCHES} to use inches or
+@code{UNITS=CM} for centimeters.
  
  By default, or with @code{EMPTY=ZERO}, zero values are displayed in
  their usual format.  Use @code{EMPTY=BLANK} to use an empty cell
@@ -1730,7 +1757,7 @@ variables listed on @code{VARIABLES}.  The supported values are:
  
  @table @code
  @item DEFAULT
-Uses the setting from @ref{SET TVARS}.
+Use the setting from @code{SET TVARS} (@pxref{SET TVARS}).
  
  @item NAME
  Show only a variable name.
@@ -1829,30 +1856,33 @@ in @code{EXPR(@dots{})}.  A postcompute expression consists of:
  This form evaluates to the summary statistic for @i{category}, e.g.@:
  @code{[1]} evaluates to the value of the summary statistic associated
  with category 1.  The @i{category} may be a number, a quoted string,
-or a quoted time or date value, and all of the categories for a given
-postcompute must have the same form.
+or a quoted time or date value.  All of the categories for a given
+postcompute must have the same form.  The category must appear in all
+the @code{CATEGORIES} lists in which the postcompute is used.
  
  @item [@i{min} THRU @i{max}]
  @itemx [LO THRU @i{max}]
  @itemx [@i{min} THRU HI]
  @itemx MISSING
  @itemx OTHERNM
-These forms evaluate to the summary statistics for categories matching
-the given syntax, as described in previous sections (@pxref{CTABLES
-Explicit Category List}).  If more than one category matches, their
-values are summed.
+These forms evaluate to the summary statistics for a category
+specified with the same syntax, as described in a previous section
+(@pxref{CTABLES Explicit Category List}).  The category must appear in
+all the @code{CATEGORIES} lists in which the postcompute is used.
  
  @item SUBTOTAL
  The summary statistic for the subtotal category.  This form is allowed
-only for variables with exactly one subtotal.
+only if the @code{CATEGORIES} lists that include this postcompute have
+exactly one subtotal.
  
  @item SUBTOTAL[@i{index}]
  The summary statistic for subtotal category @i{index}, where 1 is the
  first subtotal, 2 is the second, and so on.  This form may be used for
-any number of subtotals.
+@code{CATEGORIES} lists with any number of subtotals.
  
  @item TOTAL
-The summary statistic for the total.
+The summary statistic for the total.  The @code{CATEGORIES} lists that
+include this postcompute must have a total enabled.
  
  @item @i{a} + @i{b}
  @itemx @i{a} - @i{b}
@@ -1919,14 +1949,16 @@ by computed categories are displayed like other categories.  Use
  The @code{WEIGHT} subcommand is optional and must appear before
  @code{TABLE}.  If it appears, it must name a numeric variable, known
  as the @dfn{effective base weight} or @dfn{adjustment weight}.  The
-effective base weight variable is used for the @code{ECOUNT},
-@code{ETOTALN}, and @code{EVALIDN} summary functions.
-
-Cases with zero, missing, or negative effective base weight are
-excluded from all analysis.
+effective base weight variable stands in for the dictionary's weight
+variable (@pxref{WEIGHT}), if any, in most calculations in
+@code{CTABLES}.  The only exceptions are the @code{COUNT},
+@code{TOTALN}, and @code{VALIDN} summary functions, which use the
+dictionary weight instead.
  
  Weights obtained from the @pspp{} dictionary are rounded to the
-nearest integer.  Effective base weights are not rounded.
+nearest integer at the case level.  Effective base weights are not
+rounded.  Regardless of the weighting source, @pspp{} does not analyze
+cases with zero, missing, or negative effective weights.
  
  @node CTABLES Hiding Small Counts
  @subsection Hiding Small Counts
diff --git a/src/data/dictionary.c b/src/data/dictionary.c

index d04a4f70fa5ac64ced2b27552c360123ee978498..a2e3fb8fc1ee2a6e42d6d287eb6bd3047cee9277 100644 (file)
--- a/src/data/dictionary.c
+++ b/src/data/dictionary.c
@@ -1262,7 +1262,7 @@ dict_get_weight (const struct dictionary *d)
  }
  
  /* Returns the value of D's weighting variable in case C, except
-   that a negative weight is returned as 0.  Returns 1 if the
+   that a negative or missing weight is returned as 0.  Returns 1 if the
     dictionary is unweighted.  Will warn about missing, negative,
     or zero values if *WARN_ON_INVALID is true.  The function will
     set *WARN_ON_INVALID to false if an invalid weight is
@@ -1283,6 +1283,15 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c,
      }
  }
  
+/* Like dict_get_case_weight(), but additionally rounds each weight to the
+   nearest integer.  */
+double
+dict_get_rounded_case_weight (const struct dictionary *d,
+                              const struct ccase *c, bool *warn_on_invalid)
+{
+  return floor (dict_get_case_weight (d, c, warn_on_invalid) + 0.5);
+}
+
  /* Returns the format to use for weights. */
  const struct fmt_spec *
  dict_get_weight_format (const struct dictionary *d)
diff --git a/src/data/dictionary.h b/src/data/dictionary.h

index 47317a22cbc23e61a56de8602b09123daea92499..3e874d87071349bad38e253aaeb65b9b7a81f966 100644 (file)
--- a/src/data/dictionary.h
+++ b/src/data/dictionary.h
@@ -98,6 +98,8 @@ void dict_set_names_must_be_ids (struct dictionary *, bool);
  /* Weight variable. */
  double dict_get_case_weight (const struct dictionary *,
                              const struct ccase *, bool *);
+double dict_get_rounded_case_weight (const struct dictionary *,
+                                     const struct ccase *, bool *);
  struct variable *dict_get_weight (const struct dictionary *);
  void dict_set_weight (struct dictionary *, struct variable *);
  const struct fmt_spec *dict_get_weight_format (const struct dictionary *);
diff --git a/src/data/variable.c b/src/data/variable.c

index 2e584fe86fa5dd7e9867c27d257186e3ecd4a528..87e7d07823f42271707000938d7501a24f562250 100644 (file)
--- a/src/data/variable.c
+++ b/src/data/variable.c
@@ -1357,15 +1357,16 @@ var_clear_vardict (struct variable *v)
  double
  var_force_valid_weight (const struct variable *wv, double w, bool *warn_on_invalid)
  {
-  if (w < 0.0 || (wv && var_is_num_missing (wv, w)))
-    w = 0.0;
-
-  if (w == 0.0 && warn_on_invalid != NULL && *warn_on_invalid)
+  if (w <= 0.0 || (wv ? var_is_num_missing (wv, w) : w == SYSMIS))
      {
-      *warn_on_invalid = false;
-      msg (SW, _("At least one case in the data file had a weight value "
-                "that was user-missing, system-missing, zero, or "
-                "negative.  These case(s) were ignored."));
+      w = 0.0;
+      if (warn_on_invalid != NULL && *warn_on_invalid)
+        {
+          *warn_on_invalid = false;
+          msg (SW, _("At least one case in the data file had a weight value "
+                     "that was user-missing, system-missing, zero, or "
+                     "negative.  These case(s) were ignored."));
+        }
      }
  
    return w;
diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c

index b2274f3e44d2d3abc2f5644086385eba27ce4b93..bb7f5c37aa973afe6a6b6431e37d81bef9ff9079 100644 (file)
--- a/src/language/stats/ctables.c
+++ b/src/language/stats/ctables.c
@@ -2655,24 +2655,37 @@ ctables_summary_add (union ctables_summary *s,
    switch (ss->function)
      {
      case CTSF_TOTALN:
-    case CTSF_areaPCT_TOTALN:
        s->count += ss->weighted ? d_weight : 1.0;
        break;
  
+    case CTSF_areaPCT_TOTALN:
+      s->count += ss->weighted ? e_weight : 1.0;
+      break;
+
      case CTSF_COUNT:
-    case CTSF_areaPCT_COUNT:
        if (is_scale || !excluded_missing)
          s->count += ss->weighted ? d_weight : 1.0;
        break;
  
+    case CTSF_areaPCT_COUNT:
+      if (is_scale || !excluded_missing)
+        s->count += ss->weighted ? e_weight : 1.0;
+      break;
+
      case CTSF_VALIDN:
-    case CTSF_areaPCT_VALIDN:
        if (is_scale
            ? !is_scale_missing
            : !is_missing)
          s->count += ss->weighted ? d_weight : 1.0;
        break;
  
+    case CTSF_areaPCT_VALIDN:
+      if (is_scale
+          ? !is_scale_missing
+          : !is_missing)
+        s->count += ss->weighted ? e_weight : 1.0;
+      break;
+
      case CTSF_areaID:
        break;
  
@@ -2680,7 +2693,7 @@ ctables_summary_add (union ctables_summary *s,
        if (is_scale
            ? is_scale_missing
            : is_missing)
-        s->count += ss->weighted ? d_weight : 1.0;
+        s->count += ss->weighted ? e_weight : 1.0;
        break;
  
      case CTSF_ECOUNT:
@@ -5264,7 +5277,7 @@ ctables_execute (struct dataset *ds, struct casereader *input,
        for (struct ccase *c = casereader_read (group); c;
             case_unref (c), c = casereader_read (group))
          {
-          double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
+          double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
            double e_weight = (ct->e_weight
                               ? var_force_valid_weight (ct->e_weight,
                                                         case_num (c, ct->e_weight),
author	Ben Pfaff <blp@cs.stanford.edu>
	Mon, 15 Aug 2022 05:22:43 +0000 (22:22 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Mon, 15 Aug 2022 05:22:43 +0000 (22:22 -0700)
doc/pspp-figures/ctables18.sps		patch \| blob \| history
doc/pspp-figures/ctables19.sps		patch \| blob \| history
doc/pspp-figures/ctables20.sps		patch \| blob \| history
doc/pspp-figures/ctables21.sps		patch \| blob \| history
doc/pspp-figures/ctables9.sps		patch \| blob \| history
doc/statistics.texi		patch \| blob \| history
src/data/dictionary.c		patch \| blob \| history
src/data/dictionary.h		patch \| blob \| history
src/data/variable.c		patch \| blob \| history
src/language/stats/ctables.c		patch \| blob \| history