From 86e6b87d7ad411378c3204fe87504c7e6749be78 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 26 Aug 2022 22:46:37 -0700 Subject: [PATCH] documentation and refactoring --- doc/automake.mk | 1 + doc/pspp-figures/ctables26.sps | 4 ++ doc/statistics.texi | 106 ++++++++++++++++++++------------- src/language/stats/ctables.c | 23 +++---- 4 files changed, 79 insertions(+), 55 deletions(-) create mode 100644 doc/pspp-figures/ctables26.sps diff --git a/doc/automake.mk b/doc/automake.mk index c39efe933a..cdb6843465 100644 --- a/doc/automake.mk +++ b/doc/automake.mk @@ -142,6 +142,7 @@ FIGURE_SYNTAX = \ doc/pspp-figures/ctables23.sps \ doc/pspp-figures/ctables24.sps \ doc/pspp-figures/ctables25.sps \ + doc/pspp-figures/ctables26.sps \ doc/pspp-figures/crosstabs.sps \ doc/pspp-figures/descriptives.sps \ doc/pspp-figures/flip.sps \ diff --git a/doc/pspp-figures/ctables26.sps b/doc/pspp-figures/ctables26.sps new file mode 100644 index 0000000000..270dfe65ca --- /dev/null +++ b/doc/pspp-figures/ctables26.sps @@ -0,0 +1,4 @@ +GET FILE='nhtsa.sav'. +CTABLES + /TABLE qnd7a [COUNT, TOTALS[COUNT, VALIDN]] + /CATEGORIES VARIABLES=qnd7a TOTAL=YES MISSING=INCLUDE. diff --git a/doc/statistics.texi b/doc/statistics.texi index b1e040283d..351842e94e 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -1697,7 +1697,20 @@ variables may be ``totaled'' indirectly by enabling totals and subtotals on a categorical variable within which the scalar variable is summarized. -@c TODO Specifying summaries for totals and subtotals +By default, @pspp{} uses the same summary functions for totals and +subtotals as other categories. To summarize totals and subtotals +differently, specify the summary functions for totals and subtotals +after the ordinary summary functions inside a nested set of @code{[]} +following @code{TOTALS}. 
For example, the following syntax displays +@code{COUNT} for individual categories and both @code{COUNT} and +@code{VALIDN} for totals, as shown: + +@example +CTABLES + /TABLE qnd7a [COUNT, TOTALS[COUNT, VALIDN]] + /CATEGORIES VARIABLES=qnd7a TOTAL=YES MISSING=INCLUDE. +@end example +@psppoutput {ctables26} @subsubheading Categories Without Values @@ -1823,55 +1836,68 @@ Show nothing. @node CTABLES Missing Value Treatment @subsection Missing Value Treatment +The @code{TABLE} subcommand on @code{CTABLES} specifies two different +kinds of variables: variables that divide tables into cells (which are +always categorical) and variables being summarized (which may be +categorical or scale). @pspp{} treats missing values differently in +each kind of variable: +@itemize @bullet +@item +For variables that divide tables into cells, per-variable category +options determine which data is analyzed. If any of the categories +for such a variable would exclude a case, then that case is not +included. -The sections below describe how @code{CTABLES} treats missing values -in categorical and scale variables. +@item +The treatment of missing values in variables being summarized varies +between scale and categorical variables. The following +section describes their treatment in detail. -@node CTABLES Categorical Missing Values -@subsubsection Categorical Missing Values +By default, each summarized variable is considered separately for +missing value treatment. A section below describes how to consider +missing values listwise for summarizing scale variables. +@end itemize -For categorical variables, in most cases, values that are valid and in -included categories are analyzed, and values that are missing or in -excluded categories are not analyzed. (@xref{CTABLES Per-Variable -Category Options}), for information on included and excluded -categories.) 
The exact rules are shown in the following chart, in -which cells that contain ``yes'' indicate that a value is analyzed: +@node CTABLES Missing Values for Summary Variables +@subsubsection Missing Values for Summary Variables -@multitable {@headitemfont{System-Missing}} {Included Category} {Excluded Category} -@headitem @tab Included Category @tab Excluded Category -@item @headitemfont{Valid} @tab yes @tab --- -@item @headitemfont{User-Missing} @tab yes [*] @tab --- [+] -@item @headitemfont{System-Missing} @tab n/a [#] @tab --- [+] -@end multitable +For summary variables, values that are valid and in included +categories are analyzed, and values that are missing or in excluded +categories are not analyzed, with the following exceptions: -@table @asis -@item [*] -Exceptions: The ``@t{VALIDN}'' summary functions (@code{VALIDN}, -@code{EVALIDN}, @code{UVALIDN}, @code{@i{area}PCT.VALIDN}, and -@code{U@i{area}PCT.VALIDN}), which only count valid values in included -categories. +@itemize @bullet +@item +The ``@t{VALIDN}'' summary functions (@code{VALIDN}, @code{EVALIDN}, +@code{UVALIDN}, @code{@i{area}PCT.VALIDN}, and +@code{U@i{area}PCT.VALIDN}) only count valid values in included +categories (not missing values in included categories). -@item [+] -Exceptions: The ``@t{TOTALN}'' summary functions (@code{TOTALN}, -@code{ETOTALN}, @code{UTOTALN}, @code{@i{area}PCT.TOTALN}), and -@code{U@i{area}PCT.TOTALN}, which count all values (valid and missing) -in included categories and missing (but not valid) values in excluded +@item +The ``@t{TOTALN}'' summary functions (@code{TOTALN}, @code{ETOTALN}, +@code{UTOTALN}, @code{@i{area}PCT.TOTALN}, and +@code{U@i{area}PCT.TOTALN}) count all values (valid and missing) in +included categories and missing (but not valid) values in excluded categories. - -@item [#] -System-missing values are never in included categories. 
-@end table +@end itemize @noindent -The following table provides another view of the same information: - -@multitable {Missing values in excluded categories} {@code{VALIDN}} {other} {@code{TOTALN}} -@headitem @tab @code{VALIDN} @tab other @tab @code{TOTALN} -@item Valid values in included categories @tab yes @tab yes @tab yes -@item Missing values in included categories @tab --- @tab yes @tab yes -@item Missing values in excluded categories @tab --- @tab --- @tab yes -@item Valid values in excluded categories @tab --- @tab --- @tab --- +For categorical variables, system-missing values are never in included +categories. For scale variables, there is no notion of included and +excluded categories, so all values are effectively included. + +The following table provides another view of the above rules: + +@multitable {@w{ }@w{ }@w{ }@w{ }Missing values in excluded categories} {@t{VALIDN}} {other} {@t{TOTALN}} +@headitem @tab @t{VALIDN} @tab other @tab @t{TOTALN} +@item @headitemfont{Categorical variables:} +@item @w{ }@w{ }@w{ }@w{ }Valid values in included categories @tab yes @tab yes @tab yes +@item @w{ }@w{ }@w{ }@w{ }Missing values in included categories @tab --- @tab yes @tab yes +@item @w{ }@w{ }@w{ }@w{ }Missing values in excluded categories @tab --- @tab --- @tab yes +@item @w{ }@w{ }@w{ }@w{ }Valid values in excluded categories @tab --- @tab --- @tab --- +@item @headitemfont{Scale variables:} +@item @w{ }@w{ }@w{ }@w{ }Valid values @tab yes @tab yes @tab yes +@item @w{ }@w{ }@w{ }@w{ }User- or system-missing values @tab --- @tab yes @tab yes @end multitable @node CTABLES Scale Missing Values diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 7e899d1b66..e38c64d30e 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -2535,8 +2535,6 @@ union ctables_summary double ovalid; double ovalue; }; - - /* XXX multiple response */ }; static void @@ -3226,8 +3224,6 @@ ctables_cell_insert__ (struct ctables_section *s, 
const struct ccase *c, || cat->type == CCT_SUBTOTAL || cat->type == CCT_POSTCOMPUTE) { - /* XXX these should be more encompassing I think.*/ - switch (a) { case PIVOT_AXIS_COLUMN: @@ -3341,18 +3337,15 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, { add_weight (a->valid, weight); - for (size_t i = 0; i < s->table->n_sum_vars; i++) - { - /* XXX listwise_missing??? */ - const struct variable *var = s->table->sum_vars[i]; - double addend = case_num (c, var); - if (!var_is_num_missing (var, addend)) - { - struct ctables_sum *sum = &a->sums[i]; + if (!scale_missing) + for (size_t i = 0; i < s->table->n_sum_vars; i++) + { + const struct variable *var = s->table->sum_vars[i]; + double addend = case_num (c, var); + if (!var_is_num_missing (var, addend)) for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) - sum->sum[wt] += addend * weight[wt]; - } - } + a->sums[i].sum[wt] += addend * weight[wt]; + } } } } -- 2.30.2