work on docs

author Ben Pfaff <blp@cs.stanford.edu>

Mon, 14 Mar 2022 16:19:47 +0000 (09:19 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sat, 2 Apr 2022 01:48:55 +0000 (18:48 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 14 Mar 2022 16:19:47 +0000 (09:19 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 2 Apr 2022 01:48:55 +0000 (18:48 -0700)
diff --git a/doc/statistics.texi b/doc/statistics.texi

index 8445b51b099e077155ccac80cc90902fa6725098..d16de5d52e983248731fb953cf019bc86de25ad1 100644 (file)
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -34,7 +34,7 @@ far.
  * ROC::                         Receiver Operating Characteristic.
  @end menu
  
-@node DESCRIPTIVES, FREQUENCIES, Statistics, Statistics
+@node DESCRIPTIVES
  @section DESCRIPTIVES
  
  @vindex DESCRIPTIVES
@@ -163,7 +163,7 @@ should have these properties since they are normalized versions of the original
  @caption {Descriptives statistics including two normalized variables (Z-scores)}
  @end float
  
-@node FREQUENCIES, EXAMINE, DESCRIPTIVES, Statistics
+@node FREQUENCIES
  @section FREQUENCIES
  
  @vindex FREQUENCIES
@@ -314,7 +314,7 @@ be corrected, or marked as missing before using the data.
  @caption {The relative frequencies of @exvar{sex} and @exvar{occupation}}
  @end float
  
-@node EXAMINE, GRAPH, FREQUENCIES, Statistics
+@node EXAMINE
  @section EXAMINE
  
  @vindex EXAMINE
@@ -491,7 +491,7 @@ If you specify many dependent variables or factor variables
  for which there are many distinct values, then @cmd{EXAMINE} will produce a very
  large quantity of output.
  
-@node GRAPH, CORRELATIONS, EXAMINE, Statistics
+@node GRAPH
  @section GRAPH
  
  @vindex GRAPH
@@ -513,12 +513,12 @@ The @cmd{GRAPH} command produces graphical plots of data. Only one of the subcom
  can be produced per call of @cmd{GRAPH}. The @subcmd{MISSING} is optional.
  
  @menu
-* SCATTERPLOT::                 Cartesian Plots
-* HISTOGRAM::                   Histograms
-* BAR CHART::                   Bar Charts
+* SCATTERPLOT::             Cartesian Plots
+* HISTOGRAM::               Histograms
+* BAR CHART::               Bar Charts
  @end menu
  
-@node SCATTERPLOT, HISTOGRAM, GRAPH, GRAPH
+@node SCATTERPLOT
  @subsection Scatterplot
  @cindex scatterplot
  
@@ -537,7 +537,7 @@ This example produces a scatterplot where @var{height} is plotted versus @var{we
  on the value of the @var{gender} variable, the colour of the datapoint is different. With
  this plot it is possible to analyze gender differences for @var{height} versus @var{weight} relation.
  
-@node HISTOGRAM, BAR CHART, SCATTERPLOT, GRAPH
+@node HISTOGRAM
  @subsection Histogram
  @cindex histogram
  
@@ -553,7 +553,7 @@ GRAPH
          /HISTOGRAM = @var{weight}.
  @end example
  
-@node BAR CHART,  , HISTOGRAM, GRAPH
+@node BAR CHART
  @subsection Bar Chart
  @cindex bar chart
  
@@ -607,7 +607,7 @@ GRAPH  /BAR  = COUNT BY @var{city}.
  
  Bar charts can also be produced using the @ref{FREQUENCIES} and @ref{CROSSTABS} commands.
  
-@node CORRELATIONS, CROSSTABS, GRAPH, Statistics
+@node CORRELATIONS
  @section CORRELATIONS
  
  @vindex CORRELATIONS
@@ -670,7 +670,7 @@ The @subcmd{XPROD} keyword requests cross-product deviations and covariance esti
  be displayed for each pair of variables.
  The keyword @subcmd{ALL} is the union of @subcmd{DESCRIPTIVES} and @subcmd{XPROD}.
  
-@node CROSSTABS, CTABLES, CORRELATIONS, Statistics
+@node CROSSTABS
  @section CROSSTABS
  
  @vindex CROSSTABS
@@ -897,7 +897,7 @@ person's occupation.
  @caption {The results of a test of independence between @exvar{sex} and @exvar{occupation}}
  @end float
  
-@node CTABLES, FACTOR, CROSSTABS, Statistics
+@node CTABLES
  @section CTABLES
  
  @vindex CTABLES
@@ -1002,7 +1002,7 @@ this data set, with a slightly modified dictionary, as
  * CTABLES Data Summarization::
  @end menu
  
-@node CTABLES Basics, CTABLES Data Summarization, CTABLES, CTABLES
+@node CTABLES Basics
  @subsection Basics
  
  The only required subcommand is @code{TABLE}, which specifies the
@@ -1022,7 +1022,7 @@ variables.  At least one must specify an axis expression.
  * CTABLES Multiple Response Sets::
  @end menu
  
-@node CTABLES Categorical Variable Basics, CTABLES Scalar Variable Basics, CTABLES Basics, CTABLES Basics
+@node CTABLES Categorical Variable Basics
  @subsubsection Categorical Variables
  
  An axis expression that names a categorical variable divides the data
@@ -1076,7 +1076,7 @@ CTABLES /TABLE (qn26 + qn27) > qns3a.
  @end example
  @psppoutput {ctables5}
  
-@node CTABLES Scalar Variable Basics, CTABLES Overriding Measurement Level, CTABLES Categorical Variable Basics, CTABLES Basics
+@node CTABLES Scalar Variable Basics
  @subsubsection Scalar Variables
  
  Categorical variables make @code{CTABLES} divide tables into cells.
@@ -1114,7 +1114,7 @@ scalar variable may not nest inside a scalar variable directly or
  indirectly.  Scalar variables may only appear on one axis within
  @code{TABLE}.
  
-@node CTABLES Overriding Measurement Level, CTABLES Multiple Response Sets, CTABLES Scalar Variable Basics, CTABLES Basics
+@node CTABLES Overriding Measurement Level
  @subsubsection Overriding Measurement Level
  
  By default, @code{CTABLES} uses a variable's measurement level to
@@ -1133,13 +1133,13 @@ CTABLES /TABLE qn20 [C] BY qns3a.
  @end example
  @psppoutput {ctables9}
  
-@node CTABLES Multiple Response Sets,  , CTABLES Overriding Measurement Level, CTABLES Basics
+@node CTABLES Multiple Response Sets
-@subsubheading Multiple Response Sets
+@subsubsection Multiple Response Sets
  
  The @code{CTABLES} command does not yet support multiple response
  sets.
  
-@node CTABLES Data Summarization,  , CTABLES Basics, CTABLES
+@node CTABLES Data Summarization
  @subsection Data Summarization
  
  The @code{CTABLES} command allows the user to control how the data are
@@ -1170,7 +1170,7 @@ CTABLES /TABLE=AgeGroup [COLPCT 'Gender %' PCT5.0,
  @end example
  @psppoutput {ctables11}
  
-Parentheses are a shorthand to apply summary specifications to
+Parentheses provide a shorthand to apply summary specifications to
  multiple variables.  For example, both of these commands:
  
  @example
@@ -1183,14 +1183,14 @@ produce the same output shown below:
  
  @psppoutput {ctables12}
  
-The following sections list the available summary functions.
+The following section lists the available summary functions.
  
  @menu
-* CTABLES Summary Functions for Categorical and Scale Variables::
+* CTABLES Summary Functions::
  @end menu
  
-@node CTABLES Summary Functions for Categorical and Scale Variables,  , CTABLES Data Summarization, CTABLES Data Summarization
-@subsubsection Summary Functions for Categorical and Scale Variables
+@node CTABLES Summary Functions
+@subsubsection Summary Functions
  
  This section lists the summary functions that can be applied to cells
  in @code{CTABLES}.  Many of these functions have an @var{area} in
@@ -1232,26 +1232,25 @@ A column in one layer within a section.
  @end table
  @end itemize
  
-The following table lists summary functions that may be applied to any
-variable regardless of whether it is categorical or scalar, along with
-their default labels:
+The following summary functions may be applied to any variable
+regardless of whether it is categorical or scalar.  The default label
+for each function is listed in parentheses:
  
  @table @asis
  @item @code{COUNT} (``Count'')
  The sum of weights in a cell.
  
-@item @i{area}@code{PCT} or @i{area}@code{PCT.COUNT} (``@i{Area} %'')
+@item @code{@i{area}PCT} or @code{@i{area}PCT.COUNT} (``@i{Area} %'')
  A percentage within the specified @var{area}.
  
-@item @i{area}@code{PCT.VALIDN} (``@i{Area} Valid N %'')
+@item @code{@i{area}PCT.VALIDN} (``@i{Area} Valid N %'')
  A percentage of valid values within the specified @var{area}.
  
-@item @i{area}@code{PCT.TOTALN} (``@i{Area} Total N %'')
+@item @code{@i{area}PCT.TOTALN} (``@i{Area} Total N %'')
  A percentage of total values within the specified @var{area}.
  @end table
  
-The following table lists summary functions that apply only to scale
-variables:
+The following summary functions apply only to scale variables:
  
  @table @asis
  @item @code{MAXIMUM} (``Maximum'')
@@ -1272,7 +1271,7 @@ Sum of weights of user- and system-missing values.
  @item @code{MODE} (``Mode'')
  The highest-frequency value.  Ties are broken by taking the smallest mode.
  
-@item @i{area}@code{PCT.SUM} (``@i{Area} Sum %'')
+@item @code{@i{area}PCT.SUM} (``@i{Area} Sum %'')
  Percentage of the sum of the values across @var{area}.
  
  @item @code{PTILE} @i{n} (``Percentile @i{n}'')
@@ -1306,34 +1305,71 @@ of the dictionary weight variable.  Otherwise, they are equivalent to
  the summary function without the @samp{E}-prefix:
  
  @itemize @bullet
-@item @code{ECOUNT} (``Adjusted Count'')
-@item @code{ETOTALN} (``Adjusted Total N'')
-@item @code{EVALIDN} (``Adjusted Valid N'')
+@item
+@code{ECOUNT} (``Adjusted Count'')
+
+@item
+@code{ETOTALN} (``Adjusted Total N'')
+
+@item
+@code{EVALIDN} (``Adjusted Valid N'')
  @end itemize
  
  The following summary functions with a @samp{U}-prefix are equivalent
-to those without a prefix, except that they use unweighted counts:
+to their counterparts without the prefix, except that they use unweighted
+counts:
  
  @itemize @bullet
-@item @code{UCOUNT} (``Unweighted Count'')
-@item @code{U}@i{area}@code{PCT} or @code{U}@i{area}@code{PCT.COUNT} (``Unweighted @i{Area} %'')
-@item @code{U}@i{area}@code{PCT.VALIDN} (``Unweighted @i{Area} Valid N %'')
-@item @code{U}@i{area}@code{PCT.TOTALN} (``Unweighted @i{Area} Total N %'')
-@item @code{UMEAN} (``Unweighted Mean'')
-@item @code{UMEDIAN} (``Unweighted Median'')
-@item @code{UMISSING} (``Unweighted Missing'')
-@item @code{UMODE} (``Unweight Mode'')
-@item @code{U}@i{area}@code{PCT.SUM} (``Unweighted @i{Area} Sum %'')
-@item @code{UPTILE} @i{n} (``Unweighted Percentile @i{n}'') 
-@item @code{USEMEAN} (``Unweighted Std Error of Mean'')
-@item @code{USTDDEV} (``Unweighted Std Deviation'')
-@item @code{USUM} (``Unweighted Sum'')
-@item @code{UTOTALN} (``Unweighted Total N'')
-@item @code{UVALIDN} (``Unweighted Valid N'')
-@item @code{UVARIANCE} (``Unweighted Variance'')
+@item
+@code{UCOUNT} (``Unweighted Count'')
+
+@item
+@code{U@i{area}PCT} or @code{U@i{area}PCT.COUNT} (``Unweighted @i{Area} %'')
+
+@item
+@code{U@i{area}PCT.VALIDN} (``Unweighted @i{Area} Valid N %'')
+
+@item
+@code{U@i{area}PCT.TOTALN} (``Unweighted @i{Area} Total N %'')
+
+@item
+@code{UMEAN} (``Unweighted Mean'')
+
+@item
+@code{UMEDIAN} (``Unweighted Median'')
+
+@item
+@code{UMISSING} (``Unweighted Missing'')
+
+@item
+@code{UMODE} (``Unweighted Mode'')
+
+@item
+@code{U@i{area}PCT.SUM} (``Unweighted @i{Area} Sum %'')
+
+@item
+@code{UPTILE} @i{n} (``Unweighted Percentile @i{n}'') 
+
+@item
+@code{USEMEAN} (``Unweighted Std Error of Mean'')
+
+@item
+@code{USTDDEV} (``Unweighted Std Deviation'')
+
+@item
+@code{USUM} (``Unweighted Sum'')
+
+@item
+@code{UTOTALN} (``Unweighted Total N'')
+
+@item
+@code{UVALIDN} (``Unweighted Valid N'')
+
+@item
+@code{UVARIANCE} (``Unweighted Variance'')
  @end itemize
  
-@node FACTOR, GLM, CTABLES, Statistics
+@node FACTOR
  @section FACTOR
  
  @vindex FACTOR
@@ -1484,7 +1520,7 @@ If @subcmd{PAIRWISE} is set, then a case is considered missing only if
  either of the values  for the particular coefficient are missing.
  The default is @subcmd{LISTWISE}.
  
-@node GLM, LOGISTIC REGRESSION, FACTOR, Statistics
+@node GLM
  @section GLM
  
  @vindex GLM
@@ -1543,7 +1579,7 @@ values are considered to be missing as well as system-missing values.
  A case for which any dependent variable or any factor
  variable has a missing value is excluded from the analysis.
  
-@node LOGISTIC REGRESSION, MEANS, GLM, Statistics
+@node LOGISTIC REGRESSION
  @section LOGISTIC REGRESSION
  
  @vindex LOGISTIC REGRESSION
@@ -1629,7 +1665,7 @@ If @subcmd{EXCLUDE} is set, which is the default, user-missing
  values are excluded as well as system-missing values.
  This is the default.
  
-@node MEANS, NPAR TESTS, LOGISTIC REGRESSION, Statistics
+@node MEANS
  @section MEANS
  
  @vindex MEANS
@@ -1798,7 +1834,7 @@ containing a large number of distinct values may result in an extremely large ou
  will not be easy to interpret.
  So you should consider carefully which variables to select for participation in the analysis.
  
-@node NPAR TESTS, T-TEST, MEANS, Statistics
+@node NPAR TESTS
  @section NPAR TESTS
  
  @vindex NPAR TESTS
@@ -1838,23 +1874,23 @@ is used.
  
  
  @menu
-* BINOMIAL::                    Binomial Test
-* CHISQUARE::                   Chi-square Test
-* COCHRAN::                     Cochran Q Test
-* FRIEDMAN::                    Friedman Test
-* KENDALL::                     Kendall's W Test
-* KOLMOGOROV-SMIRNOV::          Kolmogorov Smirnov Test
-* KRUSKAL-WALLIS::              Kruskal-Wallis Test
-* MANN-WHITNEY::                Mann Whitney U Test
-* MCNEMAR::                     McNemar Test
-* MEDIAN::                      Median Test
-* RUNS::                        Runs Test
-* SIGN::                        The Sign Test
-* WILCOXON::                    Wilcoxon Signed Ranks Test
+* BINOMIAL::                Binomial Test
+* CHISQUARE::               Chi-square Test
+* COCHRAN::                 Cochran Q Test
+* FRIEDMAN::                Friedman Test
+* KENDALL::                 Kendall's W Test
+* KOLMOGOROV-SMIRNOV::      Kolmogorov Smirnov Test
+* KRUSKAL-WALLIS::          Kruskal-Wallis Test
+* MANN-WHITNEY::            Mann Whitney U Test
+* MCNEMAR::                 McNemar Test
+* MEDIAN::                  Median Test
+* RUNS::                    Runs Test
+* SIGN::                    The Sign Test
+* WILCOXON::                Wilcoxon Signed Ranks Test
  @end menu
  
  
-@node    BINOMIAL, CHISQUARE, NPAR TESTS, NPAR TESTS
+@node    BINOMIAL
  @subsection Binomial test
  @vindex BINOMIAL
  @cindex binomial test
@@ -1901,7 +1937,7 @@ compute the binomial significance.  Thus, exact results are reported
  even for very large sample sizes.
  
  
-@node    CHISQUARE, COCHRAN, BINOMIAL, NPAR TESTS
+@node    CHISQUARE
  @subsection Chi-square Test
  @vindex CHISQUARE
  @cindex chi-square test
@@ -1963,7 +1999,7 @@ in the population.
  @end float
  
  
-@node COCHRAN, FRIEDMAN, CHISQUARE, NPAR TESTS
+@node COCHRAN
  @subsection Cochran Q Test
  @vindex Cochran
  @cindex Cochran Q test
@@ -1980,7 +2016,7 @@ distinct values (other than missing values).
  The value of Q is displayed along with its Asymptotic significance
  based on a chi-square distribution.
  
-@node FRIEDMAN, KENDALL, COCHRAN, NPAR TESTS
+@node FRIEDMAN
  @subsection Friedman Test
  @vindex FRIEDMAN
  @cindex Friedman test
@@ -1995,7 +2031,7 @@ there is no indication that the distributions are normally distributed.
  A list of variables which contain the measured data must be given.  The procedure
  prints the sum of ranks for each variable, the test statistic and its significance.
  
-@node KENDALL, KOLMOGOROV-SMIRNOV, FRIEDMAN, NPAR TESTS
+@node KENDALL
  @subsection Kendall's W Test
  @vindex KENDALL
  @cindex Kendall's W test
@@ -2012,7 +2048,7 @@ It has the range [0,1] --- a value of zero indicates no agreement between the sa
  unity indicates complete agreement.
  
  
-@node KOLMOGOROV-SMIRNOV, KRUSKAL-WALLIS, KENDALL, NPAR TESTS
+@node KOLMOGOROV-SMIRNOV
  @subsection Kolmogorov-Smirnov Test
  @vindex KOLMOGOROV-SMIRNOV
  @vindex K-S
@@ -2054,7 +2090,7 @@ is tested against a normal distribution of mean 40 and standard deviation 1.5.
  
  The abbreviated subcommand  @subcmd{K-S} may be used in place of @subcmd{KOLMOGOROV-SMIRNOV}.
  
-@node KRUSKAL-WALLIS, MANN-WHITNEY, KOLMOGOROV-SMIRNOV, NPAR TESTS
+@node KRUSKAL-WALLIS
  @subsection Kruskal-Wallis Test
  @vindex KRUSKAL-WALLIS
  @vindex K-W
@@ -2080,7 +2116,7 @@ The abbreviated subcommand  @subcmd{K-W} may be used in place of
  @subcmd{KRUSKAL-WALLIS}.
  
  
-@node MANN-WHITNEY, MCNEMAR, KRUSKAL-WALLIS, NPAR TESTS
+@node MANN-WHITNEY
  @subsection Mann-Whitney U Test
  @vindex MANN-WHITNEY
  @vindex M-W
@@ -2107,7 +2143,7 @@ You may abbreviate the subcommand @subcmd{MANN-WHITNEY} to
  @subcmd{M-W}.
  
  
-@node MCNEMAR, MEDIAN, MANN-WHITNEY, NPAR TESTS
+@node MCNEMAR
  @subsection McNemar Test
  @vindex MCNEMAR
  @cindex McNemar test
@@ -2135,7 +2171,7 @@ The data in each variable must be dichotomous.  If there are more
  than two distinct variables an error will occur and the test will
  not be run.
  
-@node MEDIAN, RUNS, MCNEMAR, NPAR TESTS
+@node MEDIAN
  @subsection Median Test
  @vindex MEDIAN
  @cindex Median test
@@ -2160,7 +2196,7 @@ conducted and the group values used are all values encountered which lie in the
  range [@var{value1},@var{value2}].
  
  
-@node RUNS, SIGN, MEDIAN, NPAR TESTS
+@node RUNS
  @subsection Runs Test
  @vindex RUNS
  @cindex runs test
@@ -2180,7 +2216,7 @@ tested.
  The subcommand shows the number of runs, the asymptotic significance based on the
  length of the data.
  
-@node SIGN, WILCOXON, RUNS, NPAR TESTS
+@node SIGN
  @subsection Sign Test
  @vindex SIGN
  @cindex sign test
@@ -2206,7 +2242,7 @@ If the @code{WITH} keyword is given, but the
  of variable preceding @code{WITH} against variable following
  @code{WITH} are performed.
  
-@node WILCOXON,  , SIGN, NPAR TESTS
+@node WILCOXON
  @subsection Wilcoxon Matched Pairs Signed Ranks Test
  @vindex WILCOXON
  @cindex wilcoxon matched pairs signed ranks test
@@ -2232,7 +2268,7 @@ If the @subcmd{WITH} keyword is given, but the
  of variable preceding @subcmd{WITH} against variable following
  @subcmd{WITH} are performed.
  
-@node T-TEST, ONEWAY, NPAR TESTS, Statistics
+@node T-TEST
  @section T-TEST
  
  @vindex T-TEST
@@ -2297,7 +2333,7 @@ which they would be needed. This is the default.
  * Paired Samples Mode::         Testing two interdependent groups for equal mean
  @end menu
  
-@node One Sample Mode, Independent Samples Mode, T-TEST, T-TEST
+@node One Sample Mode
  @subsection One Sample Mode
  
  The @subcmd{TESTVAL} subcommand invokes the One Sample mode.
@@ -2347,7 +2383,7 @@ from 76.8kg.
  @caption {The results of a one sample T-test of @exvar{weight} using a test value of 76.8kg}
  @end float
  
-@node Independent Samples Mode, Paired Samples Mode, One Sample Mode, T-TEST
+@node Independent Samples Mode
  @subsection Independent Samples Mode
  
  The @subcmd{GROUPS} subcommand invokes Independent Samples mode or
@@ -2435,7 +2471,7 @@ the population the mean height of males and of females are unequal.
  @caption {The results of an independent samples T-test of @exvar{height} by @exvar{sex}}
  @end float
  
-@node Paired Samples Mode,  , Independent Samples Mode, T-TEST
+@node Paired Samples Mode
  @subsection Paired Samples Mode
  
  The @cmd{PAIRS} subcommand introduces Paired Samples mode.
@@ -2455,7 +2491,7 @@ of variable preceding @subcmd{WITH} against variable following
  @subcmd{WITH} are generated.
  
  
-@node ONEWAY, QUICK CLUSTER, T-TEST, Statistics
+@node ONEWAY
  @section ONEWAY
  
  @vindex ONEWAY
@@ -2536,7 +2572,7 @@ Use the optional syntax @code{ALPHA(@var{value})} to indicate that
  @var{value}.  If @code{ALPHA(@var{value})} is not specified, then the
  confidence level used is 0.05.
  
-@node QUICK CLUSTER, RANK, ONEWAY, Statistics
+@node QUICK CLUSTER
  @section QUICK CLUSTER
  @vindex QUICK CLUSTER
  
@@ -2609,7 +2645,7 @@ Each keyword may optionally be followed by a variable name in parentheses to spe
  the new variable which is to contain the saved parameter.  If no variable name is specified,
  then PSPP will create one.
  
-@node RANK, RELIABILITY, QUICK CLUSTER, Statistics
+@node RANK
  @section RANK
  
  @vindex RANK
@@ -2673,7 +2709,7 @@ user-missing are to be excluded from the rank scores. A setting of
  @include regression.texi
  
  
-@node RELIABILITY, ROC, RANK, Statistics
+@node RELIABILITY
  @section RELIABILITY
  
  @vindex RELIABILITY
@@ -2754,7 +2790,7 @@ applied.
  @end float
  
  
-@node ROC,  , RELIABILITY, Statistics
+@node ROC
  @section ROC
  
  @vindex ROC
author	Ben Pfaff <blp@cs.stanford.edu>
	Mon, 14 Mar 2022 16:19:47 +0000 (09:19 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sat, 2 Apr 2022 01:48:55 +0000 (18:48 -0700)