work on docs

[pspp] / doc / statistics.texi
diff --git a/doc/statistics.texi b/doc/statistics.texi

index 2ec1bc5dc93cc59d6e55a2c8bab01b35240c0f90..b3aa17d178ebca3db313863a9f2243a9c59ec08a 100644 (file)
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -20,6 +20,7 @@ far.
  * GRAPH::                       Plot data.
  * CORRELATIONS::                Correlation tables.
  * CROSSTABS::                   Crosstabulation tables.
+* CTABLES::                     Custom tables.
  * FACTOR::                      Factor analysis and Principal Components analysis.
  * GLM::                         Univariate Linear Models.
  * LOGISTIC REGRESSION::         Bivariate Logistic Regression.
@@ -29,12 +30,11 @@ far.
  * ONEWAY::                      One way analysis of variance.
  * QUICK CLUSTER::               K-Means clustering.
  * RANK::                        Compute rank scores.
-* REGRESSION::                  Linear regression.
  * RELIABILITY::                 Reliability analysis.
  * ROC::                         Receiver Operating Characteristic.
  @end menu
  
-@node DESCRIPTIVES
+@node DESCRIPTIVES, FREQUENCIES, Statistics, Statistics
  @section DESCRIPTIVES
  
  @vindex DESCRIPTIVES
@@ -142,6 +142,11 @@ first @cmd{DESCRIPTIVES} command.
  @caption {Running two @cmd{DESCRIPTIVES} commands, one with the @subcmd{SAVE} subcommand}
  @end float
  
+@float Screenshot, descriptives:scr
+@psppimage {descriptives}
+@caption {The Descriptives dialog box with two variables and Z-Scores option selected}
+@end float
+
  In @ref{descriptives:res}, we can see that there are 40 valid data for each of the variables
  and no missing values.   The mean average of the height and temperature is 16677.12
  and 37.02 respectively.  The descriptive statistics for temperature seem reasonable.
@@ -158,7 +163,7 @@ should have these properties since they are normalized versions of the original
  @caption {Descriptives statistics including two normalized variables (Z-scores)}
  @end float
  
-@node FREQUENCIES
+@node FREQUENCIES, EXAMINE, DESCRIPTIVES, Statistics
  @section FREQUENCIES
  
  @vindex FREQUENCIES
@@ -290,6 +295,11 @@ If you are using the graphic user interface, the dialog box is set up such that
  by default, several statistics are calculated.   Some are not particularly useful
  for categorical variables, so you may want to disable those.
  
+@float Screenshot, frequencies:scr
+@psppimage {frequencies}
+@caption {The Frequencies dialog box with the @exvar{sex} and @exvar{occupation} variables selected}
+@end float
+
  From @ref{frequencies:res} it is evident that there are 33 males, 21 females and
  2 persons for whom their sex has not been entered.
  
@@ -304,7 +314,7 @@ be corrected, or marked as missing before using the data.
  @caption {The relative frequencies of @exvar{sex} and @exvar{occupation}}
  @end float
  
-@node EXAMINE
+@node EXAMINE, GRAPH, FREQUENCIES, Statistics
  @section EXAMINE
  
  @vindex EXAMINE
@@ -481,7 +491,7 @@ If you specify many dependent variables or factor variables
  for which there are many distinct values, then @cmd{EXAMINE} will produce a very
  large quantity of output.
  
-@node GRAPH
+@node GRAPH, CORRELATIONS, EXAMINE, Statistics
  @section GRAPH
  
  @vindex GRAPH
@@ -503,12 +513,12 @@ The @cmd{GRAPH} command produces graphical plots of data. Only one of the subcom
  can be produced per call of @cmd{GRAPH}. The @subcmd{MISSING} is optional.
  
  @menu
-* SCATTERPLOT::             Cartesian Plots
-* HISTOGRAM::               Histograms
-* BAR CHART::               Bar Charts
+* SCATTERPLOT::                 Cartesian Plots
+* HISTOGRAM::                   Histograms
+* BAR CHART::                   Bar Charts
  @end menu
  
-@node SCATTERPLOT
+@node SCATTERPLOT, HISTOGRAM, GRAPH, GRAPH
  @subsection Scatterplot
  @cindex scatterplot
  
@@ -527,7 +537,7 @@ This example produces a scatterplot where @var{height} is plotted versus @var{we
  on the value of the @var{gender} variable, the colour of the datapoint is different. With
  this plot it is possible to analyze gender differences for @var{height} versus @var{weight} relation.
  
-@node HISTOGRAM
+@node HISTOGRAM, BAR CHART, SCATTERPLOT, GRAPH
  @subsection Histogram
  @cindex histogram
  
@@ -543,7 +553,7 @@ GRAPH
          /HISTOGRAM = @var{weight}.
  @end example
  
-@node BAR CHART
+@node BAR CHART,  , HISTOGRAM, GRAPH
  @subsection Bar Chart
  @cindex bar chart
  
@@ -597,7 +607,7 @@ GRAPH  /BAR  = COUNT BY @var{city}.
  
  Bar charts can also be produced using the @ref{FREQUENCIES} and @ref{CROSSTABS} commands.
  
-@node CORRELATIONS
+@node CORRELATIONS, CROSSTABS, GRAPH, Statistics
  @section CORRELATIONS
  
  @vindex CORRELATIONS
@@ -660,7 +670,7 @@ The @subcmd{XPROD} keyword requests cross-product deviations and covariance esti
  be displayed for each pair of variables.
  The keyword @subcmd{ALL} is the union of @subcmd{DESCRIPTIVES} and @subcmd{XPROD}.
  
-@node CROSSTABS
+@node CROSSTABS, CTABLES, CORRELATIONS, Statistics
  @section CROSSTABS
  
  @vindex CROSSTABS
@@ -668,12 +678,8 @@ The keyword @subcmd{ALL} is the union of @subcmd{DESCRIPTIVES} and @subcmd{XPROD
  CROSSTABS
          /TABLES=@var{var_list} BY @var{var_list} [BY @var{var_list}]@dots{}
          /MISSING=@{TABLE,INCLUDE,REPORT@}
-        /WRITE=@{NONE,CELLS,ALL@}
          /FORMAT=@{TABLES,NOTABLES@}
-                @{PIVOT,NOPIVOT@}
                  @{AVALUE,DVALUE@}
-                @{NOINDEX,INDEX@}
-                @{BOX,NOBOX@}
          /CELLS=@{COUNT,ROW,COLUMN,TOTAL,EXPECTED,RESIDUAL,SRESIDUAL,
                  ASRESIDUAL,ALL,NONE@}
          /COUNT=@{ASIS,CASE,CELL@}
@@ -718,8 +724,6 @@ tables and statistics.  When set to @subcmd{REPORT}, which is allowed only in
  integer mode, user-missing values are included in tables but marked with
  a footnote and excluded from statistical calculations.
  
-Currently the @subcmd{WRITE} subcommand is ignored.
-
  The @subcmd{FORMAT} subcommand controls the characteristics of the
  crosstabulation tables to be displayed.  It has a number of possible
  settings:
@@ -727,22 +731,11 @@ settings:
  @itemize @w{}
  @item
  @subcmd{TABLES}, the default, causes crosstabulation tables to be output.
-@subcmd{NOTABLES} suppresses them.
-
-@item
-@subcmd{PIVOT}, the default, causes each @subcmd{TABLES} subcommand to be displayed in a
-pivot table format.  @subcmd{NOPIVOT} causes the old-style crosstabulation format
-to be used.
+@subcmd{NOTABLES}, which is equivalent to @code{CELLS=NONE}, suppresses them.
  
  @item
  @subcmd{AVALUE}, the default, causes values to be sorted in ascending order.
  @subcmd{DVALUE} asserts a descending sort order.
-
-@item
-@subcmd{INDEX} and @subcmd{NOINDEX} are currently ignored.
-
-@item
-@subcmd{BOX} and @subcmd{NOBOX} is currently ignored.
  @end itemize
  
  The @subcmd{CELLS} subcommand controls the contents of each cell in the displayed
@@ -852,7 +845,493 @@ Approximate T is not calculated for symmetric uncertainty coefficient.
  
  Fixes for any of these deficiencies would be welcomed.
  
-@node FACTOR
+@subsection Crosstabs Example
+
+@cindex chi-square test of independence
+
+A researcher wishes to know if, in an industry, a person's sex is related to
+the person's occupation.  To investigate this, she has determined that the
+@file{personnel.sav} data set is a representative, randomly selected sample of persons.
+The researcher's null hypothesis is that a person's sex has no relation to a
+person's occupation. She uses a chi-squared test of independence to investigate
+the hypothesis.
+
+@float Example, crosstabs:ex
+@psppsyntax {crosstabs.sps}
+@caption {Running crosstabs on the @exvar{sex} and @exvar{occupation} variables}
+@end float
+
+The syntax in @ref{crosstabs:ex} conducts a chi-squared test of independence.
+The line @code{/tables = occupation by sex} indicates that @exvar{occupation}
+and @exvar{sex} are the variables to be tabulated.  To do this using the @gui{}
+you must place these variable names respectively in the @samp{Row} and
+@samp{Column} fields as shown in @ref{crosstabs:scr}.
+
+@float Screenshot, crosstabs:scr
+@psppimage {crosstabs}
+@caption {The Crosstabs dialog box with the @exvar{sex} and @exvar{occupation} variables selected}
+@end float
+
+Similarly, the @samp{Cells} button shows a dialog box to select the @code{count}
+and @code{expected} options.  All other cell options can be deselected for this
+test.
+
+You would use the @samp{Format} and @samp{Statistics}  buttons to select options
+for the @subcmd{FORMAT} and @subcmd{STATISTICS} subcommands.  In this example,
+the @samp{Statistics} dialog requires only the @samp{Chisq} option to be checked.  All
+other options should be unchecked.  No special settings are required from the
+@samp{Format} dialog.
+
+As shown in @ref{crosstabs:res} @cmd{CROSSTABS} generates a contingency table
+containing the observed count and the expected count of each sex and each
+occupation.  The expected count is the count which would be observed if the
+null hypothesis were true.
+
+The significance of the Pearson Chi-Square value is very much larger than the
+normally accepted value of 0.05 and so one cannot reject the null hypothesis.
+Thus the researcher must conclude that there is not enough evidence to suggest
+that a person's sex is related to the person's occupation.
+
+@float Results, crosstabs:res
+@psppoutput {crosstabs}
+@caption {The results of a test of independence between @exvar{sex} and @exvar{occupation}}
+@end float
+
+@node CTABLES, FACTOR, CROSSTABS, Statistics
+@section CTABLES
+
+@vindex CTABLES
+@cindex custom tables
+@cindex tables, custom
+
+@code{CTABLES} has the following overall syntax.  At least one
+@code{TABLE} subcommand is required:
+
+@display
+@t{CTABLES}
+  @dots{}@i{global subcommands}@dots{}
+  [@t{/TABLE} @i{axis} [@t{BY} @i{axis} [@t{BY} @i{axis}]]
+   @dots{}@i{per-table subcommands}@dots{}]@dots{}
+@end display
+
+@noindent
+where each @i{axis} may be empty or take one of the following forms:
+
+@display
+@i{variable}
+@i{variable} @t{[}@{@t{C} @math{|} @t{S}@}@t{]}
+@i{axis} + @i{axis}
+@i{axis} > @i{axis}
+(@i{axis})
+@i{axis} @t{[}@i{summary} [@i{string}] [@i{format}]@t{]}
+@end display
+
+The following subcommands precede the first @code{TABLE} subcommand
+and apply to all of the output tables.  All of these subcommands are
+optional:
+
+@display
+@t{/FORMAT}
+    [@t{MINCOLWIDTH=}@{@t{DEFAULT} @math{|} @i{width}@}]
+    [@t{MAXCOLWIDTH=}@{@t{DEFAULT} @math{|} @i{width}@}]
+    [@t{UNITS=}@{@t{POINTS} @math{|} @t{INCHES} @math{|} @t{CM}@}]
+    [@t{EMPTY=}@{@t{ZERO} @math{|} @t{BLANK} @math{|} @i{string}@}]
+    [@t{MISSING=}@i{string}]
+@t{/VLABELS}
+    @t{VARIABLES=}@i{variables}
+    @t{DISPLAY=}@{@t{DEFAULT} @math{|} @t{NAME} @math{|} @t{LABEL} @math{|} @t{BOTH} @math{|} @t{NONE}@}
+@t{/MRSETS COUNTDUPLICATES=}@{@t{YES} @math{|} @t{NO}@}
+@t{/SMISSING} @{@t{VARIABLE} @math{|} @t{LISTWISE}@}
+@t{/PCOMPUTE} @t{&}@i{category}@t{=EXPR(}@i{expression}@t{)}
+@t{/PPROPERTIES} @t{&}@i{category}@dots{}
+    [@t{LABEL=}@i{string}]
+    [@t{FORMAT=}[@i{summary} @i{format}]@dots{}]
+    [@t{HIDESOURCECATS=}@{@t{NO} @math{|} @t{YES}@}]
+@t{/WEIGHT VARIABLE=}@i{variable}
+@t{/HIDESMALLCOUNTS COUNT=}@i{count}
+@end display
+
+The following subcommands follow @code{TABLE} and apply only to the
+previous @code{TABLE}.  All of these subcommands are optional:
+
+@display
+@t{/SLABELS}
+    [@t{POSITION=}@{@t{COLUMN} @math{|} @t{ROW} @math{|} @t{LAYER}@}]
+    [@t{VISIBLE=}@{@t{YES} @math{|} @t{NO}@}]
+@t{/CLABELS} @{@t{AUTO} @math{|} @{@t{ROWLABELS}@math{|}@t{COLLABELS}@}@t{=}@{@t{OPPOSITE}@math{|}@t{LAYER}@}@}
+@t{/CRITERIA CILEVEL=}@i{percentage}
+@t{/CATEGORIES} @t{VARIABLES=}@i{variables}
+    @{@t{[}@i{value}@t{,} @i{value}@dots{}@t{]}
+   @math{|} [@t{ORDER=}@{@t{A} @math{|} @t{D}@}]
+     [@t{KEY=}@{@t{VALUE} @math{|} @t{LABEL} @math{|} @i{summary}@t{(}@i{variable}@t{)}@}]
+     [@t{MISSING=}@{@t{EXCLUDE} @math{|} @t{INCLUDE}@}]@}
+    [@t{TOTAL=}@{@t{NO} @math{|} @t{YES}@} [@t{LABEL=}@i{string}] [@t{POSITION=}@{@t{AFTER} @math{|} @t{BEFORE}@}]]
+    [@t{EMPTY=}@{@t{INCLUDE} @math{|} @t{EXCLUDE}@}]
+@t{/TITLES}
+    [@t{TITLE=}@i{string}@dots{}]
+    [@t{CAPTION=}@i{string}@dots{}]
+    [@t{CORNER=}@i{string}@dots{}]
+@t{/SIGTEST TYPE=CHISQUARE}
+    [@t{ALPHA=}@i{siglevel}]
+    [@t{INCLUDEMRSETS=}@{@t{YES} @math{|} @t{NO}@}]
+    [@t{CATEGORIES=}@{@t{ALLVISIBLE} @math{|} @t{SUBTOTALS}@}]
+@t{/COMPARETEST TYPE=}@{@t{PROP} @math{|} @t{MEAN}@}
+    [@t{ALPHA=}@i{value}[@t{,} @i{value}]]
+    [@t{ADJUST=}@{@t{BONFERRONI} @math{|} @t{BH} @math{|} @t{NONE}@}]
+    [@t{INCLUDEMRSETS=}@{@t{YES} @math{|} @t{NO}@}]
+    [@t{MEANSVARIANCE=}@{@t{ALLCATS} @math{|} @t{TESTEDCATS}@}]
+    [@t{CATEGORIES=}@{@t{ALLVISIBLE} @math{|} @t{SUBTOTALS}@}]
+    [@t{MERGE=}@{@t{NO} @math{|} @t{YES}@}]
+    [@t{STYLE=}@{@t{APA} @math{|} @t{SIMPLE}@}]
+    [@t{SHOWSIG=}@{@t{NO} @math{|} @t{YES}@}]
+@end display
+
+The @code{CTABLES} (aka ``custom tables'') command produces
+multi-dimensional tables from categorical and scale data.  It offers
+many options for data summarization and formatting.
+
+This section's examples use data from the 2008 (USA) National Survey
+of Drinking and Driving Attitudes and Behaviors, a public domain data
+set from the (USA) National Highway Traffic Safety Administration and
+available at @url{https://data.transportation.gov}.  @pspp{} includes
+this data set, with a slightly modified dictionary, as
+@file{examples/nhtsa.sav}.
+
+@menu
+* CTABLES Basics::
+* CTABLES Data Summarization::
+@end menu
+
+@node CTABLES Basics, CTABLES Data Summarization, CTABLES, CTABLES
+@subsection Basics
+
+The only required subcommand is @code{TABLE}, which specifies the
+variables to include along each axis:
+@display
+@t{/TABLE} @i{rows} [@t{BY} @i{columns} [@t{BY} @i{layers}]]
+@end display
+@noindent
+In @code{TABLE}, each of @var{rows}, @var{columns}, and @var{layers}
+is either empty or an axis expression that specifies one or more
+variables.  At least one must specify an axis expression.
+
+@menu
+* CTABLES Categorical Variable Basics::
+* CTABLES Scalar Variable Basics::
+* CTABLES Overriding Measurement Level::
+* CTABLES Multiple Response Sets::
+@end menu
+
+@node CTABLES Categorical Variable Basics, CTABLES Scalar Variable Basics, CTABLES Basics, CTABLES Basics
+@subsubsection Categorical Variables
+
+An axis expression that names a categorical variable divides the data
+into cells according to the values of that variable.  When all the
+variables named on @code{TABLE} are categorical, by default each cell
+displays the number of cases that it contains, so specifying a single
+variable yields a frequency table:
+
+@example
+CTABLES /TABLE=AgeGroup.
+@end example
+@psppoutput {ctables1}
+
+@noindent
+Specifying a row and a column categorical variable yields a
+crosstabulation:
+
+@example
+CTABLES /TABLE=AgeGroup BY qns3a.
+@end example
+@psppoutput {ctables2}
+
+@noindent
+The @samp{>} ``nesting'' operator nests multiple variables on a single
+axis, e.g.:
+
+@example
+CTABLES /TABLE qn105ba BY AgeGroup > qns3a.
+@end example
+@psppoutput {ctables3}
+
+@noindent
+The @samp{+} ``stacking'' operator allows a single output table to
+include multiple data analyses.  With @samp{+}, @code{CTABLES} divides
+the output table into multiple @dfn{sections}, each of which includes
+an analysis of the full data set.  For example, the following command
+separately tabulates age group and driving frequency by gender:
+
+@example
+CTABLES /TABLE AgeGroup + qn1 BY qns3a.
+@end example
+@psppoutput {ctables4}
+
+@noindent
+If @samp{+} and @samp{>} are used together, @samp{>} binds more
+tightly.  Use parentheses to override operator precedence.  Thus:
+
+@example
+CTABLES /TABLE qn26 + qn27 > qns3a.
+CTABLES /TABLE (qn26 + qn27) > qns3a.
+@end example
+@psppoutput {ctables5}
+
+@node CTABLES Scalar Variable Basics, CTABLES Overriding Measurement Level, CTABLES Categorical Variable Basics, CTABLES Basics
+@subsubsection Scalar Variables
+
+Categorical variables make @code{CTABLES} divide tables into cells.
+With scalar variables, @code{CTABLES} instead calculates a summary
+measure, by default the mean, of the values that fall into a cell.
+For example, if the only variable specified is a scalar variable, then
+the output is a single cell that holds the mean of all of the data:
+
+@example
+CTABLES /TABLE qnd1.
+@end example
+@psppoutput {ctables6}
+
+A scalar variable may nest with categorical variables.  The following
+example shows the mean age of survey respondents across gender and
+language groups:
+
+@example
+CTABLES /TABLE qns3a > qnd1 BY region.
+@end example
+@psppoutput {ctables7}
+
+The order of nesting of scalar and categorical variables affects table
+labeling, but it does not affect the data displayed in the table.  The
+following example shows how the output changes when the nesting order
+of the scalar and categorical variable are interchanged:
+
+@example
+CTABLES /TABLE qnd1 > qns3a BY region.
+@end example
+@psppoutput {ctables8}
+
+Only a single scalar variable may appear in each section; that is, a
+scalar variable may not nest inside a scalar variable directly or
+indirectly.  Scalar variables may only appear on one axis within
+@code{TABLE}.
+
+@node CTABLES Overriding Measurement Level, CTABLES Multiple Response Sets, CTABLES Scalar Variable Basics, CTABLES Basics
+@subsubsection Overriding Measurement Level
+
+By default, @code{CTABLES} uses a variable's measurement level to
+decide whether to treat it as categorical or scalar.  Variables
+assigned the nominal or ordinal measurement level are treated as
+categorical, and variables assigned the scale measurement level are
+treated as scalar.
+
+Use the @code{VARIABLE LEVEL} command to change a variable's
+measurement level.  To treat a variable as categorical or scalar only
+for one use on @code{CTABLES}, add @samp{[C]} or @samp{[S]},
+respectively, after the variable name.  The following example shows
+how to analyze the scalar variable @code{qn20} as categorical:
+
+@example
+CTABLES /TABLE qn20 [C] BY qns3a.
+@end example
+@psppoutput {ctables9}
+
+@node CTABLES Multiple Response Sets,  , CTABLES Overriding Measurement Level, CTABLES Basics
+@subsubsection Multiple Response Sets
+
+The @code{CTABLES} command does not yet support multiple response
+sets.
+
+@node CTABLES Data Summarization,  , CTABLES Basics, CTABLES
+@subsection Data Summarization
+
+The @code{CTABLES} command allows the user to control how the data are
+summarized with summary specifications, which are enclosed in square
+brackets following a variable name on the @code{TABLE} subcommand.
+When all the variables are categorical, summary specifications can be
+given for the innermost nested variables on any one axis.  When a
+scalar variable is present, only the scalar variable may have summary
+specifications.  The following example includes a summary
+specification for column and row percentages for categorical
+variables, and mean and median for a scalar variable:
+
+@example
+CTABLES
+    /TABLE=qnd1 [MEAN, MEDIAN] BY qns3a
+    /TABLE=AgeGroup [COLPCT, ROWPCT] BY qns3a.
+@end example
+@psppoutput {ctables10}
+
+A summary specification may override the default label and format by
+appending a string or format specification or both (in that order) to
+the summary function name.  For example:
+
+@example
+CTABLES /TABLE=AgeGroup [COLPCT 'Gender %' PCT5.0,
+                         ROWPCT 'Age Group %' PCT5.0]
+               BY qns3a.
+@end example
+@psppoutput {ctables11}
+
+Parentheses are a shorthand to apply summary specifications to
+multiple variables.  For example, both of these commands:
+
+@example
+CTABLES /TABLE=AgeGroup[COLPCT] + qns1[COLPCT] BY qns3a.
+CTABLES /TABLE=(AgeGroup + qns1)[COLPCT] BY qns3a.
+@end example
+
+@noindent
+produce the same output shown below:
+
+@psppoutput {ctables12}
+
+The following sections list the available summary functions.
+
+@menu
+* CTABLES Summary Functions for Categorical and Scale Variables::
+@end menu
+
+@node CTABLES Summary Functions for Categorical and Scale Variables,  , CTABLES Data Summarization, CTABLES Data Summarization
+@subsubsection Summary Functions for Categorical and Scale Variables
+
+This section lists the summary functions that can be applied to cells
+in @code{CTABLES}.  Many of these functions have an @var{area} in
+their names.  The supported areas are:
+
+@itemize @bullet
+@item
+Areas that correspond to parts of @dfn{subtables}, whose contents are
+the cells that pair an innermost row variable and an innermost column
+variable:
+
+@table @code
+@item ROW
+A row within a subtable.
+
+@item COL
+A column within a subtable.
+
+@item SUBTABLE
+All the cells in a subtable.
+@end table
+
+@item
+Areas that correspond to parts of @dfn{sections}, where stacked
+variables divide each section from another:
+
+@table @code
+@item TABLE
+An entire section.
+
+@item LAYER
+A layer within a section.
+
+@item LAYERROW
+A row in one layer within a section.
+
+@item LAYERCOL
+A column in one layer within a section.
+@end table
+@end itemize
+
+The following summary functions may be applied to any variable
+regardless of whether it is categorical or scalar.
+
+@table @asis
+@item @code{COUNT}
+The sum of weights in a cell.
+
+@item @i{area}@code{PCT} or @i{area}@code{PCT.COUNT}
+A percentage within the specified @var{area}.
+
+@item @i{area}@code{PCT.VALIDN}
+A percentage of valid values within the specified @var{area}.
+
+@item @i{area}@code{PCT.TOTALN}
+A percentage of total values within the specified @var{area}.
+@end table
+
+The following summary functions apply only to scale variables:
+
+@table @asis
+@item @code{MAXIMUM}
+The largest value.
+
+@item @code{MEAN}
+The mean.
+
+@item @code{MEDIAN}
+The median value.
+
+@item @code{MINIMUM}
+The smallest value.
+
+@item @code{MISSING}
+Sum of weights of user- and system-missing values.
+
+@item @code{MODE}
+The highest-frequency value.  Ties are broken by taking the smallest mode.
+
+@item @i{area}@code{PCT.SUM}
+Percentage of the sum of the values across @var{area}.
+
+@item @code{PTILE} @i{n}
+The @var{n}th percentile, where @math{0 @leq{} @var{n} @leq{} 100}.
+
+@item @code{RANGE}
+The maximum minus the minimum.
+
+@item @code{SEMEAN}
+The standard error of the mean.
+
+@item @code{STDDEV}
+The standard deviation.
+
+@item @code{SUM}
+The sum.
+
+@item @code{TOTALN}
+The sum of total count weights.
+
+@item @code{VALIDN}
+The sum of valid count weights.
+
+@item @code{VARIANCE}
+The variance.
+@end table
+
+If the @code{WEIGHT} subcommand specified an adjustment weight
+variable, then the following summary functions use its value instead
+of the dictionary weight variable.  Otherwise, they are equivalent to
+the summary function without the @samp{E}-prefix:
+
+@itemize @bullet
+@item @code{ECOUNT}
+@item @code{ETOTALN}
+@item @code{EVALIDN}
+@end itemize
+
+The following summary functions with a @samp{U}-prefix are equivalent
+to those without a prefix, except that they use unweighted counts:
+
+@itemize @bullet
+@item @code{UCOUNT}
+@item @code{U}@i{area}@code{PCT} or @code{U}@i{area}@code{PCT.COUNT}
+@item @code{U}@i{area}@code{PCT.VALIDN}
+@item @code{U}@i{area}@code{PCT.TOTALN}
+@item @code{UMEAN}
+@item @code{UMEDIAN}
+@item @code{UMISSING}
+@item @code{UMODE}
+@item @code{U}@i{area}@code{PCT.SUM}
+@item @code{UPTILE} @i{n}
+@item @code{USEMEAN}
+@item @code{USTDDEV}
+@item @code{USUM}
+@item @code{UTOTALN}
+@item @code{UVALIDN}
+@item @code{UVARIANCE}
+@end itemize
+
+@node FACTOR, GLM, CTABLES, Statistics
  @section FACTOR
  
  @vindex FACTOR
@@ -1003,7 +1482,7 @@ If @subcmd{PAIRWISE} is set, then a case is considered missing only if
  either of the values  for the particular coefficient are missing.
  The default is @subcmd{LISTWISE}.
  
-@node GLM
+@node GLM, LOGISTIC REGRESSION, FACTOR, Statistics
  @section GLM
  
  @vindex GLM
@@ -1062,7 +1541,7 @@ values are considered to be missing as well as system-missing values.
  A case for which any dependent variable or any factor
  variable has a missing value is excluded from the analysis.
  
-@node LOGISTIC REGRESSION
+@node LOGISTIC REGRESSION, MEANS, GLM, Statistics
  @section LOGISTIC REGRESSION
  
  @vindex LOGISTIC REGRESSION
@@ -1148,7 +1627,7 @@ If @subcmd{EXCLUDE} is set, which is the default, user-missing
  values are excluded as well as system-missing values.
  This is the default.
  
-@node MEANS
+@node MEANS, NPAR TESTS, LOGISTIC REGRESSION, Statistics
  @section MEANS
  
  @vindex MEANS
@@ -1317,7 +1796,7 @@ containing a large number of distinct values may result in an extremely large ou
  will not be easy to interpret.
  So you should consider carefully which variables to select for participation in the analysis.
  
-@node NPAR TESTS
+@node NPAR TESTS, T-TEST, MEANS, Statistics
  @section NPAR TESTS
  
  @vindex NPAR TESTS
@@ -1357,23 +1836,23 @@ is used.
  
  
  @menu
-* BINOMIAL::                Binomial Test
-* CHISQUARE::               Chi-square Test
-* COCHRAN::                 Cochran Q Test
-* FRIEDMAN::                Friedman Test
-* KENDALL::                 Kendall's W Test
-* KOLMOGOROV-SMIRNOV::      Kolmogorov Smirnov Test
-* KRUSKAL-WALLIS::          Kruskal-Wallis Test
-* MANN-WHITNEY::            Mann Whitney U Test
-* MCNEMAR::                 McNemar Test
-* MEDIAN::                  Median Test
-* RUNS::                    Runs Test
-* SIGN::                    The Sign Test
-* WILCOXON::                Wilcoxon Signed Ranks Test
+* BINOMIAL::                    Binomial Test
+* CHISQUARE::                   Chi-square Test
+* COCHRAN::                     Cochran Q Test
+* FRIEDMAN::                    Friedman Test
+* KENDALL::                     Kendall's W Test
+* KOLMOGOROV-SMIRNOV::          Kolmogorov Smirnov Test
+* KRUSKAL-WALLIS::              Kruskal-Wallis Test
+* MANN-WHITNEY::                Mann Whitney U Test
+* MCNEMAR::                     McNemar Test
+* MEDIAN::                      Median Test
+* RUNS::                        Runs Test
+* SIGN::                        The Sign Test
+* WILCOXON::                    Wilcoxon Signed Ranks Test
  @end menu
  
  
-@node    BINOMIAL
+@node    BINOMIAL, CHISQUARE, NPAR TESTS, NPAR TESTS
  @subsection Binomial test
  @vindex BINOMIAL
  @cindex binomial test
@@ -1420,7 +1899,7 @@ compute the binomial significance.  Thus, exact results are reported
  even for very large sample sizes.
  
  
-@node    CHISQUARE
+@node    CHISQUARE, COCHRAN, BINOMIAL, NPAR TESTS
  @subsection Chi-square Test
  @vindex CHISQUARE
  @cindex chi-square test
@@ -1464,6 +1943,11 @@ The analysis is performed as shown in @ref{chisquare:ex}.
  There is only one test variable, @i{viz:} @exvar{sex}.  The other variables in the dataset
  are ignored.
  
+@float Screenshot, chisquare:scr
+@psppimage {chisquare}
+@caption {Performing a chi-square test using the graphic user interface}
+@end float
+
  In @ref{chisquare:res} the summary box shows that in the sample, there are more males
  than females.  However the significance of chi-square result is greater than 0.05
  --- the most commonly accepted p-value --- and therefore
@@ -1477,7 +1961,7 @@ in the population.
  @end float
  
  
-@node COCHRAN
+@node COCHRAN, FRIEDMAN, CHISQUARE, NPAR TESTS
  @subsection Cochran Q Test
  @vindex Cochran
  @cindex Cochran Q test
@@ -1494,7 +1978,7 @@ distinct values (other than missing values).
  The value of Q is displayed along with its Asymptotic significance
  based on a chi-square distribution.
  
-@node FRIEDMAN
+@node FRIEDMAN, KENDALL, COCHRAN, NPAR TESTS
  @subsection Friedman Test
  @vindex FRIEDMAN
  @cindex Friedman test
@@ -1509,7 +1993,7 @@ there is no indication that the distributions are normally distributed.
  A list of variables which contain the measured data must be given.  The procedure
  prints the sum of ranks for each variable, the test statistic and its significance.
  
-@node KENDALL
+@node KENDALL, KOLMOGOROV-SMIRNOV, FRIEDMAN, NPAR TESTS
  @subsection Kendall's W Test
  @vindex KENDALL
  @cindex Kendall's W test
@@ -1526,7 +2010,7 @@ It has the range [0,1] --- a value of zero indicates no agreement between the sa
  unity indicates complete agreement.
  
  
-@node KOLMOGOROV-SMIRNOV
+@node KOLMOGOROV-SMIRNOV, KRUSKAL-WALLIS, KENDALL, NPAR TESTS
  @subsection Kolmogorov-Smirnov Test
  @vindex KOLMOGOROV-SMIRNOV
  @vindex K-S
@@ -1568,7 +2052,7 @@ is tested against a normal distribution of mean 40 and standard deviation 1.5.
  
  The abbreviated subcommand  @subcmd{K-S} may be used in place of @subcmd{KOLMOGOROV-SMIRNOV}.
  
-@node KRUSKAL-WALLIS
+@node KRUSKAL-WALLIS, MANN-WHITNEY, KOLMOGOROV-SMIRNOV, NPAR TESTS
  @subsection Kruskal-Wallis Test
  @vindex KRUSKAL-WALLIS
  @vindex K-W
@@ -1583,9 +2067,10 @@ arbitrary number of populations.  It does not assume normality.
  The data to be compared are specified by @var{var_list}.
  The categorical variable determining the groups to which the
  data belongs is given by @var{var}. The limits @var{lower} and
-@var{upper} specify the valid range of @var{var}. Any cases for
-which @var{var} falls outside [@var{lower}, @var{upper}] are
-ignored.
+@var{upper} specify the valid range of @var{var}.
+If @var{upper} is smaller than @var{lower}, @pspp{} will assume their values
+to be reversed. Any cases for which @var{var} falls outside
+[@var{lower}, @var{upper}] are ignored.
  
  The mean rank of each group as well as the chi-squared value and
  significance of the test are printed.
@@ -1593,7 +2078,7 @@ The abbreviated subcommand  @subcmd{K-W} may be used in place of
  @subcmd{KRUSKAL-WALLIS}.
  
  
-@node MANN-WHITNEY
+@node MANN-WHITNEY, MCNEMAR, KRUSKAL-WALLIS, NPAR TESTS
  @subsection Mann-Whitney U Test
  @vindex MANN-WHITNEY
  @vindex M-W
@@ -1620,7 +2105,7 @@ You may abbreviated the subcommand @subcmd{MANN-WHITNEY} to
  @subcmd{M-W}.
  
  
-@node MCNEMAR
+@node MCNEMAR, MEDIAN, MANN-WHITNEY, NPAR TESTS
  @subsection McNemar Test
  @vindex MCNEMAR
  @cindex McNemar test
@@ -1648,7 +2133,7 @@ The data in each variable must be dichotomous.  If there are more
  than two distinct values an error will occur and the test will
  not be run.
  
-@node MEDIAN
+@node MEDIAN, RUNS, MCNEMAR, NPAR TESTS
  @subsection Median Test
  @vindex MEDIAN
  @cindex Median test
@@ -1673,7 +2158,7 @@ conducted and the group values used are all values encountered which lie in the
  range [@var{value1},@var{value2}].
  
  
-@node RUNS
+@node RUNS, SIGN, MEDIAN, NPAR TESTS
  @subsection Runs Test
  @vindex RUNS
  @cindex runs test
@@ -1693,7 +2178,7 @@ tested.
  The subcommand shows the number of runs and the asymptotic significance based on the
  length of the data.
  
-@node SIGN
+@node SIGN, WILCOXON, RUNS, NPAR TESTS
  @subsection Sign Test
  @vindex SIGN
  @cindex sign test
@@ -1719,7 +2204,7 @@ If the @code{WITH} keyword is given, but the
  of variable preceding @code{WITH} against variable following
  @code{WITH} are performed.
  
-@node WILCOXON
+@node WILCOXON,  , SIGN, NPAR TESTS
  @subsection Wilcoxon Matched Pairs Signed Ranks Test
  @vindex WILCOXON
  @cindex wilcoxon matched pairs signed ranks test
@@ -1745,7 +2230,7 @@ If the @subcmd{WITH} keyword is given, but the
  of variable preceding @subcmd{WITH} against variable following
  @subcmd{WITH} are performed.
  
-@node T-TEST
+@node T-TEST, ONEWAY, NPAR TESTS, Statistics
  @section T-TEST
  
  @vindex T-TEST
@@ -1810,7 +2295,7 @@ which they would be needed. This is the default.
  * Paired Samples Mode::         Testing two interdependent groups for equal mean
  @end menu
  
-@node One Sample Mode
+@node One Sample Mode, Independent Samples Mode, T-TEST, T-TEST
  @subsection One Sample Mode
  
  The @subcmd{TESTVAL} subcommand invokes the One Sample mode.
@@ -1843,6 +2328,12 @@ using the @cmd{SELECT} command.
  @caption {Running a one sample T-Test after excluding all non-positive values}
  @end float
  
+@float Screenshot, one-sample-t:scr
+@psppimage {one-sample-t}
+@caption {Using the One Sample T-Test dialog box to test @exvar{weight} for a mean of 76.8kg}
+@end float
+
+
  @ref{one-sample-t:res} shows that the mean of our sample differs from the test value
  by -1.40kg.  However the significance is very high (0.610).  So one cannot
  reject the null hypothesis, and must conclude there is not enough evidence
@@ -1854,7 +2345,7 @@ from 76.8kg.
  @caption {The results of a one sample T-test of @exvar{weight} using a test value of 76.8kg}
  @end float
  
-@node Independent Samples Mode
+@node Independent Samples Mode, Paired Samples Mode, One Sample Mode, T-TEST
  @subsection Independent Samples Mode
  
  The @subcmd{GROUPS} subcommand invokes Independent Samples mode or
@@ -1902,13 +2393,28 @@ using the @cmd{SELECT} command.
  The null hypothesis is that both males and females are on average
  of equal height.
  
+@float Screenshot, independent-samples-t:scr
+@psppimage {independent-samples-t}
+@caption {Using the Independent Samples T-test dialog to test for differences of @exvar{height} between values of @exvar{sex}}
+@end float
+
+
  In this case, the grouping variable is @exvar{sex}, so this is entered
  as the variable for the @subcmd{GROUP} subcommand.  The group values are  0 (male) and
  1 (female).
  
  If you are running the procedure using syntax, then you need to enter
  the values corresponding to each group within parentheses.
-
+If you are using the graphical user interface, then you have to open
+the ``Define Groups'' dialog box and enter the values corresponding
+to each group as shown in @ref{define-groups-t:scr}.  If, as in this case, the dataset has defined value
+labels for the group variable, then you can enter them by label
+or by value.
+
+@float Screenshot, define-groups-t:scr
+@psppimage {define-groups-t}
+@caption {Setting the values of the grouping variable for an Independent Samples T-test}
+@end float
  
  From @ref{independent-samples-t:res}, one can clearly see that the @emph{sample} mean height
  is greater for males than for females.  However in order to see if this
@@ -1927,7 +2433,7 @@ the population the mean height of males and of females are unequal.
  @caption {The results of an independent samples T-test of @exvar{height} by @exvar{sex}}
  @end float
  
-@node Paired Samples Mode
+@node Paired Samples Mode,  , Independent Samples Mode, T-TEST
  @subsection Paired Samples Mode
  
  The @cmd{PAIRS} subcommand introduces Paired Samples mode.
@@ -1947,7 +2453,7 @@ of variable preceding @subcmd{WITH} against variable following
  @subcmd{WITH} are generated.
  
  
-@node ONEWAY
+@node ONEWAY, QUICK CLUSTER, T-TEST, Statistics
  @section ONEWAY
  
  @vindex ONEWAY
@@ -2028,7 +2534,7 @@ Use the optional syntax @code{ALPHA(@var{value})} to indicate that
  @var{value}.  If @code{ALPHA(@var{value})} is not specified, then the
  confidence level used is 0.05.
  
-@node QUICK CLUSTER
+@node QUICK CLUSTER, RANK, ONEWAY, Statistics
  @section QUICK CLUSTER
  @vindex QUICK CLUSTER
  
@@ -2101,7 +2607,7 @@ Each keyword may optionally be followed by a variable name in parentheses to spe
  the new variable which is to contain the saved parameter.  If no variable name is specified,
  then PSPP will create one.
  
-@node RANK
+@node RANK, RELIABILITY, QUICK CLUSTER, Statistics
  @section RANK
  
  @vindex RANK
@@ -2165,7 +2671,7 @@ user-missing are to be excluded from the rank scores. A setting of
  @include regression.texi
  
  
-@node RELIABILITY
+@node RELIABILITY, ROC, RANK, Statistics
  @section RELIABILITY
  
  @vindex RELIABILITY
@@ -2230,6 +2736,11 @@ to use @cmd{COMPUTE} (@pxref{COMPUTE}) and this is what is done in @ref{reliabil
  In this case, all variables in the data set are used.  So we can use the special
  keyword @samp{ALL} (@pxref{BNF}).
  
+@float Screenshot, reliability:src
+@psppimage {reliability}
+@caption {Reliability dialog box with all variables selected}
+@end float
+
  @ref{reliability:res} shows that Cronbach's Alpha is 0.11  which is a value normally considered too
  low to indicate consistency within the data.  This is possibly due to the small number of
  survey questions.  The survey should be redesigned before serious use of the results is
@@ -2241,7 +2752,7 @@ applied.
  @end float
  
  
-@node ROC
+@node ROC,  , RELIABILITY, Statistics
  @section ROC
  
  @vindex ROC