From f1141d27ca616a8c8edc2a1f18067085ceaaf448 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 10 Oct 2020 20:59:08 +0200 Subject: [PATCH] Add extra code examples to the documentation Add some examples of code to the manual and autogenerate the results of running that code. Addresses bug #51079 --- configure.ac | 2 +- doc/automake.mk | 78 +++++++- doc/data-selection.texi | 58 ++++++ doc/examples/autorecode.sps | 20 ++ doc/examples/chisquare.sps | 4 + doc/examples/compute.sps | 7 + doc/examples/count.sps | 5 + doc/examples/descriptives.sps | 8 + doc/examples/flip.sps | 16 ++ doc/examples/frequencies.sps | 4 + doc/examples/independent-samples-t.sps | 6 + doc/examples/means.sps | 4 + doc/examples/one-sample-t.sps | 6 + doc/examples/reliability.sps | 9 + doc/examples/split.sps | 9 + doc/examples/weight.sps | 15 ++ doc/pspp-manual.css | 50 +++++ doc/pspp.texi | 40 +++- doc/statistics.texi | 255 ++++++++++++++++++++++++- doc/transformation.texi | 176 ++++++++++++----- doc/tutorial.texi | 80 ++++---- examples/automake.mk | 4 +- examples/horticulture.sav | Bin 0 -> 2892 bytes examples/personnel.sav | Bin 0 -> 4209 bytes examples/repairs.sav | Bin 1216 -> 1850 bytes tests/language/stats/examine.at | 49 ++--- tests/language/stats/regression.at | 31 +-- 27 files changed, 795 insertions(+), 141 deletions(-) create mode 100644 doc/examples/autorecode.sps create mode 100644 doc/examples/chisquare.sps create mode 100644 doc/examples/compute.sps create mode 100644 doc/examples/count.sps create mode 100644 doc/examples/descriptives.sps create mode 100644 doc/examples/flip.sps create mode 100644 doc/examples/frequencies.sps create mode 100644 doc/examples/independent-samples-t.sps create mode 100644 doc/examples/means.sps create mode 100644 doc/examples/one-sample-t.sps create mode 100644 doc/examples/reliability.sps create mode 100644 doc/examples/split.sps create mode 100644 doc/examples/weight.sps create mode 100644 doc/pspp-manual.css create mode 100644 
examples/horticulture.sav create mode 100644 examples/personnel.sav diff --git a/configure.ac b/configure.ac index 3bc4ea1fd6..838b746236 100644 --- a/configure.ac +++ b/configure.ac @@ -22,7 +22,7 @@ AC_INIT([GNU PSPP], [1.5.2], [bug-gnu-pspp@gnu.org], [pspp]) AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_TESTDIR([tests]) -AM_INIT_AUTOMAKE([tar-ustar]) +AM_INIT_AUTOMAKE([tar-ustar info-in-builddir]) dnl Checks for programs. AC_GNU_SOURCE diff --git a/doc/automake.mk b/doc/automake.mk index 5bdfaa6754..61a35f9fe3 100644 --- a/doc/automake.mk +++ b/doc/automake.mk @@ -1,5 +1,5 @@ ## PSPP - a program for statistical analysis. -## Copyright (C) 2019 Free Software Foundation, Inc. +## Copyright (C) 2019, 2020 Free Software Foundation, Inc. ## ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -91,7 +91,8 @@ docbookdir = $(docdir) dist_docbook_DATA = doc/pspp.xml -CLEANFILES += pspp-dev.dvi $(docbook_DATA) +CLEANFILES += pspp-dev.dvi $(docbook_DATA) doc/pspp.info* + doc: $(INFO_DEPS) $(DVIS) $(PDFS) $(PSS) $(HTMLS) $(dist_docbook_DATA) PHONY += doc @@ -103,4 +104,75 @@ $(top_srcdir)/doc/help-pages-list: $(UI_FILES) test -s $@,tmp mv $@,tmp $@ -EXTRA_DIST += doc/help-pages-list +EXTRA_DIST += \ + doc/help-pages-list \ + doc/pspp-manual.css + + +AM_MAKEINFOFLAGS=-I $(top_srcdir)/doc/examples -I $(top_builddir)/doc/examples +am__TEXINFO_TEX_DIR=:$(top_srcdir)/doc/examples:$(top_builddir)/doc/examples + +################# Example programs ############################## + +EXAMPLE_SYNTAX = \ + doc/examples/autorecode.sps \ + doc/examples/chisquare.sps \ + doc/examples/compute.sps \ + doc/examples/count.sps \ + doc/examples/descriptives.sps \ + doc/examples/flip.sps \ + doc/examples/frequencies.sps \ + doc/examples/means.sps \ + doc/examples/one-sample-t.sps \ + doc/examples/independent-samples-t.sps \ + doc/examples/reliability.sps \ + 
doc/examples/split.sps \ + doc/examples/weight.sps + + +EXTRA_DIST += $(EXAMPLE_SYNTAX) + +EXAMPLE_OUTPUTS = $(EXAMPLE_SYNTAX:.sps=.out) +EXAMPLE_HTML = $(EXAMPLE_SYNTAX:.sps=.html) + +$(top_builddir)/doc/pspp.info: $(EXAMPLE_OUTPUTS) +$(top_builddir)/doc/pspp.ps: $(EXAMPLE_OUTPUTS) +$(top_builddir)/doc/pspp.dvi: $(EXAMPLE_OUTPUTS) +$(top_builddir)/doc/pspp.html: $(EXAMPLE_HTML) +$(top_builddir)/doc/pspp.pdf: $(EXAMPLE_OUTPUTS) + +# The examples cannot be built until the binary has been built +$(EXAMPLE_OUTPUTS): $(top_builddir)/src/ui/terminal/pspp +$(EXAMPLE_HTML): $(top_builddir)/src/ui/terminal/pspp + +CLEANFILES += $(EXAMPLE_OUTPUTS) + +SUFFIXES: .sps + +# use pspp to process a syntax file and reap the output into a text file +.sps.out: + $(MKDIR_P) $(@D) + where=$$PWD ; \ + (cd $(top_srcdir)/examples; ${abs_builddir}/src/ui/terminal/pspp $(abs_srcdir)/doc/examples/$( and +# everything after and including +.sps.html: + $(MKDIR_P) $(@D) + where=$$PWD ; \ + (cd $(top_srcdir)/examples; ${abs_builddir}/src/ui/terminal/pspp $(abs_srcdir)/doc/examples/$( $@ + +# Insert the link tag for the cascading style sheet. +# But make sure these operations are idempotent. +html-local: + for h in doc/pspp.html/*.html; do \ + if grep -Fq '/i \\\ +' $$h; \ + done + +install-html-local: html-local + $(MKDIR_P) $(DESTDIR)$(prefix)/share/doc/pspp/pspp.html + $(INSTALL_DATA) ${top_srcdir}/doc/pspp-manual.css $(DESTDIR)$(prefix)/share/doc/pspp/pspp.html diff --git a/doc/data-selection.texi b/doc/data-selection.texi index 2f5f9c3285..5c846b3abc 100644 --- a/doc/data-selection.texi +++ b/doc/data-selection.texi @@ -194,6 +194,40 @@ entire active dataset as a single group of data. When @cmd{SPLIT FILE} is specified after @cmd{TEMPORARY}, it affects only the next procedure (@pxref{TEMPORARY}). 
+@subsection Example Split + +The file @file{horticulture.sav} contains data describing the @exvar{yield} +of a number of horticultural specimens which have been subjected to +various @exvar{treatment}s. If we wanted to investigate linear statistics +of the @exvar{yield}, one way to do this is using the @cmd{DESCRIPTIVES} command (@pxref{DESCRIPTIVES}). +However, it is reasonable to expect the mean to be different depending +on the @exvar{treatment}. So we might want to perform three separate +procedures --- one for each treatment. +@footnote{There are other, possibly better, ways to achieve a similar result +using the @cmd{MEANS} or @cmd{EXAMINE} commands.} +@ref{split:ex} shows how this can be done automatically using +the @cmd{SPLIT FILE} command. + +@float Example, split:ex +@psppsyntax {split.sps} +@caption {Running @cmd{DESCRIPTIVES} on each value of @exvar{treatment}} +@end float + +In @ref{split:res} you can see that the table of descriptive statistics +appears 3 times --- once for each value of @exvar{treatment}. +In this example @samp{N}, the number of observations, is identical in +all splits. This is because that experiment was deliberately designed +that way. However in general one can expect a different @samp{N} for each +split. + +@float Example, split:res +@psppoutput {split} +@caption {The results of running @cmd{DESCRIPTIVES} with an active split} +@end float + +Unless @cmd{TEMPORARY} was used, after a split has been defined for +a dataset it remains active until explicitly disabled. + @node TEMPORARY @section TEMPORARY @vindex TEMPORARY @@ -276,3 +310,27 @@ the next procedure (@pxref{TEMPORARY}). @cmd{WEIGHT} does not cause cases in the active dataset to be replicated in memory. + + +@subsection Example Weights + +One could define a dataset containing an inventory of stock items. +It would be reasonable to use a string variable for a description of the +item, and a numeric variable for the number in stock, like in @ref{weight:ex}.
+ +@float Example, weight:ex +@psppsyntax {weight.sps} +@caption {Setting the weight on the variable @exvar{quantity}} +@end float + +One analysis which most surely would be of interest is +the relative amounts of each item in stock. +However without setting a weight variable, @cmd{FREQUENCIES} (@pxref{FREQUENCIES}) will not +tell us what we want to know, since there is only one case for each stock item. +@ref{weight:res} shows the difference between the weighted and unweighted +frequency tables. + +@float Example, weight:res +@psppoutput {weight} +@caption {Weighted and unweighted frequency tables of @exvar{item}} +@end float diff --git a/doc/examples/autorecode.sps b/doc/examples/autorecode.sps new file mode 100644 index 0000000000..4c0b3f3d25 --- /dev/null +++ b/doc/examples/autorecode.sps @@ -0,0 +1,20 @@ +get file='personnel.sav'. + +* Correct a typing error in the original file. +do if occupation = "Scrientist". + compute occupation = "Scientist". +end if. + +autorecode + variables = occupation into occ + /blank = missing. + +* Delete the old variable. +delete variables occupation. + +* Rename the new variable to the old variable's name. +rename variables (occ = occupation). + +* Inspect the new variable. +display dictionary /variables=occupation. + diff --git a/doc/examples/chisquare.sps b/doc/examples/chisquare.sps new file mode 100644 index 0000000000..e8428f16b1 --- /dev/null +++ b/doc/examples/chisquare.sps @@ -0,0 +1,4 @@ +get file='physiology.sav'. + +npar test + /chisquare=sex. diff --git a/doc/examples/compute.sps b/doc/examples/compute.sps new file mode 100644 index 0000000000..af4b50526c --- /dev/null +++ b/doc/examples/compute.sps @@ -0,0 +1,7 @@ +get file='physiology.sav'. + +* height is in mm so we must divide by 1000 to get metres. +compute bmi = weight / (height/1000)**2. +variable label bmi "Body Mass Index". + +descriptives /weight height bmi.
diff --git a/doc/examples/count.sps b/doc/examples/count.sps new file mode 100644 index 0000000000..4897d7912c --- /dev/null +++ b/doc/examples/count.sps @@ -0,0 +1,5 @@ +get file="hotel.sav". + +count low_counts = v1 v2 v3 (low thru 2). + +list /variables v1 v2 v3 low_counts. \ No newline at end of file diff --git a/doc/examples/descriptives.sps b/doc/examples/descriptives.sps new file mode 100644 index 0000000000..6289763330 --- /dev/null +++ b/doc/examples/descriptives.sps @@ -0,0 +1,8 @@ +get file='physiology.sav'. + +descriptives + /variables = height temperature + /save. + +descriptives + /variables = zheight ztemperature. diff --git a/doc/examples/flip.sps b/doc/examples/flip.sps new file mode 100644 index 0000000000..daac3e02f3 --- /dev/null +++ b/doc/examples/flip.sps @@ -0,0 +1,16 @@ +data list notable list /heading (a16) v1 v2 v3 v4 v5 v6 +begin data. +date-of-birth 1970 1989 2001 1966 1976 1982 +sex 1 0 0 1 0 1 +score 10 10 9 3 8 9 +end data. + +echo 'Before FLIP:'. +display variables. +list. + +flip /variables = all /newnames = heading. + +echo 'After FLIP:'. +display variables. +list. \ No newline at end of file diff --git a/doc/examples/frequencies.sps b/doc/examples/frequencies.sps new file mode 100644 index 0000000000..7c639e687e --- /dev/null +++ b/doc/examples/frequencies.sps @@ -0,0 +1,4 @@ +get file='personnel.sav'. + +frequencies /variables = sex occupation + /statistics = none. diff --git a/doc/examples/independent-samples-t.sps b/doc/examples/independent-samples-t.sps new file mode 100644 index 0000000000..8fb93f7d76 --- /dev/null +++ b/doc/examples/independent-samples-t.sps @@ -0,0 +1,6 @@ +get file='physiology.sav'. + +select if (height >= 200). + +t-test /variables = height + /groups = sex(0,1). diff --git a/doc/examples/means.sps b/doc/examples/means.sps new file mode 100644 index 0000000000..26f39ef95c --- /dev/null +++ b/doc/examples/means.sps @@ -0,0 +1,4 @@ +get file='repairs.sav'. + +means tables = mtbf + by factory by environment. 
diff --git a/doc/examples/one-sample-t.sps b/doc/examples/one-sample-t.sps new file mode 100644 index 0000000000..b3bc56f9a8 --- /dev/null +++ b/doc/examples/one-sample-t.sps @@ -0,0 +1,6 @@ +get file='physiology.sav'. + +select if (weight > 0). + +t-test testval = 76.8 + /variables = weight. diff --git a/doc/examples/reliability.sps b/doc/examples/reliability.sps new file mode 100644 index 0000000000..97b24b5c53 --- /dev/null +++ b/doc/examples/reliability.sps @@ -0,0 +1,9 @@ +get file="hotel.sav". + +* Recode V3 and V5 inverting the sense of the values. +compute v3 = 6 - v3. +compute v5 = 6 - v5. + +reliability + /variables= all + /model=alpha. diff --git a/doc/examples/split.sps b/doc/examples/split.sps new file mode 100644 index 0000000000..65807cdc06 --- /dev/null +++ b/doc/examples/split.sps @@ -0,0 +1,9 @@ +get file='horticulture.sav'. + +* Ensure cases are sorted before splitting. +sort cases by treatment. + +split file by treatment. + +* Run descriptives on the yield variable +descriptives /variable = yield. diff --git a/doc/examples/weight.sps b/doc/examples/weight.sps new file mode 100644 index 0000000000..6fe576ef7f --- /dev/null +++ b/doc/examples/weight.sps @@ -0,0 +1,15 @@ +data list notable list /item (a16) quantity (f8.0). +begin data +nuts 345 +screws 10034 +washers 32012 +bolts 876 +end data. + +echo 'Unweighted frequency table'. +frequencies /variables = item /format=dfreq. + +weight by quantity. + +echo 'Weighted frequency table'. +frequencies /variables = item /format=dfreq. diff --git a/doc/pspp-manual.css b/doc/pspp-manual.css new file mode 100644 index 0000000000..70bd6aa8b9 --- /dev/null +++ b/doc/pspp-manual.css @@ -0,0 +1,50 @@ +/* Use a sans-serif font for the Syntax descriptions of each command. 
*/ +pre.display { + font-family: sans-serif; +} + +.pspp h1 { + font-size: 150%; + margin-left: -1.33em +} +.pspp h2 { + font-size: 125%; + font-weight: bold; + margin-left: -.8em +} +.pspp h3 { + font-size: 100%; + font-weight: bold; + margin-left: -.5em } +.pspp h4 { + font-size: 100%; + margin-left: 0em +} +.pspp h1, .pspp h2, .pspp h3, .pspp h4, .pspp h5, .pspp h6 { + font-family: sans-serif; + color: blue +} + +.pspp code { + font-family: sans-serif +} +.pspp table { + border-collapse: collapse; + margin-bottom: 1em +} + +/* Simple cartouche for syntax examples */ +.pspp-syntax table { + border-collapse: collapse; + border: 1px solid black; + padding: 10px; +} + + +.pspp th { background: #dddddd; font-weight: normal; font-style: oblique } +.pspp caption { + font-family: sans-serif; + text-align: left; + color: dark-grey; +} + diff --git a/doc/pspp.texi b/doc/pspp.texi index 84e4003d85..14e97b07bd 100644 --- a/doc/pspp.texi +++ b/doc/pspp.texi @@ -1,6 +1,6 @@ \input texinfo @c -*- texinfo -*- @c PSPP - a program for statistical analysis. -@c Copyright (C) 2017, 2019 Free Software Foundation, Inc. +@c Copyright (C) 2017, 2019, 2020 Free Software Foundation, Inc. @c Permission is granted to copy, distribute and/or modify this document @c under the terms of the GNU Free Documentation License, Version 1.3 @c or any later version published by the Free Software Foundation; @@ -15,6 +15,38 @@ @c @setchapternewpage odd @c %**end of header +@macro gui +graphic user interface +@end macro + +@macro psppsyntax{FILE} +@html +
+@end html +@cartouche +@verbatiminclude \FILE\ +@end cartouche +@html +
+@end html +@end macro + +@macro psppoutput{FILE} +@ifnothtml +@verbatiminclude \FILE\.out +@end ifnothtml +@ifhtml +@cartouche +@html + +
+@include \FILE\.html +
+ +@end html +@end cartouche +@end ifhtml +@end macro @macro note{param1} @quotation @@ -22,6 +54,12 @@ @end quotation @end macro +@c A macro to indicate variable names used in examples. +@c NOT metasyntactical variables - for that use @var +@macro exvar{VAR} +@b{\VAR\} +@end macro + @include version.texi @c This macro should be used for marking command names. For the purposes of markup, diff --git a/doc/statistics.texi b/doc/statistics.texi index ee8709616f..f038ed6c59 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -1,5 +1,5 @@ @c PSPP - a program for statistical analysis. -@c Copyright (C) 2017 Free Software Foundation, Inc. +@c Copyright (C) 2017, 2020 Free Software Foundation, Inc. @c Permission is granted to copy, distribute and/or modify this document @c under the terms of the GNU Free Documentation License, Version 1.3 @c or any later version published by the Free Software Foundation; @@ -53,8 +53,7 @@ DESCRIPTIVES @end display The @cmd{DESCRIPTIVES} procedure reads the active dataset and outputs -descriptive -statistics requested by the user. In addition, it can optionally +linear descriptive statistics requested by the user. In addition, it can optionally compute Z-scores. The @subcmd{VARIABLES} subcommand, which is required, specifies the list of @@ -124,6 +123,41 @@ in the order that they are specified on the @subcmd{VARIABLES} subcommand. The @subcmd{A} and @subcmd{D} settings request an ascending or descending sort order, respectively. +@subsection Descriptives Example + +The @file{physiology.sav} file contains various physiological data for a sample +of persons. Running the @cmd{DESCRIPTIVES} command on the variables @exvar{height} +and @exvar{temperature} with the default options allows one to see simple linear +statistics for these two variables. In @ref{descriptives:ex}, these variables +are specified on the @subcmd{VARIABLES} subcommand and the @subcmd{SAVE} option +has been used, to request that Z scores be calculated.
+ +After the command has completed, this example runs @cmd{DESCRIPTIVES} again, this +time on the @exvar{zheight} and @exvar{ztemperature} variables, +which are the two normalized (Z-score) variables generated by the +first @cmd{DESCRIPTIVES} command. + +@float Example, descriptives:ex +@psppsyntax {descriptives.sps} +@caption {Running two @cmd{DESCRIPTIVES} commands, one with the @subcmd{SAVE} subcommand} +@end float + +In @ref{descriptives:res}, we can see that there are 40 valid data for each of the variables +and no missing values. The mean average of the height and temperature is 16677.12 +and 37.02 respectively. The descriptive statistics for temperature seem reasonable. +However there is a very high standard deviation for @exvar{height} and a suspiciously +low minimum. This is due to a data entry error in the +data (@pxref{Identifying incorrect data}). + +In the second Descriptive Statistics command, one can see that the mean and standard +deviation of both Z score variables is 0 and 1 respectively. All Z score statistics +should have these properties since they are normalized versions of the original scores. + +@float Result, descriptives:res +@psppoutput {descriptives} +@caption {Descriptive statistics including two normalized variables (Z-scores)} +@end float + @node FREQUENCIES @section FREQUENCIES @@ -240,6 +274,36 @@ The @subcmd{FREQ} and @subcmd{PERCENT} options on @subcmd{HISTOGRAM} and The @subcmd{ORDER} subcommand is accepted but ignored. +@subsection Frequencies Example + +@ref{frequencies:ex} runs a frequency analysis on the @exvar{sex} +and @exvar{occupation} variables from the @file{personnel.sav} file. +This is useful to get a general idea of the way in which these nominal +variables are distributed.
+ +@float Example, frequencies:ex +@psppsyntax {frequencies.sps} +@caption {Running frequencies on the @exvar{sex} and @exvar{occupation} variables} +@end float + +If you are using the graphic user interface, the dialog box is set up such that +by default, several statistics are calculated. These are not particularly useful +for these variables, so you will want to disable those. + +From @ref{frequencies:res} it is evident that there are 33 males, 21 females and +2 persons whose sex has not been entered. + +One can also see how many of each occupation there are in the data. +When dealing with string variables used as nominal values, running a frequency +analysis is useful to detect data input errors. Notice that +one @exvar{occupation} value has been mistyped as ``Scrientist''. This entry should +be corrected, or marked as missing before using the data. + +@float Result, frequencies:res +@psppoutput {frequencies} +@caption {The relative frequencies of @exvar{sex} and @exvar{occupation}} +@end float + @node EXAMINE @section EXAMINE @@ -403,8 +467,8 @@ EXAMINE @var{height} @var{weight} BY @end example In this example, we look at the height and weight of a sample of individuals and how they differ between male and female. -A table showing the 3 largest and the 3 smallest values of @var{height} and -@var{weight} for each gender, and for the whole dataset will be shown. +A table showing the 3 largest and the 3 smallest values of @exvar{height} and +@exvar{weight} for each gender, and for the whole dataset will be shown. Boxplots will also be produced.
Because @subcmd{/COMPARE = GROUPS} was given, boxplots for male and female will be shown in the same graphic, allowing us to easily see the difference between @@ -725,7 +789,6 @@ The @subcmd{STATISTICS} subcommand selects statistics for computation: @table @asis @item CHISQ -@cindex chisquare @cindex chi-square Pearson chi-square, likelihood ratio, Fisher's exact test, continuity @@ -1210,6 +1273,40 @@ variables should be taken at their face value, however cases which have user missing values for the categorical variables should be omitted from the calculation. +@subsection Example Means + +The dataset in @file{repairs.sav} contains the mean time between failures (@exvar{mtbf}) +for a sample of artifacts produced by different factories and trialed under +different operating conditions. +Since there are four combinations of categorical variables, by simply looking +at the list of data, it would be hard to see how the scores vary for each category. +@ref{means:ex} shows one way of tabulating the @exvar{mtbf} in a way which is +easier to understand. + +@float Example, means:ex +@psppsyntax {means.sps} +@caption {Running @cmd{MEANS} on the @exvar{mtbf} score with categories @exvar{factory} and @exvar{environment}} +@end float + +The results are shown in @ref{means:res}. The figures shown indicate the mean, +standard deviation and number of samples in each category. +These figures however do not indicate whether the results are statistically +significant. For that, you would need to use the procedures @cmd{ONEWAY}, @cmd{GLM} or +@cmd{T-TEST} depending on the hypothesis being tested. + +@float Result, means:res +@psppoutput {means} +@caption {The @exvar{mtbf} categorised by @exvar{factory} and @exvar{environment}} +@end float + +Note that there is no limit to the number of variables for which you can calculate +statistics, nor to the number of categorical variables per layer, nor the number +of layers.
+However, running @cmd{MEANS} on a large number of variables, or with categorical variables +containing a large number of distinct values may result in an extremely large output, which +will not be easy to interpret. +So you should consider carefully which variables to select for participation in the analysis. + @node NPAR TESTS @section NPAR TESTS @@ -1251,7 +1348,7 @@ is used. @menu * BINOMIAL:: Binomial Test -* CHISQUARE:: Chisquare Test +* CHISQUARE:: Chi-square Test * COCHRAN:: Cochran Q Test * FRIEDMAN:: Friedman Test * KENDALL:: Kendall's W Test @@ -1313,11 +1410,10 @@ compute the binomial significance. Thus, exact results are reported even for very large sample sizes. - @node CHISQUARE -@subsection Chisquare Test +@subsection Chi-square Test @vindex CHISQUARE -@cindex chisquare test +@cindex chi-square test @display @@ -1342,6 +1438,34 @@ sum of the frequencies need not be 1. If no @subcmd{/EXPECTED} subcommand is given, then equal frequencies are expected. +@subsubsection Chi-square Example + +A researcher wishes to investigate whether there are an equal number of +persons of each sex in a population. The sample chosen for investigation +is that from the @file{physiology.sav} dataset. The null hypothesis for +the test is that the population comprises an equal number of males and females. +The analysis is performed as shown in @ref{chisquare:ex}. + +@float Example, chisquare:ex +@psppsyntax {chisquare.sps} +@caption {Performing a chi-square test to check for equal distribution of sexes} +@end float + +There is only one test variable, @i{viz:} @exvar{sex}. The other variables in the dataset +are ignored. + +In @ref{chisquare:res} the summary box shows that in the sample, there are more males +than females.
However the significance of the chi-square result is greater than 0.05 +--- the most commonly accepted p-value --- and therefore +there is not enough evidence to reject the null hypothesis and one must conclude +that the evidence does not indicate that there is an imbalance of the sexes +in the population. + +@float Result, chisquare:res +@psppoutput {chisquare} +@caption {The results of running a chi-square test on @exvar{sex}} +@end float + @node COCHRAN @subsection Cochran Q Test @@ -1675,6 +1799,39 @@ which you wish to test. In this mode, you must also use the @subcmd{/VARIABLES} subcommand to tell @pspp{} which variables you wish to test. +@subsubsection Example - One Sample T-test + +A researcher wishes to know whether the weight of persons in a population +is different from the national average. +The samples are drawn from the population under investigation and recorded +in the file @file{physiology.sav}. +From the Department of Health, she +knows that the national average weight of healthy adults is 76.8kg. +Accordingly the @subcmd{TESTVAL} is set to 76.8. +The null hypothesis therefore is that the mean average weight of the +population from which the sample was drawn is 76.8kg. + +As previously noted (@pxref{Identifying incorrect data}), one +sample in the dataset contains a weight value +which is clearly incorrect. So this is excluded from the analysis +using the @cmd{SELECT IF} command. + +@float Example, one-sample-t:ex +@psppsyntax {one-sample-t.sps} +@caption {Running a one sample T-Test after excluding all non-positive values} +@end float + +@ref{one-sample-t:res} shows that the mean of our sample differs from the test value +by -1.40kg. However the significance is very high (0.610). So one cannot +reject the null hypothesis, and must conclude there is not enough evidence +to suggest that the mean weight of the persons in our population is different +from 76.8kg.
+ +@float Results, one-sample-t:res +@psppoutput {one-sample-t} +@caption {The results of a one sample T-test of @exvar{weight} using a test value of 76.8kg} +@end float + @node Independent Samples Mode @subsection Independent Samples Mode @@ -1701,6 +1858,52 @@ When using this form of the @subcmd{GROUPS} subcommand, missing values in the independent variable are excluded on a listwise basis, regardless of whether @subcmd{/MISSING=LISTWISE} was specified. +@subsubsection Example - Independent Samples T-test + +A researcher wishes to know whether within a population, adult males +are taller than adult females. +The samples are drawn from the population under investigation and recorded +in the file @file{physiology.sav}. + +As previously noted (@pxref{Identifying incorrect data}), one +sample in the dataset contains a height value +which is clearly incorrect. So this is excluded from the analysis +using the @cmd{SELECT IF} command. + + +@float Example, independent-samples-t:ex +@psppsyntax {independent-samples-t.sps} +@caption {Running an independent samples T-Test after excluding all observations less than 200mm} +@end float + + +The null hypothesis is that both males and females are on average +of equal height. + +In this case, the grouping variable is @exvar{sex}, so this is entered +as the variable for the @subcmd{GROUPS} subcommand. The group values are 0 (male) and +1 (female). + +If you are running the procedure using syntax, then you need to enter +the values corresponding to each group within parentheses. + + +From @ref{independent-samples-t:res}, one can clearly see that the @emph{sample} mean height +is greater for males than for females. However in order to see if this +is a significant result, one must consult the T-Test table. + +The T-Test table contains two rows; one for use if the variance of the samples +in each group may be safely assumed to be equal, and the second row +if the variances in each group may not be safely assumed to be equal.
+ +In this case however, both rows show a 2-tailed significance less than 0.001 and +one must therefore reject the null hypothesis and conclude that within +the population the mean heights of males and of females are unequal. + +@float Result, independent-samples-t:res +@psppoutput {independent-samples-t} +@caption {The results of an independent samples T-test of @exvar{height} by @exvar{sex}} +@end float @node Paired Samples Mode @subsection Paired Samples Mode @@ -1983,6 +2186,38 @@ The @subcmd{SUMMARY} subcommand determines the type of summary analysis to be pe Currently there is only one type: @subcmd{SUMMARY=TOTAL}, which displays per-item analysis tested against the totals. +@subsection Example - Reliability + +Before analysing the results of a survey -- particularly for a multiple choice survey -- +it is desirable to know whether the respondents have considered their answers +or simply provided random answers. + +In the following example the survey results from the file @file{hotel.sav} are used. +All five survey questions are included in the reliability analysis. +However, before running the analysis, the data must be preprocessed. +An examination of the survey questions reveals that two questions, @i{viz:} v3 and v5 +are negatively worded, whereas the others are positively worded. +All questions must be based upon the same scale for the analysis to be meaningful. +One could use the @cmd{RECODE} command (@pxref{RECODE}), however a simpler way is +to use @cmd{COMPUTE} (@pxref{COMPUTE}) and this is what is done in @ref{reliability:ex}. + +@float Example, reliability:ex +@psppsyntax {reliability.sps} +@caption {Investigating the reliability of survey responses} +@end float + +In this case, all variables in the data set are used. So we can use the special +keyword @samp{ALL} (@pxref{BNF}). + +@ref{reliability:res} shows that Cronbach's Alpha is 0.11 which is a value normally considered too +low to indicate consistency within the data.
This is possibly due to the small number of +survey questions. The survey should be redesigned before the results are +put to serious use. + +@float Result, reliability:res +@psppoutput {reliability} +@caption {The results of the reliability command on @file{hotel.sav}} +@end float @node ROC diff --git a/doc/transformation.texi b/doc/transformation.texi index 74d972a2e2..5635903df2 100644 --- a/doc/transformation.texi +++ b/doc/transformation.texi @@ -288,6 +288,40 @@ to numeric values. @subcmd{/BLANK=VALID} is the default. @cmd{AUTORECODE} is a procedure. It causes the data to be read. +@subsection Autorecode Example + +In the file @file{personnel.sav}, the variable @exvar{occupation} is a string +variable. Except for data of a purely commentary nature, string variables +are generally a bad idea. One reason is that data entry errors are easily +overlooked. This has happened in @file{personnel.sav}; one entry which should +read ``Scientist'' has been mistyped as ``Scrientist''. In @ref{autorecode:ex} +first, this error will be corrected, +@footnote{One must use care when correcting such data input errors rather than +simply marking them as missing. For example, if an occupation has been entered +``Barister'', did the person mean ``Barrister'' or did she mean ``Barista''?} +then we will use @cmd{AUTORECODE} to +create a new numeric variable which takes recoded values of @exvar{occupation}. +Finally, we will remove the old variable and rename the new variable to +the name of the old variable. + +@float Example, autorecode:ex +@psppsyntax {autorecode.sps} +@caption {Changing a string variable to a numeric variable using @cmd{AUTORECODE} +after correcting a data entry error} +@end float + + +Notice in @ref{autorecode:res}, how the new variable has been automatically +allocated value labels which correspond to the strings of the old variable. +This means that in future analyses the descriptive strings are reported instead +of the numeric values.
+ +@float Result, autorecode:res +@psppoutput {autorecode} +@caption {The properties of the @exvar{occupation} variable following @cmd{AUTORECODE}} +@end float + + @node COMPUTE @section COMPUTE @vindex COMPUTE @@ -330,6 +364,39 @@ When @cmd{COMPUTE} is specified following @cmd{TEMPORARY} (@pxref{TEMPORARY}), the @cmd{LAG} function may not be used (@pxref{LAG}). +@subsection Compute Examples + +The dataset @file{physiology.sav} contains the height and weight of persons. +For some purposes, neither height nor weight alone is of interest. +Epidemiologists are often more interested in the @dfn{body mass index} which +can sometimes be used as a predictor for clinical conditions. +The body mass index is defined as the weight of the person in kg divided +by the square of the person's height in metres. +@footnote{Since BMI is a quantity with a ratio scale and has units, the term ``index'' +is a misnomer, but that is what it is called.} + +@float Example, bmi:ex +@psppsyntax {compute.sps} +@caption {Computing the body mass index from @exvar{weight} and @exvar{height}} +@end float + +@ref{bmi:ex} shows how you can use @cmd{COMPUTE} to generate a new variable called +@exvar{bmi} and have every case's value calculated from the existing values of +@exvar{weight} and @exvar{height}. +It also shows how you can add a label to this new variable (@pxref{VARIABLE LABELS}), +so that a more descriptive label appears in subsequent analyses, and this can be seen +in the output from the @cmd{DESCRIPTIVES} command in @ref{bmi:res}. + +The expression which follows the @samp{=} sign can be as complicated as necessary. +@xref{Expressions}, for a precise description of the language accepted. + +@float Result, bmi:res +@psppoutput {compute} +@caption {An analysis which includes @exvar{bmi} in its results} +@end float + + + @node COUNT @section COUNT @vindex COUNT @@ -388,52 +455,34 @@ before the procedure is executed---they may not be created as target variables earlier in the command!
Break such a command into two separate commands. -The examples below may help to clarify. +@subsection Count Examples -@enumerate A -@item -Assuming @code{Q0}, @code{Q2}, @dots{}, @code{Q9} are numeric variables, -the following commands: +In the survey results in dataset @file{hotel.sav} a manager wishes +to know how many respondents answered with low valued answers to questions +@exvar{v1}, @exvar{v2} and @exvar{v3}. This can be found using the code +in @ref{count:ex}. Specifically, this code creates a new variable, and +populates it with the number of values in @exvar{v1}--@exvar{v3} which +are 2 or lower. -@enumerate -@item -Count the number of times the value 1 occurs through these variables -for each case and assigns the count to variable @code{QCOUNT}. +@float Example, count:ex +@psppsyntax {count.sps} +@caption {Counting low values to responses @exvar{v1}, @exvar{v2} and @exvar{v3}} +@end float -@item -Print out the total number of times the value 1 occurs throughout -@emph{all} cases using @cmd{DESCRIPTIVES}. @xref{DESCRIPTIVES}, for -details. -@end enumerate +In @ref{count:ex} the @cmd{COUNT} transformation creates a new variable, @exvar{low_counts} and +its values are shown using the @cmd{LIST} command. -@example -COUNT QCOUNT=Q0 TO Q9(1). -DESCRIPTIVES QCOUNT /STATISTICS=SUM. -@end example +In @ref{count:res} we can see the values of @exvar{low_counts} after the @cmd{COUNT} +transformation has completed. The first value is 1, because there is only one +variable among @exvar{v1}, @exvar{v2} and @exvar{v3} which has a value of 2 or less. +The second value is 2, because both @exvar{v1} and @exvar{v2} are 2 or less.
-@item -Given these same variables, the following commands: +@float Result, count:res +@psppoutput {count} +@caption {The values of @exvar{v1}, @exvar{v2}, @exvar{v3} and @exvar{low_counts} after +the @cmd{COUNT} transformation has run} +@end float -@enumerate -@item -Count the number of valid values of these variables for each case and -assigns the count to variable @code{QVALID}. - -@item -Multiplies each value of @code{QVALID} by 10 to obtain a percentage of -valid values, using @cmd{COMPUTE}. @xref{COMPUTE}, for details. - -@item -Print out the percentage of valid values across all cases, using -@cmd{DESCRIPTIVES}. @xref{DESCRIPTIVES}, for details. -@end enumerate - -@example -COUNT QVALID=Q0 TO Q9 (LO THRU HI). -COMPUTE QVALID=QVALID*10. -DESCRIPTIVES QVALID /STATISTICS=MEAN. -@end example -@end enumerate @node FLIP @section FLIP @@ -459,7 +508,7 @@ string variable, is used to give names to the variables created by @cmd{FLIP}. Only the first 8 characters of the variable are used. If @subcmd{NEWNAMES} is not -specified then the default is a variable named CASE_LBL, if it exists. +specified then the default is a variable named @exvar{CASE_LBL}, if it exists. If it does not then the variables created by @cmd{FLIP} are named VAR000 through VAR999, then VAR1000, VAR1001, and so on. @@ -471,17 +520,48 @@ extensions are added, starting with 1, until a unique name is found or there are no remaining possibilities. If the latter occurs then the @cmd{FLIP} operation aborts. -The resultant dictionary contains a CASE_LBL variable, a string +The resultant dictionary contains a @exvar{CASE_LBL} variable, a string variable of width 8, which stores the names of the variables in the dictionary before the transposition. Variables names longer than 8 -characters are truncated. If the active dataset is subsequently -transposed using @cmd{FLIP}, this variable can be used to recreate the -original variable names. +characters are truncated. 
If @cmd{FLIP} is called again on +this dataset, the @exvar{CASE_LBL} variable can be passed to the @subcmd{NEWNAMES} +subcommand to recreate the original variable names. @cmd{FLIP} honors @cmd{N OF CASES} (@pxref{N OF CASES}). It ignores @cmd{TEMPORARY} (@pxref{TEMPORARY}), so that ``temporary'' transformations become permanent. +@subsection Flip Examples + + +In @ref{flip:ex}, data has been entered using @cmd{DATA LIST} (@pxref{DATA LIST}) +such that the first variable in the dataset is a string variable containing +a description of the other data for the case. +Clearly this is not a convenient arrangement for performing statistical analyses, +so it would have been better to think a little more carefully about how the data +should have been arranged. +However, often the data is provided by some third party source, and you have +no control over the form. +Fortunately, we can use @cmd{FLIP} to exchange the variables +and cases in the active dataset. + +@float Example, flip:ex +@psppsyntax {flip.sps} +@caption {Using @cmd{FLIP} to exchange variables and cases in a dataset} +@end float + +As you can see in @ref{flip:res} before the @cmd{FLIP} command has run there +are seven variables (six containing data and one for the heading) and three cases. +Afterwards there are four variables (one per case, plus the @exvar{CASE_LBL} variable) +and six cases. +You can delete the @exvar{CASE_LBL} variable (@pxref{DELETE VARIABLES}) if you don't need it. + +@float Result, flip:res +@psppoutput {flip} +@caption {The results of using @cmd{FLIP} to exchange variables and cases in a dataset} +@end float + + @node IF @section IF @vindex IF @@ -720,7 +800,8 @@ variables. Specify @subcmd{BY} and a list of variables to sort by. By default, variables are sorted in ascending order.
To override sort order, specify @subcmd{(D)} or -@subcmd{(DOWN)} after a list of variables to get descending order, or @subcmd{(A)} or @subcmd{(UP)} +@subcmd{(DOWN)} after a list of variables to get descending order, or @subcmd{(A)} +or @subcmd{(UP)} for ascending order. These apply to all the listed variables up until the preceding @subcmd{(A)}, @subcmd{(D)}, @subcmd{(UP)} or @subcmd{(DOWN)}. @@ -737,3 +818,6 @@ If workspace is exhausted, it falls back to a merge sort algorithm that involves creates numerous temporary files. @cmd{SORT CASES} may not be specified following @cmd{TEMPORARY}. + +@subsection Sorting Example + diff --git a/doc/tutorial.texi b/doc/tutorial.texi index ea74a2fbfb..2071d2a59d 100644 --- a/doc/tutorial.texi +++ b/doc/tutorial.texi @@ -77,8 +77,8 @@ There are two aspects of data: @end itemize @noindent -For example, a data set which has the variables @var{height}, @var{weight}, and -@var{name}, might have the observations: +For example, a data set which has the variables @exvar{height}, @exvar{weight}, and +@exvar{name}, might have the observations: @example 1881 89.2 Ahmed 1192 107.01 Frank @@ -109,7 +109,7 @@ human observer. However they can also be used for nominal or categorical data. -@ref{data-list} defines two variables @var{forename} and @var{height}, +@ref{data-list} defines two variables @exvar{forename} and @exvar{height}, and reads data into them by manual input. @float Example, data-list @@ -127,7 +127,7 @@ and reads data into them by manual input. @end cartouche @caption{Manual entry of data using the @cmd{DATA LIST} command. Two variables -@var{forename} and @var{height} are defined and subsequently filled +@exvar{forename} and @exvar{height} are defined and subsequently filled with manually entered data.} @end float @@ -150,7 +150,7 @@ variables which you wish to define. 
@item The text @samp{forename} is the name of the first variable, -and @samp{(A12)} says that the variable @var{forename} is a string +and @samp{(A12)} says that the variable @exvar{forename} is a string variable and that its maximum length is 12 bytes. The second variable's name is specified by the text @samp{height}. Since no format is given, this variable has the default format. @@ -161,7 +161,7 @@ locales which use a period (@samp{.}) as the decimal separator. However if you are using a system with a locale which uses the comma (@samp{,}) as the decimal separator, then you should in the subsequent lines substitute @samp{.} with @samp{,}. -Alternatively, you could explicitly tell @pspp{} that the @var{height} +Alternatively, you could explicitly tell @pspp{} that the @exvar{height} variable is to be read using a period as its decimal separator by appending the text @samp{DOT8.3} after the word @samp{height}. For more information on data formats, @pxref{Input and Output Formats}. @@ -209,7 +209,7 @@ It should show the following output: @end group @end example @noindent -Note that the numeric variable @var{height} is displayed to 2 decimal +Note that the numeric variable @exvar{height} is displayed to 2 decimal places, because the format for that variable is @samp{F8.2}. For a complete description of the @cmd{LIST} command, @pxref{LIST}. @@ -373,20 +373,20 @@ column, suggesting incorrect data entry.} In the output of @ref{ex-descriptives}, the most interesting column is the minimum value. -The @var{weight} variable has a minimum value of less than zero, +The @exvar{weight} variable has a minimum value of less than zero, which is clearly erroneous. -Similarly, the @var{height} variable's minimum value seems to be very low. +Similarly, the @exvar{height} variable's minimum value seems to be very low. In fact, it is more than 5 standard deviations from the mean, and is a seemingly bizarre height for an adult person. 
We can examine the data in more detail with the @cmd{EXAMINE} command (@pxref{EXAMINE}): -In @ref{ex1} you can see that the lowest value of @var{height} is +In @ref{ex1} you can see that the lowest value of @exvar{height} is 179 (which we suspect to be erroneous), but the second lowest is 1598 which we know from the @cmd{DESCRIPTIVES} command is within 1 standard deviation from the mean. -Similarly the @var{weight} variable has a lowest value which is +Similarly the @exvar{weight} variable has a lowest value which is negative but a plausible value for the second lowest value. This suggests that the two extreme values are outliers and probably represent data entry errors. @@ -444,9 +444,9 @@ command. @end example @noindent The first command says that for any observation which has a -@var{height} value of 179, that value should be changed to the SYSMIS +@exvar{height} value of 179, that value should be changed to the SYSMIS value. -The second command says that any @var{weight} values of zero or less +The second command says that any @exvar{weight} values of zero or less should be changed to SYSMIS. From now on, they will be ignored in analysis. For detailed information about the @cmd{RECODE} command @pxref{RECODE}. @@ -469,10 +469,10 @@ The line @code{display dictionary.} tells @pspp{} to display the variables and associated data. The output from this command has been omitted from the example for the sake of clarity, but you will notice that each of the variables -@var{v1}, @var{v2} @dots{} @var{v5} are measured on a 5 point Likert scale, +@exvar{v1}, @exvar{v2} @dots{} @exvar{v5} are measured on a 5 point Likert scale, with 1 meaning ``Strongly disagree'' and 5 meaning ``Strongly agree''. 
-Whilst variables @var{v1}, @var{v2} and @var{v4} record responses -to a positively posed question, variables @var{v3} and @var{v5} are +Whilst variables @exvar{v1}, @exvar{v2} and @exvar{v4} record responses +to a positively posed question, variables @exvar{v3} and @exvar{v5} are responses to negatively worded questions. In order to perform meaningful analysis, we need to recode the variables so that they all measure in the same direction. @@ -490,7 +490,7 @@ compute @var{var} = 6 - @var{var}. @end example @noindent @ref{ex-reliability} uses this technique to recode the variables -@var{v3} and @var{v5}. +@exvar{v3} and @exvar{v5}. After applying @cmd{COMPUTE} for both variables, all subsequent commands will use the inverted values. @@ -505,14 +505,14 @@ A sensible check to perform on survey data is the calculation of reliability. This gives the statistician some confidence that the questionnaires have been completed thoughtfully. -If you examine the labels of variables @var{v1}, @var{v3} and @var{v4}, +If you examine the labels of variables @exvar{v1}, @exvar{v3} and @exvar{v4}, you will notice that they ask very similar questions. One would therefore expect the values of these variables (after recoding) to closely follow one another, and we can test that with the @cmd{RELIABILITY} command (@pxref{RELIABILITY}). @ref{ex-reliability} shows a @pspp{} session where the user (after recoding negatively scaled variables) requests reliability statistics for -@var{v1}, @var{v3} and @var{v4}. +@exvar{v1}, @exvar{v3} and @exvar{v4}. @float Example, ex-reliability @cartouche @@ -549,7 +549,7 @@ Case Processing Summary @caption{Recoding negatively scaled variables, and testing for reliability with the @cmd{RELIABILITY} command. 
The Cronbach Alpha coefficient suggests a high degree of reliability among variables -@var{v1}, @var{v3} and @var{v4}.} +@exvar{v1}, @exvar{v3} and @exvar{v4}.} @end float As a rule of thumb, many statisticians consider a value of Cronbach's Alpha of @@ -571,12 +571,12 @@ test procedure to use. One way to do this uses the @cmd{EXAMINE} command. In @ref{normality}, a researcher was examining the failure rates of equipment produced by an engineering company. The file @file{repairs.sav} contains the mean time between -failures (@var{mtbf}) of some items of equipment subject to the study. +failures (@exvar{mtbf}) of some items of equipment subject to the study. Before performing linear analysis on the data, the researcher wanted to ascertain that the data is normally distributed. A normal distribution has a skewness and kurtosis of zero. -Looking at the skewness of @var{mtbf} in @ref{normality} it is clear +Looking at the skewness of @exvar{mtbf} in @ref{normality} it is clear that the mtbf figures have a lot of positive skew and are therefore not drawn from a normally distributed variable. Positive skew can often be compensated for by applying a logarithmic @@ -587,12 +587,12 @@ compute mtbf_ln = ln (mtbf). @end example @noindent Rather than redefining the existing variable, this use of @cmd{COMPUTE} -defines a new variable @var{mtbf_ln} which is -the natural logarithm of @var{mtbf}. +defines a new variable @exvar{mtbf_ln} which is +the natural logarithm of @exvar{mtbf}. The final command in this example calls @cmd{EXAMINE} on this new variable, and it can be seen from the results that both the skewness and -kurtosis for @var{mtbf_ln} are very close to zero. -This provides some confidence that the @var{mtbf_ln} variable is +kurtosis for @exvar{mtbf_ln} are very close to zero. +This provides some confidence that the @exvar{mtbf_ln} variable is normally distributed and thus safe for linear analysis. 
In the event that no suitable transformation can be found, then it would be worth considering @@ -676,9 +676,9 @@ Output: @end cartouche @caption{Testing for normality using the @cmd{EXAMINE} command and applying a logarithmic transformation. -The @var{mtbf} variable has a large positive skew and is therefore +The @exvar{mtbf} variable has a large positive skew and is therefore unsuitable for linear statistical analysis. -However the transformed variable (@var{mtbf_ln}) is close to normal and +However the transformed variable (@exvar{mtbf_ln}) is close to normal and would appear to be more suitable.} @end float @@ -753,7 +753,7 @@ However if it is unsafe to assume equal variances, then an alternative calculation is necessary. @pspp{} performs both calculations. -For the @var{height} variable, the output shows the significance of the +For the @exvar{height} variable, the output shows the significance of the Levene test to be 0.33 which means there is a 33% probability that the Levene test produces this outcome when the variances are equal. @@ -761,14 +761,14 @@ Had the significance been less than 0.05, then it would have been unsafe to assu the variances were equal. However, because the value is higher than 0.05 the homogeneity of variances assumption is safe and the ``Equal Variances'' row (the more powerful test) can be used. -Examining this row, the two tailed significance for the @var{height} t-test +Examining this row, the two tailed significance for the @exvar{height} t-test is less than 0.05, so it is safe to reject the null hypothesis and conclude that the mean heights of males and females are unequal. -For the @var{temperature} variable, the significance of the Levene test +For the @exvar{temperature} variable, the significance of the Levene test is 0.58 so again, it is safe to use the row for equal variances. The equal variances row indicates that the two tailed significance for -@var{temperature} is 0.20. 
Since this is greater than 0.05 we must reject +@exvar{temperature} is 0.20. Since this is greater than 0.05 we cannot reject the null hypothesis and conclude that there is insufficient evidence to suggest that the body temperature of male and female persons are different. @@ -860,11 +860,11 @@ Output: @end example @end cartouche @caption{The @cmd{T-TEST} command tests for differences of means. -Here, the @var{height} variable's two tailed significance is less than +Here, the @exvar{height} variable's two tailed significance is less than 0.05, so the null hypothesis can be rejected. Thus, the evidence suggests there is a difference between the heights of male and female persons. -However the significance of the test for the @var{temperature} +However the significance of the test for the @exvar{temperature} variable is greater than 0.05 so the null hypothesis cannot be rejected, and there is insufficient evidence to suggest a difference in body temperature.} @@ -929,13 +929,13 @@ Output (excerpts): @end example @end cartouche @caption{Linear regression analysis to find a predictor for -@var{mttr}. -The first attempt, including @var{duty_cycle}, produces some +@exvar{mttr}. +The first attempt, including @exvar{duty_cycle}, produces some unacceptable high significance values. -However the second attempt, which excludes @var{duty_cycle}, produces +However the second attempt, which excludes @exvar{duty_cycle}, produces significance values no higher than 0.06. -This suggests that @var{mtbf} alone may be a suitable predictor -for @var{mttr}.} +This suggests that @exvar{mtbf} alone may be a suitable predictor +for @exvar{mttr}.} @end float The coefficients in the first table suggest that the formula @@ -944,7 +944,7 @@ can be used to predict the time to repair. However, the significance value for the @var{duty_cycle} coefficient is very high, which would make this an unsafe predictor. For this reason, the test was repeated, but omitting the -@var{duty_cycle} variable.
+@exvar{duty_cycle} variable. This time, the significance of all coefficients no higher than 0.06, suggesting that at the 0.06 level, the formula @math{@var{mttr} = 10.5 + 3.11 \times @var{mtbf}} is a reliable diff --git a/examples/automake.mk b/examples/automake.mk index f1aadffffa..fd78765c00 100644 --- a/examples/automake.mk +++ b/examples/automake.mk @@ -1,5 +1,5 @@ # PSPP - a program for statistical analysis. -# Copyright (C) 2017 Free Software Foundation, Inc. +# Copyright (C) 2017, 2020 Free Software Foundation, Inc. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,6 +24,8 @@ examples_DATA = \ examples/descript.sps \ examples/grid.sps \ examples/hotel.sav \ + examples/horticulture.sav \ + examples/personnel.sav \ examples/physiology.sav \ examples/repairs.sav \ examples/regress.sps \ diff --git a/examples/horticulture.sav b/examples/horticulture.sav new file mode 100644 index 0000000000000000000000000000000000000000..f9bd87f91f519973e811d46a83e5b3ffdabe46ac GIT binary patch literal 2892 zcmY#!^D%PJP}Wok4hRlbaB&QARB-e3aaC~l3sop6E+|ki)HBvIP|#JVurP}^Gtn(b z*3HSxE3ME?&nr~`0wx9q24)~;1Y!_~f>1y*(!sz&!Lc-5!N|bS*vi1t%D@Puff4{T z6=a_P+zt>2XfqIp1i3nf_`3RmGAbQb4MOpsF>2G+dFDibGI-PO65wfjXC_K3FY^5J;m4R3j^p4h?bBwRjHo z=-+>T!6-RDucRnHM}dLifw%lk9|r_Ke9w_xu5dm?^u-~4yZNcz6qy7|@#&Ih^+3>X@|EML7W z03==pOsJXpd5Jlo49CzgyHS587IB8$0_FywJD~tWAA?l3Iv3b|sOB-8b$W0T9BwG$ zFmo6fu8Nnpfx`nuo`FH1ch3iK`alymt=x40oIb$fFmvGUXZW`DiWfp2A@017`wTYm z=yiV6{qgvVx$e9pI3J+7!>^^33)4K9dl(p6_imbv>2C&x<7stj;P^l@?{KA49>Tvc za}e%7rY5!&oF35h#R@)r4^F>m;wv5fR{`@O6kx_z;<|0~!084}pWZ}+KbY|dGlzk} znl<;DFJ`)7C{0n5#U}nd$=C;+55e}p%t81oRCjqBws1PQ?%)oDK7@MFx(VX=;_Jw# znhjXOx8dRD4?cky?uNMsSTj`2eF@I@XyMVJnVaI_P*Rkbl8MMBz;ZTq{r9B-aB-M9 z&};;hpVP3yAB(<*8y;Pg5&977jix=<_JYS3rhQ^=?ms=@_966TaFi|c!VrhKhk?N} zr}VBjhCBnq#`4x2Uo7G?ttA6|;NmcI5cVzX;N5^tUqpcK2Q11sTcpEce=O$lZe9922rdpY2Vq|;j2YNep3doNeb8Z7CW;YIS#x&96S Dj-Lw; literal 0 
HcmV?d00001 diff --git a/examples/personnel.sav b/examples/personnel.sav new file mode 100644 index 0000000000000000000000000000000000000000..6b4b7168ee927069907f67230e222e051fa8bc14 GIT binary patch literal 4209 zcmb_fYitx%6dtrl3z!gMlpkQON>UICrC`$#8)qMVw{-0+&mYR|?u8whoteze^d*TH z6CeE25QG?Fi2Nb`BE%mm8i;}@g?dv}H{kwkBHy0bI) z-0z(8edpZU<&Et%>WU{;5-p)=BpyjdNMlQT18Lf{nb{OMZn_gelZqp+$?o$+HAzX9d!cpgc!Qbs_O*PS&O=_ywJ+-lB-A4HN9|x?8cn?Vo z`~k>tHMVqV$xV?Cd_MytI3;+0FYjktzdgd2Z@3rxf5Q*@qs*BL$(yTa4cmBfaD5yi zpUV771+RE#l)rxv-o>fQh_#D!>yDd&r)V>q&z-T@=0qgf@_+d(eOg!sq?ajLq&?EL zoscDfA2Cgj8YD*z>hzOtw##xDacSTE@J_@8w@O(DH5lITaqF1}8d(;fz=}NPdGL)F z+TpWuxDXfbMI87WFZ_Ky@6Q)=(GK%JGjr4bnz{MXbm2Fyy8tenyBIE9L#%(ddt7g} zIl#G1_>(f?ug&?cmN~JuTy1Hdn z_zAA}lZq(9>jAizi*H=6c3B2nQCVF%f8|;M5Pk|s;RgU2eo%xBKli~y&(IUp8^#Oj zRW>IZtLi2&BQWyQ)9^{>Bbh8qgE-qV)I{aNE4BJyc{HD&hR<|fOX8TYvy@H^xqy=FTh;r;uqyG*w8MG{ z(Lh}}#_`Wo?4KLO_jn}?FWb!=qEsIv(-T+KNMU{=qZ@{9=Qu9R1KfQp2M|{`U7Y{( z;Kw6RPf468_mYq#`sHvO?x(=%peEgob})ASJ*-BV!$WShOqLVz`8erw>3LDY^IUP{*)b=oJ3YEYKkUPERu) zKiKoQB92djav|~9<=X2KFHU=O%Pr&i{D(JGr&%>XlPq&Y#V5gfD>{2MG zP#(Fiq(0J3Yv-IR;x%$LhwymbMO;!J%~Rt?T5hOK%yM=EX{|K12hv#3KhGoAM@iiz zemimC#bO?t9GU^9NoQr$feJ(3LU=LD+Jg)zgI~8^b8e`uR)*K7Mjd9R8SpkE^uhHd z=imC$e8m`&d< zqwf^+DDc{-lSRVeb*RoH-;?he-?WUVt)8I+U8I>ovw*>M4;%|WOY*qr=EKLve<;Mq z^1|Nl3%uU^{!!3LNqlck4!!pK6&3t55N`+5bF#jDTwVtyxEn^sXPy-Nv3$Gb>DaGg zo&y^xkCWR8y~sG1k1qXd{kYoh?cs@Rrh&Q(^ew5c$#051Gy*!|cCsI=)*nC6De{-jcj&1MWd>=3W{Gxy z6RPi${4a4&4*W5q3cG~o^!W9S`wrI&=e>k)yZ?OWsjFNsdDx>8)Z@H^U$Ecsk`Q^B zeczefKQ*IWRTIqbiEYd@S-L-1@7(W_eiXlO`Lp?>vJQoP7wk`kctdzes{8xWtyndy=4&OTDZM!USy1cYy*cq}lqw_rl@*2XCddXZmdwJEU zs(CqH_nMyN`U>njoIi<^9++CXZA{jkunW|kuggN+X(?=L#e;(~_|s7G?3kL=vpnCh z@7b9BKwKP8>g&Yz4FkW7%eaIc?qA3|#1+zIlydnIrS9dg+Fzegw^D->2JzZ==#u*> z^Kd-7;P(4P{qHFBJJ1)fTv~6^zr^D6hf<*bf{tU31qI*+b-{Tm(J?rFG*}0P`i>e9 zfRE?%b+`Tn_WPg?0{b{KSV51ooljuCTF^goD|mzRlWqHX4DnW?W3X4Q4y&zjYC+=Q zJ)nj5RUW6*FZ#Fs3g-=bPlJJi=jRie&UkRp##MM;L%NhYL%hB#*vI(3pt*3+fcbao 
zQu2p*JwHmkE#PM^M@gKYQ*dYY2TTj~%^=HIYsR9r%F)%Q&0x=^HgMb`_ z0+Nvq28Ig$$t4O#28IS!M#fgA1``WyCN{84T>F@nfdi;%@>E9kdXNDi05bqA1C(|3 z3-b)}_wyA6%J~%$5=9K^~X9mi_^n=Xc3`xxeD@_IQVBmv2 zM@UhAL1uDd4pfK*NQ3MHx@qrXrjLQYHjdmN|I^Q!tM{lI-MnOnNLSfB*iStiWu*k&+5zrKbL! z9Koz00c3$BQts$zD=hDDbU1qk1m4(Bp1>?&0aBO(l>Qm%DsbkYsDo9&sj!u+Z5@8_ zI|hZ+U9kVzeB#soEt(F+D`!Pc6!&z1n3b9``3AEJ3rO=M7MOJ@e@IZo~by1`I5#CG5v?WYQ(&^hx+0#M4h06 zF$6di+HzfATyWMN>Ezp~n6NyJNE^)WP+hPz4X{cgVHA4y>?p02{b0 z3>dH=Eh#B59|4_h%N4E+(RkAZ>fpCvpMq3=1G&Fc1@7Q#uuG3X%-z8U^{6>C1l~eC zS`3UDaERC{KrKeZ4_Lp*FQ5gGFoA@i4m^k<@u&;Ym*(dH4*C8^_TaRY!oUCtdZ@Y2 K93c(|Mi~HqM3x=^ delta 445 zcmdnRcYt$(qK|=|rJjL;u0p1ng_&+avTjahUTKAHdS0ml7%(v~Ft7kIBM^fCKZF93 zkq!oC3NES13WlaehE_%vRz~I%3vDJguuNS0c;W$$$uk&LB-w%Ln1L8%0tXO-04Io; z9LnU)$TN8%lkwySOgxO{lixB~@q$zf0Wrv~fXSZB!IQT#E3*9k`!{9sEoOsyAR`ro zZ$^fk`oPNSu0i+LX z-_Pa~pZ0Ij