EXTRA_DIST += $(EXAMPLE_SYNTAX)
-EXAMPLE_SPVS = $(EXAMPLE_SYNTAX:.sps=.spv) doc/examples/tutorial7b.spv
-EXAMPLE_OUTPUTS = $(EXAMPLE_SPVS:.spv=.out)
+EXAMPLE_SPVS = $(EXAMPLE_SYNTAX:.sps=.spv) \
+ doc/examples/tutorial2a.spv \
+ doc/examples/tutorial2b.spv \
+ doc/examples/tutorial5a.spv \
+ doc/examples/tutorial5b.spv \
+ doc/examples/tutorial7a.spv \
+ doc/examples/tutorial7b.spv
+EXAMPLE_TXTS = $(EXAMPLE_SPVS:.spv=.txt)
+EXAMPLE_TEXIS = $(EXAMPLE_TXTS:.txt=.texi)
EXAMPLE_HTML = $(EXAMPLE_SPVS:.spv=.html)
example-spv: $(EXAMPLE_SPVS)
-example-outputs: $(EXAMPLE_OUTPUTS)
+example-txts: $(EXAMPLE_TXTS)
+example-texis: $(EXAMPLE_TEXIS)
example-html: $(EXAMPLE_HTML)
-PHONY += example-outputs example-html
+PHONY += example-spv example-txts example-texis example-html
-$(top_builddir)/doc/pspp.info: $(EXAMPLE_OUTPUTS)
-$(top_builddir)/doc/pspp.ps: $(EXAMPLE_OUTPUTS)
-$(top_builddir)/doc/pspp.dvi: $(EXAMPLE_OUTPUTS)
+$(top_builddir)/doc/pspp.info: $(EXAMPLE_TEXIS)
+$(top_builddir)/doc/pspp.ps: $(EXAMPLE_TEXIS)
+$(top_builddir)/doc/pspp.dvi: $(EXAMPLE_TEXIS)
$(top_builddir)/doc/pspp.html: $(EXAMPLE_HTML)
-$(top_builddir)/doc/pspp.pdf: $(EXAMPLE_OUTPUTS)
-$(top_builddir)/doc/pspp.xml: $(EXAMPLE_OUTPUTS)
+$(top_builddir)/doc/pspp.pdf: $(EXAMPLE_TEXIS)
+$(top_builddir)/doc/pspp.xml: $(EXAMPLE_TEXIS)
-CLEANFILES += $(EXAMPLE_OUTPUTS) $(EXAMPLE_SPVS)
-SUFFIXES += .sps .spv
+CLEANFILES += $(EXAMPLE_TXTS) $(EXAMPLE_SPVS) $(EXAMPLE_TEXIS) $(EXAMPLE_HTML)
+SUFFIXES += .sps .spv .txt .html .texi
# Use pspp to process a syntax file into an output file.
pspp = src/ui/terminal/pspp
&& $(abs_top_builddir)/$(pspp) ../doc/examples/$(<F) -o - -O format=spv) > $@.tmp
$(AM_V_at)mv $@.tmp $@
-# The tutorial only wants some parts of the output here.
+# In some cases, the tutorial only wants some parts of the output.
pspp_output = utilities/pspp-output
+convert = $(AM_V_GEN)$(pspp_output) convert $< $@
+# Extract the parts of the tutorial2 output that the tutorial shows:
+# the output of the Descriptives command, and the item labeled
+# "Extreme Values".  The selection options are spelled out in full
+# (--commands, --labels), matching the other rules in this file,
+# rather than relying on option-name abbreviation.
+doc/examples/tutorial2a.spv: doc/examples/tutorial2.spv $(pspp_output)
+	$(convert) --commands='Descriptives'
+doc/examples/tutorial2b.spv: doc/examples/tutorial2.spv $(pspp_output)
+	$(convert) --labels='Extreme Values'
+doc/examples/tutorial5a.spv: doc/examples/tutorial5.spv $(pspp_output)
+ $(convert) --commands=examine --nth-command=1 --labels=descriptives
+doc/examples/tutorial5b.spv: doc/examples/tutorial5.spv $(pspp_output)
+ $(convert) --commands=examine --nth-command=2 --labels=descriptives
+doc/examples/tutorial7a.spv: doc/examples/tutorial7.spv $(pspp_output)
+ $(convert) --commands=regression --nth-command=1 --subtypes=coefficients
doc/examples/tutorial7b.spv: doc/examples/tutorial7.spv $(pspp_output)
- $(AM_V_GEN)$(pspp_output) convert --subtypes=coefficients $< $@
+ $(convert) --commands=regression --nth-command=2 --subtypes=coefficients
# Convert an output file into a text file or HTML file.
#
# (For HTML, use sed to include only the contents of <body>.)
-$(EXAMPLE_OUTPUTS) $(EXAMPLE_HTML): $(pspp_output)
-.spv.out:
+$(EXAMPLE_TXTS) $(EXAMPLE_HTML): $(pspp_output)
+.spv.txt:
$(AM_V_GEN)utilities/pspp-output convert $< $@
.spv.html:
$(AM_V_GEN)utilities/pspp-output convert $< - -O format=html \
| $(SED) -e '\%</body%,$$d' -e '0,/<body/d' > $@.tmp
$(AM_V_at)mv $@.tmp $@
+# Convert a text file into a Texinfo file.
+#
+# Texinfo treats '@', '{', and '}' as special characters, so each of
+# them is prefixed with '@' to make it literal.  The result is written
+# to $@.tmp and then renamed to $@.
+.txt.texi:
+	$(AM_V_GEN)$(SED) 's/[@{}]/@&/g' < $< > $@.tmp
+	$(AM_V_at)mv $@.tmp $@
+
# Insert the link tag for the cascading style sheet.
# But make sure these operations are idempotent.
html-local:
human observer.
However they can also be used for nominal or categorical data.
+The following example defines two variables @exvar{forename} and @exvar{height},
+and reads data into them by manual input:
-@ref{data-list} defines two variables @exvar{forename} and @exvar{height},
-and reads data into them by manual input.
-
-@float Example, data-list
-@cartouche
@example
@prompt{PSPP>} data list list /forename (A12) height.
@prompt{PSPP>} begin data.
@prompt{data>} end data
@prompt{PSPP>}
@end example
-@end cartouche
-@caption{Manual entry of data using the @cmd{DATA LIST} command.
-Two variables
-@exvar{forename} and @exvar{height} are defined and subsequently filled
-with manually entered data.}
-@end float
There are several things to note about this example.
measurements of a sample of healthy adults selected at random.
However, the data entry clerk made a number of mistakes when entering
the data.
-@ref{ex-descriptives} illustrates the use of @cmd{DESCRIPTIVES} to screen this
-data and identify the erroneous values.
+The following example illustrates the use of @cmd{DESCRIPTIVES} to screen this
+data and identify the erroneous values:
-@float Example, ex-descriptives
-@cartouche
@example
@prompt{PSPP>} get file='@value{example-dir}/physiology.sav'.
@prompt{PSPP>} descriptives sex, weight, height.
@end example
-Output:
-@psppoutput {tutorial2}
-@end cartouche
-@caption{Using the @cmd{DESCRIPTIVES} command to display simple
-summary information about the data.
-In this case, the results show unexpectedly low values in the Minimum
-column, suggesting incorrect data entry.}
-@end float
-
-In the output of @ref{ex-descriptives},
-the most interesting column is the minimum value.
+@noindent For this example, @pspp{} produces the following output:
+@psppoutput {tutorial2a}
+
+The most interesting column in the output is the minimum value.
The @exvar{weight} variable has a minimum value of less than zero,
which is clearly erroneous.
Similarly, the @exvar{height} variable's minimum value seems to be very low.
In fact, it is more than 5 standard deviations from the mean, and is a
seemingly bizarre height for an adult person.
-We can examine the data in more detail with the @cmd{EXAMINE}
-command (@pxref{EXAMINE}):
-In @ref{ex1} you can see that the lowest value of @exvar{height} is
+We can look deeper into these discrepancies by issuing an additional
+@cmd{EXAMINE} command:
+
+@example
+@prompt{PSPP>} examine height, weight /statistics=extreme(3).
+@end example
+
+@noindent This command produces the following additional output (in part):
+@psppoutput {tutorial2b}
+
+@noindent
+From this new output, you can see that the lowest value of @exvar{height} is
179 (which we suspect to be erroneous), but the second lowest is 1598
which
-we know from the @cmd{DESCRIPTIVES} command
+we know from @cmd{DESCRIPTIVES}
is within 1 standard deviation from the mean.
-Similarly the @exvar{weight} variable has a lowest value which is
-negative but a plausible value for the second lowest value.
+Similarly, the lowest value of @exvar{weight} is
+negative, but its second lowest value is plausible.
This suggests that the two extreme values are outliers and probably
represent data entry errors.
-@float Example, ex1
-@cartouche
-[@dots{} continue from @ref{ex-descriptives}]
-@example
-@prompt{PSPP>} examine height, weight /statistics=extreme(3).
-@end example
-
-Output:
-@example
- Extreme Values
-+-------------------------------+-----------+-----+
-| |Case Number|Value|
-+-------------------------------+-----------+-----+
-|Height in millimeters Highest 1| 14| 1903|
-| 2| 15| 1884|
-| 3| 12| 1802|
-| Lowest 1| 30| 179|
-| 2| 31| 1598|
-| 3| 28| 1601|
-+-------------------------------+-----------+-----+
-|Weight in kilograms Highest 1| 13| 92.1|
-| 2| 5| 92.1|
-| 3| 17| 91.7|
-| Lowest 1| 38|-55.6|
-| 2| 39| 54.5|
-| 3| 33| 55.4|
-+-------------------------------+-----------+-----+
-@end example
-@end cartouche
-@caption{Using the @cmd{EXAMINE} command to see the extremities of the data
-for different variables. Cases 30 and 38 seem to contain values
-very much lower than the rest of the data.
-They are possibly erroneous.}
-@end float
+The output also identifies the case numbers for each extreme value,
+so we can see that
+cases 30 and 38 are the ones with the erroneous values.
@node Dealing with suspicious data
@subsection Dealing with suspicious data
From now on, they will be ignored in analysis.
For detailed information about the @cmd{RECODE} command @pxref{RECODE}.
-If you now re-run the @cmd{DESCRIPTIVES} or @cmd{EXAMINE} commands in
-@ref{ex-descriptives} and @ref{ex1} you
-will see a data summary with more plausible parameters.
+If you now re-run the @cmd{DESCRIPTIVES} or @cmd{EXAMINE} commands from
+the previous section,
+you will see a data summary with more plausible parameters.
You will also notice that the data summaries indicate the two missing values.
@node Inverting negatively coded variables
Data entry errors are not the only reason for wanting to recode data.
The sample file @file{hotel.sav} comprises data gathered from a
customer satisfaction survey of clients at a particular hotel.
-In @ref{ex-reliability}, this file is loaded for analysis.
-The line @code{display dictionary.} tells @pspp{} to display the
-variables and associated data.
-The output from this command has been omitted from the example for the sake of clarity, but
-you will notice that each of the variables
-@exvar{v1}, @exvar{v2} @dots{} @exvar{v5} are measured on a 5 point Likert scale,
+The following commands load the file and display its
+variables and associated data:
+
+@example
+@prompt{PSPP>} get file='@value{example-dir}/hotel.sav'.
+@prompt{PSPP>} display dictionary.
+@end example
+
+@noindent These commands yield the following output:
+
+@psppoutput {tutorial3}
+
+The output shows that all of the variables @exvar{v1} through @exvar{v5} are measured on a 5 point Likert scale,
with 1 meaning ``Strongly disagree'' and 5 meaning ``Strongly agree''.
-Whilst variables @exvar{v1}, @exvar{v2} and @exvar{v4} record responses
-to a positively posed question, variables @exvar{v3} and @exvar{v5} are
-responses to negatively worded questions.
-In order to perform meaningful analysis, we need to recode the variables so
+However, some of the questions are positively worded (@exvar{v1}, @exvar{v2}, @exvar{v4}) and others are negatively worded (@exvar{v3}, @exvar{v5}).
+To perform meaningful analysis, we need to recode the variables so
that they all measure in the same direction.
We could use the @cmd{RECODE} command, with syntax such as:
@example
compute @var{var} = 6 - @var{var}.
@end example
@noindent
-@ref{ex-reliability} uses this technique to recode the variables
+The following section uses this technique to recode the variables
@exvar{v3} and @exvar{v5}.
After applying @cmd{COMPUTE} for both variables,
all subsequent commands will use the inverted values.
One would therefore expect the values of these variables (after recoding)
to closely follow one another, and we can test that with the @cmd{RELIABILITY}
command (@pxref{RELIABILITY}).
-@ref{ex-reliability} shows a @pspp{} session where the user (after recoding
-negatively scaled variables) requests reliability statistics for
-@exvar{v1}, @exvar{v3} and @exvar{v4}.
+The following example shows a @pspp{} session where the user recodes
+negatively scaled variables and then requests reliability statistics for
+@exvar{v1}, @exvar{v3}, and @exvar{v4}.
-@float Example, ex-reliability
-@cartouche
@example
@prompt{PSPP>} get file='@value{example-dir}/hotel.sav'.
-@prompt{PSPP>} display dictionary.
-@prompt{PSPP>} * recode negatively worded questions.
@prompt{PSPP>} compute v3 = 6 - v3.
@prompt{PSPP>} compute v5 = 6 - v5.
@prompt{PSPP>} reliability v1, v3, v4.
@end example
-Output (dictionary information omitted for clarity):
+@noindent This yields the following output:
@psppoutput {tutorial4}
-@end cartouche
-@caption{Recoding negatively scaled variables, and testing for
-reliability with the @cmd{RELIABILITY} command. The Cronbach Alpha
-coefficient suggests a high degree of reliability among variables
-@exvar{v1}, @exvar{v3} and @exvar{v4}.}
-@end float
As a rule of thumb, many statisticians consider a value of Cronbach's Alpha of
0.7 or higher to indicate reliable data.
-Here, the value is 0.81 so the data and the recoding that we performed
-are vindicated.
+
+Here, the value is 0.81, which suggests a high degree of reliability
+among variables @exvar{v1}, @exvar{v3} and @exvar{v4}, so the data and
+the recoding that we performed are vindicated.
@node Testing for normality
It is necessary then to ensure normality before deciding upon the
test procedure to use. One way to do this uses the @cmd{EXAMINE} command.
-In @ref{normality}, a researcher was examining the failure rates
+In the following example, a researcher was examining the failure rates
of equipment produced by an engineering company.
The file @file{repairs.sav} contains the mean time between
failures (@exvar{mtbf}) of some items of equipment subject to the study.
Before performing linear analysis on the data,
the researcher wanted to ascertain that the data is normally distributed.
+@example
+@prompt{PSPP>} get file='@value{example-dir}/repairs.sav'.
+@prompt{PSPP>} examine mtbf
+ /statistics=descriptives.
+@end example
+
+@noindent This produces the following output:
+@psppoutput {tutorial5a}
+
A normal distribution has a skewness and kurtosis of zero.
-Looking at the skewness of @exvar{mtbf} in @ref{normality} it is clear
+The skewness of @exvar{mtbf} in the output above makes it clear
that the mtbf figures have a lot of positive skew and are therefore
not drawn from a normally distributed variable.
Positive skew can often be compensated for by applying a logarithmic
-transformation.
-This is done with the @cmd{COMPUTE} command in the line
+transformation, as in the following continuation of the example:
+
+@example
+@prompt{PSPP>} compute mtbf_ln = ln (mtbf).
+@prompt{PSPP>} examine mtbf_ln
+ /statistics=descriptives.
+@end example
+
+@noindent This produces the following additional output:
+@psppoutput {tutorial5b}
+
+The @cmd{COMPUTE} command in the first line above performs the
+logarithmic transformation:
@example
compute mtbf_ln = ln (mtbf).
@end example
Rather than redefining the existing variable, this use of @cmd{COMPUTE}
defines a new variable @exvar{mtbf_ln} which is
the natural logarithm of @exvar{mtbf}.
-The final command in this example calls @cmd{EXAMINE} on this new variable,
-and it can be seen from the results that both the skewness and
+The final command in this example calls @cmd{EXAMINE} on this new variable.
+The results show that both the skewness and
kurtosis for @exvar{mtbf_ln} are very close to zero.
This provides some confidence that the @exvar{mtbf_ln} variable is
normally distributed and thus safe for linear analysis.
an appropriate non-parametric test instead of a linear one.
@xref{NPAR TESTS}, for information about non-parametric tests.
-@float Example, normality
-@cartouche
-@example
-@prompt{PSPP>} get file='@value{example-dir}/repairs.sav'.
-@prompt{PSPP>} examine mtbf
- /statistics=descriptives.
-@prompt{PSPP>} compute mtbf_ln = ln (mtbf).
-@prompt{PSPP>} examine mtbf_ln
- /statistics=descriptives.
-@end example
-
-Output:
-@psppoutput {tutorial5}
-@end cartouche
-@caption{Testing for normality using the @cmd{EXAMINE} command and applying
-a logarithmic transformation.
-The @exvar{mtbf} variable has a large positive skew and is therefore
-unsuitable for linear statistical analysis.
-However the transformed variable (@exvar{mtbf_ln}) is close to normal and
-would appear to be more suitable.}
-@end float
-
-
@node Hypothesis Testing
@section Hypothesis Testing
The @cmd{T-TEST} command is used to find out whether or not two separate
subsets have the same mean.
-@ref{ex-t-test} uses the file @file{physiology.sav} previously
-encountered.
A researcher suspected that the heights and core body
temperature of persons might be different depending upon their sex.
-To investigate this, he posed two null hypotheses:
+To investigate this, he posed two null hypotheses based on the data
+from @file{physiology.sav} previously encountered:
@itemize @bullet
@item The mean heights of males and females in the population are equal.
@item The mean body temperature of males and
the null hypothesis and conclude that there is insufficient evidence to
suggest that the body temperature of male and female persons are different.
-@float Example, ex-t-test
-@cartouche
+The syntax for this analysis is:
@example
@prompt{PSPP>} get file='@value{example-dir}/physiology.sav'.
@prompt{PSPP>} recode height (179 = SYSMIS).
@prompt{PSPP>} t-test group=sex(0,1) /variables = height temperature.
@end example
-Output:
+
+@pspp{} produces the following output for this syntax:
@psppoutput {tutorial6}
-@end cartouche
-@caption{The @cmd{T-TEST} command tests for differences of means.
+
+The @cmd{T-TEST} command tests for differences of means.
Here, the @exvar{height} variable's two tailed significance is less than
0.05, so the null hypothesis can be rejected.
Thus, the evidence suggests there is a difference between the heights of
However the significance of the test for the @exvar{temperature}
variable is greater than 0.05 so the null hypothesis cannot be
rejected, and there is insufficient evidence to suggest a difference
-in body temperature.}
-@end float
+in body temperature.
@node Linear Regression
@subsection Linear Regression
If a variable is found to be linearly related, then this can be used to
predict future values of that variable.
-In example @ref{ex-regression}, the service department of the company wanted to
+In the following example, the service department of the company wanted to
be able to predict the time to repair equipment, in order to improve
the accuracy of their quotations.
It was suggested that the time to repair might be related to the time
This command not only tests if the variables are related, but also
identifies the potential linear relationship. @xref{REGRESSION}.
+A first attempt includes @exvar{duty_cycle}:
-@float Example, ex-regression
-@cartouche
@example
@prompt{PSPP>} get file='@value{example-dir}/repairs.sav'.
@prompt{PSPP>} regression /variables = mtbf duty_cycle /dependent = mttr.
-@prompt{PSPP>} regression /variables = mtbf /dependent = mttr.
@end example
-Output (excerpts):
-@psppoutput {tutorial7b}
-@end cartouche
-@caption{Linear regression analysis to find a predictor for
-@exvar{mttr}.
-The first attempt, including @exvar{duty_cycle}, produces some
-unacceptable high significance values.
-However the second attempt, which excludes @exvar{duty_cycle}, produces
-significance values no higher than 0.06.
-This suggests that @exvar{mtbf} alone may be a suitable predictor
-for @exvar{mttr}.}
-@end float
-
-The coefficients in the first table suggest that the formula
+
+@noindent This attempt yields the following output (in part):
+@psppoutput {tutorial7a}
+
+The coefficients in the above table suggest that the formula
@math{@var{mttr} = 9.81 + 3.1 \times @var{mtbf} + 1.09 \times @var{duty_cycle}}
can be used to predict the time to repair.
However, the significance value for the @var{duty_cycle} coefficient
is very high, which would make this an unsafe predictor.
For this reason, the test was repeated, but omitting the
-@exvar{duty_cycle} variable.
-This time, the significance of all coefficients no higher than 0.06,
+@exvar{duty_cycle} variable:
+
+@example
+@prompt{PSPP>} regression /variables = mtbf /dependent = mttr.
+@end example
+
+@noindent
+This second try produces the following output (in part):
+@psppoutput {tutorial7b}
+
+This time, the significance of all coefficients is no higher than 0.06,
suggesting that at the 0.06 level, the formula
@math{@var{mttr} = 10.5 + 3.11 \times @var{mtbf}} is a reliable
predictor of the time to repair.