dnl PSPP - a program for statistical analysis.
-dnl Copyright (C) 2017 Free Software Foundation, Inc.
-dnl
+dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
+dnl
dnl This program is free software: you can redistribute it and/or modify
dnl it under the terms of the GNU General Public License as published by
dnl the Free Software Foundation, either version 3 of the License, or
dnl (at your option) any later version.
-dnl
+dnl
dnl This program is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
dnl GNU General Public License for more details.
-dnl
+dnl
dnl You should have received a copy of the GNU General Public License
dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
dnl
,Kurtosis,,-.36,.92
Table: Case Processing Summary
-,,Cases,,,,,
+,Manufacturer,Cases,,,,,
,,Valid,,Missing,,Total,
-,Manufacturer,N,Percent,N,Percent,N,Percent
+,,N,Percent,N,Percent,N,Percent
Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
weight by w.
-examine v1
+examine v1
/statistics=extreme(6)
.
])
AT_CLEANUP
-
AT_SETUP([EXAMINE -- extremes with fractional weights])
AT_KEYWORDS([categorical categoricals])
AT_DATA([extreme.sps], [dnl
AT_CLEANUP
dnl Test the PERCENTILES subcommand of the EXAMINE command.
-dnl In particular test that it behaves properly when there are only
+dnl In particular test that it behaves properly when there are only
dnl a few cases.
AT_SETUP([EXAMINE -- percentiles])
AT_KEYWORDS([categorical categoricals])
AT_DATA([examine.sps], [dnl
DATA LIST LIST /X *.
BEGIN DATA.
-2.00
-8.00
-5.00
+2.00
+8.00
+5.00
END DATA.
EXAMINE /x
AT_DATA([examine.sps], [dnl
DATA LIST LIST /x * y *.
BEGIN DATA.
-1 1
+1 1
2 1
3 1
4 1
x,6,85.7%,1,14.3%,7,100.0%
Table: Case Processing Summary
-,,Cases,,,,,
+,y,Cases,,,,,
,,Valid,,Missing,,Total,
-,y,N,Percent,N,Percent,N,Percent
+,,N,Percent,N,Percent,N,Percent
x,1.00,4,100.0%,0,.0%,4,100.0%
,2.00,2,66.7%,1,33.3%,3,100.0%
])
examine a by x by y
/statistics=DESCRIPTIVES
- .
+ .
])
AT_CHECK([pspp -o pspp.csv examine.sps])
dnl Ignore output -- this is just a no-crash check.
EXAMINE /VARIABLES= z BY y.
-EXAMINE /VARIABLES= z.
+EXAMINE /VARIABLES= z.
])
AT_CHECK([pspp -o pspp.csv examine.sps])
dnl Ignore output -- this is just a no-crash check.
3 1
4 1
end data.
-examine x by y /statistics=descriptives.
+examine x by y /statistics=descriptives.
])
AT_CHECK([pspp -o pspp.csv examine.sps])
dnl Ignore output -- this is just a no-crash check.
AT_DATA([examine.sps], [dnl
DATA LIST LIST /quality * .
BEGIN DATA
-3
+3
END DATA
EXAMINE
- quality
- /STATISTICS descriptives
+ quality
+ /STATISTICS descriptives
/PLOT = histogram
.
])
.
END DATA.
-EXAMINE /x
+EXAMINE /x
PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
/ID=x
/STATISTICS = DESCRIPTIVES EXTREME (5) ALL
dnl The actual bug that this checks for has been lost.
AT_SETUP([EXAMINE -- big input doesn't crash 2])
AT_KEYWORDS([categorical categoricals slow])
-AT_DATA([make-big-input.pl],
- [for ($i=0; $i<100000; $i++) { print "AB12\n" };
- for ($i=0; $i<100000; $i++) { print "AB04\n" };
-])
-AT_CHECK([$PERL make-big-input.pl > large.txt])
+AT_CHECK([$PYTHON3 -c '
+for i in range(100000): print("AB12")
+for i in range(100000): print("AB04")
+' > large.txt])
AT_DATA([examine.sps], [dnl
DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
])
AT_CHECK([pspp -o pspp.csv examine.sps])
dnl Ignore output -- this is just a no-crash check.
-AT_DATA([more-big-input.pl],
- [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
-])
-AT_CHECK([$PERL more-big-input.pl >> large.txt])
+AT_CHECK([$PYTHON3 -c 'for i in range(25000): print("AB04\nAB12")' >> large.txt])
AT_CHECK([pspp -o pspp.csv examine.sps])
dnl Ignore output -- this is just a no-crash check.
AT_CLEANUP
300 threehundred
end data.
+set small=0.
examine x
/statistics = extreme
/id = y
.
])
-AT_CHECK([pspp -O format=csv examine-id.sps], [0],
-[Table: Case Processing Summary
+AT_CHECK([pspp -O format=csv examine-id.sps], [0], [dnl
+Table: Case Processing Summary
,Cases,,,,,
,Valid,,Missing,,Total,
,N,Percent,N,Percent,N,Percent
,,3,three,3.00
,,4,four,4.00
,,5,five,5.00
+
+Table: Tests of Normality
+,Shapiro-Wilk,,
+,Statistic,df,Sig.
+x,.37,14,.00
])
-AT_CLEANUP
+AT_CLEANUP
dnl Test for a crash which happened on cleanup from a bad input syntax
AT_SETUP([EXAMINE -- Bad Input])
9 2
end data.
-EXAMINE
+EXAMINE
/VARIABLES= h
BY g
/STATISTICS = DESCRIPTIVES EXTREME
AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
-AT_CLEANUP
+AT_CLEANUP
dnl Check the MISSING=REPORT option
g,F8.0
Table: Case Processing Summary
-,,Cases,,,,,
+,g,Cases,,,,,
,,Valid,,Missing,,Total,
-,g,N,Percent,N,Percent,N,Percent
+,,N,Percent,N,Percent,N,Percent
x,.,4,100.0%,0,.0%,4,100.0%
,1,9,100.0%,0,.0%,9,100.0%
,2,9,100.0%,0,.0%,9,100.0%
,9[a],4,100.0%,0,.0%,4,100.0%
,99[a],5,100.0%,0,.0%,5,100.0%
-
-Footnotes:
-a,User-missing value.
+Footnote: a. User-missing value.
Table: Extreme Values
,g,,,Case Number,Value
,,,3,25,701
,,,4,26,801
,,,5,27,901
-
-Footnotes:
-a,User-missing value.
+Footnote: a. User-missing value.
]])
-AT_CLEANUP
+AT_CLEANUP
dnl Run a test of the basic STATISTICS using a "real"
,Kurtosis,,.5300,.4783
])
-AT_CLEANUP
+AT_CLEANUP
AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
-AT_CLEANUP
+AT_CLEANUP
AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
-AT_CLEANUP
+AT_CLEANUP
,,3,2,4.00
])
-AT_CLEANUP
+AT_CLEANUP
dnl This is an example from doc/tutorial.texi
dnl So if the results of this have to be changed in any way,
COMPUTE mtbf_ln = LN (mtbf).
EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
])
-AT_CHECK([pspp -o pspp.csv -o pspp.txt repairs.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
+
+AT_CHECK([pspp -O format=csv repairs.sps], [0], [dnl
Table: Case Processing Summary
,Cases,,,,,
,Valid,,Missing,,Total,
,N,Percent,N,Percent,N,Percent
-Mean time between failures (months) ,15,100.0%,0,.0%,15,100.0%
+Mean time between failures (months) ,30,100.0%,0,.0%,30,100.0%
Table: Descriptives
,,,Statistic,Std. Error
-Mean time between failures (months) ,Mean,,8.32,1.62
-,95% Confidence Interval for Mean,Lower Bound,4.85,
-,,Upper Bound,11.79,
-,5% Trimmed Mean,,7.69,
-,Median,,8.12,
-,Variance,,39.21,
-,Std. Deviation,,6.26,
+Mean time between failures (months) ,Mean,,8.78,1.10
+,95% Confidence Interval for Mean,Lower Bound,6.53,
+,,Upper Bound,11.04,
+,5% Trimmed Mean,,8.20,
+,Median,,8.29,
+,Variance,,36.34,
+,Std. Deviation,,6.03,
,Minimum,,1.63,
,Maximum,,26.47,
,Range,,24.84,
-,Interquartile Range,,5.83,
-,Skewness,,1.85,.58
-,Kurtosis,,4.49,1.12
+,Interquartile Range,,6.03,
+,Skewness,,1.65,.43
+,Kurtosis,,3.41,.83
Table: Case Processing Summary
,Cases,,,,,
,Valid,,Missing,,Total,
,N,Percent,N,Percent,N,Percent
-mtbf_ln,15,100.0%,0,.0%,15,100.0%
+mtbf_ln,30,100.0%,0,.0%,30,100.0%
Table: Descriptives
,,,Statistic,Std. Error
-mtbf_ln,Mean,,1.88,.19
-,95% Confidence Interval for Mean,Lower Bound,1.47,
-,,Upper Bound,2.29,
-,5% Trimmed Mean,,1.88,
-,Median,,2.09,
-,Variance,,.54,
-,Std. Deviation,,.74,
+mtbf_ln,Mean,,1.95,.13
+,95% Confidence Interval for Mean,Lower Bound,1.69,
+,,Upper Bound,2.22,
+,5% Trimmed Mean,,1.96,
+,Median,,2.11,
+,Variance,,.49,
+,Std. Deviation,,.70,
,Minimum,,.49,
,Maximum,,3.28,
,Range,,2.79,
-,Interquartile Range,,.92,
-,Skewness,,-.16,.58
-,Kurtosis,,-.09,1.12
+,Interquartile Range,,.88,
+,Skewness,,-.37,.43
+,Kurtosis,,.01,.83
])
+
AT_CLEANUP
dnl This is an example from doc/tutorial.texi
])
AT_CLEANUP
+
+
+AT_SETUP([EXAMINE -- Crash on unrepresentable graphs])
+AT_DATA([examine.sps], [dnl
+data list notable list /x * g *.
+begin data.
+96 1
+end data.
+
+examine x by g
+ /nototal
+ /plot = all.
+])
+dnl This bug only manifested itself on cairo-based drivers.
+AT_CHECK([pspp -O format=pdf examine.sps], [0], [ignore], [ignore])
+AT_CLEANUP
+
+
+dnl This example comes from the web site:
+dnl https://www.spsstests.com/2018/11/shapiro-wilk-normality-test-spss.html
+AT_SETUP([EXAMINE -- shapiro-wilk 1])
+AT_KEYWORDS([shapiro wilk])
+AT_DATA([shapiro-wilk.sps], [dnl
+data list notable list /x * g *.
+begin data.
+96 1
+98 1
+95 1
+89 1
+90 1
+92 1
+94 1
+93 1
+97 1
+100 1
+99 2
+96 2
+80 2
+89 2
+91 2
+92 2
+93 2
+94 2
+99 2
+80 2
+end data.
+
+set format F22.3.
+
+examine x by g
+ /nototal
+ /plot = all.
+])
+
+AT_CHECK([pspp -O format=csv shapiro-wilk.sps], [0],[dnl
+Table: Case Processing Summary
+,g,Cases,,,,,
+,,Valid,,Missing,,Total,
+,,N,Percent,N,Percent,N,Percent
+x,1.00,10,100.0%,0,.0%,10,100.0%
+,2.00,10,100.0%,0,.0%,10,100.0%
+
+Table: Tests of Normality
+,g,Shapiro-Wilk,,
+,,Statistic,df,Sig.
+x,1.00,.984,10,.983
+,2.00,.882,10,.136
+])
+
+AT_CLEANUP
+
+
+dnl This example comes from the web site:
+dnl http://www.real-statistics.com/tests-normality-and-symmetry/statistical-tests-normality-symmetry/shapiro-wilk-expanded-test/
+dnl It uses a dataset larger than 11 samples. Hence the alternative method for
+dnl significance is used.
+AT_SETUP([EXAMINE -- shapiro-wilk 2])
+AT_KEYWORDS([shapiro wilk])
+AT_DATA([shapiro-wilk2.sps], [dnl
+data list notable list /x *.
+begin data.
+65
+61
+63
+86
+70
+55
+74
+35
+72
+68
+45
+58
+end data.
+
+set format F22.3.
+
+examine x
+ /plot = boxplot.
+])
+
+AT_CHECK([pspp -O format=csv shapiro-wilk2.sps], [0],[dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,12,100.0%,0,.0%,12,100.0%
+
+Table: Tests of Normality
+,Shapiro-Wilk,,
+,Statistic,df,Sig.
+x,.971,12,.922
+])
+
+AT_CLEANUP