EXAMINE: Implement the Shapiro-Wilk Test.
[pspp] / tests / language / stats / examine.at
index 4cd52b9feb9dd4439bdb0235d508d54eee13504f..86297931d4478c514ed5373d5bbf553760960850 100644 (file)
@@ -1,6 +1,23 @@
+dnl PSPP - a program for statistical analysis.
+dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
+dnl 
+dnl This program is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation, either version 3 of the License, or
+dnl (at your option) any later version.
+dnl 
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+dnl GNU General Public License for more details.
+dnl 
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
+dnl
 AT_BANNER([EXAMINE])
 
 AT_SETUP([EXAMINE])
 AT_BANNER([EXAMINE])
 
 AT_SETUP([EXAMINE])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [
 DATA LIST LIST /QUALITY * W * BRAND * .
 BEGIN DATA
 AT_DATA([examine.sps], [
 DATA LIST LIST /QUALITY * W * BRAND * .
 BEGIN DATA
@@ -36,10 +53,11 @@ EXAMINE
        /STATISTICS descriptives extreme(3)
        .
 ])
        /STATISTICS descriptives extreme(3)
        .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
+
+
 dnl In the following data, only the extreme values have been checked.
 dnl The descriptives have been blindly pasted.
 dnl In the following data, only the extreme values have been checked.
 dnl The descriptives have been blindly pasted.
-AT_CHECK([cat pspp.csv], [0], [dnl
+AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
 Table: Reading free-form data from INLINE.
 Variable,Format
 QUALITY,F8.0
 Table: Reading free-form data from INLINE.
 Variable,Format
 QUALITY,F8.0
@@ -69,16 +87,16 @@ Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-Breaking Strain,24.00,100%,.00,0%,24.00,100%
+Breaking Strain,24.00,100.0%,.00,.0%,24.00,100.0%
 
 Table: Extreme Values
 ,,,Case Number,Value
 Breaking Strain,Highest,1,12,7.00
 ,,2,16,6.00
 
 Table: Extreme Values
 ,,,Case Number,Value
 Breaking Strain,Highest,1,12,7.00
 ,,2,16,6.00
-,,3,7,5.00
+,,3,14,5.00
 ,Lowest,1,3,1.00
 ,Lowest,1,3,1.00
-,,2,3,1.00
-,,3,4,1.00
+,,2,4,1.00
+,,3,2,2.00
 
 Table: Descriptives
 ,,,Statistic,Std. Error
 
 Table: Descriptives
 ,,,Statistic,Std. Error
@@ -100,21 +118,21 @@ Table: Case Processing Summary
 ,,Cases,,,,,
 ,,Valid,,Missing,,Total,
 ,Manufacturer,N,Percent,N,Percent,N,Percent
 ,,Cases,,,,,
 ,,Valid,,Missing,,Total,
 ,Manufacturer,N,Percent,N,Percent,N,Percent
-Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
-,Bloggs,8.00,100%,.00,0%,8.00,100%
-,Charlies,8.00,100%,.00,0%,8.00,100%
+Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
+,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
+,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
 
 Table: Extreme Values
 ,Manufacturer,,,Case Number,Value
 
 Table: Extreme Values
 ,Manufacturer,,,Case Number,Value
-Breaking Strain,Aspeger,Highest,1,5,4.00
-,,,2,6,4.00
+Breaking Strain,Aspeger,Highest,1,6,4.00
+,,,2,5,4.00
 ,,,3,1,3.00
 ,,Lowest,1,3,1.00
 ,,,3,1,3.00
 ,,Lowest,1,3,1.00
-,,,2,3,1.00
-,,,3,4,1.00
+,,,2,4,1.00
+,,,3,2,2.00
 ,Bloggs,Highest,1,7,5.00
 ,,,2,9,4.00
 ,Bloggs,Highest,1,7,5.00
 ,,,2,9,4.00
-,,,3,9,4.00
+,,,3,11,3.00
 ,,Lowest,1,8,2.00
 ,,,2,10,2.00
 ,,,3,11,3.00
 ,,Lowest,1,8,2.00
 ,,,2,10,2.00
 ,,,3,11,3.00
@@ -123,7 +141,7 @@ Breaking Strain,Aspeger,Highest,1,5,4.00
 ,,,3,14,5.00
 ,,Lowest,1,15,3.00
 ,,,2,13,4.00
 ,,,3,14,5.00
 ,,Lowest,1,15,3.00
 ,,,2,13,4.00
-,,,3,13,4.00
+,,,3,14,5.00
 
 Table: Descriptives
 ,Manufacturer,,,Statistic,Std. Error
 
 Table: Descriptives
 ,Manufacturer,,,Statistic,Std. Error
@@ -167,9 +185,11 @@ Breaking Strain,Aspeger,Mean,,2.25,.45
 ,,Skewness,,.30,.75
 ,,Kurtosis,,.15,1.48
 ])
 ,,Skewness,,.30,.75
 ,,Kurtosis,,.15,1.48
 ])
+
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- extremes])
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- extremes])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 data list free /V1 W
 begin data.
 AT_DATA([examine.sps], [dnl
 data list free /V1 W
 begin data.
@@ -202,35 +222,112 @@ examine v1
  /statistics=extreme(6)
  .
 ])
  /statistics=extreme(6)
  .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
+
+AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-V1,23.00,100%,.00,0%,23.00,100%
+V1,23.00,100.0%,.00,.0%,23.00,100.0%
 
 Table: Extreme Values
 ,,,Case Number,Value
 V1,Highest,1,21,20.00
 ,,2,20,19.00
 ,,3,19,18.00
 
 Table: Extreme Values
 ,,,Case Number,Value
 V1,Highest,1,21,20.00
 ,,2,20,19.00
 ,,3,19,18.00
-,,4,19,18.00
-,,5,18,17.00
-,,6,17,16.00
+,,4,18,17.00
+,,5,17,16.00
+,,6,16,15.00
 ,Lowest,1,1,1.00
 ,,2,2,2.00
 ,,3,3,3.00
 ,Lowest,1,1,1.00
 ,,2,2,2.00
 ,,3,3,3.00
-,,4,3,3.00
-,,5,4,3.00
-,,6,5,4.00
+,,4,4,3.00
+,,5,5,4.00
+,,6,6,5.00
 ])
 ])
+
+AT_CLEANUP
+
+
+AT_SETUP([EXAMINE -- extremes with fractional weights])
+AT_KEYWORDS([categorical categoricals])
+AT_DATA([extreme.sps], [dnl
+set format=F20.3.
+data list notable list /w * x *.
+begin data.
+ 0.88  300000
+ 0.86  320000
+ 0.98  480000
+ 0.93  960000
+ 1.35  960000
+ 1.31  960000
+ 0.88  960000
+ 0.88  1080000
+ 0.88  1080000
+ 0.95  1200000
+ 1.47  1200000
+ 0.93  1200000
+ 0.98  1320000
+ 1.31  1380000
+ 0.93  1440000
+ 0.88  1560000
+ 1.56  1560000
+ 1.47  1560000
+end data.
+
+weight by w.
+
+
+EXAMINE
+        x
+        /STATISTICS = DESCRIPTIVES EXTREME (5)
+        .
+])
+
+AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,19.430,100.0%,.000,.0%,19.430,100.0%
+
+Table: Extreme Values
+,,,Case Number,Value
+x,Highest,1,18,1560000.000
+,,2,17,1560000.000
+,,3,16,1560000.000
+,,4,15,1440000.000
+,,5,14,1380000.000
+,Lowest,1,1,300000.000
+,,2,2,320000.000
+,,3,3,480000.000
+,,4,4,960000.000
+,,5,5,960000.000
+
+Table: Descriptives
+,,,Statistic,Std. Error
+x,Mean,,1120010.293,86222.178
+,95% Confidence Interval for Mean,Lower Bound,939166.693,
+,,Upper Bound,1300853.894,
+,5% Trimmed Mean,,1141017.899,
+,Median,,1200000.000,
+,Variance,,144447748124.869,
+,Std. Deviation,,380062.821,
+,Minimum,,300000.000,
+,Maximum,,1560000.000,
+,Range,,1260000.000,
+,Interquartile Range,,467258.065,
+,Skewness,,-.887,.519
+,Kurtosis,,.340,1.005
+])
+
 AT_CLEANUP
 
 dnl Test the PERCENTILES subcommand of the EXAMINE command.
 dnl In particular test that it behaves properly when there are only 
 dnl a few cases.
 AT_SETUP([EXAMINE -- percentiles])
 AT_CLEANUP
 
 dnl Test the PERCENTILES subcommand of the EXAMINE command.
 dnl In particular test that it behaves properly when there are only 
 dnl a few cases.
 AT_SETUP([EXAMINE -- percentiles])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X *.
 BEGIN DATA.
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X *.
 BEGIN DATA.
@@ -254,7 +351,7 @@ EXAMINE /x
 EXAMINE /x
        /PERCENTILES=AEMPIRICAL.
 ])
 EXAMINE /x
        /PERCENTILES=AEMPIRICAL.
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
+AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
 AT_CHECK([cat pspp.csv], [0], [dnl
 Table: Reading free-form data from INLINE.
 Variable,Format
 AT_CHECK([cat pspp.csv], [0], [dnl
 Table: Reading free-form data from INLINE.
 Variable,Format
@@ -264,19 +361,19 @@ Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-X,3,100%,0,0%,3,100%
+X,3,100.0%,0,.0%,3,100.0%
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 ,,5,10,25,50,75,90,95
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 ,,5,10,25,50,75,90,95
-X,HAverage,.40,.80,2.00,5.00,8.00,8.00,8.00
+X,Weighted Average,.40,.80,2.00,5.00,8.00,8.00,8.00
 ,Tukey's Hinges,,,3.50,5.00,6.50,,
 
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 ,Tukey's Hinges,,,3.50,5.00,6.50,,
 
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-X,3,100%,0,0%,3,100%
+X,3,100.0%,0,.0%,3,100.0%
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 
 Table: Percentiles
 ,,Percentiles,,,,,,
@@ -288,41 +385,42 @@ Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-X,3,100%,0,0%,3,100%
+X,3,100.0%,0,.0%,3,100.0%
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 ,,5,10,25,50,75,90,95
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 ,,5,10,25,50,75,90,95
-X,Rounded,.00,.00,2.00,5.00,5.00,8.00,8.00
+X,Weighted Average,.00,.00,2.00,5.00,5.00,8.00,8.00
 ,Tukey's Hinges,,,3.50,5.00,6.50,,
 
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 ,Tukey's Hinges,,,3.50,5.00,6.50,,
 
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-X,3,100%,0,0%,3,100%
+X,3,100.0%,0,.0%,3,100.0%
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 ,,5,10,25,50,75,90,95
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 ,,5,10,25,50,75,90,95
-X,Empirical,2.00,2.00,2.00,5.00,8.00,8.00,8.00
+X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
 ,Tukey's Hinges,,,3.50,5.00,6.50,,
 
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 ,Tukey's Hinges,,,3.50,5.00,6.50,,
 
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-X,3,100%,0,0%,3,100%
+X,3,100.0%,0,.0%,3,100.0%
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 ,,5,10,25,50,75,90,95
 
 Table: Percentiles
 ,,Percentiles,,,,,,
 ,,5,10,25,50,75,90,95
-X,Empirical with averaging,2.00,2.00,2.00,5.00,8.00,8.00,8.00
+X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
 ,Tukey's Hinges,,,3.50,5.00,6.50,,
 ])
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- missing values])
 ,Tukey's Hinges,,,3.50,5.00,6.50,,
 ])
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- missing values])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /x * y *.
 BEGIN DATA.
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /x * y *.
 BEGIN DATA.
@@ -335,7 +433,9 @@ BEGIN DATA.
 .   2
 END DATA
 
 .   2
 END DATA
 
-EXAMINE /x by y.
+EXAMINE /x by y
+        /MISSING = PAIRWISE
+        .
 ])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 AT_CHECK([cat pspp.csv], [0], [dnl
 ])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 AT_CHECK([cat pspp.csv], [0], [dnl
@@ -348,18 +448,47 @@ Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-x,6,85.7143%,1,14.2857%,7,100%
+x,6,85.7%,1,14.3%,7,100.0%
 
 Table: Case Processing Summary
 ,,Cases,,,,,
 ,,Valid,,Missing,,Total,
 ,y,N,Percent,N,Percent,N,Percent
 
 Table: Case Processing Summary
 ,,Cases,,,,,
 ,,Valid,,Missing,,Total,
 ,y,N,Percent,N,Percent,N,Percent
-x,1.00,4,100%,0,0%,4,100%
-,2.00,2,66.6667%,1,33.3333%,3,100%
+x,1.00,4,100.0%,0,.0%,4,100.0%
+,2.00,2,66.7%,1,33.3%,3,100.0%
+])
+AT_CLEANUP
+
+
+AT_SETUP([EXAMINE -- user missing values])
+AT_KEYWORDS([categorical categoricals])
+AT_DATA([examine-m.sps], [dnl
+DATA LIST notable LIST /x * y *.
+BEGIN DATA.
+1                   2
+9999999999          2
+9999999999          99
+END DATA.
+
+MISSING VALUES x (9999999999).
+MISSING VALUES y (99).
+
+EXAMINE
+       /VARIABLES= x y
+       /MISSING=PAIRWISE.
+])
+AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,1,33.3%,2,66.7%,3,100.0%
+y,2,66.7%,1,33.3%,3,100.0%
 ])
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- missing values and percentiles])
 ])
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- missing values and percentiles])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X *.
 BEGIN DATA.
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X *.
 BEGIN DATA.
@@ -380,6 +509,7 @@ AT_CLEANUP
 dnl Tests the trimmed mean calculation in the case
 dnl where the data is weighted towards the centre.
 AT_SETUP([EXAMINE -- trimmed mean])
 dnl Tests the trimmed mean calculation in the case
 dnl where the data is weighted towards the centre.
 AT_SETUP([EXAMINE -- trimmed mean])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X * C *.
 BEGIN DATA.
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X * C *.
 BEGIN DATA.
@@ -406,7 +536,7 @@ Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 ,N,Percent,N,Percent,N,Percent
-X,52.00,100%,.00,0%,52.00,100%
+X,52.00,100.0%,.00,.0%,52.00,100.0%
 
 Table: Descriptives
 ,,,Statistic,Std. Error
 
 Table: Descriptives
 ,,,Statistic,Std. Error
@@ -427,6 +557,7 @@ X,Mean,,2.02,.03
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- crash bug])
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- crash bug])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 data list list /a * x * y *.
 begin data.
 AT_DATA([examine.sps], [dnl
 data list list /a * x * y *.
 begin data.
@@ -445,6 +576,7 @@ AT_CLEANUP
 
 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
 AT_SETUP([EXAMINE -- consecutive runs don't crash])
 
 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
 AT_SETUP([EXAMINE -- consecutive runs don't crash])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 data list list /y * z *.
 begin data.
 AT_DATA([examine.sps], [dnl
 data list list /y * z *.
 begin data.
@@ -463,6 +595,7 @@ AT_CLEANUP
 
 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
 AT_SETUP([EXAMINE -- missing values don't crash])
 
 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
 AT_SETUP([EXAMINE -- missing values don't crash])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 data list list /x * y *.
 begin data.
 AT_DATA([examine.sps], [dnl
 data list list /x * y *.
 begin data.
@@ -480,6 +613,7 @@ AT_CLEANUP
 
 dnl Test that having only a single case doesn't crash.
 AT_SETUP([EXAMINE -- single case doesn't crash])
 
 dnl Test that having only a single case doesn't crash.
 AT_SETUP([EXAMINE -- single case doesn't crash])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /quality * .
 BEGIN DATA
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /quality * .
 BEGIN DATA
@@ -490,14 +624,16 @@ END DATA
 EXAMINE
        quality 
        /STATISTICS descriptives 
 EXAMINE
        quality 
        /STATISTICS descriptives 
+        /PLOT = histogram
        .
 ])
        .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
+AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 
 dnl Test that all-missing data doesn't crash.
 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 
 dnl Test that all-missing data doesn't crash.
 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
+AT_KEYWORDS([categorical categoricals])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /x *.
 BEGIN DATA.
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /x *.
 BEGIN DATA.
@@ -507,10 +643,729 @@ BEGIN DATA.
 .
 END DATA.
 
 .
 END DATA.
 
-EXAMINE /x PLOT=HISTOGRAM.
+EXAMINE /x 
+       PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
+       /ID=x
+        /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
+       /PERCENTILE=AEMPIRICAL
+       .
 ])
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps], [0], [dnl
-warning: Not creating plot because data set is empty.
+AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
+dnl Ignore output -- this is just a no-crash check.
+AT_CLEANUP
+
+dnl Test that big input doesn't crash (bug 11307).
+AT_SETUP([EXAMINE -- big input doesn't crash])
+AT_KEYWORDS([categorical categoricals slow])
+AT_DATA([examine.sps], [dnl
+INPUT PROGRAM.
+       LOOP #I=1 TO 50000.
+               COMPUTE X=NORMAL(10).
+               END CASE.
+       END LOOP.
+       END FILE.
+END INPUT PROGRAM.
+
+
+EXAMINE /x
+       /STATISTICS=DESCRIPTIVES.
 ])
 ])
+AT_CHECK([pspp -o pspp.csv examine.sps])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
+
+dnl Another test that big input doesn't crash.
+dnl The actual bug that this checks for has been lost.
+AT_SETUP([EXAMINE -- big input doesn't crash 2])
+AT_KEYWORDS([categorical categoricals slow])
+AT_DATA([make-big-input.pl], 
+  [for ($i=0; $i<100000; $i++) { print "AB12\n" };
+   for ($i=0; $i<100000; $i++) { print "AB04\n" };
+])
+AT_CHECK([$PERL make-big-input.pl > large.txt])
+AT_DATA([examine.sps], [dnl
+DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
+
+
+AGGREGATE OUTFILE=* /BREAK=X /A=N.
+
+
+EXAMINE /A BY X.
+])
+AT_CHECK([pspp -o pspp.csv examine.sps])
+dnl Ignore output -- this is just a no-crash check.
+AT_DATA([more-big-input.pl], 
+  [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
+])
+AT_CHECK([$PERL more-big-input.pl >> large.txt])
+AT_CHECK([pspp -o pspp.csv examine.sps])
+dnl Ignore output -- this is just a no-crash check.
+AT_CLEANUP
+
+
+dnl Test that the ID command works with non-numeric variables
+AT_SETUP([EXAMINE -- non-numeric ID])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([examine-id.sps], [dnl
+data list notable list /x * y (a12).
+begin data.
+1  one
+2  two
+3  three
+4  four
+5  five
+6  six
+7  seven
+8  eight
+9  nine
+10 ten
+11 eleven
+12 twelve
+30 thirty
+300 threehundred
+end data.
+
+examine x
+       /statistics = extreme
+       /id = y
+       /plot = boxplot
+       .
+])
+
+AT_CHECK([pspp -O format=csv examine-id.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,14,100.0%,0,.0%,14,100.0%
+
+Table: Extreme Values
+,,,y,Value
+x,Highest,1,threehundred,300.00
+,,2,thirty,30.00
+,,3,twelve,12.00
+,,4,eleven,11.00
+,,5,ten,10.00
+,Lowest,1,one,1.00
+,,2,two,2.00
+,,3,three,3.00
+,,4,four,4.00
+,,5,five,5.00
+
+Table: Tests of Normality
+,Shapiro-Wilk,,
+,Statistic,df,Sig.
+x,.37,14,.00
+])
+
+AT_CLEANUP 
+
+dnl Test for a crash which happened on cleanup from a bad input syntax
+AT_SETUP([EXAMINE -- Bad Input])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([examine-bad.sps], [dnl
+data list list /h * g *.
+begin data.
+1 1
+2 1
+3 1
+4 1
+5 2
+6 2
+7 2
+8 2
+9 2
+end data.
+
+EXAMINE 
+       /VARIABLES= h
+       BY  g
+       /STATISTICS = DESCRIPTIVES EXTREME
+        /PLOT = lkajsdas
+       .
+])
+
+AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
+
+AT_CLEANUP 
+
+
+dnl Check the MISSING=REPORT option
+AT_SETUP([EXAMINE -- MISSING=REPORT])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([examine-report.sps], [dnl
+set format = F22.0.
+data list list /x * g *.
+begin data.
+1   1
+2   1
+3   1
+4   1
+5   1
+6   1
+7   1
+8   1
+9   1
+10   2
+20   2
+30   2
+40   2
+50   2
+60   2
+70   2
+80   2
+90   2
+101   9
+201   9
+301   9
+401   9
+501   99
+601   99
+701   99
+801   99
+901   99
+1001  .
+2002  .
+3003  .
+4004  .
+end data.
+
+MISSING VALUES g (9, 99, 999).
+
+EXAMINE
+        /VARIABLES = x
+        BY  g
+        /STATISTICS = EXTREME
+        /NOTOTAL
+        /MISSING = REPORT.
+])
+
+
+AT_CHECK([pspp -o pspp.csv -o pspp.txt examine-report.sps])
+AT_CHECK([cat pspp.csv], [0],
+  [[Table: Reading free-form data from INLINE.
+Variable,Format
+x,F8.0
+g,F8.0
+
+Table: Case Processing Summary
+,,Cases,,,,,
+,,Valid,,Missing,,Total,
+,g,N,Percent,N,Percent,N,Percent
+x,.,4,100.0%,0,.0%,4,100.0%
+,1,9,100.0%,0,.0%,9,100.0%
+,2,9,100.0%,0,.0%,9,100.0%
+,9[a],4,100.0%,0,.0%,4,100.0%
+,99[a],5,100.0%,0,.0%,5,100.0%
+
+Footnotes:
+a,User-missing value.
+
+Table: Extreme Values
+,g,,,Case Number,Value
+x,.,Highest,1,31,4004
+,,,2,30,3003
+,,,3,29,2002
+,,,4,28,1001
+,,,5,0,0
+,,Lowest,1,28,1001
+,,,2,29,2002
+,,,3,30,3003
+,,,4,31,4004
+,,,5,31,4004
+,1,Highest,1,9,9
+,,,2,8,8
+,,,3,7,7
+,,,4,6,6
+,,,5,5,5
+,,Lowest,1,1,1
+,,,2,2,2
+,,,3,3,3
+,,,4,4,4
+,,,5,5,5
+,2,Highest,1,18,90
+,,,2,17,80
+,,,3,16,70
+,,,4,15,60
+,,,5,14,50
+,,Lowest,1,10,10
+,,,2,11,20
+,,,3,12,30
+,,,4,13,40
+,,,5,14,50
+,9[a],Highest,1,22,401
+,,,2,21,301
+,,,3,20,201
+,,,4,19,101
+,,,5,0,0
+,,Lowest,1,19,101
+,,,2,20,201
+,,,3,21,301
+,,,4,22,401
+,,,5,22,401
+,99[a],Highest,1,27,901
+,,,2,26,801
+,,,3,25,701
+,,,4,24,601
+,,,5,23,501
+,,Lowest,1,23,501
+,,,2,24,601
+,,,3,25,701
+,,,4,26,801
+,,,5,27,901
+
+Footnotes:
+a,User-missing value.
+]])
+
+AT_CLEANUP 
+
+
+dnl Run a test of the basic STATISTICS using a "real"
+dnl dataset and comparing with "real" results kindly
+dnl provided by Olaf Nöhring
+AT_SETUP([EXAMINE -- sample unweighted])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([sample.sps], [dnl
+set format = F22.4.
+DATA LIST notable LIST /X *
+BEGIN DATA.
+461.19000000
+466.38000000
+479.46000000
+480.10000000
+483.43000000
+488.30000000
+489.00000000
+491.62000000
+505.62000000
+511.30000000
+521.53000000
+526.70000000
+528.25000000
+538.70000000
+540.22000000
+540.58000000
+546.10000000
+548.17000000
+553.99000000
+566.21000000
+575.90000000
+584.38000000
+593.40000000
+357.05000000
+359.73000000
+360.48000000
+373.98000000
+374.13000000
+381.45000000
+383.72000000
+390.00000000
+400.34000000
+415.32000000
+415.91000000
+418.30000000
+421.03000000
+422.43000000
+426.93000000
+433.25000000
+436.89000000
+445.33000000
+446.33000000
+446.55000000
+456.44000000
+689.49000000
+691.92000000
+695.00000000
+695.36000000
+698.21000000
+699.46000000
+706.61000000
+710.69000000
+715.82000000
+715.82000000
+741.39000000
+752.27000000
+756.73000000
+757.74000000
+759.57000000
+796.07000000
+813.78000000
+817.25000000
+825.48000000
+831.28000000
+849.24000000
+890.00000000
+894.78000000
+935.65000000
+935.90000000
+945.90000000
+1012.8600000
+1022.6000000
+1061.8100000
+1063.5000000
+1077.2300000
+1151.6300000
+1355.2800000
+598.88000000
+606.91000000
+621.60000000
+624.80000000
+636.13000000
+637.38000000
+640.32000000
+649.35000000
+656.51000000
+662.55000000
+664.69000000
+106.22000000
+132.24000000
+174.76000000
+204.85000000
+264.93000000
+264.99000000
+269.84000000
+325.12000000
+331.67000000
+337.26000000
+347.68000000
+354.91000000
+END DATA.
+
+EXAMINE
+       x
+       /STATISTICS=DESCRIPTIVES
+       .
+])
+
+AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+X,100,100.0%,0,.0%,100,100.0%
+
+Table: Descriptives
+,,,Statistic,Std. Error
+X,Mean,,587.6603,23.2665
+,95% Confidence Interval for Mean,Lower Bound,541.4946,
+,,Upper Bound,633.8260,
+,5% Trimmed Mean,,579.7064,
+,Median,,547.1350,
+,Variance,,54132.8466,
+,Std. Deviation,,232.6647,
+,Minimum,,106.2200,
+,Maximum,,1355.2800,
+,Range,,1249.0600,
+,Interquartile Range,,293.1575,
+,Skewness,,.6331,.2414
+,Kurtosis,,.5300,.4783
+])
+
+AT_CLEANUP 
+
+
+
+dnl Test for a crash which happened on bad input syntax
+AT_SETUP([EXAMINE -- Empty Parentheses])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([examine-empty-parens.sps], [dnl
+DATA LIST notable LIST /X *
+BEGIN DATA.
+2
+3
+END DATA.
+
+
+EXAMINE
+       x
+       /PLOT = SPREADLEVEL()
+       .
+])
+
+AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
+
+AT_CLEANUP 
+
+
+
+
+dnl Test for another crash which happened on bad input syntax
+AT_SETUP([EXAMINE -- Bad variable])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([examine-bad-variable.sps], [dnl
+data list list /h * g *.
+begin data.
+3 1
+4 1
+5 2
+end data.
+
+EXAMINE
+        /VARIABLES/ h
+        BY  g
+        .
+])
+
+AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
+
+AT_CLEANUP 
+
+
+
+dnl Test for yet another crash. This time for extremes vs. missing weight values.
+AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([examine-missing-weights.sps], [dnl
+data list notable list /h * g *.
+begin data.
+3 1
+4 .
+5 1
+2 1
+end data.
+
+WEIGHT BY g.
+
+EXAMINE h
+       /STATISTICS extreme(3)
+       .
+])
+
+AT_CHECK([pspp -O format=csv  examine-missing-weights.sps], [0], [dnl
+"examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
+
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+h,3.00,100.0%,.00,.0%,3.00,100.0%
+
+Table: Extreme Values
+,,,Case Number,Value
+h,Highest,1,3,5.00
+,,2,2,4.00
+,,3,1,3.00
+,Lowest,1,4,2.00
+,,2,1,3.00
+,,3,2,4.00
+])
+
+AT_CLEANUP 
+
+dnl This is an example from doc/tutorial.texi
+dnl So if the results of this have to be changed in any way,
+dnl make sure to update that file.
+AT_SETUP([EXAMINE tutorial example 1])
+cp $top_srcdir/examples/repairs.sav .
+AT_DATA([repairs.sps], [dnl
+GET FILE='repairs.sav'.
+EXAMINE mtbf /STATISTICS=DESCRIPTIVES.
+COMPUTE mtbf_ln = LN (mtbf).
+EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
+])
+AT_CHECK([pspp -o pspp.csv -o pspp.txt repairs.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+Mean time between failures (months) ,15,100.0%,0,.0%,15,100.0%
+
+Table: Descriptives
+,,,Statistic,Std. Error
+Mean time between failures (months) ,Mean,,8.32,1.62
+,95% Confidence Interval for Mean,Lower Bound,4.85,
+,,Upper Bound,11.79,
+,5% Trimmed Mean,,7.69,
+,Median,,8.12,
+,Variance,,39.21,
+,Std. Deviation,,6.26,
+,Minimum,,1.63,
+,Maximum,,26.47,
+,Range,,24.84,
+,Interquartile Range,,5.83,
+,Skewness,,1.85,.58
+,Kurtosis,,4.49,1.12
+
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+mtbf_ln,15,100.0%,0,.0%,15,100.0%
+
+Table: Descriptives
+,,,Statistic,Std. Error
+mtbf_ln,Mean,,1.88,.19
+,95% Confidence Interval for Mean,Lower Bound,1.47,
+,,Upper Bound,2.29,
+,5% Trimmed Mean,,1.88,
+,Median,,2.09,
+,Variance,,.54,
+,Std. Deviation,,.74,
+,Minimum,,.49,
+,Maximum,,3.28,
+,Range,,2.79,
+,Interquartile Range,,.92,
+,Skewness,,-.16,.58
+,Kurtosis,,-.09,1.12
+])
+AT_CLEANUP
+
+dnl This is an example from doc/tutorial.texi
+dnl So if the results of this have to be changed in any way,
+dnl make sure to update that file.
+AT_SETUP([EXAMINE tutorial example 2])
+cp $top_srcdir/examples/physiology.sav .
+AT_DATA([examine.sps], [dnl
+GET FILE='physiology.sav'.
+EXAMINE height, weight /STATISTICS=EXTREME(3).
+])
+AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+Height in millimeters   ,40,100.0%,0,.0%,40,100.0%
+Weight in kilograms ,40,100.0%,0,.0%,40,100.0%
+
+Table: Extreme Values
+,,,Case Number,Value
+Height in millimeters   ,Highest,1,14,1903
+,,2,15,1884
+,,3,12,1802
+,Lowest,1,30,179
+,,2,31,1598
+,,3,28,1601
+Weight in kilograms ,Highest,1,13,92.1
+,,2,5,92.1
+,,3,17,91.7
+,Lowest,1,38,-55.6
+,,2,39,54.5
+,,3,33,55.4
+])
+AT_CLEANUP
+
+
+
+AT_SETUP([EXAMINE -- Crash on unrepresentable graphs])
+AT_DATA([examine.sps], [dnl
+data list notable list /x * g *.
+begin data.
+96 1
+end data.
+
+examine x  by g
+        /nototal
+        /plot = all.
+])
+dnl This bug only manifested itself on cairo based drivers.
+AT_CHECK([pspp -O format=pdf examine.sps], [1], [ignore])
+AT_CLEANUP
+
+
+dnl This example comes from the web site:
+dnl  https://www.spsstests.com/2018/11/shapiro-wilk-normality-test-spss.html
+AT_SETUP([EXAMINE -- shapiro-wilk 1])
+AT_KEYWORDS([shapiro wilk])
+AT_DATA([shapiro-wilk.sps], [dnl
+data list notable list /x * g *.
+begin data.
+96 1
+98 1
+95 1
+89 1
+90 1
+92 1
+94 1
+93 1
+97 1
+100 1
+99 2
+96 2
+80 2
+89 2
+91 2
+92 2
+93 2
+94 2
+99 2
+80 2
+end data.
+
+set format F22.3.
+
+examine x  by g
+       /nototal
+       /plot = all.
+])
+
+AT_CHECK([pspp -O format=csv shapiro-wilk.sps], [0],[dnl
+Table: Case Processing Summary
+,,Cases,,,,,
+,,Valid,,Missing,,Total,
+,g,N,Percent,N,Percent,N,Percent
+x,1.00,10,100.0%,0,.0%,10,100.0%
+,2.00,10,100.0%,0,.0%,10,100.0%
+
+Table: Tests of Normality
+,,Shapiro-Wilk,,
+,g,Statistic,df,Sig.
+x,1.00,.984,10,.983
+,2.00,.882,10,.136
+])
+
+AT_CLEANUP
+
+
+dnl This example comes from the web site:
+dnl  http://www.real-statistics.com/tests-normality-and-symmetry/statistical-tests-normality-symmetry/shapiro-wilk-expanded-test/
+dnl It uses a dataset larger than 11 samples. Hence the alternative method for
+dnl significance is used.
+AT_SETUP([EXAMINE -- shapiro-wilk 2])
+AT_KEYWORDS([shapiro wilk])
+AT_DATA([shapiro-wilk2.sps], [dnl
+data list notable list /x *.
+begin data.
+65
+61
+63
+86
+70
+55
+74
+35
+72
+68
+45
+58
+end data.
+
+set format F22.3.
+
+examine x
+       /plot = boxplot.
+])
+
+AT_CHECK([pspp -O format=csv shapiro-wilk2.sps], [0],[dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,12,100.0%,0,.0%,12,100.0%
+
+Table: Tests of Normality
+,Shapiro-Wilk,,
+,Statistic,df,Sig.
+x,.971,12,.922
+])
+
+AT_CLEANUP