Convert all Perl build tools to Python and remove Perl build dependency.

[pspp] / tests / language / stats / examine.at
diff --git a/tests/language/stats/examine.at b/tests/language/stats/examine.at

index dbd11437fd55899d57be826608bb954de6a38c6b..bea69dd354d06303f7a71c2f25cbf0f001bb4580 100644 (file)
--- a/tests/language/stats/examine.at
+++ b/tests/language/stats/examine.at
@@ -1,16 +1,16 @@
  dnl PSPP - a program for statistical analysis.
  dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
-dnl 
+dnl
  dnl This program is free software: you can redistribute it and/or modify
  dnl it under the terms of the GNU General Public License as published by
  dnl the Free Software Foundation, either version 3 of the License, or
  dnl (at your option) any later version.
-dnl 
+dnl
  dnl This program is distributed in the hope that it will be useful,
  dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
  dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  dnl GNU General Public License for more details.
-dnl 
+dnl
  dnl You should have received a copy of the GNU General Public License
  dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
  dnl
@@ -115,9 +115,9 @@ Breaking Strain,Mean,,3.54,.32
  ,Kurtosis,,-.36,.92
  
  Table: Case Processing Summary
-,,Cases,,,,,
+,Manufacturer,Cases,,,,,
  ,,Valid,,Missing,,Total,
-,Manufacturer,N,Percent,N,Percent,N,Percent
+,,N,Percent,N,Percent,N,Percent
  Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
  ,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
  ,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
@@ -218,7 +218,7 @@ end data.
  
  weight by w.
  
-examine v1 
+examine v1
   /statistics=extreme(6)
   .
  ])
@@ -249,7 +249,6 @@ V1,Highest,1,21,20.00
  AT_CLEANUP
  
  
-
  AT_SETUP([EXAMINE -- extremes with fractional weights])
  AT_KEYWORDS([categorical categoricals])
  AT_DATA([extreme.sps], [dnl
@@ -325,16 +324,16 @@ x,Mean,,1120010.293,86222.178
  AT_CLEANUP
  
  dnl Test the PERCENTILES subcommand of the EXAMINE command.
-dnl In particular test that it behaves properly when there are only 
+dnl In particular test that it behaves properly when there are only
  dnl a few cases.
  AT_SETUP([EXAMINE -- percentiles])
  AT_KEYWORDS([categorical categoricals])
  AT_DATA([examine.sps], [dnl
  DATA LIST LIST /X *.
  BEGIN DATA.
-2.00 
-8.00 
-5.00 
+2.00
+8.00
+5.00
  END DATA.
  
  EXAMINE /x
@@ -425,7 +424,7 @@ AT_KEYWORDS([categorical categoricals])
  AT_DATA([examine.sps], [dnl
  DATA LIST LIST /x * y *.
  BEGIN DATA.
-1   1 
+1   1
  2   1
  3   1
  4   1
@@ -452,9 +451,9 @@ Table: Case Processing Summary
  x,6,85.7%,1,14.3%,7,100.0%
  
  Table: Case Processing Summary
-,,Cases,,,,,
+,y,Cases,,,,,
  ,,Valid,,Missing,,Total,
-,y,N,Percent,N,Percent,N,Percent
+,,N,Percent,N,Percent,N,Percent
  x,1.00,4,100.0%,0,.0%,4,100.0%
  ,2.00,2,66.7%,1,33.3%,3,100.0%
  ])
@@ -569,7 +568,7 @@ end data.
  
  examine a by x by y
         /statistics=DESCRIPTIVES
-       . 
+       .
  ])
  AT_CHECK([pspp -o pspp.csv examine.sps])
  dnl Ignore output -- this is just a no-crash check.
@@ -588,7 +587,7 @@ end data.
  
  EXAMINE /VARIABLES= z BY y.
  
-EXAMINE /VARIABLES= z. 
+EXAMINE /VARIABLES= z.
  ])
  AT_CHECK([pspp -o pspp.csv examine.sps])
  dnl Ignore output -- this is just a no-crash check.
@@ -606,7 +605,7 @@ begin data.
  3 1
  4 1
  end data.
-examine x by y /statistics=descriptives. 
+examine x by y /statistics=descriptives.
  ])
  AT_CHECK([pspp -o pspp.csv examine.sps])
  dnl Ignore output -- this is just a no-crash check.
@@ -618,13 +617,13 @@ AT_KEYWORDS([categorical categoricals])
  AT_DATA([examine.sps], [dnl
  DATA LIST LIST /quality * .
  BEGIN DATA
-3  
+3
  END DATA
  
  
  EXAMINE
-       quality 
-       /STATISTICS descriptives 
+       quality
+       /STATISTICS descriptives
          /PLOT = histogram
         .
  ])
@@ -644,7 +643,7 @@ BEGIN DATA.
  .
  END DATA.
  
-EXAMINE /x 
+EXAMINE /x
         PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
         /ID=x
          /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
@@ -679,11 +678,10 @@ dnl Another test that big input doesn't crash.
  dnl The actual bug that this checks for has been lost.
  AT_SETUP([EXAMINE -- big input doesn't crash 2])
  AT_KEYWORDS([categorical categoricals slow])
-AT_DATA([make-big-input.pl], 
-  [for ($i=0; $i<100000; $i++) { print "AB12\n" };
-   for ($i=0; $i<100000; $i++) { print "AB04\n" };
-])
-AT_CHECK([$PERL make-big-input.pl > large.txt])
+AT_CHECK([$PYTHON3 -c '
+for i in range(100000): print("AB12")
+for i in range(100000): print("AB04")
+' > large.txt])
  AT_DATA([examine.sps], [dnl
  DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
  
@@ -695,10 +693,7 @@ EXAMINE /A BY X.
  ])
  AT_CHECK([pspp -o pspp.csv examine.sps])
  dnl Ignore output -- this is just a no-crash check.
-AT_DATA([more-big-input.pl], 
-  [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
-])
-AT_CHECK([$PERL more-big-input.pl >> large.txt])
+AT_CHECK([$PYTHON3 -c 'for i in range(25000): print("AB04\nAB12")' >> large.txt])
  AT_CHECK([pspp -o pspp.csv examine.sps])
  dnl Ignore output -- this is just a no-crash check.
  AT_CLEANUP
@@ -727,6 +722,7 @@ begin data.
  300 threehundred
  end data.
  
+set small=0.
  examine x
         /statistics = extreme
         /id = y
@@ -734,8 +730,8 @@ examine x
         .
  ])
  
-AT_CHECK([pspp -O format=csv examine-id.sps], [0], 
-[Table: Case Processing Summary
+AT_CHECK([pspp -O format=csv examine-id.sps], [0], [dnl
+Table: Case Processing Summary
  ,Cases,,,,,
  ,Valid,,Missing,,Total,
  ,N,Percent,N,Percent,N,Percent
@@ -753,9 +749,14 @@ x,Highest,1,threehundred,300.00
  ,,3,three,3.00
  ,,4,four,4.00
  ,,5,five,5.00
+
+Table: Tests of Normality
+,Shapiro-Wilk,,
+,Statistic,df,Sig.
+x,.37,14,.00
  ])
  
-AT_CLEANUP 
+AT_CLEANUP
  
  dnl Test for a crash which happened on cleanup from a bad input syntax
  AT_SETUP([EXAMINE -- Bad Input])
@@ -775,7 +776,7 @@ begin data.
  9 2
  end data.
  
-EXAMINE 
+EXAMINE
         /VARIABLES= h
         BY  g
         /STATISTICS = DESCRIPTIVES EXTREME
@@ -785,7 +786,7 @@ EXAMINE
  
  AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
  
-AT_CLEANUP 
+AT_CLEANUP
  
  
  dnl Check the MISSING=REPORT option
@@ -848,17 +849,15 @@ x,F8.0
  g,F8.0
  
  Table: Case Processing Summary
-,,Cases,,,,,
+,g,Cases,,,,,
  ,,Valid,,Missing,,Total,
-,g,N,Percent,N,Percent,N,Percent
+,,N,Percent,N,Percent,N,Percent
  x,.,4,100.0%,0,.0%,4,100.0%
  ,1,9,100.0%,0,.0%,9,100.0%
  ,2,9,100.0%,0,.0%,9,100.0%
  ,9[a],4,100.0%,0,.0%,4,100.0%
  ,99[a],5,100.0%,0,.0%,5,100.0%
-
-Footnotes:
-a,User-missing value.
+Footnote: a. User-missing value.
  
  Table: Extreme Values
  ,g,,,Case Number,Value
@@ -912,12 +911,10 @@ x,.,Highest,1,31,4004
  ,,,3,25,701
  ,,,4,26,801
  ,,,5,27,901
-
-Footnotes:
-a,User-missing value.
+Footnote: a. User-missing value.
  ]])
  
-AT_CLEANUP 
+AT_CLEANUP
  
  
  dnl Run a test of the basic STATISTICS using a "real"
@@ -1062,7 +1059,7 @@ X,Mean,,587.6603,23.2665
  ,Kurtosis,,.5300,.4783
  ])
  
-AT_CLEANUP 
+AT_CLEANUP
  
  
  
@@ -1086,7 +1083,7 @@ EXAMINE
  
  AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
  
-AT_CLEANUP 
+AT_CLEANUP
  
  
  
@@ -1111,7 +1108,7 @@ EXAMINE
  
  AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
  
-AT_CLEANUP 
+AT_CLEANUP
  
  
  
@@ -1154,7 +1151,7 @@ h,Highest,1,3,5.00
  ,,3,2,4.00
  ])
  
-AT_CLEANUP 
+AT_CLEANUP
  
  dnl This is an example from doc/tutorial.texi
  dnl So if the results of this have to be changed in any way,
@@ -1167,52 +1164,53 @@ EXAMINE mtbf /STATISTICS=DESCRIPTIVES.
  COMPUTE mtbf_ln = LN (mtbf).
  EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
  ])
-AT_CHECK([pspp -o pspp.csv -o pspp.txt repairs.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
+
+AT_CHECK([pspp -O format=csv repairs.sps], [0], [dnl
  Table: Case Processing Summary
  ,Cases,,,,,
  ,Valid,,Missing,,Total,
  ,N,Percent,N,Percent,N,Percent
-Mean time between failures (months) ,15,100.0%,0,.0%,15,100.0%
+Mean time between failures (months) ,30,100.0%,0,.0%,30,100.0%
  
  Table: Descriptives
  ,,,Statistic,Std. Error
-Mean time between failures (months) ,Mean,,8.32,1.62
-,95% Confidence Interval for Mean,Lower Bound,4.85,
-,,Upper Bound,11.79,
-,5% Trimmed Mean,,7.69,
-,Median,,8.12,
-,Variance,,39.21,
-,Std. Deviation,,6.26,
+Mean time between failures (months) ,Mean,,8.78,1.10
+,95% Confidence Interval for Mean,Lower Bound,6.53,
+,,Upper Bound,11.04,
+,5% Trimmed Mean,,8.20,
+,Median,,8.29,
+,Variance,,36.34,
+,Std. Deviation,,6.03,
  ,Minimum,,1.63,
  ,Maximum,,26.47,
  ,Range,,24.84,
-,Interquartile Range,,5.83,
-,Skewness,,1.85,.58
-,Kurtosis,,4.49,1.12
+,Interquartile Range,,6.03,
+,Skewness,,1.65,.43
+,Kurtosis,,3.41,.83
  
  Table: Case Processing Summary
  ,Cases,,,,,
  ,Valid,,Missing,,Total,
  ,N,Percent,N,Percent,N,Percent
-mtbf_ln,15,100.0%,0,.0%,15,100.0%
+mtbf_ln,30,100.0%,0,.0%,30,100.0%
  
  Table: Descriptives
  ,,,Statistic,Std. Error
-mtbf_ln,Mean,,1.88,.19
-,95% Confidence Interval for Mean,Lower Bound,1.47,
-,,Upper Bound,2.29,
-,5% Trimmed Mean,,1.88,
-,Median,,2.09,
-,Variance,,.54,
-,Std. Deviation,,.74,
+mtbf_ln,Mean,,1.95,.13
+,95% Confidence Interval for Mean,Lower Bound,1.69,
+,,Upper Bound,2.22,
+,5% Trimmed Mean,,1.96,
+,Median,,2.11,
+,Variance,,.49,
+,Std. Deviation,,.70,
  ,Minimum,,.49,
  ,Maximum,,3.28,
  ,Range,,2.79,
-,Interquartile Range,,.92,
-,Skewness,,-.16,.58
-,Kurtosis,,-.09,1.12
+,Interquartile Range,,.88,
+,Skewness,,-.37,.43
+,Kurtosis,,.01,.83
  ])
+
  AT_CLEANUP
  
  dnl This is an example from doc/tutorial.texi
@@ -1251,6 +1249,7 @@ Weight in kilograms ,Highest,1,13,92.1
  AT_CLEANUP
  
  
+
  AT_SETUP([EXAMINE -- Crash on unrepresentable graphs])
  AT_DATA([examine.sps], [dnl
  data list notable list /x * g *.
@@ -1263,5 +1262,104 @@ examine x  by g
          /plot = all.
  ])
  dnl This bug only manifested itself on cairo based drivers.
-AT_CHECK([pspp -O format=pdf examine.sps], [1], [ignore])
+AT_CHECK([pspp -O format=pdf examine.sps], [0], [ignore], [ignore])
+AT_CLEANUP
+
+
+dnl This example comes from the web site:
+dnl  https://www.spsstests.com/2018/11/shapiro-wilk-normality-test-spss.html
+AT_SETUP([EXAMINE -- shapiro-wilk 1])
+AT_KEYWORDS([shapiro wilk])
+AT_DATA([shapiro-wilk.sps], [dnl
+data list notable list /x * g *.
+begin data.
+96 1
+98 1
+95 1
+89 1
+90 1
+92 1
+94 1
+93 1
+97 1
+100 1
+99 2
+96 2
+80 2
+89 2
+91 2
+92 2
+93 2
+94 2
+99 2
+80 2
+end data.
+
+set format F22.3.
+
+examine x  by g
+       /nototal
+       /plot = all.
+])
+
+AT_CHECK([pspp -O format=csv shapiro-wilk.sps], [0],[dnl
+Table: Case Processing Summary
+,g,Cases,,,,,
+,,Valid,,Missing,,Total,
+,,N,Percent,N,Percent,N,Percent
+x,1.00,10,100.0%,0,.0%,10,100.0%
+,2.00,10,100.0%,0,.0%,10,100.0%
+
+Table: Tests of Normality
+,g,Shapiro-Wilk,,
+,,Statistic,df,Sig.
+x,1.00,.984,10,.983
+,2.00,.882,10,.136
+])
+
+AT_CLEANUP
+
+
+dnl This example comes from the web site:
+dnl  http://www.real-statistics.com/tests-normality-and-symmetry/statistical-tests-normality-symmetry/shapiro-wilk-expanded-test/
+dnl It uses a dataset larger than 11 samples. Hence the alternative method for
+dnl significance is used.
+AT_SETUP([EXAMINE -- shapiro-wilk 2])
+AT_KEYWORDS([shapiro wilk])
+AT_DATA([shapiro-wilk2.sps], [dnl
+data list notable list /x *.
+begin data.
+65
+61
+63
+86
+70
+55
+74
+35
+72
+68
+45
+58
+end data.
+
+set format F22.3.
+
+examine x
+       /plot = boxplot.
+])
+
+AT_CHECK([pspp -O format=csv shapiro-wilk2.sps], [0],[dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,12,100.0%,0,.0%,12,100.0%
+
+Table: Tests of Normality
+,Shapiro-Wilk,,
+,Statistic,df,Sig.
+x,.971,12,.922
+])
+
  AT_CLEANUP