1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 dnl GNU General Public License for more details.
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
20 AT_DATA([examine.sps], [
21 DATA LIST LIST /QUALITY * W * BRAND * .
43 VARIABLE LABELS brand 'Manufacturer'.
44 VARIABLE LABELS quality 'Breaking Strain'.
46 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
48 LIST /FORMAT=NUMBERED.
52 /STATISTICS descriptives extreme(3)
57 dnl In the following data, only the extreme values have been checked.
58 dnl The descriptives have been blindly pasted.
59 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
60 Table: Reading free-form data from INLINE.
67 Case Number,QUALITY,W,BRAND
85 Table: Case Processing Summary
87 ,Valid,,Missing,,Total,
88 ,N,Percent,N,Percent,N,Percent
89 Breaking Strain,24.00,100%,.00,0%,24.00,100%
93 Breaking Strain,Highest,1,12,7.00
101 ,,,Statistic,Std. Error
102 Breaking Strain,Mean,,3.54,.32
103 ,95% Confidence Interval for Mean,Lower Bound,2.87,
105 ,5% Trimmed Mean,,3.50,
108 ,Std. Deviation,,1.59,
112 ,Interquartile Range,,2.75,
116 Table: Case Processing Summary
118 ,,Valid,,Missing,,Total,
119 ,Manufacturer,N,Percent,N,Percent,N,Percent
120 Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
121 ,Bloggs,8.00,100%,.00,0%,8.00,100%
122 ,Charlies,8.00,100%,.00,0%,8.00,100%
124 Table: Extreme Values
125 ,Manufacturer,,,Case Number,Value
126 Breaking Strain,Aspeger,Highest,1,6,4.00
132 ,Bloggs,Highest,1,7,5.00
138 ,Charlies,Highest,1,12,7.00
146 ,Manufacturer,,,Statistic,Std. Error
147 Breaking Strain,Aspeger,Mean,,2.25,.45
148 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
150 ,,5% Trimmed Mean,,2.22,
153 ,,Std. Deviation,,1.28,
157 ,,Interquartile Range,,2.75,
159 ,,Kurtosis,,-1.55,1.48
160 ,Bloggs,Mean,,3.50,.38
161 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
163 ,,5% Trimmed Mean,,3.50,
166 ,,Std. Deviation,,1.07,
170 ,,Interquartile Range,,1.75,
172 ,,Kurtosis,,-.83,1.48
173 ,Charlies,Mean,,4.88,.44
174 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
176 ,,5% Trimmed Mean,,4.86,
179 ,,Std. Deviation,,1.25,
183 ,,Interquartile Range,,1.75,
190 AT_SETUP([EXAMINE -- extremes])
191 AT_DATA([examine.sps], [dnl
220 /statistics=extreme(6)
224 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
225 Table: Case Processing Summary
227 ,Valid,,Missing,,Total,
228 ,N,Percent,N,Percent,N,Percent
229 V1,23.00,100%,.00,0%,23.00,100%
231 Table: Extreme Values
233 V1,Highest,1,21,20.00
251 AT_SETUP([EXAMINE -- extremes with fractional weights])
252 AT_DATA([extreme.sps], [dnl
254 data list notable list /w * x *.
281 /STATISTICS = DESCRIPTIVES EXTREME (5)
285 AT_CHECK([pspp -O format=csv extreme.sps], [0], [dnl
286 Table: Case Processing Summary
288 ,Valid,,Missing,,Total,
289 ,N,Percent,N,Percent,N,Percent
290 x,19.430,100%,.000,0%,19.430,100%
292 Table: Extreme Values
294 x,Highest,1,18,1560000.000
299 ,Lowest,1,1,300000.000
306 ,,,Statistic,Std. Error
307 x,Mean,,1120010.293,86222.178
308 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
309 ,,Upper Bound,1300853.894,
310 ,5% Trimmed Mean,,1141017.899,
311 ,Median,,1200000.000,
312 ,Variance,,144447748124.869,
313 ,Std. Deviation,,380062.821,
314 ,Minimum,,300000.000,
315 ,Maximum,,1560000.000,
317 ,Interquartile Range,,467258.065,
318 ,Skewness,,-.887,.519
319 ,Kurtosis,,.340,1.005
324 dnl Test the PERCENTILES subcommand of the EXAMINE command.
325 dnl In particular test that it behaves properly when there are only
327 AT_SETUP([EXAMINE -- percentiles])
328 AT_DATA([examine.sps], [dnl
337 /PERCENTILES=HAVERAGE.
340 /PERCENTILES=WAVERAGE.
346 /PERCENTILES=EMPIRICAL.
349 /PERCENTILES=AEMPIRICAL.
351 AT_CHECK([pspp -o pspp.csv examine.sps])
352 AT_CHECK([cat pspp.csv], [0], [dnl
353 Table: Reading free-form data from INLINE.
357 Table: Case Processing Summary
359 ,Valid,,Missing,,Total,
360 ,N,Percent,N,Percent,N,Percent
365 ,,5,10,25,50,75,90,95
366 X,HAverage,.40,.80,2.00,5.00,8.00,8.00,8.00
367 ,Tukey's Hinges,,,3.50,5.00,6.50,,
369 Table: Case Processing Summary
371 ,Valid,,Missing,,Total,
372 ,N,Percent,N,Percent,N,Percent
377 ,,5,10,25,50,75,90,95
378 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
379 ,Tukey's Hinges,,,3.50,5.00,6.50,,
381 Table: Case Processing Summary
383 ,Valid,,Missing,,Total,
384 ,N,Percent,N,Percent,N,Percent
389 ,,5,10,25,50,75,90,95
390 X,Rounded,.00,.00,2.00,5.00,5.00,8.00,8.00
391 ,Tukey's Hinges,,,3.50,5.00,6.50,,
393 Table: Case Processing Summary
395 ,Valid,,Missing,,Total,
396 ,N,Percent,N,Percent,N,Percent
401 ,,5,10,25,50,75,90,95
402 X,Empirical,2.00,2.00,2.00,5.00,8.00,8.00,8.00
403 ,Tukey's Hinges,,,3.50,5.00,6.50,,
405 Table: Case Processing Summary
407 ,Valid,,Missing,,Total,
408 ,N,Percent,N,Percent,N,Percent
413 ,,5,10,25,50,75,90,95
414 X,Empirical with averaging,2.00,2.00,2.00,5.00,8.00,8.00,8.00
415 ,Tukey's Hinges,,,3.50,5.00,6.50,,
419 AT_SETUP([EXAMINE -- missing values])
420 AT_DATA([examine.sps], [dnl
421 DATA LIST LIST /x * y *.
436 AT_CHECK([pspp -o pspp.csv examine.sps])
437 AT_CHECK([cat pspp.csv], [0], [dnl
438 Table: Reading free-form data from INLINE.
443 Table: Case Processing Summary
445 ,Valid,,Missing,,Total,
446 ,N,Percent,N,Percent,N,Percent
447 x,6,85.7143%,1,14.2857%,7,100%
449 Table: Case Processing Summary
451 ,,Valid,,Missing,,Total,
452 ,y,N,Percent,N,Percent,N,Percent
453 x,1.00,4,100%,0,0%,4,100%
454 ,2.00,2,66.6667%,1,33.3333%,3,100%
459 AT_SETUP([EXAMINE -- user missing values])
460 AT_DATA([examine-m.sps], [dnl
461 DATA LIST notable LIST /x * y *.
468 MISSING VALUES x (9999999999).
469 MISSING VALUES y (99).
475 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
476 Table: Case Processing Summary
478 ,Valid,,Missing,,Total,
479 ,N,Percent,N,Percent,N,Percent
480 x,1,33.3333%,2,66.6667%,3,100%
481 y,2,66.6667%,1,33.3333%,3,100%
485 AT_SETUP([EXAMINE -- missing values and percentiles])
486 AT_DATA([examine.sps], [dnl
494 MISSING VALUE X (99).
497 /PERCENTILES=HAVERAGE.
499 AT_CHECK([pspp -o pspp.csv examine.sps])
500 dnl Ignore output -- this is just a no-crash check.
503 dnl Tests the trimmed mean calculation in the case
504 dnl where the data is weighted towards the centre.
505 AT_SETUP([EXAMINE -- trimmed mean])
506 AT_DATA([examine.sps], [dnl
507 DATA LIST LIST /X * C *.
518 /STATISTICS=DESCRIPTIVES
521 AT_CHECK([pspp -o pspp.csv examine.sps])
522 AT_CHECK([cat pspp.csv], [0], [dnl
523 Table: Reading free-form data from INLINE.
528 Table: Case Processing Summary
530 ,Valid,,Missing,,Total,
531 ,N,Percent,N,Percent,N,Percent
532 X,52.00,100%,.00,0%,52.00,100%
535 ,,,Statistic,Std. Error
537 ,95% Confidence Interval for Mean,Lower Bound,1.95,
539 ,5% Trimmed Mean,,2.00,
542 ,Std. Deviation,,.24,
546 ,Interquartile Range,,.00,
552 AT_SETUP([EXAMINE -- crash bug])
553 AT_DATA([examine.sps], [dnl
554 data list list /a * x * y *.
562 /statistics=DESCRIPTIVES
565 AT_CHECK([pspp -o pspp.csv examine.sps])
566 dnl Ignore output -- this is just a no-crash check.
569 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
570 AT_SETUP([EXAMINE -- consecutive runs don't crash])
571 AT_DATA([examine.sps], [dnl
572 data list list /y * z *.
579 EXAMINE /VARIABLES= z BY y.
581 EXAMINE /VARIABLES= z.
583 AT_CHECK([pspp -o pspp.csv examine.sps])
584 dnl Ignore output -- this is just a no-crash check.
587 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
588 AT_SETUP([EXAMINE -- missing values don't crash])
589 AT_DATA([examine.sps], [dnl
590 data list list /x * y *.
598 examine x by y /statistics=descriptives.
600 AT_CHECK([pspp -o pspp.csv examine.sps])
601 dnl Ignore output -- this is just a no-crash check.
604 dnl Test that having only a single case doesn't crash.
605 AT_SETUP([EXAMINE -- single case doesn't crash])
606 AT_DATA([examine.sps], [dnl
607 DATA LIST LIST /quality * .
615 /STATISTICS descriptives
619 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
620 dnl Ignore output -- this is just a no-crash check.
623 dnl Test that all-missing data doesn't crash.
624 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
625 AT_DATA([examine.sps], [dnl
635 PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
637 /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
638 /PERCENTILE=AEMPIRICAL
641 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
642 dnl Ignore output -- this is just a no-crash check.
645 dnl Test that big input doesn't crash (bug 11307).
646 AT_SETUP([EXAMINE -- big input doesn't crash])
647 AT_DATA([examine.sps], [dnl
650 COMPUTE X=NORMAL(10).
658 /STATISTICS=DESCRIPTIVES.
660 AT_CHECK([pspp -o pspp.csv examine.sps])
661 dnl Ignore output -- this is just a no-crash check.
664 dnl Another test that big input doesn't crash.
665 dnl The actual bug that this checks for has been lost.
666 AT_SETUP([EXAMINE -- big input doesn't crash 2])
667 AT_DATA([make-big-input.pl],
668 [for ($i=0; $i<100000; $i++) { print "AB12\n" };
669 for ($i=0; $i<100000; $i++) { print "AB04\n" };
671 AT_CHECK([$PERL make-big-input.pl > large.txt])
672 AT_DATA([examine.sps], [dnl
673 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
676 AGGREGATE OUTFILE=* /BREAK=X /A=N.
681 AT_CHECK([pspp -o pspp.csv examine.sps])
682 dnl Ignore output -- this is just a no-crash check.
683 AT_DATA([more-big-input.pl],
684 [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
686 AT_CHECK([$PERL more-big-input.pl >> large.txt])
687 AT_CHECK([pspp -o pspp.csv examine.sps])
688 dnl Ignore output -- this is just a no-crash check.
692 dnl Test that the ID subcommand works with non-numeric variables.
693 AT_SETUP([EXAMINE -- non-numeric ID])
695 AT_DATA([examine-id.sps], [dnl
696 data list notable list /x * y (a12).
715 /statistics = extreme
721 AT_CHECK([pspp -O format=csv examine-id.sps], [0],
722 [Table: Case Processing Summary
724 ,Valid,,Missing,,Total,
725 ,N,Percent,N,Percent,N,Percent
726 x,14,100%,0,0%,14,100%
728 Table: Extreme Values
730 x,Highest,1,threehundred,300.00
744 dnl Test for a crash that happened during cleanup after bad input syntax.
745 AT_SETUP([EXAMINE -- Bad Input])
747 AT_DATA([examine-bad.sps], [dnl
748 data list list /h * g *.
764 /STATISTICS = DESCRIPTIVES EXTREME
769 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
774 dnl Check the MISSING=REPORT option
775 AT_SETUP([EXAMINE -- MISSING=REPORT])
778 AT_DATA([examine-report.sps], [dnl
780 data list list /x * g *.
815 MISSING VALUES g (9, 99, 999).
820 /STATISTICS = EXTREME
826 AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
827 Table: Reading free-form data from INLINE.
832 Table: Case Processing Summary
834 ,,Valid,,Missing,,Total,
835 ,g,N,Percent,N,Percent,N,Percent
836 x,. (missing),4,100%,0,0%,4,100%
837 ,1,9,100%,0,0%,9,100%
838 ,2,9,100%,0,0%,9,100%
839 ,9 (missing),4,100%,0,0%,4,100%
840 ,99 (missing),5,100%,0,0%,5,100%
842 Table: Extreme Values
843 ,g,,,Case Number,Value
844 x,. (missing),Highest,1,31,4004
874 ,9 (missing),Highest,1,22,401
884 ,99 (missing),Highest,1,27,901
900 dnl Run a test of the basic STATISTICS using a "real"
901 dnl dataset and compare with "real" results kindly
902 dnl provided by Olaf Nöhring.
903 AT_SETUP([EXAMINE -- sample unweighted])
905 AT_DATA([sample.sps], [dnl
907 DATA LIST notable LIST /X *
1013 /STATISTICS=DESCRIPTIVES
1017 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1018 Table: Case Processing Summary
1020 ,Valid,,Missing,,Total,
1021 ,N,Percent,N,Percent,N,Percent
1022 X,100,100%,0,0%,100,100%
1025 ,,,Statistic,Std. Error
1026 X,Mean,,587.6603,23.2665
1027 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1028 ,,Upper Bound,633.8260,
1029 ,5% Trimmed Mean,,579.7064,
1031 ,Variance,,54132.8466,
1032 ,Std. Deviation,,232.6647,
1034 ,Maximum,,1355.2800,
1036 ,Interquartile Range,,293.1575,
1037 ,Skewness,,.6331,.2414
1038 ,Kurtosis,,.5300,.4783
1045 dnl Test for a crash which happened on bad input syntax
1046 AT_SETUP([EXAMINE -- Empty Parentheses])
1048 AT_DATA([examine-empty-parens.sps], [dnl
1049 DATA LIST notable LIST /X *
1058 /PLOT = SPREADLEVEL()
1062 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1069 dnl Test for another crash which happened on bad input syntax
1070 AT_SETUP([EXAMINE -- Bad variable])
1072 AT_DATA([examine-bad-variable.sps], [dnl
1073 data list list /h * g *.
1086 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1092 dnl Test for yet another crash. This time for extremes vs. missing weight values.
1093 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1095 AT_DATA([examine-missing-weights.sps], [dnl
1096 data list notable list /h * g *.
1107 /STATISTICS extreme(3)
1111 AT_CHECK([pspp -O format=csv examine-missing-weights.sps], [0], [dnl
1112 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative. These case(s) were ignored."
1114 Table: Case Processing Summary
1116 ,Valid,,Missing,,Total,
1117 ,N,Percent,N,Percent,N,Percent
1118 h,3.00,100%,.00,0%,3.00,100%
1120 Table: Extreme Values
1121 ,,,Case Number,Value