1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 dnl GNU General Public License for more details.
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
16 dnl AT_BANNER([EXAMINE])
19 AT_DATA([examine.sps], [
20 DATA LIST LIST /QUALITY * W * BRAND * .
42 VARIABLE LABELS brand 'Manufacturer'.
43 VARIABLE LABELS quality 'Breaking Strain'.
45 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
47 LIST /FORMAT=NUMBERED.
51 /STATISTICS descriptives extreme(3)
56 dnl In the following expected output, only the extreme values have been checked.
57 dnl The descriptives have been pasted in without verification.
58 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
59 Table: Reading free-form data from INLINE.
66 Case Number,QUALITY,W,BRAND
84 Table: Case Processing Summary
86 ,Valid,,Missing,,Total,
87 ,N,Percent,N,Percent,N,Percent
88 Breaking Strain,24.00,100%,.00,0%,24.00,100%
92 Breaking Strain,Highest,1,12,7.00
100 ,,,Statistic,Std. Error
101 Breaking Strain,Mean,,3.54,.32
102 ,95% Confidence Interval for Mean,Lower Bound,2.87,
104 ,5% Trimmed Mean,,3.50,
107 ,Std. Deviation,,1.59,
111 ,Interquartile Range,,2.75,
115 Table: Case Processing Summary
117 ,,Valid,,Missing,,Total,
118 ,Manufacturer,N,Percent,N,Percent,N,Percent
119 Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
120 ,Bloggs,8.00,100%,.00,0%,8.00,100%
121 ,Charlies,8.00,100%,.00,0%,8.00,100%
123 Table: Extreme Values
124 ,Manufacturer,,,Case Number,Value
125 Breaking Strain,Aspeger,Highest,1,6,4.00
131 ,Bloggs,Highest,1,7,5.00
137 ,Charlies,Highest,1,12,7.00
145 ,Manufacturer,,,Statistic,Std. Error
146 Breaking Strain,Aspeger,Mean,,2.25,.45
147 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
149 ,,5% Trimmed Mean,,2.22,
152 ,,Std. Deviation,,1.28,
156 ,,Interquartile Range,,2.75,
158 ,,Kurtosis,,-1.55,1.48
159 ,Bloggs,Mean,,3.50,.38
160 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
162 ,,5% Trimmed Mean,,3.50,
165 ,,Std. Deviation,,1.07,
169 ,,Interquartile Range,,1.75,
171 ,,Kurtosis,,-.83,1.48
172 ,Charlies,Mean,,4.88,.44
173 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
175 ,,5% Trimmed Mean,,4.86,
178 ,,Std. Deviation,,1.25,
182 ,,Interquartile Range,,1.75,
189 AT_SETUP([EXAMINE -- extremes])
190 AT_DATA([examine.sps], [dnl
219 /statistics=extreme(6)
223 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
224 Table: Case Processing Summary
226 ,Valid,,Missing,,Total,
227 ,N,Percent,N,Percent,N,Percent
228 V1,23.00,100%,.00,0%,23.00,100%
230 Table: Extreme Values
232 V1,Highest,1,21,20.00
250 AT_SETUP([EXAMINE -- extremes with fractional weights])
251 AT_DATA([extreme.sps], [dnl
253 data list notable list /w * x *.
280 /STATISTICS = DESCRIPTIVES EXTREME (5)
284 AT_CHECK([pspp -O format=csv extreme.sps], [0], [dnl
285 Table: Case Processing Summary
287 ,Valid,,Missing,,Total,
288 ,N,Percent,N,Percent,N,Percent
289 x,19.430,100%,.000,0%,19.430,100%
291 Table: Extreme Values
293 x,Highest,1,18,1560000.000
298 ,Lowest,1,1,300000.000
305 ,,,Statistic,Std. Error
306 x,Mean,,1120010.293,86222.178
307 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
308 ,,Upper Bound,1300853.894,
309 ,5% Trimmed Mean,,1141017.899,
310 ,Median,,1200000.000,
311 ,Variance,,144447748124.869,
312 ,Std. Deviation,,380062.821,
313 ,Minimum,,300000.000,
314 ,Maximum,,1560000.000,
316 ,Interquartile Range,,467258.065,
317 ,Skewness,,-.887,.519
318 ,Kurtosis,,.340,1.005
323 dnl Test the PERCENTILES subcommand of the EXAMINE command.
324 dnl In particular test that it behaves properly when there are only
326 AT_SETUP([EXAMINE -- percentiles])
327 AT_DATA([examine.sps], [dnl
336 /PERCENTILES=HAVERAGE.
339 /PERCENTILES=WAVERAGE.
345 /PERCENTILES=EMPIRICAL.
348 /PERCENTILES=AEMPIRICAL.
350 AT_CHECK([pspp -o pspp.csv examine.sps])
351 AT_CHECK([cat pspp.csv], [0], [dnl
352 Table: Reading free-form data from INLINE.
356 Table: Case Processing Summary
358 ,Valid,,Missing,,Total,
359 ,N,Percent,N,Percent,N,Percent
364 ,,5,10,25,50,75,90,95
365 X,HAverage,.40,.80,2.00,5.00,8.00,8.00,8.00
366 ,Tukey's Hinges,,,3.50,5.00,6.50,,
368 Table: Case Processing Summary
370 ,Valid,,Missing,,Total,
371 ,N,Percent,N,Percent,N,Percent
376 ,,5,10,25,50,75,90,95
377 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
378 ,Tukey's Hinges,,,3.50,5.00,6.50,,
380 Table: Case Processing Summary
382 ,Valid,,Missing,,Total,
383 ,N,Percent,N,Percent,N,Percent
388 ,,5,10,25,50,75,90,95
389 X,Rounded,.00,.00,2.00,5.00,5.00,8.00,8.00
390 ,Tukey's Hinges,,,3.50,5.00,6.50,,
392 Table: Case Processing Summary
394 ,Valid,,Missing,,Total,
395 ,N,Percent,N,Percent,N,Percent
400 ,,5,10,25,50,75,90,95
401 X,Empirical,2.00,2.00,2.00,5.00,8.00,8.00,8.00
402 ,Tukey's Hinges,,,3.50,5.00,6.50,,
404 Table: Case Processing Summary
406 ,Valid,,Missing,,Total,
407 ,N,Percent,N,Percent,N,Percent
412 ,,5,10,25,50,75,90,95
413 X,Empirical with averaging,2.00,2.00,2.00,5.00,8.00,8.00,8.00
414 ,Tukey's Hinges,,,3.50,5.00,6.50,,
418 AT_SETUP([EXAMINE -- missing values])
419 AT_DATA([examine.sps], [dnl
420 DATA LIST LIST /x * y *.
435 AT_CHECK([pspp -o pspp.csv examine.sps])
436 AT_CHECK([cat pspp.csv], [0], [dnl
437 Table: Reading free-form data from INLINE.
442 Table: Case Processing Summary
444 ,Valid,,Missing,,Total,
445 ,N,Percent,N,Percent,N,Percent
446 x,6,85.7143%,1,14.2857%,7,100%
448 Table: Case Processing Summary
450 ,,Valid,,Missing,,Total,
451 ,y,N,Percent,N,Percent,N,Percent
452 x,1.00,4,100%,0,0%,4,100%
453 ,2.00,2,66.6667%,1,33.3333%,3,100%
458 AT_SETUP([EXAMINE -- user missing values])
459 AT_DATA([examine-m.sps], [dnl
460 DATA LIST notable LIST /x * y *.
467 MISSING VALUES x (9999999999).
468 MISSING VALUES y (99).
474 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
475 Table: Case Processing Summary
477 ,Valid,,Missing,,Total,
478 ,N,Percent,N,Percent,N,Percent
479 x,1,33.3333%,2,66.6667%,3,100%
480 y,2,66.6667%,1,33.3333%,3,100%
484 AT_SETUP([EXAMINE -- missing values and percentiles])
485 AT_DATA([examine.sps], [dnl
493 MISSING VALUE X (99).
496 /PERCENTILES=HAVERAGE.
498 AT_CHECK([pspp -o pspp.csv examine.sps])
499 dnl Ignore output -- this is just a no-crash check.
502 dnl Tests the trimmed mean calculation in the case
503 dnl where the data is weighted towards the centre.
504 AT_SETUP([EXAMINE -- trimmed mean])
505 AT_DATA([examine.sps], [dnl
506 DATA LIST LIST /X * C *.
517 /STATISTICS=DESCRIPTIVES
520 AT_CHECK([pspp -o pspp.csv examine.sps])
521 AT_CHECK([cat pspp.csv], [0], [dnl
522 Table: Reading free-form data from INLINE.
527 Table: Case Processing Summary
529 ,Valid,,Missing,,Total,
530 ,N,Percent,N,Percent,N,Percent
531 X,52.00,100%,.00,0%,52.00,100%
534 ,,,Statistic,Std. Error
536 ,95% Confidence Interval for Mean,Lower Bound,1.95,
538 ,5% Trimmed Mean,,2.00,
541 ,Std. Deviation,,.24,
545 ,Interquartile Range,,.00,
551 AT_SETUP([EXAMINE -- crash bug])
552 AT_DATA([examine.sps], [dnl
553 data list list /a * x * y *.
561 /statistics=DESCRIPTIVES
564 AT_CHECK([pspp -o pspp.csv examine.sps])
565 dnl Ignore output -- this is just a no-crash check.
568 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
569 AT_SETUP([EXAMINE -- consecutive runs don't crash])
570 AT_DATA([examine.sps], [dnl
571 data list list /y * z *.
578 EXAMINE /VARIABLES= z BY y.
580 EXAMINE /VARIABLES= z.
582 AT_CHECK([pspp -o pspp.csv examine.sps])
583 dnl Ignore output -- this is just a no-crash check.
586 dnl Test that /DESCRIPTIVES does not crash in the presence of missing values.
587 AT_SETUP([EXAMINE -- missing values don't crash])
588 AT_DATA([examine.sps], [dnl
589 data list list /x * y *.
597 examine x by y /statistics=descriptives.
599 AT_CHECK([pspp -o pspp.csv examine.sps])
600 dnl Ignore output -- this is just a no-crash check.
603 dnl Test that having only a single case doesn't crash.
604 AT_SETUP([EXAMINE -- single case doesn't crash])
605 AT_DATA([examine.sps], [dnl
606 DATA LIST LIST /quality * .
614 /STATISTICS descriptives
618 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
619 dnl Ignore output -- this is just a no-crash check.
622 dnl Test that all-missing data doesn't crash.
623 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
624 AT_DATA([examine.sps], [dnl
634 PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
636 /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
637 /PERCENTILE=AEMPIRICAL
640 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
641 dnl Ignore output -- this is just a no-crash check.
644 dnl Test that big input doesn't crash (bug 11307).
645 AT_SETUP([EXAMINE -- big input doesn't crash])
646 AT_DATA([examine.sps], [dnl
649 COMPUTE X=NORMAL(10).
657 /STATISTICS=DESCRIPTIVES.
659 AT_CHECK([pspp -o pspp.csv examine.sps])
660 dnl Ignore output -- this is just a no-crash check.
663 dnl Another test that big input doesn't crash.
664 dnl The actual bug that this checks for has been lost.
665 AT_SETUP([EXAMINE -- big input doesn't crash 2])
666 AT_DATA([make-big-input.pl],
667 [for ($i=0; $i<100000; $i++) { print "AB12\n" };
668 for ($i=0; $i<100000; $i++) { print "AB04\n" };
670 AT_CHECK([$PERL make-big-input.pl > large.txt])
671 AT_DATA([examine.sps], [dnl
672 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
675 AGGREGATE OUTFILE=* /BREAK=X /A=N.
680 AT_CHECK([pspp -o pspp.csv examine.sps])
681 dnl Ignore output -- this is just a no-crash check.
682 AT_DATA([more-big-input.pl],
683 [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
685 AT_CHECK([$PERL more-big-input.pl >> large.txt])
686 AT_CHECK([pspp -o pspp.csv examine.sps])
687 dnl Ignore output -- this is just a no-crash check.
691 dnl Test that the ID command works with non-numeric variables
692 AT_SETUP([EXAMINE -- non-numeric ID])
694 AT_DATA([examine-id.sps], [dnl
695 data list notable list /x * y (a12).
714 /statistics = extreme
720 AT_CHECK([pspp -O format=csv examine-id.sps], [0],
721 [Table: Case Processing Summary
723 ,Valid,,Missing,,Total,
724 ,N,Percent,N,Percent,N,Percent
725 x,14,100%,0,0%,14,100%
727 Table: Extreme Values
729 x,Highest,1,threehundred,300.00
743 dnl Test for a crash which happened during cleanup after bad input syntax
744 AT_SETUP([EXAMINE -- Bad Input])
746 AT_DATA([examine-bad.sps], [dnl
747 data list list /h * g *.
763 /STATISTICS = DESCRIPTIVES EXTREME
768 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
773 dnl Check the MISSING=REPORT option
774 AT_SETUP([EXAMINE -- MISSING=REPORT])
777 AT_DATA([examine-report.sps], [dnl
779 data list list /x * g *.
814 MISSING VALUES g (9, 99, 999).
819 /STATISTICS = EXTREME
825 AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
826 Table: Reading free-form data from INLINE.
831 Table: Case Processing Summary
833 ,,Valid,,Missing,,Total,
834 ,g,N,Percent,N,Percent,N,Percent
835 x,. (missing),4,100%,0,0%,4,100%
836 ,1,9,100%,0,0%,9,100%
837 ,2,9,100%,0,0%,9,100%
838 ,9 (missing),4,100%,0,0%,4,100%
839 ,99 (missing),5,100%,0,0%,5,100%
841 Table: Extreme Values
842 ,g,,,Case Number,Value
843 x,. (missing),Highest,1,31,4004
873 ,9 (missing),Highest,1,22,401
883 ,99 (missing),Highest,1,27,901
899 dnl Run a test of the basic STATISTICS using a "real"
900 dnl dataset and compare with "real" results kindly
901 dnl provided by Olaf Nöhring.
902 AT_SETUP([EXAMINE -- sample unweighted])
904 AT_DATA([sample.sps], [dnl
906 DATA LIST notable LIST /X *
1012 /STATISTICS=DESCRIPTIVES
1016 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1017 Table: Case Processing Summary
1019 ,Valid,,Missing,,Total,
1020 ,N,Percent,N,Percent,N,Percent
1021 X,100,100%,0,0%,100,100%
1024 ,,,Statistic,Std. Error
1025 X,Mean,,587.6603,23.2665
1026 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1027 ,,Upper Bound,633.8260,
1028 ,5% Trimmed Mean,,579.7064,
1030 ,Variance,,54132.8466,
1031 ,Std. Deviation,,232.6647,
1033 ,Maximum,,1355.2800,
1035 ,Interquartile Range,,293.1575,
1036 ,Skewness,,.6331,.2414
1037 ,Kurtosis,,.5300,.4783
1044 dnl Test for a crash which happened on bad input syntax
1045 AT_SETUP([EXAMINE -- Empty Parentheses])
1047 AT_DATA([examine-empty-parens.sps], [dnl
1048 DATA LIST notable LIST /X *
1057 /PLOT = SPREADLEVEL()
1061 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1068 dnl Test for another crash which happened on bad input syntax
1069 AT_SETUP([EXAMINE -- Bad variable])
1071 AT_DATA([examine-bad-variable.sps], [dnl
1072 data list list /h * g *.
1085 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1091 dnl Test for yet another crash. This time for extremes vs. missing weight values.
1092 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1094 AT_DATA([examine-missing-weights.sps], [dnl
1095 data list notable list /h * g *.
1106 /STATISTICS extreme(3)
1110 AT_CHECK([pspp -O format=csv examine-missing-weights.sps], [0], [dnl
1111 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative. These case(s) were ignored."
1113 Table: Case Processing Summary
1115 ,Valid,,Missing,,Total,
1116 ,N,Percent,N,Percent,N,Percent
1117 h,3.00,100%,.00,0%,3.00,100%
1119 Table: Extreme Values
1120 ,,,Case Number,Value