1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 dnl GNU General Public License for more details.
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
20 AT_KEYWORDS([categorical categoricals])
21 AT_DATA([examine.sps], [
22 DATA LIST LIST /QUALITY * W * BRAND * .
44 VARIABLE LABELS brand 'Manufacturer'.
45 VARIABLE LABELS quality 'Breaking Strain'.
47 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
49 LIST /FORMAT=NUMBERED.
53 /STATISTICS descriptives extreme(3)
58 dnl In the following data, only the extreme values have been checked.
59 dnl The descriptives have been blindly pasted.
60 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
61 Table: Reading free-form data from INLINE.
68 Case Number,QUALITY,W,BRAND
86 Table: Case Processing Summary
88 ,Valid,,Missing,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 Breaking Strain,24.00,100%,.00,0%,24.00,100%
94 Breaking Strain,Highest,1,12,7.00
102 ,,,Statistic,Std. Error
103 Breaking Strain,Mean,,3.54,.32
104 ,95% Confidence Interval for Mean,Lower Bound,2.87,
106 ,5% Trimmed Mean,,3.50,
109 ,Std. Deviation,,1.59,
113 ,Interquartile Range,,2.75,
117 Table: Case Processing Summary
119 ,,Valid,,Missing,,Total,
120 ,Manufacturer,N,Percent,N,Percent,N,Percent
121 Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
122 ,Bloggs,8.00,100%,.00,0%,8.00,100%
123 ,Charlies,8.00,100%,.00,0%,8.00,100%
125 Table: Extreme Values
126 ,Manufacturer,,,Case Number,Value
127 Breaking Strain,Aspeger,Highest,1,6,4.00
133 ,Bloggs,Highest,1,7,5.00
139 ,Charlies,Highest,1,12,7.00
147 ,Manufacturer,,,Statistic,Std. Error
148 Breaking Strain,Aspeger,Mean,,2.25,.45
149 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
151 ,,5% Trimmed Mean,,2.22,
154 ,,Std. Deviation,,1.28,
158 ,,Interquartile Range,,2.75,
160 ,,Kurtosis,,-1.55,1.48
161 ,Bloggs,Mean,,3.50,.38
162 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
164 ,,5% Trimmed Mean,,3.50,
167 ,,Std. Deviation,,1.07,
171 ,,Interquartile Range,,1.75,
173 ,,Kurtosis,,-.83,1.48
174 ,Charlies,Mean,,4.88,.44
175 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
177 ,,5% Trimmed Mean,,4.86,
180 ,,Std. Deviation,,1.25,
184 ,,Interquartile Range,,1.75,
191 AT_SETUP([EXAMINE -- extremes])
192 AT_KEYWORDS([categorical categoricals])
193 AT_DATA([examine.sps], [dnl
222 /statistics=extreme(6)
226 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
227 Table: Case Processing Summary
229 ,Valid,,Missing,,Total,
230 ,N,Percent,N,Percent,N,Percent
231 V1,23.00,100%,.00,0%,23.00,100%
233 Table: Extreme Values
235 V1,Highest,1,21,20.00
253 AT_SETUP([EXAMINE -- extremes with fractional weights])
254 AT_KEYWORDS([categorical categoricals])
255 AT_DATA([extreme.sps], [dnl
257 data list notable list /w * x *.
284 /STATISTICS = DESCRIPTIVES EXTREME (5)
288 AT_CHECK([pspp -O format=csv extreme.sps], [0], [dnl
289 Table: Case Processing Summary
291 ,Valid,,Missing,,Total,
292 ,N,Percent,N,Percent,N,Percent
293 x,19.430,100%,.000,0%,19.430,100%
295 Table: Extreme Values
297 x,Highest,1,18,1560000.000
302 ,Lowest,1,1,300000.000
309 ,,,Statistic,Std. Error
310 x,Mean,,1120010.293,86222.178
311 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
312 ,,Upper Bound,1300853.894,
313 ,5% Trimmed Mean,,1141017.899,
314 ,Median,,1200000.000,
315 ,Variance,,144447748124.869,
316 ,Std. Deviation,,380062.821,
317 ,Minimum,,300000.000,
318 ,Maximum,,1560000.000,
320 ,Interquartile Range,,467258.065,
321 ,Skewness,,-.887,.519
322 ,Kurtosis,,.340,1.005
327 dnl Test the PERCENTILES subcommand of the EXAMINE command.
328 dnl In particular test that it behaves properly when there are only
330 AT_SETUP([EXAMINE -- percentiles])
331 AT_KEYWORDS([categorical categoricals])
332 AT_DATA([examine.sps], [dnl
341 /PERCENTILES=HAVERAGE.
344 /PERCENTILES=WAVERAGE.
350 /PERCENTILES=EMPIRICAL.
353 /PERCENTILES=AEMPIRICAL.
355 AT_CHECK([pspp -o pspp.csv examine.sps])
356 AT_CHECK([cat pspp.csv], [0], [dnl
357 Table: Reading free-form data from INLINE.
361 Table: Case Processing Summary
363 ,Valid,,Missing,,Total,
364 ,N,Percent,N,Percent,N,Percent
369 ,,5,10,25,50,75,90,95
370 X,HAverage,.40,.80,2.00,5.00,8.00,8.00,8.00
371 ,Tukey's Hinges,,,3.50,5.00,6.50,,
373 Table: Case Processing Summary
375 ,Valid,,Missing,,Total,
376 ,N,Percent,N,Percent,N,Percent
381 ,,5,10,25,50,75,90,95
382 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
383 ,Tukey's Hinges,,,3.50,5.00,6.50,,
385 Table: Case Processing Summary
387 ,Valid,,Missing,,Total,
388 ,N,Percent,N,Percent,N,Percent
393 ,,5,10,25,50,75,90,95
394 X,Rounded,.00,.00,2.00,5.00,5.00,8.00,8.00
395 ,Tukey's Hinges,,,3.50,5.00,6.50,,
397 Table: Case Processing Summary
399 ,Valid,,Missing,,Total,
400 ,N,Percent,N,Percent,N,Percent
405 ,,5,10,25,50,75,90,95
406 X,Empirical,2.00,2.00,2.00,5.00,8.00,8.00,8.00
407 ,Tukey's Hinges,,,3.50,5.00,6.50,,
409 Table: Case Processing Summary
411 ,Valid,,Missing,,Total,
412 ,N,Percent,N,Percent,N,Percent
417 ,,5,10,25,50,75,90,95
418 X,Empirical with averaging,2.00,2.00,2.00,5.00,8.00,8.00,8.00
419 ,Tukey's Hinges,,,3.50,5.00,6.50,,
423 AT_SETUP([EXAMINE -- missing values])
424 AT_KEYWORDS([categorical categoricals])
425 AT_DATA([examine.sps], [dnl
426 DATA LIST LIST /x * y *.
441 AT_CHECK([pspp -o pspp.csv examine.sps])
442 AT_CHECK([cat pspp.csv], [0], [dnl
443 Table: Reading free-form data from INLINE.
448 Table: Case Processing Summary
450 ,Valid,,Missing,,Total,
451 ,N,Percent,N,Percent,N,Percent
452 x,6,85.7143%,1,14.2857%,7,100%
454 Table: Case Processing Summary
456 ,,Valid,,Missing,,Total,
457 ,y,N,Percent,N,Percent,N,Percent
458 x,1.00,4,100%,0,0%,4,100%
459 ,2.00,2,66.6667%,1,33.3333%,3,100%
464 AT_SETUP([EXAMINE -- user missing values])
465 AT_KEYWORDS([categorical categoricals])
466 AT_DATA([examine-m.sps], [dnl
467 DATA LIST notable LIST /x * y *.
474 MISSING VALUES x (9999999999).
475 MISSING VALUES y (99).
481 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
482 Table: Case Processing Summary
484 ,Valid,,Missing,,Total,
485 ,N,Percent,N,Percent,N,Percent
486 x,1,33.3333%,2,66.6667%,3,100%
487 y,2,66.6667%,1,33.3333%,3,100%
491 AT_SETUP([EXAMINE -- missing values and percentiles])
492 AT_KEYWORDS([categorical categoricals])
493 AT_DATA([examine.sps], [dnl
501 MISSING VALUE X (99).
504 /PERCENTILES=HAVERAGE.
506 AT_CHECK([pspp -o pspp.csv examine.sps])
507 dnl Ignore output -- this is just a no-crash check.
510 dnl Tests the trimmed mean calculation in the case
511 dnl where the data is weighted towards the centre.
512 AT_SETUP([EXAMINE -- trimmed mean])
513 AT_KEYWORDS([categorical categoricals])
514 AT_DATA([examine.sps], [dnl
515 DATA LIST LIST /X * C *.
526 /STATISTICS=DESCRIPTIVES
529 AT_CHECK([pspp -o pspp.csv examine.sps])
530 AT_CHECK([cat pspp.csv], [0], [dnl
531 Table: Reading free-form data from INLINE.
536 Table: Case Processing Summary
538 ,Valid,,Missing,,Total,
539 ,N,Percent,N,Percent,N,Percent
540 X,52.00,100%,.00,0%,52.00,100%
543 ,,,Statistic,Std. Error
545 ,95% Confidence Interval for Mean,Lower Bound,1.95,
547 ,5% Trimmed Mean,,2.00,
550 ,Std. Deviation,,.24,
554 ,Interquartile Range,,.00,
560 AT_SETUP([EXAMINE -- crash bug])
561 AT_KEYWORDS([categorical categoricals])
562 AT_DATA([examine.sps], [dnl
563 data list list /a * x * y *.
571 /statistics=DESCRIPTIVES
574 AT_CHECK([pspp -o pspp.csv examine.sps])
575 dnl Ignore output -- this is just a no-crash check.
578 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
579 AT_SETUP([EXAMINE -- consecutive runs don't crash])
580 AT_KEYWORDS([categorical categoricals])
581 AT_DATA([examine.sps], [dnl
582 data list list /y * z *.
589 EXAMINE /VARIABLES= z BY y.
591 EXAMINE /VARIABLES= z.
593 AT_CHECK([pspp -o pspp.csv examine.sps])
594 dnl Ignore output -- this is just a no-crash check.
597 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
598 AT_SETUP([EXAMINE -- missing values don't crash])
599 AT_KEYWORDS([categorical categoricals])
600 AT_DATA([examine.sps], [dnl
601 data list list /x * y *.
609 examine x by y /statistics=descriptives.
611 AT_CHECK([pspp -o pspp.csv examine.sps])
612 dnl Ignore output -- this is just a no-crash check.
615 dnl Test that having only a single case doesn't crash.
616 AT_SETUP([EXAMINE -- single case doesn't crash])
617 AT_KEYWORDS([categorical categoricals])
618 AT_DATA([examine.sps], [dnl
619 DATA LIST LIST /quality * .
627 /STATISTICS descriptives
631 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
632 dnl Ignore output -- this is just a no-crash check.
635 dnl Test that all-missing data doesn't crash.
636 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
637 AT_KEYWORDS([categorical categoricals])
638 AT_DATA([examine.sps], [dnl
648 PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
650 /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
651 /PERCENTILE=AEMPIRICAL
654 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
655 dnl Ignore output -- this is just a no-crash check.
658 dnl Test that big input doesn't crash (bug 11307).
659 AT_SETUP([EXAMINE -- big input doesn't crash])
660 AT_KEYWORDS([categorical categoricals])
661 AT_DATA([examine.sps], [dnl
664 COMPUTE X=NORMAL(10).
672 /STATISTICS=DESCRIPTIVES.
674 AT_CHECK([pspp -o pspp.csv examine.sps])
675 dnl Ignore output -- this is just a no-crash check.
678 dnl Another test that big input doesn't crash.
679 dnl The actual bug that this checks for has been lost.
680 AT_SETUP([EXAMINE -- big input doesn't crash 2])
681 AT_KEYWORDS([categorical categoricals])
682 AT_DATA([make-big-input.pl],
683 [for ($i=0; $i<100000; $i++) { print "AB12\n" };
684 for ($i=0; $i<100000; $i++) { print "AB04\n" };
686 AT_CHECK([$PERL make-big-input.pl > large.txt])
687 AT_DATA([examine.sps], [dnl
688 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
691 AGGREGATE OUTFILE=* /BREAK=X /A=N.
696 AT_CHECK([pspp -o pspp.csv examine.sps])
697 dnl Ignore output -- this is just a no-crash check.
698 AT_DATA([more-big-input.pl],
699 [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
701 AT_CHECK([$PERL more-big-input.pl >> large.txt])
702 AT_CHECK([pspp -o pspp.csv examine.sps])
703 dnl Ignore output -- this is just a no-crash check.
707 dnl Test that the ID command works with non-numeric variables
708 AT_SETUP([EXAMINE -- non-numeric ID])
709 AT_KEYWORDS([categorical categoricals])
711 AT_DATA([examine-id.sps], [dnl
712 data list notable list /x * y (a12).
731 /statistics = extreme
737 AT_CHECK([pspp -O format=csv examine-id.sps], [0],
738 [Table: Case Processing Summary
740 ,Valid,,Missing,,Total,
741 ,N,Percent,N,Percent,N,Percent
742 x,14,100%,0,0%,14,100%
744 Table: Extreme Values
746 x,Highest,1,threehundred,300.00
760 dnl Test for a crash which happened on cleanup from a bad input syntax
761 AT_SETUP([EXAMINE -- Bad Input])
762 AT_KEYWORDS([categorical categoricals])
764 AT_DATA([examine-bad.sps], [dnl
765 data list list /h * g *.
781 /STATISTICS = DESCRIPTIVES EXTREME
786 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
791 dnl Check the MISSING=REPORT option
792 AT_SETUP([EXAMINE -- MISSING=REPORT])
793 AT_KEYWORDS([categorical categoricals])
795 AT_DATA([examine-report.sps], [dnl
797 data list list /x * g *.
832 MISSING VALUES g (9, 99, 999).
837 /STATISTICS = EXTREME
843 AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
844 Table: Reading free-form data from INLINE.
849 Table: Case Processing Summary
851 ,,Valid,,Missing,,Total,
852 ,g,N,Percent,N,Percent,N,Percent
853 x,. (missing),4,100%,0,0%,4,100%
854 ,1,9,100%,0,0%,9,100%
855 ,2,9,100%,0,0%,9,100%
856 ,9 (missing),4,100%,0,0%,4,100%
857 ,99 (missing),5,100%,0,0%,5,100%
859 Table: Extreme Values
860 ,g,,,Case Number,Value
861 x,. (missing),Highest,1,31,4004
891 ,9 (missing),Highest,1,22,401
901 ,99 (missing),Highest,1,27,901
917 dnl Run a test of the basic STATISTICS using a "real"
918 dnl dataset and comparing with "real" results kindly
919 dnl provided by Olaf Nöhring
920 AT_SETUP([EXAMINE -- sample unweighted])
921 AT_KEYWORDS([categorical categoricals])
923 AT_DATA([sample.sps], [dnl
925 DATA LIST notable LIST /X *
1031 /STATISTICS=DESCRIPTIVES
1035 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1036 Table: Case Processing Summary
1038 ,Valid,,Missing,,Total,
1039 ,N,Percent,N,Percent,N,Percent
1040 X,100,100%,0,0%,100,100%
1043 ,,,Statistic,Std. Error
1044 X,Mean,,587.6603,23.2665
1045 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1046 ,,Upper Bound,633.8260,
1047 ,5% Trimmed Mean,,579.7064,
1049 ,Variance,,54132.8466,
1050 ,Std. Deviation,,232.6647,
1052 ,Maximum,,1355.2800,
1054 ,Interquartile Range,,293.1575,
1055 ,Skewness,,.6331,.2414
1056 ,Kurtosis,,.5300,.4783
1063 dnl Test for a crash which happened on bad input syntax
1064 AT_SETUP([EXAMINE -- Empty Parentheses])
1065 AT_KEYWORDS([categorical categoricals])
1067 AT_DATA([examine-empty-parens.sps], [dnl
1068 DATA LIST notable LIST /X *
1077 /PLOT = SPREADLEVEL()
1081 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1088 dnl Test for another crash which happened on bad input syntax
1089 AT_SETUP([EXAMINE -- Bad variable])
1090 AT_KEYWORDS([categorical categoricals])
1092 AT_DATA([examine-bad-variable.sps], [dnl
1093 data list list /h * g *.
1106 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1112 dnl Test for yet another crash. This time for extremes vs. missing weight values.
1113 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1114 AT_KEYWORDS([categorical categoricals])
1116 AT_DATA([examine-missing-weights.sps], [dnl
1117 data list notable list /h * g *.
1128 /STATISTICS extreme(3)
1132 AT_CHECK([pspp -O format=csv examine-missing-weights.sps], [0], [dnl
1133 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative. These case(s) were ignored."
1135 Table: Case Processing Summary
1137 ,Valid,,Missing,,Total,
1138 ,N,Percent,N,Percent,N,Percent
1139 h,3.00,100%,.00,0%,3.00,100%
1141 Table: Extreme Values
1142 ,,,Case Number,Value