1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 dnl GNU General Public License for more details.
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
20 AT_KEYWORDS([categorical categoricals])
21 AT_DATA([examine.sps], [
22 DATA LIST LIST /QUALITY * W * BRAND * .
44 VARIABLE LABELS brand 'Manufacturer'.
45 VARIABLE LABELS quality 'Breaking Strain'.
47 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
49 LIST /FORMAT=NUMBERED.
53 /STATISTICS descriptives extreme(3)
58 dnl In the following data, only the extreme values have been checked.
59 dnl The descriptives have been blindly pasted.
60 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
61 Table: Reading free-form data from INLINE.
68 Case Number,QUALITY,W,BRAND
86 Table: Case Processing Summary
88 ,Valid,,Missing,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 Breaking Strain,24.00,100.0%,.00,.0%,24.00,100.0%
94 Breaking Strain,Highest,1,12,7.00
102 ,,,Statistic,Std. Error
103 Breaking Strain,Mean,,3.54,.32
104 ,95% Confidence Interval for Mean,Lower Bound,2.87,
106 ,5% Trimmed Mean,,3.50,
109 ,Std. Deviation,,1.59,
113 ,Interquartile Range,,2.75,
117 Table: Case Processing Summary
119 ,,Valid,,Missing,,Total,
120 ,Manufacturer,N,Percent,N,Percent,N,Percent
121 Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
122 ,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
123 ,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
125 Table: Extreme Values
126 ,Manufacturer,,,Case Number,Value
127 Breaking Strain,Aspeger,Highest,1,6,4.00
133 ,Bloggs,Highest,1,7,5.00
139 ,Charlies,Highest,1,12,7.00
147 ,Manufacturer,,,Statistic,Std. Error
148 Breaking Strain,Aspeger,Mean,,2.25,.45
149 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
151 ,,5% Trimmed Mean,,2.22,
154 ,,Std. Deviation,,1.28,
158 ,,Interquartile Range,,2.75,
160 ,,Kurtosis,,-1.55,1.48
161 ,Bloggs,Mean,,3.50,.38
162 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
164 ,,5% Trimmed Mean,,3.50,
167 ,,Std. Deviation,,1.07,
171 ,,Interquartile Range,,1.75,
173 ,,Kurtosis,,-.83,1.48
174 ,Charlies,Mean,,4.88,.44
175 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
177 ,,5% Trimmed Mean,,4.86,
180 ,,Std. Deviation,,1.25,
184 ,,Interquartile Range,,1.75,
191 AT_SETUP([EXAMINE -- extremes])
192 AT_KEYWORDS([categorical categoricals])
193 AT_DATA([examine.sps], [dnl
222 /statistics=extreme(6)
226 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
227 Table: Case Processing Summary
229 ,Valid,,Missing,,Total,
230 ,N,Percent,N,Percent,N,Percent
231 V1,23.00,100.0%,.00,.0%,23.00,100.0%
233 Table: Extreme Values
235 V1,Highest,1,21,20.00
253 AT_SETUP([EXAMINE -- extremes with fractional weights])
254 AT_KEYWORDS([categorical categoricals])
255 AT_DATA([extreme.sps], [dnl
257 data list notable list /w * x *.
284 /STATISTICS = DESCRIPTIVES EXTREME (5)
288 AT_CHECK([pspp -O format=csv extreme.sps], [0], [dnl
289 Table: Case Processing Summary
291 ,Valid,,Missing,,Total,
292 ,N,Percent,N,Percent,N,Percent
293 x,19.430,100.0%,.000,.0%,19.430,100.0%
295 Table: Extreme Values
297 x,Highest,1,18,1560000.000
302 ,Lowest,1,1,300000.000
309 ,,,Statistic,Std. Error
310 x,Mean,,1120010.293,86222.178
311 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
312 ,,Upper Bound,1300853.894,
313 ,5% Trimmed Mean,,1141017.899,
314 ,Median,,1200000.000,
315 ,Variance,,144447748124.869,
316 ,Std. Deviation,,380062.821,
317 ,Minimum,,300000.000,
318 ,Maximum,,1560000.000,
320 ,Interquartile Range,,467258.065,
321 ,Skewness,,-.887,.519
322 ,Kurtosis,,.340,1.005
327 dnl Test the PERCENTILES subcommand of the EXAMINE command.
328 dnl In particular test that it behaves properly when there are only
330 AT_SETUP([EXAMINE -- percentiles])
331 AT_KEYWORDS([categorical categoricals])
332 AT_DATA([examine.sps], [dnl
341 /PERCENTILES=HAVERAGE.
344 /PERCENTILES=WAVERAGE.
350 /PERCENTILES=EMPIRICAL.
353 /PERCENTILES=AEMPIRICAL.
355 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
356 AT_CHECK([cat pspp.csv], [0], [dnl
357 Table: Reading free-form data from INLINE.
361 Table: Case Processing Summary
363 ,Valid,,Missing,,Total,
364 ,N,Percent,N,Percent,N,Percent
365 X,3,100.0%,0,.0%,3,100.0%
369 ,,5,10,25,50,75,90,95
370 X,Weighted Average,.40,.80,2.00,5.00,8.00,8.00,8.00
371 ,Tukey's Hinges,,,3.50,5.00,6.50,,
373 Table: Case Processing Summary
375 ,Valid,,Missing,,Total,
376 ,N,Percent,N,Percent,N,Percent
377 X,3,100.0%,0,.0%,3,100.0%
381 ,,5,10,25,50,75,90,95
382 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
383 ,Tukey's Hinges,,,3.50,5.00,6.50,,
385 Table: Case Processing Summary
387 ,Valid,,Missing,,Total,
388 ,N,Percent,N,Percent,N,Percent
389 X,3,100.0%,0,.0%,3,100.0%
393 ,,5,10,25,50,75,90,95
394 X,Weighted Average,.00,.00,2.00,5.00,5.00,8.00,8.00
395 ,Tukey's Hinges,,,3.50,5.00,6.50,,
397 Table: Case Processing Summary
399 ,Valid,,Missing,,Total,
400 ,N,Percent,N,Percent,N,Percent
401 X,3,100.0%,0,.0%,3,100.0%
405 ,,5,10,25,50,75,90,95
406 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
407 ,Tukey's Hinges,,,3.50,5.00,6.50,,
409 Table: Case Processing Summary
411 ,Valid,,Missing,,Total,
412 ,N,Percent,N,Percent,N,Percent
413 X,3,100.0%,0,.0%,3,100.0%
417 ,,5,10,25,50,75,90,95
418 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
419 ,Tukey's Hinges,,,3.50,5.00,6.50,,
423 AT_SETUP([EXAMINE -- missing values])
424 AT_KEYWORDS([categorical categoricals])
425 AT_DATA([examine.sps], [dnl
426 DATA LIST LIST /x * y *.
441 AT_CHECK([pspp -o pspp.csv examine.sps])
442 AT_CHECK([cat pspp.csv], [0], [dnl
443 Table: Reading free-form data from INLINE.
448 Table: Case Processing Summary
450 ,Valid,,Missing,,Total,
451 ,N,Percent,N,Percent,N,Percent
452 x,6,85.7%,1,14.3%,7,100.0%
454 Table: Case Processing Summary
456 ,,Valid,,Missing,,Total,
457 ,y,N,Percent,N,Percent,N,Percent
458 x,1.00,4,100.0%,0,.0%,4,100.0%
459 ,2.00,2,66.7%,1,33.3%,3,100.0%
464 AT_SETUP([EXAMINE -- user missing values])
465 AT_KEYWORDS([categorical categoricals])
466 AT_DATA([examine-m.sps], [dnl
467 DATA LIST notable LIST /x * y *.
474 MISSING VALUES x (9999999999).
475 MISSING VALUES y (99).
481 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
482 Table: Case Processing Summary
484 ,Valid,,Missing,,Total,
485 ,N,Percent,N,Percent,N,Percent
486 x,1,33.3%,2,66.7%,3,100.0%
487 y,2,66.7%,1,33.3%,3,100.0%
491 AT_SETUP([EXAMINE -- missing values and percentiles])
492 AT_KEYWORDS([categorical categoricals])
493 AT_DATA([examine.sps], [dnl
501 MISSING VALUE X (99).
504 /PERCENTILES=HAVERAGE.
506 AT_CHECK([pspp -o pspp.csv examine.sps])
507 dnl Ignore output -- this is just a no-crash check.
510 dnl Tests the trimmed mean calculation in the case
511 dnl where the data is weighted towards the centre.
512 AT_SETUP([EXAMINE -- trimmed mean])
513 AT_KEYWORDS([categorical categoricals])
514 AT_DATA([examine.sps], [dnl
515 DATA LIST LIST /X * C *.
526 /STATISTICS=DESCRIPTIVES
529 AT_CHECK([pspp -o pspp.csv examine.sps])
530 AT_CHECK([cat pspp.csv], [0], [dnl
531 Table: Reading free-form data from INLINE.
536 Table: Case Processing Summary
538 ,Valid,,Missing,,Total,
539 ,N,Percent,N,Percent,N,Percent
540 X,52.00,100.0%,.00,.0%,52.00,100.0%
543 ,,,Statistic,Std. Error
545 ,95% Confidence Interval for Mean,Lower Bound,1.95,
547 ,5% Trimmed Mean,,2.00,
550 ,Std. Deviation,,.24,
554 ,Interquartile Range,,.00,
560 AT_SETUP([EXAMINE -- crash bug])
561 AT_KEYWORDS([categorical categoricals])
562 AT_DATA([examine.sps], [dnl
563 data list list /a * x * y *.
571 /statistics=DESCRIPTIVES
574 AT_CHECK([pspp -o pspp.csv examine.sps])
575 dnl Ignore output -- this is just a no-crash check.
578 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
579 AT_SETUP([EXAMINE -- consecutive runs don't crash])
580 AT_KEYWORDS([categorical categoricals])
581 AT_DATA([examine.sps], [dnl
582 data list list /y * z *.
589 EXAMINE /VARIABLES= z BY y.
591 EXAMINE /VARIABLES= z.
593 AT_CHECK([pspp -o pspp.csv examine.sps])
594 dnl Ignore output -- this is just a no-crash check.
597 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
598 AT_SETUP([EXAMINE -- missing values don't crash])
599 AT_KEYWORDS([categorical categoricals])
600 AT_DATA([examine.sps], [dnl
601 data list list /x * y *.
609 examine x by y /statistics=descriptives.
611 AT_CHECK([pspp -o pspp.csv examine.sps])
612 dnl Ignore output -- this is just a no-crash check.
615 dnl Test that having only a single case doesn't crash.
616 AT_SETUP([EXAMINE -- single case doesn't crash])
617 AT_KEYWORDS([categorical categoricals])
618 AT_DATA([examine.sps], [dnl
619 DATA LIST LIST /quality * .
627 /STATISTICS descriptives
631 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
632 dnl Ignore output -- this is just a no-crash check.
635 dnl Test that all-missing data doesn't crash.
636 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
637 AT_KEYWORDS([categorical categoricals])
638 AT_DATA([examine.sps], [dnl
648 PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
650 /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
651 /PERCENTILE=AEMPIRICAL
654 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
655 dnl Ignore output -- this is just a no-crash check.
658 dnl Test that big input doesn't crash (bug 11307).
659 AT_SETUP([EXAMINE -- big input doesn't crash])
660 AT_KEYWORDS([categorical categoricals slow])
661 AT_DATA([examine.sps], [dnl
664 COMPUTE X=NORMAL(10).
672 /STATISTICS=DESCRIPTIVES.
674 AT_CHECK([pspp -o pspp.csv examine.sps])
675 dnl Ignore output -- this is just a no-crash check.
678 dnl Another test that big input doesn't crash.
679 dnl The actual bug that this checks for has been lost.
680 AT_SETUP([EXAMINE -- big input doesn't crash 2])
681 AT_KEYWORDS([categorical categoricals slow])
682 AT_DATA([make-big-input.pl],
683 [for ($i=0; $i<100000; $i++) { print "AB12\n" };
684 for ($i=0; $i<100000; $i++) { print "AB04\n" };
686 AT_CHECK([$PERL make-big-input.pl > large.txt])
687 AT_DATA([examine.sps], [dnl
688 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
691 AGGREGATE OUTFILE=* /BREAK=X /A=N.
696 AT_CHECK([pspp -o pspp.csv examine.sps])
697 dnl Ignore output -- this is just a no-crash check.
698 AT_DATA([more-big-input.pl],
699 [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
701 AT_CHECK([$PERL more-big-input.pl >> large.txt])
702 AT_CHECK([pspp -o pspp.csv examine.sps])
703 dnl Ignore output -- this is just a no-crash check.
707 dnl Test that the ID subcommand works with non-numeric variables.
708 AT_SETUP([EXAMINE -- non-numeric ID])
709 AT_KEYWORDS([categorical categoricals])
711 AT_DATA([examine-id.sps], [dnl
712 data list notable list /x * y (a12).
731 /statistics = extreme
737 AT_CHECK([pspp -O format=csv examine-id.sps], [0],
738 [Table: Case Processing Summary
740 ,Valid,,Missing,,Total,
741 ,N,Percent,N,Percent,N,Percent
742 x,14,100.0%,0,.0%,14,100.0%
744 Table: Extreme Values
746 x,Highest,1,threehundred,300.00
760 dnl Test for a crash which happened during cleanup after bad input syntax.
761 AT_SETUP([EXAMINE -- Bad Input])
762 AT_KEYWORDS([categorical categoricals])
764 AT_DATA([examine-bad.sps], [dnl
765 data list list /h * g *.
781 /STATISTICS = DESCRIPTIVES EXTREME
786 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
791 dnl Check the MISSING=REPORT option
792 AT_SETUP([EXAMINE -- MISSING=REPORT])
793 AT_KEYWORDS([categorical categoricals])
795 AT_DATA([examine-report.sps], [dnl
797 data list list /x * g *.
832 MISSING VALUES g (9, 99, 999).
837 /STATISTICS = EXTREME
843 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine-report.sps])
844 AT_CHECK([cat pspp.csv], [0],
845 [[Table: Reading free-form data from INLINE.
850 Table: Case Processing Summary
852 ,,Valid,,Missing,,Total,
853 ,g,N,Percent,N,Percent,N,Percent
854 x,.,4,100.0%,0,.0%,4,100.0%
855 ,1,9,100.0%,0,.0%,9,100.0%
856 ,2,9,100.0%,0,.0%,9,100.0%
857 ,9[a],4,100.0%,0,.0%,4,100.0%
858 ,99[a],5,100.0%,0,.0%,5,100.0%
861 a,User-missing value.
863 Table: Extreme Values
864 ,g,,,Case Number,Value
865 x,.,Highest,1,31,4004
895 ,9[a],Highest,1,22,401
905 ,99[a],Highest,1,27,901
917 a,User-missing value.
923 dnl Run a test of the basic STATISTICS using a "real"
924 dnl dataset and compare with "real" results kindly
925 dnl provided by Olaf Nöhring.
926 AT_SETUP([EXAMINE -- sample unweighted])
927 AT_KEYWORDS([categorical categoricals])
929 AT_DATA([sample.sps], [dnl
931 DATA LIST notable LIST /X *
1037 /STATISTICS=DESCRIPTIVES
1041 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1042 Table: Case Processing Summary
1044 ,Valid,,Missing,,Total,
1045 ,N,Percent,N,Percent,N,Percent
1046 X,100,100.0%,0,.0%,100,100.0%
1049 ,,,Statistic,Std. Error
1050 X,Mean,,587.6603,23.2665
1051 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1052 ,,Upper Bound,633.8260,
1053 ,5% Trimmed Mean,,579.7064,
1055 ,Variance,,54132.8466,
1056 ,Std. Deviation,,232.6647,
1058 ,Maximum,,1355.2800,
1060 ,Interquartile Range,,293.1575,
1061 ,Skewness,,.6331,.2414
1062 ,Kurtosis,,.5300,.4783
1069 dnl Test for a crash which happened on bad input syntax
1070 AT_SETUP([EXAMINE -- Empty Parentheses])
1071 AT_KEYWORDS([categorical categoricals])
1073 AT_DATA([examine-empty-parens.sps], [dnl
1074 DATA LIST notable LIST /X *
1083 /PLOT = SPREADLEVEL()
1087 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1094 dnl Test for another crash which happened on bad input syntax
1095 AT_SETUP([EXAMINE -- Bad variable])
1096 AT_KEYWORDS([categorical categoricals])
1098 AT_DATA([examine-bad-variable.sps], [dnl
1099 data list list /h * g *.
1112 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1118 dnl Test for yet another crash, this time for extremes vs. missing weight values.
1119 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1120 AT_KEYWORDS([categorical categoricals])
1122 AT_DATA([examine-missing-weights.sps], [dnl
1123 data list notable list /h * g *.
1134 /STATISTICS extreme(3)
1138 AT_CHECK([pspp -O format=csv examine-missing-weights.sps], [0], [dnl
1139 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative. These case(s) were ignored."
1141 Table: Case Processing Summary
1143 ,Valid,,Missing,,Total,
1144 ,N,Percent,N,Percent,N,Percent
1145 h,3.00,100.0%,.00,.0%,3.00,100.0%
1147 Table: Extreme Values
1148 ,,,Case Number,Value
1159 dnl This is an example from doc/tutorial.texi
1160 dnl So if the results of this have to be changed in any way,
1161 dnl make sure to update that file.
1162 AT_SETUP([EXAMINE tutorial example 1])
1163 cp $top_srcdir/examples/repairs.sav .
1164 AT_DATA([repairs.sps], [dnl
1165 GET FILE='repairs.sav'.
1166 EXAMINE mtbf /STATISTICS=DESCRIPTIVES.
1167 COMPUTE mtbf_ln = LN (mtbf).
1168 EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
1170 AT_CHECK([pspp -o pspp.csv -o pspp.txt repairs.sps])
1171 AT_CHECK([cat pspp.csv], [0], [dnl
1172 Table: Case Processing Summary
1174 ,Valid,,Missing,,Total,
1175 ,N,Percent,N,Percent,N,Percent
1176 Mean time between failures (months) ,15,100.0%,0,.0%,15,100.0%
1179 ,,,Statistic,Std. Error
1180 Mean time between failures (months) ,Mean,,8.32,1.62
1181 ,95% Confidence Interval for Mean,Lower Bound,4.85,
1182 ,,Upper Bound,11.79,
1183 ,5% Trimmed Mean,,7.69,
1186 ,Std. Deviation,,6.26,
1190 ,Interquartile Range,,5.83,
1192 ,Kurtosis,,4.49,1.12
1194 Table: Case Processing Summary
1196 ,Valid,,Missing,,Total,
1197 ,N,Percent,N,Percent,N,Percent
1198 mtbf_ln,15,100.0%,0,.0%,15,100.0%
1201 ,,,Statistic,Std. Error
1202 mtbf_ln,Mean,,1.88,.19
1203 ,95% Confidence Interval for Mean,Lower Bound,1.47,
1205 ,5% Trimmed Mean,,1.88,
1208 ,Std. Deviation,,.74,
1212 ,Interquartile Range,,.92,
1214 ,Kurtosis,,-.09,1.12
1218 dnl This is an example from doc/tutorial.texi
1219 dnl So if the results of this have to be changed in any way,
1220 dnl make sure to update that file.
1221 AT_SETUP([EXAMINE tutorial example 2])
1222 cp $top_srcdir/examples/physiology.sav .
1223 AT_DATA([examine.sps], [dnl
1224 GET FILE='physiology.sav'.
1225 EXAMINE height, weight /STATISTICS=EXTREME(3).
1227 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
1228 AT_CHECK([cat pspp.csv], [0], [dnl
1229 Table: Case Processing Summary
1231 ,Valid,,Missing,,Total,
1232 ,N,Percent,N,Percent,N,Percent
1233 Height in millimeters ,40,100.0%,0,.0%,40,100.0%
1234 Weight in kilograms ,40,100.0%,0,.0%,40,100.0%
1236 Table: Extreme Values
1237 ,,,Case Number,Value
1238 Height in millimeters ,Highest,1,14,1903
1244 Weight in kilograms ,Highest,1,13,92.1
1254 AT_SETUP([EXAMINE -- Crash on unrepresentable graphs])
1255 AT_DATA([examine.sps], [dnl
1256 data list notable list /x * g *.
1265 dnl This bug only manifested itself on cairo based drivers.
1266 AT_CHECK([pspp -O format=pdf examine.sps], [1], [ignore])