4 AT_DATA([examine.sps], [
5 DATA LIST LIST /QUALITY * W * BRAND * .
27 VARIABLE LABELS brand 'Manufacturer'.
28 VARIABLE LABELS quality 'Breaking Strain'.
30 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
32 LIST /FORMAT=NUMBERED.
36 /STATISTICS descriptives extreme(3)
41 dnl In the following data, only the extreme values have been checked.
42 dnl The descriptives have been blindly pasted.
43 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
44 Table: Reading free-form data from INLINE.
51 Case Number,QUALITY,W,BRAND
69 Table: Case Processing Summary
71 ,Valid,,Missing,,Total,
72 ,N,Percent,N,Percent,N,Percent
73 Breaking Strain,24.00,100%,.00,0%,24.00,100%
77 Breaking Strain,Highest,1,12,7.00
85 ,,,Statistic,Std. Error
86 Breaking Strain,Mean,,3.54,.32
87 ,95% Confidence Interval for Mean,Lower Bound,2.87,
89 ,5% Trimmed Mean,,3.50,
92 ,Std. Deviation,,1.59,
96 ,Interquartile Range,,2.75,
100 Table: Case Processing Summary
102 ,,Valid,,Missing,,Total,
103 ,Manufacturer,N,Percent,N,Percent,N,Percent
104 Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
105 ,Bloggs,8.00,100%,.00,0%,8.00,100%
106 ,Charlies,8.00,100%,.00,0%,8.00,100%
108 Table: Extreme Values
109 ,Manufacturer,,,Case Number,Value
110 Breaking Strain,Aspeger,Highest,1,6,4.00
116 ,Bloggs,Highest,1,7,5.00
122 ,Charlies,Highest,1,12,7.00
130 ,Manufacturer,,,Statistic,Std. Error
131 Breaking Strain,Aspeger,Mean,,2.25,.45
132 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
134 ,,5% Trimmed Mean,,2.22,
137 ,,Std. Deviation,,1.28,
141 ,,Interquartile Range,,2.75,
143 ,,Kurtosis,,-1.55,1.48
144 ,Bloggs,Mean,,3.50,.38
145 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
147 ,,5% Trimmed Mean,,3.50,
150 ,,Std. Deviation,,1.07,
154 ,,Interquartile Range,,1.75,
156 ,,Kurtosis,,-.83,1.48
157 ,Charlies,Mean,,4.88,.44
158 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
160 ,,5% Trimmed Mean,,4.86,
163 ,,Std. Deviation,,1.25,
167 ,,Interquartile Range,,1.75,
174 AT_SETUP([EXAMINE -- extremes])
175 AT_DATA([examine.sps], [dnl
204 /statistics=extreme(6)
208 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
209 Table: Case Processing Summary
211 ,Valid,,Missing,,Total,
212 ,N,Percent,N,Percent,N,Percent
213 V1,23.00,100%,.00,0%,23.00,100%
215 Table: Extreme Values
217 V1,Highest,1,21,20.00
235 AT_SETUP([EXAMINE -- extremes with fractional weights])
236 AT_DATA([extreme.sps], [dnl
238 data list notable list /w * x *.
265 /STATISTICS = DESCRIPTIVES EXTREME (5)
269 AT_CHECK([pspp -O format=csv extreme.sps], [0], [dnl
270 Table: Case Processing Summary
272 ,Valid,,Missing,,Total,
273 ,N,Percent,N,Percent,N,Percent
274 x,19.430,100%,.000,0%,19.430,100%
276 Table: Extreme Values
278 x,Highest,1,18,1560000.000
283 ,Lowest,1,1,300000.000
290 ,,,Statistic,Std. Error
291 x,Mean,,1120010.293,86222.178
292 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
293 ,,Upper Bound,1300853.894,
294 ,5% Trimmed Mean,,1141017.899,
295 ,Median,,1200000.000,
296 ,Variance,,144447748124.869,
297 ,Std. Deviation,,380062.821,
298 ,Minimum,,300000.000,
299 ,Maximum,,1560000.000,
301 ,Interquartile Range,,467258.065,
302 ,Skewness,,-.887,.519
303 ,Kurtosis,,.340,1.005
308 dnl Test the PERCENTILES subcommand of the EXAMINE command.
309 dnl In particular test that it behaves properly when there are only
311 AT_SETUP([EXAMINE -- percentiles])
312 AT_DATA([examine.sps], [dnl
321 /PERCENTILES=HAVERAGE.
324 /PERCENTILES=WAVERAGE.
330 /PERCENTILES=EMPIRICAL.
333 /PERCENTILES=AEMPIRICAL.
335 AT_CHECK([pspp -o pspp.csv examine.sps])
336 AT_CHECK([cat pspp.csv], [0], [dnl
337 Table: Reading free-form data from INLINE.
341 Table: Case Processing Summary
343 ,Valid,,Missing,,Total,
344 ,N,Percent,N,Percent,N,Percent
349 ,,5,10,25,50,75,90,95
350 X,HAverage,.40,.80,2.00,5.00,8.00,8.00,8.00
351 ,Tukey's Hinges,,,3.50,5.00,6.50,,
353 Table: Case Processing Summary
355 ,Valid,,Missing,,Total,
356 ,N,Percent,N,Percent,N,Percent
361 ,,5,10,25,50,75,90,95
362 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
363 ,Tukey's Hinges,,,3.50,5.00,6.50,,
365 Table: Case Processing Summary
367 ,Valid,,Missing,,Total,
368 ,N,Percent,N,Percent,N,Percent
373 ,,5,10,25,50,75,90,95
374 X,Rounded,.00,.00,2.00,5.00,5.00,8.00,8.00
375 ,Tukey's Hinges,,,3.50,5.00,6.50,,
377 Table: Case Processing Summary
379 ,Valid,,Missing,,Total,
380 ,N,Percent,N,Percent,N,Percent
385 ,,5,10,25,50,75,90,95
386 X,Empirical,2.00,2.00,2.00,5.00,8.00,8.00,8.00
387 ,Tukey's Hinges,,,3.50,5.00,6.50,,
389 Table: Case Processing Summary
391 ,Valid,,Missing,,Total,
392 ,N,Percent,N,Percent,N,Percent
397 ,,5,10,25,50,75,90,95
398 X,Empirical with averaging,2.00,2.00,2.00,5.00,8.00,8.00,8.00
399 ,Tukey's Hinges,,,3.50,5.00,6.50,,
403 AT_SETUP([EXAMINE -- missing values])
404 AT_DATA([examine.sps], [dnl
405 DATA LIST LIST /x * y *.
420 AT_CHECK([pspp -o pspp.csv examine.sps])
421 AT_CHECK([cat pspp.csv], [0], [dnl
422 Table: Reading free-form data from INLINE.
427 Table: Case Processing Summary
429 ,Valid,,Missing,,Total,
430 ,N,Percent,N,Percent,N,Percent
431 x,6,85.7143%,1,14.2857%,7,100%
433 Table: Case Processing Summary
435 ,,Valid,,Missing,,Total,
436 ,y,N,Percent,N,Percent,N,Percent
437 x,1.00,4,100%,0,0%,4,100%
438 ,2.00,2,66.6667%,1,33.3333%,3,100%
443 AT_SETUP([EXAMINE -- user missing values])
444 AT_DATA([examine-m.sps], [dnl
445 DATA LIST notable LIST /x * y *.
452 MISSING VALUES x (9999999999).
453 MISSING VALUES y (99).
459 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
460 Table: Case Processing Summary
462 ,Valid,,Missing,,Total,
463 ,N,Percent,N,Percent,N,Percent
464 x,1,33.3333%,2,66.6667%,3,100%
465 y,2,66.6667%,1,33.3333%,3,100%
469 AT_SETUP([EXAMINE -- missing values and percentiles])
470 AT_DATA([examine.sps], [dnl
478 MISSING VALUE X (99).
481 /PERCENTILES=HAVERAGE.
483 AT_CHECK([pspp -o pspp.csv examine.sps])
484 dnl Ignore output -- this is just a no-crash check.
487 dnl Tests the trimmed mean calculation in the case
488 dnl where the data is weighted towards the centre.
489 AT_SETUP([EXAMINE -- trimmed mean])
490 AT_DATA([examine.sps], [dnl
491 DATA LIST LIST /X * C *.
502 /STATISTICS=DESCRIPTIVES
505 AT_CHECK([pspp -o pspp.csv examine.sps])
506 AT_CHECK([cat pspp.csv], [0], [dnl
507 Table: Reading free-form data from INLINE.
512 Table: Case Processing Summary
514 ,Valid,,Missing,,Total,
515 ,N,Percent,N,Percent,N,Percent
516 X,52.00,100%,.00,0%,52.00,100%
519 ,,,Statistic,Std. Error
521 ,95% Confidence Interval for Mean,Lower Bound,1.95,
523 ,5% Trimmed Mean,,2.00,
526 ,Std. Deviation,,.24,
530 ,Interquartile Range,,.00,
536 AT_SETUP([EXAMINE -- crash bug])
537 AT_DATA([examine.sps], [dnl
538 data list list /a * x * y *.
546 /statistics=DESCRIPTIVES
549 AT_CHECK([pspp -o pspp.csv examine.sps])
550 dnl Ignore output -- this is just a no-crash check.
553 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
554 AT_SETUP([EXAMINE -- consecutive runs don't crash])
555 AT_DATA([examine.sps], [dnl
556 data list list /y * z *.
563 EXAMINE /VARIABLES= z BY y.
565 EXAMINE /VARIABLES= z.
567 AT_CHECK([pspp -o pspp.csv examine.sps])
568 dnl Ignore output -- this is just a no-crash check.
571 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
572 AT_SETUP([EXAMINE -- missing values don't crash])
573 AT_DATA([examine.sps], [dnl
574 data list list /x * y *.
582 examine x by y /statistics=descriptives.
584 AT_CHECK([pspp -o pspp.csv examine.sps])
585 dnl Ignore output -- this is just a no-crash check.
588 dnl Test that having only a single case doesn't crash.
589 AT_SETUP([EXAMINE -- single case doesn't crash])
590 AT_DATA([examine.sps], [dnl
591 DATA LIST LIST /quality * .
599 /STATISTICS descriptives
603 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
604 dnl Ignore output -- this is just a no-crash check.
607 dnl Test that all-missing data doesn't crash.
608 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
609 AT_DATA([examine.sps], [dnl
618 EXAMINE /x PLOT=HISTOGRAM.
620 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
621 dnl Ignore output -- this is just a no-crash check.
624 dnl Test that big input doesn't crash (bug 11307).
625 AT_SETUP([EXAMINE -- big input doesn't crash])
626 AT_DATA([examine.sps], [dnl
629 COMPUTE X=NORMAL(10).
637 /STATISTICS=DESCRIPTIVES.
639 AT_CHECK([pspp -o pspp.csv examine.sps])
640 dnl Ignore output -- this is just a no-crash check.
643 dnl Another test that big input doesn't crash.
644 dnl The actual bug that this checks for has been lost.
645 AT_SETUP([EXAMINE -- big input doesn't crash 2])
646 AT_DATA([make-big-input.pl],
647 [for ($i=0; $i<100000; $i++) { print "AB12\n" };
648 for ($i=0; $i<100000; $i++) { print "AB04\n" };
650 AT_CHECK([$PERL make-big-input.pl > large.txt])
651 AT_DATA([examine.sps], [dnl
652 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
655 AGGREGATE OUTFILE=* /BREAK=X /A=N.
660 AT_CHECK([pspp -o pspp.csv examine.sps])
661 dnl Ignore output -- this is just a no-crash check.
662 AT_DATA([more-big-input.pl],
663 [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
665 AT_CHECK([$PERL more-big-input.pl >> large.txt])
666 AT_CHECK([pspp -o pspp.csv examine.sps])
667 dnl Ignore output -- this is just a no-crash check.
671 dnl Test that the ID subcommand works with non-numeric variables
672 AT_SETUP([EXAMINE -- non-numeric ID])
674 AT_DATA([examine-id.sps], [dnl
675 data list notable list /x * y (a12).
694 /statistics = extreme
700 AT_CHECK([pspp -O format=csv examine-id.sps], [0],
701 [Table: Case Processing Summary
703 ,Valid,,Missing,,Total,
704 ,N,Percent,N,Percent,N,Percent
705 x,14,100%,0,0%,14,100%
707 Table: Extreme Values
709 x,Highest,1,threehundred,300.00
723 dnl Test for a crash which happened on cleanup from bad input syntax
724 AT_SETUP([EXAMINE -- Bad Input])
726 AT_DATA([examine-bad.sps], [dnl
727 data list list /h * g *.
743 /STATISTICS = DESCRIPTIVES EXTREME
748 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
753 dnl Check the MISSING=REPORT option
754 AT_SETUP([EXAMINE -- MISSING=REPORT])
757 AT_DATA([examine-report.sps], [dnl
759 data list list /x * g *.
794 MISSING VALUES g (9, 99, 999).
799 /STATISTICS = EXTREME
805 AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
806 Table: Reading free-form data from INLINE.
811 Table: Case Processing Summary
813 ,,Valid,,Missing,,Total,
814 ,g,N,Percent,N,Percent,N,Percent
815 x,. (missing),4,100%,0,0%,4,100%
816 ,1,9,100%,0,0%,9,100%
817 ,2,9,100%,0,0%,9,100%
818 ,9 (missing),4,100%,0,0%,4,100%
819 ,99 (missing),5,100%,0,0%,5,100%
821 Table: Extreme Values
822 ,g,,,Case Number,Value
823 x,. (missing),Highest,1,31,4004
853 ,9 (missing),Highest,1,22,401
863 ,99 (missing),Highest,1,27,901
879 dnl Run a test of the basic STATISTICS using a "real"
880 dnl dataset and comparing with "real" results kindly
881 dnl provided by Olaf Nöhring
882 AT_SETUP([EXAMINE -- sample unweighted])
884 AT_DATA([sample.sps], [dnl
886 DATA LIST notable LIST /X *
992 /STATISTICS=DESCRIPTIVES
996 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
997 Table: Case Processing Summary
999 ,Valid,,Missing,,Total,
1000 ,N,Percent,N,Percent,N,Percent
1001 X,100,100%,0,0%,100,100%
1004 ,,,Statistic,Std. Error
1005 X,Mean,,587.6603,23.2665
1006 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1007 ,,Upper Bound,633.8260,
1008 ,5% Trimmed Mean,,579.7064,
1010 ,Variance,,54132.8466,
1011 ,Std. Deviation,,232.6647,
1013 ,Maximum,,1355.2800,
1015 ,Interquartile Range,,293.1575,
1016 ,Skewness,,.6331,.2414
1017 ,Kurtosis,,.5300,.4783