EXAMINE: Implement the Shapiro-Wilk Test.
[pspp] / tests / language / stats / examine.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
3 dnl 
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl 
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl 
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
17 AT_BANNER([EXAMINE])
18
19 AT_SETUP([EXAMINE])
20 AT_KEYWORDS([categorical categoricals])
21 AT_DATA([examine.sps], [
22 DATA LIST LIST /QUALITY * W * BRAND * .
23 BEGIN DATA
24 3  1  1
25 2  2  1
26 1  2  1
27 1  1  1
28 4  1  1
29 4  1  1
30 5  1  2
31 2  1  2
32 4  4  2
33 2  1  2
34 3  1  2
35 7  1  3
36 4  2  3
37 5  3  3
38 3  1  3
39 6  1  3
40 END DATA
41
42 WEIGHT BY w.
43
44 VARIABLE LABELS brand   'Manufacturer'.
45 VARIABLE LABELS quality 'Breaking Strain'.
46
47 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
48
49 LIST /FORMAT=NUMBERED.
50
51 EXAMINE
52         quality BY brand
53         /STATISTICS descriptives extreme(3)
54         .
55 ])
56
57
58 dnl In the expected output below, only the Extreme Values tables have been
59 dnl checked.  The Descriptives tables were pasted in without independent verification.
60 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
61 Table: Reading free-form data from INLINE.
62 Variable,Format
63 QUALITY,F8.0
64 W,F8.0
65 BRAND,F8.0
66
67 Table: Data List
68 Case Number,QUALITY,W,BRAND
69 1,3.00,1.00,1.00
70 2,2.00,2.00,1.00
71 3,1.00,2.00,1.00
72 4,1.00,1.00,1.00
73 5,4.00,1.00,1.00
74 6,4.00,1.00,1.00
75 7,5.00,1.00,2.00
76 8,2.00,1.00,2.00
77 9,4.00,4.00,2.00
78 10,2.00,1.00,2.00
79 11,3.00,1.00,2.00
80 12,7.00,1.00,3.00
81 13,4.00,2.00,3.00
82 14,5.00,3.00,3.00
83 15,3.00,1.00,3.00
84 16,6.00,1.00,3.00
85
86 Table: Case Processing Summary
87 ,Cases,,,,,
88 ,Valid,,Missing,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 Breaking Strain,24.00,100.0%,.00,.0%,24.00,100.0%
91
92 Table: Extreme Values
93 ,,,Case Number,Value
94 Breaking Strain,Highest,1,12,7.00
95 ,,2,16,6.00
96 ,,3,14,5.00
97 ,Lowest,1,3,1.00
98 ,,2,4,1.00
99 ,,3,2,2.00
100
101 Table: Descriptives
102 ,,,Statistic,Std. Error
103 Breaking Strain,Mean,,3.54,.32
104 ,95% Confidence Interval for Mean,Lower Bound,2.87,
105 ,,Upper Bound,4.21,
106 ,5% Trimmed Mean,,3.50,
107 ,Median,,4.00,
108 ,Variance,,2.52,
109 ,Std. Deviation,,1.59,
110 ,Minimum,,1.00,
111 ,Maximum,,7.00,
112 ,Range,,6.00,
113 ,Interquartile Range,,2.75,
114 ,Skewness,,.06,.47
115 ,Kurtosis,,-.36,.92
116
117 Table: Case Processing Summary
118 ,,Cases,,,,,
119 ,,Valid,,Missing,,Total,
120 ,Manufacturer,N,Percent,N,Percent,N,Percent
121 Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
122 ,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
123 ,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
124
125 Table: Extreme Values
126 ,Manufacturer,,,Case Number,Value
127 Breaking Strain,Aspeger,Highest,1,6,4.00
128 ,,,2,5,4.00
129 ,,,3,1,3.00
130 ,,Lowest,1,3,1.00
131 ,,,2,4,1.00
132 ,,,3,2,2.00
133 ,Bloggs,Highest,1,7,5.00
134 ,,,2,9,4.00
135 ,,,3,11,3.00
136 ,,Lowest,1,8,2.00
137 ,,,2,10,2.00
138 ,,,3,11,3.00
139 ,Charlies,Highest,1,12,7.00
140 ,,,2,16,6.00
141 ,,,3,14,5.00
142 ,,Lowest,1,15,3.00
143 ,,,2,13,4.00
144 ,,,3,14,5.00
145
146 Table: Descriptives
147 ,Manufacturer,,,Statistic,Std. Error
148 Breaking Strain,Aspeger,Mean,,2.25,.45
149 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
150 ,,,Upper Bound,3.32,
151 ,,5% Trimmed Mean,,2.22,
152 ,,Median,,2.00,
153 ,,Variance,,1.64,
154 ,,Std. Deviation,,1.28,
155 ,,Minimum,,1.00,
156 ,,Maximum,,4.00,
157 ,,Range,,3.00,
158 ,,Interquartile Range,,2.75,
159 ,,Skewness,,.47,.75
160 ,,Kurtosis,,-1.55,1.48
161 ,Bloggs,Mean,,3.50,.38
162 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
163 ,,,Upper Bound,4.39,
164 ,,5% Trimmed Mean,,3.50,
165 ,,Median,,4.00,
166 ,,Variance,,1.14,
167 ,,Std. Deviation,,1.07,
168 ,,Minimum,,2.00,
169 ,,Maximum,,5.00,
170 ,,Range,,3.00,
171 ,,Interquartile Range,,1.75,
172 ,,Skewness,,-.47,.75
173 ,,Kurtosis,,-.83,1.48
174 ,Charlies,Mean,,4.88,.44
175 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
176 ,,,Upper Bound,5.92,
177 ,,5% Trimmed Mean,,4.86,
178 ,,Median,,5.00,
179 ,,Variance,,1.55,
180 ,,Std. Deviation,,1.25,
181 ,,Minimum,,3.00,
182 ,,Maximum,,7.00,
183 ,,Range,,4.00,
184 ,,Interquartile Range,,1.75,
185 ,,Skewness,,.30,.75
186 ,,Kurtosis,,.15,1.48
187 ])
188
189 AT_CLEANUP
190
dnl Checks the Extreme Values table produced by /STATISTICS=EXTREME(6) when
dnl the data is weighted by W and two cases (V1 = 3 and V1 = 18) carry weight 2.
191 AT_SETUP([EXAMINE -- extremes])
192 AT_KEYWORDS([categorical categoricals])
193 AT_DATA([examine.sps], [dnl
194 data list free /V1 W
195 begin data.
196 1  1
197 2  1
198 3  2
199 3  1
200 4  1
201 5  1
202 6  1
203 7  1
204 8  1
205 9  1
206 10 1
207 11 1
208 12 1
209 13 1
210 14 1
211 15 1
212 16 1
213 17 1
214 18 2
215 19 1
216 20 1
217 end data.
218
219 weight by w.
220
221 examine v1 
222  /statistics=extreme(6)
223  .
224 ])
225
226 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
227 Table: Case Processing Summary
228 ,Cases,,,,,
229 ,Valid,,Missing,,Total,
230 ,N,Percent,N,Percent,N,Percent
231 V1,23.00,100.0%,.00,.0%,23.00,100.0%
232
233 Table: Extreme Values
234 ,,,Case Number,Value
235 V1,Highest,1,21,20.00
236 ,,2,20,19.00
237 ,,3,19,18.00
238 ,,4,18,17.00
239 ,,5,17,16.00
240 ,,6,16,15.00
241 ,Lowest,1,1,1.00
242 ,,2,2,2.00
243 ,,3,3,3.00
244 ,,4,4,3.00
245 ,,5,5,4.00
246 ,,6,6,5.00
247 ])
248
249 AT_CLEANUP
250
251
252 AT_SETUP([EXAMINE -- extremes with fractional weights])
253 AT_KEYWORDS([categorical categoricals])
254 AT_DATA([extreme.sps], [dnl
255 set format=F20.3.
256 data list notable list /w * x *.
257 begin data.
258  0.88  300000
259  0.86  320000
260  0.98  480000
261  0.93  960000
262  1.35  960000
263  1.31  960000
264  0.88  960000
265  0.88  1080000
266  0.88  1080000
267  0.95  1200000
268  1.47  1200000
269  0.93  1200000
270  0.98  1320000
271  1.31  1380000
272  0.93  1440000
273  0.88  1560000
274  1.56  1560000
275  1.47  1560000
276 end data.
277
278 weight by w.
279
280
281 EXAMINE
282         x
283         /STATISTICS = DESCRIPTIVES EXTREME (5)
284         .
285 ])
286
287 AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
288 Table: Case Processing Summary
289 ,Cases,,,,,
290 ,Valid,,Missing,,Total,
291 ,N,Percent,N,Percent,N,Percent
292 x,19.430,100.0%,.000,.0%,19.430,100.0%
293
294 Table: Extreme Values
295 ,,,Case Number,Value
296 x,Highest,1,18,1560000.000
297 ,,2,17,1560000.000
298 ,,3,16,1560000.000
299 ,,4,15,1440000.000
300 ,,5,14,1380000.000
301 ,Lowest,1,1,300000.000
302 ,,2,2,320000.000
303 ,,3,3,480000.000
304 ,,4,4,960000.000
305 ,,5,5,960000.000
306
307 Table: Descriptives
308 ,,,Statistic,Std. Error
309 x,Mean,,1120010.293,86222.178
310 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
311 ,,Upper Bound,1300853.894,
312 ,5% Trimmed Mean,,1141017.899,
313 ,Median,,1200000.000,
314 ,Variance,,144447748124.869,
315 ,Std. Deviation,,380062.821,
316 ,Minimum,,300000.000,
317 ,Maximum,,1560000.000,
318 ,Range,,1260000.000,
319 ,Interquartile Range,,467258.065,
320 ,Skewness,,-.887,.519
321 ,Kurtosis,,.340,1.005
322 ])
323
324 AT_CLEANUP
325
326 dnl Test the PERCENTILES subcommand of the EXAMINE command.
327 dnl In particular test that it behaves properly when there are only 
328 dnl a few cases.
329 AT_SETUP([EXAMINE -- percentiles])
330 AT_KEYWORDS([categorical categoricals])
331 AT_DATA([examine.sps], [dnl
332 DATA LIST LIST /X *.
333 BEGIN DATA.
334 2.00 
335 8.00 
336 5.00 
337 END DATA.
338
339 EXAMINE /x
340         /PERCENTILES=HAVERAGE.
341
342 EXAMINE /x
343         /PERCENTILES=WAVERAGE.
344
345 EXAMINE /x
346         /PERCENTILES=ROUND.
347
348 EXAMINE /x
349         /PERCENTILES=EMPIRICAL.
350
351 EXAMINE /x
352         /PERCENTILES=AEMPIRICAL.
353 ])
354 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
355 AT_CHECK([cat pspp.csv], [0], [dnl
356 Table: Reading free-form data from INLINE.
357 Variable,Format
358 X,F8.0
359
360 Table: Case Processing Summary
361 ,Cases,,,,,
362 ,Valid,,Missing,,Total,
363 ,N,Percent,N,Percent,N,Percent
364 X,3,100.0%,0,.0%,3,100.0%
365
366 Table: Percentiles
367 ,,Percentiles,,,,,,
368 ,,5,10,25,50,75,90,95
369 X,Weighted Average,.40,.80,2.00,5.00,8.00,8.00,8.00
370 ,Tukey's Hinges,,,3.50,5.00,6.50,,
371
372 Table: Case Processing Summary
373 ,Cases,,,,,
374 ,Valid,,Missing,,Total,
375 ,N,Percent,N,Percent,N,Percent
376 X,3,100.0%,0,.0%,3,100.0%
377
378 Table: Percentiles
379 ,,Percentiles,,,,,,
380 ,,5,10,25,50,75,90,95
381 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
382 ,Tukey's Hinges,,,3.50,5.00,6.50,,
383
384 Table: Case Processing Summary
385 ,Cases,,,,,
386 ,Valid,,Missing,,Total,
387 ,N,Percent,N,Percent,N,Percent
388 X,3,100.0%,0,.0%,3,100.0%
389
390 Table: Percentiles
391 ,,Percentiles,,,,,,
392 ,,5,10,25,50,75,90,95
393 X,Weighted Average,.00,.00,2.00,5.00,5.00,8.00,8.00
394 ,Tukey's Hinges,,,3.50,5.00,6.50,,
395
396 Table: Case Processing Summary
397 ,Cases,,,,,
398 ,Valid,,Missing,,Total,
399 ,N,Percent,N,Percent,N,Percent
400 X,3,100.0%,0,.0%,3,100.0%
401
402 Table: Percentiles
403 ,,Percentiles,,,,,,
404 ,,5,10,25,50,75,90,95
405 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
406 ,Tukey's Hinges,,,3.50,5.00,6.50,,
407
408 Table: Case Processing Summary
409 ,Cases,,,,,
410 ,Valid,,Missing,,Total,
411 ,N,Percent,N,Percent,N,Percent
412 X,3,100.0%,0,.0%,3,100.0%
413
414 Table: Percentiles
415 ,,Percentiles,,,,,,
416 ,,5,10,25,50,75,90,95
417 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
418 ,Tukey's Hinges,,,3.50,5.00,6.50,,
419 ])
420 AT_CLEANUP
421
dnl Checks the Case Processing Summary under /MISSING=PAIRWISE when one case
dnl has a system-missing value for the dependent variable x.  The summary is
dnl checked both overall and per value of the BY variable y.
422 AT_SETUP([EXAMINE -- missing values])
423 AT_KEYWORDS([categorical categoricals])
424 AT_DATA([examine.sps], [dnl
425 DATA LIST LIST /x * y *.
426 BEGIN DATA.
427 1   1 
428 2   1
429 3   1
430 4   1
431 5   2
432 6   2
433 .   2
434 END DATA
435
436 EXAMINE /x by y
437         /MISSING = PAIRWISE
438         .
439 ])
440 AT_CHECK([pspp -o pspp.csv examine.sps])
441 AT_CHECK([cat pspp.csv], [0], [dnl
442 Table: Reading free-form data from INLINE.
443 Variable,Format
444 x,F8.0
445 y,F8.0
446
447 Table: Case Processing Summary
448 ,Cases,,,,,
449 ,Valid,,Missing,,Total,
450 ,N,Percent,N,Percent,N,Percent
451 x,6,85.7%,1,14.3%,7,100.0%
452
453 Table: Case Processing Summary
454 ,,Cases,,,,,
455 ,,Valid,,Missing,,Total,
456 ,y,N,Percent,N,Percent,N,Percent
457 x,1.00,4,100.0%,0,.0%,4,100.0%
458 ,2.00,2,66.7%,1,33.3%,3,100.0%
459 ])
460 AT_CLEANUP
461
462
dnl Checks that values declared user-missing with MISSING VALUES are counted
dnl as missing, separately for each variable, in the Case Processing Summary
dnl under /MISSING=PAIRWISE.
463 AT_SETUP([EXAMINE -- user missing values])
464 AT_KEYWORDS([categorical categoricals])
465 AT_DATA([examine-m.sps], [dnl
466 DATA LIST notable LIST /x * y *.
467 BEGIN DATA.
468 1                   2
469 9999999999          2
470 9999999999          99
471 END DATA.
472
473 MISSING VALUES x (9999999999).
474 MISSING VALUES y (99).
475
476 EXAMINE
477         /VARIABLES= x y
478         /MISSING=PAIRWISE.
479 ])
480 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
481 Table: Case Processing Summary
482 ,Cases,,,,,
483 ,Valid,,Missing,,Total,
484 ,N,Percent,N,Percent,N,Percent
485 x,1,33.3%,2,66.7%,3,100.0%
486 y,2,66.7%,1,33.3%,3,100.0%
487 ])
488 AT_CLEANUP
489
dnl Runs /PERCENTILES=HAVERAGE on a variable where two of the three values
dnl are user-missing (99), so only a single valid case remains.
490 AT_SETUP([EXAMINE -- missing values and percentiles])
491 AT_KEYWORDS([categorical categoricals])
492 AT_DATA([examine.sps], [dnl
493 DATA LIST LIST /X *.
494 BEGIN DATA.
495 99
496 99
497 5.00
498 END DATA.
499
500 MISSING VALUE X (99).
501
502 EXAMINE /x
503         /PERCENTILES=HAVERAGE.
504 ])
505 AT_CHECK([pspp -o pspp.csv examine.sps])
506 dnl Ignore output -- this is just a no-crash check.
507 AT_CLEANUP
508
509 dnl Tests the trimmed mean calculation in the case
510 dnl where the data is weighted towards the centre.
511 AT_SETUP([EXAMINE -- trimmed mean])
512 AT_KEYWORDS([categorical categoricals])
513 AT_DATA([examine.sps], [dnl
514 DATA LIST LIST /X * C *.
515 BEGIN DATA.
516 1 1
517 2 49
518 3 2
519 END DATA.
520
521 WEIGHT BY c.
522
523 EXAMINE
524         x
525         /STATISTICS=DESCRIPTIVES
526         .
527 ])
528 AT_CHECK([pspp -o pspp.csv examine.sps])
529 AT_CHECK([cat pspp.csv], [0], [dnl
530 Table: Reading free-form data from INLINE.
531 Variable,Format
532 X,F8.0
533 C,F8.0
534
535 Table: Case Processing Summary
536 ,Cases,,,,,
537 ,Valid,,Missing,,Total,
538 ,N,Percent,N,Percent,N,Percent
539 X,52.00,100.0%,.00,.0%,52.00,100.0%
540
541 Table: Descriptives
542 ,,,Statistic,Std. Error
543 X,Mean,,2.02,.03
544 ,95% Confidence Interval for Mean,Lower Bound,1.95,
545 ,,Upper Bound,2.09,
546 ,5% Trimmed Mean,,2.00,
547 ,Median,,2.00,
548 ,Variance,,.06,
549 ,Std. Deviation,,.24,
550 ,Minimum,,1.00,
551 ,Maximum,,3.00,
552 ,Range,,2.00,
553 ,Interquartile Range,,.00,
554 ,Skewness,,1.19,.33
555 ,Kurtosis,,15.73,.65
556 ])
557 AT_CLEANUP
558
dnl Regression test for a crash: runs EXAMINE with a dependent variable
dnl followed by two chained BY variables and /STATISTICS=DESCRIPTIVES on a
dnl three-case dataset.  The contents of pspp.csv are not examined.
559 AT_SETUP([EXAMINE -- crash bug])
560 AT_KEYWORDS([categorical categoricals])
561 AT_DATA([examine.sps], [dnl
562 data list list /a * x * y *.
563 begin data.
564 3 1 3
565 5 1 4
566 7 2 3
567 end data.
568
569 examine a by x by y
570         /statistics=DESCRIPTIVES
571         . 
572 ])
573 AT_CHECK([pspp -o pspp.csv examine.sps])
574 dnl Ignore output -- this is just a no-crash check.
575 AT_CLEANUP
576
577 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
578 AT_SETUP([EXAMINE -- consecutive runs don't crash])
579 AT_KEYWORDS([categorical categoricals])
580 AT_DATA([examine.sps], [dnl
581 data list list /y * z *.
582 begin data.
583 6 4
584 5 3
585 7 6
586 end data.
587
588 EXAMINE /VARIABLES= z BY y.
589
590 EXAMINE /VARIABLES= z. 
591 ])
592 AT_CHECK([pspp -o pspp.csv examine.sps])
593 dnl Ignore output -- this is just a no-crash check.
594 AT_CLEANUP
595
596 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
597 AT_SETUP([EXAMINE -- missing values don't crash])
598 AT_KEYWORDS([categorical categoricals])
599 AT_DATA([examine.sps], [dnl
600 data list list /x * y *.
601 begin data.
602 1 0
603 2 0
604 . 0
605 3 1
606 4 1
607 end data.
608 examine x by y /statistics=descriptives. 
609 ])
610 AT_CHECK([pspp -o pspp.csv examine.sps])
611 dnl Ignore output -- this is just a no-crash check.
612 AT_CLEANUP
613
614 dnl Test that having only a single case doesn't crash.
615 AT_SETUP([EXAMINE -- single case doesn't crash])
616 AT_KEYWORDS([categorical categoricals])
617 AT_DATA([examine.sps], [dnl
618 DATA LIST LIST /quality * .
619 BEGIN DATA
620 3  
621 END DATA
622
623
624 EXAMINE
625         quality 
626         /STATISTICS descriptives 
627         /PLOT = histogram
628         .
629 ])
630 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
631 dnl Ignore output -- this is just a no-crash check.
632 AT_CLEANUP
633
634 dnl Test that all-missing data doesn't crash.
635 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
636 AT_KEYWORDS([categorical categoricals])
637 AT_DATA([examine.sps], [dnl
638 DATA LIST LIST /x *.
639 BEGIN DATA.
640 .
641 .
642 .
643 .
644 END DATA.
645
646 EXAMINE /x 
647         PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
648         /ID=x
649         /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
650         /PERCENTILE=AEMPIRICAL
651         .
652 ])
653 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
654 dnl Ignore output -- this is just a no-crash check.
655 AT_CLEANUP
656
657 dnl Test that big input doesn't crash (bug 11307).
658 AT_SETUP([EXAMINE -- big input doesn't crash])
659 AT_KEYWORDS([categorical categoricals slow])
660 AT_DATA([examine.sps], [dnl
661 INPUT PROGRAM.
662         LOOP #I=1 TO 50000.
663                 COMPUTE X=NORMAL(10).
664                 END CASE.
665         END LOOP.
666         END FILE.
667 END INPUT PROGRAM.
668
669
670 EXAMINE /x
671         /STATISTICS=DESCRIPTIVES.
672 ])
673 AT_CHECK([pspp -o pspp.csv examine.sps])
674 dnl Ignore output -- this is just a no-crash check.
675 AT_CLEANUP
676
677 dnl Another test that big input doesn't crash.
678 dnl The actual bug that this checks for has been lost.
679 AT_SETUP([EXAMINE -- big input doesn't crash 2])
680 AT_KEYWORDS([categorical categoricals slow])
681 AT_DATA([make-big-input.pl], 
682   [for ($i=0; $i<100000; $i++) { print "AB12\n" };
683    for ($i=0; $i<100000; $i++) { print "AB04\n" };
684 ])
685 AT_CHECK([$PERL make-big-input.pl > large.txt])
686 AT_DATA([examine.sps], [dnl
687 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
688
689
690 AGGREGATE OUTFILE=* /BREAK=X /A=N.
691
692
693 EXAMINE /A BY X.
694 ])
695 AT_CHECK([pspp -o pspp.csv examine.sps])
696 dnl Ignore output -- this is just a no-crash check.
697 AT_DATA([more-big-input.pl], 
698   [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
699 ])
700 AT_CHECK([$PERL more-big-input.pl >> large.txt])
701 AT_CHECK([pspp -o pspp.csv examine.sps])
702 dnl Ignore output -- this is just a no-crash check.
703 AT_CLEANUP
704
705
706 dnl Test that the ID subcommand works with non-numeric variables.
707 AT_SETUP([EXAMINE -- non-numeric ID])
708 AT_KEYWORDS([categorical categoricals])
709
710 AT_DATA([examine-id.sps], [dnl
711 data list notable list /x * y (a12).
712 begin data.
713 1  one
714 2  two
715 3  three
716 4  four
717 5  five
718 6  six
719 7  seven
720 8  eight
721 9  nine
722 10 ten
723 11 eleven
724 12 twelve
725 30 thirty
726 300 threehundred
727 end data.
728
729 examine x
730         /statistics = extreme
731         /id = y
732         /plot = boxplot
733         .
734 ])
735
736 AT_CHECK([pspp -O format=csv examine-id.sps], [0], [dnl
737 Table: Case Processing Summary
738 ,Cases,,,,,
739 ,Valid,,Missing,,Total,
740 ,N,Percent,N,Percent,N,Percent
741 x,14,100.0%,0,.0%,14,100.0%
742
743 Table: Extreme Values
744 ,,,y,Value
745 x,Highest,1,threehundred,300.00
746 ,,2,thirty,30.00
747 ,,3,twelve,12.00
748 ,,4,eleven,11.00
749 ,,5,ten,10.00
750 ,Lowest,1,one,1.00
751 ,,2,two,2.00
752 ,,3,three,3.00
753 ,,4,four,4.00
754 ,,5,five,5.00
755
756 Table: Tests of Normality
757 ,Shapiro-Wilk,,
758 ,Statistic,df,Sig.
759 x,.37,14,.00
760 ])
761
762 AT_CLEANUP 
763
764 dnl Test for a crash which happened on cleanup from a bad input syntax
765 AT_SETUP([EXAMINE -- Bad Input])
766 AT_KEYWORDS([categorical categoricals])
767
768 AT_DATA([examine-bad.sps], [dnl
769 data list list /h * g *.
770 begin data.
771 1 1
772 2 1
773 3 1
774 4 1
775 5 2
776 6 2
777 7 2
778 8 2
779 9 2
780 end data.
781
782 EXAMINE 
783         /VARIABLES= h
784         BY  g
785         /STATISTICS = DESCRIPTIVES EXTREME
786         /PLOT = lkajsdas
787         .
788 ])
789
790 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
791
792 AT_CLEANUP 
793
794
795 dnl Check the MISSING=REPORT option
796 AT_SETUP([EXAMINE -- MISSING=REPORT])
797 AT_KEYWORDS([categorical categoricals])
798
799 AT_DATA([examine-report.sps], [dnl
800 set format = F22.0.
801 data list list /x * g *.
802 begin data.
803 1   1
804 2   1
805 3   1
806 4   1
807 5   1
808 6   1
809 7   1
810 8   1
811 9   1
812 10   2
813 20   2
814 30   2
815 40   2
816 50   2
817 60   2
818 70   2
819 80   2
820 90   2
821 101   9
822 201   9
823 301   9
824 401   9
825 501   99
826 601   99
827 701   99
828 801   99
829 901   99
830 1001  .
831 2002  .
832 3003  .
833 4004  .
834 end data.
835
836 MISSING VALUES g (9, 99, 999).
837
838 EXAMINE
839         /VARIABLES = x
840         BY  g
841         /STATISTICS = EXTREME
842         /NOTOTAL
843         /MISSING = REPORT.
844 ])
845
846
847 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine-report.sps])
848 AT_CHECK([cat pspp.csv], [0],
849   [[Table: Reading free-form data from INLINE.
850 Variable,Format
851 x,F8.0
852 g,F8.0
853
854 Table: Case Processing Summary
855 ,,Cases,,,,,
856 ,,Valid,,Missing,,Total,
857 ,g,N,Percent,N,Percent,N,Percent
858 x,.,4,100.0%,0,.0%,4,100.0%
859 ,1,9,100.0%,0,.0%,9,100.0%
860 ,2,9,100.0%,0,.0%,9,100.0%
861 ,9[a],4,100.0%,0,.0%,4,100.0%
862 ,99[a],5,100.0%,0,.0%,5,100.0%
863
864 Footnotes:
865 a,User-missing value.
866
867 Table: Extreme Values
868 ,g,,,Case Number,Value
869 x,.,Highest,1,31,4004
870 ,,,2,30,3003
871 ,,,3,29,2002
872 ,,,4,28,1001
873 ,,,5,0,0
874 ,,Lowest,1,28,1001
875 ,,,2,29,2002
876 ,,,3,30,3003
877 ,,,4,31,4004
878 ,,,5,31,4004
879 ,1,Highest,1,9,9
880 ,,,2,8,8
881 ,,,3,7,7
882 ,,,4,6,6
883 ,,,5,5,5
884 ,,Lowest,1,1,1
885 ,,,2,2,2
886 ,,,3,3,3
887 ,,,4,4,4
888 ,,,5,5,5
889 ,2,Highest,1,18,90
890 ,,,2,17,80
891 ,,,3,16,70
892 ,,,4,15,60
893 ,,,5,14,50
894 ,,Lowest,1,10,10
895 ,,,2,11,20
896 ,,,3,12,30
897 ,,,4,13,40
898 ,,,5,14,50
899 ,9[a],Highest,1,22,401
900 ,,,2,21,301
901 ,,,3,20,201
902 ,,,4,19,101
903 ,,,5,0,0
904 ,,Lowest,1,19,101
905 ,,,2,20,201
906 ,,,3,21,301
907 ,,,4,22,401
908 ,,,5,22,401
909 ,99[a],Highest,1,27,901
910 ,,,2,26,801
911 ,,,3,25,701
912 ,,,4,24,601
913 ,,,5,23,501
914 ,,Lowest,1,23,501
915 ,,,2,24,601
916 ,,,3,25,701
917 ,,,4,26,801
918 ,,,5,27,901
919
920 Footnotes:
921 a,User-missing value.
922 ]])
923
924 AT_CLEANUP 
925
926
927 dnl Run a test of the basic STATISTICS using a "real"
928 dnl dataset and compare with "real" results kindly
929 dnl provided by Olaf Nöhring.
930 AT_SETUP([EXAMINE -- sample unweighted])
931 AT_KEYWORDS([categorical categoricals])
932
933 AT_DATA([sample.sps], [dnl
934 set format = F22.4.
935 DATA LIST notable LIST /X *
936 BEGIN DATA.
937 461.19000000
938 466.38000000
939 479.46000000
940 480.10000000
941 483.43000000
942 488.30000000
943 489.00000000
944 491.62000000
945 505.62000000
946 511.30000000
947 521.53000000
948 526.70000000
949 528.25000000
950 538.70000000
951 540.22000000
952 540.58000000
953 546.10000000
954 548.17000000
955 553.99000000
956 566.21000000
957 575.90000000
958 584.38000000
959 593.40000000
960 357.05000000
961 359.73000000
962 360.48000000
963 373.98000000
964 374.13000000
965 381.45000000
966 383.72000000
967 390.00000000
968 400.34000000
969 415.32000000
970 415.91000000
971 418.30000000
972 421.03000000
973 422.43000000
974 426.93000000
975 433.25000000
976 436.89000000
977 445.33000000
978 446.33000000
979 446.55000000
980 456.44000000
981 689.49000000
982 691.92000000
983 695.00000000
984 695.36000000
985 698.21000000
986 699.46000000
987 706.61000000
988 710.69000000
989 715.82000000
990 715.82000000
991 741.39000000
992 752.27000000
993 756.73000000
994 757.74000000
995 759.57000000
996 796.07000000
997 813.78000000
998 817.25000000
999 825.48000000
1000 831.28000000
1001 849.24000000
1002 890.00000000
1003 894.78000000
1004 935.65000000
1005 935.90000000
1006 945.90000000
1007 1012.8600000
1008 1022.6000000
1009 1061.8100000
1010 1063.5000000
1011 1077.2300000
1012 1151.6300000
1013 1355.2800000
1014 598.88000000
1015 606.91000000
1016 621.60000000
1017 624.80000000
1018 636.13000000
1019 637.38000000
1020 640.32000000
1021 649.35000000
1022 656.51000000
1023 662.55000000
1024 664.69000000
1025 106.22000000
1026 132.24000000
1027 174.76000000
1028 204.85000000
1029 264.93000000
1030 264.99000000
1031 269.84000000
1032 325.12000000
1033 331.67000000
1034 337.26000000
1035 347.68000000
1036 354.91000000
1037 END DATA.
1038
1039 EXAMINE
1040         x
1041         /STATISTICS=DESCRIPTIVES
1042         .
1043 ])
1044
1045 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1046 Table: Case Processing Summary
1047 ,Cases,,,,,
1048 ,Valid,,Missing,,Total,
1049 ,N,Percent,N,Percent,N,Percent
1050 X,100,100.0%,0,.0%,100,100.0%
1051
1052 Table: Descriptives
1053 ,,,Statistic,Std. Error
1054 X,Mean,,587.6603,23.2665
1055 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1056 ,,Upper Bound,633.8260,
1057 ,5% Trimmed Mean,,579.7064,
1058 ,Median,,547.1350,
1059 ,Variance,,54132.8466,
1060 ,Std. Deviation,,232.6647,
1061 ,Minimum,,106.2200,
1062 ,Maximum,,1355.2800,
1063 ,Range,,1249.0600,
1064 ,Interquartile Range,,293.1575,
1065 ,Skewness,,.6331,.2414
1066 ,Kurtosis,,.5300,.4783
1067 ])
1068
1069 AT_CLEANUP 
1070
1071
1072
1073 dnl Test for a crash which happened on bad input syntax
1074 AT_SETUP([EXAMINE -- Empty Parentheses])
1075 AT_KEYWORDS([categorical categoricals])
1076
1077 AT_DATA([examine-empty-parens.sps], [dnl
1078 DATA LIST notable LIST /X *
1079 BEGIN DATA.
1080 2
1081 3
1082 END DATA.
1083
1084
1085 EXAMINE
1086         x
1087         /PLOT = SPREADLEVEL()
1088         .
1089 ])
1090
1091 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1092
1093 AT_CLEANUP 
1094
1095
1096
1097
1098 dnl Test for another crash which happened on bad input syntax
1099 AT_SETUP([EXAMINE -- Bad variable])
1100 AT_KEYWORDS([categorical categoricals])
1101
1102 AT_DATA([examine-bad-variable.sps], [dnl
1103 data list list /h * g *.
1104 begin data.
1105 3 1
1106 4 1
1107 5 2
1108 end data.
1109
1110 EXAMINE
1111         /VARIABLES/ h
1112         BY  g
1113         .
1114 ])
1115
1116 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1117
1118 AT_CLEANUP 
1119
1120
1121
1122 dnl Test for yet another crash. This time for extremes vs. missing weight values.
1123 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1124 AT_KEYWORDS([categorical categoricals])
1125
1126 AT_DATA([examine-missing-weights.sps], [dnl
1127 data list notable list /h * g *.
1128 begin data.
1129 3 1
1130 4 .
1131 5 1
1132 2 1
1133 end data.
1134
1135 WEIGHT BY g.
1136
1137 EXAMINE h
1138         /STATISTICS extreme(3)
1139         .
1140 ])
1141
1142 AT_CHECK([pspp -O format=csv  examine-missing-weights.sps], [0], [dnl
1143 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1144
1145 Table: Case Processing Summary
1146 ,Cases,,,,,
1147 ,Valid,,Missing,,Total,
1148 ,N,Percent,N,Percent,N,Percent
1149 h,3.00,100.0%,.00,.0%,3.00,100.0%
1150
1151 Table: Extreme Values
1152 ,,,Case Number,Value
1153 h,Highest,1,3,5.00
1154 ,,2,2,4.00
1155 ,,3,1,3.00
1156 ,Lowest,1,4,2.00
1157 ,,2,1,3.00
1158 ,,3,2,4.00
1159 ])
1160
1161 AT_CLEANUP 
1162
1163 dnl This is an example from doc/tutorial.texi
1164 dnl So if the results of this have to be changed in any way,
1165 dnl make sure to update that file.
1166 AT_SETUP([EXAMINE tutorial example 1])
1167 cp $top_srcdir/examples/repairs.sav .
1168 AT_DATA([repairs.sps], [dnl
1169 GET FILE='repairs.sav'.
1170 EXAMINE mtbf /STATISTICS=DESCRIPTIVES.
1171 COMPUTE mtbf_ln = LN (mtbf).
1172 EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
1173 ])
1174 AT_CHECK([pspp -o pspp.csv -o pspp.txt repairs.sps])
1175 AT_CHECK([cat pspp.csv], [0], [dnl
1176 Table: Case Processing Summary
1177 ,Cases,,,,,
1178 ,Valid,,Missing,,Total,
1179 ,N,Percent,N,Percent,N,Percent
1180 Mean time between failures (months) ,15,100.0%,0,.0%,15,100.0%
1181
1182 Table: Descriptives
1183 ,,,Statistic,Std. Error
1184 Mean time between failures (months) ,Mean,,8.32,1.62
1185 ,95% Confidence Interval for Mean,Lower Bound,4.85,
1186 ,,Upper Bound,11.79,
1187 ,5% Trimmed Mean,,7.69,
1188 ,Median,,8.12,
1189 ,Variance,,39.21,
1190 ,Std. Deviation,,6.26,
1191 ,Minimum,,1.63,
1192 ,Maximum,,26.47,
1193 ,Range,,24.84,
1194 ,Interquartile Range,,5.83,
1195 ,Skewness,,1.85,.58
1196 ,Kurtosis,,4.49,1.12
1197
1198 Table: Case Processing Summary
1199 ,Cases,,,,,
1200 ,Valid,,Missing,,Total,
1201 ,N,Percent,N,Percent,N,Percent
1202 mtbf_ln,15,100.0%,0,.0%,15,100.0%
1203
1204 Table: Descriptives
1205 ,,,Statistic,Std. Error
1206 mtbf_ln,Mean,,1.88,.19
1207 ,95% Confidence Interval for Mean,Lower Bound,1.47,
1208 ,,Upper Bound,2.29,
1209 ,5% Trimmed Mean,,1.88,
1210 ,Median,,2.09,
1211 ,Variance,,.54,
1212 ,Std. Deviation,,.74,
1213 ,Minimum,,.49,
1214 ,Maximum,,3.28,
1215 ,Range,,2.79,
1216 ,Interquartile Range,,.92,
1217 ,Skewness,,-.16,.58
1218 ,Kurtosis,,-.09,1.12
1219 ])
1220 AT_CLEANUP
1221
1222 dnl This is an example from doc/tutorial.texi
1223 dnl So if the results of this have to be changed in any way,
1224 dnl make sure to update that file.
1225 AT_SETUP([EXAMINE tutorial example 2])
1226 cp $top_srcdir/examples/physiology.sav .
1227 AT_DATA([examine.sps], [dnl
1228 GET FILE='physiology.sav'.
1229 EXAMINE height, weight /STATISTICS=EXTREME(3).
1230 ])
1231 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
1232 AT_CHECK([cat pspp.csv], [0], [dnl
1233 Table: Case Processing Summary
1234 ,Cases,,,,,
1235 ,Valid,,Missing,,Total,
1236 ,N,Percent,N,Percent,N,Percent
1237 Height in millimeters   ,40,100.0%,0,.0%,40,100.0%
1238 Weight in kilograms ,40,100.0%,0,.0%,40,100.0%
1239
1240 Table: Extreme Values
1241 ,,,Case Number,Value
1242 Height in millimeters   ,Highest,1,14,1903
1243 ,,2,15,1884
1244 ,,3,12,1802
1245 ,Lowest,1,30,179
1246 ,,2,31,1598
1247 ,,3,28,1601
1248 Weight in kilograms ,Highest,1,13,92.1
1249 ,,2,5,92.1
1250 ,,3,17,91.7
1251 ,Lowest,1,38,-55.6
1252 ,,2,39,54.5
1253 ,,3,33,55.4
1254 ])
1255 AT_CLEANUP
1256
1257
1258
dnl Regression test for a crash when EXAMINE is asked for every plot
dnl (/plot = all) on a single-case dataset split by g, so the requested
dnl graphs cannot be drawn.  The script is syntactically valid, so pspp is
dnl expected to exit with status 0, like the other no-crash checks in this
dnl file; status 1 is reserved for the tests that feed deliberately bad syntax.
1259 AT_SETUP([EXAMINE -- Crash on unrepresentable graphs])
1260 AT_DATA([examine.sps], [dnl
1261 data list notable list /x * g *.
1262 begin data.
1263 96 1
1264 end data.
1265
1266 examine x  by g
1267         /nototal
1268         /plot = all.
1269 ])
1270 dnl This bug only manifested itself on Cairo-based drivers, hence format=pdf.
1271 AT_CHECK([pspp -O format=pdf examine.sps], [0], [ignore])
1272 AT_CLEANUP
1273
1274
1275 dnl This example comes from the web site:
1276 dnl  https://www.spsstests.com/2018/11/shapiro-wilk-normality-test-spss.html
1277 AT_SETUP([EXAMINE -- shapiro-wilk 1])
1278 AT_KEYWORDS([shapiro wilk])
1279 AT_DATA([shapiro-wilk.sps], [dnl
1280 data list notable list /x * g *.
1281 begin data.
1282 96 1
1283 98 1
1284 95 1
1285 89 1
1286 90 1
1287 92 1
1288 94 1
1289 93 1
1290 97 1
1291 100 1
1292 99 2
1293 96 2
1294 80 2
1295 89 2
1296 91 2
1297 92 2
1298 93 2
1299 94 2
1300 99 2
1301 80 2
1302 end data.
1303
1304 set format F22.3.
1305
1306 examine x  by g
1307         /nototal
1308         /plot = all.
1309 ])
1310
1311 AT_CHECK([pspp -O format=csv shapiro-wilk.sps], [0],[dnl
1312 Table: Case Processing Summary
1313 ,,Cases,,,,,
1314 ,,Valid,,Missing,,Total,
1315 ,g,N,Percent,N,Percent,N,Percent
1316 x,1.00,10,100.0%,0,.0%,10,100.0%
1317 ,2.00,10,100.0%,0,.0%,10,100.0%
1318
1319 Table: Tests of Normality
1320 ,,Shapiro-Wilk,,
1321 ,g,Statistic,df,Sig.
1322 x,1.00,.984,10,.983
1323 ,2.00,.882,10,.136
1324 ])
1325
1326 AT_CLEANUP
1327
1328
1329 dnl This example comes from the web site:
1330 dnl  http://www.real-statistics.com/tests-normality-and-symmetry/statistical-tests-normality-symmetry/shapiro-wilk-expanded-test/
1331 dnl It uses a dataset larger than 11 samples. Hence the alternative method for
1332 dnl significance is used.
1333 AT_SETUP([EXAMINE -- shapiro-wilk 2])
1334 AT_KEYWORDS([shapiro wilk])
1335 AT_DATA([shapiro-wilk2.sps], [dnl
1336 data list notable list /x *.
1337 begin data.
1338 65
1339 61
1340 63
1341 86
1342 70
1343 55
1344 74
1345 35
1346 72
1347 68
1348 45
1349 58
1350 end data.
1351
1352 set format F22.3.
1353
1354 examine x
1355         /plot = boxplot.
1356 ])
1357
1358 AT_CHECK([pspp -O format=csv shapiro-wilk2.sps], [0],[dnl
1359 Table: Case Processing Summary
1360 ,Cases,,,,,
1361 ,Valid,,Missing,,Total,
1362 ,N,Percent,N,Percent,N,Percent
1363 x,12,100.0%,0,.0%,12,100.0%
1364
1365 Table: Tests of Normality
1366 ,Shapiro-Wilk,,
1367 ,Statistic,df,Sig.
1368 x,.971,12,.922
1369 ])
1370
1371 AT_CLEANUP