Convert all Perl build tools to Python and remove Perl build dependency.
[pspp] / tests / language / stats / examine.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
3 dnl
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
17 AT_BANNER([EXAMINE])
18
19 AT_SETUP([EXAMINE])
20 AT_KEYWORDS([categorical categoricals])
21 AT_DATA([examine.sps], [
22 DATA LIST LIST /QUALITY * W * BRAND * .
23 BEGIN DATA
24 3  1  1
25 2  2  1
26 1  2  1
27 1  1  1
28 4  1  1
29 4  1  1
30 5  1  2
31 2  1  2
32 4  4  2
33 2  1  2
34 3  1  2
35 7  1  3
36 4  2  3
37 5  3  3
38 3  1  3
39 6  1  3
40 END DATA
41
42 WEIGHT BY w.
43
44 VARIABLE LABELS brand   'Manufacturer'.
45 VARIABLE LABELS quality 'Breaking Strain'.
46
47 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
48
49 LIST /FORMAT=NUMBERED.
50
51 EXAMINE
52         quality BY brand
53         /STATISTICS descriptives extreme(3)
54         .
55 ])
56
57
58 dnl In the following data, only the extreme values have been checked.
59 dnl The descriptives have been blindly pasted.
60 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
61 Table: Reading free-form data from INLINE.
62 Variable,Format
63 QUALITY,F8.0
64 W,F8.0
65 BRAND,F8.0
66
67 Table: Data List
68 Case Number,QUALITY,W,BRAND
69 1,3.00,1.00,1.00
70 2,2.00,2.00,1.00
71 3,1.00,2.00,1.00
72 4,1.00,1.00,1.00
73 5,4.00,1.00,1.00
74 6,4.00,1.00,1.00
75 7,5.00,1.00,2.00
76 8,2.00,1.00,2.00
77 9,4.00,4.00,2.00
78 10,2.00,1.00,2.00
79 11,3.00,1.00,2.00
80 12,7.00,1.00,3.00
81 13,4.00,2.00,3.00
82 14,5.00,3.00,3.00
83 15,3.00,1.00,3.00
84 16,6.00,1.00,3.00
85
86 Table: Case Processing Summary
87 ,Cases,,,,,
88 ,Valid,,Missing,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 Breaking Strain,24.00,100.0%,.00,.0%,24.00,100.0%
91
92 Table: Extreme Values
93 ,,,Case Number,Value
94 Breaking Strain,Highest,1,12,7.00
95 ,,2,16,6.00
96 ,,3,14,5.00
97 ,Lowest,1,3,1.00
98 ,,2,4,1.00
99 ,,3,2,2.00
100
101 Table: Descriptives
102 ,,,Statistic,Std. Error
103 Breaking Strain,Mean,,3.54,.32
104 ,95% Confidence Interval for Mean,Lower Bound,2.87,
105 ,,Upper Bound,4.21,
106 ,5% Trimmed Mean,,3.50,
107 ,Median,,4.00,
108 ,Variance,,2.52,
109 ,Std. Deviation,,1.59,
110 ,Minimum,,1.00,
111 ,Maximum,,7.00,
112 ,Range,,6.00,
113 ,Interquartile Range,,2.75,
114 ,Skewness,,.06,.47
115 ,Kurtosis,,-.36,.92
116
117 Table: Case Processing Summary
118 ,Manufacturer,Cases,,,,,
119 ,,Valid,,Missing,,Total,
120 ,,N,Percent,N,Percent,N,Percent
121 Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
122 ,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
123 ,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
124
125 Table: Extreme Values
126 ,Manufacturer,,,Case Number,Value
127 Breaking Strain,Aspeger,Highest,1,6,4.00
128 ,,,2,5,4.00
129 ,,,3,1,3.00
130 ,,Lowest,1,3,1.00
131 ,,,2,4,1.00
132 ,,,3,2,2.00
133 ,Bloggs,Highest,1,7,5.00
134 ,,,2,9,4.00
135 ,,,3,11,3.00
136 ,,Lowest,1,8,2.00
137 ,,,2,10,2.00
138 ,,,3,11,3.00
139 ,Charlies,Highest,1,12,7.00
140 ,,,2,16,6.00
141 ,,,3,14,5.00
142 ,,Lowest,1,15,3.00
143 ,,,2,13,4.00
144 ,,,3,14,5.00
145
146 Table: Descriptives
147 ,Manufacturer,,,Statistic,Std. Error
148 Breaking Strain,Aspeger,Mean,,2.25,.45
149 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
150 ,,,Upper Bound,3.32,
151 ,,5% Trimmed Mean,,2.22,
152 ,,Median,,2.00,
153 ,,Variance,,1.64,
154 ,,Std. Deviation,,1.28,
155 ,,Minimum,,1.00,
156 ,,Maximum,,4.00,
157 ,,Range,,3.00,
158 ,,Interquartile Range,,2.75,
159 ,,Skewness,,.47,.75
160 ,,Kurtosis,,-1.55,1.48
161 ,Bloggs,Mean,,3.50,.38
162 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
163 ,,,Upper Bound,4.39,
164 ,,5% Trimmed Mean,,3.50,
165 ,,Median,,4.00,
166 ,,Variance,,1.14,
167 ,,Std. Deviation,,1.07,
168 ,,Minimum,,2.00,
169 ,,Maximum,,5.00,
170 ,,Range,,3.00,
171 ,,Interquartile Range,,1.75,
172 ,,Skewness,,-.47,.75
173 ,,Kurtosis,,-.83,1.48
174 ,Charlies,Mean,,4.88,.44
175 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
176 ,,,Upper Bound,5.92,
177 ,,5% Trimmed Mean,,4.86,
178 ,,Median,,5.00,
179 ,,Variance,,1.55,
180 ,,Std. Deviation,,1.25,
181 ,,Minimum,,3.00,
182 ,,Maximum,,7.00,
183 ,,Range,,4.00,
184 ,,Interquartile Range,,1.75,
185 ,,Skewness,,.30,.75
186 ,,Kurtosis,,.15,1.48
187 ])
188
189 AT_CLEANUP
190
191 AT_SETUP([EXAMINE -- extremes])
192 AT_KEYWORDS([categorical categoricals])
193 AT_DATA([examine.sps], [dnl
194 data list free /V1 W
195 begin data.
196 1  1
197 2  1
198 3  2
199 3  1
200 4  1
201 5  1
202 6  1
203 7  1
204 8  1
205 9  1
206 10 1
207 11 1
208 12 1
209 13 1
210 14 1
211 15 1
212 16 1
213 17 1
214 18 2
215 19 1
216 20 1
217 end data.
218
219 weight by w.
220
221 examine v1
222  /statistics=extreme(6)
223  .
224 ])
225
226 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
227 Table: Case Processing Summary
228 ,Cases,,,,,
229 ,Valid,,Missing,,Total,
230 ,N,Percent,N,Percent,N,Percent
231 V1,23.00,100.0%,.00,.0%,23.00,100.0%
232
233 Table: Extreme Values
234 ,,,Case Number,Value
235 V1,Highest,1,21,20.00
236 ,,2,20,19.00
237 ,,3,19,18.00
238 ,,4,18,17.00
239 ,,5,17,16.00
240 ,,6,16,15.00
241 ,Lowest,1,1,1.00
242 ,,2,2,2.00
243 ,,3,3,3.00
244 ,,4,4,3.00
245 ,,5,5,4.00
246 ,,6,6,5.00
247 ])
248
249 AT_CLEANUP
250
251
252 AT_SETUP([EXAMINE -- extremes with fractional weights])
253 AT_KEYWORDS([categorical categoricals])
254 AT_DATA([extreme.sps], [dnl
255 set format=F20.3.
256 data list notable list /w * x *.
257 begin data.
258  0.88  300000
259  0.86  320000
260  0.98  480000
261  0.93  960000
262  1.35  960000
263  1.31  960000
264  0.88  960000
265  0.88  1080000
266  0.88  1080000
267  0.95  1200000
268  1.47  1200000
269  0.93  1200000
270  0.98  1320000
271  1.31  1380000
272  0.93  1440000
273  0.88  1560000
274  1.56  1560000
275  1.47  1560000
276 end data.
277
278 weight by w.
279
280
281 EXAMINE
282         x
283         /STATISTICS = DESCRIPTIVES EXTREME (5)
284         .
285 ])
286
287 AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
288 Table: Case Processing Summary
289 ,Cases,,,,,
290 ,Valid,,Missing,,Total,
291 ,N,Percent,N,Percent,N,Percent
292 x,19.430,100.0%,.000,.0%,19.430,100.0%
293
294 Table: Extreme Values
295 ,,,Case Number,Value
296 x,Highest,1,18,1560000.000
297 ,,2,17,1560000.000
298 ,,3,16,1560000.000
299 ,,4,15,1440000.000
300 ,,5,14,1380000.000
301 ,Lowest,1,1,300000.000
302 ,,2,2,320000.000
303 ,,3,3,480000.000
304 ,,4,4,960000.000
305 ,,5,5,960000.000
306
307 Table: Descriptives
308 ,,,Statistic,Std. Error
309 x,Mean,,1120010.293,86222.178
310 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
311 ,,Upper Bound,1300853.894,
312 ,5% Trimmed Mean,,1141017.899,
313 ,Median,,1200000.000,
314 ,Variance,,144447748124.869,
315 ,Std. Deviation,,380062.821,
316 ,Minimum,,300000.000,
317 ,Maximum,,1560000.000,
318 ,Range,,1260000.000,
319 ,Interquartile Range,,467258.065,
320 ,Skewness,,-.887,.519
321 ,Kurtosis,,.340,1.005
322 ])
323
324 AT_CLEANUP
325
326 dnl Test the PERCENTILES subcommand of the EXAMINE command.
327 dnl In particular test that it behaves properly when there are only
328 dnl a few cases.
329 AT_SETUP([EXAMINE -- percentiles])
330 AT_KEYWORDS([categorical categoricals])
331 AT_DATA([examine.sps], [dnl
332 DATA LIST LIST /X *.
333 BEGIN DATA.
334 2.00
335 8.00
336 5.00
337 END DATA.
338
339 EXAMINE /x
340         /PERCENTILES=HAVERAGE.
341
342 EXAMINE /x
343         /PERCENTILES=WAVERAGE.
344
345 EXAMINE /x
346         /PERCENTILES=ROUND.
347
348 EXAMINE /x
349         /PERCENTILES=EMPIRICAL.
350
351 EXAMINE /x
352         /PERCENTILES=AEMPIRICAL.
353 ])
354 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
355 AT_CHECK([cat pspp.csv], [0], [dnl
356 Table: Reading free-form data from INLINE.
357 Variable,Format
358 X,F8.0
359
360 Table: Case Processing Summary
361 ,Cases,,,,,
362 ,Valid,,Missing,,Total,
363 ,N,Percent,N,Percent,N,Percent
364 X,3,100.0%,0,.0%,3,100.0%
365
366 Table: Percentiles
367 ,,Percentiles,,,,,,
368 ,,5,10,25,50,75,90,95
369 X,Weighted Average,.40,.80,2.00,5.00,8.00,8.00,8.00
370 ,Tukey's Hinges,,,3.50,5.00,6.50,,
371
372 Table: Case Processing Summary
373 ,Cases,,,,,
374 ,Valid,,Missing,,Total,
375 ,N,Percent,N,Percent,N,Percent
376 X,3,100.0%,0,.0%,3,100.0%
377
378 Table: Percentiles
379 ,,Percentiles,,,,,,
380 ,,5,10,25,50,75,90,95
381 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
382 ,Tukey's Hinges,,,3.50,5.00,6.50,,
383
384 Table: Case Processing Summary
385 ,Cases,,,,,
386 ,Valid,,Missing,,Total,
387 ,N,Percent,N,Percent,N,Percent
388 X,3,100.0%,0,.0%,3,100.0%
389
390 Table: Percentiles
391 ,,Percentiles,,,,,,
392 ,,5,10,25,50,75,90,95
393 X,Weighted Average,.00,.00,2.00,5.00,5.00,8.00,8.00
394 ,Tukey's Hinges,,,3.50,5.00,6.50,,
395
396 Table: Case Processing Summary
397 ,Cases,,,,,
398 ,Valid,,Missing,,Total,
399 ,N,Percent,N,Percent,N,Percent
400 X,3,100.0%,0,.0%,3,100.0%
401
402 Table: Percentiles
403 ,,Percentiles,,,,,,
404 ,,5,10,25,50,75,90,95
405 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
406 ,Tukey's Hinges,,,3.50,5.00,6.50,,
407
408 Table: Case Processing Summary
409 ,Cases,,,,,
410 ,Valid,,Missing,,Total,
411 ,N,Percent,N,Percent,N,Percent
412 X,3,100.0%,0,.0%,3,100.0%
413
414 Table: Percentiles
415 ,,Percentiles,,,,,,
416 ,,5,10,25,50,75,90,95
417 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
418 ,Tukey's Hinges,,,3.50,5.00,6.50,,
419 ])
420 AT_CLEANUP
421
422 AT_SETUP([EXAMINE -- missing values])
423 AT_KEYWORDS([categorical categoricals])
424 AT_DATA([examine.sps], [dnl
425 DATA LIST LIST /x * y *.
426 BEGIN DATA.
427 1   1
428 2   1
429 3   1
430 4   1
431 5   2
432 6   2
433 .   2
434 END DATA
435
436 EXAMINE /x by y
437         /MISSING = PAIRWISE
438         .
439 ])
440 AT_CHECK([pspp -o pspp.csv examine.sps])
441 AT_CHECK([cat pspp.csv], [0], [dnl
442 Table: Reading free-form data from INLINE.
443 Variable,Format
444 x,F8.0
445 y,F8.0
446
447 Table: Case Processing Summary
448 ,Cases,,,,,
449 ,Valid,,Missing,,Total,
450 ,N,Percent,N,Percent,N,Percent
451 x,6,85.7%,1,14.3%,7,100.0%
452
453 Table: Case Processing Summary
454 ,y,Cases,,,,,
455 ,,Valid,,Missing,,Total,
456 ,,N,Percent,N,Percent,N,Percent
457 x,1.00,4,100.0%,0,.0%,4,100.0%
458 ,2.00,2,66.7%,1,33.3%,3,100.0%
459 ])
460 AT_CLEANUP
461
462
463 AT_SETUP([EXAMINE -- user missing values])
464 AT_KEYWORDS([categorical categoricals])
465 AT_DATA([examine-m.sps], [dnl
466 DATA LIST notable LIST /x * y *.
467 BEGIN DATA.
468 1                   2
469 9999999999          2
470 9999999999          99
471 END DATA.
472
473 MISSING VALUES x (9999999999).
474 MISSING VALUES y (99).
475
476 EXAMINE
477         /VARIABLES= x y
478         /MISSING=PAIRWISE.
479 ])
480 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
481 Table: Case Processing Summary
482 ,Cases,,,,,
483 ,Valid,,Missing,,Total,
484 ,N,Percent,N,Percent,N,Percent
485 x,1,33.3%,2,66.7%,3,100.0%
486 y,2,66.7%,1,33.3%,3,100.0%
487 ])
488 AT_CLEANUP
489
490 AT_SETUP([EXAMINE -- missing values and percentiles])
491 AT_KEYWORDS([categorical categoricals])
492 AT_DATA([examine.sps], [dnl
493 DATA LIST LIST /X *.
494 BEGIN DATA.
495 99
496 99
497 5.00
498 END DATA.
499
500 MISSING VALUE X (99).
501
502 EXAMINE /x
503         /PERCENTILES=HAVERAGE.
504 ])
505 AT_CHECK([pspp -o pspp.csv examine.sps])
506 dnl No-crash check only: pspp.csv is never inspected, but AT_CHECK's defaults still require exit status 0 and empty stdout and stderr.
507 AT_CLEANUP
508
509 dnl Tests the trimmed mean calculation in the case
510 dnl where the data is weighted towards the centre.
511 AT_SETUP([EXAMINE -- trimmed mean])
512 AT_KEYWORDS([categorical categoricals])
513 AT_DATA([examine.sps], [dnl
514 DATA LIST LIST /X * C *.
515 BEGIN DATA.
516 1 1
517 2 49
518 3 2
519 END DATA.
520
521 WEIGHT BY c.
522
523 EXAMINE
524         x
525         /STATISTICS=DESCRIPTIVES
526         .
527 ])
528 AT_CHECK([pspp -o pspp.csv examine.sps])
529 AT_CHECK([cat pspp.csv], [0], [dnl
530 Table: Reading free-form data from INLINE.
531 Variable,Format
532 X,F8.0
533 C,F8.0
534
535 Table: Case Processing Summary
536 ,Cases,,,,,
537 ,Valid,,Missing,,Total,
538 ,N,Percent,N,Percent,N,Percent
539 X,52.00,100.0%,.00,.0%,52.00,100.0%
540
541 Table: Descriptives
542 ,,,Statistic,Std. Error
543 X,Mean,,2.02,.03
544 ,95% Confidence Interval for Mean,Lower Bound,1.95,
545 ,,Upper Bound,2.09,
546 ,5% Trimmed Mean,,2.00,
547 ,Median,,2.00,
548 ,Variance,,.06,
549 ,Std. Deviation,,.24,
550 ,Minimum,,1.00,
551 ,Maximum,,3.00,
552 ,Range,,2.00,
553 ,Interquartile Range,,.00,
554 ,Skewness,,1.19,.33
555 ,Kurtosis,,15.73,.65
556 ])
557 AT_CLEANUP
558
559 AT_SETUP([EXAMINE -- crash bug])
560 AT_KEYWORDS([categorical categoricals])
561 AT_DATA([examine.sps], [dnl
562 data list list /a * x * y *.
563 begin data.
564 3 1 3
565 5 1 4
566 7 2 3
567 end data.
568
569 examine a by x by y
570         /statistics=DESCRIPTIVES
571         .
572 ])
573 AT_CHECK([pspp -o pspp.csv examine.sps])
574 dnl No-crash check only: pspp.csv is never inspected, but AT_CHECK's defaults still require exit status 0 and empty stdout and stderr.
575 AT_CLEANUP
576
577 dnl Test that two consecutive EXAMINE commands (the first with a BY factor, the second without) don't crash PSPP.
578 AT_SETUP([EXAMINE -- consecutive runs don't crash])
579 AT_KEYWORDS([categorical categoricals])
580 AT_DATA([examine.sps], [dnl
581 data list list /y * z *.
582 begin data.
583 6 4
584 5 3
585 7 6
586 end data.
587
588 EXAMINE /VARIABLES= z BY y.
589
590 EXAMINE /VARIABLES= z.
591 ])
592 AT_CHECK([pspp -o pspp.csv examine.sps])
593 dnl No-crash check only: pspp.csv is never inspected, but AT_CHECK's defaults still require exit status 0 and empty stdout and stderr.
594 AT_CLEANUP
595
596 dnl Test that /STATISTICS=DESCRIPTIVES with a BY factor does not crash when x has a missing value (the "." case below).
597 AT_SETUP([EXAMINE -- missing values don't crash])
598 AT_KEYWORDS([categorical categoricals])
599 AT_DATA([examine.sps], [dnl
600 data list list /x * y *.
601 begin data.
602 1 0
603 2 0
604 . 0
605 3 1
606 4 1
607 end data.
608 examine x by y /statistics=descriptives.
609 ])
610 AT_CHECK([pspp -o pspp.csv examine.sps])
611 dnl No-crash check only: pspp.csv is never inspected, but AT_CHECK's defaults still require exit status 0 and empty stdout and stderr.
612 AT_CLEANUP
613
614 dnl Test that having only a single case doesn't crash when descriptives and a histogram are requested.
615 AT_SETUP([EXAMINE -- single case doesn't crash])
616 AT_KEYWORDS([categorical categoricals])
617 AT_DATA([examine.sps], [dnl
618 DATA LIST LIST /quality * .
619 BEGIN DATA
620 3
621 END DATA
622
623
624 EXAMINE
625         quality
626         /STATISTICS descriptives
627         /PLOT = histogram
628         .
629 ])
630 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
631 dnl No-crash check only: stdout is ignored and pspp.csv is never inspected, but exit status 0 and empty stderr are still required.
632 AT_CLEANUP
633
634 dnl Test that all-missing data doesn't crash, with plots, statistics, percentiles and /ID requested together.
635 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
636 AT_KEYWORDS([categorical categoricals])
637 AT_DATA([examine.sps], [dnl
638 DATA LIST LIST /x *.
639 BEGIN DATA.
640 .
641 .
642 .
643 .
644 END DATA.
645
646 EXAMINE /x
647         PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
648         /ID=x
649         /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
650         /PERCENTILE=AEMPIRICAL
651         .
652 ])
653 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
654 dnl No-crash check only: stdout is ignored and pspp.csv is never inspected, but exit status 0 and empty stderr are still required.
655 AT_CLEANUP
656
657 dnl Test that big input (50000 cases generated by INPUT PROGRAM) doesn't crash (bug 11307).
658 AT_SETUP([EXAMINE -- big input doesn't crash])
659 AT_KEYWORDS([categorical categoricals slow])
660 AT_DATA([examine.sps], [dnl
661 INPUT PROGRAM.
662         LOOP #I=1 TO 50000.
663                 COMPUTE X=NORMAL(10).
664                 END CASE.
665         END LOOP.
666         END FILE.
667 END INPUT PROGRAM.
668
669
670 EXAMINE /x
671         /STATISTICS=DESCRIPTIVES.
672 ])
673 AT_CHECK([pspp -o pspp.csv examine.sps])
674 dnl No-crash check only: pspp.csv is never inspected, but AT_CHECK's defaults still require exit status 0 and empty stdout and stderr.
675 AT_CLEANUP
676
677 dnl Another test that big input doesn't crash: large.txt starts with 200000 lines and grows to 250000 for the second run.
678 dnl The actual bug that this checks for has been lost.
679 AT_SETUP([EXAMINE -- big input doesn't crash 2])
680 AT_KEYWORDS([categorical categoricals slow])
681 AT_CHECK([$PYTHON3 -c '
682 for i in range(100000): print("AB12")
683 for i in range(100000): print("AB04")
684 ' > large.txt])
685 AT_DATA([examine.sps], [dnl
686 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
687
688
689 AGGREGATE OUTFILE=* /BREAK=X /A=N.
690
691
692 EXAMINE /A BY X.
693 ])
694 AT_CHECK([pspp -o pspp.csv examine.sps])
695 dnl No-crash check only: pspp.csv is never inspected, but exit status 0 and empty stdout and stderr are still required.
696 AT_CHECK([$PYTHON3 -c 'for i in range(25000): print("AB04\nAB12")' >> large.txt])
697 AT_CHECK([pspp -o pspp.csv examine.sps])
698 dnl Same no-crash check again, now with the interleaved AB04/AB12 lines appended to large.txt.
699 AT_CLEANUP
700
701
702 dnl Test that the ID subcommand works with non-numeric variables
703 AT_SETUP([EXAMINE -- non-numeric ID])
704 AT_KEYWORDS([categorical categoricals])
705
706 AT_DATA([examine-id.sps], [dnl
707 data list notable list /x * y (a12).
708 begin data.
709 1  one
710 2  two
711 3  three
712 4  four
713 5  five
714 6  six
715 7  seven
716 8  eight
717 9  nine
718 10 ten
719 11 eleven
720 12 twelve
721 30 thirty
722 300 threehundred
723 end data.
724
725 set small=0.
726 examine x
727         /statistics = extreme
728         /id = y
729         /plot = boxplot
730         .
731 ])
732
733 AT_CHECK([pspp -O format=csv examine-id.sps], [0], [dnl
734 Table: Case Processing Summary
735 ,Cases,,,,,
736 ,Valid,,Missing,,Total,
737 ,N,Percent,N,Percent,N,Percent
738 x,14,100.0%,0,.0%,14,100.0%
739
740 Table: Extreme Values
741 ,,,y,Value
742 x,Highest,1,threehundred,300.00
743 ,,2,thirty,30.00
744 ,,3,twelve,12.00
745 ,,4,eleven,11.00
746 ,,5,ten,10.00
747 ,Lowest,1,one,1.00
748 ,,2,two,2.00
749 ,,3,three,3.00
750 ,,4,four,4.00
751 ,,5,five,5.00
752
753 Table: Tests of Normality
754 ,Shapiro-Wilk,,
755 ,Statistic,df,Sig.
756 x,.37,14,.00
757 ])
758
759 AT_CLEANUP
760
761 dnl Test for a crash which happened on cleanup after bad input syntax (the nonsense /PLOT keyword below); pspp must exit with status 1.
762 AT_SETUP([EXAMINE -- Bad Input])
763 AT_KEYWORDS([categorical categoricals])
764
765 AT_DATA([examine-bad.sps], [dnl
766 data list list /h * g *.
767 begin data.
768 1 1
769 2 1
770 3 1
771 4 1
772 5 2
773 6 2
774 7 2
775 8 2
776 9 2
777 end data.
778
779 EXAMINE
780         /VARIABLES= h
781         BY  g
782         /STATISTICS = DESCRIPTIVES EXTREME
783         /PLOT = lkajsdas
784         .
785 ])
786
787 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
788
789 AT_CLEANUP
790
791
792 dnl Check the MISSING=REPORT option
793 AT_SETUP([EXAMINE -- MISSING=REPORT])
794 AT_KEYWORDS([categorical categoricals])
795
796 AT_DATA([examine-report.sps], [dnl
797 set format = F22.0.
798 data list list /x * g *.
799 begin data.
800 1   1
801 2   1
802 3   1
803 4   1
804 5   1
805 6   1
806 7   1
807 8   1
808 9   1
809 10   2
810 20   2
811 30   2
812 40   2
813 50   2
814 60   2
815 70   2
816 80   2
817 90   2
818 101   9
819 201   9
820 301   9
821 401   9
822 501   99
823 601   99
824 701   99
825 801   99
826 901   99
827 1001  .
828 2002  .
829 3003  .
830 4004  .
831 end data.
832
833 MISSING VALUES g (9, 99, 999).
834
835 EXAMINE
836         /VARIABLES = x
837         BY  g
838         /STATISTICS = EXTREME
839         /NOTOTAL
840         /MISSING = REPORT.
841 ])
842
843
844 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine-report.sps])
845 AT_CHECK([cat pspp.csv], [0],
846   [[Table: Reading free-form data from INLINE.
847 Variable,Format
848 x,F8.0
849 g,F8.0
850
851 Table: Case Processing Summary
852 ,g,Cases,,,,,
853 ,,Valid,,Missing,,Total,
854 ,,N,Percent,N,Percent,N,Percent
855 x,.,4,100.0%,0,.0%,4,100.0%
856 ,1,9,100.0%,0,.0%,9,100.0%
857 ,2,9,100.0%,0,.0%,9,100.0%
858 ,9[a],4,100.0%,0,.0%,4,100.0%
859 ,99[a],5,100.0%,0,.0%,5,100.0%
860 Footnote: a. User-missing value.
861
862 Table: Extreme Values
863 ,g,,,Case Number,Value
864 x,.,Highest,1,31,4004
865 ,,,2,30,3003
866 ,,,3,29,2002
867 ,,,4,28,1001
868 ,,,5,0,0
869 ,,Lowest,1,28,1001
870 ,,,2,29,2002
871 ,,,3,30,3003
872 ,,,4,31,4004
873 ,,,5,31,4004
874 ,1,Highest,1,9,9
875 ,,,2,8,8
876 ,,,3,7,7
877 ,,,4,6,6
878 ,,,5,5,5
879 ,,Lowest,1,1,1
880 ,,,2,2,2
881 ,,,3,3,3
882 ,,,4,4,4
883 ,,,5,5,5
884 ,2,Highest,1,18,90
885 ,,,2,17,80
886 ,,,3,16,70
887 ,,,4,15,60
888 ,,,5,14,50
889 ,,Lowest,1,10,10
890 ,,,2,11,20
891 ,,,3,12,30
892 ,,,4,13,40
893 ,,,5,14,50
894 ,9[a],Highest,1,22,401
895 ,,,2,21,301
896 ,,,3,20,201
897 ,,,4,19,101
898 ,,,5,0,0
899 ,,Lowest,1,19,101
900 ,,,2,20,201
901 ,,,3,21,301
902 ,,,4,22,401
903 ,,,5,22,401
904 ,99[a],Highest,1,27,901
905 ,,,2,26,801
906 ,,,3,25,701
907 ,,,4,24,601
908 ,,,5,23,501
909 ,,Lowest,1,23,501
910 ,,,2,24,601
911 ,,,3,25,701
912 ,,,4,26,801
913 ,,,5,27,901
914 Footnote: a. User-missing value.
915 ]])
916
917 AT_CLEANUP
918
919
920 dnl Run a test of the basic STATISTICS using a "real"
921 dnl dataset and comparing with "real" results kindly
922 dnl provided by Olaf Nöhring
923 AT_SETUP([EXAMINE -- sample unweighted])
924 AT_KEYWORDS([categorical categoricals])
925
926 AT_DATA([sample.sps], [dnl
927 set format = F22.4.
928 DATA LIST notable LIST /X *
929 BEGIN DATA.
930 461.19000000
931 466.38000000
932 479.46000000
933 480.10000000
934 483.43000000
935 488.30000000
936 489.00000000
937 491.62000000
938 505.62000000
939 511.30000000
940 521.53000000
941 526.70000000
942 528.25000000
943 538.70000000
944 540.22000000
945 540.58000000
946 546.10000000
947 548.17000000
948 553.99000000
949 566.21000000
950 575.90000000
951 584.38000000
952 593.40000000
953 357.05000000
954 359.73000000
955 360.48000000
956 373.98000000
957 374.13000000
958 381.45000000
959 383.72000000
960 390.00000000
961 400.34000000
962 415.32000000
963 415.91000000
964 418.30000000
965 421.03000000
966 422.43000000
967 426.93000000
968 433.25000000
969 436.89000000
970 445.33000000
971 446.33000000
972 446.55000000
973 456.44000000
974 689.49000000
975 691.92000000
976 695.00000000
977 695.36000000
978 698.21000000
979 699.46000000
980 706.61000000
981 710.69000000
982 715.82000000
983 715.82000000
984 741.39000000
985 752.27000000
986 756.73000000
987 757.74000000
988 759.57000000
989 796.07000000
990 813.78000000
991 817.25000000
992 825.48000000
993 831.28000000
994 849.24000000
995 890.00000000
996 894.78000000
997 935.65000000
998 935.90000000
999 945.90000000
1000 1012.8600000
1001 1022.6000000
1002 1061.8100000
1003 1063.5000000
1004 1077.2300000
1005 1151.6300000
1006 1355.2800000
1007 598.88000000
1008 606.91000000
1009 621.60000000
1010 624.80000000
1011 636.13000000
1012 637.38000000
1013 640.32000000
1014 649.35000000
1015 656.51000000
1016 662.55000000
1017 664.69000000
1018 106.22000000
1019 132.24000000
1020 174.76000000
1021 204.85000000
1022 264.93000000
1023 264.99000000
1024 269.84000000
1025 325.12000000
1026 331.67000000
1027 337.26000000
1028 347.68000000
1029 354.91000000
1030 END DATA.
1031
1032 EXAMINE
1033         x
1034         /STATISTICS=DESCRIPTIVES
1035         .
1036 ])
1037
1038 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1039 Table: Case Processing Summary
1040 ,Cases,,,,,
1041 ,Valid,,Missing,,Total,
1042 ,N,Percent,N,Percent,N,Percent
1043 X,100,100.0%,0,.0%,100,100.0%
1044
1045 Table: Descriptives
1046 ,,,Statistic,Std. Error
1047 X,Mean,,587.6603,23.2665
1048 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1049 ,,Upper Bound,633.8260,
1050 ,5% Trimmed Mean,,579.7064,
1051 ,Median,,547.1350,
1052 ,Variance,,54132.8466,
1053 ,Std. Deviation,,232.6647,
1054 ,Minimum,,106.2200,
1055 ,Maximum,,1355.2800,
1056 ,Range,,1249.0600,
1057 ,Interquartile Range,,293.1575,
1058 ,Skewness,,.6331,.2414
1059 ,Kurtosis,,.5300,.4783
1060 ])
1061
1062 AT_CLEANUP
1063
1064
1065
1066 dnl Test for a crash which happened on bad input syntax (SPREADLEVEL with empty parentheses); pspp must exit with status 1.
1067 AT_SETUP([EXAMINE -- Empty Parentheses])
1068 AT_KEYWORDS([categorical categoricals])
1069
1070 AT_DATA([examine-empty-parens.sps], [dnl
1071 DATA LIST notable LIST /X *
1072 BEGIN DATA.
1073 2
1074 3
1075 END DATA.
1076
1077
1078 EXAMINE
1079         x
1080         /PLOT = SPREADLEVEL()
1081         .
1082 ])
1083
1084 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1085
1086 AT_CLEANUP
1087
1088
1089
1090
1091 dnl Test for another crash which happened on bad input syntax (a second slash directly after /VARIABLES); pspp must exit with status 1.
1092 AT_SETUP([EXAMINE -- Bad variable])
1093 AT_KEYWORDS([categorical categoricals])
1094
1095 AT_DATA([examine-bad-variable.sps], [dnl
1096 data list list /h * g *.
1097 begin data.
1098 3 1
1099 4 1
1100 5 2
1101 end data.
1102
1103 EXAMINE
1104         /VARIABLES/ h
1105         BY  g
1106         .
1107 ])
1108
1109 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1110
1111 AT_CLEANUP
1112
1113
1114
1115 dnl Test for yet another crash. This time for extremes vs. missing weight values.
1116 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1117 AT_KEYWORDS([categorical categoricals])
1118
1119 AT_DATA([examine-missing-weights.sps], [dnl
1120 data list notable list /h * g *.
1121 begin data.
1122 3 1
1123 4 .
1124 5 1
1125 2 1
1126 end data.
1127
1128 WEIGHT BY g.
1129
1130 EXAMINE h
1131         /STATISTICS extreme(3)
1132         .
1133 ])
1134
1135 AT_CHECK([pspp -O format=csv  examine-missing-weights.sps], [0], [dnl
1136 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1137
1138 Table: Case Processing Summary
1139 ,Cases,,,,,
1140 ,Valid,,Missing,,Total,
1141 ,N,Percent,N,Percent,N,Percent
1142 h,3.00,100.0%,.00,.0%,3.00,100.0%
1143
1144 Table: Extreme Values
1145 ,,,Case Number,Value
1146 h,Highest,1,3,5.00
1147 ,,2,2,4.00
1148 ,,3,1,3.00
1149 ,Lowest,1,4,2.00
1150 ,,2,1,3.00
1151 ,,3,2,4.00
1152 ])
1153
1154 AT_CLEANUP
1155
1156 dnl This is an example from doc/tutorial.texi, run against examples/repairs.sav.
1157 dnl So if the results of this have to be changed in any way,
1158 dnl make sure to update that file.  The fixture copy is itself a check, so a missing .sav fails at the cp step.
1159 AT_SETUP([EXAMINE tutorial example 1])
1160 AT_CHECK([cp "$top_srcdir/examples/repairs.sav" .])
1161 AT_DATA([repairs.sps], [dnl
1162 GET FILE='repairs.sav'.
1163 EXAMINE mtbf /STATISTICS=DESCRIPTIVES.
1164 COMPUTE mtbf_ln = LN (mtbf).
1165 EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
1166 ])
1167
1168 AT_CHECK([pspp -O format=csv repairs.sps], [0], [dnl
1169 Table: Case Processing Summary
1170 ,Cases,,,,,
1171 ,Valid,,Missing,,Total,
1172 ,N,Percent,N,Percent,N,Percent
1173 Mean time between failures (months) ,30,100.0%,0,.0%,30,100.0%
1174
1175 Table: Descriptives
1176 ,,,Statistic,Std. Error
1177 Mean time between failures (months) ,Mean,,8.78,1.10
1178 ,95% Confidence Interval for Mean,Lower Bound,6.53,
1179 ,,Upper Bound,11.04,
1180 ,5% Trimmed Mean,,8.20,
1181 ,Median,,8.29,
1182 ,Variance,,36.34,
1183 ,Std. Deviation,,6.03,
1184 ,Minimum,,1.63,
1185 ,Maximum,,26.47,
1186 ,Range,,24.84,
1187 ,Interquartile Range,,6.03,
1188 ,Skewness,,1.65,.43
1189 ,Kurtosis,,3.41,.83
1190
1191 Table: Case Processing Summary
1192 ,Cases,,,,,
1193 ,Valid,,Missing,,Total,
1194 ,N,Percent,N,Percent,N,Percent
1195 mtbf_ln,30,100.0%,0,.0%,30,100.0%
1196
1197 Table: Descriptives
1198 ,,,Statistic,Std. Error
1199 mtbf_ln,Mean,,1.95,.13
1200 ,95% Confidence Interval for Mean,Lower Bound,1.69,
1201 ,,Upper Bound,2.22,
1202 ,5% Trimmed Mean,,1.96,
1203 ,Median,,2.11,
1204 ,Variance,,.49,
1205 ,Std. Deviation,,.70,
1206 ,Minimum,,.49,
1207 ,Maximum,,3.28,
1208 ,Range,,2.79,
1209 ,Interquartile Range,,.88,
1210 ,Skewness,,-.37,.43
1211 ,Kurtosis,,.01,.83
1212 ])
1213
1214 AT_CLEANUP
1215
1216 dnl This is an example from doc/tutorial.texi, run against examples/physiology.sav.
1217 dnl So if the results of this have to be changed in any way,
1218 dnl make sure to update that file.  The fixture copy is itself a check, so a missing .sav fails at the cp step.
1219 AT_SETUP([EXAMINE tutorial example 2])
1220 AT_CHECK([cp "$top_srcdir/examples/physiology.sav" .])
1221 AT_DATA([examine.sps], [dnl
1222 GET FILE='physiology.sav'.
1223 EXAMINE height, weight /STATISTICS=EXTREME(3).
1224 ])
1225 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
1226 AT_CHECK([cat pspp.csv], [0], [dnl
1227 Table: Case Processing Summary
1228 ,Cases,,,,,
1229 ,Valid,,Missing,,Total,
1230 ,N,Percent,N,Percent,N,Percent
1231 Height in millimeters   ,40,100.0%,0,.0%,40,100.0%
1232 Weight in kilograms ,40,100.0%,0,.0%,40,100.0%
1233
1234 Table: Extreme Values
1235 ,,,Case Number,Value
1236 Height in millimeters   ,Highest,1,14,1903
1237 ,,2,15,1884
1238 ,,3,12,1802
1239 ,Lowest,1,30,179
1240 ,,2,31,1598
1241 ,,3,28,1601
1242 Weight in kilograms ,Highest,1,13,92.1
1243 ,,2,5,92.1
1244 ,,3,17,91.7
1245 ,Lowest,1,38,-55.6
1246 ,,2,39,54.5
1247 ,,3,33,55.4
1248 ])
1249 AT_CLEANUP
1250
1251
1252
1253 AT_SETUP([EXAMINE -- Crash on unrepresentable graphs])
1254 AT_DATA([examine.sps], [dnl
1255 data list notable list /x * g *.
1256 begin data.
1257 96 1
1258 end data.
1259
1260 examine x  by g
1261         /nototal
1262         /plot = all.
1263 ])
1264 dnl This bug only manifested itself on cairo-based drivers (presumably why PDF output is requested); stdout and stderr are ignored, only exit status 0 is required.
1265 AT_CHECK([pspp -O format=pdf examine.sps], [0], [ignore], [ignore])
1266 AT_CLEANUP
1267
1268
1269 dnl This example comes from the web site:
1270 dnl  https://www.spsstests.com/2018/11/shapiro-wilk-normality-test-spss.html
1271 AT_SETUP([EXAMINE -- shapiro-wilk 1])
1272 AT_KEYWORDS([shapiro wilk])
1273 AT_DATA([shapiro-wilk.sps], [dnl
1274 data list notable list /x * g *.
1275 begin data.
1276 96 1
1277 98 1
1278 95 1
1279 89 1
1280 90 1
1281 92 1
1282 94 1
1283 93 1
1284 97 1
1285 100 1
1286 99 2
1287 96 2
1288 80 2
1289 89 2
1290 91 2
1291 92 2
1292 93 2
1293 94 2
1294 99 2
1295 80 2
1296 end data.
1297
1298 set format F22.3.
1299
1300 examine x  by g
1301         /nototal
1302         /plot = all.
1303 ])
1304
1305 AT_CHECK([pspp -O format=csv shapiro-wilk.sps], [0],[dnl
1306 Table: Case Processing Summary
1307 ,g,Cases,,,,,
1308 ,,Valid,,Missing,,Total,
1309 ,,N,Percent,N,Percent,N,Percent
1310 x,1.00,10,100.0%,0,.0%,10,100.0%
1311 ,2.00,10,100.0%,0,.0%,10,100.0%
1312
1313 Table: Tests of Normality
1314 ,g,Shapiro-Wilk,,
1315 ,,Statistic,df,Sig.
1316 x,1.00,.984,10,.983
1317 ,2.00,.882,10,.136
1318 ])
1319
1320 AT_CLEANUP
1321
1322
1323 dnl This example comes from the web site:
1324 dnl  http://www.real-statistics.com/tests-normality-and-symmetry/statistical-tests-normality-symmetry/shapiro-wilk-expanded-test/
1325 dnl It uses a dataset larger than 11 samples. Hence the alternative method for
1326 dnl significance is used.
1327 AT_SETUP([EXAMINE -- shapiro-wilk 2])
1328 AT_KEYWORDS([shapiro wilk])
1329 AT_DATA([shapiro-wilk2.sps], [dnl
1330 data list notable list /x *.
1331 begin data.
1332 65
1333 61
1334 63
1335 86
1336 70
1337 55
1338 74
1339 35
1340 72
1341 68
1342 45
1343 58
1344 end data.
1345
1346 set format F22.3.
1347
1348 examine x
1349         /plot = boxplot.
1350 ])
1351
1352 AT_CHECK([pspp -O format=csv shapiro-wilk2.sps], [0],[dnl
1353 Table: Case Processing Summary
1354 ,Cases,,,,,
1355 ,Valid,,Missing,,Total,
1356 ,N,Percent,N,Percent,N,Percent
1357 x,12,100.0%,0,.0%,12,100.0%
1358
1359 Table: Tests of Normality
1360 ,Shapiro-Wilk,,
1361 ,Statistic,df,Sig.
1362 x,.971,12,.922
1363 ])
1364
1365 AT_CLEANUP