4e7d6f4d8fec7c3db954f287450a9a15a5d20af0
[pspp] / tests / language / stats / examine.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
3 dnl
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
dnl Opens the EXAMINE test category; the test groups that follow
dnl exercise the EXAMINE command.
17 AT_BANNER([EXAMINE])
18
dnl Weighted run (WEIGHT BY w) of EXAMINE quality BY brand with
dnl /STATISTICS descriptives extreme(3).  The complete CSV output (data
dnl listing, case summaries, extreme values, descriptives) is compared.
19 AT_SETUP([EXAMINE])
20 AT_KEYWORDS([categorical categoricals])
21 AT_DATA([examine.sps], [
22 DATA LIST LIST /QUALITY * W * BRAND * .
23 BEGIN DATA
24 3  1  1
25 2  2  1
26 1  2  1
27 1  1  1
28 4  1  1
29 4  1  1
30 5  1  2
31 2  1  2
32 4  4  2
33 2  1  2
34 3  1  2
35 7  1  3
36 4  2  3
37 5  3  3
38 3  1  3
39 6  1  3
40 END DATA
41
42 WEIGHT BY w.
43
44 VARIABLE LABELS brand   'Manufacturer'.
45 VARIABLE LABELS quality 'Breaking Strain'.
46
47 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
48
49 LIST /FORMAT=NUMBERED.
50
51 EXAMINE
52         quality BY brand
53         /STATISTICS descriptives extreme(3)
54         .
55 ])
56
57
58 dnl In the following data, only the extreme values have been checked.
59 dnl The descriptives have been blindly pasted.
60 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
61 Table: Reading free-form data from INLINE.
62 Variable,Format
63 QUALITY,F8.0
64 W,F8.0
65 BRAND,F8.0
66
67 Table: Data List
68 Case Number,QUALITY,W,BRAND
69 1,3.00,1.00,1.00
70 2,2.00,2.00,1.00
71 3,1.00,2.00,1.00
72 4,1.00,1.00,1.00
73 5,4.00,1.00,1.00
74 6,4.00,1.00,1.00
75 7,5.00,1.00,2.00
76 8,2.00,1.00,2.00
77 9,4.00,4.00,2.00
78 10,2.00,1.00,2.00
79 11,3.00,1.00,2.00
80 12,7.00,1.00,3.00
81 13,4.00,2.00,3.00
82 14,5.00,3.00,3.00
83 15,3.00,1.00,3.00
84 16,6.00,1.00,3.00
85
86 Table: Case Processing Summary
87 ,Cases,,,,,
88 ,Valid,,Missing,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 Breaking Strain,24.00,100.0%,.00,.0%,24.00,100.0%
91
92 Table: Extreme Values
93 ,,,Case Number,Value
94 Breaking Strain,Highest,1,12,7.00
95 ,,2,16,6.00
96 ,,3,14,5.00
97 ,Lowest,1,3,1.00
98 ,,2,4,1.00
99 ,,3,2,2.00
100
101 Table: Descriptives
102 ,,,Statistic,Std. Error
103 Breaking Strain,Mean,,3.54,.32
104 ,95% Confidence Interval for Mean,Lower Bound,2.87,
105 ,,Upper Bound,4.21,
106 ,5% Trimmed Mean,,3.50,
107 ,Median,,4.00,
108 ,Variance,,2.52,
109 ,Std. Deviation,,1.59,
110 ,Minimum,,1.00,
111 ,Maximum,,7.00,
112 ,Range,,6.00,
113 ,Interquartile Range,,2.75,
114 ,Skewness,,.06,.47
115 ,Kurtosis,,-.36,.92
116
117 Table: Case Processing Summary
118 ,Manufacturer,Cases,,,,,
119 ,,Valid,,Missing,,Total,
120 ,,N,Percent,N,Percent,N,Percent
121 Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
122 ,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
123 ,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
124
125 Table: Extreme Values
126 ,Manufacturer,,,Case Number,Value
127 Breaking Strain,Aspeger,Highest,1,6,4.00
128 ,,,2,5,4.00
129 ,,,3,1,3.00
130 ,,Lowest,1,3,1.00
131 ,,,2,4,1.00
132 ,,,3,2,2.00
133 ,Bloggs,Highest,1,7,5.00
134 ,,,2,9,4.00
135 ,,,3,11,3.00
136 ,,Lowest,1,8,2.00
137 ,,,2,10,2.00
138 ,,,3,11,3.00
139 ,Charlies,Highest,1,12,7.00
140 ,,,2,16,6.00
141 ,,,3,14,5.00
142 ,,Lowest,1,15,3.00
143 ,,,2,13,4.00
144 ,,,3,14,5.00
145
146 Table: Descriptives
147 ,Manufacturer,,,Statistic,Std. Error
148 Breaking Strain,Aspeger,Mean,,2.25,.45
149 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
150 ,,,Upper Bound,3.32,
151 ,,5% Trimmed Mean,,2.22,
152 ,,Median,,2.00,
153 ,,Variance,,1.64,
154 ,,Std. Deviation,,1.28,
155 ,,Minimum,,1.00,
156 ,,Maximum,,4.00,
157 ,,Range,,3.00,
158 ,,Interquartile Range,,2.75,
159 ,,Skewness,,.47,.75
160 ,,Kurtosis,,-1.55,1.48
161 ,Bloggs,Mean,,3.50,.38
162 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
163 ,,,Upper Bound,4.39,
164 ,,5% Trimmed Mean,,3.50,
165 ,,Median,,4.00,
166 ,,Variance,,1.14,
167 ,,Std. Deviation,,1.07,
168 ,,Minimum,,2.00,
169 ,,Maximum,,5.00,
170 ,,Range,,3.00,
171 ,,Interquartile Range,,1.75,
172 ,,Skewness,,-.47,.75
173 ,,Kurtosis,,-.83,1.48
174 ,Charlies,Mean,,4.88,.44
175 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
176 ,,,Upper Bound,5.92,
177 ,,5% Trimmed Mean,,4.86,
178 ,,Median,,5.00,
179 ,,Variance,,1.55,
180 ,,Std. Deviation,,1.25,
181 ,,Minimum,,3.00,
182 ,,Maximum,,7.00,
183 ,,Range,,4.00,
184 ,,Interquartile Range,,1.75,
185 ,,Skewness,,.30,.75
186 ,,Kurtosis,,.15,1.48
187 ])
188
189 AT_CLEANUP
190
dnl Weighted EXAMINE with /statistics=extreme(6) on 21 cases, two of
dnl which carry weight 2; checks the six highest and six lowest values.
191 AT_SETUP([EXAMINE -- extremes])
192 AT_KEYWORDS([categorical categoricals])
193 AT_DATA([examine.sps], [dnl
194 data list free /V1 W
195 begin data.
196 1  1
197 2  1
198 3  2
199 3  1
200 4  1
201 5  1
202 6  1
203 7  1
204 8  1
205 9  1
206 10 1
207 11 1
208 12 1
209 13 1
210 14 1
211 15 1
212 16 1
213 17 1
214 18 2
215 19 1
216 20 1
217 end data.
218
219 weight by w.
220
221 examine v1
222  /statistics=extreme(6)
223  .
224 ])
225
226 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
227 Table: Case Processing Summary
228 ,Cases,,,,,
229 ,Valid,,Missing,,Total,
230 ,N,Percent,N,Percent,N,Percent
231 V1,23.00,100.0%,.00,.0%,23.00,100.0%
232
233 Table: Extreme Values
234 ,,,Case Number,Value
235 V1,Highest,1,21,20.00
236 ,,2,20,19.00
237 ,,3,19,18.00
238 ,,4,18,17.00
239 ,,5,17,16.00
240 ,,6,16,15.00
241 ,Lowest,1,1,1.00
242 ,,2,2,2.00
243 ,,3,3,3.00
244 ,,4,4,3.00
245 ,,5,5,4.00
246 ,,6,6,5.00
247 ])
248
249 AT_CLEANUP
250
251
dnl EXAMINE with non-integer case weights (WEIGHT BY w) and
dnl /STATISTICS = DESCRIPTIVES EXTREME (5); output uses SET FORMAT=F20.3.
252 AT_SETUP([EXAMINE -- extremes with fractional weights])
253 AT_KEYWORDS([categorical categoricals])
254 AT_DATA([extreme.sps], [dnl
255 set format=F20.3.
256 data list notable list /w * x *.
257 begin data.
258  0.88  300000
259  0.86  320000
260  0.98  480000
261  0.93  960000
262  1.35  960000
263  1.31  960000
264  0.88  960000
265  0.88  1080000
266  0.88  1080000
267  0.95  1200000
268  1.47  1200000
269  0.93  1200000
270  0.98  1320000
271  1.31  1380000
272  0.93  1440000
273  0.88  1560000
274  1.56  1560000
275  1.47  1560000
276 end data.
277
278 weight by w.
279
280
281 EXAMINE
282         x
283         /STATISTICS = DESCRIPTIVES EXTREME (5)
284         .
285 ])
286
287 AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
288 Table: Case Processing Summary
289 ,Cases,,,,,
290 ,Valid,,Missing,,Total,
291 ,N,Percent,N,Percent,N,Percent
292 x,19.430,100.0%,.000,.0%,19.430,100.0%
293
294 Table: Extreme Values
295 ,,,Case Number,Value
296 x,Highest,1,18,1560000.000
297 ,,2,17,1560000.000
298 ,,3,16,1560000.000
299 ,,4,15,1440000.000
300 ,,5,14,1380000.000
301 ,Lowest,1,1,300000.000
302 ,,2,2,320000.000
303 ,,3,3,480000.000
304 ,,4,4,960000.000
305 ,,5,5,960000.000
306
307 Table: Descriptives
308 ,,,Statistic,Std. Error
309 x,Mean,,1120010.293,86222.178
310 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
311 ,,Upper Bound,1300853.894,
312 ,5% Trimmed Mean,,1141017.899,
313 ,Median,,1200000.000,
314 ,Variance,,144447748124.869,
315 ,Std. Deviation,,380062.821,
316 ,Minimum,,300000.000,
317 ,Maximum,,1560000.000,
318 ,Range,,1260000.000,
319 ,Interquartile Range,,467258.065,
320 ,Skewness,,-.887,.519
321 ,Kurtosis,,.340,1.005
322 ])
323
324 AT_CLEANUP
325
326 dnl Test the PERCENTILES subcommand of the EXAMINE command.
327 dnl In particular test that it behaves properly when there are only
328 dnl a few cases.
dnl Runs EXAMINE on three cases once for each of HAVERAGE, WAVERAGE,
dnl ROUND, EMPIRICAL and AEMPIRICAL and compares every Percentiles table.
329 AT_SETUP([EXAMINE -- percentiles])
330 AT_KEYWORDS([categorical categoricals])
331 AT_DATA([examine.sps], [dnl
332 DATA LIST LIST /X *.
333 BEGIN DATA.
334 2.00
335 8.00
336 5.00
337 END DATA.
338
339 EXAMINE /x
340         /PERCENTILES=HAVERAGE.
341
342 EXAMINE /x
343         /PERCENTILES=WAVERAGE.
344
345 EXAMINE /x
346         /PERCENTILES=ROUND.
347
348 EXAMINE /x
349         /PERCENTILES=EMPIRICAL.
350
351 EXAMINE /x
352         /PERCENTILES=AEMPIRICAL.
353 ])
354 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
355 AT_CHECK([cat pspp.csv], [0], [dnl
356 Table: Reading free-form data from INLINE.
357 Variable,Format
358 X,F8.0
359
360 Table: Case Processing Summary
361 ,Cases,,,,,
362 ,Valid,,Missing,,Total,
363 ,N,Percent,N,Percent,N,Percent
364 X,3,100.0%,0,.0%,3,100.0%
365
366 Table: Percentiles
367 ,,Percentiles,,,,,,
368 ,,5,10,25,50,75,90,95
369 X,Weighted Average,.40,.80,2.00,5.00,8.00,8.00,8.00
370 ,Tukey's Hinges,,,3.50,5.00,6.50,,
371
372 Table: Case Processing Summary
373 ,Cases,,,,,
374 ,Valid,,Missing,,Total,
375 ,N,Percent,N,Percent,N,Percent
376 X,3,100.0%,0,.0%,3,100.0%
377
378 Table: Percentiles
379 ,,Percentiles,,,,,,
380 ,,5,10,25,50,75,90,95
381 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
382 ,Tukey's Hinges,,,3.50,5.00,6.50,,
383
384 Table: Case Processing Summary
385 ,Cases,,,,,
386 ,Valid,,Missing,,Total,
387 ,N,Percent,N,Percent,N,Percent
388 X,3,100.0%,0,.0%,3,100.0%
389
390 Table: Percentiles
391 ,,Percentiles,,,,,,
392 ,,5,10,25,50,75,90,95
393 X,Weighted Average,.00,.00,2.00,5.00,5.00,8.00,8.00
394 ,Tukey's Hinges,,,3.50,5.00,6.50,,
395
396 Table: Case Processing Summary
397 ,Cases,,,,,
398 ,Valid,,Missing,,Total,
399 ,N,Percent,N,Percent,N,Percent
400 X,3,100.0%,0,.0%,3,100.0%
401
402 Table: Percentiles
403 ,,Percentiles,,,,,,
404 ,,5,10,25,50,75,90,95
405 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
406 ,Tukey's Hinges,,,3.50,5.00,6.50,,
407
408 Table: Case Processing Summary
409 ,Cases,,,,,
410 ,Valid,,Missing,,Total,
411 ,N,Percent,N,Percent,N,Percent
412 X,3,100.0%,0,.0%,3,100.0%
413
414 Table: Percentiles
415 ,,Percentiles,,,,,,
416 ,,5,10,25,50,75,90,95
417 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
418 ,Tukey's Hinges,,,3.50,5.00,6.50,,
419 ])
420 AT_CLEANUP
421
dnl EXAMINE /x by y with /MISSING = PAIRWISE where one case has x given
dnl as '.'; checks the valid/missing counts overall and per value of y.
422 AT_SETUP([EXAMINE -- missing values])
423 AT_KEYWORDS([categorical categoricals])
424 AT_DATA([examine.sps], [dnl
425 DATA LIST LIST /x * y *.
426 BEGIN DATA.
427 1   1
428 2   1
429 3   1
430 4   1
431 5   2
432 6   2
433 .   2
434 END DATA
435
436 EXAMINE /x by y
437         /MISSING = PAIRWISE
438         .
439 ])
440 AT_CHECK([pspp -o pspp.csv examine.sps])
441 AT_CHECK([cat pspp.csv], [0], [dnl
442 Table: Reading free-form data from INLINE.
443 Variable,Format
444 x,F8.0
445 y,F8.0
446
447 Table: Case Processing Summary
448 ,Cases,,,,,
449 ,Valid,,Missing,,Total,
450 ,N,Percent,N,Percent,N,Percent
451 x,6,85.7%,1,14.3%,7,100.0%
452
453 Table: Case Processing Summary
454 ,y,Cases,,,,,
455 ,,Valid,,Missing,,Total,
456 ,,N,Percent,N,Percent,N,Percent
457 x,1.00,4,100.0%,0,.0%,4,100.0%
458 ,2.00,2,66.7%,1,33.3%,3,100.0%
459 ])
460 AT_CLEANUP
461
462
dnl Declares user-missing values for x and y with MISSING VALUES and
dnl checks the Case Processing Summary counts under /MISSING=PAIRWISE.
463 AT_SETUP([EXAMINE -- user missing values])
464 AT_KEYWORDS([categorical categoricals])
465 AT_DATA([examine-m.sps], [dnl
466 DATA LIST notable LIST /x * y *.
467 BEGIN DATA.
468 1                   2
469 9999999999          2
470 9999999999          99
471 END DATA.
472
473 MISSING VALUES x (9999999999).
474 MISSING VALUES y (99).
475
476 EXAMINE
477         /VARIABLES= x y
478         /MISSING=PAIRWISE.
479 ])
480 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
481 Table: Case Processing Summary
482 ,Cases,,,,,
483 ,Valid,,Missing,,Total,
484 ,N,Percent,N,Percent,N,Percent
485 x,1,33.3%,2,66.7%,3,100.0%
486 y,2,66.7%,1,33.3%,3,100.0%
487 ])
488 AT_CLEANUP
489
dnl Two of the three cases hold the user-missing value 99 and HAVERAGE
dnl percentiles are requested.  The CSV written to pspp.csv is never
dnl compared.
490 AT_SETUP([EXAMINE -- missing values and percentiles])
491 AT_KEYWORDS([categorical categoricals])
492 AT_DATA([examine.sps], [dnl
493 DATA LIST LIST /X *.
494 BEGIN DATA.
495 99
496 99
497 5.00
498 END DATA.
499
500 MISSING VALUE X (99).
501
502 EXAMINE /x
503         /PERCENTILES=HAVERAGE.
504 ])
505 AT_CHECK([pspp -o pspp.csv examine.sps])
506 dnl Ignore output -- this is just a no-crash check.
507 AT_CLEANUP
508
509 dnl Tests the trimmed mean calculation in the case
510 dnl where the data is weighted towards the centre.
dnl The values 1, 2 and 3 are weighted 1, 49 and 2 (WEIGHT BY c); the
dnl expected 5% trimmed mean is 2.00.
511 AT_SETUP([EXAMINE -- trimmed mean])
512 AT_KEYWORDS([categorical categoricals])
513 AT_DATA([examine.sps], [dnl
514 DATA LIST LIST /X * C *.
515 BEGIN DATA.
516 1 1
517 2 49
518 3 2
519 END DATA.
520
521 WEIGHT BY c.
522
523 EXAMINE
524         x
525         /STATISTICS=DESCRIPTIVES
526         .
527 ])
528 AT_CHECK([pspp -o pspp.csv examine.sps])
529 AT_CHECK([cat pspp.csv], [0], [dnl
530 Table: Reading free-form data from INLINE.
531 Variable,Format
532 X,F8.0
533 C,F8.0
534
535 Table: Case Processing Summary
536 ,Cases,,,,,
537 ,Valid,,Missing,,Total,
538 ,N,Percent,N,Percent,N,Percent
539 X,52.00,100.0%,.00,.0%,52.00,100.0%
540
541 Table: Descriptives
542 ,,,Statistic,Std. Error
543 X,Mean,,2.02,.03
544 ,95% Confidence Interval for Mean,Lower Bound,1.95,
545 ,,Upper Bound,2.09,
546 ,5% Trimmed Mean,,2.00,
547 ,Median,,2.00,
548 ,Variance,,.06,
549 ,Std. Deviation,,.24,
550 ,Minimum,,1.00,
551 ,Maximum,,3.00,
552 ,Range,,2.00,
553 ,Interquartile Range,,.00,
554 ,Skewness,,1.19,.33
555 ,Kurtosis,,15.73,.65
556 ])
557 AT_CLEANUP
558
dnl Runs "examine a by x by y" with /statistics=DESCRIPTIVES on three
dnl cases.  The CSV written to pspp.csv is never compared.
559 AT_SETUP([EXAMINE -- crash bug])
560 AT_KEYWORDS([categorical categoricals])
561 AT_DATA([examine.sps], [dnl
562 data list list /a * x * y *.
563 begin data.
564 3 1 3
565 5 1 4
566 7 2 3
567 end data.
568
569 examine a by x by y
570         /statistics=DESCRIPTIVES
571         .
572 ])
573 AT_CHECK([pspp -o pspp.csv examine.sps])
574 dnl Ignore output -- this is just a no-crash check.
575 AT_CLEANUP
576
577 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
dnl The first EXAMINE uses a BY factor (z BY y); the second examines z
dnl alone on the same data.
578 AT_SETUP([EXAMINE -- consecutive runs don't crash])
579 AT_KEYWORDS([categorical categoricals])
580 AT_DATA([examine.sps], [dnl
581 data list list /y * z *.
582 begin data.
583 6 4
584 5 3
585 7 6
586 end data.
587
588 EXAMINE /VARIABLES= z BY y.
589
590 EXAMINE /VARIABLES= z.
591 ])
592 AT_CHECK([pspp -o pspp.csv examine.sps])
593 dnl Ignore output -- this is just a no-crash check.
594 AT_CLEANUP
595
596 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
dnl One of the five cases has x given as '.'; EXAMINE is run BY y with
dnl /statistics=descriptives.
597 AT_SETUP([EXAMINE -- missing values don't crash])
598 AT_KEYWORDS([categorical categoricals])
599 AT_DATA([examine.sps], [dnl
600 data list list /x * y *.
601 begin data.
602 1 0
603 2 0
604 . 0
605 3 1
606 4 1
607 end data.
608 examine x by y /statistics=descriptives.
609 ])
610 AT_CHECK([pspp -o pspp.csv examine.sps])
611 dnl Ignore output -- this is just a no-crash check.
612 AT_CLEANUP
613
614 dnl Test that having only a single case doesn't crash.
dnl Requests descriptives and a histogram for a one-case dataset; the
dnl exit status must be 0 and stdout is ignored.
615 AT_SETUP([EXAMINE -- single case doesn't crash])
616 AT_KEYWORDS([categorical categoricals])
617 AT_DATA([examine.sps], [dnl
618 DATA LIST LIST /quality * .
619 BEGIN DATA
620 3
621 END DATA
622
623
624 EXAMINE
625         quality
626         /STATISTICS descriptives
627         /PLOT = histogram
628         .
629 ])
630 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
631 dnl Ignore output -- this is just a no-crash check.
632 AT_CLEANUP
633
634 dnl Test that all-missing data doesn't crash.
dnl Every value of x is '.'.  The command asks for several plots plus
dnl ALL, DESCRIPTIVES EXTREME (5) ALL, AEMPIRICAL percentiles, and uses
dnl x as /ID.  Exit status must be 0; stdout is ignored.
635 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
636 AT_KEYWORDS([categorical categoricals])
637 AT_DATA([examine.sps], [dnl
638 DATA LIST LIST /x *.
639 BEGIN DATA.
640 .
641 .
642 .
643 .
644 END DATA.
645
646 EXAMINE /x
647         PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
648         /ID=x
649         /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
650         /PERCENTILE=AEMPIRICAL
651         .
652 ])
653 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
654 dnl Ignore output -- this is just a no-crash check.
655 AT_CLEANUP
656
657 dnl Test that big input doesn't crash (bug 11307).
dnl An INPUT PROGRAM loop generates 50000 cases of X=NORMAL(10) before
dnl EXAMINE /STATISTICS=DESCRIPTIVES runs.  Carries the "slow" keyword.
658 AT_SETUP([EXAMINE -- big input doesn't crash])
659 AT_KEYWORDS([categorical categoricals slow])
660 AT_DATA([examine.sps], [dnl
661 INPUT PROGRAM.
662         LOOP #I=1 TO 50000.
663                 COMPUTE X=NORMAL(10).
664                 END CASE.
665         END LOOP.
666         END FILE.
667 END INPUT PROGRAM.
668
669
670 EXAMINE /x
671         /STATISTICS=DESCRIPTIVES.
672 ])
673 AT_CHECK([pspp -o pspp.csv examine.sps])
674 dnl Ignore output -- this is just a no-crash check.
675 AT_CLEANUP
676
677 dnl Another test that big input doesn't crash.
678 dnl The actual bug that this checks for has been lost.
dnl Builds large.txt with $PERL, aggregates it by X and runs
dnl EXAMINE /A BY X; then appends more lines to large.txt and runs the
dnl same syntax again.  Carries the "slow" keyword.
679 AT_SETUP([EXAMINE -- big input doesn't crash 2])
680 AT_KEYWORDS([categorical categoricals slow])
681 AT_DATA([make-big-input.pl],
682   [for ($i=0; $i<100000; $i++) { print "AB12\n" };
683    for ($i=0; $i<100000; $i++) { print "AB04\n" };
684 ])
685 AT_CHECK([$PERL make-big-input.pl > large.txt])
686 AT_DATA([examine.sps], [dnl
687 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
688
689
690 AGGREGATE OUTFILE=* /BREAK=X /A=N.
691
692
693 EXAMINE /A BY X.
694 ])
695 AT_CHECK([pspp -o pspp.csv examine.sps])
696 dnl Ignore output -- this is just a no-crash check.
697 AT_DATA([more-big-input.pl],
698   [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
699 ])
700 AT_CHECK([$PERL more-big-input.pl >> large.txt])
701 AT_CHECK([pspp -o pspp.csv examine.sps])
702 dnl Ignore output -- this is just a no-crash check.
703 AT_CLEANUP
704
705
706 dnl Test that the ID command works with non-numeric variables
dnl y is a string variable (a12) used as /id; the expected Extreme
dnl Values table shows y's values in the column headed "y".
707 AT_SETUP([EXAMINE -- non-numeric ID])
708 AT_KEYWORDS([categorical categoricals])
709
710 AT_DATA([examine-id.sps], [dnl
711 data list notable list /x * y (a12).
712 begin data.
713 1  one
714 2  two
715 3  three
716 4  four
717 5  five
718 6  six
719 7  seven
720 8  eight
721 9  nine
722 10 ten
723 11 eleven
724 12 twelve
725 30 thirty
726 300 threehundred
727 end data.
728
729 set small=0.
730 examine x
731         /statistics = extreme
732         /id = y
733         /plot = boxplot
734         .
735 ])
736
737 AT_CHECK([pspp -O format=csv examine-id.sps], [0], [dnl
738 Table: Case Processing Summary
739 ,Cases,,,,,
740 ,Valid,,Missing,,Total,
741 ,N,Percent,N,Percent,N,Percent
742 x,14,100.0%,0,.0%,14,100.0%
743
744 Table: Extreme Values
745 ,,,y,Value
746 x,Highest,1,threehundred,300.00
747 ,,2,thirty,30.00
748 ,,3,twelve,12.00
749 ,,4,eleven,11.00
750 ,,5,ten,10.00
751 ,Lowest,1,one,1.00
752 ,,2,two,2.00
753 ,,3,three,3.00
754 ,,4,four,4.00
755 ,,5,five,5.00
756
757 Table: Tests of Normality
758 ,Shapiro-Wilk,,
759 ,Statistic,df,Sig.
760 x,.37,14,.00
761 ])
762
763 AT_CLEANUP
764
765 dnl Test for a crash which happened on cleanup from a bad input syntax
dnl /PLOT is given the keyword "lkajsdas"; pspp is expected to exit with
dnl status 1 and stdout is ignored.
766 AT_SETUP([EXAMINE -- Bad Input])
767 AT_KEYWORDS([categorical categoricals])
768
769 AT_DATA([examine-bad.sps], [dnl
770 data list list /h * g *.
771 begin data.
772 1 1
773 2 1
774 3 1
775 4 1
776 5 2
777 6 2
778 7 2
779 8 2
780 9 2
781 end data.
782
783 EXAMINE
784         /VARIABLES= h
785         BY  g
786         /STATISTICS = DESCRIPTIVES EXTREME
787         /PLOT = lkajsdas
788         .
789 ])
790
791 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
792
793 AT_CLEANUP
794
795
796 dnl Check the MISSING=REPORT option
dnl g has user-missing values 9, 99 and 999, and four cases give g as
dnl '.'.  With /MISSING = REPORT the expected tables include those groups,
dnl with 9 and 99 flagged by footnote [a] ("User-missing value.").
797 AT_SETUP([EXAMINE -- MISSING=REPORT])
798 AT_KEYWORDS([categorical categoricals])
799
800 AT_DATA([examine-report.sps], [dnl
801 set format = F22.0.
802 data list list /x * g *.
803 begin data.
804 1   1
805 2   1
806 3   1
807 4   1
808 5   1
809 6   1
810 7   1
811 8   1
812 9   1
813 10   2
814 20   2
815 30   2
816 40   2
817 50   2
818 60   2
819 70   2
820 80   2
821 90   2
822 101   9
823 201   9
824 301   9
825 401   9
826 501   99
827 601   99
828 701   99
829 801   99
830 901   99
831 1001  .
832 2002  .
833 3003  .
834 4004  .
835 end data.
836
837 MISSING VALUES g (9, 99, 999).
838
839 EXAMINE
840         /VARIABLES = x
841         BY  g
842         /STATISTICS = EXTREME
843         /NOTOTAL
844         /MISSING = REPORT.
845 ])
846
847
848 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine-report.sps])
849 AT_CHECK([cat pspp.csv], [0],
850   [[Table: Reading free-form data from INLINE.
851 Variable,Format
852 x,F8.0
853 g,F8.0
854
855 Table: Case Processing Summary
856 ,g,Cases,,,,,
857 ,,Valid,,Missing,,Total,
858 ,,N,Percent,N,Percent,N,Percent
859 x,.,4,100.0%,0,.0%,4,100.0%
860 ,1,9,100.0%,0,.0%,9,100.0%
861 ,2,9,100.0%,0,.0%,9,100.0%
862 ,9[a],4,100.0%,0,.0%,4,100.0%
863 ,99[a],5,100.0%,0,.0%,5,100.0%
864 Footnote: a. User-missing value.
865
866 Table: Extreme Values
867 ,g,,,Case Number,Value
868 x,.,Highest,1,31,4004
869 ,,,2,30,3003
870 ,,,3,29,2002
871 ,,,4,28,1001
872 ,,,5,0,0
873 ,,Lowest,1,28,1001
874 ,,,2,29,2002
875 ,,,3,30,3003
876 ,,,4,31,4004
877 ,,,5,31,4004
878 ,1,Highest,1,9,9
879 ,,,2,8,8
880 ,,,3,7,7
881 ,,,4,6,6
882 ,,,5,5,5
883 ,,Lowest,1,1,1
884 ,,,2,2,2
885 ,,,3,3,3
886 ,,,4,4,4
887 ,,,5,5,5
888 ,2,Highest,1,18,90
889 ,,,2,17,80
890 ,,,3,16,70
891 ,,,4,15,60
892 ,,,5,14,50
893 ,,Lowest,1,10,10
894 ,,,2,11,20
895 ,,,3,12,30
896 ,,,4,13,40
897 ,,,5,14,50
898 ,9[a],Highest,1,22,401
899 ,,,2,21,301
900 ,,,3,20,201
901 ,,,4,19,101
902 ,,,5,0,0
903 ,,Lowest,1,19,101
904 ,,,2,20,201
905 ,,,3,21,301
906 ,,,4,22,401
907 ,,,5,22,401
908 ,99[a],Highest,1,27,901
909 ,,,2,26,801
910 ,,,3,25,701
911 ,,,4,24,601
912 ,,,5,23,501
913 ,,Lowest,1,23,501
914 ,,,2,24,601
915 ,,,3,25,701
916 ,,,4,26,801
917 ,,,5,27,901
918 Footnote: a. User-missing value.
919 ]])
920
921 AT_CLEANUP
922
923
924 dnl Run a test of the basic STATISTICS using a "real"
925 dnl dataset and comparing with "real" results kindly
926 dnl provided by Olaf Nöhring
dnl 100 unweighted cases; the Descriptives table is compared at four
dnl decimal places (SET FORMAT = F22.4).
927 AT_SETUP([EXAMINE -- sample unweighted])
928 AT_KEYWORDS([categorical categoricals])
929
930 AT_DATA([sample.sps], [dnl
931 set format = F22.4.
932 DATA LIST notable LIST /X *
933 BEGIN DATA.
934 461.19000000
935 466.38000000
936 479.46000000
937 480.10000000
938 483.43000000
939 488.30000000
940 489.00000000
941 491.62000000
942 505.62000000
943 511.30000000
944 521.53000000
945 526.70000000
946 528.25000000
947 538.70000000
948 540.22000000
949 540.58000000
950 546.10000000
951 548.17000000
952 553.99000000
953 566.21000000
954 575.90000000
955 584.38000000
956 593.40000000
957 357.05000000
958 359.73000000
959 360.48000000
960 373.98000000
961 374.13000000
962 381.45000000
963 383.72000000
964 390.00000000
965 400.34000000
966 415.32000000
967 415.91000000
968 418.30000000
969 421.03000000
970 422.43000000
971 426.93000000
972 433.25000000
973 436.89000000
974 445.33000000
975 446.33000000
976 446.55000000
977 456.44000000
978 689.49000000
979 691.92000000
980 695.00000000
981 695.36000000
982 698.21000000
983 699.46000000
984 706.61000000
985 710.69000000
986 715.82000000
987 715.82000000
988 741.39000000
989 752.27000000
990 756.73000000
991 757.74000000
992 759.57000000
993 796.07000000
994 813.78000000
995 817.25000000
996 825.48000000
997 831.28000000
998 849.24000000
999 890.00000000
1000 894.78000000
1001 935.65000000
1002 935.90000000
1003 945.90000000
1004 1012.8600000
1005 1022.6000000
1006 1061.8100000
1007 1063.5000000
1008 1077.2300000
1009 1151.6300000
1010 1355.2800000
1011 598.88000000
1012 606.91000000
1013 621.60000000
1014 624.80000000
1015 636.13000000
1016 637.38000000
1017 640.32000000
1018 649.35000000
1019 656.51000000
1020 662.55000000
1021 664.69000000
1022 106.22000000
1023 132.24000000
1024 174.76000000
1025 204.85000000
1026 264.93000000
1027 264.99000000
1028 269.84000000
1029 325.12000000
1030 331.67000000
1031 337.26000000
1032 347.68000000
1033 354.91000000
1034 END DATA.
1035
1036 EXAMINE
1037         x
1038         /STATISTICS=DESCRIPTIVES
1039         .
1040 ])
1041
1042 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1043 Table: Case Processing Summary
1044 ,Cases,,,,,
1045 ,Valid,,Missing,,Total,
1046 ,N,Percent,N,Percent,N,Percent
1047 X,100,100.0%,0,.0%,100,100.0%
1048
1049 Table: Descriptives
1050 ,,,Statistic,Std. Error
1051 X,Mean,,587.6603,23.2665
1052 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1053 ,,Upper Bound,633.8260,
1054 ,5% Trimmed Mean,,579.7064,
1055 ,Median,,547.1350,
1056 ,Variance,,54132.8466,
1057 ,Std. Deviation,,232.6647,
1058 ,Minimum,,106.2200,
1059 ,Maximum,,1355.2800,
1060 ,Range,,1249.0600,
1061 ,Interquartile Range,,293.1575,
1062 ,Skewness,,.6331,.2414
1063 ,Kurtosis,,.5300,.4783
1064 ])
1065
1066 AT_CLEANUP
1067
1068
1069
1070 dnl Test for a crash which happened on bad input syntax
dnl /PLOT = SPREADLEVEL() is given with nothing between the parentheses;
dnl pspp is expected to exit with status 1 and stdout is ignored.
1071 AT_SETUP([EXAMINE -- Empty Parentheses])
1072 AT_KEYWORDS([categorical categoricals])
1073
1074 AT_DATA([examine-empty-parens.sps], [dnl
1075 DATA LIST notable LIST /X *
1076 BEGIN DATA.
1077 2
1078 3
1079 END DATA.
1080
1081
1082 EXAMINE
1083         x
1084         /PLOT = SPREADLEVEL()
1085         .
1086 ])
1087
1088 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1089
1090 AT_CLEANUP
1091
1092
1093
1094
1095 dnl Test for another crash which happened on bad input syntax
dnl The subcommand is written "/VARIABLES/ h" (a slash follows the
dnl keyword); pspp is expected to exit with status 1, stdout is ignored.
1096 AT_SETUP([EXAMINE -- Bad variable])
1097 AT_KEYWORDS([categorical categoricals])
1098
1099 AT_DATA([examine-bad-variable.sps], [dnl
1100 data list list /h * g *.
1101 begin data.
1102 3 1
1103 4 1
1104 5 2
1105 end data.
1106
1107 EXAMINE
1108         /VARIABLES/ h
1109         BY  g
1110         .
1111 ])
1112
1113 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1114
1115 AT_CLEANUP
1116
1117
1118
1119 dnl Test for yet another crash. This time for extremes vs. missing weight values.
dnl The weight variable g is '.' for one of the four cases.  The expected
dnl output is a warning that such cases were ignored, a valid N of 3.00
dnl and the extreme(3) table.
1120 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1121 AT_KEYWORDS([categorical categoricals])
1122
1123 AT_DATA([examine-missing-weights.sps], [dnl
1124 data list notable list /h * g *.
1125 begin data.
1126 3 1
1127 4 .
1128 5 1
1129 2 1
1130 end data.
1131
1132 WEIGHT BY g.
1133
1134 EXAMINE h
1135         /STATISTICS extreme(3)
1136         .
1137 ])
1138
1139 AT_CHECK([pspp -O format=csv  examine-missing-weights.sps], [0], [dnl
1140 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1141
1142 Table: Case Processing Summary
1143 ,Cases,,,,,
1144 ,Valid,,Missing,,Total,
1145 ,N,Percent,N,Percent,N,Percent
1146 h,3.00,100.0%,.00,.0%,3.00,100.0%
1147
1148 Table: Extreme Values
1149 ,,,Case Number,Value
1150 h,Highest,1,3,5.00
1151 ,,2,2,4.00
1152 ,,3,1,3.00
1153 ,Lowest,1,4,2.00
1154 ,,2,1,3.00
1155 ,,3,2,4.00
1156 ])
1157
1158 AT_CLEANUP
1159
1160 dnl This is an example from doc/tutorial.texi
1161 dnl So if the results of this have to be changed in any way,
1162 dnl make sure to update that file.
dnl Runs EXAMINE /STATISTICS=DESCRIPTIVES on mtbf and on LN (mtbf) and
dnl compares both Descriptives tables.
1163 AT_SETUP([EXAMINE tutorial example 1])
dnl Copy the example data file from the source tree into the test directory.
1164 cp $top_srcdir/examples/repairs.sav .
1165 AT_DATA([repairs.sps], [dnl
1166 GET FILE='repairs.sav'.
1167 EXAMINE mtbf /STATISTICS=DESCRIPTIVES.
1168 COMPUTE mtbf_ln = LN (mtbf).
1169 EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
1170 ])
1171
1172 AT_CHECK([pspp -O format=csv repairs.sps], [0], [dnl
1173 Table: Case Processing Summary
1174 ,Cases,,,,,
1175 ,Valid,,Missing,,Total,
1176 ,N,Percent,N,Percent,N,Percent
1177 Mean time between failures (months) ,30,100.0%,0,.0%,30,100.0%
1178
1179 Table: Descriptives
1180 ,,,Statistic,Std. Error
1181 Mean time between failures (months) ,Mean,,8.78,1.10
1182 ,95% Confidence Interval for Mean,Lower Bound,6.53,
1183 ,,Upper Bound,11.04,
1184 ,5% Trimmed Mean,,8.20,
1185 ,Median,,8.29,
1186 ,Variance,,36.34,
1187 ,Std. Deviation,,6.03,
1188 ,Minimum,,1.63,
1189 ,Maximum,,26.47,
1190 ,Range,,24.84,
1191 ,Interquartile Range,,6.03,
1192 ,Skewness,,1.65,.43
1193 ,Kurtosis,,3.41,.83
1194
1195 Table: Case Processing Summary
1196 ,Cases,,,,,
1197 ,Valid,,Missing,,Total,
1198 ,N,Percent,N,Percent,N,Percent
1199 mtbf_ln,30,100.0%,0,.0%,30,100.0%
1200
1201 Table: Descriptives
1202 ,,,Statistic,Std. Error
1203 mtbf_ln,Mean,,1.95,.13
1204 ,95% Confidence Interval for Mean,Lower Bound,1.69,
1205 ,,Upper Bound,2.22,
1206 ,5% Trimmed Mean,,1.96,
1207 ,Median,,2.11,
1208 ,Variance,,.49,
1209 ,Std. Deviation,,.70,
1210 ,Minimum,,.49,
1211 ,Maximum,,3.28,
1212 ,Range,,2.79,
1213 ,Interquartile Range,,.88,
1214 ,Skewness,,-.37,.43
1215 ,Kurtosis,,.01,.83
1216 ])
1217
1218 AT_CLEANUP
1219
1220 dnl This is an example from doc/tutorial.texi
1221 dnl So if the results of this have to be changed in any way,
1222 dnl make sure to update that file.
dnl Runs EXAMINE height, weight /STATISTICS=EXTREME(3) on physiology.sav
dnl and compares the Extreme Values table.
1223 AT_SETUP([EXAMINE tutorial example 2])
dnl Copy the example data file from the source tree into the test directory.
1224 cp $top_srcdir/examples/physiology.sav .
1225 AT_DATA([examine.sps], [dnl
1226 GET FILE='physiology.sav'.
1227 EXAMINE height, weight /STATISTICS=EXTREME(3).
1228 ])
1229 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
1230 AT_CHECK([cat pspp.csv], [0], [dnl
1231 Table: Case Processing Summary
1232 ,Cases,,,,,
1233 ,Valid,,Missing,,Total,
1234 ,N,Percent,N,Percent,N,Percent
1235 Height in millimeters   ,40,100.0%,0,.0%,40,100.0%
1236 Weight in kilograms ,40,100.0%,0,.0%,40,100.0%
1237
1238 Table: Extreme Values
1239 ,,,Case Number,Value
1240 Height in millimeters   ,Highest,1,14,1903
1241 ,,2,15,1884
1242 ,,3,12,1802
1243 ,Lowest,1,30,179
1244 ,,2,31,1598
1245 ,,3,28,1601
1246 Weight in kilograms ,Highest,1,13,92.1
1247 ,,2,5,92.1
1248 ,,3,17,91.7
1249 ,Lowest,1,38,-55.6
1250 ,,2,39,54.5
1251 ,,3,33,55.4
1252 ])
1253 AT_CLEANUP
1254
1255
1256
dnl One case, "examine x by g /nototal /plot = all", run with
dnl -O format=pdf.  Exit status must be 0; stdout and stderr are ignored.
1257 AT_SETUP([EXAMINE -- Crash on unrepresentable graphs])
1258 AT_DATA([examine.sps], [dnl
1259 data list notable list /x * g *.
1260 begin data.
1261 96 1
1262 end data.
1263
1264 examine x  by g
1265         /nototal
1266         /plot = all.
1267 ])
1268 dnl This bug only manifested itself on cairo based drivers.
1269 AT_CHECK([pspp -O format=pdf examine.sps], [0], [ignore], [ignore])
1270 AT_CLEANUP
1271
1272
1273 dnl This example comes from the web site:
1274 dnl  https://www.spsstests.com/2018/11/shapiro-wilk-normality-test-spss.html
dnl Two groups of 10 cases (g = 1 and g = 2); checks the Shapiro-Wilk
dnl statistic, df and Sig. for each group at three decimals (F22.3).
1275 AT_SETUP([EXAMINE -- shapiro-wilk 1])
1276 AT_KEYWORDS([shapiro wilk])
1277 AT_DATA([shapiro-wilk.sps], [dnl
1278 data list notable list /x * g *.
1279 begin data.
1280 96 1
1281 98 1
1282 95 1
1283 89 1
1284 90 1
1285 92 1
1286 94 1
1287 93 1
1288 97 1
1289 100 1
1290 99 2
1291 96 2
1292 80 2
1293 89 2
1294 91 2
1295 92 2
1296 93 2
1297 94 2
1298 99 2
1299 80 2
1300 end data.
1301
1302 set format F22.3.
1303
1304 examine x  by g
1305         /nototal
1306         /plot = all.
1307 ])
1308
1309 AT_CHECK([pspp -O format=csv shapiro-wilk.sps], [0],[dnl
1310 Table: Case Processing Summary
1311 ,g,Cases,,,,,
1312 ,,Valid,,Missing,,Total,
1313 ,,N,Percent,N,Percent,N,Percent
1314 x,1.00,10,100.0%,0,.0%,10,100.0%
1315 ,2.00,10,100.0%,0,.0%,10,100.0%
1316
1317 Table: Tests of Normality
1318 ,g,Shapiro-Wilk,,
1319 ,,Statistic,df,Sig.
1320 x,1.00,.984,10,.983
1321 ,2.00,.882,10,.136
1322 ])
1323
1324 AT_CLEANUP
1325
1326
1327 dnl This example comes from the web site:
1328 dnl  http://www.real-statistics.com/tests-normality-and-symmetry/statistical-tests-normality-symmetry/shapiro-wilk-expanded-test/
1329 dnl It uses a dataset larger than 11 samples. Hence the alternative method for
1330 dnl significance is used.
dnl 12 cases examined with /plot = boxplot; checks the Tests of Normality
dnl table at three decimals (F22.3).
1331 AT_SETUP([EXAMINE -- shapiro-wilk 2])
1332 AT_KEYWORDS([shapiro wilk])
1333 AT_DATA([shapiro-wilk2.sps], [dnl
1334 data list notable list /x *.
1335 begin data.
1336 65
1337 61
1338 63
1339 86
1340 70
1341 55
1342 74
1343 35
1344 72
1345 68
1346 45
1347 58
1348 end data.
1349
1350 set format F22.3.
1351
1352 examine x
1353         /plot = boxplot.
1354 ])
1355
1356 AT_CHECK([pspp -O format=csv shapiro-wilk2.sps], [0],[dnl
1357 Table: Case Processing Summary
1358 ,Cases,,,,,
1359 ,Valid,,Missing,,Total,
1360 ,N,Percent,N,Percent,N,Percent
1361 x,12,100.0%,0,.0%,12,100.0%
1362
1363 Table: Tests of Normality
1364 ,Shapiro-Wilk,,
1365 ,Statistic,df,Sig.
1366 x,.971,12,.922
1367 ])
1368
1369 AT_CLEANUP