csv: Change footnote format.
[pspp] / tests / language / stats / examine.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
3 dnl
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
17 AT_BANNER([EXAMINE])
18
19 AT_SETUP([EXAMINE])
20 AT_KEYWORDS([categorical categoricals])
21 AT_DATA([examine.sps], [
22 DATA LIST LIST /QUALITY * W * BRAND * .
23 BEGIN DATA
24 3  1  1
25 2  2  1
26 1  2  1
27 1  1  1
28 4  1  1
29 4  1  1
30 5  1  2
31 2  1  2
32 4  4  2
33 2  1  2
34 3  1  2
35 7  1  3
36 4  2  3
37 5  3  3
38 3  1  3
39 6  1  3
40 END DATA
41
42 WEIGHT BY w.
43
44 VARIABLE LABELS brand   'Manufacturer'.
45 VARIABLE LABELS quality 'Breaking Strain'.
46
47 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
48
49 LIST /FORMAT=NUMBERED.
50
51 EXAMINE
52         quality BY brand
53         /STATISTICS descriptives extreme(3)
54         .
55 ])
56
57
58 dnl In the following data, only the extreme values have been checked.
59 dnl The descriptives have been blindly pasted.
60 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
61 Table: Reading free-form data from INLINE.
62 Variable,Format
63 QUALITY,F8.0
64 W,F8.0
65 BRAND,F8.0
66
67 Table: Data List
68 Case Number,QUALITY,W,BRAND
69 1,3.00,1.00,1.00
70 2,2.00,2.00,1.00
71 3,1.00,2.00,1.00
72 4,1.00,1.00,1.00
73 5,4.00,1.00,1.00
74 6,4.00,1.00,1.00
75 7,5.00,1.00,2.00
76 8,2.00,1.00,2.00
77 9,4.00,4.00,2.00
78 10,2.00,1.00,2.00
79 11,3.00,1.00,2.00
80 12,7.00,1.00,3.00
81 13,4.00,2.00,3.00
82 14,5.00,3.00,3.00
83 15,3.00,1.00,3.00
84 16,6.00,1.00,3.00
85
86 Table: Case Processing Summary
87 ,Cases,,,,,
88 ,Valid,,Missing,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 Breaking Strain,24.00,100.0%,.00,.0%,24.00,100.0%
91
92 Table: Extreme Values
93 ,,,Case Number,Value
94 Breaking Strain,Highest,1,12,7.00
95 ,,2,16,6.00
96 ,,3,14,5.00
97 ,Lowest,1,3,1.00
98 ,,2,4,1.00
99 ,,3,2,2.00
100
101 Table: Descriptives
102 ,,,Statistic,Std. Error
103 Breaking Strain,Mean,,3.54,.32
104 ,95% Confidence Interval for Mean,Lower Bound,2.87,
105 ,,Upper Bound,4.21,
106 ,5% Trimmed Mean,,3.50,
107 ,Median,,4.00,
108 ,Variance,,2.52,
109 ,Std. Deviation,,1.59,
110 ,Minimum,,1.00,
111 ,Maximum,,7.00,
112 ,Range,,6.00,
113 ,Interquartile Range,,2.75,
114 ,Skewness,,.06,.47
115 ,Kurtosis,,-.36,.92
116
117 Table: Case Processing Summary
118 ,Manufacturer,Cases,,,,,
119 ,,Valid,,Missing,,Total,
120 ,,N,Percent,N,Percent,N,Percent
121 Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
122 ,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
123 ,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
124
125 Table: Extreme Values
126 ,Manufacturer,,,Case Number,Value
127 Breaking Strain,Aspeger,Highest,1,6,4.00
128 ,,,2,5,4.00
129 ,,,3,1,3.00
130 ,,Lowest,1,3,1.00
131 ,,,2,4,1.00
132 ,,,3,2,2.00
133 ,Bloggs,Highest,1,7,5.00
134 ,,,2,9,4.00
135 ,,,3,11,3.00
136 ,,Lowest,1,8,2.00
137 ,,,2,10,2.00
138 ,,,3,11,3.00
139 ,Charlies,Highest,1,12,7.00
140 ,,,2,16,6.00
141 ,,,3,14,5.00
142 ,,Lowest,1,15,3.00
143 ,,,2,13,4.00
144 ,,,3,14,5.00
145
146 Table: Descriptives
147 ,Manufacturer,,,Statistic,Std. Error
148 Breaking Strain,Aspeger,Mean,,2.25,.45
149 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
150 ,,,Upper Bound,3.32,
151 ,,5% Trimmed Mean,,2.22,
152 ,,Median,,2.00,
153 ,,Variance,,1.64,
154 ,,Std. Deviation,,1.28,
155 ,,Minimum,,1.00,
156 ,,Maximum,,4.00,
157 ,,Range,,3.00,
158 ,,Interquartile Range,,2.75,
159 ,,Skewness,,.47,.75
160 ,,Kurtosis,,-1.55,1.48
161 ,Bloggs,Mean,,3.50,.38
162 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
163 ,,,Upper Bound,4.39,
164 ,,5% Trimmed Mean,,3.50,
165 ,,Median,,4.00,
166 ,,Variance,,1.14,
167 ,,Std. Deviation,,1.07,
168 ,,Minimum,,2.00,
169 ,,Maximum,,5.00,
170 ,,Range,,3.00,
171 ,,Interquartile Range,,1.75,
172 ,,Skewness,,-.47,.75
173 ,,Kurtosis,,-.83,1.48
174 ,Charlies,Mean,,4.88,.44
175 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
176 ,,,Upper Bound,5.92,
177 ,,5% Trimmed Mean,,4.86,
178 ,,Median,,5.00,
179 ,,Variance,,1.55,
180 ,,Std. Deviation,,1.25,
181 ,,Minimum,,3.00,
182 ,,Maximum,,7.00,
183 ,,Range,,4.00,
184 ,,Interquartile Range,,1.75,
185 ,,Skewness,,.30,.75
186 ,,Kurtosis,,.15,1.48
187 ])
188
189 AT_CLEANUP
190
191 AT_SETUP([EXAMINE -- extremes])
192 AT_KEYWORDS([categorical categoricals])
193 AT_DATA([examine.sps], [dnl
194 data list free /V1 W
195 begin data.
196 1  1
197 2  1
198 3  2
199 3  1
200 4  1
201 5  1
202 6  1
203 7  1
204 8  1
205 9  1
206 10 1
207 11 1
208 12 1
209 13 1
210 14 1
211 15 1
212 16 1
213 17 1
214 18 2
215 19 1
216 20 1
217 end data.
218
219 weight by w.
220
221 examine v1
222  /statistics=extreme(6)
223  .
224 ])
225
226 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
227 Table: Case Processing Summary
228 ,Cases,,,,,
229 ,Valid,,Missing,,Total,
230 ,N,Percent,N,Percent,N,Percent
231 V1,23.00,100.0%,.00,.0%,23.00,100.0%
232
233 Table: Extreme Values
234 ,,,Case Number,Value
235 V1,Highest,1,21,20.00
236 ,,2,20,19.00
237 ,,3,19,18.00
238 ,,4,18,17.00
239 ,,5,17,16.00
240 ,,6,16,15.00
241 ,Lowest,1,1,1.00
242 ,,2,2,2.00
243 ,,3,3,3.00
244 ,,4,4,3.00
245 ,,5,5,4.00
246 ,,6,6,5.00
247 ])
248
249 AT_CLEANUP
250
251
252 AT_SETUP([EXAMINE -- extremes with fractional weights])
253 AT_KEYWORDS([categorical categoricals])
254 AT_DATA([extreme.sps], [dnl
255 set format=F20.3.
256 data list notable list /w * x *.
257 begin data.
258  0.88  300000
259  0.86  320000
260  0.98  480000
261  0.93  960000
262  1.35  960000
263  1.31  960000
264  0.88  960000
265  0.88  1080000
266  0.88  1080000
267  0.95  1200000
268  1.47  1200000
269  0.93  1200000
270  0.98  1320000
271  1.31  1380000
272  0.93  1440000
273  0.88  1560000
274  1.56  1560000
275  1.47  1560000
276 end data.
277
278 weight by w.
279
280
281 EXAMINE
282         x
283         /STATISTICS = DESCRIPTIVES EXTREME (5)
284         .
285 ])
286
287 AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
288 Table: Case Processing Summary
289 ,Cases,,,,,
290 ,Valid,,Missing,,Total,
291 ,N,Percent,N,Percent,N,Percent
292 x,19.430,100.0%,.000,.0%,19.430,100.0%
293
294 Table: Extreme Values
295 ,,,Case Number,Value
296 x,Highest,1,18,1560000.000
297 ,,2,17,1560000.000
298 ,,3,16,1560000.000
299 ,,4,15,1440000.000
300 ,,5,14,1380000.000
301 ,Lowest,1,1,300000.000
302 ,,2,2,320000.000
303 ,,3,3,480000.000
304 ,,4,4,960000.000
305 ,,5,5,960000.000
306
307 Table: Descriptives
308 ,,,Statistic,Std. Error
309 x,Mean,,1120010.293,86222.178
310 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
311 ,,Upper Bound,1300853.894,
312 ,5% Trimmed Mean,,1141017.899,
313 ,Median,,1200000.000,
314 ,Variance,,144447748124.869,
315 ,Std. Deviation,,380062.821,
316 ,Minimum,,300000.000,
317 ,Maximum,,1560000.000,
318 ,Range,,1260000.000,
319 ,Interquartile Range,,467258.065,
320 ,Skewness,,-.887,.519
321 ,Kurtosis,,.340,1.005
322 ])
323
324 AT_CLEANUP
325
326 dnl Test the PERCENTILES subcommand of the EXAMINE command.
327 dnl In particular test that it behaves properly when there are only
328 dnl a few cases.
329 AT_SETUP([EXAMINE -- percentiles])
330 AT_KEYWORDS([categorical categoricals])
331 AT_DATA([examine.sps], [dnl
332 DATA LIST LIST /X *.
333 BEGIN DATA.
334 2.00
335 8.00
336 5.00
337 END DATA.
338
339 EXAMINE /x
340         /PERCENTILES=HAVERAGE.
341
342 EXAMINE /x
343         /PERCENTILES=WAVERAGE.
344
345 EXAMINE /x
346         /PERCENTILES=ROUND.
347
348 EXAMINE /x
349         /PERCENTILES=EMPIRICAL.
350
351 EXAMINE /x
352         /PERCENTILES=AEMPIRICAL.
353 ])
354 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
355 AT_CHECK([cat pspp.csv], [0], [dnl
356 Table: Reading free-form data from INLINE.
357 Variable,Format
358 X,F8.0
359
360 Table: Case Processing Summary
361 ,Cases,,,,,
362 ,Valid,,Missing,,Total,
363 ,N,Percent,N,Percent,N,Percent
364 X,3,100.0%,0,.0%,3,100.0%
365
366 Table: Percentiles
367 ,,Percentiles,,,,,,
368 ,,5,10,25,50,75,90,95
369 X,Weighted Average,.40,.80,2.00,5.00,8.00,8.00,8.00
370 ,Tukey's Hinges,,,3.50,5.00,6.50,,
371
372 Table: Case Processing Summary
373 ,Cases,,,,,
374 ,Valid,,Missing,,Total,
375 ,N,Percent,N,Percent,N,Percent
376 X,3,100.0%,0,.0%,3,100.0%
377
378 Table: Percentiles
379 ,,Percentiles,,,,,,
380 ,,5,10,25,50,75,90,95
381 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
382 ,Tukey's Hinges,,,3.50,5.00,6.50,,
383
384 Table: Case Processing Summary
385 ,Cases,,,,,
386 ,Valid,,Missing,,Total,
387 ,N,Percent,N,Percent,N,Percent
388 X,3,100.0%,0,.0%,3,100.0%
389
390 Table: Percentiles
391 ,,Percentiles,,,,,,
392 ,,5,10,25,50,75,90,95
393 X,Weighted Average,.00,.00,2.00,5.00,5.00,8.00,8.00
394 ,Tukey's Hinges,,,3.50,5.00,6.50,,
395
396 Table: Case Processing Summary
397 ,Cases,,,,,
398 ,Valid,,Missing,,Total,
399 ,N,Percent,N,Percent,N,Percent
400 X,3,100.0%,0,.0%,3,100.0%
401
402 Table: Percentiles
403 ,,Percentiles,,,,,,
404 ,,5,10,25,50,75,90,95
405 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
406 ,Tukey's Hinges,,,3.50,5.00,6.50,,
407
408 Table: Case Processing Summary
409 ,Cases,,,,,
410 ,Valid,,Missing,,Total,
411 ,N,Percent,N,Percent,N,Percent
412 X,3,100.0%,0,.0%,3,100.0%
413
414 Table: Percentiles
415 ,,Percentiles,,,,,,
416 ,,5,10,25,50,75,90,95
417 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
418 ,Tukey's Hinges,,,3.50,5.00,6.50,,
419 ])
420 AT_CLEANUP
421
422 AT_SETUP([EXAMINE -- missing values])
423 AT_KEYWORDS([categorical categoricals])
424 AT_DATA([examine.sps], [dnl
425 DATA LIST LIST /x * y *.
426 BEGIN DATA.
427 1   1
428 2   1
429 3   1
430 4   1
431 5   2
432 6   2
433 .   2
434 END DATA
435
436 EXAMINE /x by y
437         /MISSING = PAIRWISE
438         .
439 ])
440 AT_CHECK([pspp -o pspp.csv examine.sps])
441 AT_CHECK([cat pspp.csv], [0], [dnl
442 Table: Reading free-form data from INLINE.
443 Variable,Format
444 x,F8.0
445 y,F8.0
446
447 Table: Case Processing Summary
448 ,Cases,,,,,
449 ,Valid,,Missing,,Total,
450 ,N,Percent,N,Percent,N,Percent
451 x,6,85.7%,1,14.3%,7,100.0%
452
453 Table: Case Processing Summary
454 ,y,Cases,,,,,
455 ,,Valid,,Missing,,Total,
456 ,,N,Percent,N,Percent,N,Percent
457 x,1.00,4,100.0%,0,.0%,4,100.0%
458 ,2.00,2,66.7%,1,33.3%,3,100.0%
459 ])
460 AT_CLEANUP
461
462
463 AT_SETUP([EXAMINE -- user missing values])
464 AT_KEYWORDS([categorical categoricals])
465 AT_DATA([examine-m.sps], [dnl
466 DATA LIST notable LIST /x * y *.
467 BEGIN DATA.
468 1                   2
469 9999999999          2
470 9999999999          99
471 END DATA.
472
473 MISSING VALUES x (9999999999).
474 MISSING VALUES y (99).
475
476 EXAMINE
477         /VARIABLES= x y
478         /MISSING=PAIRWISE.
479 ])
480 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
481 Table: Case Processing Summary
482 ,Cases,,,,,
483 ,Valid,,Missing,,Total,
484 ,N,Percent,N,Percent,N,Percent
485 x,1,33.3%,2,66.7%,3,100.0%
486 y,2,66.7%,1,33.3%,3,100.0%
487 ])
488 AT_CLEANUP
489
490 AT_SETUP([EXAMINE -- missing values and percentiles])
491 AT_KEYWORDS([categorical categoricals])
492 AT_DATA([examine.sps], [dnl
493 DATA LIST LIST /X *.
494 BEGIN DATA.
495 99
496 99
497 5.00
498 END DATA.
499
500 MISSING VALUE X (99).
501
502 EXAMINE /x
503         /PERCENTILES=HAVERAGE.
504 ])
505 AT_CHECK([pspp -o pspp.csv examine.sps])
506 dnl The contents of pspp.csv are not inspected -- this is just a no-crash check.
507 AT_CLEANUP
508
509 dnl Tests the trimmed mean calculation in the case
510 dnl where the data is weighted towards the centre.
511 AT_SETUP([EXAMINE -- trimmed mean])
512 AT_KEYWORDS([categorical categoricals])
513 AT_DATA([examine.sps], [dnl
514 DATA LIST LIST /X * C *.
515 BEGIN DATA.
516 1 1
517 2 49
518 3 2
519 END DATA.
520
521 WEIGHT BY c.
522
523 EXAMINE
524         x
525         /STATISTICS=DESCRIPTIVES
526         .
527 ])
528 AT_CHECK([pspp -o pspp.csv examine.sps])
529 AT_CHECK([cat pspp.csv], [0], [dnl
530 Table: Reading free-form data from INLINE.
531 Variable,Format
532 X,F8.0
533 C,F8.0
534
535 Table: Case Processing Summary
536 ,Cases,,,,,
537 ,Valid,,Missing,,Total,
538 ,N,Percent,N,Percent,N,Percent
539 X,52.00,100.0%,.00,.0%,52.00,100.0%
540
541 Table: Descriptives
542 ,,,Statistic,Std. Error
543 X,Mean,,2.02,.03
544 ,95% Confidence Interval for Mean,Lower Bound,1.95,
545 ,,Upper Bound,2.09,
546 ,5% Trimmed Mean,,2.00,
547 ,Median,,2.00,
548 ,Variance,,.06,
549 ,Std. Deviation,,.24,
550 ,Minimum,,1.00,
551 ,Maximum,,3.00,
552 ,Range,,2.00,
553 ,Interquartile Range,,.00,
554 ,Skewness,,1.19,.33
555 ,Kurtosis,,15.73,.65
556 ])
557 AT_CLEANUP
558
559 AT_SETUP([EXAMINE -- crash bug])
560 AT_KEYWORDS([categorical categoricals])
561 AT_DATA([examine.sps], [dnl
562 data list list /a * x * y *.
563 begin data.
564 3 1 3
565 5 1 4
566 7 2 3
567 end data.
568
569 examine a by x by y
570         /statistics=DESCRIPTIVES
571         .
572 ])
573 AT_CHECK([pspp -o pspp.csv examine.sps])
574 dnl The contents of pspp.csv are not inspected -- this is just a no-crash check.
575 AT_CLEANUP
576
577 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
578 AT_SETUP([EXAMINE -- consecutive runs don't crash])
579 AT_KEYWORDS([categorical categoricals])
580 AT_DATA([examine.sps], [dnl
581 data list list /y * z *.
582 begin data.
583 6 4
584 5 3
585 7 6
586 end data.
587
588 EXAMINE /VARIABLES= z BY y.
589
590 EXAMINE /VARIABLES= z.
591 ])
592 AT_CHECK([pspp -o pspp.csv examine.sps])
593 dnl The contents of pspp.csv are not inspected -- both EXAMINE runs only have to complete without a crash.
594 AT_CLEANUP
595
596 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
597 AT_SETUP([EXAMINE -- missing values don't crash])
598 AT_KEYWORDS([categorical categoricals])
599 AT_DATA([examine.sps], [dnl
600 data list list /x * y *.
601 begin data.
602 1 0
603 2 0
604 . 0
605 3 1
606 4 1
607 end data.
608 examine x by y /statistics=descriptives.
609 ])
610 AT_CHECK([pspp -o pspp.csv examine.sps])
611 dnl The contents of pspp.csv are not inspected -- this is just a no-crash check.
612 AT_CLEANUP
613
614 dnl Test that having only a single case doesn't crash.
615 AT_SETUP([EXAMINE -- single case doesn't crash])
616 AT_KEYWORDS([categorical categoricals])
617 AT_DATA([examine.sps], [dnl
618 DATA LIST LIST /quality * .
619 BEGIN DATA
620 3
621 END DATA
622
623
624 EXAMINE
625         quality
626         /STATISTICS descriptives
627         /PLOT = histogram
628         .
629 ])
630 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
631 dnl stdout is ignored and pspp.csv is not inspected -- this is just a no-crash check.
632 AT_CLEANUP
633
634 dnl Test that all-missing data doesn't crash.
635 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
636 AT_KEYWORDS([categorical categoricals])
637 AT_DATA([examine.sps], [dnl
638 DATA LIST LIST /x *.
639 BEGIN DATA.
640 .
641 .
642 .
643 .
644 END DATA.
645
646 EXAMINE /x
647         PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
648         /ID=x
649         /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
650         /PERCENTILE=AEMPIRICAL
651         .
652 ])
653 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
654 dnl stdout is ignored and pspp.csv is not inspected -- this is just a no-crash check.
655 AT_CLEANUP
656
657 dnl Test that big input doesn't crash (bug 11307).
658 AT_SETUP([EXAMINE -- big input doesn't crash])
659 AT_KEYWORDS([categorical categoricals slow])
660 AT_DATA([examine.sps], [dnl
661 INPUT PROGRAM.
662         LOOP #I=1 TO 50000.
663                 COMPUTE X=NORMAL(10).
664                 END CASE.
665         END LOOP.
666         END FILE.
667 END INPUT PROGRAM.
668
669
670 EXAMINE /x
671         /STATISTICS=DESCRIPTIVES.
672 ])
673 AT_CHECK([pspp -o pspp.csv examine.sps])
674 dnl The contents of pspp.csv are not inspected -- this is just a no-crash check on the 50000 generated cases.
675 AT_CLEANUP
676
677 dnl Another test that big input doesn't crash.
678 dnl The actual bug that this checks for has been lost.
679 AT_SETUP([EXAMINE -- big input doesn't crash 2])
680 AT_KEYWORDS([categorical categoricals slow])
681 AT_DATA([make-big-input.pl],
682   [for ($i=0; $i<100000; $i++) { print "AB12\n" };
683    for ($i=0; $i<100000; $i++) { print "AB04\n" };
684 ])
685 AT_CHECK([$PERL make-big-input.pl > large.txt])
686 AT_DATA([examine.sps], [dnl
687 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
688
689
690 AGGREGATE OUTFILE=* /BREAK=X /A=N.
691
692
693 EXAMINE /A BY X.
694 ])
695 AT_CHECK([pspp -o pspp.csv examine.sps])
696 dnl First run, on the 200000-line large.txt: pspp.csv is not inspected -- this is just a no-crash check.
697 AT_DATA([more-big-input.pl],
698   [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
699 ])
700 AT_CHECK([$PERL more-big-input.pl >> large.txt])
701 AT_CHECK([pspp -o pspp.csv examine.sps])
702 dnl Second run, after appending 50000 interleaved lines: again pspp.csv is not inspected -- just a no-crash check.
703 AT_CLEANUP
704
705
706 dnl Test that the ID subcommand works with non-numeric variables
707 AT_SETUP([EXAMINE -- non-numeric ID])
708 AT_KEYWORDS([categorical categoricals])
709
710 AT_DATA([examine-id.sps], [dnl
711 data list notable list /x * y (a12).
712 begin data.
713 1  one
714 2  two
715 3  three
716 4  four
717 5  five
718 6  six
719 7  seven
720 8  eight
721 9  nine
722 10 ten
723 11 eleven
724 12 twelve
725 30 thirty
726 300 threehundred
727 end data.
728
729 examine x
730         /statistics = extreme
731         /id = y
732         /plot = boxplot
733         .
734 ])
735
736 AT_CHECK([pspp -O format=csv examine-id.sps], [0], [dnl
737 Table: Case Processing Summary
738 ,Cases,,,,,
739 ,Valid,,Missing,,Total,
740 ,N,Percent,N,Percent,N,Percent
741 x,14,100.0%,0,.0%,14,100.0%
742
743 Table: Extreme Values
744 ,,,y,Value
745 x,Highest,1,threehundred,300.00
746 ,,2,thirty,30.00
747 ,,3,twelve,12.00
748 ,,4,eleven,11.00
749 ,,5,ten,10.00
750 ,Lowest,1,one,1.00
751 ,,2,two,2.00
752 ,,3,three,3.00
753 ,,4,four,4.00
754 ,,5,five,5.00
755
756 Table: Tests of Normality
757 ,Shapiro-Wilk,,
758 ,Statistic,df,Sig.
759 x,.37,14,.00
760 ])
761
762 AT_CLEANUP
763
764 dnl Test for a crash which happened on cleanup after bad input syntax (the unrecognized /PLOT keyword below); pspp must exit with status 1.
765 AT_SETUP([EXAMINE -- Bad Input])
766 AT_KEYWORDS([categorical categoricals])
767
768 AT_DATA([examine-bad.sps], [dnl
769 data list list /h * g *.
770 begin data.
771 1 1
772 2 1
773 3 1
774 4 1
775 5 2
776 6 2
777 7 2
778 8 2
779 9 2
780 end data.
781
782 EXAMINE
783         /VARIABLES= h
784         BY  g
785         /STATISTICS = DESCRIPTIVES EXTREME
786         /PLOT = lkajsdas
787         .
788 ])
789
790 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
791
792 AT_CLEANUP
793
794
795 dnl Check the MISSING=REPORT option
796 AT_SETUP([EXAMINE -- MISSING=REPORT])
797 AT_KEYWORDS([categorical categoricals])
798
799 AT_DATA([examine-report.sps], [dnl
800 set format = F22.0.
801 data list list /x * g *.
802 begin data.
803 1   1
804 2   1
805 3   1
806 4   1
807 5   1
808 6   1
809 7   1
810 8   1
811 9   1
812 10   2
813 20   2
814 30   2
815 40   2
816 50   2
817 60   2
818 70   2
819 80   2
820 90   2
821 101   9
822 201   9
823 301   9
824 401   9
825 501   99
826 601   99
827 701   99
828 801   99
829 901   99
830 1001  .
831 2002  .
832 3003  .
833 4004  .
834 end data.
835
836 MISSING VALUES g (9, 99, 999).
837
838 EXAMINE
839         /VARIABLES = x
840         BY  g
841         /STATISTICS = EXTREME
842         /NOTOTAL
843         /MISSING = REPORT.
844 ])
845
846
847 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine-report.sps])
848 AT_CHECK([cat pspp.csv], [0],
849   [[Table: Reading free-form data from INLINE.
850 Variable,Format
851 x,F8.0
852 g,F8.0
853
854 Table: Case Processing Summary
855 ,g,Cases,,,,,
856 ,,Valid,,Missing,,Total,
857 ,,N,Percent,N,Percent,N,Percent
858 x,.,4,100.0%,0,.0%,4,100.0%
859 ,1,9,100.0%,0,.0%,9,100.0%
860 ,2,9,100.0%,0,.0%,9,100.0%
861 ,9[a],4,100.0%,0,.0%,4,100.0%
862 ,99[a],5,100.0%,0,.0%,5,100.0%
863 Footnote: a. User-missing value.
864
865 Table: Extreme Values
866 ,g,,,Case Number,Value
867 x,.,Highest,1,31,4004
868 ,,,2,30,3003
869 ,,,3,29,2002
870 ,,,4,28,1001
871 ,,,5,0,0
872 ,,Lowest,1,28,1001
873 ,,,2,29,2002
874 ,,,3,30,3003
875 ,,,4,31,4004
876 ,,,5,31,4004
877 ,1,Highest,1,9,9
878 ,,,2,8,8
879 ,,,3,7,7
880 ,,,4,6,6
881 ,,,5,5,5
882 ,,Lowest,1,1,1
883 ,,,2,2,2
884 ,,,3,3,3
885 ,,,4,4,4
886 ,,,5,5,5
887 ,2,Highest,1,18,90
888 ,,,2,17,80
889 ,,,3,16,70
890 ,,,4,15,60
891 ,,,5,14,50
892 ,,Lowest,1,10,10
893 ,,,2,11,20
894 ,,,3,12,30
895 ,,,4,13,40
896 ,,,5,14,50
897 ,9[a],Highest,1,22,401
898 ,,,2,21,301
899 ,,,3,20,201
900 ,,,4,19,101
901 ,,,5,0,0
902 ,,Lowest,1,19,101
903 ,,,2,20,201
904 ,,,3,21,301
905 ,,,4,22,401
906 ,,,5,22,401
907 ,99[a],Highest,1,27,901
908 ,,,2,26,801
909 ,,,3,25,701
910 ,,,4,24,601
911 ,,,5,23,501
912 ,,Lowest,1,23,501
913 ,,,2,24,601
914 ,,,3,25,701
915 ,,,4,26,801
916 ,,,5,27,901
917 Footnote: a. User-missing value.
918 ]])
919
920 AT_CLEANUP
921
922
923 dnl Run a test of the basic STATISTICS using a "real"
924 dnl dataset and comparing with "real" results kindly
925 dnl provided by Olaf Nöhring
926 AT_SETUP([EXAMINE -- sample unweighted])
927 AT_KEYWORDS([categorical categoricals])
928
929 AT_DATA([sample.sps], [dnl
930 set format = F22.4.
931 DATA LIST notable LIST /X *
932 BEGIN DATA.
933 461.19000000
934 466.38000000
935 479.46000000
936 480.10000000
937 483.43000000
938 488.30000000
939 489.00000000
940 491.62000000
941 505.62000000
942 511.30000000
943 521.53000000
944 526.70000000
945 528.25000000
946 538.70000000
947 540.22000000
948 540.58000000
949 546.10000000
950 548.17000000
951 553.99000000
952 566.21000000
953 575.90000000
954 584.38000000
955 593.40000000
956 357.05000000
957 359.73000000
958 360.48000000
959 373.98000000
960 374.13000000
961 381.45000000
962 383.72000000
963 390.00000000
964 400.34000000
965 415.32000000
966 415.91000000
967 418.30000000
968 421.03000000
969 422.43000000
970 426.93000000
971 433.25000000
972 436.89000000
973 445.33000000
974 446.33000000
975 446.55000000
976 456.44000000
977 689.49000000
978 691.92000000
979 695.00000000
980 695.36000000
981 698.21000000
982 699.46000000
983 706.61000000
984 710.69000000
985 715.82000000
986 715.82000000
987 741.39000000
988 752.27000000
989 756.73000000
990 757.74000000
991 759.57000000
992 796.07000000
993 813.78000000
994 817.25000000
995 825.48000000
996 831.28000000
997 849.24000000
998 890.00000000
999 894.78000000
1000 935.65000000
1001 935.90000000
1002 945.90000000
1003 1012.8600000
1004 1022.6000000
1005 1061.8100000
1006 1063.5000000
1007 1077.2300000
1008 1151.6300000
1009 1355.2800000
1010 598.88000000
1011 606.91000000
1012 621.60000000
1013 624.80000000
1014 636.13000000
1015 637.38000000
1016 640.32000000
1017 649.35000000
1018 656.51000000
1019 662.55000000
1020 664.69000000
1021 106.22000000
1022 132.24000000
1023 174.76000000
1024 204.85000000
1025 264.93000000
1026 264.99000000
1027 269.84000000
1028 325.12000000
1029 331.67000000
1030 337.26000000
1031 347.68000000
1032 354.91000000
1033 END DATA.
1034
1035 EXAMINE
1036         x
1037         /STATISTICS=DESCRIPTIVES
1038         .
1039 ])
1040
1041 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1042 Table: Case Processing Summary
1043 ,Cases,,,,,
1044 ,Valid,,Missing,,Total,
1045 ,N,Percent,N,Percent,N,Percent
1046 X,100,100.0%,0,.0%,100,100.0%
1047
1048 Table: Descriptives
1049 ,,,Statistic,Std. Error
1050 X,Mean,,587.6603,23.2665
1051 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1052 ,,Upper Bound,633.8260,
1053 ,5% Trimmed Mean,,579.7064,
1054 ,Median,,547.1350,
1055 ,Variance,,54132.8466,
1056 ,Std. Deviation,,232.6647,
1057 ,Minimum,,106.2200,
1058 ,Maximum,,1355.2800,
1059 ,Range,,1249.0600,
1060 ,Interquartile Range,,293.1575,
1061 ,Skewness,,.6331,.2414
1062 ,Kurtosis,,.5300,.4783
1063 ])
1064
1065 AT_CLEANUP
1066
1067
1068
1069 dnl Test for a crash which happened on bad input syntax: SPREADLEVEL with empty parentheses.  pspp must exit with status 1.
1070 AT_SETUP([EXAMINE -- Empty Parentheses])
1071 AT_KEYWORDS([categorical categoricals])
1072
1073 AT_DATA([examine-empty-parens.sps], [dnl
1074 DATA LIST notable LIST /X *
1075 BEGIN DATA.
1076 2
1077 3
1078 END DATA.
1079
1080
1081 EXAMINE
1082         x
1083         /PLOT = SPREADLEVEL()
1084         .
1085 ])
1086
1087 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1088
1089 AT_CLEANUP
1090
1091
1092
1093
1094 dnl Test for another crash which happened on bad input syntax: a slash follows VARIABLES below where the other tests write an equals sign.  pspp must exit with status 1.
1095 AT_SETUP([EXAMINE -- Bad variable])
1096 AT_KEYWORDS([categorical categoricals])
1097
1098 AT_DATA([examine-bad-variable.sps], [dnl
1099 data list list /h * g *.
1100 begin data.
1101 3 1
1102 4 1
1103 5 2
1104 end data.
1105
1106 EXAMINE
1107         /VARIABLES/ h
1108         BY  g
1109         .
1110 ])
1111
1112 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1113
1114 AT_CLEANUP
1115
1116
1117
1118 dnl Test for yet another crash. This time for extremes vs. missing weight values.
1119 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1120 AT_KEYWORDS([categorical categoricals])
1121
1122 AT_DATA([examine-missing-weights.sps], [dnl
1123 data list notable list /h * g *.
1124 begin data.
1125 3 1
1126 4 .
1127 5 1
1128 2 1
1129 end data.
1130
1131 WEIGHT BY g.
1132
1133 EXAMINE h
1134         /STATISTICS extreme(3)
1135         .
1136 ])
1137
1138 AT_CHECK([pspp -O format=csv  examine-missing-weights.sps], [0], [dnl
1139 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1140
1141 Table: Case Processing Summary
1142 ,Cases,,,,,
1143 ,Valid,,Missing,,Total,
1144 ,N,Percent,N,Percent,N,Percent
1145 h,3.00,100.0%,.00,.0%,3.00,100.0%
1146
1147 Table: Extreme Values
1148 ,,,Case Number,Value
1149 h,Highest,1,3,5.00
1150 ,,2,2,4.00
1151 ,,3,1,3.00
1152 ,Lowest,1,4,2.00
1153 ,,2,1,3.00
1154 ,,3,2,4.00
1155 ])
1156
1157 AT_CLEANUP
1158
1159 dnl This is an example from doc/tutorial.texi
1160 dnl So if the results of this have to be changed in any way,
1161 dnl make sure to update that file.
1162 AT_SETUP([EXAMINE tutorial example 1])
1163 cp $top_srcdir/examples/repairs.sav .
1164 AT_DATA([repairs.sps], [dnl
1165 GET FILE='repairs.sav'.
1166 EXAMINE mtbf /STATISTICS=DESCRIPTIVES.
1167 COMPUTE mtbf_ln = LN (mtbf).
1168 EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
1169 ])
1170
1171 AT_CHECK([pspp -O format=csv repairs.sps], [0], [dnl
1172 Table: Case Processing Summary
1173 ,Cases,,,,,
1174 ,Valid,,Missing,,Total,
1175 ,N,Percent,N,Percent,N,Percent
1176 Mean time between failures (months) ,30,100.0%,0,.0%,30,100.0%
1177
1178 Table: Descriptives
1179 ,,,Statistic,Std. Error
1180 Mean time between failures (months) ,Mean,,8.78,1.10
1181 ,95% Confidence Interval for Mean,Lower Bound,6.53,
1182 ,,Upper Bound,11.04,
1183 ,5% Trimmed Mean,,8.20,
1184 ,Median,,8.29,
1185 ,Variance,,36.34,
1186 ,Std. Deviation,,6.03,
1187 ,Minimum,,1.63,
1188 ,Maximum,,26.47,
1189 ,Range,,24.84,
1190 ,Interquartile Range,,6.03,
1191 ,Skewness,,1.65,.43
1192 ,Kurtosis,,3.41,.83
1193
1194 Table: Case Processing Summary
1195 ,Cases,,,,,
1196 ,Valid,,Missing,,Total,
1197 ,N,Percent,N,Percent,N,Percent
1198 mtbf_ln,30,100.0%,0,.0%,30,100.0%
1199
1200 Table: Descriptives
1201 ,,,Statistic,Std. Error
1202 mtbf_ln,Mean,,1.95,.13
1203 ,95% Confidence Interval for Mean,Lower Bound,1.69,
1204 ,,Upper Bound,2.22,
1205 ,5% Trimmed Mean,,1.96,
1206 ,Median,,2.11,
1207 ,Variance,,.49,
1208 ,Std. Deviation,,.70,
1209 ,Minimum,,.49,
1210 ,Maximum,,3.28,
1211 ,Range,,2.79,
1212 ,Interquartile Range,,.88,
1213 ,Skewness,,-.37,.43
1214 ,Kurtosis,,.01,.83
1215 ])
1216
1217 AT_CLEANUP
1218
1219 dnl This is an example from doc/tutorial.texi
1220 dnl So if the results of this have to be changed in any way,
1221 dnl make sure to update that file.
1222 AT_SETUP([EXAMINE tutorial example 2])
1223 cp $top_srcdir/examples/physiology.sav .
1224 AT_DATA([examine.sps], [dnl
1225 GET FILE='physiology.sav'.
1226 EXAMINE height, weight /STATISTICS=EXTREME(3).
1227 ])
1228 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
1229 AT_CHECK([cat pspp.csv], [0], [dnl
1230 Table: Case Processing Summary
1231 ,Cases,,,,,
1232 ,Valid,,Missing,,Total,
1233 ,N,Percent,N,Percent,N,Percent
1234 Height in millimeters   ,40,100.0%,0,.0%,40,100.0%
1235 Weight in kilograms ,40,100.0%,0,.0%,40,100.0%
1236
1237 Table: Extreme Values
1238 ,,,Case Number,Value
1239 Height in millimeters   ,Highest,1,14,1903
1240 ,,2,15,1884
1241 ,,3,12,1802
1242 ,Lowest,1,30,179
1243 ,,2,31,1598
1244 ,,3,28,1601
1245 Weight in kilograms ,Highest,1,13,92.1
1246 ,,2,5,92.1
1247 ,,3,17,91.7
1248 ,Lowest,1,38,-55.6
1249 ,,2,39,54.5
1250 ,,3,33,55.4
1251 ])
1252 AT_CLEANUP
1253
1254
1255
1256 AT_SETUP([EXAMINE -- Crash on unrepresentable graphs])
1257 AT_DATA([examine.sps], [dnl
1258 data list notable list /x * g *.
1259 begin data.
1260 96 1
1261 end data.
1262
1263 examine x  by g
1264         /nototal
1265         /plot = all.
1266 ])
1267 dnl This bug only manifested itself on cairo-based drivers.  Exit status 0 is required; stdout and stderr are ignored.
1268 AT_CHECK([pspp -O format=pdf examine.sps], [0], [ignore], [ignore])
1269 AT_CLEANUP
1270
1271
1272 dnl This example comes from the web site:
1273 dnl  https://www.spsstests.com/2018/11/shapiro-wilk-normality-test-spss.html
1274 AT_SETUP([EXAMINE -- shapiro-wilk 1])
1275 AT_KEYWORDS([shapiro wilk])
1276 AT_DATA([shapiro-wilk.sps], [dnl
1277 data list notable list /x * g *.
1278 begin data.
1279 96 1
1280 98 1
1281 95 1
1282 89 1
1283 90 1
1284 92 1
1285 94 1
1286 93 1
1287 97 1
1288 100 1
1289 99 2
1290 96 2
1291 80 2
1292 89 2
1293 91 2
1294 92 2
1295 93 2
1296 94 2
1297 99 2
1298 80 2
1299 end data.
1300
1301 set format F22.3.
1302
1303 examine x  by g
1304         /nototal
1305         /plot = all.
1306 ])
1307
1308 AT_CHECK([pspp -O format=csv shapiro-wilk.sps], [0],[dnl
1309 Table: Case Processing Summary
1310 ,g,Cases,,,,,
1311 ,,Valid,,Missing,,Total,
1312 ,,N,Percent,N,Percent,N,Percent
1313 x,1.00,10,100.0%,0,.0%,10,100.0%
1314 ,2.00,10,100.0%,0,.0%,10,100.0%
1315
1316 Table: Tests of Normality
1317 ,g,Shapiro-Wilk,,
1318 ,,Statistic,df,Sig.
1319 x,1.00,.984,10,.983
1320 ,2.00,.882,10,.136
1321 ])
1322
1323 AT_CLEANUP
1324
1325
1326 dnl This example comes from the web site:
1327 dnl  http://www.real-statistics.com/tests-normality-and-symmetry/statistical-tests-normality-symmetry/shapiro-wilk-expanded-test/
1328 dnl It uses a dataset larger than 11 samples. Hence the alternative method for
1329 dnl significance is used.
1330 AT_SETUP([EXAMINE -- shapiro-wilk 2])
1331 AT_KEYWORDS([shapiro wilk])
1332 AT_DATA([shapiro-wilk2.sps], [dnl
1333 data list notable list /x *.
1334 begin data.
1335 65
1336 61
1337 63
1338 86
1339 70
1340 55
1341 74
1342 35
1343 72
1344 68
1345 45
1346 58
1347 end data.
1348
1349 set format F22.3.
1350
1351 examine x
1352         /plot = boxplot.
1353 ])
1354
1355 AT_CHECK([pspp -O format=csv shapiro-wilk2.sps], [0],[dnl
1356 Table: Case Processing Summary
1357 ,Cases,,,,,
1358 ,Valid,,Missing,,Total,
1359 ,N,Percent,N,Percent,N,Percent
1360 x,12,100.0%,0,.0%,12,100.0%
1361
1362 Table: Tests of Normality
1363 ,Shapiro-Wilk,,
1364 ,Statistic,df,Sig.
1365 x,.971,12,.922
1366 ])
1367
1368 AT_CLEANUP