tests: Add "categorical" keyword to tests that use categoricals.
[pspp] / tests / language / stats / examine.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
3 dnl 
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl 
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl 
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
dnl Category banner for the EXAMINE test groups that follow in this file.
17 AT_BANNER([EXAMINE])
18
19 AT_SETUP([EXAMINE])
20 AT_KEYWORDS([categorical categoricals])
dnl Weighted run (WEIGHT BY w) of EXAMINE quality BY brand requesting
dnl descriptives and the three most extreme values.  The CSV output is
dnl compared in full: the data listing, the tables for all cases, and then
dnl the same tables broken down by brand (shown via its value labels).
21 AT_DATA([examine.sps], [
22 DATA LIST LIST /QUALITY * W * BRAND * .
23 BEGIN DATA
24 3  1  1
25 2  2  1
26 1  2  1
27 1  1  1
28 4  1  1
29 4  1  1
30 5  1  2
31 2  1  2
32 4  4  2
33 2  1  2
34 3  1  2
35 7  1  3
36 4  2  3
37 5  3  3
38 3  1  3
39 6  1  3
40 END DATA
41
42 WEIGHT BY w.
43
44 VARIABLE LABELS brand   'Manufacturer'.
45 VARIABLE LABELS quality 'Breaking Strain'.
46
47 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
48
49 LIST /FORMAT=NUMBERED.
50
51 EXAMINE
52         quality BY brand
53         /STATISTICS descriptives extreme(3)
54         .
55 ])
56
57
58 dnl In the following data, only the extreme values have been checked.
59 dnl The descriptives have been blindly pasted.
60 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
61 Table: Reading free-form data from INLINE.
62 Variable,Format
63 QUALITY,F8.0
64 W,F8.0
65 BRAND,F8.0
66
67 Table: Data List
68 Case Number,QUALITY,W,BRAND
69 1,3.00,1.00,1.00
70 2,2.00,2.00,1.00
71 3,1.00,2.00,1.00
72 4,1.00,1.00,1.00
73 5,4.00,1.00,1.00
74 6,4.00,1.00,1.00
75 7,5.00,1.00,2.00
76 8,2.00,1.00,2.00
77 9,4.00,4.00,2.00
78 10,2.00,1.00,2.00
79 11,3.00,1.00,2.00
80 12,7.00,1.00,3.00
81 13,4.00,2.00,3.00
82 14,5.00,3.00,3.00
83 15,3.00,1.00,3.00
84 16,6.00,1.00,3.00
85
86 Table: Case Processing Summary
87 ,Cases,,,,,
88 ,Valid,,Missing,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 Breaking Strain,24.00,100%,.00,0%,24.00,100%
91
92 Table: Extreme Values
93 ,,,Case Number,Value
94 Breaking Strain,Highest,1,12,7.00
95 ,,2,16,6.00
96 ,,3,14,5.00
97 ,Lowest,1,3,1.00
98 ,,2,4,1.00
99 ,,3,2,2.00
100
101 Table: Descriptives
102 ,,,Statistic,Std. Error
103 Breaking Strain,Mean,,3.54,.32
104 ,95% Confidence Interval for Mean,Lower Bound,2.87,
105 ,,Upper Bound,4.21,
106 ,5% Trimmed Mean,,3.50,
107 ,Median,,4.00,
108 ,Variance,,2.52,
109 ,Std. Deviation,,1.59,
110 ,Minimum,,1.00,
111 ,Maximum,,7.00,
112 ,Range,,6.00,
113 ,Interquartile Range,,2.75,
114 ,Skewness,,.06,.47
115 ,Kurtosis,,-.36,.92
116
117 Table: Case Processing Summary
118 ,,Cases,,,,,
119 ,,Valid,,Missing,,Total,
120 ,Manufacturer,N,Percent,N,Percent,N,Percent
121 Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
122 ,Bloggs,8.00,100%,.00,0%,8.00,100%
123 ,Charlies,8.00,100%,.00,0%,8.00,100%
124
125 Table: Extreme Values
126 ,Manufacturer,,,Case Number,Value
127 Breaking Strain,Aspeger,Highest,1,6,4.00
128 ,,,2,5,4.00
129 ,,,3,1,3.00
130 ,,Lowest,1,3,1.00
131 ,,,2,4,1.00
132 ,,,3,2,2.00
133 ,Bloggs,Highest,1,7,5.00
134 ,,,2,9,4.00
135 ,,,3,11,3.00
136 ,,Lowest,1,8,2.00
137 ,,,2,10,2.00
138 ,,,3,11,3.00
139 ,Charlies,Highest,1,12,7.00
140 ,,,2,16,6.00
141 ,,,3,14,5.00
142 ,,Lowest,1,15,3.00
143 ,,,2,13,4.00
144 ,,,3,14,5.00
145
146 Table: Descriptives
147 ,Manufacturer,,,Statistic,Std. Error
148 Breaking Strain,Aspeger,Mean,,2.25,.45
149 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
150 ,,,Upper Bound,3.32,
151 ,,5% Trimmed Mean,,2.22,
152 ,,Median,,2.00,
153 ,,Variance,,1.64,
154 ,,Std. Deviation,,1.28,
155 ,,Minimum,,1.00,
156 ,,Maximum,,4.00,
157 ,,Range,,3.00,
158 ,,Interquartile Range,,2.75,
159 ,,Skewness,,.47,.75
160 ,,Kurtosis,,-1.55,1.48
161 ,Bloggs,Mean,,3.50,.38
162 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
163 ,,,Upper Bound,4.39,
164 ,,5% Trimmed Mean,,3.50,
165 ,,Median,,4.00,
166 ,,Variance,,1.14,
167 ,,Std. Deviation,,1.07,
168 ,,Minimum,,2.00,
169 ,,Maximum,,5.00,
170 ,,Range,,3.00,
171 ,,Interquartile Range,,1.75,
172 ,,Skewness,,-.47,.75
173 ,,Kurtosis,,-.83,1.48
174 ,Charlies,Mean,,4.88,.44
175 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
176 ,,,Upper Bound,5.92,
177 ,,5% Trimmed Mean,,4.86,
178 ,,Median,,5.00,
179 ,,Variance,,1.55,
180 ,,Std. Deviation,,1.25,
181 ,,Minimum,,3.00,
182 ,,Maximum,,7.00,
183 ,,Range,,4.00,
184 ,,Interquartile Range,,1.75,
185 ,,Skewness,,.30,.75
186 ,,Kurtosis,,.15,1.48
187 ])
188
189 AT_CLEANUP
190
191 AT_SETUP([EXAMINE -- extremes])
192 AT_KEYWORDS([categorical categoricals])
dnl Requests the six highest and six lowest values of V1 under WEIGHT BY w.
dnl The data holds 21 cases; two of them (values 3 and 18) carry weight 2,
dnl which is why the expected valid N is 23.00.
193 AT_DATA([examine.sps], [dnl
194 data list free /V1 W
195 begin data.
196 1  1
197 2  1
198 3  2
199 3  1
200 4  1
201 5  1
202 6  1
203 7  1
204 8  1
205 9  1
206 10 1
207 11 1
208 12 1
209 13 1
210 14 1
211 15 1
212 16 1
213 17 1
214 18 2
215 19 1
216 20 1
217 end data.
218
219 weight by w.
220
221 examine v1 
222  /statistics=extreme(6)
223  .
224 ])
225
226 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
227 Table: Case Processing Summary
228 ,Cases,,,,,
229 ,Valid,,Missing,,Total,
230 ,N,Percent,N,Percent,N,Percent
231 V1,23.00,100%,.00,0%,23.00,100%
232
233 Table: Extreme Values
234 ,,,Case Number,Value
235 V1,Highest,1,21,20.00
236 ,,2,20,19.00
237 ,,3,19,18.00
238 ,,4,18,17.00
239 ,,5,17,16.00
240 ,,6,16,15.00
241 ,Lowest,1,1,1.00
242 ,,2,2,2.00
243 ,,3,3,3.00
244 ,,4,4,3.00
245 ,,5,5,4.00
246 ,,6,6,5.00
247 ])
248
249 AT_CLEANUP
250
251
252
253 AT_SETUP([EXAMINE -- extremes with fractional weights])
254 AT_KEYWORDS([categorical categoricals])
dnl The 18 case weights are non-integer and add up to 19.43, the valid N
dnl expected below.  SET FORMAT=F20.3 is why every figure in the expected
dnl output carries three decimals.
255 AT_DATA([extreme.sps], [dnl
256 set format=F20.3.
257 data list notable list /w * x *.
258 begin data.
259  0.88  300000
260  0.86  320000
261  0.98  480000
262  0.93  960000
263  1.35  960000
264  1.31  960000
265  0.88  960000
266  0.88  1080000
267  0.88  1080000
268  0.95  1200000
269  1.47  1200000
270  0.93  1200000
271  0.98  1320000
272  1.31  1380000
273  0.93  1440000
274  0.88  1560000
275  1.56  1560000
276  1.47  1560000
277 end data.
278
279 weight by w.
280
281
282 EXAMINE
283         x
284         /STATISTICS = DESCRIPTIVES EXTREME (5)
285         .
286 ])
287
288 AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
289 Table: Case Processing Summary
290 ,Cases,,,,,
291 ,Valid,,Missing,,Total,
292 ,N,Percent,N,Percent,N,Percent
293 x,19.430,100%,.000,0%,19.430,100%
294
295 Table: Extreme Values
296 ,,,Case Number,Value
297 x,Highest,1,18,1560000.000
298 ,,2,17,1560000.000
299 ,,3,16,1560000.000
300 ,,4,15,1440000.000
301 ,,5,14,1380000.000
302 ,Lowest,1,1,300000.000
303 ,,2,2,320000.000
304 ,,3,3,480000.000
305 ,,4,4,960000.000
306 ,,5,5,960000.000
307
308 Table: Descriptives
309 ,,,Statistic,Std. Error
310 x,Mean,,1120010.293,86222.178
311 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
312 ,,Upper Bound,1300853.894,
313 ,5% Trimmed Mean,,1141017.899,
314 ,Median,,1200000.000,
315 ,Variance,,144447748124.869,
316 ,Std. Deviation,,380062.821,
317 ,Minimum,,300000.000,
318 ,Maximum,,1560000.000,
319 ,Range,,1260000.000,
320 ,Interquartile Range,,467258.065,
321 ,Skewness,,-.887,.519
322 ,Kurtosis,,.340,1.005
323 ])
324
325 AT_CLEANUP
326
327 dnl Test the PERCENTILES subcommand of the EXAMINE command.
328 dnl In particular test that it behaves properly when there are only 
329 dnl a few cases.
330 AT_SETUP([EXAMINE -- percentiles])
331 AT_KEYWORDS([categorical categoricals])
dnl Runs EXAMINE five times on the same three cases, once per PERCENTILES
dnl method (HAVERAGE, WAVERAGE, ROUND, EMPIRICAL, AEMPIRICAL), and compares
dnl the tables written to pspp.csv.  Only the first row of each Percentiles
dnl table differs between runs; the Tukey's Hinges row is the same each time.
332 AT_DATA([examine.sps], [dnl
333 DATA LIST LIST /X *.
334 BEGIN DATA.
335 2.00 
336 8.00 
337 5.00 
338 END DATA.
339
340 EXAMINE /x
341         /PERCENTILES=HAVERAGE.
342
343 EXAMINE /x
344         /PERCENTILES=WAVERAGE.
345
346 EXAMINE /x
347         /PERCENTILES=ROUND.
348
349 EXAMINE /x
350         /PERCENTILES=EMPIRICAL.
351
352 EXAMINE /x
353         /PERCENTILES=AEMPIRICAL.
354 ])
355 AT_CHECK([pspp -o pspp.csv examine.sps])
356 AT_CHECK([cat pspp.csv], [0], [dnl
357 Table: Reading free-form data from INLINE.
358 Variable,Format
359 X,F8.0
360
361 Table: Case Processing Summary
362 ,Cases,,,,,
363 ,Valid,,Missing,,Total,
364 ,N,Percent,N,Percent,N,Percent
365 X,3,100%,0,0%,3,100%
366
367 Table: Percentiles
368 ,,Percentiles,,,,,,
369 ,,5,10,25,50,75,90,95
370 X,HAverage,.40,.80,2.00,5.00,8.00,8.00,8.00
371 ,Tukey's Hinges,,,3.50,5.00,6.50,,
372
373 Table: Case Processing Summary
374 ,Cases,,,,,
375 ,Valid,,Missing,,Total,
376 ,N,Percent,N,Percent,N,Percent
377 X,3,100%,0,0%,3,100%
378
379 Table: Percentiles
380 ,,Percentiles,,,,,,
381 ,,5,10,25,50,75,90,95
382 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
383 ,Tukey's Hinges,,,3.50,5.00,6.50,,
384
385 Table: Case Processing Summary
386 ,Cases,,,,,
387 ,Valid,,Missing,,Total,
388 ,N,Percent,N,Percent,N,Percent
389 X,3,100%,0,0%,3,100%
390
391 Table: Percentiles
392 ,,Percentiles,,,,,,
393 ,,5,10,25,50,75,90,95
394 X,Rounded,.00,.00,2.00,5.00,5.00,8.00,8.00
395 ,Tukey's Hinges,,,3.50,5.00,6.50,,
396
397 Table: Case Processing Summary
398 ,Cases,,,,,
399 ,Valid,,Missing,,Total,
400 ,N,Percent,N,Percent,N,Percent
401 X,3,100%,0,0%,3,100%
402
403 Table: Percentiles
404 ,,Percentiles,,,,,,
405 ,,5,10,25,50,75,90,95
406 X,Empirical,2.00,2.00,2.00,5.00,8.00,8.00,8.00
407 ,Tukey's Hinges,,,3.50,5.00,6.50,,
408
409 Table: Case Processing Summary
410 ,Cases,,,,,
411 ,Valid,,Missing,,Total,
412 ,N,Percent,N,Percent,N,Percent
413 X,3,100%,0,0%,3,100%
414
415 Table: Percentiles
416 ,,Percentiles,,,,,,
417 ,,5,10,25,50,75,90,95
418 X,Empirical with averaging,2.00,2.00,2.00,5.00,8.00,8.00,8.00
419 ,Tukey's Hinges,,,3.50,5.00,6.50,,
420 ])
421 AT_CLEANUP
422
423 AT_SETUP([EXAMINE -- missing values])
424 AT_KEYWORDS([categorical categoricals])
dnl EXAMINE x BY y with /MISSING=PAIRWISE, where the last case has a
dnl system-missing x.  The expected summaries count that case as missing:
dnl 1 of 7 overall, and 1 of 3 within the y=2 group.
425 AT_DATA([examine.sps], [dnl
426 DATA LIST LIST /x * y *.
427 BEGIN DATA.
428 1   1 
429 2   1
430 3   1
431 4   1
432 5   2
433 6   2
434 .   2
435 END DATA
436
437 EXAMINE /x by y
438         /MISSING = PAIRWISE
439         .
440 ])
441 AT_CHECK([pspp -o pspp.csv examine.sps])
442 AT_CHECK([cat pspp.csv], [0], [dnl
443 Table: Reading free-form data from INLINE.
444 Variable,Format
445 x,F8.0
446 y,F8.0
447
448 Table: Case Processing Summary
449 ,Cases,,,,,
450 ,Valid,,Missing,,Total,
451 ,N,Percent,N,Percent,N,Percent
452 x,6,85.7143%,1,14.2857%,7,100%
453
454 Table: Case Processing Summary
455 ,,Cases,,,,,
456 ,,Valid,,Missing,,Total,
457 ,y,N,Percent,N,Percent,N,Percent
458 x,1.00,4,100%,0,0%,4,100%
459 ,2.00,2,66.6667%,1,33.3333%,3,100%
460 ])
461 AT_CLEANUP
462
463
464 AT_SETUP([EXAMINE -- user missing values])
465 AT_KEYWORDS([categorical categoricals])
dnl Declares user-missing values for both variables (9999999999 for x, 99
dnl for y) and checks that, under /MISSING=PAIRWISE, each variable's
dnl valid/missing counts are reported independently of the other's.
466 AT_DATA([examine-m.sps], [dnl
467 DATA LIST notable LIST /x * y *.
468 BEGIN DATA.
469 1                   2
470 9999999999          2
471 9999999999          99
472 END DATA.
473
474 MISSING VALUES x (9999999999).
475 MISSING VALUES y (99).
476
477 EXAMINE
478         /VARIABLES= x y
479         /MISSING=PAIRWISE.
480 ])
481 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
482 Table: Case Processing Summary
483 ,Cases,,,,,
484 ,Valid,,Missing,,Total,
485 ,N,Percent,N,Percent,N,Percent
486 x,1,33.3333%,2,66.6667%,3,100%
487 y,2,66.6667%,1,33.3333%,3,100%
488 ])
489 AT_CLEANUP
490
491 AT_SETUP([EXAMINE -- missing values and percentiles])
492 AT_KEYWORDS([categorical categoricals])
dnl Two of the three cases hold the user-missing value 99, so
dnl /PERCENTILES=HAVERAGE is computed with a single valid case left.
493 AT_DATA([examine.sps], [dnl
494 DATA LIST LIST /X *.
495 BEGIN DATA.
496 99
497 99
498 5.00
499 END DATA.
500
501 MISSING VALUE X (99).
502
503 EXAMINE /x
504         /PERCENTILES=HAVERAGE.
505 ])
506 AT_CHECK([pspp -o pspp.csv examine.sps])
507 dnl Ignore output -- this is just a no-crash check.
508 AT_CLEANUP
509
510 dnl Tests the trimmed mean calculation in the case
511 dnl where the data is weighted towards the centre.
512 AT_SETUP([EXAMINE -- trimmed mean])
513 AT_KEYWORDS([categorical categoricals])
dnl The case weights are 1, 49 and 2, so 49 of the 52 weighted cases have
dnl X=2.  The expected 5% trimmed mean is 2.00.
514 AT_DATA([examine.sps], [dnl
515 DATA LIST LIST /X * C *.
516 BEGIN DATA.
517 1 1
518 2 49
519 3 2
520 END DATA.
521
522 WEIGHT BY c.
523
524 EXAMINE
525         x
526         /STATISTICS=DESCRIPTIVES
527         .
528 ])
529 AT_CHECK([pspp -o pspp.csv examine.sps])
530 AT_CHECK([cat pspp.csv], [0], [dnl
531 Table: Reading free-form data from INLINE.
532 Variable,Format
533 X,F8.0
534 C,F8.0
535
536 Table: Case Processing Summary
537 ,Cases,,,,,
538 ,Valid,,Missing,,Total,
539 ,N,Percent,N,Percent,N,Percent
540 X,52.00,100%,.00,0%,52.00,100%
541
542 Table: Descriptives
543 ,,,Statistic,Std. Error
544 X,Mean,,2.02,.03
545 ,95% Confidence Interval for Mean,Lower Bound,1.95,
546 ,,Upper Bound,2.09,
547 ,5% Trimmed Mean,,2.00,
548 ,Median,,2.00,
549 ,Variance,,.06,
550 ,Std. Deviation,,.24,
551 ,Minimum,,1.00,
552 ,Maximum,,3.00,
553 ,Range,,2.00,
554 ,Interquartile Range,,.00,
555 ,Skewness,,1.19,.33
556 ,Kurtosis,,15.73,.65
557 ])
558 AT_CLEANUP
559
560 AT_SETUP([EXAMINE -- crash bug])
561 AT_KEYWORDS([categorical categoricals])
dnl Runs "examine a by x by y" with descriptives on a three-case dataset.
dnl The tables go to pspp.csv and are never inspected.
562 AT_DATA([examine.sps], [dnl
563 data list list /a * x * y *.
564 begin data.
565 3 1 3
566 5 1 4
567 7 2 3
568 end data.
569
570 examine a by x by y
571         /statistics=DESCRIPTIVES
572         . 
573 ])
574 AT_CHECK([pspp -o pspp.csv examine.sps])
575 dnl Ignore output -- this is just a no-crash check.
576 AT_CLEANUP
577
578 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
579 AT_SETUP([EXAMINE -- consecutive runs don't crash])
580 AT_KEYWORDS([categorical categoricals])
dnl The first EXAMINE breaks z down BY y; the second examines z alone on
dnl the same active dataset.
581 AT_DATA([examine.sps], [dnl
582 data list list /y * z *.
583 begin data.
584 6 4
585 5 3
586 7 6
587 end data.
588
589 EXAMINE /VARIABLES= z BY y.
590
591 EXAMINE /VARIABLES= z. 
592 ])
593 AT_CHECK([pspp -o pspp.csv examine.sps])
594 dnl Ignore output -- this is just a no-crash check.
595 AT_CLEANUP
596
597 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
598 AT_SETUP([EXAMINE -- missing values don't crash])
599 AT_KEYWORDS([categorical categoricals])
dnl One of the five x values is system-missing (in the y=0 group) while
dnl descriptives are requested for x BY y.
600 AT_DATA([examine.sps], [dnl
601 data list list /x * y *.
602 begin data.
603 1 0
604 2 0
605 . 0
606 3 1
607 4 1
608 end data.
609 examine x by y /statistics=descriptives. 
610 ])
611 AT_CHECK([pspp -o pspp.csv examine.sps])
612 dnl Ignore output -- this is just a no-crash check.
613 AT_CLEANUP
614
615 dnl Test that having only a single case doesn't crash.
616 AT_SETUP([EXAMINE -- single case doesn't crash])
617 AT_KEYWORDS([categorical categoricals])
dnl The dataset holds exactly one case; descriptives and a histogram are
dnl requested.  Exit status 0 is required and stdout is ignored.
618 AT_DATA([examine.sps], [dnl
619 DATA LIST LIST /quality * .
620 BEGIN DATA
621 3  
622 END DATA
623
624
625 EXAMINE
626         quality 
627         /STATISTICS descriptives 
628         /PLOT = histogram
629         .
630 ])
631 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
632 dnl Ignore output -- this is just a no-crash check.
633 AT_CLEANUP
634
635 dnl Test that all-missing data doesn't crash.
636 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
637 AT_KEYWORDS([categorical categoricals])
dnl All four values of x are system-missing.  Plots, statistics, an ID
dnl variable and percentiles are all requested in one command.  Exit
dnl status 0 is required and stdout is ignored.
638 AT_DATA([examine.sps], [dnl
639 DATA LIST LIST /x *.
640 BEGIN DATA.
641 .
642 .
643 .
644 .
645 END DATA.
646
647 EXAMINE /x 
648         PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
649         /ID=x
650         /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
651         /PERCENTILE=AEMPIRICAL
652         .
653 ])
654 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
655 dnl Ignore output -- this is just a no-crash check.
656 AT_CLEANUP
657
658 dnl Test that big input doesn't crash (bug 11307).
659 AT_SETUP([EXAMINE -- big input doesn't crash])
660 AT_KEYWORDS([categorical categoricals])
dnl An INPUT PROGRAM loop generates 50000 cases with X=NORMAL(10), and
dnl descriptives are then requested for X.
661 AT_DATA([examine.sps], [dnl
662 INPUT PROGRAM.
663         LOOP #I=1 TO 50000.
664                 COMPUTE X=NORMAL(10).
665                 END CASE.
666         END LOOP.
667         END FILE.
668 END INPUT PROGRAM.
669
670
671 EXAMINE /x
672         /STATISTICS=DESCRIPTIVES.
673 ])
674 AT_CHECK([pspp -o pspp.csv examine.sps])
675 dnl Ignore output -- this is just a no-crash check.
676 AT_CLEANUP
677
678 dnl Another test that big input doesn't crash.
679 dnl The actual bug that this checks for has been lost.
680 AT_SETUP([EXAMINE -- big input doesn't crash 2])
681 AT_KEYWORDS([categorical categoricals])
dnl A Perl script ($PERL) writes 100000 "AB12" lines followed by 100000
dnl "AB04" lines to large.txt.  The syntax file reads that file, aggregates
dnl by X, and runs EXAMINE /A BY X.  A second script then appends 25000
dnl alternating "AB04"/"AB12" pairs and the same syntax file is run again.
682 AT_DATA([make-big-input.pl], 
683   [for ($i=0; $i<100000; $i++) { print "AB12\n" };
684    for ($i=0; $i<100000; $i++) { print "AB04\n" };
685 ])
686 AT_CHECK([$PERL make-big-input.pl > large.txt])
687 AT_DATA([examine.sps], [dnl
688 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
689
690
691 AGGREGATE OUTFILE=* /BREAK=X /A=N.
692
693
694 EXAMINE /A BY X.
695 ])
696 AT_CHECK([pspp -o pspp.csv examine.sps])
697 dnl Ignore output -- this is just a no-crash check.
698 AT_DATA([more-big-input.pl], 
699   [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
700 ])
701 AT_CHECK([$PERL more-big-input.pl >> large.txt])
702 AT_CHECK([pspp -o pspp.csv examine.sps])
703 dnl Ignore output -- this is just a no-crash check.
704 AT_CLEANUP
705
706
707 dnl Test that the ID subcommand works with non-numeric variables
708 AT_SETUP([EXAMINE -- non-numeric ID])
709 AT_KEYWORDS([categorical categoricals])
dnl The string variable y (format a12) is named in /ID, so the expected
dnl Extreme Values table has a "y" column holding y's space-padded values
dnl where the other tests in this file show "Case Number".
710
711 AT_DATA([examine-id.sps], [dnl
712 data list notable list /x * y (a12).
713 begin data.
714 1  one
715 2  two
716 3  three
717 4  four
718 5  five
719 6  six
720 7  seven
721 8  eight
722 9  nine
723 10 ten
724 11 eleven
725 12 twelve
726 30 thirty
727 300 threehundred
728 end data.
729
730 examine x
731         /statistics = extreme
732         /id = y
733         /plot = boxplot
734         .
735 ])
736
737 AT_CHECK([pspp -O format=csv examine-id.sps], [0], 
738 [Table: Case Processing Summary
739 ,Cases,,,,,
740 ,Valid,,Missing,,Total,
741 ,N,Percent,N,Percent,N,Percent
742 x,14,100%,0,0%,14,100%
743
744 Table: Extreme Values
745 ,,,y,Value
746 x,Highest,1,threehundred,300.00
747 ,,2,thirty      ,30.00
748 ,,3,twelve      ,12.00
749 ,,4,eleven      ,11.00
750 ,,5,ten         ,10.00
751 ,Lowest,1,one         ,1.00
752 ,,2,two         ,2.00
753 ,,3,three       ,3.00
754 ,,4,four        ,4.00
755 ,,5,five        ,5.00
756 ])
757
758 AT_CLEANUP 
759
760 dnl Test for a crash which happened on cleanup from a bad input syntax
761 AT_SETUP([EXAMINE -- Bad Input])
762 AT_KEYWORDS([categorical categoricals])
dnl /PLOT is given the unrecognized keyword "lkajsdas".  pspp is expected
dnl to exit with status 1; whatever it prints on stdout is ignored.
763
764 AT_DATA([examine-bad.sps], [dnl
765 data list list /h * g *.
766 begin data.
767 1 1
768 2 1
769 3 1
770 4 1
771 5 2
772 6 2
773 7 2
774 8 2
775 9 2
776 end data.
777
778 EXAMINE 
779         /VARIABLES= h
780         BY  g
781         /STATISTICS = DESCRIPTIVES EXTREME
782         /PLOT = lkajsdas
783         .
784 ])
785
786 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
787
788 AT_CLEANUP 
789
790
791 dnl Check the MISSING=REPORT option
792 AT_SETUP([EXAMINE -- MISSING=REPORT])
793 AT_KEYWORDS([categorical categoricals])
dnl The factor g has user-missing values 9, 99 and 999 and is
dnl system-missing in the last four cases.  With /MISSING=REPORT the
dnl expected tables list those cases as separate groups labelled
dnl "(missing)" alongside the valid groups 1 and 2.
dnl NOTE(review): the two groups with only four cases still print five
dnl extreme rows.  The fifth "Highest" row is case 0 / value 0 and the
dnl fifth "Lowest" row repeats the fourth.  This looks like filler pinned
dnl from actual output -- confirm it is intended.
794
795 AT_DATA([examine-report.sps], [dnl
796 set format = F22.0.
797 data list list /x * g *.
798 begin data.
799 1   1
800 2   1
801 3   1
802 4   1
803 5   1
804 6   1
805 7   1
806 8   1
807 9   1
808 10   2
809 20   2
810 30   2
811 40   2
812 50   2
813 60   2
814 70   2
815 80   2
816 90   2
817 101   9
818 201   9
819 301   9
820 401   9
821 501   99
822 601   99
823 701   99
824 801   99
825 901   99
826 1001  .
827 2002  .
828 3003  .
829 4004  .
830 end data.
831
832 MISSING VALUES g (9, 99, 999).
833
834 EXAMINE
835         /VARIABLES = x
836         BY  g
837         /STATISTICS = EXTREME
838         /NOTOTAL
839         /MISSING = REPORT.
840 ])
841
842
843 AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
844 Table: Reading free-form data from INLINE.
845 Variable,Format
846 x,F8.0
847 g,F8.0
848
849 Table: Case Processing Summary
850 ,,Cases,,,,,
851 ,,Valid,,Missing,,Total,
852 ,g,N,Percent,N,Percent,N,Percent
853 x,. (missing),4,100%,0,0%,4,100%
854 ,1,9,100%,0,0%,9,100%
855 ,2,9,100%,0,0%,9,100%
856 ,9 (missing),4,100%,0,0%,4,100%
857 ,99 (missing),5,100%,0,0%,5,100%
858
859 Table: Extreme Values
860 ,g,,,Case Number,Value
861 x,. (missing),Highest,1,31,4004
862 ,,,2,30,3003
863 ,,,3,29,2002
864 ,,,4,28,1001
865 ,,,5,0,0
866 ,,Lowest,1,28,1001
867 ,,,2,29,2002
868 ,,,3,30,3003
869 ,,,4,31,4004
870 ,,,5,31,4004
871 ,1,Highest,1,9,9
872 ,,,2,8,8
873 ,,,3,7,7
874 ,,,4,6,6
875 ,,,5,5,5
876 ,,Lowest,1,1,1
877 ,,,2,2,2
878 ,,,3,3,3
879 ,,,4,4,4
880 ,,,5,5,5
881 ,2,Highest,1,18,90
882 ,,,2,17,80
883 ,,,3,16,70
884 ,,,4,15,60
885 ,,,5,14,50
886 ,,Lowest,1,10,10
887 ,,,2,11,20
888 ,,,3,12,30
889 ,,,4,13,40
890 ,,,5,14,50
891 ,9 (missing),Highest,1,22,401
892 ,,,2,21,301
893 ,,,3,20,201
894 ,,,4,19,101
895 ,,,5,0,0
896 ,,Lowest,1,19,101
897 ,,,2,20,201
898 ,,,3,21,301
899 ,,,4,22,401
900 ,,,5,22,401
901 ,99 (missing),Highest,1,27,901
902 ,,,2,26,801
903 ,,,3,25,701
904 ,,,4,24,601
905 ,,,5,23,501
906 ,,Lowest,1,23,501
907 ,,,2,24,601
908 ,,,3,25,701
909 ,,,4,26,801
910 ,,,5,27,901
911 ])
912
913
914 AT_CLEANUP 
915
916
917 dnl Run a test of the basic STATISTICS using a "real"
918 dnl dataset and comparing with "real" results kindly
919 dnl provided by Olaf Nöhring
920 AT_SETUP([EXAMINE -- sample unweighted])
921 AT_KEYWORDS([categorical categoricals])
dnl Descriptives for 100 unweighted values of X.  SET FORMAT=F22.4 is why
dnl the expected statistics are shown with four decimals.
922
923 AT_DATA([sample.sps], [dnl
924 set format = F22.4.
925 DATA LIST notable LIST /X *
926 BEGIN DATA.
927 461.19000000
928 466.38000000
929 479.46000000
930 480.10000000
931 483.43000000
932 488.30000000
933 489.00000000
934 491.62000000
935 505.62000000
936 511.30000000
937 521.53000000
938 526.70000000
939 528.25000000
940 538.70000000
941 540.22000000
942 540.58000000
943 546.10000000
944 548.17000000
945 553.99000000
946 566.21000000
947 575.90000000
948 584.38000000
949 593.40000000
950 357.05000000
951 359.73000000
952 360.48000000
953 373.98000000
954 374.13000000
955 381.45000000
956 383.72000000
957 390.00000000
958 400.34000000
959 415.32000000
960 415.91000000
961 418.30000000
962 421.03000000
963 422.43000000
964 426.93000000
965 433.25000000
966 436.89000000
967 445.33000000
968 446.33000000
969 446.55000000
970 456.44000000
971 689.49000000
972 691.92000000
973 695.00000000
974 695.36000000
975 698.21000000
976 699.46000000
977 706.61000000
978 710.69000000
979 715.82000000
980 715.82000000
981 741.39000000
982 752.27000000
983 756.73000000
984 757.74000000
985 759.57000000
986 796.07000000
987 813.78000000
988 817.25000000
989 825.48000000
990 831.28000000
991 849.24000000
992 890.00000000
993 894.78000000
994 935.65000000
995 935.90000000
996 945.90000000
997 1012.8600000
998 1022.6000000
999 1061.8100000
1000 1063.5000000
1001 1077.2300000
1002 1151.6300000
1003 1355.2800000
1004 598.88000000
1005 606.91000000
1006 621.60000000
1007 624.80000000
1008 636.13000000
1009 637.38000000
1010 640.32000000
1011 649.35000000
1012 656.51000000
1013 662.55000000
1014 664.69000000
1015 106.22000000
1016 132.24000000
1017 174.76000000
1018 204.85000000
1019 264.93000000
1020 264.99000000
1021 269.84000000
1022 325.12000000
1023 331.67000000
1024 337.26000000
1025 347.68000000
1026 354.91000000
1027 END DATA.
1028
1029 EXAMINE
1030         x
1031         /STATISTICS=DESCRIPTIVES
1032         .
1033 ])
1034
1035 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1036 Table: Case Processing Summary
1037 ,Cases,,,,,
1038 ,Valid,,Missing,,Total,
1039 ,N,Percent,N,Percent,N,Percent
1040 X,100,100%,0,0%,100,100%
1041
1042 Table: Descriptives
1043 ,,,Statistic,Std. Error
1044 X,Mean,,587.6603,23.2665
1045 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1046 ,,Upper Bound,633.8260,
1047 ,5% Trimmed Mean,,579.7064,
1048 ,Median,,547.1350,
1049 ,Variance,,54132.8466,
1050 ,Std. Deviation,,232.6647,
1051 ,Minimum,,106.2200,
1052 ,Maximum,,1355.2800,
1053 ,Range,,1249.0600,
1054 ,Interquartile Range,,293.1575,
1055 ,Skewness,,.6331,.2414
1056 ,Kurtosis,,.5300,.4783
1057 ])
1058
1059 AT_CLEANUP 
1060
1061
1062
1063 dnl Test for a crash which happened on bad input syntax
1064 AT_SETUP([EXAMINE -- Empty Parentheses])
1065 AT_KEYWORDS([categorical categoricals])
dnl /PLOT = SPREADLEVEL() has nothing between the parentheses.  pspp is
dnl expected to exit with status 1; whatever it prints on stdout is ignored.
1066
1067 AT_DATA([examine-empty-parens.sps], [dnl
1068 DATA LIST notable LIST /X *
1069 BEGIN DATA.
1070 2
1071 3
1072 END DATA.
1073
1074
1075 EXAMINE
1076         x
1077         /PLOT = SPREADLEVEL()
1078         .
1079 ])
1080
1081 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1082
1083 AT_CLEANUP 
1084
1085
1086
1087
1088 dnl Test for another crash which happened on bad input syntax
1089 AT_SETUP([EXAMINE -- Bad variable])
1090 AT_KEYWORDS([categorical categoricals])
dnl The subcommand is written "/VARIABLES/ h", with a slash where the other
dnl tests in this file write "=" -- presumably the bad syntax under test.
dnl pspp is expected to exit with status 1; its stdout is ignored.
1091
1092 AT_DATA([examine-bad-variable.sps], [dnl
1093 data list list /h * g *.
1094 begin data.
1095 3 1
1096 4 1
1097 5 2
1098 end data.
1099
1100 EXAMINE
1101         /VARIABLES/ h
1102         BY  g
1103         .
1104 ])
1105
1106 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1107
1108 AT_CLEANUP 
1109
1110
1111
1112 dnl Test for yet another crash. This time for extremes vs. missing weight values.
1113 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1114 AT_KEYWORDS([categorical categoricals])
dnl The weight variable g is system-missing for the second case.  The
dnl expected output starts with the warning about ignored cases and
dnl reports a valid N of 3.00.
dnl NOTE(review): the expected Extreme Values table still lists case 2
dnl (value 4.00), the case with the missing weight -- confirm that is the
dnl intended behavior and not just pinned output.
1115
1116 AT_DATA([examine-missing-weights.sps], [dnl
1117 data list notable list /h * g *.
1118 begin data.
1119 3 1
1120 4 .
1121 5 1
1122 2 1
1123 end data.
1124
1125 WEIGHT BY g.
1126
1127 EXAMINE h
1128         /STATISTICS extreme(3)
1129         .
1130 ])
1131
1132 AT_CHECK([pspp -O format=csv  examine-missing-weights.sps], [0], [dnl
1133 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1134
1135 Table: Case Processing Summary
1136 ,Cases,,,,,
1137 ,Valid,,Missing,,Total,
1138 ,N,Percent,N,Percent,N,Percent
1139 h,3.00,100%,.00,0%,3.00,100%
1140
1141 Table: Extreme Values
1142 ,,,Case Number,Value
1143 h,Highest,1,3,5.00
1144 ,,2,2,4.00
1145 ,,3,1,3.00
1146 ,Lowest,1,4,2.00
1147 ,,2,1,3.00
1148 ,,3,2,4.00
1149 ])
1150
1151 AT_CLEANUP 
1152
1153
1154