output: Introduce pivot tables.
[pspp] / tests / language / stats / examine.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
3 dnl 
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl 
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl 
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
17 AT_BANNER([EXAMINE])
18
19 AT_SETUP([EXAMINE])
20 AT_KEYWORDS([categorical categoricals])
21 AT_DATA([examine.sps], [
22 DATA LIST LIST /QUALITY * W * BRAND * .
23 BEGIN DATA
24 3  1  1
25 2  2  1
26 1  2  1
27 1  1  1
28 4  1  1
29 4  1  1
30 5  1  2
31 2  1  2
32 4  4  2
33 2  1  2
34 3  1  2
35 7  1  3
36 4  2  3
37 5  3  3
38 3  1  3
39 6  1  3
40 END DATA
41
42 WEIGHT BY w.
43
44 VARIABLE LABELS brand   'Manufacturer'.
45 VARIABLE LABELS quality 'Breaking Strain'.
46
47 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
48
49 LIST /FORMAT=NUMBERED.
50
51 EXAMINE
52         quality BY brand
53         /STATISTICS descriptives extreme(3)
54         .
55 ])
56
57
58 dnl In the following expected output, only the extreme values have been checked.
59 dnl The descriptives were pasted in without independent verification.
60 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
61 Table: Reading free-form data from INLINE.
62 Variable,Format
63 QUALITY,F8.0
64 W,F8.0
65 BRAND,F8.0
66
67 Table: Data List
68 Case Number,QUALITY,W,BRAND
69 1,3.00,1.00,1.00
70 2,2.00,2.00,1.00
71 3,1.00,2.00,1.00
72 4,1.00,1.00,1.00
73 5,4.00,1.00,1.00
74 6,4.00,1.00,1.00
75 7,5.00,1.00,2.00
76 8,2.00,1.00,2.00
77 9,4.00,4.00,2.00
78 10,2.00,1.00,2.00
79 11,3.00,1.00,2.00
80 12,7.00,1.00,3.00
81 13,4.00,2.00,3.00
82 14,5.00,3.00,3.00
83 15,3.00,1.00,3.00
84 16,6.00,1.00,3.00
85
86 Table: Case Processing Summary
87 ,Cases,,,,,
88 ,Valid,,Missing,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 Breaking Strain,24.00,100.0%,.00,.0%,24.00,100.0%
91
92 Table: Extreme Values
93 ,,,Case Number,Value
94 Breaking Strain,Highest,1,12,7.00
95 ,,2,16,6.00
96 ,,3,14,5.00
97 ,Lowest,1,3,1.00
98 ,,2,4,1.00
99 ,,3,2,2.00
100
101 Table: Descriptives
102 ,,,Statistic,Std. Error
103 Breaking Strain,Mean,,3.54,.32
104 ,95% Confidence Interval for Mean,Lower Bound,2.87,
105 ,,Upper Bound,4.21,
106 ,5% Trimmed Mean,,3.50,
107 ,Median,,4.00,
108 ,Variance,,2.52,
109 ,Std. Deviation,,1.59,
110 ,Minimum,,1.00,
111 ,Maximum,,7.00,
112 ,Range,,6.00,
113 ,Interquartile Range,,2.75,
114 ,Skewness,,.06,.47
115 ,Kurtosis,,-.36,.92
116
117 Table: Case Processing Summary
118 ,,Cases,,,,,
119 ,,Valid,,Missing,,Total,
120 ,Manufacturer,N,Percent,N,Percent,N,Percent
121 Breaking Strain,Aspeger,8.00,100.0%,.00,.0%,8.00,100.0%
122 ,Bloggs,8.00,100.0%,.00,.0%,8.00,100.0%
123 ,Charlies,8.00,100.0%,.00,.0%,8.00,100.0%
124
125 Table: Extreme Values
126 ,Manufacturer,,,Case Number,Value
127 Breaking Strain,Aspeger,Highest,1,6,4.00
128 ,,,2,5,4.00
129 ,,,3,1,3.00
130 ,,Lowest,1,3,1.00
131 ,,,2,4,1.00
132 ,,,3,2,2.00
133 ,Bloggs,Highest,1,7,5.00
134 ,,,2,9,4.00
135 ,,,3,11,3.00
136 ,,Lowest,1,8,2.00
137 ,,,2,10,2.00
138 ,,,3,11,3.00
139 ,Charlies,Highest,1,12,7.00
140 ,,,2,16,6.00
141 ,,,3,14,5.00
142 ,,Lowest,1,15,3.00
143 ,,,2,13,4.00
144 ,,,3,14,5.00
145
146 Table: Descriptives
147 ,Manufacturer,,,Statistic,Std. Error
148 Breaking Strain,Aspeger,Mean,,2.25,.45
149 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
150 ,,,Upper Bound,3.32,
151 ,,5% Trimmed Mean,,2.22,
152 ,,Median,,2.00,
153 ,,Variance,,1.64,
154 ,,Std. Deviation,,1.28,
155 ,,Minimum,,1.00,
156 ,,Maximum,,4.00,
157 ,,Range,,3.00,
158 ,,Interquartile Range,,2.75,
159 ,,Skewness,,.47,.75
160 ,,Kurtosis,,-1.55,1.48
161 ,Bloggs,Mean,,3.50,.38
162 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
163 ,,,Upper Bound,4.39,
164 ,,5% Trimmed Mean,,3.50,
165 ,,Median,,4.00,
166 ,,Variance,,1.14,
167 ,,Std. Deviation,,1.07,
168 ,,Minimum,,2.00,
169 ,,Maximum,,5.00,
170 ,,Range,,3.00,
171 ,,Interquartile Range,,1.75,
172 ,,Skewness,,-.47,.75
173 ,,Kurtosis,,-.83,1.48
174 ,Charlies,Mean,,4.88,.44
175 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
176 ,,,Upper Bound,5.92,
177 ,,5% Trimmed Mean,,4.86,
178 ,,Median,,5.00,
179 ,,Variance,,1.55,
180 ,,Std. Deviation,,1.25,
181 ,,Minimum,,3.00,
182 ,,Maximum,,7.00,
183 ,,Range,,4.00,
184 ,,Interquartile Range,,1.75,
185 ,,Skewness,,.30,.75
186 ,,Kurtosis,,.15,1.48
187 ])
188
189 AT_CLEANUP
190
191 AT_SETUP([EXAMINE -- extremes])
192 AT_KEYWORDS([categorical categoricals])
193 AT_DATA([examine.sps], [dnl
194 data list free /V1 W
195 begin data.
196 1  1
197 2  1
198 3  2
199 3  1
200 4  1
201 5  1
202 6  1
203 7  1
204 8  1
205 9  1
206 10 1
207 11 1
208 12 1
209 13 1
210 14 1
211 15 1
212 16 1
213 17 1
214 18 2
215 19 1
216 20 1
217 end data.
218
219 weight by w.
220
221 examine v1 
222  /statistics=extreme(6)
223  .
224 ])
225
226 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
227 Table: Case Processing Summary
228 ,Cases,,,,,
229 ,Valid,,Missing,,Total,
230 ,N,Percent,N,Percent,N,Percent
231 V1,23.00,100.0%,.00,.0%,23.00,100.0%
232
233 Table: Extreme Values
234 ,,,Case Number,Value
235 V1,Highest,1,21,20.00
236 ,,2,20,19.00
237 ,,3,19,18.00
238 ,,4,18,17.00
239 ,,5,17,16.00
240 ,,6,16,15.00
241 ,Lowest,1,1,1.00
242 ,,2,2,2.00
243 ,,3,3,3.00
244 ,,4,4,3.00
245 ,,5,5,4.00
246 ,,6,6,5.00
247 ])
248
249 AT_CLEANUP
250
251
252
253 AT_SETUP([EXAMINE -- extremes with fractional weights])
254 AT_KEYWORDS([categorical categoricals])
255 AT_DATA([extreme.sps], [dnl
256 set format=F20.3.
257 data list notable list /w * x *.
258 begin data.
259  0.88  300000
260  0.86  320000
261  0.98  480000
262  0.93  960000
263  1.35  960000
264  1.31  960000
265  0.88  960000
266  0.88  1080000
267  0.88  1080000
268  0.95  1200000
269  1.47  1200000
270  0.93  1200000
271  0.98  1320000
272  1.31  1380000
273  0.93  1440000
274  0.88  1560000
275  1.56  1560000
276  1.47  1560000
277 end data.
278
279 weight by w.
280
281
282 EXAMINE
283         x
284         /STATISTICS = DESCRIPTIVES EXTREME (5)
285         .
286 ])
287
288 AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
289 Table: Case Processing Summary
290 ,Cases,,,,,
291 ,Valid,,Missing,,Total,
292 ,N,Percent,N,Percent,N,Percent
293 x,19.430,100.0%,.000,.0%,19.430,100.0%
294
295 Table: Extreme Values
296 ,,,Case Number,Value
297 x,Highest,1,18,1560000.000
298 ,,2,17,1560000.000
299 ,,3,16,1560000.000
300 ,,4,15,1440000.000
301 ,,5,14,1380000.000
302 ,Lowest,1,1,300000.000
303 ,,2,2,320000.000
304 ,,3,3,480000.000
305 ,,4,4,960000.000
306 ,,5,5,960000.000
307
308 Table: Descriptives
309 ,,,Statistic,Std. Error
310 x,Mean,,1120010.293,86222.178
311 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
312 ,,Upper Bound,1300853.894,
313 ,5% Trimmed Mean,,1141017.899,
314 ,Median,,1200000.000,
315 ,Variance,,144447748124.869,
316 ,Std. Deviation,,380062.821,
317 ,Minimum,,300000.000,
318 ,Maximum,,1560000.000,
319 ,Range,,1260000.000,
320 ,Interquartile Range,,467258.065,
321 ,Skewness,,-.887,.519
322 ,Kurtosis,,.340,1.005
323 ])
324
325 AT_CLEANUP
326
327 dnl Test the PERCENTILES subcommand of the EXAMINE command.
328 dnl In particular, test that it behaves properly when there are only
329 dnl a few cases.
330 AT_SETUP([EXAMINE -- percentiles])
331 AT_KEYWORDS([categorical categoricals])
332 AT_DATA([examine.sps], [dnl
333 DATA LIST LIST /X *.
334 BEGIN DATA.
335 2.00 
336 8.00 
337 5.00 
338 END DATA.
339
340 EXAMINE /x
341         /PERCENTILES=HAVERAGE.
342
343 EXAMINE /x
344         /PERCENTILES=WAVERAGE.
345
346 EXAMINE /x
347         /PERCENTILES=ROUND.
348
349 EXAMINE /x
350         /PERCENTILES=EMPIRICAL.
351
352 EXAMINE /x
353         /PERCENTILES=AEMPIRICAL.
354 ])
355 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
356 AT_CHECK([cat pspp.csv], [0], [dnl
357 Table: Reading free-form data from INLINE.
358 Variable,Format
359 X,F8.0
360
361 Table: Case Processing Summary
362 ,Cases,,,,,
363 ,Valid,,Missing,,Total,
364 ,N,Percent,N,Percent,N,Percent
365 X,3,100.0%,0,.0%,3,100.0%
366
367 Table: Percentiles
368 ,,Percentiles,,,,,,
369 ,,5,10,25,50,75,90,95
370 X,Weighted Average,.40,.80,2.00,5.00,8.00,8.00,8.00
371 ,Tukey's Hinges,,,3.50,5.00,6.50,,
372
373 Table: Case Processing Summary
374 ,Cases,,,,,
375 ,Valid,,Missing,,Total,
376 ,N,Percent,N,Percent,N,Percent
377 X,3,100.0%,0,.0%,3,100.0%
378
379 Table: Percentiles
380 ,,Percentiles,,,,,,
381 ,,5,10,25,50,75,90,95
382 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
383 ,Tukey's Hinges,,,3.50,5.00,6.50,,
384
385 Table: Case Processing Summary
386 ,Cases,,,,,
387 ,Valid,,Missing,,Total,
388 ,N,Percent,N,Percent,N,Percent
389 X,3,100.0%,0,.0%,3,100.0%
390
391 Table: Percentiles
392 ,,Percentiles,,,,,,
393 ,,5,10,25,50,75,90,95
394 X,Weighted Average,.00,.00,2.00,5.00,5.00,8.00,8.00
395 ,Tukey's Hinges,,,3.50,5.00,6.50,,
396
397 Table: Case Processing Summary
398 ,Cases,,,,,
399 ,Valid,,Missing,,Total,
400 ,N,Percent,N,Percent,N,Percent
401 X,3,100.0%,0,.0%,3,100.0%
402
403 Table: Percentiles
404 ,,Percentiles,,,,,,
405 ,,5,10,25,50,75,90,95
406 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
407 ,Tukey's Hinges,,,3.50,5.00,6.50,,
408
409 Table: Case Processing Summary
410 ,Cases,,,,,
411 ,Valid,,Missing,,Total,
412 ,N,Percent,N,Percent,N,Percent
413 X,3,100.0%,0,.0%,3,100.0%
414
415 Table: Percentiles
416 ,,Percentiles,,,,,,
417 ,,5,10,25,50,75,90,95
418 X,Weighted Average,2.00,2.00,2.00,5.00,8.00,8.00,8.00
419 ,Tukey's Hinges,,,3.50,5.00,6.50,,
420 ])
421 AT_CLEANUP
422
423 AT_SETUP([EXAMINE -- missing values])
424 AT_KEYWORDS([categorical categoricals])
425 AT_DATA([examine.sps], [dnl
426 DATA LIST LIST /x * y *.
427 BEGIN DATA.
428 1   1 
429 2   1
430 3   1
431 4   1
432 5   2
433 6   2
434 .   2
435 END DATA
436
437 EXAMINE /x by y
438         /MISSING = PAIRWISE
439         .
440 ])
441 AT_CHECK([pspp -o pspp.csv examine.sps])
442 AT_CHECK([cat pspp.csv], [0], [dnl
443 Table: Reading free-form data from INLINE.
444 Variable,Format
445 x,F8.0
446 y,F8.0
447
448 Table: Case Processing Summary
449 ,Cases,,,,,
450 ,Valid,,Missing,,Total,
451 ,N,Percent,N,Percent,N,Percent
452 x,6,85.7%,1,14.3%,7,100.0%
453
454 Table: Case Processing Summary
455 ,,Cases,,,,,
456 ,,Valid,,Missing,,Total,
457 ,y,N,Percent,N,Percent,N,Percent
458 x,1.00,4,100.0%,0,.0%,4,100.0%
459 ,2.00,2,66.7%,1,33.3%,3,100.0%
460 ])
461 AT_CLEANUP
462
463
464 AT_SETUP([EXAMINE -- user missing values])
465 AT_KEYWORDS([categorical categoricals])
466 AT_DATA([examine-m.sps], [dnl
467 DATA LIST notable LIST /x * y *.
468 BEGIN DATA.
469 1                   2
470 9999999999          2
471 9999999999          99
472 END DATA.
473
474 MISSING VALUES x (9999999999).
475 MISSING VALUES y (99).
476
477 EXAMINE
478         /VARIABLES= x y
479         /MISSING=PAIRWISE.
480 ])
481 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
482 Table: Case Processing Summary
483 ,Cases,,,,,
484 ,Valid,,Missing,,Total,
485 ,N,Percent,N,Percent,N,Percent
486 x,1,33.3%,2,66.7%,3,100.0%
487 y,2,66.7%,1,33.3%,3,100.0%
488 ])
489 AT_CLEANUP
490
491 AT_SETUP([EXAMINE -- missing values and percentiles])
492 AT_KEYWORDS([categorical categoricals])
493 AT_DATA([examine.sps], [dnl
494 DATA LIST LIST /X *.
495 BEGIN DATA.
496 99
497 99
498 5.00
499 END DATA.
500
501 MISSING VALUE X (99).
502
503 EXAMINE /x
504         /PERCENTILES=HAVERAGE.
505 ])
506 AT_CHECK([pspp -o pspp.csv examine.sps])
507 dnl Ignore output -- this is just a no-crash check.
508 AT_CLEANUP
509
510 dnl Tests the trimmed mean calculation in the case
511 dnl where the data is weighted towards the centre.
512 AT_SETUP([EXAMINE -- trimmed mean])
513 AT_KEYWORDS([categorical categoricals])
514 AT_DATA([examine.sps], [dnl
515 DATA LIST LIST /X * C *.
516 BEGIN DATA.
517 1 1
518 2 49
519 3 2
520 END DATA.
521
522 WEIGHT BY c.
523
524 EXAMINE
525         x
526         /STATISTICS=DESCRIPTIVES
527         .
528 ])
529 AT_CHECK([pspp -o pspp.csv examine.sps])
530 AT_CHECK([cat pspp.csv], [0], [dnl
531 Table: Reading free-form data from INLINE.
532 Variable,Format
533 X,F8.0
534 C,F8.0
535
536 Table: Case Processing Summary
537 ,Cases,,,,,
538 ,Valid,,Missing,,Total,
539 ,N,Percent,N,Percent,N,Percent
540 X,52.00,100.0%,.00,.0%,52.00,100.0%
541
542 Table: Descriptives
543 ,,,Statistic,Std. Error
544 X,Mean,,2.02,.03
545 ,95% Confidence Interval for Mean,Lower Bound,1.95,
546 ,,Upper Bound,2.09,
547 ,5% Trimmed Mean,,2.00,
548 ,Median,,2.00,
549 ,Variance,,.06,
550 ,Std. Deviation,,.24,
551 ,Minimum,,1.00,
552 ,Maximum,,3.00,
553 ,Range,,2.00,
554 ,Interquartile Range,,.00,
555 ,Skewness,,1.19,.33
556 ,Kurtosis,,15.73,.65
557 ])
558 AT_CLEANUP
559
560 AT_SETUP([EXAMINE -- crash bug])
561 AT_KEYWORDS([categorical categoricals])
562 AT_DATA([examine.sps], [dnl
563 data list list /a * x * y *.
564 begin data.
565 3 1 3
566 5 1 4
567 7 2 3
568 end data.
569
570 examine a by x by y
571         /statistics=DESCRIPTIVES
572         . 
573 ])
574 AT_CHECK([pspp -o pspp.csv examine.sps])
575 dnl Ignore output -- this is just a no-crash check.
576 AT_CLEANUP
577
578 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
579 AT_SETUP([EXAMINE -- consecutive runs don't crash])
580 AT_KEYWORDS([categorical categoricals])
581 AT_DATA([examine.sps], [dnl
582 data list list /y * z *.
583 begin data.
584 6 4
585 5 3
586 7 6
587 end data.
588
589 EXAMINE /VARIABLES= z BY y.
590
591 EXAMINE /VARIABLES= z. 
592 ])
593 AT_CHECK([pspp -o pspp.csv examine.sps])
594 dnl Ignore output -- this is just a no-crash check.
595 AT_CLEANUP
596
597 dnl Test that /STATISTICS=DESCRIPTIVES does not crash in the presence of missing values.
598 AT_SETUP([EXAMINE -- missing values don't crash])
599 AT_KEYWORDS([categorical categoricals])
600 AT_DATA([examine.sps], [dnl
601 data list list /x * y *.
602 begin data.
603 1 0
604 2 0
605 . 0
606 3 1
607 4 1
608 end data.
609 examine x by y /statistics=descriptives. 
610 ])
611 AT_CHECK([pspp -o pspp.csv examine.sps])
612 dnl Ignore output -- this is just a no-crash check.
613 AT_CLEANUP
614
615 dnl Test that having only a single case doesn't crash.
616 AT_SETUP([EXAMINE -- single case doesn't crash])
617 AT_KEYWORDS([categorical categoricals])
618 AT_DATA([examine.sps], [dnl
619 DATA LIST LIST /quality * .
620 BEGIN DATA
621 3  
622 END DATA
623
624
625 EXAMINE
626         quality 
627         /STATISTICS descriptives 
628         /PLOT = histogram
629         .
630 ])
631 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
632 dnl Ignore output -- this is just a no-crash check.
633 AT_CLEANUP
634
635 dnl Test that all-missing data doesn't crash.
636 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
637 AT_KEYWORDS([categorical categoricals])
638 AT_DATA([examine.sps], [dnl
639 DATA LIST LIST /x *.
640 BEGIN DATA.
641 .
642 .
643 .
644 .
645 END DATA.
646
647 EXAMINE /x 
648         PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
649         /ID=x
650         /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
651         /PERCENTILE=AEMPIRICAL
652         .
653 ])
654 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
655 dnl Ignore output -- this is just a no-crash check.
656 AT_CLEANUP
657
658 dnl Test that big input doesn't crash (bug 11307).
659 AT_SETUP([EXAMINE -- big input doesn't crash])
660 AT_KEYWORDS([categorical categoricals slow])
661 AT_DATA([examine.sps], [dnl
662 INPUT PROGRAM.
663         LOOP #I=1 TO 50000.
664                 COMPUTE X=NORMAL(10).
665                 END CASE.
666         END LOOP.
667         END FILE.
668 END INPUT PROGRAM.
669
670
671 EXAMINE /x
672         /STATISTICS=DESCRIPTIVES.
673 ])
674 AT_CHECK([pspp -o pspp.csv examine.sps])
675 dnl Ignore output -- this is just a no-crash check.
676 AT_CLEANUP
677
678 dnl Another test that big input doesn't crash.
679 dnl The actual bug that this checks for has been lost.
680 AT_SETUP([EXAMINE -- big input doesn't crash 2])
681 AT_KEYWORDS([categorical categoricals slow])
682 AT_DATA([make-big-input.pl], 
683   [for ($i=0; $i<100000; $i++) { print "AB12\n" };
684    for ($i=0; $i<100000; $i++) { print "AB04\n" };
685 ])
686 AT_CHECK([$PERL make-big-input.pl > large.txt])
687 AT_DATA([examine.sps], [dnl
688 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
689
690
691 AGGREGATE OUTFILE=* /BREAK=X /A=N.
692
693
694 EXAMINE /A BY X.
695 ])
696 AT_CHECK([pspp -o pspp.csv examine.sps])
697 dnl Ignore output -- this is just a no-crash check.
698 AT_DATA([more-big-input.pl], 
699   [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
700 ])
701 AT_CHECK([$PERL more-big-input.pl >> large.txt])
702 AT_CHECK([pspp -o pspp.csv examine.sps])
703 dnl Ignore output -- this is just a no-crash check.
704 AT_CLEANUP
705
706
707 dnl Test that the /ID subcommand works with non-numeric (string) variables.
708 AT_SETUP([EXAMINE -- non-numeric ID])
709 AT_KEYWORDS([categorical categoricals])
710
711 AT_DATA([examine-id.sps], [dnl
712 data list notable list /x * y (a12).
713 begin data.
714 1  one
715 2  two
716 3  three
717 4  four
718 5  five
719 6  six
720 7  seven
721 8  eight
722 9  nine
723 10 ten
724 11 eleven
725 12 twelve
726 30 thirty
727 300 threehundred
728 end data.
729
730 examine x
731         /statistics = extreme
732         /id = y
733         /plot = boxplot
734         .
735 ])
736
737 AT_CHECK([pspp -O format=csv examine-id.sps], [0], 
738 [Table: Case Processing Summary
739 ,Cases,,,,,
740 ,Valid,,Missing,,Total,
741 ,N,Percent,N,Percent,N,Percent
742 x,14,100.0%,0,.0%,14,100.0%
743
744 Table: Extreme Values
745 ,,,y,Value
746 x,Highest,1,threehundred,300.00
747 ,,2,thirty,30.00
748 ,,3,twelve,12.00
749 ,,4,eleven,11.00
750 ,,5,ten,10.00
751 ,Lowest,1,one,1.00
752 ,,2,two,2.00
753 ,,3,three,3.00
754 ,,4,four,4.00
755 ,,5,five,5.00
756 ])
757
758 AT_CLEANUP 
759
760 dnl Test for a crash which happened on cleanup after bad input syntax (an unknown /PLOT keyword).
761 AT_SETUP([EXAMINE -- Bad Input])
762 AT_KEYWORDS([categorical categoricals])
763
764 AT_DATA([examine-bad.sps], [dnl
765 data list list /h * g *.
766 begin data.
767 1 1
768 2 1
769 3 1
770 4 1
771 5 2
772 6 2
773 7 2
774 8 2
775 9 2
776 end data.
777
778 EXAMINE 
779         /VARIABLES= h
780         BY  g
781         /STATISTICS = DESCRIPTIVES EXTREME
782         /PLOT = lkajsdas
783         .
784 ])
785
786 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
787
788 AT_CLEANUP 
789
790
791 dnl Check the MISSING=REPORT option
792 AT_SETUP([EXAMINE -- MISSING=REPORT])
793 AT_KEYWORDS([categorical categoricals])
794
795 AT_DATA([examine-report.sps], [dnl
796 set format = F22.0.
797 data list list /x * g *.
798 begin data.
799 1   1
800 2   1
801 3   1
802 4   1
803 5   1
804 6   1
805 7   1
806 8   1
807 9   1
808 10   2
809 20   2
810 30   2
811 40   2
812 50   2
813 60   2
814 70   2
815 80   2
816 90   2
817 101   9
818 201   9
819 301   9
820 401   9
821 501   99
822 601   99
823 701   99
824 801   99
825 901   99
826 1001  .
827 2002  .
828 3003  .
829 4004  .
830 end data.
831
832 MISSING VALUES g (9, 99, 999).
833
834 EXAMINE
835         /VARIABLES = x
836         BY  g
837         /STATISTICS = EXTREME
838         /NOTOTAL
839         /MISSING = REPORT.
840 ])
841
842
843 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine-report.sps])
844 AT_CHECK([cat pspp.csv], [0],
845   [[Table: Reading free-form data from INLINE.
846 Variable,Format
847 x,F8.0
848 g,F8.0
849
850 Table: Case Processing Summary
851 ,,Cases,,,,,
852 ,,Valid,,Missing,,Total,
853 ,g,N,Percent,N,Percent,N,Percent
854 x,.,4,100.0%,0,.0%,4,100.0%
855 ,1,9,100.0%,0,.0%,9,100.0%
856 ,2,9,100.0%,0,.0%,9,100.0%
857 ,9[a],4,100.0%,0,.0%,4,100.0%
858 ,99[a],5,100.0%,0,.0%,5,100.0%
859
860 Footnotes:
861 a,User-missing value.
862
863 Table: Extreme Values
864 ,g,,,Case Number,Value
865 x,.,Highest,1,31,4004
866 ,,,2,30,3003
867 ,,,3,29,2002
868 ,,,4,28,1001
869 ,,,5,0,0
870 ,,Lowest,1,28,1001
871 ,,,2,29,2002
872 ,,,3,30,3003
873 ,,,4,31,4004
874 ,,,5,31,4004
875 ,1,Highest,1,9,9
876 ,,,2,8,8
877 ,,,3,7,7
878 ,,,4,6,6
879 ,,,5,5,5
880 ,,Lowest,1,1,1
881 ,,,2,2,2
882 ,,,3,3,3
883 ,,,4,4,4
884 ,,,5,5,5
885 ,2,Highest,1,18,90
886 ,,,2,17,80
887 ,,,3,16,70
888 ,,,4,15,60
889 ,,,5,14,50
890 ,,Lowest,1,10,10
891 ,,,2,11,20
892 ,,,3,12,30
893 ,,,4,13,40
894 ,,,5,14,50
895 ,9[a],Highest,1,22,401
896 ,,,2,21,301
897 ,,,3,20,201
898 ,,,4,19,101
899 ,,,5,0,0
900 ,,Lowest,1,19,101
901 ,,,2,20,201
902 ,,,3,21,301
903 ,,,4,22,401
904 ,,,5,22,401
905 ,99[a],Highest,1,27,901
906 ,,,2,26,801
907 ,,,3,25,701
908 ,,,4,24,601
909 ,,,5,23,501
910 ,,Lowest,1,23,501
911 ,,,2,24,601
912 ,,,3,25,701
913 ,,,4,26,801
914 ,,,5,27,901
915
916 Footnotes:
917 a,User-missing value.
918 ]])
919
920 AT_CLEANUP 
921
922
923 dnl Run a test of the basic STATISTICS using a "real"
924 dnl dataset, comparing against "real" results kindly
925 dnl provided by Olaf Nöhring.
926 AT_SETUP([EXAMINE -- sample unweighted])
927 AT_KEYWORDS([categorical categoricals])
928
929 AT_DATA([sample.sps], [dnl
930 set format = F22.4.
931 DATA LIST notable LIST /X *
932 BEGIN DATA.
933 461.19000000
934 466.38000000
935 479.46000000
936 480.10000000
937 483.43000000
938 488.30000000
939 489.00000000
940 491.62000000
941 505.62000000
942 511.30000000
943 521.53000000
944 526.70000000
945 528.25000000
946 538.70000000
947 540.22000000
948 540.58000000
949 546.10000000
950 548.17000000
951 553.99000000
952 566.21000000
953 575.90000000
954 584.38000000
955 593.40000000
956 357.05000000
957 359.73000000
958 360.48000000
959 373.98000000
960 374.13000000
961 381.45000000
962 383.72000000
963 390.00000000
964 400.34000000
965 415.32000000
966 415.91000000
967 418.30000000
968 421.03000000
969 422.43000000
970 426.93000000
971 433.25000000
972 436.89000000
973 445.33000000
974 446.33000000
975 446.55000000
976 456.44000000
977 689.49000000
978 691.92000000
979 695.00000000
980 695.36000000
981 698.21000000
982 699.46000000
983 706.61000000
984 710.69000000
985 715.82000000
986 715.82000000
987 741.39000000
988 752.27000000
989 756.73000000
990 757.74000000
991 759.57000000
992 796.07000000
993 813.78000000
994 817.25000000
995 825.48000000
996 831.28000000
997 849.24000000
998 890.00000000
999 894.78000000
1000 935.65000000
1001 935.90000000
1002 945.90000000
1003 1012.8600000
1004 1022.6000000
1005 1061.8100000
1006 1063.5000000
1007 1077.2300000
1008 1151.6300000
1009 1355.2800000
1010 598.88000000
1011 606.91000000
1012 621.60000000
1013 624.80000000
1014 636.13000000
1015 637.38000000
1016 640.32000000
1017 649.35000000
1018 656.51000000
1019 662.55000000
1020 664.69000000
1021 106.22000000
1022 132.24000000
1023 174.76000000
1024 204.85000000
1025 264.93000000
1026 264.99000000
1027 269.84000000
1028 325.12000000
1029 331.67000000
1030 337.26000000
1031 347.68000000
1032 354.91000000
1033 END DATA.
1034
1035 EXAMINE
1036         x
1037         /STATISTICS=DESCRIPTIVES
1038         .
1039 ])
1040
1041 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1042 Table: Case Processing Summary
1043 ,Cases,,,,,
1044 ,Valid,,Missing,,Total,
1045 ,N,Percent,N,Percent,N,Percent
1046 X,100,100.0%,0,.0%,100,100.0%
1047
1048 Table: Descriptives
1049 ,,,Statistic,Std. Error
1050 X,Mean,,587.6603,23.2665
1051 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1052 ,,Upper Bound,633.8260,
1053 ,5% Trimmed Mean,,579.7064,
1054 ,Median,,547.1350,
1055 ,Variance,,54132.8466,
1056 ,Std. Deviation,,232.6647,
1057 ,Minimum,,106.2200,
1058 ,Maximum,,1355.2800,
1059 ,Range,,1249.0600,
1060 ,Interquartile Range,,293.1575,
1061 ,Skewness,,.6331,.2414
1062 ,Kurtosis,,.5300,.4783
1063 ])
1064
1065 AT_CLEANUP 
1066
1067
1068
1069 dnl Test for a crash which happened on bad input syntax (empty parentheses after SPREADLEVEL).
1070 AT_SETUP([EXAMINE -- Empty Parentheses])
1071 AT_KEYWORDS([categorical categoricals])
1072
1073 AT_DATA([examine-empty-parens.sps], [dnl
1074 DATA LIST notable LIST /X *
1075 BEGIN DATA.
1076 2
1077 3
1078 END DATA.
1079
1080
1081 EXAMINE
1082         x
1083         /PLOT = SPREADLEVEL()
1084         .
1085 ])
1086
1087 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1088
1089 AT_CLEANUP 
1090
1091
1092
1093
1094 dnl Test for another crash which happened on bad input syntax
1095 AT_SETUP([EXAMINE -- Bad variable])
1096 AT_KEYWORDS([categorical categoricals])
1097
1098 AT_DATA([examine-bad-variable.sps], [dnl
1099 data list list /h * g *.
1100 begin data.
1101 3 1
1102 4 1
1103 5 2
1104 end data.
1105
1106 EXAMINE
1107         /VARIABLES/ h
1108         BY  g
1109         .
1110 ])
1111
1112 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1113
1114 AT_CLEANUP 
1115
1116
1117
1118 dnl Test for yet another crash, this time for extremes combined with missing weight values.
1119 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
1120 AT_KEYWORDS([categorical categoricals])
1121
1122 AT_DATA([examine-missing-weights.sps], [dnl
1123 data list notable list /h * g *.
1124 begin data.
1125 3 1
1126 4 .
1127 5 1
1128 2 1
1129 end data.
1130
1131 WEIGHT BY g.
1132
1133 EXAMINE h
1134         /STATISTICS extreme(3)
1135         .
1136 ])
1137
1138 AT_CHECK([pspp -O format=csv  examine-missing-weights.sps], [0], [dnl
1139 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1140
1141 Table: Case Processing Summary
1142 ,Cases,,,,,
1143 ,Valid,,Missing,,Total,
1144 ,N,Percent,N,Percent,N,Percent
1145 h,3.00,100.0%,.00,.0%,3.00,100.0%
1146
1147 Table: Extreme Values
1148 ,,,Case Number,Value
1149 h,Highest,1,3,5.00
1150 ,,2,2,4.00
1151 ,,3,1,3.00
1152 ,Lowest,1,4,2.00
1153 ,,2,1,3.00
1154 ,,3,2,4.00
1155 ])
1156
1157 AT_CLEANUP 
1158
1159 dnl This is an example from doc/tutorial.texi
1160 dnl So if the results of this have to be changed in any way,
1161 dnl make sure to update that file.
1162 AT_SETUP([EXAMINE tutorial example 1])
1163 cp $top_srcdir/examples/repairs.sav .
1164 AT_DATA([repairs.sps], [dnl
1165 GET FILE='repairs.sav'.
1166 EXAMINE mtbf /STATISTICS=DESCRIPTIVES.
1167 COMPUTE mtbf_ln = LN (mtbf).
1168 EXAMINE mtbf_ln /STATISTICS=DESCRIPTIVES.
1169 ])
1170 AT_CHECK([pspp -o pspp.csv -o pspp.txt repairs.sps])
1171 AT_CHECK([cat pspp.csv], [0], [dnl
1172 Table: Case Processing Summary
1173 ,Cases,,,,,
1174 ,Valid,,Missing,,Total,
1175 ,N,Percent,N,Percent,N,Percent
1176 Mean time between failures (months) ,15,100.0%,0,.0%,15,100.0%
1177
1178 Table: Descriptives
1179 ,,,Statistic,Std. Error
1180 Mean time between failures (months) ,Mean,,8.32,1.62
1181 ,95% Confidence Interval for Mean,Lower Bound,4.85,
1182 ,,Upper Bound,11.79,
1183 ,5% Trimmed Mean,,7.69,
1184 ,Median,,8.12,
1185 ,Variance,,39.21,
1186 ,Std. Deviation,,6.26,
1187 ,Minimum,,1.63,
1188 ,Maximum,,26.47,
1189 ,Range,,24.84,
1190 ,Interquartile Range,,5.83,
1191 ,Skewness,,1.85,.58
1192 ,Kurtosis,,4.49,1.12
1193
1194 Table: Case Processing Summary
1195 ,Cases,,,,,
1196 ,Valid,,Missing,,Total,
1197 ,N,Percent,N,Percent,N,Percent
1198 mtbf_ln,15,100.0%,0,.0%,15,100.0%
1199
1200 Table: Descriptives
1201 ,,,Statistic,Std. Error
1202 mtbf_ln,Mean,,1.88,.19
1203 ,95% Confidence Interval for Mean,Lower Bound,1.47,
1204 ,,Upper Bound,2.29,
1205 ,5% Trimmed Mean,,1.88,
1206 ,Median,,2.09,
1207 ,Variance,,.54,
1208 ,Std. Deviation,,.74,
1209 ,Minimum,,.49,
1210 ,Maximum,,3.28,
1211 ,Range,,2.79,
1212 ,Interquartile Range,,.92,
1213 ,Skewness,,-.16,.58
1214 ,Kurtosis,,-.09,1.12
1215 ])
1216 AT_CLEANUP
1217
1218 dnl This is an example from doc/tutorial.texi
1219 dnl So if the results of this have to be changed in any way,
1220 dnl make sure to update that file.
1221 AT_SETUP([EXAMINE tutorial example 2])
1222 cp $top_srcdir/examples/physiology.sav .
1223 AT_DATA([examine.sps], [dnl
1224 GET FILE='physiology.sav'.
1225 EXAMINE height, weight /STATISTICS=EXTREME(3).
1226 ])
1227 AT_CHECK([pspp -o pspp.csv -o pspp.txt examine.sps])
1228 AT_CHECK([cat pspp.csv], [0], [dnl
1229 Table: Case Processing Summary
1230 ,Cases,,,,,
1231 ,Valid,,Missing,,Total,
1232 ,N,Percent,N,Percent,N,Percent
1233 Height in millimeters   ,40,100.0%,0,.0%,40,100.0%
1234 Weight in kilograms ,40,100.0%,0,.0%,40,100.0%
1235
1236 Table: Extreme Values
1237 ,,,Case Number,Value
1238 Height in millimeters   ,Highest,1,14,1903
1239 ,,2,15,1884
1240 ,,3,12,1802
1241 ,Lowest,1,30,179
1242 ,,2,31,1598
1243 ,,3,28,1601
1244 Weight in kilograms ,Highest,1,13,92.1
1245 ,,2,5,92.1
1246 ,,3,17,91.7
1247 ,Lowest,1,38,-55.6
1248 ,,2,39,54.5
1249 ,,3,33,55.4
1250 ])
1251 AT_CLEANUP
1252