Merge remote-tracking branch 'origin/master' into sheet
[pspp] / tests / language / stats / examine.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
3 dnl 
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl 
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl 
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
dnl Banner that introduces the EXAMINE test groups defined in this file.
17 AT_BANNER([EXAMINE])
18
19 AT_SETUP([EXAMINE])
dnl Runs EXAMINE on data weighted by w, requesting descriptives and the
dnl three most extreme values of quality, and compares the CSV output with
dnl the expected tables: first for all cases, then broken down by brand.
20 AT_DATA([examine.sps], [
21 DATA LIST LIST /QUALITY * W * BRAND * .
22 BEGIN DATA
23 3  1  1
24 2  2  1
25 1  2  1
26 1  1  1
27 4  1  1
28 4  1  1
29 5  1  2
30 2  1  2
31 4  4  2
32 2  1  2
33 3  1  2
34 7  1  3
35 4  2  3
36 5  3  3
37 3  1  3
38 6  1  3
39 END DATA
40
41 WEIGHT BY w.
42
43 VARIABLE LABELS brand   'Manufacturer'.
44 VARIABLE LABELS quality 'Breaking Strain'.
45
46 VALUE LABELS /brand 1 'Aspeger' 2 'Bloggs' 3 'Charlies'.
47
48 LIST /FORMAT=NUMBERED.
49
50 EXAMINE
51         quality BY brand
52         /STATISTICS descriptives extreme(3)
53         .
54 ])
55
56
57 dnl In the following data, only the extreme values have been checked.
58 dnl The descriptives have been blindly pasted.
59 AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
60 Table: Reading free-form data from INLINE.
61 Variable,Format
62 QUALITY,F8.0
63 W,F8.0
64 BRAND,F8.0
65
66 Table: Data List
67 Case Number,QUALITY,W,BRAND
68 1,3.00,1.00,1.00
69 2,2.00,2.00,1.00
70 3,1.00,2.00,1.00
71 4,1.00,1.00,1.00
72 5,4.00,1.00,1.00
73 6,4.00,1.00,1.00
74 7,5.00,1.00,2.00
75 8,2.00,1.00,2.00
76 9,4.00,4.00,2.00
77 10,2.00,1.00,2.00
78 11,3.00,1.00,2.00
79 12,7.00,1.00,3.00
80 13,4.00,2.00,3.00
81 14,5.00,3.00,3.00
82 15,3.00,1.00,3.00
83 16,6.00,1.00,3.00
84
85 Table: Case Processing Summary
86 ,Cases,,,,,
87 ,Valid,,Missing,,Total,
88 ,N,Percent,N,Percent,N,Percent
89 Breaking Strain,24.00,100%,.00,0%,24.00,100%
90
91 Table: Extreme Values
92 ,,,Case Number,Value
93 Breaking Strain,Highest,1,12,7.00
94 ,,2,16,6.00
95 ,,3,14,5.00
96 ,Lowest,1,3,1.00
97 ,,2,4,1.00
98 ,,3,2,2.00
99
100 Table: Descriptives
101 ,,,Statistic,Std. Error
102 Breaking Strain,Mean,,3.54,.32
103 ,95% Confidence Interval for Mean,Lower Bound,2.87,
104 ,,Upper Bound,4.21,
105 ,5% Trimmed Mean,,3.50,
106 ,Median,,4.00,
107 ,Variance,,2.52,
108 ,Std. Deviation,,1.59,
109 ,Minimum,,1.00,
110 ,Maximum,,7.00,
111 ,Range,,6.00,
112 ,Interquartile Range,,2.75,
113 ,Skewness,,.06,.47
114 ,Kurtosis,,-.36,.92
115
116 Table: Case Processing Summary
117 ,,Cases,,,,,
118 ,,Valid,,Missing,,Total,
119 ,Manufacturer,N,Percent,N,Percent,N,Percent
120 Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
121 ,Bloggs,8.00,100%,.00,0%,8.00,100%
122 ,Charlies,8.00,100%,.00,0%,8.00,100%
123
124 Table: Extreme Values
125 ,Manufacturer,,,Case Number,Value
126 Breaking Strain,Aspeger,Highest,1,6,4.00
127 ,,,2,5,4.00
128 ,,,3,1,3.00
129 ,,Lowest,1,3,1.00
130 ,,,2,4,1.00
131 ,,,3,2,2.00
132 ,Bloggs,Highest,1,7,5.00
133 ,,,2,9,4.00
134 ,,,3,11,3.00
135 ,,Lowest,1,8,2.00
136 ,,,2,10,2.00
137 ,,,3,11,3.00
138 ,Charlies,Highest,1,12,7.00
139 ,,,2,16,6.00
140 ,,,3,14,5.00
141 ,,Lowest,1,15,3.00
142 ,,,2,13,4.00
143 ,,,3,14,5.00
144
145 Table: Descriptives
146 ,Manufacturer,,,Statistic,Std. Error
147 Breaking Strain,Aspeger,Mean,,2.25,.45
148 ,,95% Confidence Interval for Mean,Lower Bound,1.18,
149 ,,,Upper Bound,3.32,
150 ,,5% Trimmed Mean,,2.22,
151 ,,Median,,2.00,
152 ,,Variance,,1.64,
153 ,,Std. Deviation,,1.28,
154 ,,Minimum,,1.00,
155 ,,Maximum,,4.00,
156 ,,Range,,3.00,
157 ,,Interquartile Range,,2.75,
158 ,,Skewness,,.47,.75
159 ,,Kurtosis,,-1.55,1.48
160 ,Bloggs,Mean,,3.50,.38
161 ,,95% Confidence Interval for Mean,Lower Bound,2.61,
162 ,,,Upper Bound,4.39,
163 ,,5% Trimmed Mean,,3.50,
164 ,,Median,,4.00,
165 ,,Variance,,1.14,
166 ,,Std. Deviation,,1.07,
167 ,,Minimum,,2.00,
168 ,,Maximum,,5.00,
169 ,,Range,,3.00,
170 ,,Interquartile Range,,1.75,
171 ,,Skewness,,-.47,.75
172 ,,Kurtosis,,-.83,1.48
173 ,Charlies,Mean,,4.88,.44
174 ,,95% Confidence Interval for Mean,Lower Bound,3.83,
175 ,,,Upper Bound,5.92,
176 ,,5% Trimmed Mean,,4.86,
177 ,,Median,,5.00,
178 ,,Variance,,1.55,
179 ,,Std. Deviation,,1.25,
180 ,,Minimum,,3.00,
181 ,,Maximum,,7.00,
182 ,,Range,,4.00,
183 ,,Interquartile Range,,1.75,
184 ,,Skewness,,.30,.75
185 ,,Kurtosis,,.15,1.48
186 ])
187
188 AT_CLEANUP
189
190 AT_SETUP([EXAMINE -- extremes])
dnl Requests the six highest and six lowest values of v1 with the data
dnl weighted by w.  Two of the 21 cases carry a weight of 2, so the expected
dnl N is 23 while the Extreme Values table refers to case numbers up to 21.
191 AT_DATA([examine.sps], [dnl
192 data list free /V1 W
193 begin data.
194 1  1
195 2  1
196 3  2
197 3  1
198 4  1
199 5  1
200 6  1
201 7  1
202 8  1
203 9  1
204 10 1
205 11 1
206 12 1
207 13 1
208 14 1
209 15 1
210 16 1
211 17 1
212 18 2
213 19 1
214 20 1
215 end data.
216
217 weight by w.
218
219 examine v1 
220  /statistics=extreme(6)
221  .
222 ])
223
224 AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
225 Table: Case Processing Summary
226 ,Cases,,,,,
227 ,Valid,,Missing,,Total,
228 ,N,Percent,N,Percent,N,Percent
229 V1,23.00,100%,.00,0%,23.00,100%
230
231 Table: Extreme Values
232 ,,,Case Number,Value
233 V1,Highest,1,21,20.00
234 ,,2,20,19.00
235 ,,3,19,18.00
236 ,,4,18,17.00
237 ,,5,17,16.00
238 ,,6,16,15.00
239 ,Lowest,1,1,1.00
240 ,,2,2,2.00
241 ,,3,3,3.00
242 ,,4,4,3.00
243 ,,5,5,4.00
244 ,,6,6,5.00
245 ])
246
247 AT_CLEANUP
248
249
250
251 AT_SETUP([EXAMINE -- extremes with fractional weights])
dnl Weights in w are non-integer, so the expected N (19.430) is fractional.
dnl SET FORMAT=F20.3 makes every statistic print with three decimals.
dnl Both the five extreme values and the descriptives are compared.
252 AT_DATA([extreme.sps], [dnl
253 set format=F20.3.
254 data list notable list /w * x *.
255 begin data.
256  0.88  300000
257  0.86  320000
258  0.98  480000
259  0.93  960000
260  1.35  960000
261  1.31  960000
262  0.88  960000
263  0.88  1080000
264  0.88  1080000
265  0.95  1200000
266  1.47  1200000
267  0.93  1200000
268  0.98  1320000
269  1.31  1380000
270  0.93  1440000
271  0.88  1560000
272  1.56  1560000
273  1.47  1560000
274 end data.
275
276 weight by w.
277
278
279 EXAMINE
280         x
281         /STATISTICS = DESCRIPTIVES EXTREME (5)
282         .
283 ])
284
285 AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
286 Table: Case Processing Summary
287 ,Cases,,,,,
288 ,Valid,,Missing,,Total,
289 ,N,Percent,N,Percent,N,Percent
290 x,19.430,100%,.000,0%,19.430,100%
291
292 Table: Extreme Values
293 ,,,Case Number,Value
294 x,Highest,1,18,1560000.000
295 ,,2,17,1560000.000
296 ,,3,16,1560000.000
297 ,,4,15,1440000.000
298 ,,5,14,1380000.000
299 ,Lowest,1,1,300000.000
300 ,,2,2,320000.000
301 ,,3,3,480000.000
302 ,,4,4,960000.000
303 ,,5,5,960000.000
304
305 Table: Descriptives
306 ,,,Statistic,Std. Error
307 x,Mean,,1120010.293,86222.178
308 ,95% Confidence Interval for Mean,Lower Bound,939166.693,
309 ,,Upper Bound,1300853.894,
310 ,5% Trimmed Mean,,1141017.899,
311 ,Median,,1200000.000,
312 ,Variance,,144447748124.869,
313 ,Std. Deviation,,380062.821,
314 ,Minimum,,300000.000,
315 ,Maximum,,1560000.000,
316 ,Range,,1260000.000,
317 ,Interquartile Range,,467258.065,
318 ,Skewness,,-.887,.519
319 ,Kurtosis,,.340,1.005
320 ])
321
322 AT_CLEANUP
323
324 dnl Test the PERCENTILES subcommand of the EXAMINE command.
325 dnl In particular test that it behaves properly when there are only 
326 dnl a few cases.
327 AT_SETUP([EXAMINE -- percentiles])
dnl Runs EXAMINE five times on the same three cases, once per PERCENTILES
dnl method (HAVERAGE, WAVERAGE, ROUND, EMPIRICAL, AEMPIRICAL).  The first
dnl AT_CHECK sends the output to pspp.csv; the second compares that file
dnl with the expected tables, one summary/percentiles pair per run.
328 AT_DATA([examine.sps], [dnl
329 DATA LIST LIST /X *.
330 BEGIN DATA.
331 2.00 
332 8.00 
333 5.00 
334 END DATA.
335
336 EXAMINE /x
337         /PERCENTILES=HAVERAGE.
338
339 EXAMINE /x
340         /PERCENTILES=WAVERAGE.
341
342 EXAMINE /x
343         /PERCENTILES=ROUND.
344
345 EXAMINE /x
346         /PERCENTILES=EMPIRICAL.
347
348 EXAMINE /x
349         /PERCENTILES=AEMPIRICAL.
350 ])
351 AT_CHECK([pspp -o pspp.csv examine.sps])
352 AT_CHECK([cat pspp.csv], [0], [dnl
353 Table: Reading free-form data from INLINE.
354 Variable,Format
355 X,F8.0
356
357 Table: Case Processing Summary
358 ,Cases,,,,,
359 ,Valid,,Missing,,Total,
360 ,N,Percent,N,Percent,N,Percent
361 X,3,100%,0,0%,3,100%
362
363 Table: Percentiles
364 ,,Percentiles,,,,,,
365 ,,5,10,25,50,75,90,95
366 X,HAverage,.40,.80,2.00,5.00,8.00,8.00,8.00
367 ,Tukey's Hinges,,,3.50,5.00,6.50,,
368
369 Table: Case Processing Summary
370 ,Cases,,,,,
371 ,Valid,,Missing,,Total,
372 ,N,Percent,N,Percent,N,Percent
373 X,3,100%,0,0%,3,100%
374
375 Table: Percentiles
376 ,,Percentiles,,,,,,
377 ,,5,10,25,50,75,90,95
378 X,Weighted Average,.30,.60,1.50,3.50,5.75,7.10,7.55
379 ,Tukey's Hinges,,,3.50,5.00,6.50,,
380
381 Table: Case Processing Summary
382 ,Cases,,,,,
383 ,Valid,,Missing,,Total,
384 ,N,Percent,N,Percent,N,Percent
385 X,3,100%,0,0%,3,100%
386
387 Table: Percentiles
388 ,,Percentiles,,,,,,
389 ,,5,10,25,50,75,90,95
390 X,Rounded,.00,.00,2.00,5.00,5.00,8.00,8.00
391 ,Tukey's Hinges,,,3.50,5.00,6.50,,
392
393 Table: Case Processing Summary
394 ,Cases,,,,,
395 ,Valid,,Missing,,Total,
396 ,N,Percent,N,Percent,N,Percent
397 X,3,100%,0,0%,3,100%
398
399 Table: Percentiles
400 ,,Percentiles,,,,,,
401 ,,5,10,25,50,75,90,95
402 X,Empirical,2.00,2.00,2.00,5.00,8.00,8.00,8.00
403 ,Tukey's Hinges,,,3.50,5.00,6.50,,
404
405 Table: Case Processing Summary
406 ,Cases,,,,,
407 ,Valid,,Missing,,Total,
408 ,N,Percent,N,Percent,N,Percent
409 X,3,100%,0,0%,3,100%
410
411 Table: Percentiles
412 ,,Percentiles,,,,,,
413 ,,5,10,25,50,75,90,95
414 X,Empirical with averaging,2.00,2.00,2.00,5.00,8.00,8.00,8.00
415 ,Tukey's Hinges,,,3.50,5.00,6.50,,
416 ])
417 AT_CLEANUP
418
419 AT_SETUP([EXAMINE -- missing values])
dnl The last case has a system-missing x (".").  With /MISSING = PAIRWISE
dnl the expected summaries count it as missing, both overall (1 of 7) and
dnl within the y = 2 group (1 of 3).
420 AT_DATA([examine.sps], [dnl
421 DATA LIST LIST /x * y *.
422 BEGIN DATA.
423 1   1 
424 2   1
425 3   1
426 4   1
427 5   2
428 6   2
429 .   2
430 END DATA
431
432 EXAMINE /x by y
433         /MISSING = PAIRWISE
434         .
435 ])
436 AT_CHECK([pspp -o pspp.csv examine.sps])
437 AT_CHECK([cat pspp.csv], [0], [dnl
438 Table: Reading free-form data from INLINE.
439 Variable,Format
440 x,F8.0
441 y,F8.0
442
443 Table: Case Processing Summary
444 ,Cases,,,,,
445 ,Valid,,Missing,,Total,
446 ,N,Percent,N,Percent,N,Percent
447 x,6,85.7143%,1,14.2857%,7,100%
448
449 Table: Case Processing Summary
450 ,,Cases,,,,,
451 ,,Valid,,Missing,,Total,
452 ,y,N,Percent,N,Percent,N,Percent
453 x,1.00,4,100%,0,0%,4,100%
454 ,2.00,2,66.6667%,1,33.3333%,3,100%
455 ])
456 AT_CLEANUP
457
458
459 AT_SETUP([EXAMINE -- user missing values])
dnl Declares user-missing values for x (9999999999) and y (99).  With
dnl /MISSING=PAIRWISE the expected summary counts missing cases separately
dnl for each variable: two of three for x, one of three for y.
460 AT_DATA([examine-m.sps], [dnl
461 DATA LIST notable LIST /x * y *.
462 BEGIN DATA.
463 1                   2
464 9999999999          2
465 9999999999          99
466 END DATA.
467
468 MISSING VALUES x (9999999999).
469 MISSING VALUES y (99).
470
471 EXAMINE
472         /VARIABLES= x y
473         /MISSING=PAIRWISE.
474 ])
475 AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
476 Table: Case Processing Summary
477 ,Cases,,,,,
478 ,Valid,,Missing,,Total,
479 ,N,Percent,N,Percent,N,Percent
480 x,1,33.3333%,2,66.6667%,3,100%
481 y,2,66.6667%,1,33.3333%,3,100%
482 ])
483 AT_CLEANUP
484
485 AT_SETUP([EXAMINE -- missing values and percentiles])
dnl Two of the three cases hold the user-missing value 99, so percentiles
dnl are requested with only one usable case.
486 AT_DATA([examine.sps], [dnl
487 DATA LIST LIST /X *.
488 BEGIN DATA.
489 99
490 99
491 5.00
492 END DATA.
493
494 MISSING VALUE X (99).
495
496 EXAMINE /x
497         /PERCENTILES=HAVERAGE.
498 ])
499 AT_CHECK([pspp -o pspp.csv examine.sps])
500 dnl Ignore output -- this is just a no-crash check.
501 AT_CLEANUP
502
503 dnl Tests the trimmed mean calculation in the case
504 dnl where the data is weighted towards the centre.
505 AT_SETUP([EXAMINE -- trimmed mean])
dnl The weights are 1, 49 and 2 for x = 1, 2 and 3.  The expected output
dnl has a 5% trimmed mean of 2.00 against a plain mean of 2.02.
506 AT_DATA([examine.sps], [dnl
507 DATA LIST LIST /X * C *.
508 BEGIN DATA.
509 1 1
510 2 49
511 3 2
512 END DATA.
513
514 WEIGHT BY c.
515
516 EXAMINE
517         x
518         /STATISTICS=DESCRIPTIVES
519         .
520 ])
521 AT_CHECK([pspp -o pspp.csv examine.sps])
522 AT_CHECK([cat pspp.csv], [0], [dnl
523 Table: Reading free-form data from INLINE.
524 Variable,Format
525 X,F8.0
526 C,F8.0
527
528 Table: Case Processing Summary
529 ,Cases,,,,,
530 ,Valid,,Missing,,Total,
531 ,N,Percent,N,Percent,N,Percent
532 X,52.00,100%,.00,0%,52.00,100%
533
534 Table: Descriptives
535 ,,,Statistic,Std. Error
536 X,Mean,,2.02,.03
537 ,95% Confidence Interval for Mean,Lower Bound,1.95,
538 ,,Upper Bound,2.09,
539 ,5% Trimmed Mean,,2.00,
540 ,Median,,2.00,
541 ,Variance,,.06,
542 ,Std. Deviation,,.24,
543 ,Minimum,,1.00,
544 ,Maximum,,3.00,
545 ,Range,,2.00,
546 ,Interquartile Range,,.00,
547 ,Skewness,,1.19,.33
548 ,Kurtosis,,15.73,.65
549 ])
550 AT_CLEANUP
551
552 AT_SETUP([EXAMINE -- crash bug])
dnl Requests descriptives for a with two BY factors (x and y) on a data set
dnl of only three cases.
553 AT_DATA([examine.sps], [dnl
554 data list list /a * x * y *.
555 begin data.
556 3 1 3
557 5 1 4
558 7 2 3
559 end data.
560
561 examine a by x by y
562         /statistics=DESCRIPTIVES
563         . 
564 ])
565 AT_CHECK([pspp -o pspp.csv examine.sps])
566 dnl Ignore output -- this is just a no-crash check.
567 AT_CLEANUP
568
569 dnl Test that two consecutive EXAMINE commands don't crash PSPP.
570 AT_SETUP([EXAMINE -- consecutive runs don't crash])
dnl The first EXAMINE analyses z with a BY factor (y); the second analyses
dnl the same variable without one, on the same active data set.
571 AT_DATA([examine.sps], [dnl
572 data list list /y * z *.
573 begin data.
574 6 4
575 5 3
576 7 6
577 end data.
578
579 EXAMINE /VARIABLES= z BY y.
580
581 EXAMINE /VARIABLES= z. 
582 ])
583 AT_CHECK([pspp -o pspp.csv examine.sps])
584 dnl Ignore output -- this is just a no-crash check.
585 AT_CLEANUP
586
587 dnl Test that /DESCRIPTIVES does not crash in presence of missing values.
588 AT_SETUP([EXAMINE -- missing values don't crash])
dnl The third case has a system-missing x (".") inside the y = 0 group.
589 AT_DATA([examine.sps], [dnl
590 data list list /x * y *.
591 begin data.
592 1 0
593 2 0
594 . 0
595 3 1
596 4 1
597 end data.
598 examine x by y /statistics=descriptives. 
599 ])
600 AT_CHECK([pspp -o pspp.csv examine.sps])
601 dnl Ignore output -- this is just a no-crash check.
602 AT_CLEANUP
603
604 dnl Test that having only a single case doesn't crash.
605 AT_SETUP([EXAMINE -- single case doesn't crash])
dnl Requests descriptives and a histogram for a data set with one case.
dnl Only the exit status (0) is checked; stdout is marked "ignore".
606 AT_DATA([examine.sps], [dnl
607 DATA LIST LIST /quality * .
608 BEGIN DATA
609 3  
610 END DATA
611
612
613 EXAMINE
614         quality 
615         /STATISTICS descriptives 
616         /PLOT = histogram
617         .
618 ])
619 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
620 dnl Ignore output -- this is just a no-crash check.
621 AT_CLEANUP
622
623 dnl Test that all-missing data doesn't crash.
624 AT_SETUP([EXAMINE -- all-missing data doesn't crash])
dnl Every case is system-missing (".").  The command asks for plots, an ID
dnl variable, statistics and percentiles at once; only exit status 0 is
dnl checked.
dnl NOTE(review): PLOT= below has no leading "/", unlike the other
dnl subcommands -- confirm whether that is intentional.
625 AT_DATA([examine.sps], [dnl
626 DATA LIST LIST /x *.
627 BEGIN DATA.
628 .
629 .
630 .
631 .
632 END DATA.
633
634 EXAMINE /x 
635         PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
636         /ID=x
637         /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
638         /PERCENTILE=AEMPIRICAL
639         .
640 ])
641 AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
642 dnl Ignore output -- this is just a no-crash check.
643 AT_CLEANUP
644
645 dnl Test that big input doesn't crash (bug 11307).
646 AT_SETUP([EXAMINE -- big input doesn't crash])
dnl An INPUT PROGRAM loop generates 50000 cases with X=NORMAL(10), then
dnl descriptives are requested for x.
647 AT_DATA([examine.sps], [dnl
648 INPUT PROGRAM.
649         LOOP #I=1 TO 50000.
650                 COMPUTE X=NORMAL(10).
651                 END CASE.
652         END LOOP.
653         END FILE.
654 END INPUT PROGRAM.
655
656
657 EXAMINE /x
658         /STATISTICS=DESCRIPTIVES.
659 ])
660 AT_CHECK([pspp -o pspp.csv examine.sps])
661 dnl Ignore output -- this is just a no-crash check.
662 AT_CLEANUP
663
664 dnl Another test that big input doesn't crash.
665 dnl The actual bug that this checks for has been lost.
666 AT_SETUP([EXAMINE -- big input doesn't crash 2])
dnl A Perl script writes 100000 "AB12" lines followed by 100000 "AB04"
dnl lines to large.txt.  The syntax file reads that file, aggregates with X
dnl as the break variable and examines the result.  A second script then
dnl appends 25000 alternating "AB04"/"AB12" pairs to large.txt and the same
dnl syntax file is run again.
667 AT_DATA([make-big-input.pl], 
668   [for ($i=0; $i<100000; $i++) { print "AB12\n" };
669    for ($i=0; $i<100000; $i++) { print "AB04\n" };
670 ])
671 AT_CHECK([$PERL make-big-input.pl > large.txt])
672 AT_DATA([examine.sps], [dnl
673 DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
674
675
676 AGGREGATE OUTFILE=* /BREAK=X /A=N.
677
678
679 EXAMINE /A BY X.
680 ])
681 AT_CHECK([pspp -o pspp.csv examine.sps])
682 dnl Ignore output -- this is just a no-crash check.
683 AT_DATA([more-big-input.pl], 
684   [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
685 ])
686 AT_CHECK([$PERL more-big-input.pl >> large.txt])
687 AT_CHECK([pspp -o pspp.csv examine.sps])
688 dnl Ignore output -- this is just a no-crash check.
689 AT_CLEANUP
690
691
692 dnl Test that the ID subcommand works with non-numeric variables
693 AT_SETUP([EXAMINE -- non-numeric ID])
dnl With /id = y the expected Extreme Values table has a "y" column where
dnl the other tests have "Case Number", holding the string value of y.
dnl The expected strings are padded with spaces to the A12 width, so the
dnl trailing blanks inside the table below are significant.
694
695 AT_DATA([examine-id.sps], [dnl
696 data list notable list /x * y (a12).
697 begin data.
698 1  one
699 2  two
700 3  three
701 4  four
702 5  five
703 6  six
704 7  seven
705 8  eight
706 9  nine
707 10 ten
708 11 eleven
709 12 twelve
710 30 thirty
711 300 threehundred
712 end data.
713
714 examine x
715         /statistics = extreme
716         /id = y
717         /plot = boxplot
718         .
719 ])
720
721 AT_CHECK([pspp -O format=csv examine-id.sps], [0], 
722 [Table: Case Processing Summary
723 ,Cases,,,,,
724 ,Valid,,Missing,,Total,
725 ,N,Percent,N,Percent,N,Percent
726 x,14,100%,0,0%,14,100%
727
728 Table: Extreme Values
729 ,,,y,Value
730 x,Highest,1,threehundred,300.00
731 ,,2,thirty      ,30.00
732 ,,3,twelve      ,12.00
733 ,,4,eleven      ,11.00
734 ,,5,ten         ,10.00
735 ,Lowest,1,one         ,1.00
736 ,,2,two         ,2.00
737 ,,3,three       ,3.00
738 ,,4,four        ,4.00
739 ,,5,five        ,5.00
740 ])
741
742 AT_CLEANUP 
743
744 dnl Test for a crash which happened on cleanup from a bad input syntax
745 AT_SETUP([EXAMINE -- Bad Input])
dnl /PLOT is given an unrecognised keyword (lkajsdas).  The check expects
dnl pspp to exit with status 1; stdout is marked "ignore".
746
747 AT_DATA([examine-bad.sps], [dnl
748 data list list /h * g *.
749 begin data.
750 1 1
751 2 1
752 3 1
753 4 1
754 5 2
755 6 2
756 7 2
757 8 2
758 9 2
759 end data.
760
761 EXAMINE 
762         /VARIABLES= h
763         BY  g
764         /STATISTICS = DESCRIPTIVES EXTREME
765         /PLOT = lkajsdas
766         .
767 ])
768
769 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
770
771 AT_CLEANUP 
772
773
774 dnl Check the MISSING=REPORT option
775 AT_SETUP([EXAMINE -- MISSING=REPORT])
dnl The factor g has user-missing values (9, 99, 999) and four cases where
dnl it is system-missing.  With /MISSING = REPORT the expected output lists
dnl those as separate factor levels labelled "(missing)".  /NOTOTAL is
dnl given, and the expected output contains only the tables broken down by g.
dnl NOTE(review): in the groups with only four cases the fifth "Highest"
dnl row is "0,0" and the fifth "Lowest" row repeats the fourth.  This looks
dnl like pasted program output rather than a verified value -- confirm.
776
777
778 AT_DATA([examine-report.sps], [dnl
779 set format = F22.0.
780 data list list /x * g *.
781 begin data.
782 1   1
783 2   1
784 3   1
785 4   1
786 5   1
787 6   1
788 7   1
789 8   1
790 9   1
791 10   2
792 20   2
793 30   2
794 40   2
795 50   2
796 60   2
797 70   2
798 80   2
799 90   2
800 101   9
801 201   9
802 301   9
803 401   9
804 501   99
805 601   99
806 701   99
807 801   99
808 901   99
809 1001  .
810 2002  .
811 3003  .
812 4004  .
813 end data.
814
815 MISSING VALUES g (9, 99, 999).
816
817 EXAMINE
818         /VARIABLES = x
819         BY  g
820         /STATISTICS = EXTREME
821         /NOTOTAL
822         /MISSING = REPORT.
823 ])
824
825
826 AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
827 Table: Reading free-form data from INLINE.
828 Variable,Format
829 x,F8.0
830 g,F8.0
831
832 Table: Case Processing Summary
833 ,,Cases,,,,,
834 ,,Valid,,Missing,,Total,
835 ,g,N,Percent,N,Percent,N,Percent
836 x,. (missing),4,100%,0,0%,4,100%
837 ,1,9,100%,0,0%,9,100%
838 ,2,9,100%,0,0%,9,100%
839 ,9 (missing),4,100%,0,0%,4,100%
840 ,99 (missing),5,100%,0,0%,5,100%
841
842 Table: Extreme Values
843 ,g,,,Case Number,Value
844 x,. (missing),Highest,1,31,4004
845 ,,,2,30,3003
846 ,,,3,29,2002
847 ,,,4,28,1001
848 ,,,5,0,0
849 ,,Lowest,1,28,1001
850 ,,,2,29,2002
851 ,,,3,30,3003
852 ,,,4,31,4004
853 ,,,5,31,4004
854 ,1,Highest,1,9,9
855 ,,,2,8,8
856 ,,,3,7,7
857 ,,,4,6,6
858 ,,,5,5,5
859 ,,Lowest,1,1,1
860 ,,,2,2,2
861 ,,,3,3,3
862 ,,,4,4,4
863 ,,,5,5,5
864 ,2,Highest,1,18,90
865 ,,,2,17,80
866 ,,,3,16,70
867 ,,,4,15,60
868 ,,,5,14,50
869 ,,Lowest,1,10,10
870 ,,,2,11,20
871 ,,,3,12,30
872 ,,,4,13,40
873 ,,,5,14,50
874 ,9 (missing),Highest,1,22,401
875 ,,,2,21,301
876 ,,,3,20,201
877 ,,,4,19,101
878 ,,,5,0,0
879 ,,Lowest,1,19,101
880 ,,,2,20,201
881 ,,,3,21,301
882 ,,,4,22,401
883 ,,,5,22,401
884 ,99 (missing),Highest,1,27,901
885 ,,,2,26,801
886 ,,,3,25,701
887 ,,,4,24,601
888 ,,,5,23,501
889 ,,Lowest,1,23,501
890 ,,,2,24,601
891 ,,,3,25,701
892 ,,,4,26,801
893 ,,,5,27,901
894 ])
895
896
897 AT_CLEANUP 
898
899
900 dnl Run a test of the basic STATISTICS using a "real"
901 dnl dataset and comparing with "real" results kindly
902 dnl provided by Olaf Nöhring
903 AT_SETUP([EXAMINE -- sample unweighted])
dnl Requests descriptives for 100 unweighted cases.  SET FORMAT = F22.4
dnl makes the statistics print with four decimals in the expected output.
904
905 AT_DATA([sample.sps], [dnl
906 set format = F22.4.
907 DATA LIST notable LIST /X *
908 BEGIN DATA.
909 461.19000000
910 466.38000000
911 479.46000000
912 480.10000000
913 483.43000000
914 488.30000000
915 489.00000000
916 491.62000000
917 505.62000000
918 511.30000000
919 521.53000000
920 526.70000000
921 528.25000000
922 538.70000000
923 540.22000000
924 540.58000000
925 546.10000000
926 548.17000000
927 553.99000000
928 566.21000000
929 575.90000000
930 584.38000000
931 593.40000000
932 357.05000000
933 359.73000000
934 360.48000000
935 373.98000000
936 374.13000000
937 381.45000000
938 383.72000000
939 390.00000000
940 400.34000000
941 415.32000000
942 415.91000000
943 418.30000000
944 421.03000000
945 422.43000000
946 426.93000000
947 433.25000000
948 436.89000000
949 445.33000000
950 446.33000000
951 446.55000000
952 456.44000000
953 689.49000000
954 691.92000000
955 695.00000000
956 695.36000000
957 698.21000000
958 699.46000000
959 706.61000000
960 710.69000000
961 715.82000000
962 715.82000000
963 741.39000000
964 752.27000000
965 756.73000000
966 757.74000000
967 759.57000000
968 796.07000000
969 813.78000000
970 817.25000000
971 825.48000000
972 831.28000000
973 849.24000000
974 890.00000000
975 894.78000000
976 935.65000000
977 935.90000000
978 945.90000000
979 1012.8600000
980 1022.6000000
981 1061.8100000
982 1063.5000000
983 1077.2300000
984 1151.6300000
985 1355.2800000
986 598.88000000
987 606.91000000
988 621.60000000
989 624.80000000
990 636.13000000
991 637.38000000
992 640.32000000
993 649.35000000
994 656.51000000
995 662.55000000
996 664.69000000
997 106.22000000
998 132.24000000
999 174.76000000
1000 204.85000000
1001 264.93000000
1002 264.99000000
1003 269.84000000
1004 325.12000000
1005 331.67000000
1006 337.26000000
1007 347.68000000
1008 354.91000000
1009 END DATA.
1010
1011 EXAMINE
1012         x
1013         /STATISTICS=DESCRIPTIVES
1014         .
1015 ])
1016
1017 AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
1018 Table: Case Processing Summary
1019 ,Cases,,,,,
1020 ,Valid,,Missing,,Total,
1021 ,N,Percent,N,Percent,N,Percent
1022 X,100,100%,0,0%,100,100%
1023
1024 Table: Descriptives
1025 ,,,Statistic,Std. Error
1026 X,Mean,,587.6603,23.2665
1027 ,95% Confidence Interval for Mean,Lower Bound,541.4946,
1028 ,,Upper Bound,633.8260,
1029 ,5% Trimmed Mean,,579.7064,
1030 ,Median,,547.1350,
1031 ,Variance,,54132.8466,
1032 ,Std. Deviation,,232.6647,
1033 ,Minimum,,106.2200,
1034 ,Maximum,,1355.2800,
1035 ,Range,,1249.0600,
1036 ,Interquartile Range,,293.1575,
1037 ,Skewness,,.6331,.2414
1038 ,Kurtosis,,.5300,.4783
1039 ])
1040
1041 AT_CLEANUP 
1042
1043
1044
1045 dnl Test for a crash which happened on bad input syntax
1046 AT_SETUP([EXAMINE -- Empty Parentheses])
dnl SPREADLEVEL is given an empty argument list "()".  The check expects
dnl pspp to exit with status 1; stdout is marked "ignore".
1047
1048 AT_DATA([examine-empty-parens.sps], [dnl
1049 DATA LIST notable LIST /X *
1050 BEGIN DATA.
1051 2
1052 3
1053 END DATA.
1054
1055
1056 EXAMINE
1057         x
1058         /PLOT = SPREADLEVEL()
1059         .
1060 ])
1061
1062 AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
1063
1064 AT_CLEANUP 
1065
1066
1067
1068
1069 dnl Test for another crash which happened on bad input syntax
1070 AT_SETUP([EXAMINE -- Bad variable])
dnl /VARIABLES is followed by "/" where the other tests use "=".  The check
dnl expects pspp to exit with status 1; stdout is marked "ignore".
1071
1072 AT_DATA([examine-bad-variable.sps], [dnl
1073 data list list /h * g *.
1074 begin data.
1075 3 1
1076 4 1
1077 5 2
1078 end data.
1079
1080 EXAMINE
1081         /VARIABLES/ h
1082         BY  g
1083         .
1084 ])
1085
1086 AT_CHECK([pspp -o pspp.csv examine-bad-variable.sps], [1], [ignore])
1087
1088 AT_CLEANUP 
1089
1090
1091
1092 dnl Test for yet another crash. This time for extremes vs. missing weight values.
1093 AT_SETUP([EXAMINE -- Extremes vs. Missing Weights])
dnl The weight variable g is system-missing (".") for the second case.  The
dnl expected output starts with the warning about ignored weight values and
dnl the Case Processing Summary counts three cases.
dnl NOTE(review): the expected Extreme Values table still lists case 2
dnl (value 4.00), the case whose weight is missing -- confirm that this is
dnl the intended behaviour and not just pasted program output.
1094
1095 AT_DATA([examine-missing-weights.sps], [dnl
1096 data list notable list /h * g *.
1097 begin data.
1098 3 1
1099 4 .
1100 5 1
1101 2 1
1102 end data.
1103
1104 WEIGHT BY g.
1105
1106 EXAMINE h
1107         /STATISTICS extreme(3)
1108         .
1109 ])
1110
1111 AT_CHECK([pspp -O format=csv  examine-missing-weights.sps], [0], [dnl
1112 "examine-missing-weights.sps:13: warning: EXAMINE: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1113
1114 Table: Case Processing Summary
1115 ,Cases,,,,,
1116 ,Valid,,Missing,,Total,
1117 ,N,Percent,N,Percent,N,Percent
1118 h,3.00,100%,.00,0%,3.00,100%
1119
1120 Table: Extreme Values
1121 ,,,Case Number,Value
1122 h,Highest,1,3,5.00
1123 ,,2,2,4.00
1124 ,,3,1,3.00
1125 ,Lowest,1,4,2.00
1126 ,,2,1,3.00
1127 ,,3,2,4.00
1128 ])
1129
1130 AT_CLEANUP 
1131
1132
1133