added FREQUENCIES regression test for bug#48128
[pspp] / tests / language / stats / frequencies.at
1 AT_BANNER([FREQUENCIES procedure])
2
3 AT_SETUP([FREQUENCIES string variable])
4 AT_DATA([frequencies.sps],
5   [DATA LIST FREE/
6    name  (A8) value * quantity .
7 BEGIN DATA.
8 foo 1 5
9 bar 2 6
10 baz 1 9
11 quux 3 1
12 bar 1 2
13 baz 4 3
14 baz 1 4
15 baz 1 1
16 foo 6 0
17 quux 5 8
18 END DATA.
19 EXECUTE.
20
21 FREQUENCIES /VAR = name/ORDER=ANALYSIS.
22 ])
23 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
24 Table: name
25 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
26 ,bar     ,2,20.00,20.00,20.00
27 ,baz     ,4,40.00,40.00,60.00
28 ,foo     ,2,20.00,20.00,80.00
29 ,quux    ,2,20.00,20.00,100.00
30 Total,,10,100.0,100.0,
31 ])
32 AT_CLEANUP
33
34 # Tests for a bug where pspp would crash if two FREQUENCIES commands
35 # existed in an input file.
36 AT_SETUP([FREQUENCIES two runs crash])
37 AT_DATA([frequencies.sps],
38   [data list free /v1 v2.
39 begin data.
40 0 1
41 2 3 
42 4 5
43 3 4
44 end data.
45
46 frequencies v1 v2/statistics=none/ORDER=VARIABLE.
47 frequencies v1 v2/statistics=none.
48 ])
49 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
50   [Table: v1
51 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
52 ,.00,1,25.00,25.00,25.00
53 ,2.00,1,25.00,25.00,50.00
54 ,3.00,1,25.00,25.00,75.00
55 ,4.00,1,25.00,25.00,100.00
56 Total,,4,100.0,100.0,
57
58 Table: v2
59 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
60 ,1.00,1,25.00,25.00,25.00
61 ,3.00,1,25.00,25.00,50.00
62 ,4.00,1,25.00,25.00,75.00
63 ,5.00,1,25.00,25.00,100.00
64 Total,,4,100.0,100.0,
65
66 Table: v1
67 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
68 ,.00,1,25.00,25.00,25.00
69 ,2.00,1,25.00,25.00,50.00
70 ,3.00,1,25.00,25.00,75.00
71 ,4.00,1,25.00,25.00,100.00
72 Total,,4,100.0,100.0,
73
74 Table: v2
75 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
76 ,1.00,1,25.00,25.00,25.00
77 ,3.00,1,25.00,25.00,50.00
78 ,4.00,1,25.00,25.00,75.00
79 ,5.00,1,25.00,25.00,100.00
80 Total,,4,100.0,100.0,
81 ])
82 AT_CLEANUP
83
84 # Test that the LIMIT specification works.
85 AT_SETUP([FREQUENCIES with LIMIT])
86 AT_DATA([frequencies.sps],
87   [data list free /v1 v2.
88 begin data.
89 0 1
90 2 5
91 4 3
92 3 5
93 end data.
94
95 frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
96 ])
97 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
98 Table: v2
99 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
100 ,1.00,1,25.00,25.00,25.00
101 ,3.00,1,25.00,25.00,50.00
102 ,5.00,2,50.00,50.00,100.00
103 Total,,4,100.0,100.0,
104 ])
105 AT_CLEANUP
106
107 # Tests for a bug where PSPP would crash when a FREQUENCIES command
108 # was used with the HTML output driver.
109 AT_SETUP([FREQUENCIES HTML output crash])
110 AT_DATA([frequencies.sps],
111   [data list free /v1 v2.
112 begin data.
113 0 1
114 2 3 
115 4 5
116 3 4
117 end data.
118
119 list.
120
121 frequencies v1/statistics=none.
122 ])
123 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
124   [Table: Data List
125 v1,v2
126 .00,1.00
127 2.00,3.00
128 4.00,5.00
129 3.00,4.00
130
131 Table: v1
132 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
133 ,.00,1,25.00,25.00,25.00
134 ,2.00,1,25.00,25.00,50.00
135 ,3.00,1,25.00,25.00,75.00
136 ,4.00,1,25.00,25.00,100.00
137 Total,,4,100.0,100.0,
138 ])
139 AT_CHECK([test -s pspp.html])
140 AT_CLEANUP
141
142 # Tests for a bug which crashed PSPP when a piechart with too many
143 # segments was requested.
144 AT_SETUP([FREQUENCIES pie chart crash])
145 AT_DATA([frequencies.sps],
146   [data list list /x * w *.
147 begin data.
148 1  4
149 34 10
150 -9 15
151 232 6
152 11  4
153 134 1
154 9  5
155 32 16
156 -2 6
157 2  16
158 20  6
159 end data.
160
161 weight by w.
162
163 frequencies /x /format=notable /statistics=none
164         /piechart.
165 ])
166 # Cannot use the CSV driver for this because it does not output charts
167 # at all.
168 AT_CHECK([pspp frequencies.sps], [0], [dnl
169 Reading free-form data from INLINE.
170 +--------+------+
171 |Variable|Format|
172 #========#======#
173 |x       |F8.0  |
174 |w       |F8.0  |
175 +--------+------+
176 ])
177 AT_CLEANUP
178
179 dnl Check that histogram subcommand runs without crashing
180 AT_SETUP([FREQUENCIES histogram crash])
181 AT_DATA([frequencies.sps],
182   [data list notable list /x * w *.
183 begin data.
184 1  4
185 34 10
186 -9 15
187 232 6
188 11  4
189 134 1
190 9  5
191 32 16
192 -2 6
193 2  16
194 20  6
195 end data.
196
197 weight by w.
198
199 frequencies /x 
200             /format=notable 
201             /statistics=none
202             /histogram=minimum(0) maximum(50) percent(5) normal.
203 ])
204 # Cannot use the CSV driver for this because it does not output charts
205 # at all.
206 AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore])
207 AT_CLEANUP
208
209 # Tests for a bug which crashed PSPP when the median and a histogram
210 # were both requested.
211 AT_SETUP([FREQUENCIES median with histogram crash])
212 AT_DATA([frequencies.sps], [dnl
213 data list list notable /x.
214 begin data.
215 1
216 end data.
217
218 frequencies /x /histogram /STATISTICS=median.
219 ])
220 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
221 dnl Ignore output - No crash test.
222 AT_CLEANUP
223
224 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
225 # crash (bug #11492).
226 AT_SETUP([FREQUENCIES crash after TEMPORARY])
227 AT_DATA([frequencies.sps],
228   [DATA LIST LIST /SEX (A1) X *.
229 BEGIN DATA.
230 M 31
231 F 21
232 M 41
233 F 31
234 M 13
235 F 12
236 M 14
237 F 13
238 END DATA.
239
240
241 TEMPORARY
242 SELECT IF SEX EQ 'F'
243 FREQUENCIES /X .
244
245 FINISH
246 ])
247 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
248   [Table: Reading free-form data from INLINE.
249 Variable,Format
250 SEX,A1
251 X,F8.0
252
253 Table: X
254 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
255 ,12.00,1,25.00,25.00,25.00
256 ,13.00,1,25.00,25.00,50.00
257 ,21.00,1,25.00,25.00,75.00
258 ,31.00,1,25.00,25.00,100.00
259 Total,,4,100.0,100.0,
260
261 Table: X
262 N,Valid,4
263 ,Missing,0
264 Mean,,19.25
265 Std Dev,,8.81
266 Minimum,,12.00
267 Maximum,,31.00
268 ])
269 AT_CLEANUP
270
271 m4_define([FREQUENCIES_NTILES_OUTPUT],
272   [Table: x
273 N,Valid,5
274 ,Missing,0
275 Mean,,3.00
276 Std Dev,,1.58
277 Minimum,,1.00
278 Maximum,,5.00
279 Percentiles,0,1.00
280 ,25,2.00
281 ,33,2.33
282 ,50 (Median),3.00
283 ,67,3.67
284 ,75,4.00
285 ,100,5.00
286 ])
# Percentiles of the five values 1, 2, 3, 4, 5 requested explicitly with
# PERCENTILES.  The expected table is FREQUENCIES_NTILES_OUTPUT, which
# reports N=5, mean 3.00, minimum 1.00 and maximum 5.00, so all five data
# rows must be present in the input.
287 AT_SETUP([FREQUENCIES basic percentiles])
288 AT_DATA([frequencies.sps],
289   [DATA LIST LIST notable /x * .
290 BEGIN DATA.
291 1
292 2
293 3
294 4
295 5
296 END DATA.
297
298 FREQUENCIES 
299         VAR=x
300         /FORMAT=NOTABLE
301         /PERCENTILES = 0 25 33.333 50 66.666 75 100.
302 ])
303 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
304   [FREQUENCIES_NTILES_OUTPUT])
305 AT_CLEANUP
306
# Same five values 1, 2, 3, 4, 5 and same expected table
# (FREQUENCIES_NTILES_OUTPUT) as the explicit PERCENTILES test, but the cut
# points are requested through two NTILES subcommands.
307 AT_SETUP([FREQUENCIES basic n-tiles])
308 AT_DATA([frequencies.sps],
309   [DATA LIST LIST notable /x * .
310 BEGIN DATA.
311 1
312 2
313 3
314 4
315 5
316 END DATA.
317
318 FREQUENCIES 
319         VAR=x
320         /FORMAT=NOTABLE
321         /NTILES = 3
322         /NTILES = 4.
323 ])
324 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
325   [FREQUENCIES_NTILES_OUTPUT])
326 AT_CLEANUP
327
# Percentiles of the values 1, 2, 3, 4, 5 with ALGORITHM=COMPATIBLE.  The
# expected 25th and 75th percentiles are 1.50 and 4.50, where the
# enhanced-percentiles test on the same data expects 2.00 and 4.00.  The
# expected frequency table lists each of 1.00 to 5.00 once, so all five
# data rows must be present in the input.
328 AT_SETUP([FREQUENCIES compatibility percentiles])
329 AT_DATA([frequencies.sps],
330   [DATA LIST LIST notable /X * .
331 BEGIN DATA.
332 1
333 2
334 3
335 4
336 5
337 END DATA.
338
339 FREQUENCIES 
340         VAR=x
341         /ALGORITHM=COMPATIBLE
342         /PERCENTILES = 0 25 50 75 100.
343 ])
344 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
345   [Table: X
346 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
347 ,1.00,1,20.00,20.00,20.00
348 ,2.00,1,20.00,20.00,40.00
349 ,3.00,1,20.00,20.00,60.00
350 ,4.00,1,20.00,20.00,80.00
351 ,5.00,1,20.00,20.00,100.00
352 Total,,5,100.0,100.0,
353
354 Table: X
355 N,Valid,5
356 ,Missing,0
357 Mean,,3.00
358 Std Dev,,1.58
359 Minimum,,1.00
360 Maximum,,5.00
361 Percentiles,0,1.00
362 ,25,1.50
363 ,50 (Median),3.00
364 ,75,4.50
365 ,100,5.00
366 ])
367 AT_CLEANUP
368
# Percentiles of the values 1, 2, 3, 4, 5 with no ALGORITHM subcommand.
# The expected quartiles are 2.00, 3.00 and 4.00.  The expected frequency
# table lists each of 1.00 to 5.00 once, so all five data rows must be
# present in the input.
369 AT_SETUP([FREQUENCIES enhanced percentiles])
370 AT_DATA([frequencies.sps],
371   [DATA LIST LIST notable /X * .
372 BEGIN DATA.
373 1
374 2
375 3
376 4
377 5
378 END DATA.
379
380 FREQUENCIES 
381         VAR=x
382         /PERCENTILES = 0 25 50 75 100.
383 ])
384 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
385   [Table: X
386 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
387 ,1.00,1,20.00,20.00,20.00
388 ,2.00,1,20.00,20.00,40.00
389 ,3.00,1,20.00,20.00,60.00
390 ,4.00,1,20.00,20.00,80.00
391 ,5.00,1,20.00,20.00,100.00
392 Total,,5,100.0,100.0,
393
394 Table: X
395 N,Valid,5
396 ,Missing,0
397 Mean,,3.00
398 Std Dev,,1.58
399 Minimum,,1.00
400 Maximum,,5.00
401 Percentiles,0,1.00
402 ,25,2.00
403 ,50 (Median),3.00
404 ,75,4.00
405 ,100,5.00
406 ])
407 AT_CLEANUP
408
409 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
410 AT_DATA([frequencies.sps],
411   [DATA LIST LIST notable /X * F *.
412 BEGIN DATA.
413 1 2
414 2 2
415 3 2
416 4 1
417 4 1
418 5 1
419 5 1
420 END DATA.
421
422 WEIGHT BY f.
423
424 FREQUENCIES 
425         VAR=x
426         /PERCENTILES = 0 25 50 75 100.
427 ])
428 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
429   [Table: X
430 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
431 ,1.00,2.00,20.00,20.00,20.00
432 ,2.00,2.00,20.00,20.00,40.00
433 ,3.00,2.00,20.00,20.00,60.00
434 ,4.00,2.00,20.00,20.00,80.00
435 ,5.00,2.00,20.00,20.00,100.00
436 Total,,10.00,100.0,100.0,
437
438 Table: X
439 N,Valid,10.00
440 ,Missing,.00
441 Mean,,3.00
442 Std Dev,,1.49
443 Minimum,,1.00
444 Maximum,,5.00
445 Percentiles,0,1.00
446 ,25,2.00
447 ,50 (Median),3.00
448 ,75,4.00
449 ,100,5.00
450 ])
451 AT_CLEANUP
452
453 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
454 AT_DATA([frequencies.sps],
455   [DATA LIST LIST notable /X * F *.
456 BEGIN DATA.
457 1 1
458 3 2
459 4 1
460 5 1
461 5 1
462 END DATA.
463
464 WEIGHT BY f.
465
466 FREQUENCIES 
467         VAR=x
468         /PERCENTILES = 0 25 50 75 100.
469 ])
470 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
471   [Table: X
472 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
473 ,1.00,1.00,16.67,16.67,16.67
474 ,3.00,2.00,33.33,33.33,50.00
475 ,4.00,1.00,16.67,16.67,66.67
476 ,5.00,2.00,33.33,33.33,100.00
477 Total,,6.00,100.0,100.0,
478
479 Table: X
480 N,Valid,6.00
481 ,Missing,.00
482 Mean,,3.50
483 Std Dev,,1.52
484 Minimum,,1.00
485 Maximum,,5.00
486 Percentiles,0,1.00
487 ,25,3.00
488 ,50 (Median),3.50
489 ,75,4.75
490 ,100,5.00
491 ])
492 AT_CLEANUP
493
494 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
495 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
496 AT_DATA([frequencies.sps],
497   [DATA LIST LIST notable /X * F *.
498 BEGIN DATA.
499 1 7
500 2 16
501 3 12
502 4 5
503 END DATA.
504
505 WEIGHT BY f.
506
507 FREQUENCIES 
508         VAR=x
509         /PERCENTILES = 0 25 50 75 100.
510 ])
511 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
512 Table: X
513 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
514 ,1.00,7.00,17.50,17.50,17.50
515 ,2.00,16.00,40.00,40.00,57.50
516 ,3.00,12.00,30.00,30.00,87.50
517 ,4.00,5.00,12.50,12.50,100.00
518 Total,,40.00,100.0,100.0,
519
520 Table: X
521 N,Valid,40.00
522 ,Missing,.00
523 Mean,,2.38
524 Std Dev,,.93
525 Minimum,,1.00
526 Maximum,,4.00
527 Percentiles,0,1.00
528 ,25,2.00
529 ,50 (Median),2.00
530 ,75,3.00
531 ,100,4.00
532 ])
533 AT_CLEANUP
534
535 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
536 AT_DATA([frequencies.sps],
537   [DATA LIST LIST notable /X * F *.
538 BEGIN DATA.
539 1 1
540 3 2
541 4 1
542 5 1
543 5 1
544 99 4
545 END DATA.
546
547 MISSING VALUE x (99.0) .
548 WEIGHT BY f.
549
550 FREQUENCIES 
551         VAR=x
552         /PERCENTILES = 0 25 50 75 100.
553 ])
554
555 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
556   [Table: X
557 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
558 ,1.00,1.00,10.00,16.67,16.67
559 ,3.00,2.00,20.00,33.33,50.00
560 ,4.00,1.00,10.00,16.67,66.67
561 ,5.00,2.00,20.00,33.33,100.00
562 ,99.00,4.00,40.00,Missing,
563 Total,,10.00,100.0,100.0,
564
565 Table: X
566 N,Valid,6.00
567 ,Missing,4.00
568 Mean,,3.50
569 Std Dev,,1.52
570 Minimum,,1.00
571 Maximum,,5.00
572 Percentiles,0,1.00
573 ,25,3.00
574 ,50 (Median),3.50
575 ,75,4.75
576 ,100,5.00
577 ])
578 AT_CLEANUP
579
# Requests a frequency table and a histogram with a normal curve for the
# single variable d4.  Output is ignored; only exit status 0 is checked.
# NOTE(review): the 17 rows between "begin data." and "end data." are empty
# in this copy, so d4 is given no values here.  Presumably they held a
# mostly two-valued series, going by the test name - confirm against the
# repository copy before relying on this test.
580 AT_SETUP([FREQUENCIES dichotomous histogram])
581 AT_DATA([frequencies.sps], [dnl
582 data list notable list /d4 *.
583 begin data.
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601 end data.
602
603 FREQUENCIES
604         /VARIABLES = d4
605         /FORMAT=AVALUE TABLE
606         /HISTOGRAM=NORMAL
607         .
608 ])
609
610 AT_CHECK([pspp frequencies.sps], [0],  [ignore])
611 AT_CLEANUP
612
613
# STATISTICS = MEDIAN on the values 1, 2 and 3000000.  The expected
# statistics table holds only the N rows and the 50 (Median) percentile,
# which is 2.00.
614 AT_SETUP([FREQUENCIES median])
615 AT_DATA([median.sps], [dnl
616 data list notable list /x *.
617 begin data.
618 1
619 2
620 3000000
621 end data.
622
623 FREQUENCIES
624         /VARIABLES = x
625         /STATISTICS = MEDIAN
626         .
627 ])
628
629 AT_CHECK([pspp median.sps -O format=csv], [0],  [dnl
630 Table: x
631 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
632 ,1.00,1,33.33,33.33,33.33
633 ,2.00,1,33.33,33.33,66.67
634 ,3000000.00,1,33.33,33.33,100.00
635 Total,,3,100.0,100.0,
636
637 Table: x
638 N,Valid,3
639 ,Missing,0
640 Percentiles,50 (Median),2.00
641 ])
642 AT_CLEANUP
643
644
645
646 AT_SETUP([FREQUENCIES default statistics])
647 AT_DATA([median.sps], [dnl
648 data list notable list /x *.
649 begin data.
650 10
651 20
652 3000000
653 end data.
654
655 FREQUENCIES
656         /VARIABLES = x
657         /STATISTICS
658         .
659
660 FREQUENCIES
661         /VARIABLES = x
662         /STATISTICS = DEFAULT
663         .
664 ])
665
666 AT_CHECK([pspp median.sps -O format=csv], [0],  [dnl
667 Table: x
668 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
669 ,10.00,1,33.33,33.33,33.33
670 ,20.00,1,33.33,33.33,66.67
671 ,3000000.00,1,33.33,33.33,100.00
672 Total,,3,100.0,100.0,
673
674 Table: x
675 N,Valid,3
676 ,Missing,0
677 Mean,,1000010.00
678 Std Dev,,1732042.15
679 Minimum,,10.00
680 Maximum,,3000000.00
681
682 Table: x
683 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
684 ,10.00,1,33.33,33.33,33.33
685 ,20.00,1,33.33,33.33,66.67
686 ,3000000.00,1,33.33,33.33,100.00
687 Total,,3,100.0,100.0,
688
689 Table: x
690 N,Valid,3
691 ,Missing,0
692 Mean,,1000010.00
693 Std Dev,,1732042.15
694 Minimum,,10.00
695 Maximum,,3000000.00
696 ])
697 AT_CLEANUP
698
699
700
# Every input case is ".", which the expected output counts as 3 missing
# and 0 valid.  Each statistic produced by STATISTICS = ALL is then
# expected to print as "." and the command must still exit with status 0.
701 AT_SETUP([FREQUENCIES no valid data])
702 AT_DATA([empty.sps], [dnl
703 data list notable list /x *.
704 begin data.
705 .
706 .
707 .
708 end data.
709
710 FREQUENCIES
711         /VARIABLES = x
712         /STATISTICS = ALL
713         .
714 ])
715
716 AT_CHECK([pspp empty.sps -O format=csv], [0],  [dnl
717 Table: x
718 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
719 ,.  ,3,100.00,Missing,
720 Total,,3,100.0,100.0,
721
722 Table: x
723 N,Valid,0
724 ,Missing,3
725 Mean,,.
726 S.E. Mean,,.
727 Mode,,.
728 Std Dev,,.
729 Variance,,.
730 Kurtosis,,.
731 S.E. Kurt,,.
732 Skewness,,.
733 S.E. Skew,,.
734 Range,,.
735 Minimum,,.
736 Maximum,,.
737 Sum,,.
738 Percentiles,,.
739 ])
740
741 AT_CLEANUP
742
743
# Every case has "." for the weighting variable w, and a histogram of x is
# requested.  Output is ignored; only exit status 0 is checked.
744 AT_SETUP([FREQUENCIES histogram no valid cases])
745 AT_DATA([empty.sps], [dnl
746 data list notable list /x w *.
747 begin data.
748 1 .
749 2 .
750 3 .
751 end data.
752
753 weight by w.
754
755 FREQUENCIES
756         /VARIABLES = x
757         /histogram
758         .
759 ])
760
761 AT_CHECK([pspp empty.sps -O format=csv], [0],  [ignore])
762
763 AT_CLEANUP
764
# Regression test for bug #48128.  An INPUT PROGRAM generates ten cases
# with SCORE=EXP(NORMAL(1)), then FREQUENCIES requests all statistics, a
# list of percentiles and a histogram in one command.  Output is ignored;
# only exit status 0 is checked.
# NOTE(review): no seed is set in the syntax, so the generated values
# presumably differ from run to run - confirm whether a fixed seed is
# wanted for reproducibility.
765 AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
766 AT_DATA([bug.sps], [dnl
767 SET FORMAT=F8.0.
768
769 INPUT PROGRAM.
770         LOOP I=1 TO 10.
771                 COMPUTE SCORE=EXP(NORMAL(1)).
772                 END CASE.
773         END LOOP.
774         END FILE.
775 END INPUT PROGRAM.
776
777 FREQUENCIES VARIABLES=SCORE
778 /FORMAT=NOTABLE
779 /STATISTICS=ALL
780 /PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
781 /HISTOGRAM.
782
783 ])
784
785 AT_CHECK([pspp bug.sps], [0],  [ignore])
786
787 AT_CLEANUP