Enable the show value labels feature
[pspp] / tests / language / stats / frequencies.at
1 AT_BANNER([FREQUENCIES procedure])
2 # Frequency table for the string variable name (A8); the expected rows are in ascending name order.
3 AT_SETUP([FREQUENCIES string variable])
4 AT_DATA([frequencies.sps],
5   [DATA LIST FREE/
6    name  (A8) value * quantity .
7 BEGIN DATA.
8 foo 1 5
9 bar 2 6
10 baz 1 9
11 quux 3 1
12 bar 1 2
13 baz 4 3
14 baz 1 4
15 baz 1 1
16 foo 6 0
17 quux 5 8
18 END DATA.
19 EXECUTE.
20
21 FREQUENCIES /VAR = name/ORDER=ANALYSIS.
22 ])
23 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
24 Table: name
25 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
26 ,bar     ,2,20.00,20.00,20.00
27 ,baz     ,4,40.00,40.00,60.00
28 ,foo     ,2,20.00,20.00,80.00
29 ,quux    ,2,20.00,20.00,100.00
30 Total,,10,100.0,100.0,
31 ])
32 AT_CLEANUP
33
34 # Tests for a bug where pspp would crash if two FREQUENCIES commands
35 # existed in an input file.
36 AT_SETUP([FREQUENCIES two runs crash])
37 AT_DATA([frequencies.sps],
38   [data list free /v1 v2.
39 begin data.
40 0 1
41 2 3 
42 4 5
43 3 4
44 end data.
45
46 frequencies v1 v2/statistics=none/ORDER=VARIABLE.
47 frequencies v1 v2/statistics=none.
48 ])
49 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
50   [Table: v1
51 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
52 ,.00,1,25.00,25.00,25.00
53 ,2.00,1,25.00,25.00,50.00
54 ,3.00,1,25.00,25.00,75.00
55 ,4.00,1,25.00,25.00,100.00
56 Total,,4,100.0,100.0,
57
58 Table: v2
59 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
60 ,1.00,1,25.00,25.00,25.00
61 ,3.00,1,25.00,25.00,50.00
62 ,4.00,1,25.00,25.00,75.00
63 ,5.00,1,25.00,25.00,100.00
64 Total,,4,100.0,100.0,
65
66 Table: v1
67 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
68 ,.00,1,25.00,25.00,25.00
69 ,2.00,1,25.00,25.00,50.00
70 ,3.00,1,25.00,25.00,75.00
71 ,4.00,1,25.00,25.00,100.00
72 Total,,4,100.0,100.0,
73
74 Table: v2
75 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
76 ,1.00,1,25.00,25.00,25.00
77 ,3.00,1,25.00,25.00,50.00
78 ,4.00,1,25.00,25.00,75.00
79 ,5.00,1,25.00,25.00,100.00
80 Total,,4,100.0,100.0,
81 ])
82 AT_CLEANUP
83
84 # Test that the LIMIT specification works.  With FORMAT=LIMIT(3) the expected output has a table for v2 (three distinct values) but none for v1 (four distinct values).
85 AT_SETUP([FREQUENCIES with LIMIT])
86 AT_DATA([frequencies.sps],
87   [data list free /v1 v2.
88 begin data.
89 0 1
90 2 5
91 4 3
92 3 5
93 end data.
94
95 frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
96 ])
97 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
98 Table: v2
99 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
100 ,1.00,1,25.00,25.00,25.00
101 ,3.00,1,25.00,25.00,50.00
102 ,5.00,2,50.00,50.00,100.00
103 Total,,4,100.0,100.0,
104 ])
105 AT_CLEANUP
106
107 # Tests for a bug where PSPP would crash when a FREQUENCIES command
108 # was used with the HTML output driver.  Also checks that a non-empty pspp.html was written.
109 AT_SETUP([FREQUENCIES HTML output crash])
110 AT_DATA([frequencies.sps],
111   [data list free /v1 v2.
112 begin data.
113 0 1
114 2 3 
115 4 5
116 3 4
117 end data.
118
119 list.
120
121 frequencies v1/statistics=none.
122 ])
123 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
124   [Table: Data List
125 v1,v2
126 .00,1.00
127 2.00,3.00
128 4.00,5.00
129 3.00,4.00
130
131 Table: v1
132 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
133 ,.00,1,25.00,25.00,25.00
134 ,2.00,1,25.00,25.00,50.00
135 ,3.00,1,25.00,25.00,75.00
136 ,4.00,1,25.00,25.00,100.00
137 Total,,4,100.0,100.0,
138 ])
139 AT_CHECK([test -s pspp.html])
140 AT_CLEANUP
141
142 # Tests for a bug which crashed PSPP when a piechart with too many
143 # segments was requested.  With /format=notable /statistics=none the expected text output contains only the DATA LIST summary.
144 AT_SETUP([FREQUENCIES pie chart crash])
145 AT_DATA([frequencies.sps],
146   [data list list /x * w *.
147 begin data.
148 1  4
149 34 10
150 -9 15
151 232 6
152 11  4
153 134 1
154 9  5
155 32 16
156 -2 6
157 2  16
158 20  6
159 end data.
160
161 weight by w.
162
163 frequencies /x /format=notable /statistics=none
164         /piechart.
165 ])
166 # Cannot use the CSV driver for this because it does not output charts
167 # at all.
168 AT_CHECK([pspp frequencies.sps], [0], [dnl
169 Reading free-form data from INLINE.
170 +--------+------+
171 |Variable|Format|
172 #========#======#
173 |x       |F8.0  |
174 |w       |F8.0  |
175 +--------+------+
176 ])
177 AT_CLEANUP
178
179 dnl Check that histogram subcommand runs without crashing
180 AT_SETUP([FREQUENCIES histogram crash])
181 AT_DATA([frequencies.sps],
182   [data list notable list /x * w *.
183 begin data.
184 1  4
185 34 10
186 -9 15
187 232 6
188 11  4
189 134 1
190 9  5
191 32 16
192 -2 6
193 2  16
194 20  6
195 end data.
196
197 weight by w.
198
199 frequencies /x 
200             /format=notable 
201             /statistics=none
202             /histogram=minimum(0) maximum(50) percent(5) normal.
203 ])
204 # Cannot use the CSV driver for this because it does not output charts
205 # at all, so the PDF format is requested and its output is ignored.
206 AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore])
207 AT_CLEANUP
208
209 # Tests for a bug which crashed PSPP when the median and a histogram
210 # were both requested (exercised here on a single-case data set).
211 AT_SETUP([FREQUENCIES median with histogram crash])
212 AT_DATA([frequencies.sps], [dnl
213 data list list notable /x.
214 begin data.
215 1
216 end data.
217
218 frequencies /x /histogram /STATISTICS=median.
219 ])
220 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
221 dnl Ignore output - No crash test.
222 AT_CLEANUP
223
224 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
225 # crash (bug #11492).  Only the four SEX = 'F' cases are expected in the output tables.
226 AT_SETUP([FREQUENCIES crash after TEMPORARY])
227 AT_DATA([frequencies.sps],
228   [DATA LIST LIST /SEX (A1) X *.
229 BEGIN DATA.
230 M 31
231 F 21
232 M 41
233 F 31
234 M 13
235 F 12
236 M 14
237 F 13
238 END DATA.
239
240
241 TEMPORARY
242 SELECT IF SEX EQ 'F'
243 FREQUENCIES /X .
244
245 FINISH
246 ])
247 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
248   [Table: Reading free-form data from INLINE.
249 Variable,Format
250 SEX,A1
251 X,F8.0
252
253 Table: X
254 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
255 ,12.00,1,25.00,25.00,25.00
256 ,13.00,1,25.00,25.00,50.00
257 ,21.00,1,25.00,25.00,75.00
258 ,31.00,1,25.00,25.00,100.00
259 Total,,4,100.0,100.0,
260
261 Table: X
262 N,Valid,4
263 ,Missing,0
264 Mean,,19.25
265 Std Dev,,8.81
266 Minimum,,12.00
267 Maximum,,31.00
268 ])
269 AT_CLEANUP
270 # FREQUENCIES_NTILES_OUTPUT is the expected statistics table for x = 1, 2, 3, 4, 5; it is shared by the "basic percentiles" and "basic n-tiles" tests.
271 m4_define([FREQUENCIES_NTILES_OUTPUT],
272   [Table: x
273 N,Valid,5
274 ,Missing,0
275 Mean,,3.00
276 Std Dev,,1.58
277 Minimum,,1.00
278 Maximum,,5.00
279 Percentiles,0,1.00
280 ,25,2.00
281 ,33,2.33
282 ,50 (Median),3.00
283 ,67,3.67
284 ,75,4.00
285 ,100,5.00
286 ])
287 AT_SETUP([FREQUENCIES basic percentiles])
288 AT_DATA([frequencies.sps],
289   [DATA LIST LIST notable /x * .
290 BEGIN DATA.
291 1
292 2
293 3
294 4
295 5
296 END DATA.
297
298 FREQUENCIES 
299         VAR=x
300         /FORMAT=NOTABLE
301         /PERCENTILES = 0 25 33.333 50 66.666 75 100.
302 ])
303 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
304   [FREQUENCIES_NTILES_OUTPUT])
305 AT_CLEANUP
306 # Requests /NTILES = 3 and /NTILES = 4 on x = 1, 2, 3, 4, 5 and expects the same table as the explicit percentile list (FREQUENCIES_NTILES_OUTPUT).
307 AT_SETUP([FREQUENCIES basic n-tiles])
308 AT_DATA([frequencies.sps],
309   [DATA LIST LIST notable /x * .
310 BEGIN DATA.
311 1
312 2
313 3
314 4
315 5
316 END DATA.
317
318 FREQUENCIES 
319         VAR=x
320         /FORMAT=NOTABLE
321         /NTILES = 3
322         /NTILES = 4.
323 ])
324 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
325   [FREQUENCIES_NTILES_OUTPUT])
326 AT_CLEANUP
327 # Percentiles with /ALGORITHM=COMPATIBLE for x = 1, 2, 3, 4, 5 (expected 25th = 1.50 and 75th = 4.50).
328 AT_SETUP([FREQUENCIES compatibility percentiles])
329 AT_DATA([frequencies.sps],
330   [DATA LIST LIST notable /X * .
331 BEGIN DATA.
332 1
333 2
334 3
335 4
336 5
337 END DATA.
338
339 FREQUENCIES 
340         VAR=x
341         /ALGORITHM=COMPATIBLE
342         /PERCENTILES = 0 25 50 75 100.
343 ])
344 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
345   [Table: X
346 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
347 ,1.00,1,20.00,20.00,20.00
348 ,2.00,1,20.00,20.00,40.00
349 ,3.00,1,20.00,20.00,60.00
350 ,4.00,1,20.00,20.00,80.00
351 ,5.00,1,20.00,20.00,100.00
352 Total,,5,100.0,100.0,
353
354 Table: X
355 N,Valid,5
356 ,Missing,0
357 Mean,,3.00
358 Std Dev,,1.58
359 Minimum,,1.00
360 Maximum,,5.00
361 Percentiles,0,1.00
362 ,25,1.50
363 ,50 (Median),3.00
364 ,75,4.50
365 ,100,5.00
366 ])
367 AT_CLEANUP
368 # Percentiles with no ALGORITHM subcommand for x = 1, 2, 3, 4, 5 (expected 25th = 2.00 and 75th = 4.00).
369 AT_SETUP([FREQUENCIES enhanced percentiles])
370 AT_DATA([frequencies.sps],
371   [DATA LIST LIST notable /X * .
372 BEGIN DATA.
373 1
374 2
375 3
376 4
377 5
378 END DATA.
379
380 FREQUENCIES 
381         VAR=x
382         /PERCENTILES = 0 25 50 75 100.
383 ])
384 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
385   [Table: X
386 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
387 ,1.00,1,20.00,20.00,20.00
388 ,2.00,1,20.00,20.00,40.00
389 ,3.00,1,20.00,20.00,60.00
390 ,4.00,1,20.00,20.00,80.00
391 ,5.00,1,20.00,20.00,100.00
392 Total,,5,100.0,100.0,
393
394 Table: X
395 N,Valid,5
396 ,Missing,0
397 Mean,,3.00
398 Std Dev,,1.58
399 Minimum,,1.00
400 Maximum,,5.00
401 Percentiles,0,1.00
402 ,25,2.00
403 ,50 (Median),3.00
404 ,75,4.00
405 ,100,5.00
406 ])
407 AT_CLEANUP
408 # Weighted by F: the seven input rows are expected to collapse to five values with total weight 2.00 each (4 and 5 appear twice with weight 1).
409 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
410 AT_DATA([frequencies.sps],
411   [DATA LIST LIST notable /X * F *.
412 BEGIN DATA.
413 1 2
414 2 2
415 3 2
416 4 1
417 4 1
418 5 1
419 5 1
420 END DATA.
421
422 WEIGHT BY f.
423
424 FREQUENCIES 
425         VAR=x
426         /PERCENTILES = 0 25 50 75 100.
427 ])
428 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
429   [Table: X
430 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
431 ,1.00,2.00,20.00,20.00,20.00
432 ,2.00,2.00,20.00,20.00,40.00
433 ,3.00,2.00,20.00,20.00,60.00
434 ,4.00,2.00,20.00,20.00,80.00
435 ,5.00,2.00,20.00,20.00,100.00
436 Total,,10.00,100.0,100.0,
437
438 Table: X
439 N,Valid,10.00
440 ,Missing,.00
441 Mean,,3.00
442 Std Dev,,1.49
443 Minimum,,1.00
444 Maximum,,5.00
445 Percentiles,0,1.00
446 ,25,2.00
447 ,50 (Median),3.00
448 ,75,4.00
449 ,100,5.00
450 ])
451 AT_CLEANUP
452 # Weighted by F with unequal weights (total 6.00); the expected median 3.50 and 75th percentile 4.75 fall between data values.
453 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
454 AT_DATA([frequencies.sps],
455   [DATA LIST LIST notable /X * F *.
456 BEGIN DATA.
457 1 1
458 3 2
459 4 1
460 5 1
461 5 1
462 END DATA.
463
464 WEIGHT BY f.
465
466 FREQUENCIES 
467         VAR=x
468         /PERCENTILES = 0 25 50 75 100.
469 ])
470 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
471   [Table: X
472 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
473 ,1.00,1.00,16.67,16.67,16.67
474 ,3.00,2.00,33.33,33.33,50.00
475 ,4.00,1.00,16.67,16.67,66.67
476 ,5.00,2.00,33.33,33.33,100.00
477 Total,,6.00,100.0,100.0,
478
479 Table: X
480 N,Valid,6.00
481 ,Missing,.00
482 Mean,,3.50
483 Std Dev,,1.52
484 Minimum,,1.00
485 Maximum,,5.00
486 Percentiles,0,1.00
487 ,25,3.00
488 ,50 (Median),3.50
489 ,75,4.75
490 ,100,5.00
491 ])
492 AT_CLEANUP
493 # Weighted percentiles over four values whose weights (7, 16, 12, 5) total 40.00.
494 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
495 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
496 AT_DATA([frequencies.sps],
497   [DATA LIST LIST notable /X * F *.
498 BEGIN DATA.
499 1 7
500 2 16
501 3 12
502 4 5
503 END DATA.
504
505 WEIGHT BY f.
506
507 FREQUENCIES 
508         VAR=x
509         /PERCENTILES = 0 25 50 75 100.
510 ])
511 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
512 Table: X
513 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
514 ,1.00,7.00,17.50,17.50,17.50
515 ,2.00,16.00,40.00,40.00,57.50
516 ,3.00,12.00,30.00,30.00,87.50
517 ,4.00,5.00,12.50,12.50,100.00
518 Total,,40.00,100.0,100.0,
519
520 Table: X
521 N,Valid,40.00
522 ,Missing,.00
523 Mean,,2.38
524 Std Dev,,.93
525 Minimum,,1.00
526 Maximum,,4.00
527 Percentiles,0,1.00
528 ,25,2.00
529 ,50 (Median),2.00
530 ,75,3.00
531 ,100,4.00
532 ])
533 AT_CLEANUP
534 # Same data as "weighted (2)" plus x = 99 (weight 4) declared as a missing value; the expected statistics match "weighted (2)" and 99 is reported as Missing.
535 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
536 AT_DATA([frequencies.sps],
537   [DATA LIST LIST notable /X * F *.
538 BEGIN DATA.
539 1 1
540 3 2
541 4 1
542 5 1
543 5 1
544 99 4
545 END DATA.
546
547 MISSING VALUE x (99.0) .
548 WEIGHT BY f.
549
550 FREQUENCIES 
551         VAR=x
552         /PERCENTILES = 0 25 50 75 100.
553 ])
554
555 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
556   [Table: X
557 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
558 ,1.00,1.00,10.00,16.67,16.67
559 ,3.00,2.00,20.00,33.33,50.00
560 ,4.00,1.00,10.00,16.67,66.67
561 ,5.00,2.00,20.00,33.33,100.00
562 ,99.00,4.00,40.00,Missing,
563 Total,,10.00,100.0,100.0,
564
565 Table: X
566 N,Valid,6.00
567 ,Missing,4.00
568 Mean,,3.50
569 Std Dev,,1.52
570 Minimum,,1.00
571 Maximum,,5.00
572 Percentiles,0,1.00
573 ,25,3.00
574 ,50 (Median),3.50
575 ,75,4.75
576 ,100,5.00
577 ])
578 AT_CLEANUP
579 # Runs /HISTOGRAM=NORMAL and only checks exit status 0 (output ignored).  NOTE(review): the 17 data lines below are empty in this copy, so d4 has no values here; the title suggests two-valued data - confirm against the upstream test.
580 AT_SETUP([FREQUENCIES dichotomous histogram])
581 AT_DATA([frequencies.sps], [dnl
582 data list notable list /d4 *.
583 begin data.
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601 end data.
602
603 FREQUENCIES
604         /VARIABLES = d4
605         /FORMAT=AVALUE TABLE
606         /HISTOGRAM=NORMAL
607         .
608 ])
609
610 AT_CHECK([pspp frequencies.sps], [0],  [ignore])
611 AT_CLEANUP
612
613 # With /STATISTICS = MEDIAN alone, the expected statistics table holds only N and the median (2.00 for 1, 2, 3000000).
614 AT_SETUP([FREQUENCIES median])
615 AT_DATA([median.sps], [dnl
616 data list notable list /x *.
617 begin data.
618 1
619 2
620 3000000
621 end data.
622
623 FREQUENCIES
624         /VARIABLES = x
625         /STATISTICS = MEDIAN
626         .
627 ])
628
629 AT_CHECK([pspp median.sps -O format=csv], [0],  [dnl
630 Table: x
631 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
632 ,1.00,1,33.33,33.33,33.33
633 ,2.00,1,33.33,33.33,66.67
634 ,3000000.00,1,33.33,33.33,100.00
635 Total,,3,100.0,100.0,
636
637 Table: x
638 N,Valid,3
639 ,Missing,0
640 Percentiles,50 (Median),2.00
641 ])
642 AT_CLEANUP
643 # With /STATISTICS = VARIANCE on height, the expected statistics table holds only N and the variance (1223.00); forename is read but not analyzed.
644 AT_SETUP([FREQUENCIES variance])
645 AT_DATA([variance.sps], [dnl
646 data list notable list /forename (A12) height.
647 begin data.
648 Ahmed 188
649 bertram 167
650 Catherine 134
651 David 109
652 end data.
653
654 FREQUENCIES
655    /VARIABLES = height
656    /STATISTICS = VARIANCE.
657 ])
658
659 AT_CHECK([pspp variance.sps -O format=csv], [0],  [dnl
660 Table: height
661 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
662 ,109.00,1,25.00,25.00,25.00
663 ,134.00,1,25.00,25.00,50.00
664 ,167.00,1,25.00,25.00,75.00
665 ,188.00,1,25.00,25.00,100.00
666 Total,,4,100.0,100.0,
667
668 Table: height
669 N,Valid,4
670 ,Missing,0
671 Variance,,1223.00
672 ])
673 AT_CLEANUP
674 # A bare /STATISTICS and /STATISTICS = DEFAULT are expected to produce identical output: mean, std dev, minimum and maximum.
675 AT_SETUP([FREQUENCIES default statistics])
676 AT_DATA([median.sps], [dnl
677 data list notable list /x *.
678 begin data.
679 10
680 20
681 3000000
682 end data.
683
684 FREQUENCIES
685         /VARIABLES = x
686         /STATISTICS
687         .
688
689 FREQUENCIES
690         /VARIABLES = x
691         /STATISTICS = DEFAULT
692         .
693 ])
694
695 AT_CHECK([pspp median.sps -O format=csv], [0],  [dnl
696 Table: x
697 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
698 ,10.00,1,33.33,33.33,33.33
699 ,20.00,1,33.33,33.33,66.67
700 ,3000000.00,1,33.33,33.33,100.00
701 Total,,3,100.0,100.0,
702
703 Table: x
704 N,Valid,3
705 ,Missing,0
706 Mean,,1000010.00
707 Std Dev,,1732042.15
708 Minimum,,10.00
709 Maximum,,3000000.00
710
711 Table: x
712 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
713 ,10.00,1,33.33,33.33,33.33
714 ,20.00,1,33.33,33.33,66.67
715 ,3000000.00,1,33.33,33.33,100.00
716 Total,,3,100.0,100.0,
717
718 Table: x
719 N,Valid,3
720 ,Missing,0
721 Mean,,1000010.00
722 Std Dev,,1732042.15
723 Minimum,,10.00
724 Maximum,,3000000.00
725 ])
726 AT_CLEANUP
727
728
729 # All three cases are "." and are counted as Missing; with /STATISTICS = ALL every statistic is expected to print as ".".
730 AT_SETUP([FREQUENCIES no valid data])
731 AT_DATA([empty.sps], [dnl
732 data list notable list /x *.
733 begin data.
734 .
735 .
736 .
737 end data.
738
739 FREQUENCIES
740         /VARIABLES = x
741         /STATISTICS = ALL
742         .
743 ])
744
745 AT_CHECK([pspp empty.sps -O format=csv], [0],  [dnl
746 Table: x
747 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
748 ,.  ,3,100.00,Missing,
749 Total,,3,100.0,100.0,
750
751 Table: x
752 N,Valid,0
753 ,Missing,3
754 Mean,,.
755 S.E. Mean,,.
756 Mode,,.
757 Std Dev,,.
758 Variance,,.
759 Kurtosis,,.
760 S.E. Kurt,,.
761 Skewness,,.
762 S.E. Skew,,.
763 Range,,.
764 Minimum,,.
765 Maximum,,.
766 Sum,,.
767 Percentiles,,.
768 ])
769
770 AT_CLEANUP
771
772 # Every case has "." as its weight w; requests a histogram and only checks exit status 0 (output ignored).
773 AT_SETUP([FREQUENCIES histogram no valid cases])
774 AT_DATA([empty.sps], [dnl
775 data list notable list /x w *.
776 begin data.
777 1 .
778 2 .
779 3 .
780 end data.
781
782 weight by w.
783
784 FREQUENCIES
785         /VARIABLES = x
786         /histogram
787         .
788 ])
789
790 AT_CHECK([pspp empty.sps -O format=csv], [0],  [ignore])
791
792 AT_CLEANUP
793 # Combines STATISTICS=ALL, eleven PERCENTILES and HISTOGRAM on ten generated SCORE values; only exit status 0 is checked (output ignored).
794 AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
795 AT_DATA([bug.sps], [dnl
796 SET FORMAT=F8.0.
797
798 INPUT PROGRAM.
799         LOOP I=1 TO 10.
800                 COMPUTE SCORE=EXP(NORMAL(1)).
801                 END CASE.
802         END LOOP.
803         END FILE.
804 END INPUT PROGRAM.
805
806 FREQUENCIES VARIABLES=SCORE
807 /FORMAT=NOTABLE
808 /STATISTICS=ALL
809 /PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
810 /HISTOGRAM.
811
812 ])
813
814 AT_CHECK([pspp bug.sps], [0],  [ignore])
815
816 AT_CLEANUP