FREQUENCIES: Fixed crash when there was no valid data
[pspp] / tests / language / stats / frequencies.at
# Banner heading for the FREQUENCIES test groups that follow.
1 AT_BANNER([FREQUENCIES procedure])
2
# Tabulates a string variable: ten cases are read and the four distinct
# names are expected in ascending order with their counts.
3 AT_SETUP([FREQUENCIES string variable])
4 AT_DATA([frequencies.sps],
5   [DATA LIST FREE/
6    name  (A8) value * quantity .
7 BEGIN DATA.
8 foo 1 5
9 bar 2 6
10 baz 1 9
11 quux 3 1
12 bar 1 2
13 baz 4 3
14 baz 1 4
15 baz 1 1
16 foo 6 0
17 quux 5 8
18 END DATA.
19 EXECUTE.
20
21 FREQUENCIES /VAR = name.
22 ])
23 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
24 Table: name
25 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
26 ,bar     ,2,20.00,20.00,20.00
27 ,baz     ,4,40.00,40.00,60.00
28 ,foo     ,2,20.00,20.00,80.00
29 ,quux    ,2,20.00,20.00,100.00
30 Total,,10,100.0,100.0,
31 ])
32 AT_CLEANUP
33
34 # Tests for a bug where pspp would crash if two FREQUENCIES commands
35 # existed in an input file.
36 AT_SETUP([FREQUENCIES two runs crash])
37 AT_DATA([frequencies.sps],
38   [data list free /v1 v2.
39 begin data.
40 0 1
41 2 3 
42 4 5
43 3 4
44 end data.
45
46 frequencies v1 v2/statistics=none.
47 frequencies v1 v2/statistics=none.
48 ])
# The same command is issued twice, so the expected output lists the
# v1 and v2 tables two times over with identical contents.
49 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
50   [Table: v1
51 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
52 ,.00,1,25.00,25.00,25.00
53 ,2.00,1,25.00,25.00,50.00
54 ,3.00,1,25.00,25.00,75.00
55 ,4.00,1,25.00,25.00,100.00
56 Total,,4,100.0,100.0,
57
58 Table: v2
59 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
60 ,1.00,1,25.00,25.00,25.00
61 ,3.00,1,25.00,25.00,50.00
62 ,4.00,1,25.00,25.00,75.00
63 ,5.00,1,25.00,25.00,100.00
64 Total,,4,100.0,100.0,
65
66 Table: v1
67 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
68 ,.00,1,25.00,25.00,25.00
69 ,2.00,1,25.00,25.00,50.00
70 ,3.00,1,25.00,25.00,75.00
71 ,4.00,1,25.00,25.00,100.00
72 Total,,4,100.0,100.0,
73
74 Table: v2
75 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
76 ,1.00,1,25.00,25.00,25.00
77 ,3.00,1,25.00,25.00,50.00
78 ,4.00,1,25.00,25.00,75.00
79 ,5.00,1,25.00,25.00,100.00
80 Total,,4,100.0,100.0,
81 ])
82 AT_CLEANUP
83
84 # Tests for a bug where PSPP would crash when a FREQUENCIES command
85 # was used with the HTML output driver.
86 AT_SETUP([FREQUENCIES HTML output crash])
87 AT_DATA([frequencies.sps],
88   [data list free /v1 v2.
89 begin data.
90 0 1
91 2 3 
92 4 5
93 3 4
94 end data.
95
96 list.
97
98 frequencies v1/statistics=none.
99 ])
# CSV is written to standard output for comparison while a second
# output file, pspp.html, is requested in the same run.  The final
# check only requires that pspp.html exists and is not empty.
100 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
101   [Table: Data List
102 v1,v2
103 .00,1.00
104 2.00,3.00
105 4.00,5.00
106 3.00,4.00
107
108 Table: v1
109 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
110 ,.00,1,25.00,25.00,25.00
111 ,2.00,1,25.00,25.00,50.00
112 ,3.00,1,25.00,25.00,75.00
113 ,4.00,1,25.00,25.00,100.00
114 Total,,4,100.0,100.0,
115 ])
116 AT_CHECK([test -s pspp.html])
117 AT_CLEANUP
118
119 # Tests for a bug which crashed PSPP when a piechart with too many
120 # segments was requested.
121 AT_SETUP([FREQUENCIES pie chart crash])
122 AT_DATA([frequencies.sps],
123   [data list list /x * w *.
124 begin data.
125 1  4
126 34 10
127 -9 15
128 232 6
129 11  4
130 134 1
131 9  5
132 32 16
133 -2 6
134 2  16
135 20  6
136 end data.
137
138 weight by w.
139
140 frequencies /x /format=notable /statistics=none
141         /piechart.
142 ])
143 # Cannot use the CSV driver for this because it does not output charts
144 # at all.
# The frequency table and statistics are switched off in the syntax, so
# the only text expected is the variable summary printed for the data
# definition.
145 AT_CHECK([pspp frequencies.sps], [0], [dnl
146 Reading free-form data from INLINE.
147 +--------+------+
148 |Variable|Format|
149 #========#======#
150 |x       |F8.0  |
151 |w       |F8.0  |
152 +--------+------+
153 ])
154 AT_CLEANUP
155
156 dnl Check that histogram subcommand runs without crashing
157 AT_SETUP([FREQUENCIES histogram crash])
158 AT_DATA([frequencies.sps],
159   [data list notable list /x * w *.
160 begin data.
161 1  4
162 34 10
163 -9 15
164 232 6
165 11  4
166 134 1
167 9  5
168 32 16
169 -2 6
170 2  16
171 20  6
172 end data.
173
174 weight by w.
175
176 frequencies /x 
177             /format=notable 
178             /statistics=none
179             /histogram=minimum(0) maximum(50) percent(5) normal.
180 ])
181 # Cannot use the CSV driver for this because it does not output charts
182 # at all.
# Standard output is ignored; the check only requires exit status 0.
183 AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore])
184 AT_CLEANUP
185
186 # Tests for a bug which crashed PSPP when the median and a histogram
187 # were both requested.
188 AT_SETUP([FREQUENCIES median with histogram crash])
# The input holds a single case; the histogram and the median are
# requested in the same command.
189 AT_DATA([frequencies.sps], [dnl
190 data list list notable /x.
191 begin data.
192 1
193 end data.
194
195 frequencies /x /histogram /STATISTICS=median.
196 ])
197 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
198 dnl Ignore output - No crash test.
199 AT_CLEANUP
200
201 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
202 # crash (bug #11492).
203 AT_SETUP([FREQUENCIES crash after TEMPORARY])
204 AT_DATA([frequencies.sps],
205   [DATA LIST LIST /SEX (A1) X *.
206 BEGIN DATA.
207 M 31
208 F 21
209 M 41
210 F 31
211 M 13
212 F 12
213 M 14
214 F 13
215 END DATA.
216
217
218 TEMPORARY
219 SELECT IF SEX EQ 'F'
220 FREQUENCIES /X .
221
222 FINISH
223 ])
# Only the four cases with SEX equal to F are expected in the tables:
# 12, 13, 21 and 31.
224 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
225   [Table: Reading free-form data from INLINE.
226 Variable,Format
227 SEX,A1
228 X,F8.0
229
230 Table: X
231 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
232 ,12.00,1,25.00,25.00,25.00
233 ,13.00,1,25.00,25.00,50.00
234 ,21.00,1,25.00,25.00,75.00
235 ,31.00,1,25.00,25.00,100.00
236 Total,,4,100.0,100.0,
237
238 Table: X
239 N,Valid,4
240 ,Missing,0
241 Mean,,19.25
242 Std Dev,,8.81
243 Minimum,,12.00
244 Maximum,,31.00
245 ])
246 AT_CLEANUP
247
# Expected statistics table shared by the basic percentiles and basic
# n-tiles tests, which both pass this macro as their expected output.
248 m4_define([FREQUENCIES_NTILES_OUTPUT],
249   [Table: x
250 N,Valid,5
251 ,Missing,0
252 Mean,,3.00
253 Std Dev,,1.58
254 Minimum,,1.00
255 Maximum,,5.00
256 Percentiles,0,1.00
257 ,25,2.00
258 ,33,2.33
259 ,50 (Median),3.00
260 ,67,3.67
261 ,75,4.00
262 ,100,5.00
263 ])
# Requests an explicit list of percentiles with the frequency table
# switched off.  The input must hold the five values 1 through 5: the
# shared expected output requires five valid cases, no missing cases,
# a mean of 3.00, a minimum of 1.00 and a maximum of 5.00.
264 AT_SETUP([FREQUENCIES basic percentiles])
265 AT_DATA([frequencies.sps],
266   [DATA LIST LIST notable /x * .
267 BEGIN DATA.
268 1
269 2
270 3
271 4
272 5
273 END DATA.
274
275 FREQUENCIES 
276         VAR=x
277         /FORMAT=NOTABLE
278         /PERCENTILES = 0 25 33.333 50 66.666 75 100.
279 ])
280 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
281   [FREQUENCIES_NTILES_OUTPUT])
282 AT_CLEANUP
283
# Requests tertiles and quartiles through two NTILES subcommands and
# expects the same table as the basic percentiles test.  The input must
# hold the five values 1 through 5: the shared expected output requires
# five valid cases, no missing cases, a mean of 3.00, a minimum of 1.00
# and a maximum of 5.00.
284 AT_SETUP([FREQUENCIES basic n-tiles])
285 AT_DATA([frequencies.sps],
286   [DATA LIST LIST notable /x * .
287 BEGIN DATA.
288 1
289 2
290 3
291 4
292 5
293 END DATA.
294
295 FREQUENCIES 
296         VAR=x
297         /FORMAT=NOTABLE
298         /NTILES = 3
299         /NTILES = 4.
300 ])
301 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
302   [FREQUENCIES_NTILES_OUTPUT])
303 AT_CLEANUP
304
# Percentiles computed with ALGORITHM=COMPATIBLE.  The input must hold
# the five values 1 through 5, each once: the expected frequency table
# lists 1.00 to 5.00 with a frequency of 1 apiece.
305 AT_SETUP([FREQUENCIES compatibility percentiles])
306 AT_DATA([frequencies.sps],
307   [DATA LIST LIST notable /X * .
308 BEGIN DATA.
309 1
310 2
311 3
312 4
313 5
314 END DATA.
315
316 FREQUENCIES 
317         VAR=x
318         /ALGORITHM=COMPATIBLE
319         /PERCENTILES = 0 25 50 75 100.
320 ])
321 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
322   [Table: X
323 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
324 ,1.00,1,20.00,20.00,20.00
325 ,2.00,1,20.00,20.00,40.00
326 ,3.00,1,20.00,20.00,60.00
327 ,4.00,1,20.00,20.00,80.00
328 ,5.00,1,20.00,20.00,100.00
329 Total,,5,100.0,100.0,
330
331 Table: X
332 N,Valid,5
333 ,Missing,0
334 Mean,,3.00
335 Std Dev,,1.58
336 Minimum,,1.00
337 Maximum,,5.00
338 Percentiles,0,1.00
339 ,25,1.50
340 ,50 (Median),3.00
341 ,75,4.50
342 ,100,5.00
343 ])
344 AT_CLEANUP
345
# Percentiles computed without an ALGORITHM subcommand.  The input must
# hold the five values 1 through 5, each once: the expected frequency
# table lists 1.00 to 5.00 with a frequency of 1 apiece.
346 AT_SETUP([FREQUENCIES enhanced percentiles])
347 AT_DATA([frequencies.sps],
348   [DATA LIST LIST notable /X * .
349 BEGIN DATA.
350 1
351 2
352 3
353 4
354 5
355 END DATA.
356
357 FREQUENCIES 
358         VAR=x
359         /PERCENTILES = 0 25 50 75 100.
360 ])
361 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
362   [Table: X
363 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
364 ,1.00,1,20.00,20.00,20.00
365 ,2.00,1,20.00,20.00,40.00
366 ,3.00,1,20.00,20.00,60.00
367 ,4.00,1,20.00,20.00,80.00
368 ,5.00,1,20.00,20.00,100.00
369 Total,,5,100.0,100.0,
370
371 Table: X
372 N,Valid,5
373 ,Missing,0
374 Mean,,3.00
375 Std Dev,,1.58
376 Minimum,,1.00
377 Maximum,,5.00
378 Percentiles,0,1.00
379 ,25,2.00
380 ,50 (Median),3.00
381 ,75,4.00
382 ,100,5.00
383 ])
384 AT_CLEANUP
385
# Weighted input: values 4 and 5 each appear on two records of weight 1,
# so every value is expected to end up with a total frequency of 2.00.
386 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
387 AT_DATA([frequencies.sps],
388   [DATA LIST LIST notable /X * F *.
389 BEGIN DATA.
390 1 2
391 2 2
392 3 2
393 4 1
394 4 1
395 5 1
396 5 1
397 END DATA.
398
399 WEIGHT BY f.
400
401 FREQUENCIES 
402         VAR=x
403         /PERCENTILES = 0 25 50 75 100.
404 ])
405 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
406   [Table: X
407 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
408 ,1.00,2.00,20.00,20.00,20.00
409 ,2.00,2.00,20.00,20.00,40.00
410 ,3.00,2.00,20.00,20.00,60.00
411 ,4.00,2.00,20.00,20.00,80.00
412 ,5.00,2.00,20.00,20.00,100.00
413 Total,,10.00,100.0,100.0,
414
415 Table: X
416 N,Valid,10.00
417 ,Missing,.00
418 Mean,,3.00
419 Std Dev,,1.49
420 Minimum,,1.00
421 Maximum,,5.00
422 Percentiles,0,1.00
423 ,25,2.00
424 ,50 (Median),3.00
425 ,75,4.00
426 ,100,5.00
427 ])
428 AT_CLEANUP
429
# Weighted input with unequal totals per value: value 5 appears on two
# records of weight 1 and the weights sum to the expected total of 6.00.
430 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
431 AT_DATA([frequencies.sps],
432   [DATA LIST LIST notable /X * F *.
433 BEGIN DATA.
434 1 1
435 3 2
436 4 1
437 5 1
438 5 1
439 END DATA.
440
441 WEIGHT BY f.
442
443 FREQUENCIES 
444         VAR=x
445         /PERCENTILES = 0 25 50 75 100.
446 ])
447 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
448   [Table: X
449 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
450 ,1.00,1.00,16.67,16.67,16.67
451 ,3.00,2.00,33.33,33.33,50.00
452 ,4.00,1.00,16.67,16.67,66.67
453 ,5.00,2.00,33.33,33.33,100.00
454 Total,,6.00,100.0,100.0,
455
456 Table: X
457 N,Valid,6.00
458 ,Missing,.00
459 Mean,,3.50
460 Std Dev,,1.52
461 Minimum,,1.00
462 Maximum,,5.00
463 Percentiles,0,1.00
464 ,25,3.00
465 ,50 (Median),3.50
466 ,75,4.75
467 ,100,5.00
468 ])
469 AT_CLEANUP
470
471 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
472 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
473 AT_DATA([frequencies.sps],
474   [DATA LIST LIST notable /X * F *.
475 BEGIN DATA.
476 1 7
477 2 16
478 3 12
479 4 5
480 END DATA.
481
482 WEIGHT BY f.
483
484 FREQUENCIES 
485         VAR=x
486         /PERCENTILES = 0 25 50 75 100.
487 ])
# The weights 7, 16, 12 and 5 sum to the expected total of 40.00.
488 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
489 Table: X
490 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
491 ,1.00,7.00,17.50,17.50,17.50
492 ,2.00,16.00,40.00,40.00,57.50
493 ,3.00,12.00,30.00,30.00,87.50
494 ,4.00,5.00,12.50,12.50,100.00
495 Total,,40.00,100.0,100.0,
496
497 Table: X
498 N,Valid,40.00
499 ,Missing,.00
500 Mean,,2.38
501 Std Dev,,.93
502 Minimum,,1.00
503 Maximum,,4.00
504 Percentiles,0,1.00
505 ,25,2.00
506 ,50 (Median),2.00
507 ,75,3.00
508 ,100,4.00
509 ])
510 AT_CLEANUP
511
# Value 99 is declared user-missing.  Its weight of 4 is expected to
# count toward the overall total of 10.00 but not toward the 6.00 valid
# cases on which the statistics and percentiles are based.
512 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
513 AT_DATA([frequencies.sps],
514   [DATA LIST LIST notable /X * F *.
515 BEGIN DATA.
516 1 1
517 3 2
518 4 1
519 5 1
520 5 1
521 99 4
522 END DATA.
523
524 MISSING VALUE x (99.0) .
525 WEIGHT BY f.
526
527 FREQUENCIES 
528         VAR=x
529         /PERCENTILES = 0 25 50 75 100.
530 ])
531
532 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
533   [Table: X
534 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
535 ,1.00,1.00,10.00,16.67,16.67
536 ,3.00,2.00,20.00,33.33,50.00
537 ,4.00,1.00,10.00,16.67,66.67
538 ,5.00,2.00,20.00,33.33,100.00
539 ,99.00,4.00,40.00,Missing,
540 Total,,10.00,100.0,100.0,
541
542 Table: X
543 N,Valid,6.00
544 ,Missing,4.00
545 Mean,,3.50
546 Std Dev,,1.52
547 Minimum,,1.00
548 Maximum,,5.00
549 Percentiles,0,1.00
550 ,25,3.00
551 ,50 (Median),3.50
552 ,75,4.75
553 ,100,5.00
554 ])
555 AT_CLEANUP
556
# Requests a histogram with a normal curve; output is ignored and only
# exit status 0 is required.
# NOTE(review): the seventeen records between begin data and end data
# are empty in this copy.  A dichotomous variable presumably needs two
# distinct values here - confirm the intended records against upstream.
557 AT_SETUP([FREQUENCIES dichotomous histogram])
558 AT_DATA([frequencies.sps], [dnl
559 data list notable list /d4 *.
560 begin data.
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578 end data.
579
580 FREQUENCIES
581         /VARIABLES = d4
582         /FORMAT=AVALUE TABLE
583         /HISTOGRAM=NORMAL
584         .
585 ])
586
587 AT_CHECK([pspp frequencies.sps], [0],  [ignore])
588 AT_CLEANUP
589
590
# Only the median is requested, so the expected statistics table holds
# just the case counts and the 50th percentile.
591 AT_SETUP([FREQUENCIES median])
592 AT_DATA([median.sps], [dnl
593 data list notable list /x *.
594 begin data.
595 1
596 2
597 3000000
598 end data.
599
600 FREQUENCIES
601         /VARIABLES = x
602         /STATISTICS = MEDIAN
603         .
604 ])
605
606 AT_CHECK([pspp median.sps -O format=csv], [0],  [dnl
607 Table: x
608 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
609 ,1.00,1,33.33,33.33,33.33
610 ,2.00,1,33.33,33.33,66.67
611 ,3000000.00,1,33.33,33.33,100.00
612 Total,,3,100.0,100.0,
613
614 Table: x
615 N,Valid,3
616 ,Missing,0
617 Percentiles,50 (Median),2.00
618 ])
619 AT_CLEANUP
620
621
622
# A bare STATISTICS subcommand and STATISTICS = DEFAULT are expected to
# print identical tables: mean, standard deviation, minimum and maximum.
623 AT_SETUP([FREQUENCIES default statistics])
624 AT_DATA([median.sps], [dnl
625 data list notable list /x *.
626 begin data.
627 10
628 20
629 3000000
630 end data.
631
632 FREQUENCIES
633         /VARIABLES = x
634         /STATISTICS
635         .
636
637 FREQUENCIES
638         /VARIABLES = x
639         /STATISTICS = DEFAULT
640         .
641 ])
642
643 AT_CHECK([pspp median.sps -O format=csv], [0],  [dnl
644 Table: x
645 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
646 ,10.00,1,33.33,33.33,33.33
647 ,20.00,1,33.33,33.33,66.67
648 ,3000000.00,1,33.33,33.33,100.00
649 Total,,3,100.0,100.0,
650
651 Table: x
652 N,Valid,3
653 ,Missing,0
654 Mean,,1000010.00
655 Std Dev,,1732042.15
656 Minimum,,10.00
657 Maximum,,3000000.00
658
659 Table: x
660 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
661 ,10.00,1,33.33,33.33,33.33
662 ,20.00,1,33.33,33.33,66.67
663 ,3000000.00,1,33.33,33.33,100.00
664 Total,,3,100.0,100.0,
665
666 Table: x
667 N,Valid,3
668 ,Missing,0
669 Mean,,1000010.00
670 Std Dev,,1732042.15
671 Minimum,,10.00
672 Maximum,,3000000.00
673 ])
674 AT_CLEANUP
675
676
677
# All three input records are a lone period and the expected output
# counts them as three missing cases with no valid ones.  Every
# statistic requested by STATISTICS = ALL is expected to print as a
# period.
678 AT_SETUP([FREQUENCIES no valid data])
679 AT_DATA([empty.sps], [dnl
680 data list notable list /x *.
681 begin data.
682 .
683 .
684 .
685 end data.
686
687 FREQUENCIES
688         /VARIABLES = x
689         /STATISTICS = ALL
690         .
691 ])
692
693 AT_CHECK([pspp empty.sps -O format=csv], [0],  [dnl
694 Table: x
695 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
696 ,.  ,3,100.00,Missing,
697 Total,,3,100.0,100.0,
698
699 Table: x
700 N,Valid,0
701 ,Missing,3
702 Mean,,.
703 S.E. Mean,,.
704 Mode,,.
705 Std Dev,,.
706 Variance,,.
707 Kurtosis,,.
708 S.E. Kurt,,.
709 Skewness,,.
710 S.E. Skew,,.
711 Range,,.
712 Minimum,,.
713 Maximum,,.
714 Sum,,.
715 Percentiles,,.
716 ])
717
718 AT_CLEANUP