FREQUENCIES: Fix treatment of string variables.
[pspp] / tests / language / stats / frequencies.at
# Opens the FREQUENCIES category: the test groups that follow are
# reported under this banner.
1 AT_BANNER([FREQUENCIES procedure])
2
# Runs FREQUENCIES on the 8-character string variable "name" and checks
# the CSV frequency table: four distinct values over ten cases, with each
# value padded with spaces to the variable's width.
3 AT_SETUP([FREQUENCIES string variable])
4 AT_DATA([frequencies.sps],
5   [DATA LIST FREE/
6    name  (A8) value * quantity .
7 BEGIN DATA.
8 foo 1 5
9 bar 2 6
10 baz 1 9
11 quux 3 1
12 bar 1 2
13 baz 4 3
14 baz 1 4
15 baz 1 1
16 foo 6 0
17 quux 5 8
18 END DATA.
19 EXECUTE.
20
21 FREQUENCIES /VAR = name.
22 ])
23 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
24 Table: name
25 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
26 ,bar     ,2,20.00,20.00,20.00
27 ,baz     ,4,40.00,40.00,60.00
28 ,foo     ,2,20.00,20.00,80.00
29 ,quux    ,2,20.00,20.00,100.00
30 Total,,10,100.0,100.0,
31 ])
32 AT_CLEANUP
33
34 # Tests for a bug where pspp would crash if two FREQUENCIES commands
35 # existed in an input file.
36 AT_SETUP([FREQUENCIES two runs crash])
37 AT_DATA([frequencies.sps],
38   [data list free /v1 v2.
39 begin data.
40 0 1
41 2 3 
42 4 5
43 3 4
44 end data.
45
46 frequencies v1 v2/statistics=none.
47 frequencies v1 v2/statistics=none.
48 ])
# The script issues the same FREQUENCIES command twice, so the expected
# output is the v1 and v2 frequency tables printed two times over.  No
# statistics table is expected because of /statistics=none.
49 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
50   [Table: v1
51 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
52 ,.00,1,25.00,25.00,25.00
53 ,2.00,1,25.00,25.00,50.00
54 ,3.00,1,25.00,25.00,75.00
55 ,4.00,1,25.00,25.00,100.00
56 Total,,4,100.0,100.0,
57
58 Table: v2
59 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
60 ,1.00,1,25.00,25.00,25.00
61 ,3.00,1,25.00,25.00,50.00
62 ,4.00,1,25.00,25.00,75.00
63 ,5.00,1,25.00,25.00,100.00
64 Total,,4,100.0,100.0,
65
66 Table: v1
67 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
68 ,.00,1,25.00,25.00,25.00
69 ,2.00,1,25.00,25.00,50.00
70 ,3.00,1,25.00,25.00,75.00
71 ,4.00,1,25.00,25.00,100.00
72 Total,,4,100.0,100.0,
73
74 Table: v2
75 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
76 ,1.00,1,25.00,25.00,25.00
77 ,3.00,1,25.00,25.00,50.00
78 ,4.00,1,25.00,25.00,75.00
79 ,5.00,1,25.00,25.00,100.00
80 Total,,4,100.0,100.0,
81 ])
82 AT_CLEANUP
83
84 # Tests for a bug where PSPP would crash when a FREQUENCIES command
85 # was used with the HTML output driver.
86 AT_SETUP([FREQUENCIES HTML output crash])
87 AT_DATA([frequencies.sps],
88   [data list free /v1 v2.
89 begin data.
90 0 1
91 2 3 
92 4 5
93 3 4
94 end data.
95
96 list.
97
98 frequencies v1/statistics=none.
99 ])
# Two outputs are requested: CSV on stdout, which is compared below, and
# pspp.html.  The HTML content is not compared; the follow-up check only
# requires that the file exists and is non-empty.
100 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
101   [Table: Data List
102 v1,v2
103 .00,1.00
104 2.00,3.00
105 4.00,5.00
106 3.00,4.00
107
108 Table: v1
109 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
110 ,.00,1,25.00,25.00,25.00
111 ,2.00,1,25.00,25.00,50.00
112 ,3.00,1,25.00,25.00,75.00
113 ,4.00,1,25.00,25.00,100.00
114 Total,,4,100.0,100.0,
115 ])
116 AT_CHECK([test -s pspp.html])
117 AT_CLEANUP
118
119 # Tests for a bug which crashed PSPP when a piechart with too many
120 # segments was requested.
# Eleven weighted cases, each with a distinct x value, are fed to
# FREQUENCIES /piechart.  With /format=notable and /statistics=none the
# only text output expected is the DATA LIST variable table.
121 AT_SETUP([FREQUENCIES pie chart crash])
122 AT_DATA([frequencies.sps],
123   [data list list /x * w *.
124 begin data.
125 1  4
126 34 10
127 -9 15
128 232 6
129 11  4
130 134 1
131 9  5
132 32 16
133 -2 6
134 2  16
135 20  6
136 end data.
137
138 weight by w.
139
140 frequencies /x /format=notable /statistics=none
141         /piechart.
142 ])
143 # Cannot use the CSV driver for this because it does not output charts
144 # at all.
145 AT_CHECK([pspp frequencies.sps], [0], [dnl
146 Reading free-form data from INLINE.
147 +--------+------+
148 |Variable|Format|
149 #========#======#
150 |x       |F8.0  |
151 |w       |F8.0  |
152 +--------+------+
153 ])
154 AT_CLEANUP
155
156 dnl Check that histogram subcommand runs without crashing
# Weighted data is passed to FREQUENCIES with a /histogram subcommand.
# The output is ignored; the test passes when pspp exits with status 0.
157 AT_SETUP([FREQUENCIES histogram crash])
158 AT_DATA([frequencies.sps],
159   [data list notable list /x * w *.
160 begin data.
161 1  4
162 34 10
163 -9 15
164 232 6
165 11  4
166 134 1
167 9  5
168 32 16
169 -2 6
170 2  16
171 20  6
172 end data.
173
174 weight by w.
175
176 frequencies /x 
177             /format=notable 
178             /statistics=none
179             /histogram=minimum(0) maximum(50) percent(5) normal.
180 ])
181 # Cannot use the CSV driver for this because it does not output charts
182 # at all.
183 AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore])
184 AT_CLEANUP
185
186 # Tests for a bug which crashed PSPP when the median and a histogram
187 # were both requested.
# A single-case data set is analysed with both /histogram and
# /STATISTICS=median; only the exit status is checked.
188 AT_SETUP([FREQUENCIES median with histogram crash])
189 AT_DATA([frequencies.sps], [dnl
190 data list list notable /x.
191 begin data.
192 1
193 end data.
194
195 frequencies /x /histogram /STATISTICS=median.
196 ])
197 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
198 dnl Ignore output - No crash test.
199 AT_CLEANUP
200
201 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
202 # crash (bug #11492).
# TEMPORARY followed by SELECT IF restricts FREQUENCIES to the four cases
# with SEX equal to F, so both expected tables cover X = 12, 13, 21, 31.
203 AT_SETUP([FREQUENCIES crash after TEMPORARY])
204 AT_DATA([frequencies.sps],
205   [DATA LIST LIST /SEX (A1) X *.
206 BEGIN DATA.
207 M 31
208 F 21
209 M 41
210 F 31
211 M 13
212 F 12
213 M 14
214 F 13
215 END DATA.
216
217
218 TEMPORARY
219 SELECT IF SEX EQ 'F'
220 FREQUENCIES /X .
221
222 FINISH
223 ])
224 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
225   [Table: Reading free-form data from INLINE.
226 Variable,Format
227 SEX,A1
228 X,F8.0
229
230 Table: X
231 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
232 ,12.00,1,25.00,25.00,25.00
233 ,13.00,1,25.00,25.00,50.00
234 ,21.00,1,25.00,25.00,75.00
235 ,31.00,1,25.00,25.00,100.00
236 Total,,4,100.0,100.0,
237
238 Table: X
239 N,Valid,4
240 ,Missing,0
241 Mean,,19.25
242 Std Dev,,8.81
243 Minimum,,12.00
244 Maximum,,31.00
245 ])
246 AT_CLEANUP
247
# Expected CSV statistics table shared by the "basic percentiles" and
# "basic n-tiles" test groups, which both pass this macro as the expected
# stdout of their pspp run.
248 m4_define([FREQUENCIES_NTILES_OUTPUT],
249   [Table: x
250 N,Valid,5
251 ,Missing,0
252 Mean,,3.00
253 Std Dev,,1.58
254 Minimum,,1.00
255 Maximum,,5.00
256 Percentiles,0,1.00
257 ,25,2.00
258 ,33,2.33
259 ,50 (Median),3.00
260 ,67,3.67
261 ,75,4.00
262 ,100,5.00
263 ])
# Five unweighted cases, x = 1 through 5, with an explicit /PERCENTILES
# list and the frequency table suppressed by /FORMAT=NOTABLE.
# The data rows 1 to 4 are restored here: the expected output requires
# N 5, mean 3.00, std dev 1.58, minimum 1.00, maximum 5.00 and quartiles
# 2.00, 3.00, 4.00, which only the values 1, 2, 3, 4, 5 satisfy.
264 AT_SETUP([FREQUENCIES basic percentiles])
265 AT_DATA([frequencies.sps],
266   [DATA LIST LIST notable /x * .
267 BEGIN DATA.
268 1
269 2
270 3
271 4
272 5
273 END DATA.
274
275 FREQUENCIES 
276         VAR=x
277         /FORMAT=NOTABLE
278         /PERCENTILES = 0 25 33.333 50 66.666 75 100.
279 ])
280 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
281   [FREQUENCIES_NTILES_OUTPUT])
282 AT_CLEANUP
283
# Five unweighted cases, x = 1 through 5.  /NTILES = 3 and /NTILES = 4
# together are expected to produce the same percentile rows as the
# explicit list in the basic percentiles test, hence the shared expected
# output macro.
# The data rows 1 to 4 are restored here: the expected output requires
# N 5, mean 3.00, std dev 1.58, minimum 1.00, maximum 5.00 and quartiles
# 2.00, 3.00, 4.00, which only the values 1, 2, 3, 4, 5 satisfy.
284 AT_SETUP([FREQUENCIES basic n-tiles])
285 AT_DATA([frequencies.sps],
286   [DATA LIST LIST notable /x * .
287 BEGIN DATA.
288 1
289 2
290 3
291 4
292 5
293 END DATA.
294
295 FREQUENCIES 
296         VAR=x
297         /FORMAT=NOTABLE
298         /NTILES = 3
299         /NTILES = 4.
300 ])
301 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
302   [FREQUENCIES_NTILES_OUTPUT])
303 AT_CLEANUP
304
# Five unweighted cases, X = 1 through 5, with /ALGORITHM=COMPATIBLE.
# The expected quartiles here are 1.50 and 4.50, unlike the 2.00 and 4.00
# expected by the enhanced percentiles test on the same data.
# The data rows 1 to 4 are restored here to match the expected frequency
# table, which lists 1.00 through 5.00 with a frequency of 1 each.
305 AT_SETUP([FREQUENCIES compatibility percentiles])
306 AT_DATA([frequencies.sps],
307   [DATA LIST LIST notable /X * .
308 BEGIN DATA.
309 1
310 2
311 3
312 4
313 5
314 END DATA.
315
316 FREQUENCIES 
317         VAR=x
318         /ALGORITHM=COMPATIBLE
319         /PERCENTILES = 0 25 50 75 100.
320 ])
321 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
322   [Table: X
323 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
324 ,1.00,1,20.00,20.00,20.00
325 ,2.00,1,20.00,20.00,40.00
326 ,3.00,1,20.00,20.00,60.00
327 ,4.00,1,20.00,20.00,80.00
328 ,5.00,1,20.00,20.00,100.00
329 Total,,5,100.0,100.0,
330
331 Table: X
332 N,Valid,5
333 ,Missing,0
334 Mean,,3.00
335 Std Dev,,1.58
336 Minimum,,1.00
337 Maximum,,5.00
338 Percentiles,0,1.00
339 ,25,1.50
340 ,50 (Median),3.00
341 ,75,4.50
342 ,100,5.00
343 ])
344 AT_CLEANUP
345
# Five unweighted cases, X = 1 through 5, with no /ALGORITHM subcommand.
# The expected quartiles are 2.00 and 4.00.
# The data rows 1 to 4 are restored here to match the expected frequency
# table, which lists 1.00 through 5.00 with a frequency of 1 each.
346 AT_SETUP([FREQUENCIES enhanced percentiles])
347 AT_DATA([frequencies.sps],
348   [DATA LIST LIST notable /X * .
349 BEGIN DATA.
350 1
351 2
352 3
353 4
354 5
355 END DATA.
356
357 FREQUENCIES 
358         VAR=x
359         /PERCENTILES = 0 25 50 75 100.
360 ])
361 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
362   [Table: X
363 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
364 ,1.00,1,20.00,20.00,20.00
365 ,2.00,1,20.00,20.00,40.00
366 ,3.00,1,20.00,20.00,60.00
367 ,4.00,1,20.00,20.00,80.00
368 ,5.00,1,20.00,20.00,100.00
369 Total,,5,100.0,100.0,
370
371 Table: X
372 N,Valid,5
373 ,Missing,0
374 Mean,,3.00
375 Std Dev,,1.58
376 Minimum,,1.00
377 Maximum,,5.00
378 Percentiles,0,1.00
379 ,25,2.00
380 ,50 (Median),3.00
381 ,75,4.00
382 ,100,5.00
383 ])
384 AT_CLEANUP
385
# Cases are weighted by F.  The values 4 and 5 each occur in two rows of
# weight 1, so every value of X ends up with a total weight of 2 and the
# expected table reports a weighted N of 10.
386 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
387 AT_DATA([frequencies.sps],
388   [DATA LIST LIST notable /X * F *.
389 BEGIN DATA.
390 1 2
391 2 2
392 3 2
393 4 1
394 4 1
395 5 1
396 5 1
397 END DATA.
398
399 WEIGHT BY f.
400
401 FREQUENCIES 
402         VAR=x
403         /PERCENTILES = 0 25 50 75 100.
404 ])
405 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
406   [Table: X
407 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
408 ,1.00,2.00,20.00,20.00,20.00
409 ,2.00,2.00,20.00,20.00,40.00
410 ,3.00,2.00,20.00,20.00,60.00
411 ,4.00,2.00,20.00,20.00,80.00
412 ,5.00,2.00,20.00,20.00,100.00
413 Total,,10.00,100.0,100.0,
414
415 Table: X
416 N,Valid,10.00
417 ,Missing,.00
418 Mean,,3.00
419 Std Dev,,1.49
420 Minimum,,1.00
421 Maximum,,5.00
422 Percentiles,0,1.00
423 ,25,2.00
424 ,50 (Median),3.00
425 ,75,4.00
426 ,100,5.00
427 ])
428 AT_CLEANUP
429
# Cases are weighted by F with unequal totals per value: 5 occurs in two
# rows of weight 1 and 3 has weight 2, giving a weighted N of 6.
430 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
431 AT_DATA([frequencies.sps],
432   [DATA LIST LIST notable /X * F *.
433 BEGIN DATA.
434 1 1
435 3 2
436 4 1
437 5 1
438 5 1
439 END DATA.
440
441 WEIGHT BY f.
442
443 FREQUENCIES 
444         VAR=x
445         /PERCENTILES = 0 25 50 75 100.
446 ])
447 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
448   [Table: X
449 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
450 ,1.00,1.00,16.67,16.67,16.67
451 ,3.00,2.00,33.33,33.33,50.00
452 ,4.00,1.00,16.67,16.67,66.67
453 ,5.00,2.00,33.33,33.33,100.00
454 Total,,6.00,100.0,100.0,
455
456 Table: X
457 N,Valid,6.00
458 ,Missing,.00
459 Mean,,3.50
460 Std Dev,,1.52
461 Minimum,,1.00
462 Maximum,,5.00
463 Percentiles,0,1.00
464 ,25,3.00
465 ,50 (Median),3.50
466 ,75,4.75
467 ,100,5.00
468 ])
469 AT_CLEANUP
470
471 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
# Four values of X weighted by F; the weights 7, 16, 12 and 5 sum to the
# weighted N of 40 shown in the expected output.
472 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
473 AT_DATA([frequencies.sps],
474   [DATA LIST LIST notable /X * F *.
475 BEGIN DATA.
476 1 7
477 2 16
478 3 12
479 4 5
480 END DATA.
481
482 WEIGHT BY f.
483
484 FREQUENCIES 
485         VAR=x
486         /PERCENTILES = 0 25 50 75 100.
487 ])
488 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
489 Table: X
490 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
491 ,1.00,7.00,17.50,17.50,17.50
492 ,2.00,16.00,40.00,40.00,57.50
493 ,3.00,12.00,30.00,30.00,87.50
494 ,4.00,5.00,12.50,12.50,100.00
495 Total,,40.00,100.0,100.0,
496
497 Table: X
498 N,Valid,40.00
499 ,Missing,.00
500 Mean,,2.38
501 Std Dev,,.93
502 Minimum,,1.00
503 Maximum,,4.00
504 Percentiles,0,1.00
505 ,25,2.00
506 ,50 (Median),2.00
507 ,75,3.00
508 ,100,4.00
509 ])
510 AT_CLEANUP
511
# Same weighted data as the weighted (2) test plus a row for 99 with
# weight 4.  99 is declared missing for x, so the expected output counts
# it in the total of 10 and in the Percent column, but reports it as
# Missing and leaves it out of the valid N of 6 and the statistics.
512 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
513 AT_DATA([frequencies.sps],
514   [DATA LIST LIST notable /X * F *.
515 BEGIN DATA.
516 1 1
517 3 2
518 4 1
519 5 1
520 5 1
521 99 4
522 END DATA.
523
524 MISSING VALUE x (99.0) .
525 WEIGHT BY f.
526
527 FREQUENCIES 
528         VAR=x
529         /PERCENTILES = 0 25 50 75 100.
530 ])
531
532 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
533   [Table: X
534 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
535 ,1.00,1.00,10.00,16.67,16.67
536 ,3.00,2.00,20.00,33.33,50.00
537 ,4.00,1.00,10.00,16.67,66.67
538 ,5.00,2.00,20.00,33.33,100.00
539 ,99.00,4.00,40.00,Missing,
540 Total,,10.00,100.0,100.0,
541
542 Table: X
543 N,Valid,6.00
544 ,Missing,4.00
545 Mean,,3.50
546 Std Dev,,1.52
547 Minimum,,1.00
548 Maximum,,5.00
549 Percentiles,0,1.00
550 ,25,3.00
551 ,50 (Median),3.50
552 ,75,4.75
553 ,100,5.00
554 ])
555 AT_CLEANUP
556
# Runs FREQUENCIES with /HISTOGRAM=NORMAL on the single variable d4.
# The output is ignored; only the exit status 0 is checked.
# NOTE(review): the seventeen inline data rows are empty in this copy of
# the file, and because the output is ignored their values cannot be
# recovered from here.  Presumably each row held one of two values, as
# the test name suggests -- restore them from the upstream source.
557 AT_SETUP([FREQUENCIES dichotomous histogram])
558 AT_DATA([frequencies.sps], [dnl
559 data list notable list /d4 *.
560 begin data.
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578 end data.
579
580 FREQUENCIES
581         /VARIABLES = d4
582         /FORMAT=AVALUE TABLE
583         /HISTOGRAM=NORMAL
584         .
585 ])
586
587 AT_CHECK([pspp frequencies.sps], [0],  [ignore])
588 AT_CLEANUP