FREQUENCIES: Fix percentiles calculation.
[pspp-builds.git] / tests / language / stats / frequencies.at
# Banner under which the FREQUENCIES test groups below are listed.
1 AT_BANNER([FREQUENCIES procedure])
2
# Runs FREQUENCIES on the string variable "name" from a one-case data set
# and compares the CSV frequency table that pspp prints.  Per its title,
# this is a regression test for a crash on string variables.
3 AT_SETUP([FREQUENCIES string variable crash])
4 AT_DATA([frequencies.sps],
5   [DATA LIST FREE/
6    name  (A8) value * quantity .
7 BEGIN DATA.
8 Cables 829 3 
9 END DATA.
10 EXECUTE.
11
12 FREQUENCIES /VAR = name.
13 ])
14 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
15   [Table: name
16 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
17 ,Cables  ,1,100.00,100.00,100.00
18 Total,,1,100.0,100.0,
19 ])
20 AT_CLEANUP
21
22 # Tests for a bug where pspp would crash if two FREQUENCIES commands
23 # existed in an input file.  Both runs must print the same v1 and v2 tables.
24 AT_SETUP([FREQUENCIES two runs crash])
25 AT_DATA([frequencies.sps],
26   [data list free /v1 v2.
27 begin data.
28 0 1
29 2 3 
30 4 5
31 3 4
32 end data.
33
34 frequencies v1 v2/statistics=none.
35 frequencies v1 v2/statistics=none.
36 ])
37 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
38   [Table: v1
39 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
40 ,.00,1,25.00,25.00,25.00
41 ,2.00,1,25.00,25.00,50.00
42 ,3.00,1,25.00,25.00,75.00
43 ,4.00,1,25.00,25.00,100.00
44 Total,,4,100.0,100.0,
45
46 Table: v2
47 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
48 ,1.00,1,25.00,25.00,25.00
49 ,3.00,1,25.00,25.00,50.00
50 ,4.00,1,25.00,25.00,75.00
51 ,5.00,1,25.00,25.00,100.00
52 Total,,4,100.0,100.0,
53
54 Table: v1
55 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
56 ,.00,1,25.00,25.00,25.00
57 ,2.00,1,25.00,25.00,50.00
58 ,3.00,1,25.00,25.00,75.00
59 ,4.00,1,25.00,25.00,100.00
60 Total,,4,100.0,100.0,
61
62 Table: v2
63 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
64 ,1.00,1,25.00,25.00,25.00
65 ,3.00,1,25.00,25.00,50.00
66 ,4.00,1,25.00,25.00,75.00
67 ,5.00,1,25.00,25.00,100.00
68 Total,,4,100.0,100.0,
69 ])
70 AT_CLEANUP
71
72 # Tests for a bug where PSPP would crash when a FREQUENCIES command
73 # was used with the HTML output driver.  CSV output on stdout is compared
# against the expected tables, and the second check requires that the
# pspp.html file written by the same run exists and is not empty.
74 AT_SETUP([FREQUENCIES HTML output crash])
75 AT_DATA([frequencies.sps],
76   [data list free /v1 v2.
77 begin data.
78 0 1
79 2 3 
80 4 5
81 3 4
82 end data.
83
84 list.
85
86 frequencies v1/statistics=none.
87 ])
88 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
89   [Table: Data List
90 v1,v2
91 .00,1.00
92 2.00,3.00
93 4.00,5.00
94 3.00,4.00
95
96 Table: v1
97 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
98 ,.00,1,25.00,25.00,25.00
99 ,2.00,1,25.00,25.00,50.00
100 ,3.00,1,25.00,25.00,75.00
101 ,4.00,1,25.00,25.00,100.00
102 Total,,4,100.0,100.0,
103 ])
104 AT_CHECK([test -s pspp.html])
105 AT_CLEANUP
106
107 # Tests for a bug which crashed PSPP when a pie chart with too many
108 # segments was requested.  The data set has eleven distinct x values.
109 AT_SETUP([FREQUENCIES pie chart crash])
110 AT_DATA([frequencies.sps],
111   [data list list /x * w *.
112 begin data.
113 1  4
114 34 10
115 -9 15
116 232 6
117 11  4
118 134 1
119 9  5
120 32 16
121 -2 6
122 2  16
123 20  6
124 end data.
125
126 weight by w.
127
128 frequencies /x /format=notable /statistics=none
129         /piechart.
130 ])
131 # Cannot use the CSV driver for this because it does not output charts
132 # at all, so pspp runs without -O format=csv and its text output is compared.
133 AT_CHECK([pspp frequencies.sps], [0],
134   [DATA LIST
135
136 Reading free-form data from INLINE.
137 +--------+------+
138 |Variable|Format|
139 #========#======#
140 |x       |F8.0  |
141 |w       |F8.0  |
142 +--------+------+
143
144 BEGIN DATA
145
146 WEIGHT
147
148 FREQUENCIES
149 ])
150 AT_CLEANUP
151
152 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
153 # crash (bug #11492).  Only the four cases with SEX equal to F are expected.
154 AT_SETUP([FREQUENCIES crash after TEMPORARY])
155 AT_DATA([frequencies.sps],
156   [DATA LIST LIST /SEX (A1) X *.
157 BEGIN DATA.
158 M 31
159 F 21
160 M 41
161 F 31
162 M 13
163 F 12
164 M 14
165 F 13
166 END DATA.
167
168
169 TEMPORARY
170 SELECT IF SEX EQ 'F'
171 FREQUENCIES /X .
172
173 FINISH
174 ])
175 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
176   [Table: Reading free-form data from INLINE.
177 Variable,Format
178 SEX,A1
179 X,F8.0
180
181 Table: X
182 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
183 ,12.00,1,25.00,25.00,25.00
184 ,13.00,1,25.00,25.00,50.00
185 ,21.00,1,25.00,25.00,75.00
186 ,31.00,1,25.00,25.00,100.00
187 Total,,4,100.0,100.0,
188
189 Table: X
190 N,Valid,4
191 ,Missing,0
192 Mean,,19.25
193 Std Dev,,8.81
194 Minimum,,12.00
195 Maximum,,31.00
196 ])
197 AT_CLEANUP
198
# Expected CSV statistics table for variable x, shared by the
# "basic percentiles" and "basic n-tiles" test groups that follow.
199 m4_define([FREQUENCIES_NTILES_OUTPUT],
200   [Table: x
201 N,Valid,5
202 ,Missing,0
203 Mean,,3.00
204 Std Dev,,1.58
205 Minimum,,1.00
206 Maximum,,5.00
207 Percentiles,0,1.00
208 ,25,2.00
209 ,33,2.33
210 ,50 (Median),3.00
211 ,67,3.67
212 ,75,4.00
213 ,100,5.00
214 ])
# Requests explicit percentiles, including thirds, for the five values
# 1 through 5 with the frequency table suppressed, and compares the
# statistics table against FREQUENCIES_NTILES_OUTPUT.
215 AT_SETUP([FREQUENCIES basic percentiles])
216 AT_DATA([frequencies.sps],
217   [DATA LIST LIST notable /x * .
218 BEGIN DATA.
219 1
220 2
221 3
222 4
223 5
224 END DATA.
225
226 FREQUENCIES 
227         VAR=x
228         /FORMAT=NOTABLE
229         /PERCENTILES = 0 25 33.333 50 66.666 75 100.
230 ])
231 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
232   [FREQUENCIES_NTILES_OUTPUT])
233 AT_CLEANUP
234
# Requests 3-tiles and 4-tiles for the five values 1 through 5 with the
# frequency table suppressed, and compares the statistics table against
# FREQUENCIES_NTILES_OUTPUT.
235 AT_SETUP([FREQUENCIES basic n-tiles])
236 AT_DATA([frequencies.sps],
237   [DATA LIST LIST notable /x * .
238 BEGIN DATA.
239 1
240 2
241 3
242 4
243 5
244 END DATA.
245
246 FREQUENCIES 
247         VAR=x
248         /FORMAT=NOTABLE
249         /NTILES = 3
250         /NTILES = 4.
251 ])
252 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
253   [FREQUENCIES_NTILES_OUTPUT])
254 AT_CLEANUP
255
# Computes quartile percentiles for the five values 1 through 5 with
# /ALGORITHM=COMPATIBLE.  The expected 25th and 75th percentiles here
# are 1.50 and 4.50, unlike the 2.00 and 4.00 expected by the
# "enhanced percentiles" test on the same data.
256 AT_SETUP([FREQUENCIES compatibility percentiles])
257 AT_DATA([frequencies.sps],
258   [DATA LIST LIST notable /X * .
259 BEGIN DATA.
260 1
261 2
262 3
263 4
264 5
265 END DATA.
266
267 FREQUENCIES 
268         VAR=x
269         /ALGORITHM=COMPATIBLE
270         /PERCENTILES = 0 25 50 75 100.
271 ])
272 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
273   [Table: X
274 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
275 ,1.00,1,20.00,20.00,20.00
276 ,2.00,1,20.00,20.00,40.00
277 ,3.00,1,20.00,20.00,60.00
278 ,4.00,1,20.00,20.00,80.00
279 ,5.00,1,20.00,20.00,100.00
280 Total,,5,100.0,100.0,
281
282 Table: X
283 N,Valid,5
284 ,Missing,0
285 Mean,,3.00
286 Std Dev,,1.58
287 Minimum,,1.00
288 Maximum,,5.00
289 Percentiles,0,1.00
290 ,25,1.50
291 ,50 (Median),3.00
292 ,75,4.50
293 ,100,5.00
294 ])
295 AT_CLEANUP
296
# Computes quartile percentiles for the five unweighted values 1 through 5
# without an ALGORITHM subcommand, and compares both the frequency table
# and the statistics table.
297 AT_SETUP([FREQUENCIES enhanced percentiles])
298 AT_DATA([frequencies.sps],
299   [DATA LIST LIST notable /X * .
300 BEGIN DATA.
301 1
302 2
303 3
304 4
305 5
306 END DATA.
307
308 FREQUENCIES 
309         VAR=x
310         /PERCENTILES = 0 25 50 75 100.
311 ])
312 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
313   [Table: X
314 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
315 ,1.00,1,20.00,20.00,20.00
316 ,2.00,1,20.00,20.00,40.00
317 ,3.00,1,20.00,20.00,60.00
318 ,4.00,1,20.00,20.00,80.00
319 ,5.00,1,20.00,20.00,100.00
320 Total,,5,100.0,100.0,
321
322 Table: X
323 N,Valid,5
324 ,Missing,0
325 Mean,,3.00
326 Std Dev,,1.58
327 Minimum,,1.00
328 Maximum,,5.00
329 Percentiles,0,1.00
330 ,25,2.00
331 ,50 (Median),3.00
332 ,75,4.00
333 ,100,5.00
334 ])
335 AT_CLEANUP
336
# Quartile percentiles with cases weighted by F.  The values 4 and 5 each
# appear in two cases of weight 1, and the expected frequency table shows
# every value with a total frequency of 2.00.
337 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
338 AT_DATA([frequencies.sps],
339   [DATA LIST LIST notable /X * F *.
340 BEGIN DATA.
341 1 2
342 2 2
343 3 2
344 4 1
345 4 1
346 5 1
347 5 1
348 END DATA.
349
350 WEIGHT BY f.
351
352 FREQUENCIES 
353         VAR=x
354         /PERCENTILES = 0 25 50 75 100.
355 ])
356 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
357   [Table: X
358 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
359 ,1.00,2.00,20.00,20.00,20.00
360 ,2.00,2.00,20.00,20.00,40.00
361 ,3.00,2.00,20.00,20.00,60.00
362 ,4.00,2.00,20.00,20.00,80.00
363 ,5.00,2.00,20.00,20.00,100.00
364 Total,,10.00,100.0,100.0,
365
366 Table: X
367 N,Valid,10.00
368 ,Missing,.00
369 Mean,,3.00
370 Std Dev,,1.49
371 Minimum,,1.00
372 Maximum,,5.00
373 Percentiles,0,1.00
374 ,25,2.00
375 ,50 (Median),3.00
376 ,75,4.00
377 ,100,5.00
378 ])
379 AT_CLEANUP
380
# Quartile percentiles with cases weighted by F and unequal totals per
# value: the expected frequencies are 1, 2, 1 and 2 for the values
# 1, 3, 4 and 5, and the median and 75th percentile are non-integer.
381 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
382 AT_DATA([frequencies.sps],
383   [DATA LIST LIST notable /X * F *.
384 BEGIN DATA.
385 1 1
386 3 2
387 4 1
388 5 1
389 5 1
390 END DATA.
391
392 WEIGHT BY f.
393
394 FREQUENCIES 
395         VAR=x
396         /PERCENTILES = 0 25 50 75 100.
397 ])
398 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
399   [Table: X
400 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
401 ,1.00,1.00,16.67,16.67,16.67
402 ,3.00,2.00,33.33,33.33,50.00
403 ,4.00,1.00,16.67,16.67,66.67
404 ,5.00,2.00,33.33,33.33,100.00
405 Total,,6.00,100.0,100.0,
406
407 Table: X
408 N,Valid,6.00
409 ,Missing,.00
410 Mean,,3.50
411 Std Dev,,1.52
412 Minimum,,1.00
413 Maximum,,5.00
414 Percentiles,0,1.00
415 ,25,3.00
416 ,50 (Median),3.50
417 ,75,4.75
418 ,100,5.00
419 ])
420 AT_CLEANUP
421
# Quartile percentiles for four values weighted by F, with a total
# weight of 40.  All expected percentiles are whole data values.
422 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
423 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
424 AT_DATA([frequencies.sps],
425   [DATA LIST LIST notable /X * F *.
426 BEGIN DATA.
427 1 7
428 2 16
429 3 12
430 4 5
431 END DATA.
432
433 WEIGHT BY f.
434
435 FREQUENCIES 
436         VAR=x
437         /PERCENTILES = 0 25 50 75 100.
438 ])
439 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
440 Table: X
441 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
442 ,1.00,7.00,17.50,17.50,17.50
443 ,2.00,16.00,40.00,40.00,57.50
444 ,3.00,12.00,30.00,30.00,87.50
445 ,4.00,5.00,12.50,12.50,100.00
446 Total,,40.00,100.0,100.0,
447
448 Table: X
449 N,Valid,40.00
450 ,Missing,.00
451 Mean,,2.38
452 Std Dev,,.93
453 Minimum,,1.00
454 Maximum,,4.00
455 Percentiles,0,1.00
456 ,25,2.00
457 ,50 (Median),2.00
458 ,75,3.00
459 ,100,4.00
460 ])
461 AT_CLEANUP
462
# Same valid cases and weights as the "weighted (2)" test, plus a case
# with x equal to 99 and weight 4 that is declared user-missing.  The
# expected statistics match the "weighted (2)" test except that the
# Missing count is 4.00, and the frequency table lists 99.00 as Missing.
463 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
464 AT_DATA([frequencies.sps],
465   [DATA LIST LIST notable /X * F *.
466 BEGIN DATA.
467 1 1
468 3 2
469 4 1
470 5 1
471 5 1
472 99 4
473 END DATA.
474
475 MISSING VALUE x (99.0) .
476 WEIGHT BY f.
477
478 FREQUENCIES 
479         VAR=x
480         /PERCENTILES = 0 25 50 75 100.
481 ])
482 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
483   [Table: X
484 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
485 ,1.00,1.00,10.00,16.67,16.67
486 ,3.00,2.00,20.00,33.33,50.00
487 ,4.00,1.00,10.00,16.67,66.67
488 ,5.00,2.00,20.00,33.33,100.00
489 ,99.00,4.00,40.00,Missing,
490 Total,,10.00,100.0,100.0,
491
492 Table: X
493 N,Valid,6.00
494 ,Missing,4.00
495 Mean,,3.50
496 Std Dev,,1.52
497 Minimum,,1.00
498 Maximum,,5.00
499 Percentiles,0,1.00
500 ,25,3.00
501 ,50 (Median),3.50
502 ,75,4.75
503 ,100,5.00
504 ])
505 AT_CLEANUP