Implemented the McNemar test. Closes bug #33242
[pspp-builds.git] / tests / language / stats / frequencies.at
# Banner under which the FREQUENCIES tests in this file are grouped.
1 AT_BANNER([FREQUENCIES procedure])
2
# Runs FREQUENCIES on the 8-character string variable "name" with a
# single case, and expects exit status 0 plus a one-row CSV frequency
# table.  Per the test title, this guards against a crash on string
# variables.
3 AT_SETUP([FREQUENCIES string variable crash])
4 AT_DATA([frequencies.sps],
5   [DATA LIST FREE/
6    name  (A8) value * quantity .
7 BEGIN DATA.
8 Cables 829 3 
9 END DATA.
10 EXECUTE.
11
12 FREQUENCIES /VAR = name.
13 ])
14 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
15   [Table: name
16 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
17 ,Cables  ,1,100.00,100.00,100.00
18 Total,,1,100.0,100.0,
19 ])
20 AT_CLEANUP
21
22 # Tests for a bug where pspp would crash if two FREQUENCIES commands
23 # existed in an input file.
# The same FREQUENCIES command is issued twice with statistics disabled,
# so the expected output is the v1 and v2 frequency tables, repeated once
# per run.
24 AT_SETUP([FREQUENCIES two runs crash])
25 AT_DATA([frequencies.sps],
26   [data list free /v1 v2.
27 begin data.
28 0 1
29 2 3 
30 4 5
31 3 4
32 end data.
33
34 frequencies v1 v2/statistics=none.
35 frequencies v1 v2/statistics=none.
36 ])
37 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
38   [Table: v1
39 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
40 ,.00,1,25.00,25.00,25.00
41 ,2.00,1,25.00,25.00,50.00
42 ,3.00,1,25.00,25.00,75.00
43 ,4.00,1,25.00,25.00,100.00
44 Total,,4,100.0,100.0,
45
46 Table: v2
47 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
48 ,1.00,1,25.00,25.00,25.00
49 ,3.00,1,25.00,25.00,50.00
50 ,4.00,1,25.00,25.00,75.00
51 ,5.00,1,25.00,25.00,100.00
52 Total,,4,100.0,100.0,
53
54 Table: v1
55 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
56 ,.00,1,25.00,25.00,25.00
57 ,2.00,1,25.00,25.00,50.00
58 ,3.00,1,25.00,25.00,75.00
59 ,4.00,1,25.00,25.00,100.00
60 Total,,4,100.0,100.0,
61
62 Table: v2
63 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
64 ,1.00,1,25.00,25.00,25.00
65 ,3.00,1,25.00,25.00,50.00
66 ,4.00,1,25.00,25.00,75.00
67 ,5.00,1,25.00,25.00,100.00
68 Total,,4,100.0,100.0,
69 ])
70 AT_CLEANUP
71
72 # Tests for a bug where PSPP would crash when a FREQUENCIES command
73 # was used with the HTML output driver.
# PSPP is run with two outputs: CSV, which is compared against the
# expected text below, and pspp.html.  The second check only requires
# that pspp.html exists and is non-empty; its contents are not inspected.
74 AT_SETUP([FREQUENCIES HTML output crash])
75 AT_DATA([frequencies.sps],
76   [data list free /v1 v2.
77 begin data.
78 0 1
79 2 3 
80 4 5
81 3 4
82 end data.
83
84 list.
85
86 frequencies v1/statistics=none.
87 ])
88 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
89   [Table: Data List
90 v1,v2
91 .00,1.00
92 2.00,3.00
93 4.00,5.00
94 3.00,4.00
95
96 Table: v1
97 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
98 ,.00,1,25.00,25.00,25.00
99 ,2.00,1,25.00,25.00,50.00
100 ,3.00,1,25.00,25.00,75.00
101 ,4.00,1,25.00,25.00,100.00
102 Total,,4,100.0,100.0,
103 ])
104 AT_CHECK([test -s pspp.html])
105 AT_CLEANUP
106
107 # Tests for a bug which crashed PSPP when a piechart with too many
108 # segments was requested.
# The input has eleven distinct values of x, weighted by w.  The
# frequency table and statistics are switched off with /format=notable
# and /statistics=none, so the only expected text output is the
# variable summary printed by DATA LIST.
109 AT_SETUP([FREQUENCIES pie chart crash])
110 AT_DATA([frequencies.sps],
111   [data list list /x * w *.
112 begin data.
113 1  4
114 34 10
115 -9 15
116 232 6
117 11  4
118 134 1
119 9  5
120 32 16
121 -2 6
122 2  16
123 20  6
124 end data.
125
126 weight by w.
127
128 frequencies /x /format=notable /statistics=none
129         /piechart.
130 ])
131 # Cannot use the CSV driver for this because it does not output charts
132 # at all.
133 AT_CHECK([pspp frequencies.sps], [0], [dnl
134 Reading free-form data from INLINE.
135 +--------+------+
136 |Variable|Format|
137 #========#======#
138 |x       |F8.0  |
139 |w       |F8.0  |
140 +--------+------+
141 ])
142 AT_CLEANUP
143
144 # Tests for a bug which crashed PSPP when the median and a histogram
145 # were both requested.
# A single case (x = 1) is enough to trigger the code path.  The CSV
# output is expected to contain the frequency table followed by the
# statistics table.
# NOTE(review): the expected statistics table also lists an S.E. Kurt
# row although only STATISTICS=median is requested -- confirm that this
# is intended output rather than a recorded quirk.
146 AT_SETUP([FREQUENCIES median with histogram crash])
147 AT_DATA([frequencies.sps], [dnl
148 data list list notable /x.
149 begin data.
150 1
151 end data.
152
153 frequencies /x /histogram /STATISTICS=median.
154 ])
155 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
156 Table: x
157 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
158 ,1.00,1,100.00,100.00,100.00
159 Total,,1,100.0,100.0,
160
161 Table: x
162 N,Valid,1
163 ,Missing,0
164 S.E. Kurt,,.00
165 ,50 (Median),1.00
166 ])
167 AT_CLEANUP
168
169 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
170 # crash (bug #11492).
# The temporary SELECT IF keeps only the cases with SEX equal to F, so
# the expected frequency table and statistics cover just the four X
# values 12, 13, 21 and 31.
171 AT_SETUP([FREQUENCIES crash after TEMPORARY])
172 AT_DATA([frequencies.sps],
173   [DATA LIST LIST /SEX (A1) X *.
174 BEGIN DATA.
175 M 31
176 F 21
177 M 41
178 F 31
179 M 13
180 F 12
181 M 14
182 F 13
183 END DATA.
184
185
186 TEMPORARY
187 SELECT IF SEX EQ 'F'
188 FREQUENCIES /X .
189
190 FINISH
191 ])
192 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
193   [Table: Reading free-form data from INLINE.
194 Variable,Format
195 SEX,A1
196 X,F8.0
197
198 Table: X
199 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
200 ,12.00,1,25.00,25.00,25.00
201 ,13.00,1,25.00,25.00,50.00
202 ,21.00,1,25.00,25.00,75.00
203 ,31.00,1,25.00,25.00,100.00
204 Total,,4,100.0,100.0,
205
206 Table: X
207 N,Valid,4
208 ,Missing,0
209 Mean,,19.25
210 Std Dev,,8.81
211 Minimum,,12.00
212 Maximum,,31.00
213 ])
214 AT_CLEANUP
215
# Expected CSV statistics table shared by the "basic percentiles" and
# "basic n-tiles" tests, both of which pass this macro as the expected
# output of their AT_CHECK.
216 m4_define([FREQUENCIES_NTILES_OUTPUT],
217   [Table: x
218 N,Valid,5
219 ,Missing,0
220 Mean,,3.00
221 Std Dev,,1.58
222 Minimum,,1.00
223 Maximum,,5.00
224 Percentiles,0,1.00
225 ,25,2.00
226 ,33,2.33
227 ,50 (Median),3.00
228 ,67,3.67
229 ,75,4.00
230 ,100,5.00
231 ])
# Requests explicit percentiles (0, 25, 33.333, 50, 66.666, 75, 100) for
# the five values 1 through 5, with the frequency table suppressed by
# /FORMAT=NOTABLE.  The expected output is FREQUENCIES_NTILES_OUTPUT,
# which reports N = 5, mean 3.00, minimum 1.00 and maximum 5.00.
232 AT_SETUP([FREQUENCIES basic percentiles])
233 AT_DATA([frequencies.sps],
234   [DATA LIST LIST notable /x * .
235 BEGIN DATA.
236 1
237 2
238 3
239 4
240 5
241 END DATA.
242
243 FREQUENCIES 
244         VAR=x
245         /FORMAT=NOTABLE
246         /PERCENTILES = 0 25 33.333 50 66.666 75 100.
247 ])
248 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
249   [FREQUENCIES_NTILES_OUTPUT])
250 AT_CLEANUP
251
# Requests tertiles and quartiles through two NTILES subcommands for the
# five values 1 through 5, with the frequency table suppressed by
# /FORMAT=NOTABLE.  The expected output is FREQUENCIES_NTILES_OUTPUT,
# which reports N = 5, mean 3.00, minimum 1.00 and maximum 5.00.
252 AT_SETUP([FREQUENCIES basic n-tiles])
253 AT_DATA([frequencies.sps],
254   [DATA LIST LIST notable /x * .
255 BEGIN DATA.
256 1
257 2
258 3
259 4
260 5
261 END DATA.
262
263 FREQUENCIES 
264         VAR=x
265         /FORMAT=NOTABLE
266         /NTILES = 3
267         /NTILES = 4.
268 ])
269 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
270   [FREQUENCIES_NTILES_OUTPUT])
271 AT_CLEANUP
272
# Computes percentiles for the five values 1 through 5 with
# /ALGORITHM=COMPATIBLE.  With this setting the expected 25th and 75th
# percentiles are 1.50 and 4.50.
273 AT_SETUP([FREQUENCIES compatibility percentiles])
274 AT_DATA([frequencies.sps],
275   [DATA LIST LIST notable /X * .
276 BEGIN DATA.
277 1
278 2
279 3
280 4
281 5
282 END DATA.
283
284 FREQUENCIES 
285         VAR=x
286         /ALGORITHM=COMPATIBLE
287         /PERCENTILES = 0 25 50 75 100.
288 ])
289 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
290   [Table: X
291 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
292 ,1.00,1,20.00,20.00,20.00
293 ,2.00,1,20.00,20.00,40.00
294 ,3.00,1,20.00,20.00,60.00
295 ,4.00,1,20.00,20.00,80.00
296 ,5.00,1,20.00,20.00,100.00
297 Total,,5,100.0,100.0,
298
299 Table: X
300 N,Valid,5
301 ,Missing,0
302 Mean,,3.00
303 Std Dev,,1.58
304 Minimum,,1.00
305 Maximum,,5.00
306 Percentiles,0,1.00
307 ,25,1.50
308 ,50 (Median),3.00
309 ,75,4.50
310 ,100,5.00
311 ])
312 AT_CLEANUP
313
# Computes percentiles for the five values 1 through 5 without an
# ALGORITHM subcommand.  The expected 25th and 75th percentiles are 2.00
# and 4.00.
314 AT_SETUP([FREQUENCIES enhanced percentiles])
315 AT_DATA([frequencies.sps],
316   [DATA LIST LIST notable /X * .
317 BEGIN DATA.
318 1
319 2
320 3
321 4
322 5
323 END DATA.
324
325 FREQUENCIES 
326         VAR=x
327         /PERCENTILES = 0 25 50 75 100.
328 ])
329 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
330   [Table: X
331 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
332 ,1.00,1,20.00,20.00,20.00
333 ,2.00,1,20.00,20.00,40.00
334 ,3.00,1,20.00,20.00,60.00
335 ,4.00,1,20.00,20.00,80.00
336 ,5.00,1,20.00,20.00,100.00
337 Total,,5,100.0,100.0,
338
339 Table: X
340 N,Valid,5
341 ,Missing,0
342 Mean,,3.00
343 Std Dev,,1.58
344 Minimum,,1.00
345 Maximum,,5.00
346 Percentiles,0,1.00
347 ,25,2.00
348 ,50 (Median),3.00
349 ,75,4.00
350 ,100,5.00
351 ])
352 AT_CLEANUP
353
# Percentiles with case weights.  Values 1 to 3 each appear once with
# weight 2, while values 4 and 5 each appear in two cases of weight 1,
# so every value is expected to show a total frequency of 2.00 out of
# 10.00.
354 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
355 AT_DATA([frequencies.sps],
356   [DATA LIST LIST notable /X * F *.
357 BEGIN DATA.
358 1 2
359 2 2
360 3 2
361 4 1
362 4 1
363 5 1
364 5 1
365 END DATA.
366
367 WEIGHT BY f.
368
369 FREQUENCIES 
370         VAR=x
371         /PERCENTILES = 0 25 50 75 100.
372 ])
373 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
374   [Table: X
375 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
376 ,1.00,2.00,20.00,20.00,20.00
377 ,2.00,2.00,20.00,20.00,40.00
378 ,3.00,2.00,20.00,20.00,60.00
379 ,4.00,2.00,20.00,20.00,80.00
380 ,5.00,2.00,20.00,20.00,100.00
381 Total,,10.00,100.0,100.0,
382
383 Table: X
384 N,Valid,10.00
385 ,Missing,.00
386 Mean,,3.00
387 Std Dev,,1.49
388 Minimum,,1.00
389 Maximum,,5.00
390 Percentiles,0,1.00
391 ,25,2.00
392 ,50 (Median),3.00
393 ,75,4.00
394 ,100,5.00
395 ])
396 AT_CLEANUP
397
# Percentiles with unequal case weights: the weights sum to 6, value 2
# is absent, and value 5 is split across two cases of weight 1.  The
# expected median is 3.50 and the expected 75th percentile is 4.75.
398 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
399 AT_DATA([frequencies.sps],
400   [DATA LIST LIST notable /X * F *.
401 BEGIN DATA.
402 1 1
403 3 2
404 4 1
405 5 1
406 5 1
407 END DATA.
408
409 WEIGHT BY f.
410
411 FREQUENCIES 
412         VAR=x
413         /PERCENTILES = 0 25 50 75 100.
414 ])
415 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
416   [Table: X
417 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
418 ,1.00,1.00,16.67,16.67,16.67
419 ,3.00,2.00,33.33,33.33,50.00
420 ,4.00,1.00,16.67,16.67,66.67
421 ,5.00,2.00,33.33,33.33,100.00
422 Total,,6.00,100.0,100.0,
423
424 Table: X
425 N,Valid,6.00
426 ,Missing,.00
427 Mean,,3.50
428 Std Dev,,1.52
429 Minimum,,1.00
430 Maximum,,5.00
431 Percentiles,0,1.00
432 ,25,3.00
433 ,50 (Median),3.50
434 ,75,4.75
435 ,100,5.00
436 ])
437 AT_CLEANUP
438
# Percentiles for four values whose case weights (7, 16, 12 and 5) sum
# to 40.  The expected 25th percentile and median are both 2.00.
439 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
440 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
441 AT_DATA([frequencies.sps],
442   [DATA LIST LIST notable /X * F *.
443 BEGIN DATA.
444 1 7
445 2 16
446 3 12
447 4 5
448 END DATA.
449
450 WEIGHT BY f.
451
452 FREQUENCIES 
453         VAR=x
454         /PERCENTILES = 0 25 50 75 100.
455 ])
456 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
457 Table: X
458 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
459 ,1.00,7.00,17.50,17.50,17.50
460 ,2.00,16.00,40.00,40.00,57.50
461 ,3.00,12.00,30.00,30.00,87.50
462 ,4.00,5.00,12.50,12.50,100.00
463 Total,,40.00,100.0,100.0,
464
465 Table: X
466 N,Valid,40.00
467 ,Missing,.00
468 Mean,,2.38
469 Std Dev,,.93
470 Minimum,,1.00
471 Maximum,,4.00
472 Percentiles,0,1.00
473 ,25,2.00
474 ,50 (Median),2.00
475 ,75,3.00
476 ,100,4.00
477 ])
478 AT_CLEANUP
479
# Weighted percentiles when some weight belongs to a user-missing value.
# The value 99 (weight 4) is declared missing, so the frequency table is
# expected to list it as Missing, and the statistics are expected to be
# computed from the remaining valid weight of 6.00 only.
480 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
481 AT_DATA([frequencies.sps],
482   [DATA LIST LIST notable /X * F *.
483 BEGIN DATA.
484 1 1
485 3 2
486 4 1
487 5 1
488 5 1
489 99 4
490 END DATA.
491
492 MISSING VALUE x (99.0) .
493 WEIGHT BY f.
494
495 FREQUENCIES 
496         VAR=x
497         /PERCENTILES = 0 25 50 75 100.
498 ])
499 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
500   [Table: X
501 Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
502 ,1.00,1.00,10.00,16.67,16.67
503 ,3.00,2.00,20.00,33.33,50.00
504 ,4.00,1.00,10.00,16.67,66.67
505 ,5.00,2.00,20.00,33.33,100.00
506 ,99.00,4.00,40.00,Missing,
507 Total,,10.00,100.0,100.0,
508
509 Table: X
510 N,Valid,6.00
511 ,Missing,4.00
512 Mean,,3.50
513 Std Dev,,1.52
514 Minimum,,1.00
515 Maximum,,5.00
516 Percentiles,0,1.00
517 ,25,3.00
518 ,50 (Median),3.50
519 ,75,4.75
520 ,100,5.00
521 ])
522 AT_CLEANUP