improve FREQUENCIES LAYERED
[pspp] / tests / language / stats / frequencies.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
3 dnl
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
17 AT_BANNER([FREQUENCIES procedure])
18
dnl Runs FREQUENCIES on the string variable "name" with ORDER=ANALYSIS and
dnl checks the CSV frequency table: among the ten input cases bar, foo and
dnl quux each occur twice and baz occurs four times.
19 AT_SETUP([FREQUENCIES string variable])
20 AT_DATA([frequencies.sps],
21   [DATA LIST FREE/
22    name  (A8) value * quantity .
23 BEGIN DATA.
24 foo 1 5
25 bar 2 6
26 baz 1 9
27 quux 3 1
28 bar 1 2
29 baz 4 3
30 baz 1 4
31 baz 1 1
32 foo 6 0
33 quux 5 8
34 END DATA.
35 EXECUTE.
36
37 FREQUENCIES /VAR = name/ORDER=ANALYSIS.
38 ])
39 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
40 Table: name
41 ,,Frequency,Percent,Valid Percent,Cumulative Percent
42 Valid,bar,2,20.0%,20.0%,20.0%
43 ,baz,4,40.0%,40.0%,60.0%
44 ,foo,2,20.0%,20.0%,80.0%
45 ,quux,2,20.0%,20.0%,100.0%
46 Total,,10,100.0%,,
47 ])
48 AT_CLEANUP
49
dnl Sorts the cases by "name", then runs FREQUENCIES on value and quantity
dnl under SPLIT FILE BY name.  No LAYERED/SEPARATE keyword is given; the
dnl test title treats this as the LAYERED case.  Expects one Statistics
dnl table with a value/quantity column pair for each of bar, baz, foo, quux.
50 AT_SETUP([FREQUENCIES with SPLIT FILE - LAYERED])
51 AT_DATA([frequencies.sps], [dnl
52 DATA LIST LIST NOTABLE/name (A8) value quantity.
53 BEGIN DATA.
54 foo 1 5
55 bar 2 6
56 baz 1 9
57 quux 3 1
58 bar 1 2
59 baz 4 3
60 baz 1 4
61 baz 1 1
62 foo 6 0
63 quux 5 8
64 END DATA.
65 EXECUTE.
66
67 SORT CASES BY name.
68 SPLIT FILE BY name.
69 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
70 ])
71 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
72 Table: Statistics
73 ,,name,,,,,,,
74 ,,bar,,baz,,foo,,quux,
75 ,,value,quantity,value,quantity,value,quantity,value,quantity
76 N,Valid,2,2,4,4,2,2,2,2
77 ,Missing,0,0,0,0,0,0,0,0
78 Mean,,1.50,4.00,1.75,4.25,3.50,2.50,4.00,4.50
79 Std Dev,,.71,2.83,1.50,3.40,3.54,3.54,1.41,4.95
80 Minimum,,1.00,2.00,1.00,1.00,1.00,.00,3.00,1.00
81 Maximum,,2.00,6.00,4.00,9.00,6.00,5.00,5.00,8.00
82 ])
83 AT_CLEANUP
84
dnl Sorts the cases by "name" and runs FREQUENCIES on value and quantity
dnl under SPLIT FILE SEPARATE BY name.  Expects, for each of bar, baz, foo
dnl and quux, a "Split Values" table naming the group followed by that
dnl group's own Statistics table.
85 AT_SETUP([FREQUENCIES with SPLIT FILE - SEPARATE])
86 AT_DATA([frequencies.sps], [dnl
87 DATA LIST LIST NOTABLE/name (A8) value quantity.
88 BEGIN DATA.
89 foo 1 5
90 bar 2 6
91 baz 1 9
92 quux 3 1
93 bar 1 2
94 baz 4 3
95 baz 1 4
96 baz 1 1
97 foo 6 0
98 quux 5 8
99 END DATA.
100 EXECUTE.
101
102 SORT CASES BY name.
103 SPLIT FILE SEPARATE BY name.
104 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
105 ])
106 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
107 Table: Split Values
108 Variable,Value
109 name,bar
110
111 Table: Statistics
112 ,,value,quantity
113 N,Valid,2,2
114 ,Missing,0,0
115 Mean,,1.50,4.00
116 Std Dev,,.71,2.83
117 Minimum,,1.00,2.00
118 Maximum,,2.00,6.00
119
120 Table: Split Values
121 Variable,Value
122 name,baz
123
124 Table: Statistics
125 ,,value,quantity
126 N,Valid,4,4
127 ,Missing,0,0
128 Mean,,1.75,4.25
129 Std Dev,,1.50,3.40
130 Minimum,,1.00,1.00
131 Maximum,,4.00,9.00
132
133 Table: Split Values
134 Variable,Value
135 name,foo
136
137 Table: Statistics
138 ,,value,quantity
139 N,Valid,2,2
140 ,Missing,0,0
141 Mean,,3.50,2.50
142 Std Dev,,3.54,3.54
143 Minimum,,1.00,.00
144 Maximum,,6.00,5.00
145
146 Table: Split Values
147 Variable,Value
148 name,quux
149
150 Table: Statistics
151 ,,value,quantity
152 N,Valid,2,2
153 ,Missing,0,0
154 Mean,,4.00,4.50
155 Std Dev,,1.41,4.95
156 Minimum,,3.00,1.00
157 Maximum,,5.00,8.00
158 ])
159 AT_CLEANUP
160
dnl Runs FREQUENCIES under SPLIT FILE BY name without sorting first.  No
dnl two consecutive input cases share a name, so the expected output treats
dnl each of the ten cases as its own group (N=1, Std Dev NaN).  It also
dnl expects five duplicate-split-value warnings, followed by a notice that
dnl one further such warning was suppressed.  The warnings cite line 17 of
dnl frequencies.sps, so the layout of the syntax file must not change.
161 AT_SETUP([FREQUENCIES with SPLIT FILE - LAYERED - unsorted data])
162 AT_DATA([frequencies.sps], [dnl
163 DATA LIST LIST NOTABLE/name (A8) value quantity.
164 BEGIN DATA.
165 foo 1 5
166 bar 2 6
167 baz 1 9
168 quux 3 1
169 baz 4 3
170 bar 1 2
171 baz 1 1
172 foo 6 0
173 baz 1 4
174 quux 5 8
175 END DATA.
176 EXECUTE.
177
178 SPLIT FILE BY name.
179 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
180 ])
181 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
182 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = baz     "
183
184 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = bar     "
185
186 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = baz     "
187
188 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = foo     "
189
190 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = baz     "
191
192 Table: Statistics
193 ,,name,,,,,,,,,,,,,,,,,,,
194 ,,foo,,bar,,baz,,quux,,baz,,bar,,baz,,foo,,baz,,quux,
195 ,,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity
196 N,Valid,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
197 ,Missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
198 Mean,,1.00,5.00,2.00,6.00,1.00,9.00,3.00,1.00,4.00,3.00,1.00,2.00,1.00,1.00,6.00,.00,1.00,4.00,5.00,8.00
199 Std Dev,,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
200 Minimum,,1.00,5.00,2.00,6.00,1.00,9.00,3.00,1.00,4.00,3.00,1.00,2.00,1.00,1.00,6.00,.00,1.00,4.00,5.00,8.00
201 Maximum,,1.00,5.00,2.00,6.00,1.00,9.00,3.00,1.00,4.00,3.00,1.00,2.00,1.00,1.00,6.00,.00,1.00,4.00,5.00,8.00
202
203 frequencies.sps:17: warning: FREQUENCIES: Suppressed 1 additional warning about duplicate split values.
204 ])
205 AT_CLEANUP
206
207 # Tests for a bug where pspp would crash if two FREQUENCIES commands
208 # existed in an input file.
# The first command adds /ORDER=VARIABLE and the second does not; both are
# expected to print the same v1 and v2 frequency tables.
209 AT_SETUP([FREQUENCIES two runs crash])
210 AT_DATA([frequencies.sps],
211   [data list free /v1 v2.
212 begin data.
213 0 1
214 2 3
215 4 5
216 3 4
217 end data.
218
219 frequencies v1 v2/statistics=none/ORDER=VARIABLE.
220 frequencies v1 v2/statistics=none.
221 ])
222 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
223 Table: v1
224 ,,Frequency,Percent,Valid Percent,Cumulative Percent
225 Valid,.00,1,25.0%,25.0%,25.0%
226 ,2.00,1,25.0%,25.0%,50.0%
227 ,3.00,1,25.0%,25.0%,75.0%
228 ,4.00,1,25.0%,25.0%,100.0%
229 Total,,4,100.0%,,
230
231 Table: v2
232 ,,Frequency,Percent,Valid Percent,Cumulative Percent
233 Valid,1.00,1,25.0%,25.0%,25.0%
234 ,3.00,1,25.0%,25.0%,50.0%
235 ,4.00,1,25.0%,25.0%,75.0%
236 ,5.00,1,25.0%,25.0%,100.0%
237 Total,,4,100.0%,,
238
239 Table: v1
240 ,,Frequency,Percent,Valid Percent,Cumulative Percent
241 Valid,.00,1,25.0%,25.0%,25.0%
242 ,2.00,1,25.0%,25.0%,50.0%
243 ,3.00,1,25.0%,25.0%,75.0%
244 ,4.00,1,25.0%,25.0%,100.0%
245 Total,,4,100.0%,,
246
247 Table: v2
248 ,,Frequency,Percent,Valid Percent,Cumulative Percent
249 Valid,1.00,1,25.0%,25.0%,25.0%
250 ,3.00,1,25.0%,25.0%,50.0%
251 ,4.00,1,25.0%,25.0%,75.0%
252 ,5.00,1,25.0%,25.0%,100.0%
253 Total,,4,100.0%,,
254 ])
255 AT_CLEANUP
256
257 # Test that the LIMIT specification works.
# v1 has four distinct values and v2 has three.  With FORMAT=LIMIT(3) the
# expected output contains only the v2 table; no v1 table is printed.
258 AT_SETUP([FREQUENCIES with LIMIT])
259 AT_DATA([frequencies.sps],
260   [data list free /v1 v2.
261 begin data.
262 0 1
263 2 5
264 4 3
265 3 5
266 end data.
267
268 frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
269 ])
270 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
271 Table: v2
272 ,,Frequency,Percent,Valid Percent,Cumulative Percent
273 Valid,1.00,1,25.0%,25.0%,25.0%
274 ,3.00,1,25.0%,25.0%,50.0%
275 ,5.00,2,50.0%,50.0%,100.0%
276 Total,,4,100.0%,,
277 ])
278 AT_CLEANUP
279
280 # Tests for a bug where PSPP would crash when a FREQUENCIES command
281 # was used with the HTML output driver.
# The run writes CSV to stdout (compared below) and a second copy of the
# output to pspp.html; the final check only requires that pspp.html exists
# and is non-empty.
282 AT_SETUP([FREQUENCIES HTML output crash])
283 AT_DATA([frequencies.sps],
284   [data list free /v1 v2.
285 begin data.
286 0 1
287 2 3
288 4 5
289 3 4
290 end data.
291
292 list.
293
294 frequencies v1/statistics=none.
295 ])
296 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
297   [Table: Data List
298 v1,v2
299 .00,1.00
300 2.00,3.00
301 4.00,5.00
302 3.00,4.00
303
304 Table: v1
305 ,,Frequency,Percent,Valid Percent,Cumulative Percent
306 Valid,.00,1,25.0%,25.0%,25.0%
307 ,2.00,1,25.0%,25.0%,50.0%
308 ,3.00,1,25.0%,25.0%,75.0%
309 ,4.00,1,25.0%,25.0%,100.0%
310 Total,,4,100.0%,,
311 ])
312 AT_CHECK([test -s pspp.html])
313 AT_CLEANUP
314
315 # Tests for a bug which crashed PSPP when a piechart with too many
316 # segments was requested.
# The input has eleven distinct x values, weighted by w.  The expected
# text output contains only the DATA LIST variable summary.
317 AT_SETUP([FREQUENCIES pie chart crash])
318 AT_DATA([frequencies.sps],
319   [data list list /x * w *.
320 begin data.
321 1  4
322 34 10
323 -9 15
324 232 6
325 11  4
326 134 1
327 9  5
328 32 16
329 -2 6
330 2  16
331 20  6
332 end data.
333
334 weight by w.
335
336 frequencies /x /format=notable /statistics=none
337         /piechart.
338 ])
339 # Cannot use the CSV driver for this because it does not output charts
340 # at all.
341 AT_CHECK([pspp frequencies.sps], [0], [dnl
342 Reading free-form data from INLINE.
343 +--------+------+
344 |Variable|Format|
345 +--------+------+
346 |x       |F8.0  |
347 |w       |F8.0  |
348 +--------+------+
349 ])
350 AT_CLEANUP
351
352 dnl Check that the histogram subcommand runs without crashing.
dnl The histogram is requested with explicit minimum, maximum, percent and
dnl normal options on weighted data.  Both stdout and stderr are ignored;
dnl only the exit status of 0 is checked.
353 AT_SETUP([FREQUENCIES histogram crash])
354 AT_DATA([frequencies.sps],
355   [data list notable list /x * w *.
356 begin data.
357 1  4
358 34 10
359 -9 15
360 232 6
361 11  4
362 134 1
363 9  5
364 32 16
365 -2 6
366 2  16
367 20  6
368 end data.
369
370 weight by w.
371
372 frequencies /x
373             /format=notable
374             /statistics=none
375             /histogram=minimum(0) maximum(50) percent(5) normal.
376 ])
377 # Cannot use the CSV driver for this because it does not output charts
378 # at all.
379 AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore], [ignore])
380 AT_CLEANUP
381
382 # Tests for a bug which crashed PSPP when the median and a histogram
383 # were both requested.
# The data set consists of a single case (x = 1).
384 AT_SETUP([FREQUENCIES median with histogram crash])
385 AT_DATA([frequencies.sps], [dnl
386 data list list notable /x.
387 begin data.
388 1
389 end data.
390
391 frequencies /x /histogram /STATISTICS=median.
392 ])
393 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
394 dnl The output is ignored: this test only checks that PSPP does not crash.
395 AT_CLEANUP
396
397 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
398 # crash (bug #11492).
# TEMPORARY, SELECT IF and FINISH are written without terminating periods
# in the syntax file.  The run writes pspp.csv and pspp.txt; only pspp.csv
# is compared.  The expected tables cover the four cases with SEX EQ 'F'.
399 AT_SETUP([FREQUENCIES crash after TEMPORARY])
400 AT_DATA([frequencies.sps],
401   [DATA LIST LIST /SEX (A1) X *.
402 BEGIN DATA.
403 M 31
404 F 21
405 M 41
406 F 31
407 M 13
408 F 12
409 M 14
410 F 13
411 END DATA.
412
413
414 TEMPORARY
415 SELECT IF SEX EQ 'F'
416 FREQUENCIES /X .
417
418 FINISH
419 ])
420 AT_CHECK([pspp -o pspp.csv -o pspp.txt frequencies.sps])
421 AT_CHECK([cat pspp.csv], [0], [dnl
422 Table: Reading free-form data from INLINE.
423 Variable,Format
424 SEX,A1
425 X,F8.0
426
427 Table: Statistics
428 ,,X
429 N,Valid,4
430 ,Missing,0
431 Mean,,19.25
432 Std Dev,,8.81
433 Minimum,,12.00
434 Maximum,,31.00
435
436 Table: X
437 ,,Frequency,Percent,Valid Percent,Cumulative Percent
438 Valid,12.00,1,25.0%,25.0%,25.0%
439 ,13.00,1,25.0%,25.0%,50.0%
440 ,21.00,1,25.0%,25.0%,75.0%
441 ,31.00,1,25.0%,25.0%,100.0%
442 Total,,4,100.0%,,
443 ])
444 AT_CLEANUP
445
dnl Expected CSV output shared by the "basic percentiles" and "basic
dnl n-tiles" tests, which both pass this macro as the expected stdout.
446 m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
447 Table: Statistics
448 ,,x,y
449 N,Valid,5,5
450 ,Missing,0,0
451 Mean,,3.00,30.00
452 Std Dev,,1.58,15.81
453 Minimum,,1.00,10.00
454 Maximum,,5.00,50.00
455 Percentiles,0,1.00,10.00
456 ,25,2.00,20.00
457 ,33,2.33,23.33
458 ,50,3.00,30.00
459 ,67,3.67,36.67
460 ,75,4.00,40.00
461 ,100,5.00,50.00
462 ])
dnl Requests the percentiles 0 25 33.333 50 66.666 75 100 for x and y with
dnl the frequency tables suppressed, and compares the result against the
dnl shared expected-output macro.
463 AT_SETUP([FREQUENCIES basic percentiles])
464 AT_DATA([frequencies.sps],
465   [DATA LIST LIST notable /x y.
466 BEGIN DATA.
467 1 10
468 2 20
469 3 30
470 4 40
471 5 50
472 END DATA.
473
474 FREQUENCIES
475         VAR=x y
476         /FORMAT=NOTABLE
477         /PERCENTILES = 0 25 33.333 50 66.666 75 100.
478 ])
479 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
480   [FREQUENCIES_NTILES_OUTPUT])
481 AT_CLEANUP
482
dnl Requests n-tiles through two NTILES subcommands (3 and 4) on the same
dnl x/y data as the explicit-percentiles test, and expects the identical
dnl shared output macro.
483 AT_SETUP([FREQUENCIES basic n-tiles])
484 AT_DATA([frequencies.sps],
485   [DATA LIST LIST notable /x y.
486 BEGIN DATA.
487 1 10
488 2 20
489 3 30
490 4 40
491 5 50
492 END DATA.
493
494 FREQUENCIES
495         VAR=x y
496         /FORMAT=NOTABLE
497         /NTILES = 3
498         /NTILES = 4.
499 ])
500 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
501   [FREQUENCIES_NTILES_OUTPUT])
502 AT_CLEANUP
503
dnl Percentiles computed with /ALGORITHM=COMPATIBLE.  For the data 1..5 the
dnl expected 25th and 75th percentiles are 1.50 and 4.50.
504 AT_SETUP([FREQUENCIES compatibility percentiles])
505 AT_DATA([frequencies.sps],
506   [DATA LIST LIST notable /X * .
507 BEGIN DATA.
508 1
509 2
510 3
511 4
512 5
513 END DATA.
514
515 FREQUENCIES
516         VAR=x
517         /ALGORITHM=COMPATIBLE
518         /PERCENTILES = 0 25 50 75 100.
519 ])
520 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
521 Table: Statistics
522 ,,X
523 N,Valid,5
524 ,Missing,0
525 Mean,,3.00
526 Std Dev,,1.58
527 Minimum,,1.00
528 Maximum,,5.00
529 Percentiles,0,1.00
530 ,25,1.50
531 ,50,3.00
532 ,75,4.50
533 ,100,5.00
534
535 Table: X
536 ,,Frequency,Percent,Valid Percent,Cumulative Percent
537 Valid,1.00,1,20.0%,20.0%,20.0%
538 ,2.00,1,20.0%,20.0%,40.0%
539 ,3.00,1,20.0%,20.0%,60.0%
540 ,4.00,1,20.0%,20.0%,80.0%
541 ,5.00,1,20.0%,20.0%,100.0%
542 Total,,5,100.0%,,
543 ])
544 AT_CLEANUP
545
dnl Percentiles with no /ALGORITHM subcommand, which the test title calls
dnl "enhanced".  For the data 1..5 the expected 25th and 75th percentiles
dnl are 2.00 and 4.00.
546 AT_SETUP([FREQUENCIES enhanced percentiles])
547 AT_DATA([frequencies.sps],
548   [DATA LIST LIST notable /X * .
549 BEGIN DATA.
550 1
551 2
552 3
553 4
554 5
555 END DATA.
556
557 FREQUENCIES
558         VAR=x
559         /PERCENTILES = 0 25 50 75 100.
560 ])
561 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
562 Table: Statistics
563 ,,X
564 N,Valid,5
565 ,Missing,0
566 Mean,,3.00
567 Std Dev,,1.58
568 Minimum,,1.00
569 Maximum,,5.00
570 Percentiles,0,1.00
571 ,25,2.00
572 ,50,3.00
573 ,75,4.00
574 ,100,5.00
575
576 Table: X
577 ,,Frequency,Percent,Valid Percent,Cumulative Percent
578 Valid,1.00,1,20.0%,20.0%,20.0%
579 ,2.00,1,20.0%,20.0%,40.0%
580 ,3.00,1,20.0%,20.0%,60.0%
581 ,4.00,1,20.0%,20.0%,80.0%
582 ,5.00,1,20.0%,20.0%,100.0%
583 Total,,5,100.0%,,
584 ])
585 AT_CLEANUP
586
dnl Percentiles with cases weighted by f.  The values 4 and 5 each appear
dnl in two rows of weight 1, so every value 1..5 ends up with a total
dnl weight of 2 and the weighted N is 10.
587 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
588 AT_DATA([frequencies.sps],
589   [DATA LIST LIST notable /X * F *.
590 BEGIN DATA.
591 1 2
592 2 2
593 3 2
594 4 1
595 4 1
596 5 1
597 5 1
598 END DATA.
599
600 WEIGHT BY f.
601
602 FREQUENCIES
603         VAR=x
604         /PERCENTILES = 0 25 50 75 100.
605 ])
606 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
607 Table: Statistics
608 ,,X
609 N,Valid,10.00
610 ,Missing,.00
611 Mean,,3.00
612 Std Dev,,1.49
613 Minimum,,1.00
614 Maximum,,5.00
615 Percentiles,0,1.00
616 ,25,2.00
617 ,50,3.00
618 ,75,4.00
619 ,100,5.00
620
621 Table: X
622 ,,Frequency,Percent,Valid Percent,Cumulative Percent
623 Valid,1.00,2.00,20.0%,20.0%,20.0%
624 ,2.00,2.00,20.0%,20.0%,40.0%
625 ,3.00,2.00,20.0%,20.0%,60.0%
626 ,4.00,2.00,20.0%,20.0%,80.0%
627 ,5.00,2.00,20.0%,20.0%,100.0%
628 Total,,10.00,100.0%,,
629 ])
630 AT_CLEANUP
631
dnl Weighted percentiles where the value 2 is absent and the weights sum
dnl to 6.  The expected 50th and 75th percentiles (3.50 and 4.75) fall
dnl between observed data values.
632 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
633 AT_DATA([frequencies.sps],
634   [DATA LIST LIST notable /X * F *.
635 BEGIN DATA.
636 1 1
637 3 2
638 4 1
639 5 1
640 5 1
641 END DATA.
642
643 WEIGHT BY f.
644
645 FREQUENCIES
646         VAR=x
647         /PERCENTILES = 0 25 50 75 100.
648 ])
649 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
650 Table: Statistics
651 ,,X
652 N,Valid,6.00
653 ,Missing,.00
654 Mean,,3.50
655 Std Dev,,1.52
656 Minimum,,1.00
657 Maximum,,5.00
658 Percentiles,0,1.00
659 ,25,3.00
660 ,50,3.50
661 ,75,4.75
662 ,100,5.00
663
664 Table: X
665 ,,Frequency,Percent,Valid Percent,Cumulative Percent
666 Valid,1.00,1.00,16.7%,16.7%,16.7%
667 ,3.00,2.00,33.3%,33.3%,50.0%
668 ,4.00,1.00,16.7%,16.7%,66.7%
669 ,5.00,2.00,33.3%,33.3%,100.0%
670 Total,,6.00,100.0%,,
671 ])
672 AT_CLEANUP
673
674 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
dnl Weighted percentiles for four values whose integer weights sum to 40.
675 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
676 AT_DATA([frequencies.sps],
677   [DATA LIST LIST notable /X * F *.
678 BEGIN DATA.
679 1 7
680 2 16
681 3 12
682 4 5
683 END DATA.
684
685 WEIGHT BY f.
686
687 FREQUENCIES
688         VAR=x
689         /PERCENTILES = 0 25 50 75 100.
690 ])
691 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
692 Table: Statistics
693 ,,X
694 N,Valid,40.00
695 ,Missing,.00
696 Mean,,2.38
697 Std Dev,,.93
698 Minimum,,1.00
699 Maximum,,4.00
700 Percentiles,0,1.00
701 ,25,2.00
702 ,50,2.00
703 ,75,3.00
704 ,100,4.00
705
706 Table: X
707 ,,Frequency,Percent,Valid Percent,Cumulative Percent
708 Valid,1.00,7.00,17.5%,17.5%,17.5%
709 ,2.00,16.00,40.0%,40.0%,57.5%
710 ,3.00,12.00,30.0%,30.0%,87.5%
711 ,4.00,5.00,12.5%,12.5%,100.0%
712 Total,,40.00,100.0%,,
713 ])
714 AT_CLEANUP
715
dnl Weighted percentiles where x = 99 (weight 4) is declared user-missing.
dnl The expected statistics use only the valid cases (weighted N of 6),
dnl and the frequency table reports 99.00 in a separate Missing row.
716 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
717 AT_DATA([frequencies.sps],
718   [DATA LIST LIST notable /X * F *.
719 BEGIN DATA.
720 1 1
721 3 2
722 4 1
723 5 1
724 5 1
725 99 4
726 END DATA.
727
728 MISSING VALUE x (99.0) .
729 WEIGHT BY f.
730
731 FREQUENCIES
732         VAR=x
733         /PERCENTILES = 0 25 50 75 100.
734 ])
735
736 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
737 Table: Statistics
738 ,,X
739 N,Valid,6.00
740 ,Missing,4.00
741 Mean,,3.50
742 Std Dev,,1.52
743 Minimum,,1.00
744 Maximum,,5.00
745 Percentiles,0,1.00
746 ,25,3.00
747 ,50,3.50
748 ,75,4.75
749 ,100,5.00
750
751 Table: X
752 ,,Frequency,Percent,Valid Percent,Cumulative Percent
753 Valid,1.00,1.00,10.0%,16.7%,16.7%
754 ,3.00,2.00,20.0%,33.3%,50.0%
755 ,4.00,1.00,10.0%,16.7%,66.7%
756 ,5.00,2.00,20.0%,33.3%,100.0%
757 Missing,99.00,4.00,40.0%,,
758 Total,,10.00,100.0%,,
759 ])
760 AT_CLEANUP
761
dnl Requests a histogram with a normal curve for a variable that is mostly
dnl 0 with a few 1s and a single 2.  Stdout is ignored; the exit status
dnl must be 0.
762 AT_SETUP([FREQUENCIES dichotomous histogram])
763 AT_DATA([frequencies.sps], [dnl
764 data list notable list /d4 *.
765 begin data.
766 0
767 0
768 0
769 1
770 0
771 0
772 0
773 0
774 1
775 0
776 0
777 0
778 0
779 0
780 1
781 2
782 0
783 end data.
784
785 FREQUENCIES
786         /VARIABLES = d4
787         /FORMAT=AVALUE TABLE
788         /HISTOGRAM=NORMAL
789         .
790 ])
791
792 AT_CHECK([pspp frequencies.sps], [0],  [ignore])
793 AT_CLEANUP
794
795
dnl Requests only the MEDIAN statistic.  For the data 1, 2, 3000000 the
dnl expected median is 2.00.
796 AT_SETUP([FREQUENCIES median])
797 AT_DATA([median.sps], [dnl
798 data list notable list /x *.
799 begin data.
800 1
801 2
802 3000000
803 end data.
804
805 FREQUENCIES
806         /VARIABLES = x
807         /STATISTICS = MEDIAN
808         .
809 ])
810
811 AT_CHECK([pspp median.sps -O format=csv], [0], [dnl
812 Table: Statistics
813 ,,x
814 N,Valid,3
815 ,Missing,0
816 Median,,2.00
817
818 Table: x
819 ,,Frequency,Percent,Valid Percent,Cumulative Percent
820 Valid,1.00,1,33.3%,33.3%,33.3%
821 ,2.00,1,33.3%,33.3%,66.7%
822 ,3000000,1,33.3%,33.3%,100.0%
823 Total,,3,100.0%,,
824 ])
825 AT_CLEANUP
826
dnl Requests only the VARIANCE statistic for height.  The expected value
dnl for the four heights 188, 167, 134, 109 is 1223.00.
827 AT_SETUP([FREQUENCIES variance])
828 AT_DATA([variance.sps], [dnl
829 data list notable list /forename (A12) height.
830 begin data.
831 Ahmed 188
832 bertram 167
833 Catherine 134
834 David 109
835 end data.
836
837 FREQUENCIES
838    /VARIABLES = height
839    /STATISTICS = VARIANCE.
840 ])
841
842 AT_CHECK([pspp variance.sps -O format=csv], [0], [dnl
843 Table: Statistics
844 ,,height
845 N,Valid,4
846 ,Missing,0
847 Variance,,1223.00
848
849 Table: height
850 ,,Frequency,Percent,Valid Percent,Cumulative Percent
851 Valid,109.00,1,25.0%,25.0%,25.0%
852 ,134.00,1,25.0%,25.0%,50.0%
853 ,167.00,1,25.0%,25.0%,75.0%
854 ,188.00,1,25.0%,25.0%,100.0%
855 Total,,4,100.0%,,
856 ])
857 AT_CLEANUP
858
dnl Checks that a bare /STATISTICS and /STATISTICS = DEFAULT both produce
dnl the same statistics table (N, Mean, Std Dev, Minimum, Maximum).  The
dnl run writes pspp.csv and pspp.txt; only pspp.csv is compared.
dnl NOTE: the syntax file is named median.sps although no median is
dnl requested here.
859 AT_SETUP([FREQUENCIES default statistics])
860 AT_DATA([median.sps], [dnl
861 data list notable list /x *.
862 begin data.
863 10
864 20
865 3000000
866 end data.
867
868 FREQUENCIES
869         /VARIABLES = x
870         /STATISTICS
871         .
872
873 FREQUENCIES
874         /VARIABLES = x
875         /STATISTICS = DEFAULT
876         .
877 ])
878
879 AT_CHECK([pspp median.sps -o pspp.csv -o pspp.txt])
880 AT_CHECK([cat pspp.csv], [0], [dnl
881 Table: Statistics
882 ,,x
883 N,Valid,3
884 ,Missing,0
885 Mean,,1000010
886 Std Dev,,1732042
887 Minimum,,10.00
888 Maximum,,3000000
889
890 Table: x
891 ,,Frequency,Percent,Valid Percent,Cumulative Percent
892 Valid,10.00,1,33.3%,33.3%,33.3%
893 ,20.00,1,33.3%,33.3%,66.7%
894 ,3000000,1,33.3%,33.3%,100.0%
895 Total,,3,100.0%,,
896
897 Table: Statistics
898 ,,x
899 N,Valid,3
900 ,Missing,0
901 Mean,,1000010
902 Std Dev,,1732042
903 Minimum,,10.00
904 Maximum,,3000000
905
906 Table: x
907 ,,Frequency,Percent,Valid Percent,Cumulative Percent
908 Valid,10.00,1,33.3%,33.3%,33.3%
909 ,20.00,1,33.3%,33.3%,66.7%
910 ,3000000,1,33.3%,33.3%,100.0%
911 Total,,3,100.0%,,
912 ])
913 AT_CLEANUP
914
915
916
dnl All three input cases are system-missing.  With /STATISTICS = ALL each
dnl statistic is expected to print as a period followed by two blanks.
dnl The @&t@ after each value is the Autoconf empty quadrigraph; it is
dnl presumably there to keep those trailing blanks from being lost.
917 AT_SETUP([FREQUENCIES no valid data])
918 AT_DATA([empty.sps], [dnl
919 data list notable list /x *.
920 begin data.
921 .
922 .
923 .
924 end data.
925
926 FREQUENCIES
927         /VARIABLES = x
928         /STATISTICS = ALL
929         .
930 ])
931
932 AT_CHECK([pspp empty.sps -O format=csv], [0],  [dnl
933 Table: Statistics
934 ,,x
935 N,Valid,0
936 ,Missing,3
937 Mean,,.  @&t@
938 S.E. Mean,,.  @&t@
939 Median,,.  @&t@
940 Mode,,.  @&t@
941 Std Dev,,.  @&t@
942 Variance,,.  @&t@
943 Kurtosis,,.  @&t@
944 S.E. Kurt,,.  @&t@
945 Skewness,,.  @&t@
946 S.E. Skew,,.  @&t@
947 Range,,.  @&t@
948 Minimum,,.  @&t@
949 Maximum,,.  @&t@
950 Sum,,.  @&t@
951
952 Table: x
953 ,,Frequency,Percent
954 Missing,.  ,3,100.0%
955 Total,,3,.0%
956 ])
957
958 AT_CLEANUP
959
960
dnl Every case has a system-missing weight, so no case carries a valid
dnl weight when the histogram is requested.  Stdout is ignored; the exit
dnl status must be 0.
961 AT_SETUP([FREQUENCIES histogram no valid cases])
962 AT_DATA([empty.sps], [dnl
963 data list notable list /x w *.
964 begin data.
965 1 .
966 2 .
967 3 .
968 end data.
969
970 weight by w.
971
972 FREQUENCIES
973         /VARIABLES = x
974         /histogram
975         .
976 ])
977
978 AT_CHECK([pspp empty.sps -O format=csv], [0],  [ignore])
979
980 AT_CLEANUP
981
dnl Regression test for bug #48128: /PERCENTILES combined with /HISTOGRAM
dnl and /STATISTICS=ALL on ten cases generated by an INPUT PROGRAM
dnl (SCORE=EXP(NORMAL(1))).  Stdout is ignored; the exit status must be 0.
982 AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
983 AT_DATA([bug.sps], [dnl
984 SET FORMAT=F8.0.
985
986 INPUT PROGRAM.
987         LOOP I=1 TO 10.
988                 COMPUTE SCORE=EXP(NORMAL(1)).
989                 END CASE.
990         END LOOP.
991         END FILE.
992 END INPUT PROGRAM.
993
994 FREQUENCIES VARIABLES=SCORE
995 /FORMAT=NOTABLE
996 /STATISTICS=ALL
997 /PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
998 /HISTOGRAM.
999
1000 ])
1001
1002 AT_CHECK([pspp bug.sps], [0],  [ignore])
1003
1004 AT_CLEANUP
1005
1006
dnl Two of the six cases have a system-missing weight.  Expects the warning
dnl that such cases were ignored (citing line 13 of warn.sps).  Note that
dnl the value 4.00, which occurs only in an ignored case, is still listed
dnl in the table with frequency .00 and is reported as the Maximum.
1007 AT_SETUP([FREQUENCIES vs. missing weights])
1008 AT_DATA([warn.sps], [dnl
1009 data list notable list /x w .
1010 begin data.
1011 1 1
1012 2 1
1013 1 1
1014 3 1
1015 3 .
1016 4 .
1017 end data.
1018
1019 weight by w.
1020
1021 frequencies /variables=x.
1022 ])
1023
1024 AT_CHECK([pspp warn.sps -O format=csv], [0],  [dnl
1025 "warn.sps:13: warning: FREQUENCIES: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1026
1027 Table: Statistics
1028 ,,x
1029 N,Valid,4.00
1030 ,Missing,.00
1031 Mean,,1.75
1032 Std Dev,,.96
1033 Minimum,,1.00
1034 Maximum,,4.00
1035
1036 Table: x
1037 ,,Frequency,Percent,Valid Percent,Cumulative Percent
1038 Valid,1.00,2.00,50.0%,50.0%,50.0%
1039 ,2.00,1.00,25.0%,25.0%,75.0%
1040 ,3.00,1.00,25.0%,25.0%,100.0%
1041 ,4.00,.00,.0%,.0%,100.0%
1042 Total,,4.00,100.0%,,
1043 ])
1044
1045 AT_CLEANUP