FREQUENCIES and layered split file has tests that work
[pspp] / tests / language / stats / frequencies.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
3 dnl
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
17 AT_BANNER([FREQUENCIES procedure])
18
dnl Runs FREQUENCIES on the string variable "name" with ORDER=ANALYSIS and
dnl checks the CSV frequency table: one row per distinct name (bar, baz, foo,
dnl quux) with its count and percentages, plus a Total row for the 10 cases.
19 AT_SETUP([FREQUENCIES string variable])
20 AT_DATA([frequencies.sps],
21   [DATA LIST FREE/
22    name  (A8) value * quantity .
23 BEGIN DATA.
24 foo 1 5
25 bar 2 6
26 baz 1 9
27 quux 3 1
28 bar 1 2
29 baz 4 3
30 baz 1 4
31 baz 1 1
32 foo 6 0
33 quux 5 8
34 END DATA.
35 EXECUTE.
36
37 FREQUENCIES /VAR = name/ORDER=ANALYSIS.
38 ])
39 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
40 Table: name
41 ,,Frequency,Percent,Valid Percent,Cumulative Percent
42 Valid,bar,2,20.0%,20.0%,20.0%
43 ,baz,4,40.0%,40.0%,60.0%
44 ,foo,2,20.0%,20.0%,80.0%
45 ,quux,2,20.0%,20.0%,100.0%
46 Total,,10,100.0%,,
47 ])
48 AT_CLEANUP
49
dnl Sorts the cases by name and then uses SPLIT FILE BY name with no
dnl LAYERED or SEPARATE keyword (the test title treats this as LAYERED).
dnl The expected output is a single Statistics table containing one group
dnl of rows per name value (bar, baz, foo, quux).
50 AT_SETUP([FREQUENCIES with SPLIT FILE - LAYERED])
51 AT_DATA([frequencies.sps], [dnl
52 DATA LIST LIST NOTABLE/name (A8) value quantity.
53 BEGIN DATA.
54 foo 1 5
55 bar 2 6
56 baz 1 9
57 quux 3 1
58 bar 1 2
59 baz 4 3
60 baz 1 4
61 baz 1 1
62 foo 6 0
63 quux 5 8
64 END DATA.
65 EXECUTE.
66
67 SORT CASES BY name.
68 SPLIT FILE BY name.
69 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
70 ])
71 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
72 Table: Statistics
73 name,,,value,quantity
74 bar,N,Valid,2,2
75 ,,Missing,0,0
76 ,Mean,,1.50,4.00
77 ,Std Dev,,.71,2.83
78 ,Minimum,,1.00,2.00
79 ,Maximum,,2.00,6.00
80 baz,N,Valid,4,4
81 ,,Missing,0,0
82 ,Mean,,1.75,4.25
83 ,Std Dev,,1.50,3.40
84 ,Minimum,,1.00,1.00
85 ,Maximum,,4.00,9.00
86 foo,N,Valid,2,2
87 ,,Missing,0,0
88 ,Mean,,3.50,2.50
89 ,Std Dev,,3.54,3.54
90 ,Minimum,,1.00,.00
91 ,Maximum,,6.00,5.00
92 quux,N,Valid,2,2
93 ,,Missing,0,0
94 ,Mean,,4.00,4.50
95 ,Std Dev,,1.41,4.95
96 ,Minimum,,3.00,1.00
97 ,Maximum,,5.00,8.00
98 ])
99 AT_CLEANUP
100
dnl Sorts the cases by name and uses SPLIT FILE SEPARATE BY name.  For each
dnl name value the expected output has a "Split Values" table naming the
dnl group, followed by that group's own Statistics table.
101 AT_SETUP([FREQUENCIES with SPLIT FILE - SEPARATE])
102 AT_DATA([frequencies.sps], [dnl
103 DATA LIST LIST NOTABLE/name (A8) value quantity.
104 BEGIN DATA.
105 foo 1 5
106 bar 2 6
107 baz 1 9
108 quux 3 1
109 bar 1 2
110 baz 4 3
111 baz 1 4
112 baz 1 1
113 foo 6 0
114 quux 5 8
115 END DATA.
116 EXECUTE.
117
118 SORT CASES BY name.
119 SPLIT FILE SEPARATE BY name.
120 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
121 ])
122 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
123 Table: Split Values
124 Variable,Value
125 name,bar
126
127 Table: Statistics
128 ,,value,quantity
129 N,Valid,2,2
130 ,Missing,0,0
131 Mean,,1.50,4.00
132 Std Dev,,.71,2.83
133 Minimum,,1.00,2.00
134 Maximum,,2.00,6.00
135
136 Table: Split Values
137 Variable,Value
138 name,baz
139
140 Table: Statistics
141 ,,value,quantity
142 N,Valid,4,4
143 ,Missing,0,0
144 Mean,,1.75,4.25
145 Std Dev,,1.50,3.40
146 Minimum,,1.00,1.00
147 Maximum,,4.00,9.00
148
149 Table: Split Values
150 Variable,Value
151 name,foo
152
153 Table: Statistics
154 ,,value,quantity
155 N,Valid,2,2
156 ,Missing,0,0
157 Mean,,3.50,2.50
158 Std Dev,,3.54,3.54
159 Minimum,,1.00,.00
160 Maximum,,6.00,5.00
161
162 Table: Split Values
163 Variable,Value
164 name,quux
165
166 Table: Statistics
167 ,,value,quantity
168 N,Valid,2,2
169 ,Missing,0,0
170 Mean,,4.00,4.50
171 Std Dev,,1.41,4.95
172 Minimum,,3.00,1.00
173 Maximum,,5.00,8.00
174 ])
175 AT_CLEANUP
176
dnl Uses SPLIT FILE BY name without a preceding SORT CASES, on data in which
dnl no two adjacent cases share a name.  The expected output is:
dnl   - five "duplicate split values" warnings (baz, bar, baz, foo, baz),
dnl   - one Statistics table in which every case forms its own one-case
dnl     group (N = 1, Std Dev = NaN), in input order,
dnl   - a final message that 1 additional warning was suppressed.
dnl The warning lines end with padding spaces inside the quotes; keep them.
177 AT_SETUP([FREQUENCIES with SPLIT FILE - LAYERED - unsorted data])
178 AT_DATA([frequencies.sps], [dnl
179 DATA LIST LIST NOTABLE/name (A8) value quantity.
180 BEGIN DATA.
181 foo 1 5
182 bar 2 6
183 baz 1 9
184 quux 3 1
185 baz 4 3
186 bar 1 2
187 baz 1 1
188 foo 6 0
189 baz 1 4
190 quux 5 8
191 END DATA.
192 EXECUTE.
193
194 SPLIT FILE BY name.
195 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
196 ])
197 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
198 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = baz     "
199
200 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = bar     "
201
202 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = baz     "
203
204 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = foo     "
205
206 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values.  Each run will be analyzed separately.  The duplicate split values are: name = baz     "
207
208 Table: Statistics
209 name,,,value,quantity
210 foo,N,Valid,1,1
211 ,,Missing,0,0
212 ,Mean,,1.00,5.00
213 ,Std Dev,,NaN,NaN
214 ,Minimum,,1.00,5.00
215 ,Maximum,,1.00,5.00
216 bar,N,Valid,1,1
217 ,,Missing,0,0
218 ,Mean,,2.00,6.00
219 ,Std Dev,,NaN,NaN
220 ,Minimum,,2.00,6.00
221 ,Maximum,,2.00,6.00
222 baz,N,Valid,1,1
223 ,,Missing,0,0
224 ,Mean,,1.00,9.00
225 ,Std Dev,,NaN,NaN
226 ,Minimum,,1.00,9.00
227 ,Maximum,,1.00,9.00
228 quux,N,Valid,1,1
229 ,,Missing,0,0
230 ,Mean,,3.00,1.00
231 ,Std Dev,,NaN,NaN
232 ,Minimum,,3.00,1.00
233 ,Maximum,,3.00,1.00
234 baz,N,Valid,1,1
235 ,,Missing,0,0
236 ,Mean,,4.00,3.00
237 ,Std Dev,,NaN,NaN
238 ,Minimum,,4.00,3.00
239 ,Maximum,,4.00,3.00
240 bar,N,Valid,1,1
241 ,,Missing,0,0
242 ,Mean,,1.00,2.00
243 ,Std Dev,,NaN,NaN
244 ,Minimum,,1.00,2.00
245 ,Maximum,,1.00,2.00
246 baz,N,Valid,1,1
247 ,,Missing,0,0
248 ,Mean,,1.00,1.00
249 ,Std Dev,,NaN,NaN
250 ,Minimum,,1.00,1.00
251 ,Maximum,,1.00,1.00
252 foo,N,Valid,1,1
253 ,,Missing,0,0
254 ,Mean,,6.00,.00
255 ,Std Dev,,NaN,NaN
256 ,Minimum,,6.00,.00
257 ,Maximum,,6.00,.00
258 baz,N,Valid,1,1
259 ,,Missing,0,0
260 ,Mean,,1.00,4.00
261 ,Std Dev,,NaN,NaN
262 ,Minimum,,1.00,4.00
263 ,Maximum,,1.00,4.00
264 quux,N,Valid,1,1
265 ,,Missing,0,0
266 ,Mean,,5.00,8.00
267 ,Std Dev,,NaN,NaN
268 ,Minimum,,5.00,8.00
269 ,Maximum,,5.00,8.00
270
271 frequencies.sps:17: warning: FREQUENCIES: Suppressed 1 additional warning about duplicate split values.
272 ])
273 AT_CLEANUP
274
275 # Tests for a bug where pspp would crash if two FREQUENCIES commands
276 # existed in an input file.
dnl The syntax file issues FREQUENCIES twice on the same variables (the
dnl first with ORDER=VARIABLE, the second without).  Both commands are
dnl expected to emit the same v1 and v2 tables, so the expected output
dnl lists each table twice.
277 AT_SETUP([FREQUENCIES two runs crash])
278 AT_DATA([frequencies.sps],
279   [data list free /v1 v2.
280 begin data.
281 0 1
282 2 3
283 4 5
284 3 4
285 end data.
286
287 frequencies v1 v2/statistics=none/ORDER=VARIABLE.
288 frequencies v1 v2/statistics=none.
289 ])
290 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
291 Table: v1
292 ,,Frequency,Percent,Valid Percent,Cumulative Percent
293 Valid,.00,1,25.0%,25.0%,25.0%
294 ,2.00,1,25.0%,25.0%,50.0%
295 ,3.00,1,25.0%,25.0%,75.0%
296 ,4.00,1,25.0%,25.0%,100.0%
297 Total,,4,100.0%,,
298
299 Table: v2
300 ,,Frequency,Percent,Valid Percent,Cumulative Percent
301 Valid,1.00,1,25.0%,25.0%,25.0%
302 ,3.00,1,25.0%,25.0%,50.0%
303 ,4.00,1,25.0%,25.0%,75.0%
304 ,5.00,1,25.0%,25.0%,100.0%
305 Total,,4,100.0%,,
306
307 Table: v1
308 ,,Frequency,Percent,Valid Percent,Cumulative Percent
309 Valid,.00,1,25.0%,25.0%,25.0%
310 ,2.00,1,25.0%,25.0%,50.0%
311 ,3.00,1,25.0%,25.0%,75.0%
312 ,4.00,1,25.0%,25.0%,100.0%
313 Total,,4,100.0%,,
314
315 Table: v2
316 ,,Frequency,Percent,Valid Percent,Cumulative Percent
317 Valid,1.00,1,25.0%,25.0%,25.0%
318 ,3.00,1,25.0%,25.0%,50.0%
319 ,4.00,1,25.0%,25.0%,75.0%
320 ,5.00,1,25.0%,25.0%,100.0%
321 Total,,4,100.0%,,
322 ])
323 AT_CLEANUP
324
325 # Test that the LIMIT specification works.
dnl v1 has four distinct values (0, 2, 3, 4) and v2 has three (1, 3, 5).
dnl With FORMAT=LIMIT(3) only the v2 table is expected in the output; the
dnl v1 table is expected to be omitted.
326 AT_SETUP([FREQUENCIES with LIMIT])
327 AT_DATA([frequencies.sps],
328   [data list free /v1 v2.
329 begin data.
330 0 1
331 2 5
332 4 3
333 3 5
334 end data.
335
336 frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
337 ])
338 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
339 Table: v2
340 ,,Frequency,Percent,Valid Percent,Cumulative Percent
341 Valid,1.00,1,25.0%,25.0%,25.0%
342 ,3.00,1,25.0%,25.0%,50.0%
343 ,5.00,2,50.0%,50.0%,100.0%
344 Total,,4,100.0%,,
345 ])
346 AT_CLEANUP
347
348 # Tests for a bug where PSPP would crash when a FREQUENCIES command
349 # was used with the HTML output driver.
dnl Runs pspp with two output drivers at once: CSV on stdout
dnl (-o - -O format=csv) and HTML in pspp.html.  The CSV text is compared
dnl exactly; the HTML file is only checked for being non-empty (test -s).
350 AT_SETUP([FREQUENCIES HTML output crash])
351 AT_DATA([frequencies.sps],
352   [data list free /v1 v2.
353 begin data.
354 0 1
355 2 3
356 4 5
357 3 4
358 end data.
359
360 list.
361
362 frequencies v1/statistics=none.
363 ])
364 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
365   [Table: Data List
366 v1,v2
367 .00,1.00
368 2.00,3.00
369 4.00,5.00
370 3.00,4.00
371
372 Table: v1
373 ,,Frequency,Percent,Valid Percent,Cumulative Percent
374 Valid,.00,1,25.0%,25.0%,25.0%
375 ,2.00,1,25.0%,25.0%,50.0%
376 ,3.00,1,25.0%,25.0%,75.0%
377 ,4.00,1,25.0%,25.0%,100.0%
378 Total,,4,100.0%,,
379 ])
380 AT_CHECK([test -s pspp.html])
381 AT_CLEANUP
382
383 # Tests for a bug which crashed PSPP when a piechart with too many
384 # segments was requested.
dnl Cases are weighted by w (WEIGHT BY w) and a pie chart of x is requested
dnl with FORMAT=NOTABLE and STATISTICS=NONE.  pspp is run without an output
dnl format option, and the expected stdout contains only the DATA LIST
dnl variable summary.
385 AT_SETUP([FREQUENCIES pie chart crash])
386 AT_DATA([frequencies.sps],
387   [data list list /x * w *.
388 begin data.
389 1  4
390 34 10
391 -9 15
392 232 6
393 11  4
394 134 1
395 9  5
396 32 16
397 -2 6
398 2  16
399 20  6
400 end data.
401
402 weight by w.
403
404 frequencies /x /format=notable /statistics=none
405         /piechart.
406 ])
407 # Cannot use the CSV driver for this because it does not output charts
408 # at all.
409 AT_CHECK([pspp frequencies.sps], [0], [dnl
410 Reading free-form data from INLINE.
411 +--------+------+
412 |Variable|Format|
413 +--------+------+
414 |x       |F8.0  |
415 |w       |F8.0  |
416 +--------+------+
417 ])
418 AT_CLEANUP
419
420 dnl Check that histogram subcommand runs without crashing
dnl Weighted data with a HISTOGRAM request that sets minimum, maximum,
dnl percent and normal options.  pspp is run with -O format=pdf; both
dnl stdout and stderr are ignored, so only the exit status (0) is checked.
421 AT_SETUP([FREQUENCIES histogram crash])
422 AT_DATA([frequencies.sps],
423   [data list notable list /x * w *.
424 begin data.
425 1  4
426 34 10
427 -9 15
428 232 6
429 11  4
430 134 1
431 9  5
432 32 16
433 -2 6
434 2  16
435 20  6
436 end data.
437
438 weight by w.
439
440 frequencies /x
441             /format=notable
442             /statistics=none
443             /histogram=minimum(0) maximum(50) percent(5) normal.
444 ])
445 # Cannot use the CSV driver for this because it does not output charts
446 # at all.
447 AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore], [ignore])
448 AT_CLEANUP
449
450 # Tests for a bug which crashed PSPP when the median and a histogram
451 # were both requested.
dnl Single-case data set with both /histogram and /STATISTICS=median.
dnl stdout is ignored; the exit status must be 0.  No stderr argument is
dnl given to AT_CHECK, so the Autotest default (empty stderr) applies.
452 AT_SETUP([FREQUENCIES median with histogram crash])
453 AT_DATA([frequencies.sps], [dnl
454 data list list notable /x.
455 begin data.
456 1
457 end data.
458
459 frequencies /x /histogram /STATISTICS=median.
460 ])
461 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
462 dnl Ignore output - No crash test.
463 AT_CLEANUP
464
465 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
466 # crash (bug #11492).
dnl TEMPORARY followed by SELECT IF SEX EQ 'F' restricts FREQUENCIES to the
dnl four F cases (X = 21, 31, 12, 13), which is what the expected Statistics
dnl and frequency tables describe.  Output is written to pspp.csv and
dnl pspp.txt; only pspp.csv is compared.
467 AT_SETUP([FREQUENCIES crash after TEMPORARY])
468 AT_DATA([frequencies.sps],
469   [DATA LIST LIST /SEX (A1) X *.
470 BEGIN DATA.
471 M 31
472 F 21
473 M 41
474 F 31
475 M 13
476 F 12
477 M 14
478 F 13
479 END DATA.
480
481
482 TEMPORARY
483 SELECT IF SEX EQ 'F'
484 FREQUENCIES /X .
485
486 FINISH
487 ])
488 AT_CHECK([pspp -o pspp.csv -o pspp.txt frequencies.sps])
489 AT_CHECK([cat pspp.csv], [0], [dnl
490 Table: Reading free-form data from INLINE.
491 Variable,Format
492 SEX,A1
493 X,F8.0
494
495 Table: Statistics
496 ,,X
497 N,Valid,4
498 ,Missing,0
499 Mean,,19.25
500 Std Dev,,8.81
501 Minimum,,12.00
502 Maximum,,31.00
503
504 Table: X
505 ,,Frequency,Percent,Valid Percent,Cumulative Percent
506 Valid,12.00,1,25.0%,25.0%,25.0%
507 ,13.00,1,25.0%,25.0%,50.0%
508 ,21.00,1,25.0%,25.0%,75.0%
509 ,31.00,1,25.0%,25.0%,100.0%
510 Total,,4,100.0%,,
511 ])
512 AT_CLEANUP
513
dnl Expected CSV output shared by the "FREQUENCIES basic percentiles" and
dnl "FREQUENCIES basic n-tiles" tests that follow: both pass this macro as
dnl the expected stdout of their AT_CHECK, so they must report the same
dnl 0/25/33/50/67/75/100 percentile rows for x and y.
514 m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
515 Table: Statistics
516 ,,x,y
517 N,Valid,5,5
518 ,Missing,0,0
519 Mean,,3.00,30.00
520 Std Dev,,1.58,15.81
521 Minimum,,1.00,10.00
522 Maximum,,5.00,50.00
523 Percentiles,0,1.00,10.00
524 ,25,2.00,20.00
525 ,33,2.33,23.33
526 ,50,3.00,30.00
527 ,67,3.67,36.67
528 ,75,4.00,40.00
529 ,100,5.00,50.00
530 ])
dnl Requests the percentiles explicitly with /PERCENTILES.  The requested
dnl 33.333 and 66.666 are expected to appear with the labels 33 and 67 in
dnl the output, which is compared against FREQUENCIES_NTILES_OUTPUT.
531 AT_SETUP([FREQUENCIES basic percentiles])
532 AT_DATA([frequencies.sps],
533   [DATA LIST LIST notable /x y.
534 BEGIN DATA.
535 1 10
536 2 20
537 3 30
538 4 40
539 5 50
540 END DATA.
541
542 FREQUENCIES
543         VAR=x y
544         /FORMAT=NOTABLE
545         /PERCENTILES = 0 25 33.333 50 66.666 75 100.
546 ])
547 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
548   [FREQUENCIES_NTILES_OUTPUT])
549 AT_CLEANUP
550
dnl Requests percentiles through two /NTILES subcommands (3 and 4) instead
dnl of an explicit list.  The output is compared against
dnl FREQUENCIES_NTILES_OUTPUT, i.e. it is expected to include the 0 and 100
dnl rows as well as 25, 33, 50, 67 and 75.
551 AT_SETUP([FREQUENCIES basic n-tiles])
552 AT_DATA([frequencies.sps],
553   [DATA LIST LIST notable /x y.
554 BEGIN DATA.
555 1 10
556 2 20
557 3 30
558 4 40
559 5 50
560 END DATA.
561
562 FREQUENCIES
563         VAR=x y
564         /FORMAT=NOTABLE
565         /NTILES = 3
566         /NTILES = 4.
567 ])
568 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
569   [FREQUENCIES_NTILES_OUTPUT])
570 AT_CLEANUP
571
dnl Percentiles of the values 1..5 with /ALGORITHM=COMPATIBLE.  The expected
dnl 25th and 75th percentiles are 1.50 and 4.50.
572 AT_SETUP([FREQUENCIES compatibility percentiles])
573 AT_DATA([frequencies.sps],
574   [DATA LIST LIST notable /X * .
575 BEGIN DATA.
576 1
577 2
578 3
579 4
580 5
581 END DATA.
582
583 FREQUENCIES
584         VAR=x
585         /ALGORITHM=COMPATIBLE
586         /PERCENTILES = 0 25 50 75 100.
587 ])
588 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
589 Table: Statistics
590 ,,X
591 N,Valid,5
592 ,Missing,0
593 Mean,,3.00
594 Std Dev,,1.58
595 Minimum,,1.00
596 Maximum,,5.00
597 Percentiles,0,1.00
598 ,25,1.50
599 ,50,3.00
600 ,75,4.50
601 ,100,5.00
602
603 Table: X
604 ,,Frequency,Percent,Valid Percent,Cumulative Percent
605 Valid,1.00,1,20.0%,20.0%,20.0%
606 ,2.00,1,20.0%,20.0%,40.0%
607 ,3.00,1,20.0%,20.0%,60.0%
608 ,4.00,1,20.0%,20.0%,80.0%
609 ,5.00,1,20.0%,20.0%,100.0%
610 Total,,5,100.0%,,
611 ])
612 AT_CLEANUP
613
dnl Percentiles of the values 1..5 with no ALGORITHM subcommand (the test
dnl title calls this the enhanced algorithm).  The expected 25th and 75th
dnl percentiles are 2.00 and 4.00.
614 AT_SETUP([FREQUENCIES enhanced percentiles])
615 AT_DATA([frequencies.sps],
616   [DATA LIST LIST notable /X * .
617 BEGIN DATA.
618 1
619 2
620 3
621 4
622 5
623 END DATA.
624
625 FREQUENCIES
626         VAR=x
627         /PERCENTILES = 0 25 50 75 100.
628 ])
629 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
630 Table: Statistics
631 ,,X
632 N,Valid,5
633 ,Missing,0
634 Mean,,3.00
635 Std Dev,,1.58
636 Minimum,,1.00
637 Maximum,,5.00
638 Percentiles,0,1.00
639 ,25,2.00
640 ,50,3.00
641 ,75,4.00
642 ,100,5.00
643
644 Table: X
645 ,,Frequency,Percent,Valid Percent,Cumulative Percent
646 Valid,1.00,1,20.0%,20.0%,20.0%
647 ,2.00,1,20.0%,20.0%,40.0%
648 ,3.00,1,20.0%,20.0%,60.0%
649 ,4.00,1,20.0%,20.0%,80.0%
650 ,5.00,1,20.0%,20.0%,100.0%
651 Total,,5,100.0%,,
652 ])
653 AT_CLEANUP
654
dnl Cases are weighted by F.  The values 4 and 5 each occur in two cases of
dnl weight 1, so every distinct value of X is expected to have a total
dnl frequency of 2.00 and the weighted N is 10.00.
655 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
656 AT_DATA([frequencies.sps],
657   [DATA LIST LIST notable /X * F *.
658 BEGIN DATA.
659 1 2
660 2 2
661 3 2
662 4 1
663 4 1
664 5 1
665 5 1
666 END DATA.
667
668 WEIGHT BY f.
669
670 FREQUENCIES
671         VAR=x
672         /PERCENTILES = 0 25 50 75 100.
673 ])
674 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
675 Table: Statistics
676 ,,X
677 N,Valid,10.00
678 ,Missing,.00
679 Mean,,3.00
680 Std Dev,,1.49
681 Minimum,,1.00
682 Maximum,,5.00
683 Percentiles,0,1.00
684 ,25,2.00
685 ,50,3.00
686 ,75,4.00
687 ,100,5.00
688
689 Table: X
690 ,,Frequency,Percent,Valid Percent,Cumulative Percent
691 Valid,1.00,2.00,20.0%,20.0%,20.0%
692 ,2.00,2.00,20.0%,20.0%,40.0%
693 ,3.00,2.00,20.0%,20.0%,60.0%
694 ,4.00,2.00,20.0%,20.0%,80.0%
695 ,5.00,2.00,20.0%,20.0%,100.0%
696 Total,,10.00,100.0%,,
697 ])
698 AT_CLEANUP
699
dnl Cases are weighted by F, giving a weighted N of 6.00 over the values
dnl 1, 3, 4 and 5.  The expected quartiles are 3.00, 3.50 and 4.75.
700 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
701 AT_DATA([frequencies.sps],
702   [DATA LIST LIST notable /X * F *.
703 BEGIN DATA.
704 1 1
705 3 2
706 4 1
707 5 1
708 5 1
709 END DATA.
710
711 WEIGHT BY f.
712
713 FREQUENCIES
714         VAR=x
715         /PERCENTILES = 0 25 50 75 100.
716 ])
717 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
718 Table: Statistics
719 ,,X
720 N,Valid,6.00
721 ,Missing,.00
722 Mean,,3.50
723 Std Dev,,1.52
724 Minimum,,1.00
725 Maximum,,5.00
726 Percentiles,0,1.00
727 ,25,3.00
728 ,50,3.50
729 ,75,4.75
730 ,100,5.00
731
732 Table: X
733 ,,Frequency,Percent,Valid Percent,Cumulative Percent
734 Valid,1.00,1.00,16.7%,16.7%,16.7%
735 ,3.00,2.00,33.3%,33.3%,50.0%
736 ,4.00,1.00,16.7%,16.7%,66.7%
737 ,5.00,2.00,33.3%,33.3%,100.0%
738 Total,,6.00,100.0%,,
739 ])
740 AT_CLEANUP
741
742 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
dnl Four distinct values with weights 7, 16, 12 and 5 (weighted N = 40.00).
dnl The expected quartiles are 2.00, 2.00 and 3.00.
743 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
744 AT_DATA([frequencies.sps],
745   [DATA LIST LIST notable /X * F *.
746 BEGIN DATA.
747 1 7
748 2 16
749 3 12
750 4 5
751 END DATA.
752
753 WEIGHT BY f.
754
755 FREQUENCIES
756         VAR=x
757         /PERCENTILES = 0 25 50 75 100.
758 ])
759 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
760 Table: Statistics
761 ,,X
762 N,Valid,40.00
763 ,Missing,.00
764 Mean,,2.38
765 Std Dev,,.93
766 Minimum,,1.00
767 Maximum,,4.00
768 Percentiles,0,1.00
769 ,25,2.00
770 ,50,2.00
771 ,75,3.00
772 ,100,4.00
773
774 Table: X
775 ,,Frequency,Percent,Valid Percent,Cumulative Percent
776 Valid,1.00,7.00,17.5%,17.5%,17.5%
777 ,2.00,16.00,40.0%,40.0%,57.5%
778 ,3.00,12.00,30.0%,30.0%,87.5%
779 ,4.00,5.00,12.5%,12.5%,100.0%
780 Total,,40.00,100.0%,,
781 ])
782 AT_CLEANUP
783
dnl The value 99 is declared user-missing for x and its case has weight 4.
dnl It is expected to be counted under N Missing (4.00) and listed in a
dnl Missing row of the frequency table, while the statistics and
dnl percentiles are those of the 6.00 valid weighted cases.
784 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
785 AT_DATA([frequencies.sps],
786   [DATA LIST LIST notable /X * F *.
787 BEGIN DATA.
788 1 1
789 3 2
790 4 1
791 5 1
792 5 1
793 99 4
794 END DATA.
795
796 MISSING VALUE x (99.0) .
797 WEIGHT BY f.
798
799 FREQUENCIES
800         VAR=x
801         /PERCENTILES = 0 25 50 75 100.
802 ])
803
804 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
805 Table: Statistics
806 ,,X
807 N,Valid,6.00
808 ,Missing,4.00
809 Mean,,3.50
810 Std Dev,,1.52
811 Minimum,,1.00
812 Maximum,,5.00
813 Percentiles,0,1.00
814 ,25,3.00
815 ,50,3.50
816 ,75,4.75
817 ,100,5.00
818
819 Table: X
820 ,,Frequency,Percent,Valid Percent,Cumulative Percent
821 Valid,1.00,1.00,10.0%,16.7%,16.7%
822 ,3.00,2.00,20.0%,33.3%,50.0%
823 ,4.00,1.00,10.0%,16.7%,66.7%
824 ,5.00,2.00,20.0%,33.3%,100.0%
825 Missing,99.00,4.00,40.0%,,
826 Total,,10.00,100.0%,,
827 ])
828 AT_CLEANUP
829
dnl Requests a histogram with a normal curve for a variable that takes only
dnl the values 0, 1 and 2.  stdout is ignored; the exit status must be 0.
dnl No stderr argument is given to AT_CHECK, so the Autotest default
dnl (empty stderr) applies.
830 AT_SETUP([FREQUENCIES dichotomous histogram])
831 AT_DATA([frequencies.sps], [dnl
832 data list notable list /d4 *.
833 begin data.
834 0
835 0
836 0
837 1
838 0
839 0
840 0
841 0
842 1
843 0
844 0
845 0
846 0
847 0
848 1
849 2
850 0
851 end data.
852
853 FREQUENCIES
854         /VARIABLES = d4
855         /FORMAT=AVALUE TABLE
856         /HISTOGRAM=NORMAL
857         .
858 ])
859
860 AT_CHECK([pspp frequencies.sps], [0],  [ignore])
861 AT_CLEANUP
862
863
dnl Requests only /STATISTICS = MEDIAN.  The Statistics table is expected to
dnl contain just the N rows and Median, which is 2.00 for the values
dnl 1, 2 and 3000000.
864 AT_SETUP([FREQUENCIES median])
865 AT_DATA([median.sps], [dnl
866 data list notable list /x *.
867 begin data.
868 1
869 2
870 3000000
871 end data.
872
873 FREQUENCIES
874         /VARIABLES = x
875         /STATISTICS = MEDIAN
876         .
877 ])
878
879 AT_CHECK([pspp median.sps -O format=csv], [0], [dnl
880 Table: Statistics
881 ,,x
882 N,Valid,3
883 ,Missing,0
884 Median,,2.00
885
886 Table: x
887 ,,Frequency,Percent,Valid Percent,Cumulative Percent
888 Valid,1.00,1,33.3%,33.3%,33.3%
889 ,2.00,1,33.3%,33.3%,66.7%
890 ,3000000,1,33.3%,33.3%,100.0%
891 Total,,3,100.0%,,
892 ])
893 AT_CLEANUP
894
dnl Requests only /STATISTICS = VARIANCE for height.  The expected variance
dnl of the four heights is 1223.00.  forename is read by DATA LIST but is
dnl not named in /VARIABLES.
895 AT_SETUP([FREQUENCIES variance])
896 AT_DATA([variance.sps], [dnl
897 data list notable list /forename (A12) height.
898 begin data.
899 Ahmed 188
900 bertram 167
901 Catherine 134
902 David 109
903 end data.
904
905 FREQUENCIES
906    /VARIABLES = height
907    /STATISTICS = VARIANCE.
908 ])
909
910 AT_CHECK([pspp variance.sps -O format=csv], [0], [dnl
911 Table: Statistics
912 ,,height
913 N,Valid,4
914 ,Missing,0
915 Variance,,1223.00
916
917 Table: height
918 ,,Frequency,Percent,Valid Percent,Cumulative Percent
919 Valid,109.00,1,25.0%,25.0%,25.0%
920 ,134.00,1,25.0%,25.0%,50.0%
921 ,167.00,1,25.0%,25.0%,75.0%
922 ,188.00,1,25.0%,25.0%,100.0%
923 Total,,4,100.0%,,
924 ])
925 AT_CLEANUP
926
dnl Runs FREQUENCIES twice: once with a bare /STATISTICS and once with
dnl /STATISTICS = DEFAULT.  Both are expected to produce the same output:
dnl N, Mean, Std Dev, Minimum and Maximum, followed by the frequency table.
dnl NOTE(review): the syntax file is named median.sps although no median is
dnl requested here - presumably left over from the median test; confirm
dnl before renaming.
927 AT_SETUP([FREQUENCIES default statistics])
928 AT_DATA([median.sps], [dnl
929 data list notable list /x *.
930 begin data.
931 10
932 20
933 3000000
934 end data.
935
936 FREQUENCIES
937         /VARIABLES = x
938         /STATISTICS
939         .
940
941 FREQUENCIES
942         /VARIABLES = x
943         /STATISTICS = DEFAULT
944         .
945 ])
946
947 AT_CHECK([pspp median.sps -o pspp.csv -o pspp.txt])
948 AT_CHECK([cat pspp.csv], [0], [dnl
949 Table: Statistics
950 ,,x
951 N,Valid,3
952 ,Missing,0
953 Mean,,1000010
954 Std Dev,,1732042
955 Minimum,,10.00
956 Maximum,,3000000
957
958 Table: x
959 ,,Frequency,Percent,Valid Percent,Cumulative Percent
960 Valid,10.00,1,33.3%,33.3%,33.3%
961 ,20.00,1,33.3%,33.3%,66.7%
962 ,3000000,1,33.3%,33.3%,100.0%
963 Total,,3,100.0%,,
964
965 Table: Statistics
966 ,,x
967 N,Valid,3
968 ,Missing,0
969 Mean,,1000010
970 Std Dev,,1732042
971 Minimum,,10.00
972 Maximum,,3000000
973
974 Table: x
975 ,,Frequency,Percent,Valid Percent,Cumulative Percent
976 Valid,10.00,1,33.3%,33.3%,33.3%
977 ,20.00,1,33.3%,33.3%,66.7%
978 ,3000000,1,33.3%,33.3%,100.0%
979 Total,,3,100.0%,,
980 ])
981 AT_CLEANUP
982
983
984
dnl All three cases are system-missing and /STATISTICS = ALL is requested.
dnl Every statistic is expected to print as a period followed by two
dnl spaces.  Each of those expected lines ends with the Autoconf quadrigraph
dnl for "nothing" (at-sign, ampersand, t, at-sign), which presumably is
dnl there so the trailing spaces are not lost when the file is edited.
dnl The frequency table is expected to have only Frequency and Percent
dnl columns, with a Total percentage of .0%.
985 AT_SETUP([FREQUENCIES no valid data])
986 AT_DATA([empty.sps], [dnl
987 data list notable list /x *.
988 begin data.
989 .
990 .
991 .
992 end data.
993
994 FREQUENCIES
995         /VARIABLES = x
996         /STATISTICS = ALL
997         .
998 ])
999
1000 AT_CHECK([pspp empty.sps -O format=csv], [0],  [dnl
1001 Table: Statistics
1002 ,,x
1003 N,Valid,0
1004 ,Missing,3
1005 Mean,,.  @&t@
1006 S.E. Mean,,.  @&t@
1007 Median,,.  @&t@
1008 Mode,,.  @&t@
1009 Std Dev,,.  @&t@
1010 Variance,,.  @&t@
1011 Kurtosis,,.  @&t@
1012 S.E. Kurt,,.  @&t@
1013 Skewness,,.  @&t@
1014 S.E. Skew,,.  @&t@
1015 Range,,.  @&t@
1016 Minimum,,.  @&t@
1017 Maximum,,.  @&t@
1018 Sum,,.  @&t@
1019
1020 Table: x
1021 ,,Frequency,Percent
1022 Missing,.  ,3,100.0%
1023 Total,,3,.0%
1024 ])
1025
1026 AT_CLEANUP
1027
1028
dnl Every case has a system-missing weight and a histogram is requested
dnl under WEIGHT BY w.  stdout is ignored; the exit status must be 0.  No
dnl stderr argument is given to AT_CHECK, so the Autotest default (empty
dnl stderr) applies.
1029 AT_SETUP([FREQUENCIES histogram no valid cases])
1030 AT_DATA([empty.sps], [dnl
1031 data list notable list /x w *.
1032 begin data.
1033 1 .
1034 2 .
1035 3 .
1036 end data.
1037
1038 weight by w.
1039
1040 FREQUENCIES
1041         /VARIABLES = x
1042         /histogram
1043         .
1044 ])
1045
1046 AT_CHECK([pspp empty.sps -O format=csv], [0],  [ignore])
1047
1048 AT_CLEANUP
1049
dnl Builds 10 cases with INPUT PROGRAM, each SCORE computed as
dnl EXP(NORMAL(1)), then requests all statistics, eleven percentiles and a
dnl histogram in one FREQUENCIES command.  stdout is ignored; the exit
dnl status must be 0.
1050 AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
1051 AT_DATA([bug.sps], [dnl
1052 SET FORMAT=F8.0.
1053
1054 INPUT PROGRAM.
1055         LOOP I=1 TO 10.
1056                 COMPUTE SCORE=EXP(NORMAL(1)).
1057                 END CASE.
1058         END LOOP.
1059         END FILE.
1060 END INPUT PROGRAM.
1061
1062 FREQUENCIES VARIABLES=SCORE
1063 /FORMAT=NOTABLE
1064 /STATISTICS=ALL
1065 /PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
1066 /HISTOGRAM.
1067
1068 ])
1069
1070 AT_CHECK([pspp bug.sps], [0],  [ignore])
1071
1072 AT_CLEANUP
1073
1074
dnl Two of the six cases (x = 3 and x = 4) have a system-missing weight.
dnl Expected output: one warning that cases with unusable weights were
dnl ignored, a weighted N of 4.00, and a frequency table in which the value
dnl 4.00 still appears with frequency .00.  The expected Maximum is also
dnl 4.00, even though the only case with that value has a missing weight.
1075 AT_SETUP([FREQUENCIES vs. missing weights])
1076 AT_DATA([warn.sps], [dnl
1077 data list notable list /x w .
1078 begin data.
1079 1 1
1080 2 1
1081 1 1
1082 3 1
1083 3 .
1084 4 .
1085 end data.
1086
1087 weight by w.
1088
1089 frequencies /variables=x.
1090 ])
1091
1092 AT_CHECK([pspp warn.sps -O format=csv], [0],  [dnl
1093 "warn.sps:13: warning: FREQUENCIES: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."
1094
1095 Table: Statistics
1096 ,,x
1097 N,Valid,4.00
1098 ,Missing,.00
1099 Mean,,1.75
1100 Std Dev,,.96
1101 Minimum,,1.00
1102 Maximum,,4.00
1103
1104 Table: x
1105 ,,Frequency,Percent,Valid Percent,Cumulative Percent
1106 Valid,1.00,2.00,50.0%,50.0%,50.0%
1107 ,2.00,1.00,25.0%,25.0%,75.0%
1108 ,3.00,1.00,25.0%,25.0%,100.0%
1109 ,4.00,.00,.0%,.0%,100.0%
1110 Total,,4.00,100.0%,,
1111 ])
1112
1113 AT_CLEANUP