1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 dnl GNU General Public License for more details.
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
17 AT_BANNER([FREQUENCIES procedure])
19 AT_SETUP([FREQUENCIES string variable])
20 AT_DATA([frequencies.sps],
22 name (A8) value * quantity .
37 FREQUENCIES /VAR = name/ORDER=ANALYSIS.
39 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
41 ,,Frequency,Percent,Valid Percent,Cumulative Percent
42 Valid,bar,2,20.0%,20.0%,20.0%
43 ,baz,4,40.0%,40.0%,60.0%
44 ,foo,2,20.0%,20.0%,80.0%
45 ,quux,2,20.0%,20.0%,100.0%
50 AT_SETUP([FREQUENCIES with SPLIT FILE - LAYERED])
51 AT_DATA([frequencies.sps], [dnl
52 DATA LIST LIST NOTABLE/name (A8) value quantity.
69 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
71 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
74 ,,bar,,baz,,foo,,quux,
75 ,,value,quantity,value,quantity,value,quantity,value,quantity
76 N,Valid,2,2,4,4,2,2,2,2
77 ,Missing,0,0,0,0,0,0,0,0
78 Mean,,1.50,4.00,1.75,4.25,3.50,2.50,4.00,4.50
79 Std Dev,,.71,2.83,1.50,3.40,3.54,3.54,1.41,4.95
80 Minimum,,1.00,2.00,1.00,1.00,1.00,.00,3.00,1.00
81 Maximum,,2.00,6.00,4.00,9.00,6.00,5.00,5.00,8.00
85 AT_SETUP([FREQUENCIES with SPLIT FILE - SEPARATE])
86 AT_DATA([frequencies.sps], [dnl
87 DATA LIST LIST NOTABLE/name (A8) value quantity.
103 SPLIT FILE SEPARATE BY name.
104 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
106 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
161 AT_SETUP([FREQUENCIES with SPLIT FILE - LAYERED - unsorted data])
162 AT_DATA([frequencies.sps], [dnl
163 DATA LIST LIST NOTABLE/name (A8) value quantity.
179 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
181 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
182 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = baz "
184 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = bar "
186 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = baz "
188 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = foo "
190 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = baz "
193 ,,name,,,,,,,,,,,,,,,,,,,
194 ,,foo,,bar,,baz,,quux,,baz,,bar,,baz,,foo,,baz,,quux,
195 ,,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity,value,quantity
196 N,Valid,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
197 ,Missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
198 Mean,,1.00,5.00,2.00,6.00,1.00,9.00,3.00,1.00,4.00,3.00,1.00,2.00,1.00,1.00,6.00,.00,1.00,4.00,5.00,8.00
199 Std Dev,,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
200 Minimum,,1.00,5.00,2.00,6.00,1.00,9.00,3.00,1.00,4.00,3.00,1.00,2.00,1.00,1.00,6.00,.00,1.00,4.00,5.00,8.00
201 Maximum,,1.00,5.00,2.00,6.00,1.00,9.00,3.00,1.00,4.00,3.00,1.00,2.00,1.00,1.00,6.00,.00,1.00,4.00,5.00,8.00
203 frequencies.sps:17: warning: FREQUENCIES: Suppressed 1 additional warning about duplicate split values.
207 # Tests for a bug where pspp would crash if two FREQUENCIES commands
208 # existed in an input file.
209 AT_SETUP([FREQUENCIES two runs crash])
210 AT_DATA([frequencies.sps],
211 [data list free /v1 v2.
219 frequencies v1 v2/statistics=none/ORDER=VARIABLE.
220 frequencies v1 v2/statistics=none.
222 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
224 ,,Frequency,Percent,Valid Percent,Cumulative Percent
225 Valid,.00,1,25.0%,25.0%,25.0%
226 ,2.00,1,25.0%,25.0%,50.0%
227 ,3.00,1,25.0%,25.0%,75.0%
228 ,4.00,1,25.0%,25.0%,100.0%
232 ,,Frequency,Percent,Valid Percent,Cumulative Percent
233 Valid,1.00,1,25.0%,25.0%,25.0%
234 ,3.00,1,25.0%,25.0%,50.0%
235 ,4.00,1,25.0%,25.0%,75.0%
236 ,5.00,1,25.0%,25.0%,100.0%
240 ,,Frequency,Percent,Valid Percent,Cumulative Percent
241 Valid,.00,1,25.0%,25.0%,25.0%
242 ,2.00,1,25.0%,25.0%,50.0%
243 ,3.00,1,25.0%,25.0%,75.0%
244 ,4.00,1,25.0%,25.0%,100.0%
248 ,,Frequency,Percent,Valid Percent,Cumulative Percent
249 Valid,1.00,1,25.0%,25.0%,25.0%
250 ,3.00,1,25.0%,25.0%,50.0%
251 ,4.00,1,25.0%,25.0%,75.0%
252 ,5.00,1,25.0%,25.0%,100.0%
257 # Test that the LIMIT specification works.
258 AT_SETUP([FREQUENCIES with LIMIT])
259 AT_DATA([frequencies.sps],
260 [data list free /v1 v2.
268 frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
270 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
272 ,,Frequency,Percent,Valid Percent,Cumulative Percent
273 Valid,1.00,1,25.0%,25.0%,25.0%
274 ,3.00,1,25.0%,25.0%,50.0%
275 ,5.00,2,50.0%,50.0%,100.0%
280 # Tests for a bug where PSPP would crash when a FREQUENCIES command
281 # was used with the HTML output driver.
282 AT_SETUP([FREQUENCIES HTML output crash])
283 AT_DATA([frequencies.sps],
284 [data list free /v1 v2.
294 frequencies v1/statistics=none.
296 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
305 ,,Frequency,Percent,Valid Percent,Cumulative Percent
306 Valid,.00,1,25.0%,25.0%,25.0%
307 ,2.00,1,25.0%,25.0%,50.0%
308 ,3.00,1,25.0%,25.0%,75.0%
309 ,4.00,1,25.0%,25.0%,100.0%
312 AT_CHECK([test -s pspp.html])
315 # Tests for a bug which crashed PSPP when a pie chart with too many
316 # segments was requested.
317 AT_SETUP([FREQUENCIES pie chart crash])
318 AT_DATA([frequencies.sps],
319 [data list list /x * w *.
336 frequencies /x /format=notable /statistics=none
339 # Cannot use the CSV driver for this because it does not output charts
341 AT_CHECK([pspp frequencies.sps], [0], [dnl
342 Reading free-form data from INLINE.
352 dnl Check that the histogram subcommand runs without crashing
353 AT_SETUP([FREQUENCIES histogram crash])
354 AT_DATA([frequencies.sps],
355 [data list notable list /x * w *.
375 /histogram=minimum(0) maximum(50) percent(5) normal.
377 # Cannot use the CSV driver for this because it does not output charts
379 AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore], [ignore])
382 # Tests for a bug which crashed PSPP when the median and a histogram
383 # were both requested.
384 AT_SETUP([FREQUENCIES median with histogram crash])
385 AT_DATA([frequencies.sps], [dnl
386 data list list notable /x.
391 frequencies /x /histogram /STATISTICS=median.
393 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
394 dnl Ignore output - this test only checks that PSPP does not crash.
397 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
398 # crash (bug #11492).
399 AT_SETUP([FREQUENCIES crash after TEMPORARY])
400 AT_DATA([frequencies.sps],
401 [DATA LIST LIST /SEX (A1) X *.
420 AT_CHECK([pspp -o pspp.csv -o pspp.txt frequencies.sps])
421 AT_CHECK([cat pspp.csv], [0], [dnl
422 Table: Reading free-form data from INLINE.
437 ,,Frequency,Percent,Valid Percent,Cumulative Percent
438 Valid,12.00,1,25.0%,25.0%,25.0%
439 ,13.00,1,25.0%,25.0%,50.0%
440 ,21.00,1,25.0%,25.0%,75.0%
441 ,31.00,1,25.0%,25.0%,100.0%
446 m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
455 Percentiles,0,1.00,10.00
463 AT_SETUP([FREQUENCIES basic percentiles])
464 AT_DATA([frequencies.sps],
465 [DATA LIST LIST notable /x y.
477 /PERCENTILES = 0 25 33.333 50 66.666 75 100.
479 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
480 [FREQUENCIES_NTILES_OUTPUT])
483 AT_SETUP([FREQUENCIES basic n-tiles])
484 AT_DATA([frequencies.sps],
485 [DATA LIST LIST notable /x y.
500 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
501 [FREQUENCIES_NTILES_OUTPUT])
504 AT_SETUP([FREQUENCIES compatibility percentiles])
505 AT_DATA([frequencies.sps],
506 [DATA LIST LIST notable /X * .
517 /ALGORITHM=COMPATIBLE
518 /PERCENTILES = 0 25 50 75 100.
520 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
536 ,,Frequency,Percent,Valid Percent,Cumulative Percent
537 Valid,1.00,1,20.0%,20.0%,20.0%
538 ,2.00,1,20.0%,20.0%,40.0%
539 ,3.00,1,20.0%,20.0%,60.0%
540 ,4.00,1,20.0%,20.0%,80.0%
541 ,5.00,1,20.0%,20.0%,100.0%
546 AT_SETUP([FREQUENCIES enhanced percentiles])
547 AT_DATA([frequencies.sps],
548 [DATA LIST LIST notable /X * .
559 /PERCENTILES = 0 25 50 75 100.
561 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
577 ,,Frequency,Percent,Valid Percent,Cumulative Percent
578 Valid,1.00,1,20.0%,20.0%,20.0%
579 ,2.00,1,20.0%,20.0%,40.0%
580 ,3.00,1,20.0%,20.0%,60.0%
581 ,4.00,1,20.0%,20.0%,80.0%
582 ,5.00,1,20.0%,20.0%,100.0%
587 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
588 AT_DATA([frequencies.sps],
589 [DATA LIST LIST notable /X * F *.
604 /PERCENTILES = 0 25 50 75 100.
606 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
622 ,,Frequency,Percent,Valid Percent,Cumulative Percent
623 Valid,1.00,2.00,20.0%,20.0%,20.0%
624 ,2.00,2.00,20.0%,20.0%,40.0%
625 ,3.00,2.00,20.0%,20.0%,60.0%
626 ,4.00,2.00,20.0%,20.0%,80.0%
627 ,5.00,2.00,20.0%,20.0%,100.0%
628 Total,,10.00,100.0%,,
632 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
633 AT_DATA([frequencies.sps],
634 [DATA LIST LIST notable /X * F *.
647 /PERCENTILES = 0 25 50 75 100.
649 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
665 ,,Frequency,Percent,Valid Percent,Cumulative Percent
666 Valid,1.00,1.00,16.7%,16.7%,16.7%
667 ,3.00,2.00,33.3%,33.3%,50.0%
668 ,4.00,1.00,16.7%,16.7%,66.7%
669 ,5.00,2.00,33.3%,33.3%,100.0%
674 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
675 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
676 AT_DATA([frequencies.sps],
677 [DATA LIST LIST notable /X * F *.
689 /PERCENTILES = 0 25 50 75 100.
691 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
707 ,,Frequency,Percent,Valid Percent,Cumulative Percent
708 Valid,1.00,7.00,17.5%,17.5%,17.5%
709 ,2.00,16.00,40.0%,40.0%,57.5%
710 ,3.00,12.00,30.0%,30.0%,87.5%
711 ,4.00,5.00,12.5%,12.5%,100.0%
712 Total,,40.00,100.0%,,
716 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
717 AT_DATA([frequencies.sps],
718 [DATA LIST LIST notable /X * F *.
728 MISSING VALUE x (99.0) .
733 /PERCENTILES = 0 25 50 75 100.
736 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
752 ,,Frequency,Percent,Valid Percent,Cumulative Percent
753 Valid,1.00,1.00,10.0%,16.7%,16.7%
754 ,3.00,2.00,20.0%,33.3%,50.0%
755 ,4.00,1.00,10.0%,16.7%,66.7%
756 ,5.00,2.00,20.0%,33.3%,100.0%
757 Missing,99.00,4.00,40.0%,,
758 Total,,10.00,100.0%,,
762 AT_SETUP([FREQUENCIES dichotomous histogram])
763 AT_DATA([frequencies.sps], [dnl
764 data list notable list /d4 *.
792 AT_CHECK([pspp frequencies.sps], [0], [ignore])
796 AT_SETUP([FREQUENCIES median])
797 AT_DATA([median.sps], [dnl
798 data list notable list /x *.
811 AT_CHECK([pspp median.sps -O format=csv], [0], [dnl
819 ,,Frequency,Percent,Valid Percent,Cumulative Percent
820 Valid,1.00,1,33.3%,33.3%,33.3%
821 ,2.00,1,33.3%,33.3%,66.7%
822 ,3000000,1,33.3%,33.3%,100.0%
827 AT_SETUP([FREQUENCIES variance])
828 AT_DATA([variance.sps], [dnl
829 data list notable list /forename (A12) height.
839 /STATISTICS = VARIANCE.
842 AT_CHECK([pspp variance.sps -O format=csv], [0], [dnl
850 ,,Frequency,Percent,Valid Percent,Cumulative Percent
851 Valid,109.00,1,25.0%,25.0%,25.0%
852 ,134.00,1,25.0%,25.0%,50.0%
853 ,167.00,1,25.0%,25.0%,75.0%
854 ,188.00,1,25.0%,25.0%,100.0%
859 AT_SETUP([FREQUENCIES default statistics])
860 AT_DATA([median.sps], [dnl
861 data list notable list /x *.
875 /STATISTICS = DEFAULT
879 AT_CHECK([pspp median.sps -o pspp.csv -o pspp.txt])
880 AT_CHECK([cat pspp.csv], [0], [dnl
891 ,,Frequency,Percent,Valid Percent,Cumulative Percent
892 Valid,10.00,1,33.3%,33.3%,33.3%
893 ,20.00,1,33.3%,33.3%,66.7%
894 ,3000000,1,33.3%,33.3%,100.0%
907 ,,Frequency,Percent,Valid Percent,Cumulative Percent
908 Valid,10.00,1,33.3%,33.3%,33.3%
909 ,20.00,1,33.3%,33.3%,66.7%
910 ,3000000,1,33.3%,33.3%,100.0%
917 AT_SETUP([FREQUENCIES no valid data])
918 AT_DATA([empty.sps], [dnl
919 data list notable list /x *.
932 AT_CHECK([pspp empty.sps -O format=csv], [0], [dnl
961 AT_SETUP([FREQUENCIES histogram no valid cases])
962 AT_DATA([empty.sps], [dnl
963 data list notable list /x w *.
978 AT_CHECK([pspp empty.sps -O format=csv], [0], [ignore])
982 AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
983 AT_DATA([bug.sps], [dnl
988 COMPUTE SCORE=EXP(NORMAL(1)).
994 FREQUENCIES VARIABLES=SCORE
997 /PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
1002 AT_CHECK([pspp bug.sps], [0], [ignore])
1007 AT_SETUP([FREQUENCIES vs. missing weights])
1008 AT_DATA([warn.sps], [dnl
1009 data list notable list /x w .
1021 frequencies /variables=x.
1024 AT_CHECK([pspp warn.sps -O format=csv], [0], [dnl
1025 "warn.sps:13: warning: FREQUENCIES: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative. These case(s) were ignored."
1037 ,,Frequency,Percent,Valid Percent,Cumulative Percent
1038 Valid,1.00,2.00,50.0%,50.0%,50.0%
1039 ,2.00,1.00,25.0%,25.0%,75.0%
1040 ,3.00,1.00,25.0%,25.0%,100.0%
1041 ,4.00,.00,.0%,.0%,100.0%
1042 Total,,4.00,100.0%,,