1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 dnl GNU General Public License for more details.
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
17 AT_BANNER([FREQUENCIES procedure])
19 AT_SETUP([FREQUENCIES string variable])
20 AT_DATA([frequencies.sps],
22 name (A8) value * quantity .
37 FREQUENCIES /VAR = name/ORDER=ANALYSIS.
39 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
41 ,,Frequency,Percent,Valid Percent,Cumulative Percent
42 Valid,bar,2,20.0%,20.0%,20.0%
43 ,baz,4,40.0%,40.0%,60.0%
44 ,foo,2,20.0%,20.0%,80.0%
45 ,quux,2,20.0%,20.0%,100.0%
50 AT_SETUP([FREQUENCIES with SPLIT FILE - LAYERED])
51 AT_DATA([frequencies.sps], [dnl
52 DATA LIST LIST NOTABLE/name (A8) value quantity.
69 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
71 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
101 AT_SETUP([FREQUENCIES with SPLIT FILE - SEPARATE])
102 AT_DATA([frequencies.sps], [dnl
103 DATA LIST LIST NOTABLE/name (A8) value quantity.
119 SPLIT FILE SEPARATE BY name.
120 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
122 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
177 AT_SETUP([FREQUENCIES with SPLIT FILE - LAYERED - unsorted data])
178 AT_DATA([frequencies.sps], [dnl
179 DATA LIST LIST NOTABLE/name (A8) value quantity.
195 FREQUENCIES /VARIABLES=value quantity /FORMAT NOTABLE.
197 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
198 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = baz "
200 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = bar "
202 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = baz "
204 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = foo "
206 "frequencies.sps:17: warning: FREQUENCIES: When SPLIT FILE is in effect, the input data must be sorted by the split variables (for example, using SORT CASES), but multiple runs of cases with the same split values were found separated by cases with different values. Each run will be analyzed separately. The duplicate split values are: name = baz "
209 name,,,value,quantity
271 frequencies.sps:17: warning: FREQUENCIES: Suppressed 1 additional warning about duplicate split values.
275 # Tests for a bug where pspp would crash if two FREQUENCIES commands
276 # existed in an input file.
277 AT_SETUP([FREQUENCIES two runs crash])
278 AT_DATA([frequencies.sps],
279 [data list free /v1 v2.
287 frequencies v1 v2/statistics=none/ORDER=VARIABLE.
288 frequencies v1 v2/statistics=none.
290 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
292 ,,Frequency,Percent,Valid Percent,Cumulative Percent
293 Valid,.00,1,25.0%,25.0%,25.0%
294 ,2.00,1,25.0%,25.0%,50.0%
295 ,3.00,1,25.0%,25.0%,75.0%
296 ,4.00,1,25.0%,25.0%,100.0%
300 ,,Frequency,Percent,Valid Percent,Cumulative Percent
301 Valid,1.00,1,25.0%,25.0%,25.0%
302 ,3.00,1,25.0%,25.0%,50.0%
303 ,4.00,1,25.0%,25.0%,75.0%
304 ,5.00,1,25.0%,25.0%,100.0%
308 ,,Frequency,Percent,Valid Percent,Cumulative Percent
309 Valid,.00,1,25.0%,25.0%,25.0%
310 ,2.00,1,25.0%,25.0%,50.0%
311 ,3.00,1,25.0%,25.0%,75.0%
312 ,4.00,1,25.0%,25.0%,100.0%
316 ,,Frequency,Percent,Valid Percent,Cumulative Percent
317 Valid,1.00,1,25.0%,25.0%,25.0%
318 ,3.00,1,25.0%,25.0%,50.0%
319 ,4.00,1,25.0%,25.0%,75.0%
320 ,5.00,1,25.0%,25.0%,100.0%
325 # Test that the LIMIT specification works.
326 AT_SETUP([FREQUENCIES with LIMIT])
327 AT_DATA([frequencies.sps],
328 [data list free /v1 v2.
336 frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
338 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
340 ,,Frequency,Percent,Valid Percent,Cumulative Percent
341 Valid,1.00,1,25.0%,25.0%,25.0%
342 ,3.00,1,25.0%,25.0%,50.0%
343 ,5.00,2,50.0%,50.0%,100.0%
348 # Tests for a bug where PSPP would crash when a FREQUENCIES command
349 # was used with the HTML output driver.
350 AT_SETUP([FREQUENCIES HTML output crash])
351 AT_DATA([frequencies.sps],
352 [data list free /v1 v2.
362 frequencies v1/statistics=none.
364 AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
373 ,,Frequency,Percent,Valid Percent,Cumulative Percent
374 Valid,.00,1,25.0%,25.0%,25.0%
375 ,2.00,1,25.0%,25.0%,50.0%
376 ,3.00,1,25.0%,25.0%,75.0%
377 ,4.00,1,25.0%,25.0%,100.0%
380 AT_CHECK([test -s pspp.html])
383 # Tests for a bug which crashed PSPP when a piechart with too many
384 # segments was requested.
385 AT_SETUP([FREQUENCIES pie chart crash])
386 AT_DATA([frequencies.sps],
387 [data list list /x * w *.
404 frequencies /x /format=notable /statistics=none
407 # Cannot use the CSV driver for this because it does not output charts
409 AT_CHECK([pspp frequencies.sps], [0], [dnl
410 Reading free-form data from INLINE.
420 dnl Check that histogram subcommand runs without crashing
421 AT_SETUP([FREQUENCIES histogram crash])
422 AT_DATA([frequencies.sps],
423 [data list notable list /x * w *.
443 /histogram=minimum(0) maximum(50) percent(5) normal.
445 # Cannot use the CSV driver for this because it does not output charts
447 AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore], [ignore])
450 # Tests for a bug which crashed PSPP when the median and a histogram
451 # were both requested.
452 AT_SETUP([FREQUENCIES median with histogram crash])
453 AT_DATA([frequencies.sps], [dnl
454 data list list notable /x.
459 frequencies /x /histogram /STATISTICS=median.
461 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
462 dnl Ignore output - No crash test.
465 # Tests for a bug which caused FREQUENCIES following TEMPORARY to
466 # crash (bug #11492).
467 AT_SETUP([FREQUENCIES crash after TEMPORARY])
468 AT_DATA([frequencies.sps],
469 [DATA LIST LIST /SEX (A1) X *.
488 AT_CHECK([pspp -o pspp.csv -o pspp.txt frequencies.sps])
489 AT_CHECK([cat pspp.csv], [0], [dnl
490 Table: Reading free-form data from INLINE.
505 ,,Frequency,Percent,Valid Percent,Cumulative Percent
506 Valid,12.00,1,25.0%,25.0%,25.0%
507 ,13.00,1,25.0%,25.0%,50.0%
508 ,21.00,1,25.0%,25.0%,75.0%
509 ,31.00,1,25.0%,25.0%,100.0%
514 m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
523 Percentiles,0,1.00,10.00
531 AT_SETUP([FREQUENCIES basic percentiles])
532 AT_DATA([frequencies.sps],
533 [DATA LIST LIST notable /x y.
545 /PERCENTILES = 0 25 33.333 50 66.666 75 100.
547 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
548 [FREQUENCIES_NTILES_OUTPUT])
551 AT_SETUP([FREQUENCIES basic n-tiles])
552 AT_DATA([frequencies.sps],
553 [DATA LIST LIST notable /x y.
568 AT_CHECK([pspp -O format=csv frequencies.sps], [0],
569 [FREQUENCIES_NTILES_OUTPUT])
572 AT_SETUP([FREQUENCIES compatibility percentiles])
573 AT_DATA([frequencies.sps],
574 [DATA LIST LIST notable /X * .
585 /ALGORITHM=COMPATIBLE
586 /PERCENTILES = 0 25 50 75 100.
588 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
604 ,,Frequency,Percent,Valid Percent,Cumulative Percent
605 Valid,1.00,1,20.0%,20.0%,20.0%
606 ,2.00,1,20.0%,20.0%,40.0%
607 ,3.00,1,20.0%,20.0%,60.0%
608 ,4.00,1,20.0%,20.0%,80.0%
609 ,5.00,1,20.0%,20.0%,100.0%
614 AT_SETUP([FREQUENCIES enhanced percentiles])
615 AT_DATA([frequencies.sps],
616 [DATA LIST LIST notable /X * .
627 /PERCENTILES = 0 25 50 75 100.
629 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
645 ,,Frequency,Percent,Valid Percent,Cumulative Percent
646 Valid,1.00,1,20.0%,20.0%,20.0%
647 ,2.00,1,20.0%,20.0%,40.0%
648 ,3.00,1,20.0%,20.0%,60.0%
649 ,4.00,1,20.0%,20.0%,80.0%
650 ,5.00,1,20.0%,20.0%,100.0%
655 AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
656 AT_DATA([frequencies.sps],
657 [DATA LIST LIST notable /X * F *.
672 /PERCENTILES = 0 25 50 75 100.
674 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
690 ,,Frequency,Percent,Valid Percent,Cumulative Percent
691 Valid,1.00,2.00,20.0%,20.0%,20.0%
692 ,2.00,2.00,20.0%,20.0%,40.0%
693 ,3.00,2.00,20.0%,20.0%,60.0%
694 ,4.00,2.00,20.0%,20.0%,80.0%
695 ,5.00,2.00,20.0%,20.0%,100.0%
696 Total,,10.00,100.0%,,
700 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
701 AT_DATA([frequencies.sps],
702 [DATA LIST LIST notable /X * F *.
715 /PERCENTILES = 0 25 50 75 100.
717 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
733 ,,Frequency,Percent,Valid Percent,Cumulative Percent
734 Valid,1.00,1.00,16.7%,16.7%,16.7%
735 ,3.00,2.00,33.3%,33.3%,50.0%
736 ,4.00,1.00,16.7%,16.7%,66.7%
737 ,5.00,2.00,33.3%,33.3%,100.0%
742 dnl Data for this test case from Fabio Bordignon <bordignon@demos.it>.
743 AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
744 AT_DATA([frequencies.sps],
745 [DATA LIST LIST notable /X * F *.
757 /PERCENTILES = 0 25 50 75 100.
759 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
775 ,,Frequency,Percent,Valid Percent,Cumulative Percent
776 Valid,1.00,7.00,17.5%,17.5%,17.5%
777 ,2.00,16.00,40.0%,40.0%,57.5%
778 ,3.00,12.00,30.0%,30.0%,87.5%
779 ,4.00,5.00,12.5%,12.5%,100.0%
780 Total,,40.00,100.0%,,
784 AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
785 AT_DATA([frequencies.sps],
786 [DATA LIST LIST notable /X * F *.
796 MISSING VALUE x (99.0) .
801 /PERCENTILES = 0 25 50 75 100.
804 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
820 ,,Frequency,Percent,Valid Percent,Cumulative Percent
821 Valid,1.00,1.00,10.0%,16.7%,16.7%
822 ,3.00,2.00,20.0%,33.3%,50.0%
823 ,4.00,1.00,10.0%,16.7%,66.7%
824 ,5.00,2.00,20.0%,33.3%,100.0%
825 Missing,99.00,4.00,40.0%,,
826 Total,,10.00,100.0%,,
830 AT_SETUP([FREQUENCIES dichotomous histogram])
831 AT_DATA([frequencies.sps], [dnl
832 data list notable list /d4 *.
860 AT_CHECK([pspp frequencies.sps], [0], [ignore])
864 AT_SETUP([FREQUENCIES median])
865 AT_DATA([median.sps], [dnl
866 data list notable list /x *.
879 AT_CHECK([pspp median.sps -O format=csv], [0], [dnl
887 ,,Frequency,Percent,Valid Percent,Cumulative Percent
888 Valid,1.00,1,33.3%,33.3%,33.3%
889 ,2.00,1,33.3%,33.3%,66.7%
890 ,3000000,1,33.3%,33.3%,100.0%
895 AT_SETUP([FREQUENCIES variance])
896 AT_DATA([variance.sps], [dnl
897 data list notable list /forename (A12) height.
907 /STATISTICS = VARIANCE.
910 AT_CHECK([pspp variance.sps -O format=csv], [0], [dnl
918 ,,Frequency,Percent,Valid Percent,Cumulative Percent
919 Valid,109.00,1,25.0%,25.0%,25.0%
920 ,134.00,1,25.0%,25.0%,50.0%
921 ,167.00,1,25.0%,25.0%,75.0%
922 ,188.00,1,25.0%,25.0%,100.0%
927 AT_SETUP([FREQUENCIES default statistics])
928 AT_DATA([median.sps], [dnl
929 data list notable list /x *.
943 /STATISTICS = DEFAULT
947 AT_CHECK([pspp median.sps -o pspp.csv -o pspp.txt])
948 AT_CHECK([cat pspp.csv], [0], [dnl
959 ,,Frequency,Percent,Valid Percent,Cumulative Percent
960 Valid,10.00,1,33.3%,33.3%,33.3%
961 ,20.00,1,33.3%,33.3%,66.7%
962 ,3000000,1,33.3%,33.3%,100.0%
975 ,,Frequency,Percent,Valid Percent,Cumulative Percent
976 Valid,10.00,1,33.3%,33.3%,33.3%
977 ,20.00,1,33.3%,33.3%,66.7%
978 ,3000000,1,33.3%,33.3%,100.0%
985 AT_SETUP([FREQUENCIES no valid data])
986 AT_DATA([empty.sps], [dnl
987 data list notable list /x *.
1000 AT_CHECK([pspp empty.sps -O format=csv], [0], [dnl
1029 AT_SETUP([FREQUENCIES histogram no valid cases])
1030 AT_DATA([empty.sps], [dnl
1031 data list notable list /x w *.
1046 AT_CHECK([pspp empty.sps -O format=csv], [0], [ignore])
1050 AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
1051 AT_DATA([bug.sps], [dnl
1056 COMPUTE SCORE=EXP(NORMAL(1)).
1062 FREQUENCIES VARIABLES=SCORE
1065 /PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
1070 AT_CHECK([pspp bug.sps], [0], [ignore])
1075 AT_SETUP([FREQUENCIES vs. missing weights])
1076 AT_DATA([warn.sps], [dnl
1077 data list notable list /x w .
1089 frequencies /variables=x.
1092 AT_CHECK([pspp warn.sps -O format=csv], [0], [dnl
1093 "warn.sps:13: warning: FREQUENCIES: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative. These case(s) were ignored."
1105 ,,Frequency,Percent,Valid Percent,Cumulative Percent
1106 Valid,1.00,2.00,50.0%,50.0%,50.0%
1107 ,2.00,1.00,25.0%,25.0%,75.0%
1108 ,3.00,1.00,25.0%,25.0%,100.0%
1109 ,4.00,.00,.0%,.0%,100.0%
1110 Total,,4.00,100.0%,,