dnl PSPP - a program for statistical analysis.
dnl Copyright (C) 2017 Free Software Foundation, Inc.
dnl
dnl This program is free software: you can redistribute it and/or modify
dnl it under the terms of the GNU General Public License as published by
dnl the Free Software Foundation, either version 3 of the License, or
dnl (at your option) any later version.
dnl
dnl This program is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
dnl GNU General Public License for more details.
dnl
dnl You should have received a copy of the GNU General Public License
dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
dnl
AT_BANNER([FREQUENCIES procedure])

dnl Frequency table for a string (A8) variable.  Only "name" is analyzed;
dnl "value" and "quantity" are read but not used by FREQUENCIES.
AT_SETUP([FREQUENCIES string variable])
AT_DATA([frequencies.sps],
  [DATA LIST FREE/
   name (A8)
   value *
   quantity .
BEGIN DATA.
foo 1 5
bar 2 6
baz 1 9
quux 3 1
bar 1 2
baz 4 3
baz 1 4
baz 1 1
foo 6 0
quux 5 8
END DATA.
EXECUTE.

FREQUENCIES /VAR = name/ORDER=ANALYSIS.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: name
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,bar,2,20.0%,20.0%,20.0%
,baz,4,40.0%,40.0%,60.0%
,foo,2,20.0%,20.0%,80.0%
,quux,2,20.0%,20.0%,100.0%
Total,,10,100.0%,,
])
AT_CLEANUP

# Tests for a bug where pspp would crash if two FREQUENCIES commands
# existed in an input file.
dnl Both commands are expected to print the v1 and v2 tables, so each
dnl table appears twice in the expected output.
AT_SETUP([FREQUENCIES two runs crash])
AT_DATA([frequencies.sps],
  [data list free /v1 v2.
begin data.
0 1
2 3
4 5
3 4
end data.

frequencies v1 v2/statistics=none/ORDER=VARIABLE.
frequencies v1 v2/statistics=none.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: v1
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,.00,1,25.0%,25.0%,25.0%
,2.00,1,25.0%,25.0%,50.0%
,3.00,1,25.0%,25.0%,75.0%
,4.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,

Table: v2
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,25.0%,25.0%,25.0%
,3.00,1,25.0%,25.0%,50.0%
,4.00,1,25.0%,25.0%,75.0%
,5.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,

Table: v1
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,.00,1,25.0%,25.0%,25.0%
,2.00,1,25.0%,25.0%,50.0%
,3.00,1,25.0%,25.0%,75.0%
,4.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,

Table: v2
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,25.0%,25.0%,25.0%
,3.00,1,25.0%,25.0%,50.0%
,4.00,1,25.0%,25.0%,75.0%
,5.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
])
AT_CLEANUP

# Test that the LIMIT specification works.
dnl The expected output contains only the v2 table: v1 has four distinct
dnl values, v2 has three, and the command specifies LIMIT(3).
AT_SETUP([FREQUENCIES with LIMIT])
AT_DATA([frequencies.sps],
  [data list free /v1 v2.
begin data.
0 1
2 5
4 3
3 5
end data.

frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: v2
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,25.0%,25.0%,25.0%
,3.00,1,25.0%,25.0%,50.0%
,5.00,2,50.0%,50.0%,100.0%
Total,,4,100.0%,,
])
AT_CLEANUP

# Tests for a bug where PSPP would crash when a FREQUENCIES command
# was used with the HTML output driver.
dnl CSV goes to stdout and is compared exactly; the second AT_CHECK only
dnl verifies that pspp.html exists and is non-empty.
AT_SETUP([FREQUENCIES HTML output crash])
AT_DATA([frequencies.sps],
  [data list free /v1 v2.
begin data.
0 1
2 3
4 5
3 4
end data.

list.

frequencies v1/statistics=none.
])
AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
  [Table: Data List
v1,v2
.00,1.00
2.00,3.00
4.00,5.00
3.00,4.00

Table: v1
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,.00,1,25.0%,25.0%,25.0%
,2.00,1,25.0%,25.0%,50.0%
,3.00,1,25.0%,25.0%,75.0%
,4.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
])
AT_CHECK([test -s pspp.html])
AT_CLEANUP

# Tests for a bug which crashed PSPP when a piechart with too many
# segments was requested.
dnl With /format=notable and /statistics=none the only text expected on
dnl stdout is the DATA LIST variable summary.
AT_SETUP([FREQUENCIES pie chart crash])
AT_DATA([frequencies.sps],
  [data list list /x * w *.
begin data.
1  4
34 10
-9 15
232 6
11  4
134 1
9  5
32 16
-2 6
2  16
20  6
end data.

weight by w.

frequencies /x /format=notable /statistics=none
	/piechart.
])
# Cannot use the CSV driver for this because it does not output charts
# at all.
AT_CHECK([pspp frequencies.sps], [0], [dnl
Reading free-form data from INLINE.
+--------+------+
|Variable|Format|
+--------+------+
|x       |F8.0  |
|w       |F8.0  |
+--------+------+
])
AT_CLEANUP

dnl Check that histogram subcommand runs without crashing
dnl Stdout is ignored; only the exit status (0) is checked.
AT_SETUP([FREQUENCIES histogram crash])
AT_DATA([frequencies.sps],
  [data list notable list /x * w *.
begin data.
1  4
34 10
-9 15
232 6
11  4
134 1
9  5
32 16
-2 6
2  16
20  6
end data.

weight by w.

frequencies /x
	    /format=notable
	    /statistics=none
	    /histogram=minimum(0) maximum(50) percent(5) normal.
])
# Cannot use the CSV driver for this because it does not output charts
# at all.
AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore])
AT_CLEANUP

# Tests for a bug which crashed PSPP when the median and a histogram
# were both requested.
AT_SETUP([FREQUENCIES median with histogram crash])
AT_DATA([frequencies.sps], [dnl
data list list notable /x.
begin data.
1
end data.

frequencies /x /histogram /STATISTICS=median.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
dnl Ignore output - No crash test.
AT_CLEANUP

# Tests for a bug which caused FREQUENCIES following TEMPORARY to
# crash (bug #11492).
AT_SETUP([FREQUENCIES crash after TEMPORARY])
dnl The commands after END DATA have no terminating period, so each one
dnl must start in the first column of its own line.
AT_DATA([frequencies.sps],
  [DATA LIST LIST /SEX (A1) X *.
BEGIN DATA.
M 31
F 21
M 41
F 31
M 13
F 12
M 14
F 13
END DATA.

TEMPORARY
SELECT IF SEX EQ 'F'
FREQUENCIES
 /X .
FINISH
])
dnl The first check only requires exit status 0; the CSV file it wrote
dnl is then compared exactly.
AT_CHECK([pspp -o pspp.csv -o pspp.txt frequencies.sps])
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Reading free-form data from INLINE.
Variable,Format
SEX,A1
X,F8.0

Table: Statistics
,,X
N,Valid,4
,Missing,0
Mean,,19.25
Std Dev,,8.81
Minimum,,12.00
Maximum,,31.00

Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,12.00,1,25.0%,25.0%,25.0%
,13.00,1,25.0%,25.0%,50.0%
,21.00,1,25.0%,25.0%,75.0%
,31.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
])
AT_CLEANUP

dnl Expected CSV output shared by the "basic percentiles" and
dnl "basic n-tiles" tests that follow.
m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
Table: Statistics
,,x
N,Valid,5
,Missing,0
Mean,,3.00
Std Dev,,1.58
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,2.00
,33,2.33
,50,3.00
,67,3.67
,75,4.00
,100,5.00
])

dnl Explicit percentile list; 33.333 and 66.666 are shown as 33 and 67
dnl in the expected output.
AT_SETUP([FREQUENCIES basic percentiles])
AT_DATA([frequencies.sps],
  [DATA LIST LIST notable /x * .
BEGIN DATA.
1
2
3
4
5
END DATA.

FREQUENCIES
	VAR=x
	/FORMAT=NOTABLE
	/PERCENTILES = 0 25 33.333 50 66.666 75 100.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0],
  [FREQUENCIES_NTILES_OUTPUT])
AT_CLEANUP

dnl /NTILES = 3 together with /NTILES = 4 is expected to give the same
dnl statistics table as the explicit percentile list in the test above.
AT_SETUP([FREQUENCIES basic n-tiles])
AT_DATA([frequencies.sps],
  [DATA LIST LIST notable /x * .
BEGIN DATA.
1
2
3
4
5
END DATA.

FREQUENCIES
	VAR=x
	/FORMAT=NOTABLE
	/NTILES = 3
	/NTILES = 4.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0],
  [FREQUENCIES_NTILES_OUTPUT])
AT_CLEANUP

dnl Percentiles with /ALGORITHM=COMPATIBLE: the expected 25th and 75th
dnl percentiles are 1.50 and 4.50.
AT_SETUP([FREQUENCIES compatibility percentiles])
AT_DATA([frequencies.sps],
  [DATA LIST LIST notable /X * .
BEGIN DATA.
1
2
3
4
5
END DATA.

FREQUENCIES
	VAR=x
	/ALGORITHM=COMPATIBLE
	/PERCENTILES = 0 25 50 75 100.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,5
,Missing,0
Mean,,3.00
Std Dev,,1.58
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,1.50
,50,3.00
,75,4.50
,100,5.00

Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,20.0%,20.0%,20.0%
,2.00,1,20.0%,20.0%,40.0%
,3.00,1,20.0%,20.0%,60.0%
,4.00,1,20.0%,20.0%,80.0%
,5.00,1,20.0%,20.0%,100.0%
Total,,5,100.0%,,
])
AT_CLEANUP

dnl Same data and percentile list as the compatibility test, but without
dnl /ALGORITHM=COMPATIBLE: the expected 25th and 75th percentiles are
dnl 2.00 and 4.00.
AT_SETUP([FREQUENCIES enhanced percentiles])
AT_DATA([frequencies.sps],
  [DATA LIST LIST notable /X * .
BEGIN DATA.
1
2
3
4
5
END DATA.

FREQUENCIES
	VAR=x
	/PERCENTILES = 0 25 50 75 100.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,5
,Missing,0
Mean,,3.00
Std Dev,,1.58
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,2.00
,50,3.00
,75,4.00
,100,5.00

Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,20.0%,20.0%,20.0%
,2.00,1,20.0%,20.0%,40.0%
,3.00,1,20.0%,20.0%,60.0%
,4.00,1,20.0%,20.0%,80.0%
,5.00,1,20.0%,20.0%,100.0%
Total,,5,100.0%,,
])
AT_CLEANUP

dnl Weighted by F.  The values 4 and 5 each occur in two cases of weight
dnl 1, so every value of X ends up with a total weight of 2.
AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
AT_DATA([frequencies.sps],
  [DATA LIST LIST notable /X * F *.
BEGIN DATA.
1 2
2 2
3 2
4 1
4 1
5 1
5 1
END DATA.

WEIGHT BY f.

FREQUENCIES
	VAR=x
	/PERCENTILES = 0 25 50 75 100.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,10.00
,Missing,.00
Mean,,3.00
Std Dev,,1.49
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,2.00
,50,3.00
,75,4.00
,100,5.00

Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,2.00,20.0%,20.0%,20.0%
,2.00,2.00,20.0%,20.0%,40.0%
,3.00,2.00,20.0%,20.0%,60.0%
,4.00,2.00,20.0%,20.0%,80.0%
,5.00,2.00,20.0%,20.0%,100.0%
Total,,10.00,100.0%,,
])
AT_CLEANUP

dnl Weighted data with unequal total weights per value (1, 2, 1, 2).
AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
AT_DATA([frequencies.sps],
  [DATA LIST LIST notable /X * F *.
BEGIN DATA.
1 1
3 2
4 1
5 1
5 1
END DATA.

WEIGHT BY f.

FREQUENCIES
	VAR=x
	/PERCENTILES = 0 25 50 75 100.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,6.00
,Missing,.00
Mean,,3.50
Std Dev,,1.52
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,3.00
,50,3.50
,75,4.75
,100,5.00

Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1.00,16.7%,16.7%,16.7%
,3.00,2.00,33.3%,33.3%,50.0%
,4.00,1.00,16.7%,16.7%,66.7%
,5.00,2.00,33.3%,33.3%,100.0%
Total,,6.00,100.0%,,
])
AT_CLEANUP

dnl Data for this test case from Fabio Bordignon.
AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
AT_DATA([frequencies.sps],
  [DATA LIST LIST notable /X * F *.
BEGIN DATA.
1 7
2 16
3 12
4 5
END DATA.

WEIGHT BY f.

FREQUENCIES
	VAR=x
	/PERCENTILES = 0 25 50 75 100.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,40.00
,Missing,.00
Mean,,2.38
Std Dev,,.93
Minimum,,1.00
Maximum,,4.00
Percentiles,0,1.00
,25,2.00
,50,2.00
,75,3.00
,100,4.00

Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,7.00,17.5%,17.5%,17.5%
,2.00,16.00,40.0%,40.0%,57.5%
,3.00,12.00,30.0%,30.0%,87.5%
,4.00,5.00,12.5%,12.5%,100.0%
Total,,40.00,100.0%,,
])
AT_CLEANUP

dnl Same cases as "weighted (2)" plus a user-missing value 99 carrying
dnl weight 4.  The expected statistics are unchanged; the table gains a
dnl Missing row and the Percent column is based on the total of 10.
AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
AT_DATA([frequencies.sps],
  [DATA LIST LIST notable /X * F *.
BEGIN DATA.
1 1
3 2
4 1
5 1
5 1
99 4
END DATA.

MISSING VALUE x (99.0) .
WEIGHT BY f.

FREQUENCIES
	VAR=x
	/PERCENTILES = 0 25 50 75 100.
])
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,6.00
,Missing,4.00
Mean,,3.50
Std Dev,,1.52
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,3.00
,50,3.50
,75,4.75
,100,5.00

Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1.00,10.0%,16.7%,16.7%
,3.00,2.00,20.0%,33.3%,50.0%
,4.00,1.00,10.0%,16.7%,66.7%
,5.00,2.00,20.0%,33.3%,100.0%
Missing,99.00,4.00,40.0%,,
Total,,10.00,100.0%,,
])
AT_CLEANUP

dnl Runs /HISTOGRAM=NORMAL on a variable whose only values are 0, 1
dnl and 2.  Stdout is ignored; only the exit status (0) is checked.
AT_SETUP([FREQUENCIES dichotomous histogram])
AT_DATA([frequencies.sps], [dnl
data list notable list /d4 *.
begin data.
0
0
0
1
0
0
0
0
1
0
0
0
0
0
1
2
0
end data.

FREQUENCIES
	/VARIABLES = d4
	/FORMAT=AVALUE TABLE
	/HISTOGRAM=NORMAL
	.
])
AT_CHECK([pspp frequencies.sps], [0], [ignore])
AT_CLEANUP

dnl Median of three values, one of which is far larger than the others.
AT_SETUP([FREQUENCIES median])
AT_DATA([median.sps], [dnl
data list notable list /x *.
begin data.
1
2
3000000
end data.

FREQUENCIES
	/VARIABLES = x
	/STATISTICS = MEDIAN
	.
])
AT_CHECK([pspp median.sps -O format=csv], [0], [dnl
Table: Statistics
,,x
N,Valid,3
,Missing,0
Median,,2.00

Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,33.3%,33.3%,33.3%
,2.00,1,33.3%,33.3%,66.7%
,3000000,1,33.3%,33.3%,100.0%
Total,,3,100.0%,,
])
AT_CLEANUP

dnl The expected value 1223.00 is the variance of the four heights
dnl computed with an n - 1 denominator (3669 / 3).
AT_SETUP([FREQUENCIES variance])
AT_DATA([variance.sps], [dnl
data list notable list /forename (A12) height.
begin data.
Ahmed 188
bertram 167
Catherine 134
David 109
end data.

FREQUENCIES
	/VARIABLES = height
	/STATISTICS = VARIANCE.
])
AT_CHECK([pspp variance.sps -O format=csv], [0], [dnl
Table: Statistics
,,height
N,Valid,4
,Missing,0
Variance,,1223.00

Table: height
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,109.00,1,25.0%,25.0%,25.0%
,134.00,1,25.0%,25.0%,50.0%
,167.00,1,25.0%,25.0%,75.0%
,188.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
])
AT_CLEANUP

dnl /STATISTICS with no keywords and /STATISTICS = DEFAULT are expected
dnl to produce identical output, so the same two tables appear twice.
AT_SETUP([FREQUENCIES default statistics])
AT_DATA([median.sps], [dnl
data list notable list /x *.
begin data.
10
20
3000000
end data.

FREQUENCIES
	/VARIABLES = x
	/STATISTICS
	.

FREQUENCIES
	/VARIABLES = x
	/STATISTICS = DEFAULT
	.
])
AT_CHECK([pspp median.sps -o pspp.csv -o pspp.txt])
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Statistics
,,x
N,Valid,3
,Missing,0
Mean,,1000010
Std Dev,,1732042
Minimum,,10.00
Maximum,,3000000

Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,10.00,1,33.3%,33.3%,33.3%
,20.00,1,33.3%,33.3%,66.7%
,3000000,1,33.3%,33.3%,100.0%
Total,,3,100.0%,,

Table: Statistics
,,x
N,Valid,3
,Missing,0
Mean,,1000010
Std Dev,,1732042
Minimum,,10.00
Maximum,,3000000

Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,10.00,1,33.3%,33.3%,33.3%
,20.00,1,33.3%,33.3%,66.7%
,3000000,1,33.3%,33.3%,100.0%
Total,,3,100.0%,,
])
AT_CLEANUP

dnl All three cases are system-missing, so every statistic is expected
dnl to print as ".".  @&t@ is the Autoconf quadrigraph for the empty
dnl string; it follows the trailing blanks of those cells.
dnl NOTE(review): the number of blanks before each @&t@ (and after the
dnl "." in the Missing row) must match pspp's output exactly -- confirm.
AT_SETUP([FREQUENCIES no valid data])
AT_DATA([empty.sps], [dnl
data list notable list /x *.
begin data.
.
.
.
end data.

FREQUENCIES
	/VARIABLES = x
	/STATISTICS = ALL
	.
])
AT_CHECK([pspp empty.sps -O format=csv], [0], [dnl
Table: Statistics
,,x
N,Valid,0
,Missing,3
Mean,,.  @&t@
S.E. Mean,,.  @&t@
Median,,.  @&t@
Mode,,.  @&t@
Std Dev,,.  @&t@
Variance,,.  @&t@
Kurtosis,,.  @&t@
S.E. Kurt,,.  @&t@
Skewness,,.  @&t@
S.E. Skew,,.  @&t@
Range,,.  @&t@
Minimum,,.  @&t@
Maximum,,.  @&t@
Sum,,.  @&t@

Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Missing,.  ,3,100.0%,,
Total,,3,.0%,,
])
AT_CLEANUP

dnl Every case has a system-missing weight.  Stdout is ignored; only the
dnl exit status (0) is checked.
AT_SETUP([FREQUENCIES histogram no valid cases])
AT_DATA([empty.sps], [dnl
data list notable list /x w *.
begin data.
1 .
2 .
3 .
end data.
weight by w.

FREQUENCIES
	/VARIABLES = x
	/histogram
	.
])
AT_CHECK([pspp empty.sps -O format=csv], [0], [ignore])
AT_CLEANUP

dnl Combines /STATISTICS=ALL, /PERCENTILES and /HISTOGRAM on ten
dnl generated cases.  Stdout is ignored; only the exit status (0) is
dnl checked.
AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
AT_DATA([bug.sps], [dnl
SET FORMAT=F8.0.

INPUT PROGRAM.
	LOOP I=1 TO 10.
		COMPUTE SCORE=EXP(NORMAL(1)).
		END CASE.
	END LOOP.
	END FILE.
END INPUT PROGRAM.

FREQUENCIES VARIABLES=SCORE
 /FORMAT=NOTABLE
 /STATISTICS=ALL
 /PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
 /HISTOGRAM.
])
AT_CHECK([pspp bug.sps], [0], [ignore])
AT_CLEANUP

dnl The last two cases have system-missing weights, and the expected
dnl output starts with the warning about ignored cases.  The warning
dnl cites warn.sps line 13, so the layout of the syntax file below
dnl (including its blank lines) is significant.
AT_SETUP([FREQUENCIES vs. missing weights])
AT_DATA([warn.sps], [dnl
data list notable list /x w .
begin data.
1 1
2 1
1 1
3 1
3 .
4 .
end data.

weight by w.

frequencies /variables=x.
])
AT_CHECK([pspp warn.sps -O format=csv], [0], [dnl
"warn.sps:13: warning: FREQUENCIES: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative.  These case(s) were ignored."

Table: Statistics
,,x
N,Valid,4.00
,Missing,.00
Mean,,1.75
Std Dev,,.96
Minimum,,1.00
Maximum,,4.00

Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,2.00,50.0%,50.0%,50.0%
,2.00,1.00,25.0%,25.0%,75.0%
,3.00,1.00,25.0%,25.0%,100.0%
,4.00,.00,.0%,.0%,100.0%
Total,,4.00,100.0%,,
])
AT_CLEANUP