dnl PSPP - a program for statistical analysis.
dnl Copyright (C) 2017 Free Software Foundation, Inc.
dnl
dnl This program is free software: you can redistribute it and/or modify
dnl it under the terms of the GNU General Public License as published by
dnl the Free Software Foundation, either version 3 of the License, or
dnl (at your option) any later version.
dnl
dnl This program is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
dnl GNU General Public License for more details.
dnl
dnl You should have received a copy of the GNU General Public License
dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
dnl
dnl Category banner for the FREQUENCIES test groups defined in this file.
AT_BANNER([FREQUENCIES procedure])
dnl Frequency table for the 8-character string variable "name" with
dnl ORDER=ANALYSIS.  The value and quantity columns are read but not
dnl tabulated.  The expected CSV lists the four distinct names with
dnl their counts out of ten cases.
AT_SETUP([FREQUENCIES string variable])
AT_DATA([frequencies.sps],
[DATA LIST FREE/
name (A8) value * quantity .
BEGIN DATA.
foo 1 5
bar 2 6
baz 1 9
quux 3 1
bar 1 2
baz 4 3
baz 1 4
baz 1 1
foo 6 0
quux 5 8
END DATA.
EXECUTE.
FREQUENCIES /VAR = name/ORDER=ANALYSIS.
])
dnl Exit status must be 0 and stdout must match the table below exactly.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: name
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,bar,2,20.0%,20.0%,20.0%
,baz,4,40.0%,40.0%,60.0%
,foo,2,20.0%,20.0%,80.0%
,quux,2,20.0%,20.0%,100.0%
Total,,10,100.0%,,
])
AT_CLEANUP
# Tests for a bug where pspp would crash if two FREQUENCIES commands
# existed in an input file.
dnl Runs FREQUENCIES twice over v1 and v2 in one syntax file, the first
dnl time with ORDER=VARIABLE and the second time without it.  The
dnl expected output is the same pair of tables printed twice.
AT_SETUP([FREQUENCIES two runs crash])
AT_DATA([frequencies.sps],
[data list free /v1 v2.
begin data.
0 1
2 3
4 5
3 4
end data.
frequencies v1 v2/statistics=none/ORDER=VARIABLE.
frequencies v1 v2/statistics=none.
])
dnl Tables for the first command come first, then those for the second.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: v1
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,.00,1,25.0%,25.0%,25.0%
,2.00,1,25.0%,25.0%,50.0%
,3.00,1,25.0%,25.0%,75.0%
,4.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
Table: v2
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,25.0%,25.0%,25.0%
,3.00,1,25.0%,25.0%,50.0%
,4.00,1,25.0%,25.0%,75.0%
,5.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
Table: v1
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,.00,1,25.0%,25.0%,25.0%
,2.00,1,25.0%,25.0%,50.0%
,3.00,1,25.0%,25.0%,75.0%
,4.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
Table: v2
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,25.0%,25.0%,25.0%
,3.00,1,25.0%,25.0%,50.0%
,4.00,1,25.0%,25.0%,75.0%
,5.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
])
AT_CLEANUP
# Test that the LIMIT specification works.
dnl The data give v1 four distinct values and v2 three distinct values.
dnl With FORMAT=LIMIT(3) the expected output contains a table for v2
dnl only; no table for v1 is expected.
AT_SETUP([FREQUENCIES with LIMIT])
AT_DATA([frequencies.sps],
[data list free /v1 v2.
begin data.
0 1
2 5
4 3
3 5
end data.
frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
])
dnl Only the v2 table may appear on stdout.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: v2
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,25.0%,25.0%,25.0%
,3.00,1,25.0%,25.0%,50.0%
,5.00,2,50.0%,50.0%,100.0%
Total,,4,100.0%,,
])
AT_CLEANUP
# Tests for a bug where PSPP would crash when a FREQUENCIES command
# was used with the HTML output driver.
dnl Lists the data and then tabulates v1 while two outputs are active:
dnl CSV on stdout and an HTML file named pspp.html.
AT_SETUP([FREQUENCIES HTML output crash])
AT_DATA([frequencies.sps],
[data list free /v1 v2.
begin data.
0 1
2 3
4 5
3 4
end data.
list.
frequencies v1/statistics=none.
])
dnl The CSV on stdout is compared exactly.
AT_CHECK([pspp -o - -O format=csv -o pspp.html frequencies.sps], [0],
[Table: Data List
v1,v2
.00,1.00
2.00,3.00
4.00,5.00
3.00,4.00
Table: v1
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,.00,1,25.0%,25.0%,25.0%
,2.00,1,25.0%,25.0%,50.0%
,3.00,1,25.0%,25.0%,75.0%
,4.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
])
dnl The HTML file is not compared; it only has to exist and be non-empty.
AT_CHECK([test -s pspp.html])
AT_CLEANUP
# Tests for a bug which crashed PSPP when a piechart with too many
# segments was requested.
dnl Requests a pie chart for x, weighted by w, with the frequency table
dnl and the statistics suppressed.  The eleven distinct x values give
dnl the chart eleven segments.
AT_SETUP([FREQUENCIES pie chart crash])
AT_DATA([frequencies.sps],
[data list list /x * w *.
begin data.
1 4
34 10
-9 15
232 6
11 4
134 1
9 5
32 16
-2 6
2 16
20 6
end data.
weight by w.
frequencies /x /format=notable /statistics=none
/piechart.
])
# Cannot use the CSV driver for this because it does not output charts
# at all.
dnl The only text expected on stdout is the DATA LIST variable summary.
dnl Each cell is padded to the column width fixed by the border rows
dnl (8 and 6 characters) so that the exact comparison can match.
AT_CHECK([pspp frequencies.sps], [0], [dnl
Reading free-form data from INLINE.
+--------+------+
|Variable|Format|
+--------+------+
|x       |F8.0  |
|w       |F8.0  |
+--------+------+
])
AT_CLEANUP
dnl Check that histogram subcommand runs without crashing
dnl Requests a histogram of x, weighted by w, with explicit minimum,
dnl maximum and percent settings plus a normal curve.  The frequency
dnl table and the statistics are suppressed.
AT_SETUP([FREQUENCIES histogram crash])
AT_DATA([frequencies.sps],
[data list notable list /x * w *.
begin data.
1 4
34 10
-9 15
232 6
11 4
134 1
9 5
32 16
-2 6
2 16
20 6
end data.
weight by w.
frequencies /x
/format=notable
/statistics=none
/histogram=minimum(0) maximum(50) percent(5) normal.
])
# Cannot use the CSV driver for this because it does not output charts
# at all.
dnl Only the exit status is checked; stdout and stderr are both ignored.
AT_CHECK([pspp -O format=pdf frequencies.sps], [0], [ignore], [ignore])
AT_CLEANUP
# Tests for a bug which crashed PSPP when the median and a histogram
# were both requested.
dnl Single-case data set with both a histogram and the median requested
dnl in the same FREQUENCIES command.
AT_SETUP([FREQUENCIES median with histogram crash])
AT_DATA([frequencies.sps], [dnl
data list list notable /x.
begin data.
1
end data.
frequencies /x /histogram /STATISTICS=median.
])
dnl Exit status must be 0.  Stdout is ignored.  No stderr argument is
dnl given, so the Autotest default of empty stderr applies.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [ignore])
dnl Ignore output - No crash test.
AT_CLEANUP
# Tests for a bug which caused FREQUENCIES following TEMPORARY to
# crash (bug #11492).
dnl Runs FREQUENCIES on X directly after TEMPORARY and a SELECT IF that
dnl keeps the cases whose SEX is F.  The expected statistics and table
dnl cover exactly those four cases (12, 13, 21 and 31).
AT_SETUP([FREQUENCIES crash after TEMPORARY])
AT_DATA([frequencies.sps],
[DATA LIST LIST /SEX (A1) X *.
BEGIN DATA.
M 31
F 21
M 41
F 31
M 13
F 12
M 14
F 13
END DATA.
TEMPORARY
SELECT IF SEX EQ 'F'
FREQUENCIES /X .
FINISH
])
dnl Output is written to pspp.csv and pspp.txt; this first check only
dnl requires a zero exit status with nothing on stdout or stderr.
AT_CHECK([pspp -o pspp.csv -o pspp.txt frequencies.sps])
dnl Only the CSV file is compared; pspp.txt is never inspected.
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Reading free-form data from INLINE.
Variable,Format
SEX,A1
X,F8.0
Table: Statistics
,,X
N,Valid,4
,Missing,0
Mean,,19.25
Std Dev,,8.81
Minimum,,12.00
Maximum,,31.00
Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,12.00,1,25.0%,25.0%,25.0%
,13.00,1,25.0%,25.0%,50.0%
,21.00,1,25.0%,25.0%,75.0%
,31.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
])
AT_CLEANUP
dnl Expected CSV output shared by the "basic percentiles" and
dnl "basic n-tiles" test groups: the statistics table for x = 1..5 with
dnl percentile rows labelled 0, 25, 33, 50, 67, 75 and 100.
m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
Table: Statistics
,,x
N,Valid,5
,Missing,0
Mean,,3.00
Std Dev,,1.58
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,2.00
,33,2.33
,50,3.00
,67,3.67
,75,4.00
,100,5.00
])
dnl Requests seven explicit percentiles for x = 1..5 with the frequency
dnl table suppressed.  33.333 and 66.666 are expected to be labelled 33
dnl and 67 in the output.
AT_SETUP([FREQUENCIES basic percentiles])
AT_DATA([frequencies.sps],
[DATA LIST LIST notable /x * .
BEGIN DATA.
1
2
3
4
5
END DATA.
FREQUENCIES
VAR=x
/FORMAT=NOTABLE
/PERCENTILES = 0 25 33.333 50 66.666 75 100.
])
dnl The expected stdout is the shared FREQUENCIES_NTILES_OUTPUT macro.
AT_CHECK([pspp -O format=csv frequencies.sps], [0],
[FREQUENCIES_NTILES_OUTPUT])
AT_CLEANUP
dnl Same data as the "basic percentiles" group, but the cut points come
dnl from two NTILES subcommands (3 and 4) instead of an explicit
dnl PERCENTILES list.  The expected output is identical.
AT_SETUP([FREQUENCIES basic n-tiles])
AT_DATA([frequencies.sps],
[DATA LIST LIST notable /x * .
BEGIN DATA.
1
2
3
4
5
END DATA.
FREQUENCIES
VAR=x
/FORMAT=NOTABLE
/NTILES = 3
/NTILES = 4.
])
dnl The expected stdout is the shared FREQUENCIES_NTILES_OUTPUT macro.
AT_CHECK([pspp -O format=csv frequencies.sps], [0],
[FREQUENCIES_NTILES_OUTPUT])
AT_CLEANUP
dnl Quartiles of X = 1..5 with ALGORITHM=COMPATIBLE.  The expected 25th
dnl and 75th percentiles are 1.50 and 4.50, which differ from the 2.00
dnl and 4.00 expected by the "enhanced percentiles" group for the same
dnl data.
AT_SETUP([FREQUENCIES compatibility percentiles])
AT_DATA([frequencies.sps],
[DATA LIST LIST notable /X * .
BEGIN DATA.
1
2
3
4
5
END DATA.
FREQUENCIES
VAR=x
/ALGORITHM=COMPATIBLE
/PERCENTILES = 0 25 50 75 100.
])
dnl Statistics table first, then the frequency table for X.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,5
,Missing,0
Mean,,3.00
Std Dev,,1.58
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,1.50
,50,3.00
,75,4.50
,100,5.00
Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,20.0%,20.0%,20.0%
,2.00,1,20.0%,20.0%,40.0%
,3.00,1,20.0%,20.0%,60.0%
,4.00,1,20.0%,20.0%,80.0%
,5.00,1,20.0%,20.0%,100.0%
Total,,5,100.0%,,
])
AT_CLEANUP
dnl Quartiles of X = 1..5 with no ALGORITHM subcommand.  The expected
dnl 25th and 75th percentiles are 2.00 and 4.00.
AT_SETUP([FREQUENCIES enhanced percentiles])
AT_DATA([frequencies.sps],
[DATA LIST LIST notable /X * .
BEGIN DATA.
1
2
3
4
5
END DATA.
FREQUENCIES
VAR=x
/PERCENTILES = 0 25 50 75 100.
])
dnl Statistics table first, then the frequency table for X.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,5
,Missing,0
Mean,,3.00
Std Dev,,1.58
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,2.00
,50,3.00
,75,4.00
,100,5.00
Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,20.0%,20.0%,20.0%
,2.00,1,20.0%,20.0%,40.0%
,3.00,1,20.0%,20.0%,60.0%
,4.00,1,20.0%,20.0%,80.0%
,5.00,1,20.0%,20.0%,100.0%
Total,,5,100.0%,,
])
AT_CLEANUP
dnl Weighted version of the quartile test.  The weights in F add up to 2
dnl for each of the values 1..5; the values 4 and 5 each appear as two
dnl cases of weight 1.  The expected table gives every value a
dnl frequency of 2.00.
AT_SETUP([FREQUENCIES enhanced percentiles, weighted])
AT_DATA([frequencies.sps],
[DATA LIST LIST notable /X * F *.
BEGIN DATA.
1 2
2 2
3 2
4 1
4 1
5 1
5 1
END DATA.
WEIGHT BY f.
FREQUENCIES
VAR=x
/PERCENTILES = 0 25 50 75 100.
])
dnl With weighting on, the expected counts are printed with two decimals.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,10.00
,Missing,.00
Mean,,3.00
Std Dev,,1.49
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,2.00
,50,3.00
,75,4.00
,100,5.00
Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,2.00,20.0%,20.0%,20.0%
,2.00,2.00,20.0%,20.0%,40.0%
,3.00,2.00,20.0%,20.0%,60.0%
,4.00,2.00,20.0%,20.0%,80.0%
,5.00,2.00,20.0%,20.0%,100.0%
Total,,10.00,100.0%,,
])
AT_CLEANUP
dnl Weighted quartiles where the weights are uneven and the value 2 is
dnl absent.  The total weight is 6, and the expected 50th and 75th
dnl percentiles (3.50 and 4.75) fall between observed values.
AT_SETUP([FREQUENCIES enhanced percentiles, weighted (2)])
AT_DATA([frequencies.sps],
[DATA LIST LIST notable /X * F *.
BEGIN DATA.
1 1
3 2
4 1
5 1
5 1
END DATA.
WEIGHT BY f.
FREQUENCIES
VAR=x
/PERCENTILES = 0 25 50 75 100.
])
dnl The two cases with X = 5 are expected to merge into one table row.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,6.00
,Missing,.00
Mean,,3.50
Std Dev,,1.52
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,3.00
,50,3.50
,75,4.75
,100,5.00
Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1.00,16.7%,16.7%,16.7%
,3.00,2.00,33.3%,33.3%,50.0%
,4.00,1.00,16.7%,16.7%,66.7%
,5.00,2.00,33.3%,33.3%,100.0%
Total,,6.00,100.0%,,
])
AT_CLEANUP
dnl Data for this test case from Fabio Bordignon.
dnl Weighted quartiles over four values with weights 7, 16, 12 and 5,
dnl for a total weight of 40.  The expected 25th and 50th percentiles
dnl are both 2.00.
AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)])
AT_DATA([frequencies.sps],
[DATA LIST LIST notable /X * F *.
BEGIN DATA.
1 7
2 16
3 12
4 5
END DATA.
WEIGHT BY f.
FREQUENCIES
VAR=x
/PERCENTILES = 0 25 50 75 100.
])
dnl Statistics table first, then the frequency table for X.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,40.00
,Missing,.00
Mean,,2.38
Std Dev,,.93
Minimum,,1.00
Maximum,,4.00
Percentiles,0,1.00
,25,2.00
,50,2.00
,75,3.00
,100,4.00
Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,7.00,17.5%,17.5%,17.5%
,2.00,16.00,40.0%,40.0%,57.5%
,3.00,12.00,30.0%,30.0%,87.5%
,4.00,5.00,12.5%,12.5%,100.0%
Total,,40.00,100.0%,,
])
AT_CLEANUP
dnl Same valid data as the "weighted (2)" group plus one case of weight
dnl 4 whose value 99 is declared user-missing.  The expected statistics
dnl and percentiles match that group.  The missing weight appears only
dnl in the Missing count, in the Percent column and in the Total row.
AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values])
AT_DATA([frequencies.sps],
[DATA LIST LIST notable /X * F *.
BEGIN DATA.
1 1
3 2
4 1
5 1
5 1
99 4
END DATA.
MISSING VALUE x (99.0) .
WEIGHT BY f.
FREQUENCIES
VAR=x
/PERCENTILES = 0 25 50 75 100.
])
dnl Percent is taken over all 10.00 weighted cases and Valid Percent
dnl over the 6.00 valid ones, as the expected figures below show.
AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
Table: Statistics
,,X
N,Valid,6.00
,Missing,4.00
Mean,,3.50
Std Dev,,1.52
Minimum,,1.00
Maximum,,5.00
Percentiles,0,1.00
,25,3.00
,50,3.50
,75,4.75
,100,5.00
Table: X
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1.00,10.0%,16.7%,16.7%
,3.00,2.00,20.0%,33.3%,50.0%
,4.00,1.00,10.0%,16.7%,66.7%
,5.00,2.00,20.0%,33.3%,100.0%
Missing,99.00,4.00,40.0%,,
Total,,10.00,100.0%,,
])
AT_CLEANUP
dnl Histogram with a normal curve for d4, whose seventeen cases are
dnl nearly all 0 with a few 1s and a single 2.
AT_SETUP([FREQUENCIES dichotomous histogram])
AT_DATA([frequencies.sps], [dnl
data list notable list /d4 *.
begin data.
0
0
0
1
0
0
0
0
1
0
0
0
0
0
1
2
0
end data.
FREQUENCIES
/VARIABLES = d4
/FORMAT=AVALUE TABLE
/HISTOGRAM=NORMAL
.
])
dnl Exit status must be 0 and stdout is ignored.
AT_CHECK([pspp frequencies.sps], [0], [ignore])
AT_CLEANUP
dnl STATISTICS=MEDIAN on three cases, one of which is far larger than
dnl the other two.  The expected statistics table holds only the N rows
dnl and a median of 2.00.
AT_SETUP([FREQUENCIES median])
AT_DATA([median.sps], [dnl
data list notable list /x *.
begin data.
1
2
3000000
end data.
FREQUENCIES
/VARIABLES = x
/STATISTICS = MEDIAN
.
])
dnl The large value is expected to print as 3000000 with no decimals.
AT_CHECK([pspp median.sps -O format=csv], [0], [dnl
Table: Statistics
,,x
N,Valid,3
,Missing,0
Median,,2.00
Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,1,33.3%,33.3%,33.3%
,2.00,1,33.3%,33.3%,66.7%
,3000000,1,33.3%,33.3%,100.0%
Total,,3,100.0%,,
])
AT_CLEANUP
dnl STATISTICS=VARIANCE on the four heights.  The string variable
dnl forename is read but not analysed.  The expected variance is
dnl 1223.00.
AT_SETUP([FREQUENCIES variance])
AT_DATA([variance.sps], [dnl
data list notable list /forename (A12) height.
begin data.
Ahmed 188
bertram 167
Catherine 134
David 109
end data.
FREQUENCIES
/VARIABLES = height
/STATISTICS = VARIANCE.
])
dnl The statistics table holds only the N rows and the variance.
AT_CHECK([pspp variance.sps -O format=csv], [0], [dnl
Table: Statistics
,,height
N,Valid,4
,Missing,0
Variance,,1223.00
Table: height
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,109.00,1,25.0%,25.0%,25.0%
,134.00,1,25.0%,25.0%,50.0%
,167.00,1,25.0%,25.0%,75.0%
,188.00,1,25.0%,25.0%,100.0%
Total,,4,100.0%,,
])
AT_CLEANUP
dnl Runs FREQUENCIES twice on x, once with a bare STATISTICS subcommand
dnl and once with STATISTICS = DEFAULT.  Both runs are expected to print
dnl the same Mean, Std Dev, Minimum and Maximum rows.
AT_SETUP([FREQUENCIES default statistics])
AT_DATA([median.sps], [dnl
data list notable list /x *.
begin data.
10
20
3000000
end data.
FREQUENCIES
/VARIABLES = x
/STATISTICS
.
FREQUENCIES
/VARIABLES = x
/STATISTICS = DEFAULT
.
])
dnl Output is written to pspp.csv and pspp.txt; this first check only
dnl requires a zero exit status with nothing on stdout or stderr.
AT_CHECK([pspp median.sps -o pspp.csv -o pspp.txt])
dnl Only the CSV file is compared; it holds the two identical runs.
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Statistics
,,x
N,Valid,3
,Missing,0
Mean,,1000010
Std Dev,,1732042
Minimum,,10.00
Maximum,,3000000
Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,10.00,1,33.3%,33.3%,33.3%
,20.00,1,33.3%,33.3%,66.7%
,3000000,1,33.3%,33.3%,100.0%
Total,,3,100.0%,,
Table: Statistics
,,x
N,Valid,3
,Missing,0
Mean,,1000010
Std Dev,,1732042
Minimum,,10.00
Maximum,,3000000
Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,10.00,1,33.3%,33.3%,33.3%
,20.00,1,33.3%,33.3%,66.7%
,3000000,1,33.3%,33.3%,100.0%
Total,,3,100.0%,,
])
AT_CLEANUP
dnl STATISTICS = ALL on three cases whose x is entered as a lone period.
dnl The expected output counts all three as missing and prints a period
dnl for every statistic.
AT_SETUP([FREQUENCIES no valid data])
AT_DATA([empty.sps], [dnl
data list notable list /x *.
begin data.
.
.
.
end data.
FREQUENCIES
/VARIABLES = x
/STATISTICS = ALL
.
])
dnl Each statistic line ends in the empty quadrigraph @&t@, which
dnl expands to nothing and guards the white space in front of it.
AT_CHECK([pspp empty.sps -O format=csv], [0], [dnl
Table: Statistics
,,x
N,Valid,0
,Missing,3
Mean,,. @&t@
S.E. Mean,,. @&t@
Median,,. @&t@
Mode,,. @&t@
Std Dev,,. @&t@
Variance,,. @&t@
Kurtosis,,. @&t@
S.E. Kurt,,. @&t@
Skewness,,. @&t@
S.E. Skew,,. @&t@
Range,,. @&t@
Minimum,,. @&t@
Maximum,,. @&t@
Sum,,. @&t@
Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Missing,. ,3,100.0%,,
Total,,3,.0%,,
])
AT_CLEANUP
dnl Requests a histogram of x while weighting by w, where every case has
dnl a period as its weight.
dnl NOTE(review): presumably no case carries a usable weight, so the
dnl histogram has no valid cases -- confirm against the WEIGHT docs.
AT_SETUP([FREQUENCIES histogram no valid cases])
AT_DATA([empty.sps], [dnl
data list notable list /x w *.
begin data.
1 .
2 .
3 .
end data.
weight by w.
FREQUENCIES
/VARIABLES = x
/histogram
.
])
dnl Exit status must be 0 and stdout is ignored.
AT_CHECK([pspp empty.sps -O format=csv], [0], [ignore])
AT_CLEANUP
dnl Generates ten cases of SCORE = EXP(NORMAL(1)) with an input program,
dnl then asks for all statistics, eleven percentiles and a histogram in
dnl one FREQUENCIES command with the frequency table suppressed.
AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
AT_DATA([bug.sps], [dnl
SET FORMAT=F8.0.
INPUT PROGRAM.
LOOP I=1 TO 10.
COMPUTE SCORE=EXP(NORMAL(1)).
END CASE.
END LOOP.
END FILE.
END INPUT PROGRAM.
FREQUENCIES VARIABLES=SCORE
/FORMAT=NOTABLE
/STATISTICS=ALL
/PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
/HISTOGRAM.
])
dnl Exit status must be 0 and stdout is ignored.
dnl NOTE(review): presumably ignored because the scores are random.
AT_CHECK([pspp bug.sps], [0], [ignore])
AT_CLEANUP
dnl Weights the cases by w, where the last two cases have a period as
dnl their weight.  FREQUENCIES is expected to warn that such cases were
dnl ignored.  The value 4.00 occurs only in an ignored case and is
dnl expected to appear in the table with a frequency of .00.
AT_SETUP([FREQUENCIES vs. missing weights])
AT_DATA([warn.sps], [dnl
data list notable list /x w .
begin data.
1 1
2 1
1 1
3 1
3 .
4 .
end data.
weight by w.
frequencies /variables=x.
])
dnl The warning carries the location of the FREQUENCIES command, which
dnl is line 11 of warn.sps as written above.
AT_CHECK([pspp warn.sps -O format=csv], [0], [dnl
"warn.sps:11: warning: FREQUENCIES: At least one case in the data file had a weight value that was user-missing, system-missing, zero, or negative. These case(s) were ignored."
Table: Statistics
,,x
N,Valid,4.00
,Missing,.00
Mean,,1.75
Std Dev,,.96
Minimum,,1.00
Maximum,,4.00
Table: x
,,Frequency,Percent,Valid Percent,Cumulative Percent
Valid,1.00,2.00,50.0%,50.0%,50.0%
,2.00,1.00,25.0%,25.0%,75.0%
,3.00,1.00,25.0%,25.0%,100.0%
,4.00,.00,.0%,.0%,100.0%
Total,,4.00,100.0%,,
])
AT_CLEANUP