output: Introduce pivot tables.
[pspp] / tests / language / stats / frequencies.at
index d321e576a0c9a11ffa0040de2dc5d31ef005164b..ca0467264e84a7f63abf8cec3cc40a846c0129e5 100644 (file)
@@ -1,3 +1,19 @@
+dnl PSPP - a program for statistical analysis.
+dnl Copyright (C) 2017 Free Software Foundation, Inc.
+dnl 
+dnl This program is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation, either version 3 of the License, or
+dnl (at your option) any later version.
+dnl 
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+dnl GNU General Public License for more details.
+dnl 
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
+dnl
 AT_BANNER([FREQUENCIES procedure])
 
 AT_SETUP([FREQUENCIES string variable])
@@ -18,16 +34,16 @@ quux 5 8
 END DATA.
 EXECUTE.
 
-FREQUENCIES /VAR = name.
+FREQUENCIES /VAR = name/ORDER=ANALYSIS.
 ])
 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
 Table: name
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,bar     ,2,20.00,20.00,20.00
-,baz     ,4,40.00,40.00,60.00
-,foo     ,2,20.00,20.00,80.00
-,quux    ,2,20.00,20.00,100.00
-Total,,10,100.0,100.0,
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,bar,2,20.0%,20.0%,20.0%
+,baz,4,40.0%,40.0%,60.0%
+,foo,2,20.0%,20.0%,80.0%
+,quux,2,20.0%,20.0%,100.0%
+Total,,10,100.0%,,
 ])
 AT_CLEANUP
 
@@ -43,41 +59,64 @@ begin data.
 3 4
 end data.
 
-frequencies v1 v2/statistics=none.
+frequencies v1 v2/statistics=none/ORDER=VARIABLE.
 frequencies v1 v2/statistics=none.
 ])
-AT_CHECK([pspp -O format=csv frequencies.sps], [0],
-  [Table: v1
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,.00,1,25.00,25.00,25.00
-,2.00,1,25.00,25.00,50.00
-,3.00,1,25.00,25.00,75.00
-,4.00,1,25.00,25.00,100.00
-Total,,4,100.0,100.0,
+AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
+Table: v1
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,.00,1,25.0%,25.0%,25.0%
+,2.00,1,25.0%,25.0%,50.0%
+,3.00,1,25.0%,25.0%,75.0%
+,4.00,1,25.0%,25.0%,100.0%
+Total,,4,100.0%,,
 
 Table: v2
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,1.00,1,25.00,25.00,25.00
-,3.00,1,25.00,25.00,50.00
-,4.00,1,25.00,25.00,75.00
-,5.00,1,25.00,25.00,100.00
-Total,,4,100.0,100.0,
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,1,25.0%,25.0%,25.0%
+,3.00,1,25.0%,25.0%,50.0%
+,4.00,1,25.0%,25.0%,75.0%
+,5.00,1,25.0%,25.0%,100.0%
+Total,,4,100.0%,,
 
 Table: v1
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,.00,1,25.00,25.00,25.00
-,2.00,1,25.00,25.00,50.00
-,3.00,1,25.00,25.00,75.00
-,4.00,1,25.00,25.00,100.00
-Total,,4,100.0,100.0,
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,.00,1,25.0%,25.0%,25.0%
+,2.00,1,25.0%,25.0%,50.0%
+,3.00,1,25.0%,25.0%,75.0%
+,4.00,1,25.0%,25.0%,100.0%
+Total,,4,100.0%,,
 
 Table: v2
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,1.00,1,25.00,25.00,25.00
-,3.00,1,25.00,25.00,50.00
-,4.00,1,25.00,25.00,75.00
-,5.00,1,25.00,25.00,100.00
-Total,,4,100.0,100.0,
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,1,25.0%,25.0%,25.0%
+,3.00,1,25.0%,25.0%,50.0%
+,4.00,1,25.0%,25.0%,75.0%
+,5.00,1,25.0%,25.0%,100.0%
+Total,,4,100.0%,,
+])
+AT_CLEANUP
+
+# Test that the LIMIT specification works.
+#
+# The script requests tables for v1 and v2 with FORMAT=LIMIT(3).
+# In the data v1 takes four distinct values (0, 2, 3, 4) while v2 takes
+# only three (1, 3, 5), so the expected output contains the v2 table and
+# no v1 table.
+AT_SETUP([FREQUENCIES with LIMIT])
+AT_DATA([frequencies.sps],
+  [data list free /v1 v2.
+begin data.
+0 1
+2 5
+4 3
+3 5
+end data.
+
+frequencies v1 v2/statistics=none/FORMAT=LIMIT(3).
+])
+AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
+Table: v2
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,1,25.0%,25.0%,25.0%
+,3.00,1,25.0%,25.0%,50.0%
+,5.00,2,50.0%,50.0%,100.0%
+Total,,4,100.0%,,
 ])
 AT_CLEANUP
 
@@ -106,12 +145,12 @@ v1,v2
 3.00,4.00
 
 Table: v1
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,.00,1,25.00,25.00,25.00
-,2.00,1,25.00,25.00,50.00
-,3.00,1,25.00,25.00,75.00
-,4.00,1,25.00,25.00,100.00
-Total,,4,100.0,100.0,
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,.00,1,25.0%,25.0%,25.0%
+,2.00,1,25.0%,25.0%,50.0%
+,3.00,1,25.0%,25.0%,75.0%
+,4.00,1,25.0%,25.0%,100.0%
+Total,,4,100.0%,,
 ])
 AT_CHECK([test -s pspp.html])
 AT_CLEANUP
@@ -146,7 +185,7 @@ AT_CHECK([pspp frequencies.sps], [0], [dnl
 Reading free-form data from INLINE.
 +--------+------+
 |Variable|Format|
-#========#======#
++--------+------+
 |x       |F8.0  |
 |w       |F8.0  |
 +--------+------+
@@ -221,32 +260,35 @@ FREQUENCIES /X .
 
 FINISH
 ])
-AT_CHECK([pspp -O format=csv frequencies.sps], [0],
-  [Table: Reading free-form data from INLINE.
+AT_CHECK([pspp -o pspp.csv -o pspp.txt frequencies.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Table: Reading free-form data from INLINE.
 Variable,Format
 SEX,A1
 X,F8.0
 
-Table: X
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,12.00,1,25.00,25.00,25.00
-,13.00,1,25.00,25.00,50.00
-,21.00,1,25.00,25.00,75.00
-,31.00,1,25.00,25.00,100.00
-Total,,4,100.0,100.0,
-
-Table: X
+Table: Statistics
+,,X
 N,Valid,4
 ,Missing,0
 Mean,,19.25
 Std Dev,,8.81
 Minimum,,12.00
 Maximum,,31.00
+
+Table: X
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,12.00,1,25.0%,25.0%,25.0%
+,13.00,1,25.0%,25.0%,50.0%
+,21.00,1,25.0%,25.0%,75.0%
+,31.00,1,25.0%,25.0%,100.0%
+Total,,4,100.0%,,
 ])
 AT_CLEANUP
 
-m4_define([FREQUENCIES_NTILES_OUTPUT],
-  [Table: x
+m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
+Table: Statistics
+,,x
 N,Valid,5
 ,Missing,0
 Mean,,3.00
@@ -256,7 +298,7 @@ Maximum,,5.00
 Percentiles,0,1.00
 ,25,2.00
 ,33,2.33
-,50 (Median),3.00
+,50,3.00
 ,67,3.67
 ,75,4.00
 ,100,5.00
@@ -318,17 +360,9 @@ FREQUENCIES
        /ALGORITHM=COMPATIBLE
        /PERCENTILES = 0 25 50 75 100.
 ])
-AT_CHECK([pspp -O format=csv frequencies.sps], [0],
-  [Table: X
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,1.00,1,20.00,20.00,20.00
-,2.00,1,20.00,20.00,40.00
-,3.00,1,20.00,20.00,60.00
-,4.00,1,20.00,20.00,80.00
-,5.00,1,20.00,20.00,100.00
-Total,,5,100.0,100.0,
-
-Table: X
+AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
+Table: Statistics
+,,X
 N,Valid,5
 ,Missing,0
 Mean,,3.00
@@ -337,9 +371,18 @@ Minimum,,1.00
 Maximum,,5.00
 Percentiles,0,1.00
 ,25,1.50
-,50 (Median),3.00
+,50,3.00
 ,75,4.50
 ,100,5.00
+
+Table: X
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,1,20.0%,20.0%,20.0%
+,2.00,1,20.0%,20.0%,40.0%
+,3.00,1,20.0%,20.0%,60.0%
+,4.00,1,20.0%,20.0%,80.0%
+,5.00,1,20.0%,20.0%,100.0%
+Total,,5,100.0%,,
 ])
 AT_CLEANUP
 
@@ -358,17 +401,9 @@ FREQUENCIES
        VAR=x
        /PERCENTILES = 0 25 50 75 100.
 ])
-AT_CHECK([pspp -O format=csv frequencies.sps], [0],
-  [Table: X
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,1.00,1,20.00,20.00,20.00
-,2.00,1,20.00,20.00,40.00
-,3.00,1,20.00,20.00,60.00
-,4.00,1,20.00,20.00,80.00
-,5.00,1,20.00,20.00,100.00
-Total,,5,100.0,100.0,
-
-Table: X
+AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
+Table: Statistics
+,,X
 N,Valid,5
 ,Missing,0
 Mean,,3.00
@@ -377,9 +412,18 @@ Minimum,,1.00
 Maximum,,5.00
 Percentiles,0,1.00
 ,25,2.00
-,50 (Median),3.00
+,50,3.00
 ,75,4.00
 ,100,5.00
+
+Table: X
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,1,20.0%,20.0%,20.0%
+,2.00,1,20.0%,20.0%,40.0%
+,3.00,1,20.0%,20.0%,60.0%
+,4.00,1,20.0%,20.0%,80.0%
+,5.00,1,20.0%,20.0%,100.0%
+Total,,5,100.0%,,
 ])
 AT_CLEANUP
 
@@ -402,17 +446,9 @@ FREQUENCIES
        VAR=x
        /PERCENTILES = 0 25 50 75 100.
 ])
-AT_CHECK([pspp -O format=csv frequencies.sps], [0],
-  [Table: X
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,1.00,2.00,20.00,20.00,20.00
-,2.00,2.00,20.00,20.00,40.00
-,3.00,2.00,20.00,20.00,60.00
-,4.00,2.00,20.00,20.00,80.00
-,5.00,2.00,20.00,20.00,100.00
-Total,,10.00,100.0,100.0,
-
-Table: X
+AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
+Table: Statistics
+,,X
 N,Valid,10.00
 ,Missing,.00
 Mean,,3.00
@@ -421,9 +457,18 @@ Minimum,,1.00
 Maximum,,5.00
 Percentiles,0,1.00
 ,25,2.00
-,50 (Median),3.00
+,50,3.00
 ,75,4.00
 ,100,5.00
+
+Table: X
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,2.00,20.0%,20.0%,20.0%
+,2.00,2.00,20.0%,20.0%,40.0%
+,3.00,2.00,20.0%,20.0%,60.0%
+,4.00,2.00,20.0%,20.0%,80.0%
+,5.00,2.00,20.0%,20.0%,100.0%
+Total,,10.00,100.0%,,
 ])
 AT_CLEANUP
 
@@ -444,16 +489,9 @@ FREQUENCIES
        VAR=x
        /PERCENTILES = 0 25 50 75 100.
 ])
-AT_CHECK([pspp -O format=csv frequencies.sps], [0],
-  [Table: X
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,1.00,1.00,16.67,16.67,16.67
-,3.00,2.00,33.33,33.33,50.00
-,4.00,1.00,16.67,16.67,66.67
-,5.00,2.00,33.33,33.33,100.00
-Total,,6.00,100.0,100.0,
-
-Table: X
+AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
+Table: Statistics
+,,X
 N,Valid,6.00
 ,Missing,.00
 Mean,,3.50
@@ -462,9 +500,17 @@ Minimum,,1.00
 Maximum,,5.00
 Percentiles,0,1.00
 ,25,3.00
-,50 (Median),3.50
+,50,3.50
 ,75,4.75
 ,100,5.00
+
+Table: X
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,1.00,16.7%,16.7%,16.7%
+,3.00,2.00,33.3%,33.3%,50.0%
+,4.00,1.00,16.7%,16.7%,66.7%
+,5.00,2.00,33.3%,33.3%,100.0%
+Total,,6.00,100.0%,,
 ])
 AT_CLEANUP
 
@@ -486,15 +532,8 @@ FREQUENCIES
        /PERCENTILES = 0 25 50 75 100.
 ])
 AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
-Table: X
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,1.00,7.00,17.50,17.50,17.50
-,2.00,16.00,40.00,40.00,57.50
-,3.00,12.00,30.00,30.00,87.50
-,4.00,5.00,12.50,12.50,100.00
-Total,,40.00,100.0,100.0,
-
-Table: X
+Table: Statistics
+,,X
 N,Valid,40.00
 ,Missing,.00
 Mean,,2.38
@@ -503,9 +542,17 @@ Minimum,,1.00
 Maximum,,4.00
 Percentiles,0,1.00
 ,25,2.00
-,50 (Median),2.00
+,50,2.00
 ,75,3.00
 ,100,4.00
+
+Table: X
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,7.00,17.5%,17.5%,17.5%
+,2.00,16.00,40.0%,40.0%,57.5%
+,3.00,12.00,30.0%,30.0%,87.5%
+,4.00,5.00,12.5%,12.5%,100.0%
+Total,,40.00,100.0%,,
 ])
 AT_CLEANUP
 
@@ -529,17 +576,9 @@ FREQUENCIES
        /PERCENTILES = 0 25 50 75 100.
 ])
 
-AT_CHECK([pspp -O format=csv frequencies.sps], [0],
-  [Table: X
-Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent
-,1.00,1.00,10.00,16.67,16.67
-,3.00,2.00,20.00,33.33,50.00
-,4.00,1.00,10.00,16.67,66.67
-,5.00,2.00,20.00,33.33,100.00
-,99.00,4.00,40.00,Missing,
-Total,,10.00,100.0,100.0,
-
-Table: X
+AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl
+Table: Statistics
+,,X
 N,Valid,6.00
 ,Missing,4.00
 Mean,,3.50
@@ -548,9 +587,18 @@ Minimum,,1.00
 Maximum,,5.00
 Percentiles,0,1.00
 ,25,3.00
-,50 (Median),3.50
+,50,3.50
 ,75,4.75
 ,100,5.00
+
+Table: X
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,1.00,10.0%,16.7%,16.7%
+,3.00,2.00,20.0%,33.3%,50.0%
+,4.00,1.00,10.0%,16.7%,66.7%
+,5.00,2.00,20.0%,33.3%,100.0%
+Missing,99.00,4.00,40.0%,,
+Total,,10.00,100.0%,,
 ])
 AT_CLEANUP
 
@@ -586,3 +634,214 @@ FREQUENCIES
 
 AT_CHECK([pspp frequencies.sps], [0],  [ignore])
 AT_CLEANUP
+
+
+# Check /STATISTICS = MEDIAN on the three values 1, 2 and 3000000.
+# The expected Statistics table lists only the valid and missing counts
+# and the median (2.00); it is followed by the frequency table for x.
+AT_SETUP([FREQUENCIES median])
+AT_DATA([median.sps], [dnl
+data list notable list /x *.
+begin data.
+1
+2
+3000000
+end data.
+
+FREQUENCIES
+       /VARIABLES = x
+       /STATISTICS = MEDIAN
+       .
+])
+
+AT_CHECK([pspp median.sps -O format=csv], [0], [dnl
+Table: Statistics
+,,x
+N,Valid,3
+,Missing,0
+Median,,2.00
+
+Table: x
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,1.00,1,33.3%,33.3%,33.3%
+,2.00,1,33.3%,33.3%,66.7%
+,3000000,1,33.3%,33.3%,100.0%
+Total,,3,100.0%,,
+])
+AT_CLEANUP
+
+# Check /STATISTICS = VARIANCE on the numeric variable height.
+# The data also carry a string variable, forename, which is read but not
+# analysed.  The expected variance, 1223.00, equals the sum of squared
+# deviations from the mean (3669) divided by n - 1 = 3.
+AT_SETUP([FREQUENCIES variance])
+AT_DATA([variance.sps], [dnl
+data list notable list /forename (A12) height.
+begin data.
+Ahmed 188
+bertram 167
+Catherine 134
+David 109
+end data.
+
+FREQUENCIES
+   /VARIABLES = height
+   /STATISTICS = VARIANCE.
+])
+
+AT_CHECK([pspp variance.sps -O format=csv], [0], [dnl
+Table: Statistics
+,,height
+N,Valid,4
+,Missing,0
+Variance,,1223.00
+
+Table: height
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,109.00,1,25.0%,25.0%,25.0%
+,134.00,1,25.0%,25.0%,50.0%
+,167.00,1,25.0%,25.0%,75.0%
+,188.00,1,25.0%,25.0%,100.0%
+Total,,4,100.0%,,
+])
+AT_CLEANUP
+
+# Check the default statistics set.  FREQUENCIES is run twice on the
+# same data, once with a bare /STATISTICS and once with
+# /STATISTICS = DEFAULT; the expected output for the two runs is
+# identical: N, Mean, Std Dev, Minimum and Maximum, then the frequency
+# table.
+# NOTE(review): the script is named median.sps although no median is
+# requested here - presumably carried over from the median test.
+AT_SETUP([FREQUENCIES default statistics])
+AT_DATA([median.sps], [dnl
+data list notable list /x *.
+begin data.
+10
+20
+3000000
+end data.
+
+FREQUENCIES
+       /VARIABLES = x
+       /STATISTICS
+       .
+
+FREQUENCIES
+       /VARIABLES = x
+       /STATISTICS = DEFAULT
+       .
+])
+
+# PSPP writes both pspp.csv and pspp.txt; only pspp.csv is compared
+# against expected output below.
+AT_CHECK([pspp median.sps -o pspp.csv -o pspp.txt])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Table: Statistics
+,,x
+N,Valid,3
+,Missing,0
+Mean,,1000010
+Std Dev,,1732042
+Minimum,,10.00
+Maximum,,3000000
+
+Table: x
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,10.00,1,33.3%,33.3%,33.3%
+,20.00,1,33.3%,33.3%,66.7%
+,3000000,1,33.3%,33.3%,100.0%
+Total,,3,100.0%,,
+
+Table: Statistics
+,,x
+N,Valid,3
+,Missing,0
+Mean,,1000010
+Std Dev,,1732042
+Minimum,,10.00
+Maximum,,3000000
+
+Table: x
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Valid,10.00,1,33.3%,33.3%,33.3%
+,20.00,1,33.3%,33.3%,66.7%
+,3000000,1,33.3%,33.3%,100.0%
+Total,,3,100.0%,,
+])
+AT_CLEANUP
+
+
+
+# Check /STATISTICS = ALL when every case is system-missing.
+# All three input values are ".", so the expected output has N Valid 0,
+# N Missing 3, and "." for every statistic; the frequency table has a
+# single Missing row.
+# Each "." in the Statistics table is followed by two spaces and the
+# Autoconf quadrigraph that expands to nothing - presumably there so the
+# trailing blanks survive editors that strip trailing whitespace.
+AT_SETUP([FREQUENCIES no valid data])
+AT_DATA([empty.sps], [dnl
+data list notable list /x *.
+begin data.
+.
+.
+.
+end data.
+
+FREQUENCIES
+       /VARIABLES = x
+       /STATISTICS = ALL
+       .
+])
+
+AT_CHECK([pspp empty.sps -O format=csv], [0],  [dnl
+Table: Statistics
+,,x
+N,Valid,0
+,Missing,3
+Mean,,.  @&t@
+S.E. Mean,,.  @&t@
+Median,,.  @&t@
+Mode,,.  @&t@
+Std Dev,,.  @&t@
+Variance,,.  @&t@
+Kurtosis,,.  @&t@
+S.E. Kurt,,.  @&t@
+Skewness,,.  @&t@
+S.E. Skew,,.  @&t@
+Range,,.  @&t@
+Minimum,,.  @&t@
+Maximum,,.  @&t@
+Sum,,.  @&t@
+
+Table: x
+,,Frequency,Percent,Valid Percent,Cumulative Percent
+Missing,.  ,3,100.0%,,
+Total,,3,.0%,,
+])
+
+AT_CLEANUP
+
+
+# Request a histogram when the weight variable w is missing in every
+# case.  The output is ignored; the check only requires that pspp exits
+# with status 0.
+AT_SETUP([FREQUENCIES histogram no valid cases])
+AT_DATA([empty.sps], [dnl
+data list notable list /x w *.
+begin data.
+1 .
+2 .
+3 .
+end data.
+
+weight by w.
+
+FREQUENCIES
+       /VARIABLES = x
+       /histogram
+       .
+])
+
+AT_CHECK([pspp empty.sps -O format=csv], [0],  [ignore])
+
+AT_CLEANUP
+
+# Combine /PERCENTILES, /STATISTICS=ALL and /HISTOGRAM with
+# FORMAT=NOTABLE on ten cases generated by an INPUT PROGRAM as
+# SCORE=EXP(NORMAL(1)).  The output is ignored; the check only requires
+# exit status 0.
+# NOTE(review): the output is presumably ignored because the generated
+# data are random - confirm against the bug report named in the title.
+AT_SETUP([FREQUENCIES percentiles + histogram bug#48128])
+AT_DATA([bug.sps], [dnl
+SET FORMAT=F8.0.
+
+INPUT PROGRAM.
+       LOOP I=1 TO 10.
+               COMPUTE SCORE=EXP(NORMAL(1)).
+               END CASE.
+       END LOOP.
+       END FILE.
+END INPUT PROGRAM.
+
+FREQUENCIES VARIABLES=SCORE
+/FORMAT=NOTABLE
+/STATISTICS=ALL
+/PERCENTILES=1 10 20 30 40 50 60 70 80 90 99
+/HISTOGRAM.
+
+])
+
+AT_CHECK([pspp bug.sps], [0],  [ignore])
+
+AT_CLEANUP