dnl PSPP - a program for statistical analysis.
dnl Copyright (C) 2017 Free Software Foundation, Inc.
dnl
dnl This program is free software: you can redistribute it and/or modify
dnl it under the terms of the GNU General Public License as published by
dnl the Free Software Foundation, either version 3 of the License, or
dnl (at your option) any later version.
dnl
dnl This program is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
dnl GNU General Public License for more details.
dnl
dnl You should have received a copy of the GNU General Public License
dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
dnl
AT_BANNER([DESCRIPTIVES procedure])

dnl Runs DESCRIPTIVES with every statistic over 17 one-column variables
dnl read from fixed-format inline data, and compares the CSV output.
AT_SETUP([DESCRIPTIVES basics])
AT_DATA([descriptives.sps],
  [title 'Test DESCRIPTIVES procedure'.

data list / V0 to V16 1-17.
begin data.
12128989012389023
34128080123890128
56127781237893217
78127378123793112
90913781237892318
37978547878935789
52878237892378279
12377912789378932
26787654347894348
29137178947891888
end data.

descript all/stat=all/format=serial.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0],
  [Title: Test DESCRIPTIVES procedure

Table: Reading 1 record from INLINE.
Variable,Record,Columns,Format
V0,1,1-  1,F1.0
V1,1,2-  2,F1.0
V2,1,3-  3,F1.0
V3,1,4-  4,F1.0
V4,1,5-  5,F1.0
V5,1,6-  6,F1.0
V6,1,7-  7,F1.0
V7,1,8-  8,F1.0
V8,1,9-  9,F1.0
V9,1,10- 10,F1.0
V10,1,11- 11,F1.0
V11,1,12- 12,F1.0
V12,1,13- 13,F1.0
V13,1,14- 14,F1.0
V14,1,15- 15,F1.0
V15,1,16- 16,F1.0
V16,1,17- 17,F1.0

Table: Valid cases = 10; cases with missing value(s) = 0.
Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
V0,10,0,3.80,.84,2.66,7.07,-.03,1.33,.89,.69,8.00,1.00,9.00,38.00
V1,10,0,4.60,.96,3.03,9.16,-1.39,1.33,-.03,.69,9.00,.00,9.00,46.00
V2,10,0,4.10,1.16,3.67,13.43,-2.02,1.33,.48,.69,8.00,1.00,9.00,41.00
V3,10,0,4.10,.87,2.77,7.66,-2.05,1.33,.42,.69,7.00,1.00,8.00,41.00
V4,10,0,7.00,.47,1.49,2.22,7.15,1.33,-2.52,.69,5.00,3.00,8.00,70.00
V5,10,0,4.90,1.03,3.25,10.54,-1.40,1.33,-.20,.69,9.00,.00,9.00,49.00
V6,10,0,5.90,.80,2.51,6.32,-.29,1.33,-.96,.69,7.00,1.00,8.00,59.00
V7,10,0,4.70,1.10,3.47,12.01,-1.99,1.33,-.16,.69,9.00,.00,9.00,47.00
V8,10,0,4.10,1.10,3.48,12.10,-1.93,1.33,.37,.69,9.00,.00,9.00,41.00
V9,10,0,4.30,.87,2.75,7.57,-.87,1.33,.73,.69,8.00,1.00,9.00,43.00
V10,10,0,5.50,.85,2.68,7.17,-1.84,1.33,-.33,.69,7.00,2.00,9.00,55.00
V11,10,0,6.50,.78,2.46,6.06,-1.28,1.33,-.89,.69,6.00,3.00,9.00,65.00
V12,10,0,7.90,.60,1.91,3.66,5.24,1.33,-2.21,.69,6.00,3.00,9.00,79.00
V13,10,0,4.30,.99,3.13,9.79,-1.25,1.33,.33,.69,9.00,.00,9.00,43.00
V14,10,0,3.60,1.01,3.20,10.27,-.96,1.33,.81,.69,9.00,.00,9.00,36.00
V15,10,0,3.70,.92,2.91,8.46,-1.35,1.33,.71,.69,7.00,1.00,8.00,37.00
V16,10,0,6.40,.91,2.88,8.27,-1.14,1.33,-.92,.69,7.00,2.00,9.00,64.00
])
AT_CLEANUP

dnl Input shared by the four missing-data tests below.  The records are
dnl column-sensitive because DATA LIST reads V1 to V3 from fixed columns 1-3,
dnl and the value 1 is declared user-missing for all three variables.  The
dnl expected case counts below depend on the empty second record and on the
dnl leading blank in the third record, so do not reflow these lines.
m4_define([DESCRIPTIVES_MISSING_DATA],
  [data list notable / V1 TO V3 1-3.
mis val v1 to v3 (1).
begin data.
111

 1
1 1
112
123
234
end data.
])

dnl Default missing-value handling: each variable is analyzed separately
dnl and user-missing values are excluded.
AT_SETUP([DESCRIPTIVES -- excluding missing data])
AT_DATA([descriptives.sps],
  [DESCRIPTIVES_MISSING_DATA
descript all/stat=all/format=serial.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0],
  [Table: Valid cases = 7; cases with missing value(s) = 6.
Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
V1,1,6,2.00,.  ,.  ,.  ,.  ,.  ,.  ,.  ,.00,2.00,2.00,2.00
V2,2,5,2.50,.50,.71,.50,.  ,.  ,.  ,.  ,1.00,2.00,3.00,5.00
V3,3,4,3.00,.58,1.00,1.00,.  ,.  ,.00,1.22,2.00,2.00,4.00,9.00
])
AT_CLEANUP

dnl MISSING=INCLUDE: user-missing values take part in the statistics.
AT_SETUP([DESCRIPTIVES -- including missing data])
AT_DATA([descriptives.sps],
  [DESCRIPTIVES_MISSING_DATA
descript all/stat=all/format=serial/missing=include.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0],
  [Table: Valid cases = 7; cases with missing value(s) = 3.
Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
V1,5,2,1.20,.20,.45,.20,5.00,2.00,2.24,.91,1.00,1.00,2.00,6.00
V2,5,2,1.60,.40,.89,.80,.31,2.00,1.26,.91,2.00,1.00,3.00,8.00
V3,5,2,2.20,.58,1.30,1.70,-1.49,2.00,.54,.91,3.00,1.00,4.00,11.00
])
AT_CLEANUP

dnl MISSING=LISTWISE: a case is dropped when any variable is missing.
AT_SETUP([DESCRIPTIVES -- excluding missing data listwise])
AT_DATA([descriptives.sps],
  [DESCRIPTIVES_MISSING_DATA
descript all/stat=all/format=serial/missing=listwise.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0],
  [Table: Valid cases = 1; cases with missing value(s) = 6.
Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
V1,1,0,2.00,.  ,.  ,.  ,.  ,.  ,.  ,.  ,.00,2.00,2.00,2.00
V2,1,0,3.00,.  ,.  ,.  ,.  ,.  ,.  ,.  ,.00,3.00,3.00,3.00
V3,1,0,4.00,.  ,.  ,.  ,.  ,.  ,.  ,.  ,.00,4.00,4.00,4.00
])
AT_CLEANUP

dnl MISSING=LISTWISE INCLUDE: listwise deletion, but user-missing values
dnl count as valid.
AT_SETUP([DESCRIPTIVES -- including missing data listwise])
AT_DATA([descriptives.sps],
  [DESCRIPTIVES_MISSING_DATA
descript all/stat=all/format=serial/missing=listwise include.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0],
  [Table: Valid cases = 4; cases with missing value(s) = 3.
Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
V1,4,0,1.25,.25,.50,.25,4.00,2.62,2.00,1.01,1.00,1.00,2.00,5.00
V2,4,0,1.75,.48,.96,.92,-1.29,2.62,.85,1.01,2.00,1.00,3.00,7.00
V3,4,0,2.50,.65,1.29,1.67,-1.20,2.62,.00,1.01,3.00,1.00,4.00,10.00
])
AT_CLEANUP

dnl Regression test: requesting only the mean must still give the right mean.
AT_SETUP([DESCRIPTIVES bug calculating mean only])
AT_DATA([descriptives.sps],
  [SET FORMAT F8.3.

data list notable / X 1.
begin data.
0
1
2
3
4
5
end data.

descript all/stat=mean.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0],
  [Table: Valid cases = 6; cases with missing value(s) = 0.
Variable,N,Mean
X,6,2.500
])
AT_CLEANUP

dnl Git history shows that this was probably a bug in the PSPP
dnl core regarding multipass procedures, not anything specific
dnl to DESCRIPTIVES.
AT_SETUP([DESCRIPTIVES bug with TEMPORARY])
AT_DATA([descriptives.sps], [dnl
DATA LIST LIST NOTABLE /id * abc *.
BEGIN DATA.
1 3.5
2 2.0
3 2.0
4 3.5
5 3.0
6 4.0
7 5.0
END DATA.

TEMPORARY.
SELECT IF id < 7 .

DESCRIPTIVES /VAR=abc.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0], [dnl
Table: Valid cases = 6; cases with missing value(s) = 0.
Variable,N,Mean,Std Dev,Minimum,Maximum
abc,6,3.00,.84,2.00,4.00
])
AT_CLEANUP

dnl /SAVE adds one Z-score variable per analyzed variable; LIST shows them.
AT_SETUP([DESCRIPTIVES -- Z scores])
AT_DATA([descriptives.sps], [dnl
DATA LIST LIST NOTABLE /a b.
BEGIN DATA.
1 50
2 60
3 70
END DATA.

DESCRIPTIVES /VAR=a b /SAVE.
LIST.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0], [dnl
Table: Mapping of variables to corresponding Z-scores.
Source,Target
a,Za
b,Zb

Table: Valid cases = 3; cases with missing value(s) = 0.
Variable,N,Mean,Std Dev,Minimum,Maximum
a,3,2.00,1.00,1.00,3.00
b,3,60.00,10.00,50.00,70.00

Table: Data List
a,b,Za,Zb
1.00,50.00,-1.00,-1.00
2.00,60.00,.00,.00
3.00,70.00,1.00,1.00
])
AT_CLEANUP

dnl With SPLIT FILE in effect the Z scores are computed within each group.
AT_SETUP([DESCRIPTIVES -- Z scores with SPLIT FILE])
AT_DATA([descriptives.sps], [dnl
DATA LIST LIST NOTABLE /group a b.
BEGIN DATA.
1 1 50
1 2 60
1 3 70
2 100 6000
2 200 7000
2 400 9000
2 500 10000
END DATA.

SPLIT FILE BY group.
DESCRIPTIVES /VAR=a b /SAVE.
LIST.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0], [dnl
Table: Mapping of variables to corresponding Z-scores.
Source,Target
a,Za
b,Zb

Variable,Value,Label
group,1.00,

Table: Valid cases = 3; cases with missing value(s) = 0.
Variable,N,Mean,Std Dev,Minimum,Maximum
a,3,2.00,1.00,1.00,3.00
b,3,60.00,10.00,50.00,70.00

Variable,Value,Label
group,2.00,

Table: Valid cases = 4; cases with missing value(s) = 0.
Variable,N,Mean,Std Dev,Minimum,Maximum
a,4,300.00,182.57,100.00,500.00
b,4,8000.00,1825.74,6000.00,10000.00

Variable,Value,Label
group,1.00,

Table: Data List
group,a,b,Za,Zb
1.00,1.00,50.00,-1.00,-1.00
1.00,2.00,60.00,.00,.00
1.00,3.00,70.00,1.00,1.00

Variable,Value,Label
group,2.00,

Table: Data List
group,a,b,Za,Zb
2.00,100.00,6000.00,-1.10,-1.10
2.00,200.00,7000.00,-.55,-.55
2.00,400.00,9000.00,.55,.55
2.00,500.00,10000.00,1.10,1.10
])
AT_CLEANUP

dnl Ideally DESCRIPTIVES would not make temporary transformations permanent
dnl as it does now (bug #38786), so these results are imperfect.  However,
dnl this test does verify that DESCRIPTIVES does not crash in this situation
dnl (as it once did).
dnl
dnl The expected warning names line 15 of the script, so the blank lines
dnl inside the script below are significant: DESCRIPTIVES must stay on line 15.
AT_SETUP([DESCRIPTIVES -- Z scores bug with TEMPORARY])
AT_DATA([descriptives.sps], [dnl
DATA LIST LIST NOTABLE /id abc.
BEGIN DATA.
1 3.5
2 2.0
3 2.0
4 3.5
5 3.0
6 4.0
7 5.0
END DATA.

TEMPORARY.
SELECT IF id < 7 .

DESCRIPTIVES /VAR=abc/SAVE.
LIST.
])
AT_CHECK([pspp -O format=csv descriptives.sps], [0], [dnl
descriptives.sps:15: warning: DESCRIPTIVES: DESCRIPTIVES with Z scores ignores TEMPORARY.  Temporary transformations will be made permanent.

Table: Mapping of variables to corresponding Z-scores.
Source,Target
abc,Zabc

Table: Valid cases = 6; cases with missing value(s) = 0.
Variable,N,Mean,Std Dev,Minimum,Maximum
abc,6,3.00,.84,2.00,4.00

Table: Data List
id,abc,Zabc
1.00,3.50,.60
2.00,2.00,-1.20
3.00,2.00,-1.20
4.00,3.50,.60
5.00,3.00,.00
6.00,4.00,1.20
])
AT_CLEANUP

dnl This test was supplied by Mindaugus as part of the report for bug #42012.
dnl
dnl Only the final Data List table is compared; sed drops everything before
dnl it.  The first expected data row ends in the empty quadrigraph so that
dnl its trailing blanks are not lost.
AT_SETUP([DESCRIPTIVES -- Z scores with FILTER])
AT_DATA([descriptives.sps], [dnl
DATA LIST LIST/filter1 filter2 x.
BEGIN DATA.
0,0,300
0,1,200
0,1,100
1,0,5
1,0,4
1,1,3
1,1,2
1,1,1
END DATA.

FILTER OFF.
SPLIT FILE OFF.
DESCRIPTIVES /VARIABLES=X /SAVE.

FILTER BY filter1.
SPLIT FILE OFF.
DESCRIPTIVES /VARIABLES=X /SAVE.

FILTER OFF.
SORT CASES BY filter1.
SPLIT FILE BY filter1.
DESCRIPTIVES /VARIABLES=X /SAVE.

FILTER BY filter2.
SPLIT FILE BY filter1.
DESCRIPTIVES /VARIABLES=X /SAVE.

FILTER OFF.
SORT CASES BY filter1 filter2.
SPLIT FILE BY filter1 filter2.
DESCRIPTIVES /VARIABLES=X /SAVE.
EXECUTE.

SPLIT FILE OFF.
LIST.
])
AT_CHECK([pspp -o pspp.csv descriptives.sps])
AT_CHECK([sed -n '/Table: Data List/,$p' < pspp.csv], [0], [dnl
Table: Data List
filter1,filter2,x,Zx,ZSC001,ZSC002,ZSC003,ZSC004
.00,.00,300.00,1.94,.  ,1.00,.  ,.  @&t@
.00,1.00,200.00,1.07,.  ,.00,.71,.71
.00,1.00,100.00,.20,.  ,-1.00,-.71,-.71
1.00,.00,5.00,-.62,1.26,1.26,.  ,.71
1.00,.00,4.00,-.63,.63,.63,.  ,-.71
1.00,1.00,3.00,-.64,.00,.00,1.00,1.00
1.00,1.00,2.00,-.65,-.63,-.63,.00,.00
1.00,1.00,1.00,-.66,-1.26,-1.26,-1.00,-1.00
])
AT_CLEANUP