Add some tests for empty parens
[pspp] / tests / language / stats / examine.at
index 9bfa9c78ed5f952eeed35076df291924ec81a2d4..29dab48ff2690f6095b0b9280fc14772d43c2a20 100644 (file)
@@ -36,10 +36,11 @@ EXAMINE
        /STATISTICS descriptives extreme(3)
        .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
+
+
 dnl In the following data, only the extreme values have been checked.
 dnl The descriptives have been blindly pasted.
-AT_CHECK([cat pspp.csv], [0], [dnl
+AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
 Table: Reading free-form data from INLINE.
 Variable,Format
 QUALITY,F8.0
@@ -75,10 +76,10 @@ Table: Extreme Values
 ,,,Case Number,Value
 Breaking Strain,Highest,1,12,7.00
 ,,2,16,6.00
-,,3,7,5.00
+,,3,14,5.00
 ,Lowest,1,3,1.00
-,,2,3,1.00
-,,3,4,1.00
+,,2,4,1.00
+,,3,2,2.00
 
 Table: Descriptives
 ,,,Statistic,Std. Error
@@ -106,15 +107,15 @@ Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
 
 Table: Extreme Values
 ,Manufacturer,,,Case Number,Value
-Breaking Strain,Aspeger,Highest,1,5,4.00
-,,,2,6,4.00
+Breaking Strain,Aspeger,Highest,1,6,4.00
+,,,2,5,4.00
 ,,,3,1,3.00
 ,,Lowest,1,3,1.00
-,,,2,3,1.00
-,,,3,4,1.00
+,,,2,4,1.00
+,,,3,2,2.00
 ,Bloggs,Highest,1,7,5.00
 ,,,2,9,4.00
-,,,3,9,4.00
+,,,3,11,3.00
 ,,Lowest,1,8,2.00
 ,,,2,10,2.00
 ,,,3,11,3.00
@@ -123,7 +124,7 @@ Breaking Strain,Aspeger,Highest,1,5,4.00
 ,,,3,14,5.00
 ,,Lowest,1,15,3.00
 ,,,2,13,4.00
-,,,3,13,4.00
+,,,3,14,5.00
 
 Table: Descriptives
 ,Manufacturer,,,Statistic,Std. Error
@@ -167,6 +168,7 @@ Breaking Strain,Aspeger,Mean,,2.25,.45
 ,,Skewness,,.30,.75
 ,,Kurtosis,,.15,1.48
 ])
+
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- extremes])
@@ -202,8 +204,8 @@ examine v1
  /statistics=extreme(6)
  .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
+
+AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
@@ -215,16 +217,92 @@ Table: Extreme Values
 V1,Highest,1,21,20.00
 ,,2,20,19.00
 ,,3,19,18.00
-,,4,19,18.00
-,,5,18,17.00
-,,6,17,16.00
+,,4,18,17.00
+,,5,17,16.00
+,,6,16,15.00
 ,Lowest,1,1,1.00
 ,,2,2,2.00
 ,,3,3,3.00
-,,4,3,3.00
-,,5,4,3.00
-,,6,5,4.00
+,,4,4,3.00
+,,5,5,4.00
+,,6,6,5.00
+])
+
+AT_CLEANUP
+
+
+
+AT_SETUP([EXAMINE -- extremes with fractional weights])
+AT_DATA([extreme.sps], [dnl
+set format=F20.3.
+data list notable list /w * x *.
+begin data.
+ 0.88  300000
+ 0.86  320000
+ 0.98  480000
+ 0.93  960000
+ 1.35  960000
+ 1.31  960000
+ 0.88  960000
+ 0.88  1080000
+ 0.88  1080000
+ 0.95  1200000
+ 1.47  1200000
+ 0.93  1200000
+ 0.98  1320000
+ 1.31  1380000
+ 0.93  1440000
+ 0.88  1560000
+ 1.56  1560000
+ 1.47  1560000
+end data.
+
+weight by w.
+
+
+EXAMINE
+        x
+        /STATISTICS = DESCRIPTIVES EXTREME (5)
+        .
+])
+
+AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,19.430,100%,.000,0%,19.430,100%
+
+Table: Extreme Values
+,,,Case Number,Value
+x,Highest,1,18,1560000.000
+,,2,17,1560000.000
+,,3,16,1560000.000
+,,4,15,1440000.000
+,,5,14,1380000.000
+,Lowest,1,1,300000.000
+,,2,2,320000.000
+,,3,3,480000.000
+,,4,4,960000.000
+,,5,5,960000.000
+
+Table: Descriptives
+,,,Statistic,Std. Error
+x,Mean,,1120010.293,86222.178
+,95% Confidence Interval for Mean,Lower Bound,939166.693,
+,,Upper Bound,1300853.894,
+,5% Trimmed Mean,,1141017.899,
+,Median,,1200000.000,
+,Variance,,144447748124.869,
+,Std. Deviation,,380062.821,
+,Minimum,,300000.000,
+,Maximum,,1560000.000,
+,Range,,1260000.000,
+,Interquartile Range,,467258.065,
+,Skewness,,-.887,.519
+,Kurtosis,,.340,1.005
 ])
+
 AT_CLEANUP
 
 dnl Test the PERCENTILES subcommand of the EXAMINE command.
@@ -335,7 +413,9 @@ BEGIN DATA.
 .   2
 END DATA
 
-EXAMINE /x by y.
+EXAMINE /x by y
+        /MISSING = PAIRWISE
+        .
 ])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 AT_CHECK([cat pspp.csv], [0], [dnl
@@ -359,6 +439,33 @@ x,1.00,4,100%,0,0%,4,100%
 ])
 AT_CLEANUP
 
+
+AT_SETUP([EXAMINE -- user missing values])
+AT_DATA([examine-m.sps], [dnl
+DATA LIST notable LIST /x * y *.
+BEGIN DATA.
+1                   2
+9999999999          2
+9999999999          99
+END DATA.
+
+MISSING VALUES x (9999999999).
+MISSING VALUES y (99).
+
+EXAMINE
+       /VARIABLES= x y
+       /MISSING=PAIRWISE.
+])
+AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,1,33.3333%,2,66.6667%,3,100%
+y,2,66.6667%,1,33.3333%,3,100%
+])
+AT_CLEANUP
+
 AT_SETUP([EXAMINE -- missing values and percentiles])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X *.
@@ -490,9 +597,10 @@ END DATA
 EXAMINE
        quality 
        /STATISTICS descriptives 
+        /PLOT = histogram
        .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
+AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 
@@ -509,9 +617,7 @@ END DATA.
 
 EXAMINE /x PLOT=HISTOGRAM.
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps], [0], [dnl
-warning: Not creating plot because data set is empty.
-])
+AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 
@@ -549,7 +655,7 @@ DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
 AGGREGATE OUTFILE=* /BREAK=X /A=N.
 
 
-EXAMINE /A BY /X.
+EXAMINE /A BY X.
 ])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 dnl Ignore output -- this is just a no-crash check.
@@ -560,3 +666,378 @@ AT_CHECK([$PERL more-big-input.pl >> large.txt])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
+
+
+dnl Test that the ID subcommand works with non-numeric variables
+AT_SETUP([EXAMINE -- non-numeric ID])
+
+AT_DATA([examine-id.sps], [dnl
+data list notable list /x * y (a12).
+begin data.
+1  one
+2  two
+3  three
+4  four
+5  five
+6  six
+7  seven
+8  eight
+9  nine
+10 ten
+11 eleven
+12 twelve
+30 thirty
+300 threehundred
+end data.
+
+examine x
+       /statistics = extreme
+       /id = y
+       /plot = boxplot
+       .
+])
+
+AT_CHECK([pspp -O format=csv examine-id.sps], [0], 
+[Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,14,100%,0,0%,14,100%
+
+Table: Extreme Values
+,,,y,Value
+x,Highest,1,threehundred,300.00
+,,2,thirty      ,30.00
+,,3,twelve      ,12.00
+,,4,eleven      ,11.00
+,,5,ten         ,10.00
+,Lowest,1,one         ,1.00
+,,2,two         ,2.00
+,,3,three       ,3.00
+,,4,four        ,4.00
+,,5,five        ,5.00
+])
+
+AT_CLEANUP 
+
+dnl Test for a crash which happened during cleanup after bad input syntax
+AT_SETUP([EXAMINE -- Bad Input])
+
+AT_DATA([examine-bad.sps], [dnl
+data list list /h * g *.
+begin data.
+1 1
+2 1
+3 1
+4 1
+5 2
+6 2
+7 2
+8 2
+9 2
+end data.
+
+EXAMINE 
+       /VARIABLES= h
+       BY  g
+       /STATISTICS = DESCRIPTIVES EXTREME
+        /PLOT = lkajsdas
+       .
+])
+
+AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
+
+AT_CLEANUP 
+
+
+dnl Check the MISSING=REPORT option
+AT_SETUP([EXAMINE -- MISSING=REPORT])
+
+
+AT_DATA([examine-report.sps], [dnl
+set format = F22.0.
+data list list /x * g *.
+begin data.
+1   1
+2   1
+3   1
+4   1
+5   1
+6   1
+7   1
+8   1
+9   1
+10   2
+20   2
+30   2
+40   2
+50   2
+60   2
+70   2
+80   2
+90   2
+101   9
+201   9
+301   9
+401   9
+501   99
+601   99
+701   99
+801   99
+901   99
+1001  .
+2002  .
+3003  .
+4004  .
+end data.
+
+MISSING VALUES g (9, 99, 999).
+
+EXAMINE
+        /VARIABLES = x
+        BY  g
+        /STATISTICS = EXTREME
+        /NOTOTAL
+        /MISSING = REPORT.
+])
+
+
+AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
+Table: Reading free-form data from INLINE.
+Variable,Format
+x,F8.0
+g,F8.0
+
+Table: Case Processing Summary
+,,Cases,,,,,
+,,Valid,,Missing,,Total,
+,g,N,Percent,N,Percent,N,Percent
+x,. (missing),4,100%,0,0%,4,100%
+,1,9,100%,0,0%,9,100%
+,2,9,100%,0,0%,9,100%
+,9 (missing),4,100%,0,0%,4,100%
+,99 (missing),5,100%,0,0%,5,100%
+
+Table: Extreme Values
+,g,,,Case Number,Value
+x,. (missing),Highest,1,31,4004
+,,,2,30,3003
+,,,3,29,2002
+,,,4,28,1001
+,,,5,0,0
+,,Lowest,1,28,1001
+,,,2,29,2002
+,,,3,30,3003
+,,,4,31,4004
+,,,5,31,4004
+,1,Highest,1,9,9
+,,,2,8,8
+,,,3,7,7
+,,,4,6,6
+,,,5,5,5
+,,Lowest,1,1,1
+,,,2,2,2
+,,,3,3,3
+,,,4,4,4
+,,,5,5,5
+,2,Highest,1,18,90
+,,,2,17,80
+,,,3,16,70
+,,,4,15,60
+,,,5,14,50
+,,Lowest,1,10,10
+,,,2,11,20
+,,,3,12,30
+,,,4,13,40
+,,,5,14,50
+,9 (missing),Highest,1,22,401
+,,,2,21,301
+,,,3,20,201
+,,,4,19,101
+,,,5,0,0
+,,Lowest,1,19,101
+,,,2,20,201
+,,,3,21,301
+,,,4,22,401
+,,,5,22,401
+,99 (missing),Highest,1,27,901
+,,,2,26,801
+,,,3,25,701
+,,,4,24,601
+,,,5,23,501
+,,Lowest,1,23,501
+,,,2,24,601
+,,,3,25,701
+,,,4,26,801
+,,,5,27,901
+])
+
+
+AT_CLEANUP 
+
+
+dnl Run a test of the basic STATISTICS using a "real"
+dnl dataset and comparing with "real" results kindly
+dnl provided by Olaf Nöhring
+AT_SETUP([EXAMINE -- sample unweighted])
+
+AT_DATA([sample.sps], [dnl
+set format = F22.4.
+DATA LIST notable LIST /X *
+BEGIN DATA.
+461.19000000
+466.38000000
+479.46000000
+480.10000000
+483.43000000
+488.30000000
+489.00000000
+491.62000000
+505.62000000
+511.30000000
+521.53000000
+526.70000000
+528.25000000
+538.70000000
+540.22000000
+540.58000000
+546.10000000
+548.17000000
+553.99000000
+566.21000000
+575.90000000
+584.38000000
+593.40000000
+357.05000000
+359.73000000
+360.48000000
+373.98000000
+374.13000000
+381.45000000
+383.72000000
+390.00000000
+400.34000000
+415.32000000
+415.91000000
+418.30000000
+421.03000000
+422.43000000
+426.93000000
+433.25000000
+436.89000000
+445.33000000
+446.33000000
+446.55000000
+456.44000000
+689.49000000
+691.92000000
+695.00000000
+695.36000000
+698.21000000
+699.46000000
+706.61000000
+710.69000000
+715.82000000
+715.82000000
+741.39000000
+752.27000000
+756.73000000
+757.74000000
+759.57000000
+796.07000000
+813.78000000
+817.25000000
+825.48000000
+831.28000000
+849.24000000
+890.00000000
+894.78000000
+935.65000000
+935.90000000
+945.90000000
+1012.8600000
+1022.6000000
+1061.8100000
+1063.5000000
+1077.2300000
+1151.6300000
+1355.2800000
+598.88000000
+606.91000000
+621.60000000
+624.80000000
+636.13000000
+637.38000000
+640.32000000
+649.35000000
+656.51000000
+662.55000000
+664.69000000
+106.22000000
+132.24000000
+174.76000000
+204.85000000
+264.93000000
+264.99000000
+269.84000000
+325.12000000
+331.67000000
+337.26000000
+347.68000000
+354.91000000
+END DATA.
+
+EXAMINE
+       x
+       /STATISTICS=DESCRIPTIVES
+       .
+])
+
+AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+X,100,100%,0,0%,100,100%
+
+Table: Descriptives
+,,,Statistic,Std. Error
+X,Mean,,587.6603,23.2665
+,95% Confidence Interval for Mean,Lower Bound,541.4946,
+,,Upper Bound,633.8260,
+,5% Trimmed Mean,,579.7064,
+,Median,,547.1350,
+,Variance,,54132.8466,
+,Std. Deviation,,232.6647,
+,Minimum,,106.2200,
+,Maximum,,1355.2800,
+,Range,,1249.0600,
+,Interquartile Range,,293.1575,
+,Skewness,,.6331,.2414
+,Kurtosis,,.5300,.4783
+])
+
+AT_CLEANUP 
+
+
+
+dnl Test for a crash which happened on bad input syntax (empty parentheses after a PLOT keyword)
+AT_SETUP([EXAMINE -- Empty Parentheses])
+
+AT_DATA([examine-empty-parens.sps], [dnl
+DATA LIST notable LIST /X *
+BEGIN DATA.
+2
+3
+END DATA.
+
+
+EXAMINE
+       x
+       /PLOT = SPREADLEVEL()
+       .
+])
+
+AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
+
+AT_CLEANUP