EXAMINE: Extend test for empty dataset and fix bug.
[pspp] / tests / language / stats / examine.at
index 9bfa9c78ed5f952eeed35076df291924ec81a2d4..d4540ba45d0a16b10c1908906d21547b81fee259 100644 (file)
@@ -36,10 +36,11 @@ EXAMINE
        /STATISTICS descriptives extreme(3)
        .
 ])
        /STATISTICS descriptives extreme(3)
        .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
+
+
 dnl In the following data, only the extreme values have been checked.
 dnl The descriptives have been blindly pasted.
 dnl In the following data, only the extreme values have been checked.
 dnl The descriptives have been blindly pasted.
-AT_CHECK([cat pspp.csv], [0], [dnl
+AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl
 Table: Reading free-form data from INLINE.
 Variable,Format
 QUALITY,F8.0
 Table: Reading free-form data from INLINE.
 Variable,Format
 QUALITY,F8.0
@@ -75,10 +76,10 @@ Table: Extreme Values
 ,,,Case Number,Value
 Breaking Strain,Highest,1,12,7.00
 ,,2,16,6.00
 ,,,Case Number,Value
 Breaking Strain,Highest,1,12,7.00
 ,,2,16,6.00
-,,3,7,5.00
+,,3,14,5.00
 ,Lowest,1,3,1.00
 ,Lowest,1,3,1.00
-,,2,3,1.00
-,,3,4,1.00
+,,2,4,1.00
+,,3,2,2.00
 
 Table: Descriptives
 ,,,Statistic,Std. Error
 
 Table: Descriptives
 ,,,Statistic,Std. Error
@@ -106,15 +107,15 @@ Breaking Strain,Aspeger,8.00,100%,.00,0%,8.00,100%
 
 Table: Extreme Values
 ,Manufacturer,,,Case Number,Value
 
 Table: Extreme Values
 ,Manufacturer,,,Case Number,Value
-Breaking Strain,Aspeger,Highest,1,5,4.00
-,,,2,6,4.00
+Breaking Strain,Aspeger,Highest,1,6,4.00
+,,,2,5,4.00
 ,,,3,1,3.00
 ,,Lowest,1,3,1.00
 ,,,3,1,3.00
 ,,Lowest,1,3,1.00
-,,,2,3,1.00
-,,,3,4,1.00
+,,,2,4,1.00
+,,,3,2,2.00
 ,Bloggs,Highest,1,7,5.00
 ,,,2,9,4.00
 ,Bloggs,Highest,1,7,5.00
 ,,,2,9,4.00
-,,,3,9,4.00
+,,,3,11,3.00
 ,,Lowest,1,8,2.00
 ,,,2,10,2.00
 ,,,3,11,3.00
 ,,Lowest,1,8,2.00
 ,,,2,10,2.00
 ,,,3,11,3.00
@@ -123,7 +124,7 @@ Breaking Strain,Aspeger,Highest,1,5,4.00
 ,,,3,14,5.00
 ,,Lowest,1,15,3.00
 ,,,2,13,4.00
 ,,,3,14,5.00
 ,,Lowest,1,15,3.00
 ,,,2,13,4.00
-,,,3,13,4.00
+,,,3,14,5.00
 
 Table: Descriptives
 ,Manufacturer,,,Statistic,Std. Error
 
 Table: Descriptives
 ,Manufacturer,,,Statistic,Std. Error
@@ -167,6 +168,7 @@ Breaking Strain,Aspeger,Mean,,2.25,.45
 ,,Skewness,,.30,.75
 ,,Kurtosis,,.15,1.48
 ])
 ,,Skewness,,.30,.75
 ,,Kurtosis,,.15,1.48
 ])
+
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- extremes])
 AT_CLEANUP
 
 AT_SETUP([EXAMINE -- extremes])
@@ -202,8 +204,8 @@ examine v1
  /statistics=extreme(6)
  .
 ])
  /statistics=extreme(6)
  .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
+
+AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Valid,,Missing,,Total,
@@ -215,16 +217,92 @@ Table: Extreme Values
 V1,Highest,1,21,20.00
 ,,2,20,19.00
 ,,3,19,18.00
 V1,Highest,1,21,20.00
 ,,2,20,19.00
 ,,3,19,18.00
-,,4,19,18.00
-,,5,18,17.00
-,,6,17,16.00
+,,4,18,17.00
+,,5,17,16.00
+,,6,16,15.00
 ,Lowest,1,1,1.00
 ,,2,2,2.00
 ,,3,3,3.00
 ,Lowest,1,1,1.00
 ,,2,2,2.00
 ,,3,3,3.00
-,,4,3,3.00
-,,5,4,3.00
-,,6,5,4.00
+,,4,4,3.00
+,,5,5,4.00
+,,6,6,5.00
+])
+
+AT_CLEANUP
+
+
+
+AT_SETUP([EXAMINE -- extremes with fractional weights])
+AT_DATA([extreme.sps], [dnl
+set format=F20.3.
+data list notable list /w * x *.
+begin data.
+ 0.88  300000
+ 0.86  320000
+ 0.98  480000
+ 0.93  960000
+ 1.35  960000
+ 1.31  960000
+ 0.88  960000
+ 0.88  1080000
+ 0.88  1080000
+ 0.95  1200000
+ 1.47  1200000
+ 0.93  1200000
+ 0.98  1320000
+ 1.31  1380000
+ 0.93  1440000
+ 0.88  1560000
+ 1.56  1560000
+ 1.47  1560000
+end data.
+
+weight by w.
+
+
+EXAMINE
+        x
+        /STATISTICS = DESCRIPTIVES EXTREME (5)
+        .
+])
+
+AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,19.430,100%,.000,0%,19.430,100%
+
+Table: Extreme Values
+,,,Case Number,Value
+x,Highest,1,18,1560000.000
+,,2,17,1560000.000
+,,3,16,1560000.000
+,,4,15,1440000.000
+,,5,14,1380000.000
+,Lowest,1,1,300000.000
+,,2,2,320000.000
+,,3,3,480000.000
+,,4,4,960000.000
+,,5,5,960000.000
+
+Table: Descriptives
+,,,Statistic,Std. Error
+x,Mean,,1120010.293,86222.178
+,95% Confidence Interval for Mean,Lower Bound,939166.693,
+,,Upper Bound,1300853.894,
+,5% Trimmed Mean,,1141017.899,
+,Median,,1200000.000,
+,Variance,,144447748124.869,
+,Std. Deviation,,380062.821,
+,Minimum,,300000.000,
+,Maximum,,1560000.000,
+,Range,,1260000.000,
+,Interquartile Range,,467258.065,
+,Skewness,,-.887,.519
+,Kurtosis,,.340,1.005
 ])
 ])
+
 AT_CLEANUP
 
 dnl Test the PERCENTILES subcommand of the EXAMINE command.
 AT_CLEANUP
 
 dnl Test the PERCENTILES subcommand of the EXAMINE command.
@@ -335,7 +413,9 @@ BEGIN DATA.
 .   2
 END DATA
 
 .   2
 END DATA
 
-EXAMINE /x by y.
+EXAMINE /x by y
+        /MISSING = PAIRWISE
+        .
 ])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 AT_CHECK([cat pspp.csv], [0], [dnl
 ])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 AT_CHECK([cat pspp.csv], [0], [dnl
@@ -359,6 +439,33 @@ x,1.00,4,100%,0,0%,4,100%
 ])
 AT_CLEANUP
 
 ])
 AT_CLEANUP
 
+
+AT_SETUP([EXAMINE -- user missing values])
+AT_DATA([examine-m.sps], [dnl
+DATA LIST notable LIST /x * y *.
+BEGIN DATA.
+1                   2
+9999999999          2
+9999999999          99
+END DATA.
+
+MISSING VALUES x (9999999999).
+MISSING VALUES y (99).
+
+EXAMINE
+       /VARIABLES= x y
+       /MISSING=PAIRWISE.
+])
+AT_CHECK([pspp -O format=csv examine-m.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,1,33.3333%,2,66.6667%,3,100%
+y,2,66.6667%,1,33.3333%,3,100%
+])
+AT_CLEANUP
+
 AT_SETUP([EXAMINE -- missing values and percentiles])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X *.
 AT_SETUP([EXAMINE -- missing values and percentiles])
 AT_DATA([examine.sps], [dnl
 DATA LIST LIST /X *.
@@ -490,9 +597,10 @@ END DATA
 EXAMINE
        quality 
        /STATISTICS descriptives 
 EXAMINE
        quality 
        /STATISTICS descriptives 
+        /PLOT = histogram
        .
 ])
        .
 ])
-AT_CHECK([pspp -o pspp.csv examine.sps])
+AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 
@@ -507,11 +615,14 @@ BEGIN DATA.
 .
 END DATA.
 
 .
 END DATA.
 
-EXAMINE /x PLOT=HISTOGRAM.
-])
-AT_CHECK([pspp -o pspp.csv examine.sps], [0], [dnl
-warning: Not creating plot because data set is empty.
+EXAMINE /x 
+       PLOT=HISTOGRAM BOXPLOT NPPLOT SPREADLEVEL(1) ALL
+       /ID=x
+        /STATISTICS = DESCRIPTIVES EXTREME (5) ALL
+       /PERCENTILE=AEMPIRICAL
+       .
 ])
 ])
+AT_CHECK([pspp -o pspp.csv examine.sps], [0], [ignore])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 
@@ -549,7 +660,7 @@ DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
 AGGREGATE OUTFILE=* /BREAK=X /A=N.
 
 
 AGGREGATE OUTFILE=* /BREAK=X /A=N.
 
 
-EXAMINE /A BY /X.
+EXAMINE /A BY X.
 ])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 dnl Ignore output -- this is just a no-crash check.
 ])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 dnl Ignore output -- this is just a no-crash check.
@@ -560,3 +671,378 @@ AT_CHECK([$PERL more-big-input.pl >> large.txt])
 AT_CHECK([pspp -o pspp.csv examine.sps])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
 AT_CHECK([pspp -o pspp.csv examine.sps])
 dnl Ignore output -- this is just a no-crash check.
 AT_CLEANUP
+
+
+dnl Test that the ID command works with non-numberic variables
+AT_SETUP([EXAMINE -- non-numeric ID])
+
+AT_DATA([examine-id.sps], [dnl
+data list notable list /x * y (a12).
+begin data.
+1  one
+2  two
+3  three
+4  four
+5  five
+6  six
+7  seven
+8  eight
+9  nine
+10 ten
+11 eleven
+12 twelve
+30 thirty
+300 threehundred
+end data.
+
+examine x
+       /statistics = extreme
+       /id = y
+       /plot = boxplot
+       .
+])
+
+AT_CHECK([pspp -O format=csv examine-id.sps], [0], 
+[Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,14,100%,0,0%,14,100%
+
+Table: Extreme Values
+,,,y,Value
+x,Highest,1,threehundred,300.00
+,,2,thirty      ,30.00
+,,3,twelve      ,12.00
+,,4,eleven      ,11.00
+,,5,ten         ,10.00
+,Lowest,1,one         ,1.00
+,,2,two         ,2.00
+,,3,three       ,3.00
+,,4,four        ,4.00
+,,5,five        ,5.00
+])
+
+AT_CLEANUP 
+
+dnl Test for a crash which happened on cleanup from a bad input syntax
+AT_SETUP([EXAMINE -- Bad Input])
+
+AT_DATA([examine-bad.sps], [dnl
+data list list /h * g *.
+begin data.
+1 1
+2 1
+3 1
+4 1
+5 2
+6 2
+7 2
+8 2
+9 2
+end data.
+
+EXAMINE 
+       /VARIABLES= h
+       BY  g
+       /STATISTICS = DESCRIPTIVES EXTREME
+        /PLOT = lkajsdas
+       .
+])
+
+AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
+
+AT_CLEANUP 
+
+
+dnl Check the MISSING=REPORT option
+AT_SETUP([EXAMINE -- MISSING=REPORT])
+
+
+AT_DATA([examine-report.sps], [dnl
+set format = F22.0.
+data list list /x * g *.
+begin data.
+1   1
+2   1
+3   1
+4   1
+5   1
+6   1
+7   1
+8   1
+9   1
+10   2
+20   2
+30   2
+40   2
+50   2
+60   2
+70   2
+80   2
+90   2
+101   9
+201   9
+301   9
+401   9
+501   99
+601   99
+701   99
+801   99
+901   99
+1001  .
+2002  .
+3003  .
+4004  .
+end data.
+
+MISSING VALUES g (9, 99, 999).
+
+EXAMINE
+        /VARIABLES = x
+        BY  g
+        /STATISTICS = EXTREME
+        /NOTOTAL
+        /MISSING = REPORT.
+])
+
+
+AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
+Table: Reading free-form data from INLINE.
+Variable,Format
+x,F8.0
+g,F8.0
+
+Table: Case Processing Summary
+,,Cases,,,,,
+,,Valid,,Missing,,Total,
+,g,N,Percent,N,Percent,N,Percent
+x,. (missing),4,100%,0,0%,4,100%
+,1,9,100%,0,0%,9,100%
+,2,9,100%,0,0%,9,100%
+,9 (missing),4,100%,0,0%,4,100%
+,99 (missing),5,100%,0,0%,5,100%
+
+Table: Extreme Values
+,g,,,Case Number,Value
+x,. (missing),Highest,1,31,4004
+,,,2,30,3003
+,,,3,29,2002
+,,,4,28,1001
+,,,5,0,0
+,,Lowest,1,28,1001
+,,,2,29,2002
+,,,3,30,3003
+,,,4,31,4004
+,,,5,31,4004
+,1,Highest,1,9,9
+,,,2,8,8
+,,,3,7,7
+,,,4,6,6
+,,,5,5,5
+,,Lowest,1,1,1
+,,,2,2,2
+,,,3,3,3
+,,,4,4,4
+,,,5,5,5
+,2,Highest,1,18,90
+,,,2,17,80
+,,,3,16,70
+,,,4,15,60
+,,,5,14,50
+,,Lowest,1,10,10
+,,,2,11,20
+,,,3,12,30
+,,,4,13,40
+,,,5,14,50
+,9 (missing),Highest,1,22,401
+,,,2,21,301
+,,,3,20,201
+,,,4,19,101
+,,,5,0,0
+,,Lowest,1,19,101
+,,,2,20,201
+,,,3,21,301
+,,,4,22,401
+,,,5,22,401
+,99 (missing),Highest,1,27,901
+,,,2,26,801
+,,,3,25,701
+,,,4,24,601
+,,,5,23,501
+,,Lowest,1,23,501
+,,,2,24,601
+,,,3,25,701
+,,,4,26,801
+,,,5,27,901
+])
+
+
+AT_CLEANUP 
+
+
+dnl Run a test of the basic STATISTICS using a "real"
+dnl dataset and comparing with "real" results kindly
+dnl provided by Olaf Nöhring
+AT_SETUP([EXAMINE -- sample unweighted])
+
+AT_DATA([sample.sps], [dnl
+set format = F22.4.
+DATA LIST notable LIST /X *
+BEGIN DATA.
+461.19000000
+466.38000000
+479.46000000
+480.10000000
+483.43000000
+488.30000000
+489.00000000
+491.62000000
+505.62000000
+511.30000000
+521.53000000
+526.70000000
+528.25000000
+538.70000000
+540.22000000
+540.58000000
+546.10000000
+548.17000000
+553.99000000
+566.21000000
+575.90000000
+584.38000000
+593.40000000
+357.05000000
+359.73000000
+360.48000000
+373.98000000
+374.13000000
+381.45000000
+383.72000000
+390.00000000
+400.34000000
+415.32000000
+415.91000000
+418.30000000
+421.03000000
+422.43000000
+426.93000000
+433.25000000
+436.89000000
+445.33000000
+446.33000000
+446.55000000
+456.44000000
+689.49000000
+691.92000000
+695.00000000
+695.36000000
+698.21000000
+699.46000000
+706.61000000
+710.69000000
+715.82000000
+715.82000000
+741.39000000
+752.27000000
+756.73000000
+757.74000000
+759.57000000
+796.07000000
+813.78000000
+817.25000000
+825.48000000
+831.28000000
+849.24000000
+890.00000000
+894.78000000
+935.65000000
+935.90000000
+945.90000000
+1012.8600000
+1022.6000000
+1061.8100000
+1063.5000000
+1077.2300000
+1151.6300000
+1355.2800000
+598.88000000
+606.91000000
+621.60000000
+624.80000000
+636.13000000
+637.38000000
+640.32000000
+649.35000000
+656.51000000
+662.55000000
+664.69000000
+106.22000000
+132.24000000
+174.76000000
+204.85000000
+264.93000000
+264.99000000
+269.84000000
+325.12000000
+331.67000000
+337.26000000
+347.68000000
+354.91000000
+END DATA.
+
+EXAMINE
+       x
+       /STATISTICS=DESCRIPTIVES
+       .
+])
+
+AT_CHECK([pspp -O format=csv sample.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+X,100,100%,0,0%,100,100%
+
+Table: Descriptives
+,,,Statistic,Std. Error
+X,Mean,,587.6603,23.2665
+,95% Confidence Interval for Mean,Lower Bound,541.4946,
+,,Upper Bound,633.8260,
+,5% Trimmed Mean,,579.7064,
+,Median,,547.1350,
+,Variance,,54132.8466,
+,Std. Deviation,,232.6647,
+,Minimum,,106.2200,
+,Maximum,,1355.2800,
+,Range,,1249.0600,
+,Interquartile Range,,293.1575,
+,Skewness,,.6331,.2414
+,Kurtosis,,.5300,.4783
+])
+
+AT_CLEANUP 
+
+
+
+dnl Test for a crash which happened on bad input syntax
+AT_SETUP([EXAMINE -- Empty Parentheses])
+
+AT_DATA([examine-empty-parens.sps], [dnl
+DATA LIST notable LIST /X *
+BEGIN DATA.
+2
+3
+END DATA.
+
+
+EXAMINE
+       x
+       /PLOT = SPREADLEVEL()
+       .
+])
+
+AT_CHECK([pspp -o pspp.csv examine-empty-parens.sps], [1], [ignore])
+
+AT_CLEANUP