+
+
+dnl Test that a case with a missing value in a categorical predictor is
+dnl counted under Missing Cases and changes nothing else in the output.
+AT_SETUP([LOGISTIC REGRESSION missing categoricals])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([data.txt], [dnl
+ .00 3.69 .00
+ .00 1.16 1.00
+ 1.00 -12.99 .00
+ .00 2.97 1.00
+ .00 20.48 .00
+ .00 4.90 .00
+ 1.00 -4.38 .00
+ .00 -1.69 1.00
+ 1.00 -5.71 .00
+ 1.00 -14.28 .00
+ .00 9.00 .00
+ .00 2.89 1.00
+ .00 13.51 1.00
+ .00 23.32 1.00
+ .00 2.31 1.00
+ .00 -2.07 1.00
+ 1.00 -4.52 1.00
+ 1.00 -5.83 .00
+ 1.00 -1.91 .00
+ 1.00 -11.12 1.00
+ .00 -1.51 .00
+ .00 6.59 1.00
+ .00 19.28 1.00
+ .00 5.94 .00
+ .00 8.21 1.00
+ .00 8.11 1.00
+ .00 2.49 .00
+ .00 9.62 .00
+ 1.00 -20.74 1.00
+ .00 -1.41 1.00
+ .00 15.15 1.00
+ .00 9.39 .00
+ 1.00 -15.14 1.00
+ 1.00 -5.86 .00
+ 1.00 -11.64 1.00
+ 1.00 -14.36 .00
+ 1.00 -8.95 1.00
+ 1.00 -16.42 1.00
+ 1.00 -1.04 1.00
+ .00 12.89 1.00
+ .00 -7.08 1.00
+ .00 4.87 1.00
+ .00 11.53 1.00
+ 1.00 -6.24 1.00
+ .00 1.25 1.00
+ .00 4.39 1.00
+ .00 3.17 .00
+ .00 19.39 1.00
+ .00 13.03 1.00
+ .00 2.43 .00
+ 1.00 -14.73 1.00
+ .00 8.25 1.00
+ 1.00 -13.28 1.00
+ .00 5.27 1.00
+ 1.00 -3.46 1.00
+ .00 13.81 1.00
+ .00 1.35 1.00
+ 1.00 -3.94 1.00
+ .00 20.73 1.00
+ 1.00 -15.40 .00
+ 1.00 -11.01 1.00
+ .00 4.56 .00
+ 1.00 -15.35 1.00
+ .00 15.21 .00
+ .00 5.34 1.00
+ 1.00 -21.55 1.00
+ .00 10.12 1.00
+ .00 -.73 1.00
+ .00 15.28 1.00
+ .00 11.08 1.00
+ 1.00 -8.24 .00
+ .00 2.46 .00
+ .00 9.60 .00
+ .00 11.24 .00
+ .00 14.13 1.00
+ .00 19.72 1.00
+ .00 5.58 .00
+ .00 26.23 1.00
+ .00 7.25 .00
+ 1.00 -.79 .00
+ .00 6.24 .00
+ 1.00 1.16 .00
+ 1.00 -7.89 1.00
+ 1.00 -1.86 1.00
+ 1.00 -10.80 1.00
+ 1.00 -5.51 .00
+ .00 7.51 .00
+ .00 11.18 .00
+ .00 8.73 .00
+ 1.00 -11.21 1.00
+ 1.00 -13.24 .00
+ .00 19.34 .00
+ .00 9.32 1.00
+ .00 17.97 1.00
+ 1.00 -1.56 1.00
+ 1.00 -3.13 .00
+ .00 3.98 .00
+ .00 -1.21 1.00
+ .00 2.37 .00
+ 1.00 -18.03 1.00
+])
+
+AT_DATA([miss.sps], [dnl
+data list notable file='data.txt' list /y x1 cat0*.
+
+logistic regression y with x1 cat0
+ /categorical = cat0.
+])
+
+AT_CHECK([pspp -O format=csv miss.sps > file1], [0], [ignore])
+
+dnl Append a case whose third field (the categorical cat0) is missing ('.').
+AT_CHECK([echo '1 34 .' >> data.txt], [0], [ignore])
+
+AT_CHECK([pspp -O format=csv miss.sps > file2], [0], [ignore])
+
+AT_CHECK([diff file1 file2], [1], [dnl
+8,10c8,10
+< Included in Analysis,100,100.0%
+< Missing Cases,0,.0%
+< Total,100,100.0%
+---
+> Included in Analysis,100,99.0%
+> Missing Cases,1,1.0%
+> Total,101,100.0%
+])
+
+AT_CLEANUP
+
+
+dnl Check that /PRINT = CI(95) reports the confidence intervals for Exp(B)
+dnl correctly.  Use an example with categorical predictors, because that
+dnl case was buggy at one point.  The data in this example comes from:
+dnl http://people.ysu.edu/~gchang/SPSSE/SPSS_lab2Regression.pdf
+AT_SETUP([LOGISTIC REGRESSION confidence interval])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([ci.sps], [dnl
+set FORMAT=F20.3
+data list notable list /disease age sciostat sector savings *.
+begin data.
+0 33 1 1 1
+0 35 1 1 1
+0 6 1 1 0
+0 60 1 1 1
+1 18 3 1 0
+0 26 3 1 0
+0 6 3 1 0
+1 31 2 1 1
+1 26 2 1 0
+0 37 2 1 0
+0 23 1 1 0
+0 23 1 1 0
+0 27 1 1 1
+1 9 1 1 1
+1 37 1 2 1
+1 22 1 2 1
+1 67 1 2 1
+0 8 1 2 1
+1 6 1 2 1
+1 15 1 2 1
+1 21 2 2 1
+1 32 2 2 1
+1 16 1 2 1
+0 11 2 2 0
+0 14 3 2 0
+0 9 2 2 0
+0 18 2 2 0
+0 2 3 1 0
+0 61 3 1 1
+0 20 3 1 0
+0 16 3 1 0
+0 9 2 1 0
+0 35 2 1 1
+0 4 1 1 1
+0 44 3 2 0
+1 11 3 2 0
+0 3 2 2 1
+0 6 3 2 0
+1 17 2 2 0
+0 1 3 2 1
+1 53 2 2 1
+1 13 1 2 0
+0 24 1 2 0
+1 70 1 2 1
+1 16 3 2 1
+0 12 2 2 1
+1 20 3 2 1
+0 65 3 2 1
+1 40 2 2 0
+1 38 2 2 1
+1 68 2 2 1
+1 74 1 2 1
+1 14 1 2 1
+1 27 1 2 1
+0 31 1 2 1
+0 18 1 2 1
+0 39 1 2 0
+0 50 1 2 1
+0 31 1 2 1
+0 61 1 2 1
+0 18 3 1 0
+0 5 3 1 0
+0 2 3 1 1
+0 16 3 1 0
+1 59 3 1 1
+0 22 3 1 0
+0 24 1 1 1
+0 30 1 1 1
+0 46 1 1 1
+0 28 1 1 0
+0 27 1 1 1
+1 27 1 1 0
+0 28 1 1 1
+1 52 1 1 1
+0 11 3 1 1
+0 6 2 1 1
+0 46 3 1 0
+1 20 2 1 1
+0 3 1 1 1
+0 18 2 1 0
+0 25 2 1 0
+0 6 3 1 1
+1 65 3 1 1
+0 51 3 1 1
+0 39 2 1 1
+0 8 1 1 1
+0 8 2 1 0
+0 14 3 1 0
+0 6 3 1 0
+0 6 3 1 1
+0 7 3 1 0
+0 4 3 1 0
+0 8 3 1 0
+0 9 2 1 0
+1 32 3 1 0
+0 19 3 1 0
+0 11 3 1 0
+0 35 3 1 0
+0 16 1 1 0
+0 1 1 1 1
+0 6 1 1 1
+0 27 1 1 1
+0 25 1 1 1
+0 18 1 1 0
+0 37 3 1 0
+1 33 3 1 0
+0 27 2 1 0
+0 2 1 1 0
+0 8 2 1 0
+0 5 1 1 0
+0 1 1 1 1
+0 32 1 1 0
+1 25 1 1 1
+0 15 1 2 0
+0 15 1 2 1
+0 26 1 2 1
+1 42 1 2 1
+0 7 1 2 1
+0 2 1 2 0
+1 65 1 2 1
+0 33 2 2 1
+1 8 2 2 0
+0 30 2 2 0
+0 5 3 2 0
+0 15 3 2 0
+1 60 3 2 1
+1 13 3 2 1
+0 70 3 1 1
+0 5 3 1 0
+0 3 3 1 1
+0 50 2 1 1
+0 6 2 1 0
+0 12 2 1 1
+1 39 3 2 0
+0 15 2 2 1
+1 35 2 2 0
+0 2 2 2 1
+0 17 3 2 0
+1 43 3 2 1
+0 30 2 2 1
+0 11 1 2 1
+1 39 1 2 1
+0 32 1 2 1
+0 17 1 2 1
+0 3 3 2 1
+0 7 3 2 0
+0 2 2 2 0
+1 64 2 2 1
+1 13 1 2 2
+1 15 2 2 1
+0 48 2 2 1
+0 23 1 2 1
+1 48 1 2 0
+0 25 1 2 1
+0 12 1 2 1
+1 46 1 2 1
+0 79 1 2 1
+0 56 1 2 1
+0 8 1 2 1
+1 29 3 1 0
+1 35 3 1 0
+1 11 3 1 0
+0 69 3 1 1
+1 21 3 1 0
+0 13 3 1 0
+0 21 1 1 1
+1 32 1 1 1
+1 24 1 1 0
+0 24 1 1 1
+0 73 1 1 1
+0 42 1 1 1
+1 34 1 1 1
+0 30 2 1 0
+0 7 2 1 0
+1 29 3 1 0
+1 22 3 1 0
+0 38 2 1 1
+0 13 2 1 1
+0 12 2 1 1
+0 42 3 1 0
+1 17 3 1 0
+0 21 3 1 1
+0 34 1 1 1
+0 1 3 1 0
+0 14 2 1 0
+0 16 2 1 0
+0 9 3 1 0
+0 53 3 1 0
+0 27 3 1 0
+0 15 3 1 0
+0 9 3 1 0
+0 4 2 1 1
+0 10 3 1 1
+0 31 3 1 0
+0 85 3 1 1
+0 24 2 1 0
+end data.
+
+logistic regression
+ disease WITH age sciostat sector savings
+ /categorical = sciostat sector
+ /print = ci(95).
+])
+
+AT_CHECK([pspp -O format=csv ci.sps], [0], [dnl
+Table: Dependent Variable Encoding
+Original Value,Internal Value
+.000,.000
+1.000,1.000
+
+Table: Case Processing Summary
+Unweighted Cases,N,Percent
+Included in Analysis,196,100.0%
+Missing Cases,0,.0%
+Total,196,100.0%
+
+note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
+
+Table: Model Summary
+Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+1,211.195,.120,.172
+
+Table: Categorical Variables' Codings
+,,Frequency,Parameter coding,
+,,,(1),(2)
+sciostat,1.000,77,1,0
+,2.000,49,0,1
+,3.000,70,0,0
+sector,1.000,117,1,
+,2.000,79,0,
+
+Table: Classification Table
+,,,Predicted,,
+,,,disease,,Percentage Correct
+,Observed,,.000,1.000,
+Step 1,disease,.000,131,8,94.2%
+,,1.000,41,16,28.1%
+,Overall Percentage,,,,75.0%
+
+Table: Variables in the Equation
+,,B,S.E.,Wald,df,Sig.,Exp(B),95% CI for Exp(B),
+,,,,,,,,Lower,Upper
+Step 1,age,.027,.009,8.647,1,.003,1.027,1.009,1.045
+,savings,.061,.386,.025,1,.874,1.063,.499,2.264
+,sciostat,,,.440,2,.803,,,
+,sciostat(1),-.278,.434,.409,1,.522,.757,.323,1.775
+,sciostat(2),-.219,.459,.227,1,.634,.803,.327,1.976
+,sector,,,11.974,1,.001,,,
+,sector(1),-1.235,.357,11.974,1,.001,.291,.145,.586
+,Constant,-.814,.452,3.246,1,.072,.443,,
+])
+
+AT_CLEANUP
+