-
+dnl PSPP - a program for statistical analysis.
+dnl Copyright (C) 2017 Free Software Foundation, Inc.
+dnl
+dnl This program is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation, either version 3 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
+dnl
AT_BANNER([LOGISTIC REGRESSION])
dnl These examples are adapted from
Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
,37.323,.455,.659
+Table: Classification Table
+,,,Predicted,,
+,,,outcome,,"Percentage
+Correct"
+,Observed,,1.000,2.000,
+Step 1,outcome,1.000,43,5,89.583
+,,2.000,4,14,77.778
+,Overall Percentage,,,,86.364
+
Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
Step 1,survrate,-.081,.019,17.756,1,.000,.922
data list notable file='lr-data.txt'
list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
-missing values survrate (999) avoid (44444).
+missing values survrate (999) avoid (44444) outcome (99).
logistic regression
variables = outcome with survrate avoid
AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore])
+dnl Append some cases with missing values into the data.
cat >> lr-data.txt << HERE
105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1
+ 108.00 99 5.00 3.00 2.00 .28 4 34 .00897 -1.00905 1
HERE
AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore])
< Missing Cases,0,.000
< Total,66,100.000
---
-> Included in Analysis,66,95.652
-> Missing Cases,3,4.348
-> Total,69,100.000
+> Included in Analysis,66,94.286
+> Missing Cases,4,5.714
+> Total,70,100.000
])
AT_CLEANUP
< Total,66,100.000
---
> Total,63,100.000
+23,24c23,24
+< Step 1,outcome,1.000,43,5,89.583
+< ,,2.000,4,14,77.778
+---
+> Step 1,outcome,1.000,43.000,5.000,89.583
+> ,,2.000,4.000,14.000,77.778
])
Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
,275.637,.008,.011
+Table: Classification Table
+,,,Predicted,,
+,,,female,,"Percentage
+Correct"
+,Observed,,.00,1.00,
+Step 1,female,.00,0,91,.000
+,,1.00,0,109,100.000
+,Overall Percentage,,,,54.500
+
Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
Step 1,constant,.180,.142,1.616,1,.204,1.198
,3.000,121,0,0,1
,4.000,67,0,0,0
+Table: Classification Table
+,,,Predicted,,
+,,,y,,"Percentage
+Correct"
+,Observed,,4.000,9.000,
+Step 1,y,4.000,254,19,93.040
+,,9.000,97,30,23.622
+,Overall Percentage,,,,71.000
+
Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
Step 1,b1,.002,.001,4.284,1,.038,1.002
,b,95,0,1
,c,58,0,0
+Table: Classification Table
+,,,Predicted,,
+,,,honcomp,,"Percentage
+Correct"
+,Observed,,.000,1.000,
+Step 1,honcomp,.000,132,15,89.796
+,,1.000,26,27,50.943
+,Overall Percentage,,,,79.500
+
Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
Step 1,read,.098,.025,15.199,1,.000,1.103
])
AT_CLEANUP
+
+
+dnl Regression test: check that LOGISTIC REGRESSION doesn't crash if a
+dnl categorical variable has only one distinct value.
+AT_SETUP([LOGISTIC REGRESSION identical categories])
+
+dnl Two cases; the categorical predictor x2 is 1 in both of them.
+AT_DATA([crash.sps], [dnl
+data list notable list /y x1 x2*.
+begin data
+0 1 1
+1 2 1
+end data.
+
+logistic regression y with x1 x2
+ /categorical = x2.
+])
+
+dnl The run is expected to finish with exit status 1.  Standard output is
+dnl ignored, so this step checks the exit status rather than any table.
+dnl NOTE(review): status 1 presumably means pspp reported an error for the
+dnl degenerate model instead of crashing; confirm against pspp itself.
+AT_CHECK([pspp -O format=csv crash.sps], [1], [ignore])
+
+AT_CLEANUP
+
+
+dnl Test that missing values on the categorical predictors are treated
+dnl properly.
+AT_SETUP([LOGISTIC REGRESSION missing categoricals])
+
+AT_DATA([data.txt], [dnl
+ .00 3.69 .00
+ .00 1.16 1.00
+ 1.00 -12.99 .00
+ .00 2.97 1.00
+ .00 20.48 .00
+ .00 4.90 .00
+ 1.00 -4.38 .00
+ .00 -1.69 1.00
+ 1.00 -5.71 .00
+ 1.00 -14.28 .00
+ .00 9.00 .00
+ .00 2.89 1.00
+ .00 13.51 1.00
+ .00 23.32 1.00
+ .00 2.31 1.00
+ .00 -2.07 1.00
+ 1.00 -4.52 1.00
+ 1.00 -5.83 .00
+ 1.00 -1.91 .00
+ 1.00 -11.12 1.00
+ .00 -1.51 .00
+ .00 6.59 1.00
+ .00 19.28 1.00
+ .00 5.94 .00
+ .00 8.21 1.00
+ .00 8.11 1.00
+ .00 2.49 .00
+ .00 9.62 .00
+ 1.00 -20.74 1.00
+ .00 -1.41 1.00
+ .00 15.15 1.00
+ .00 9.39 .00
+ 1.00 -15.14 1.00
+ 1.00 -5.86 .00
+ 1.00 -11.64 1.00
+ 1.00 -14.36 .00
+ 1.00 -8.95 1.00
+ 1.00 -16.42 1.00
+ 1.00 -1.04 1.00
+ .00 12.89 1.00
+ .00 -7.08 1.00
+ .00 4.87 1.00
+ .00 11.53 1.00
+ 1.00 -6.24 1.00
+ .00 1.25 1.00
+ .00 4.39 1.00
+ .00 3.17 .00
+ .00 19.39 1.00
+ .00 13.03 1.00
+ .00 2.43 .00
+ 1.00 -14.73 1.00
+ .00 8.25 1.00
+ 1.00 -13.28 1.00
+ .00 5.27 1.00
+ 1.00 -3.46 1.00
+ .00 13.81 1.00
+ .00 1.35 1.00
+ 1.00 -3.94 1.00
+ .00 20.73 1.00
+ 1.00 -15.40 .00
+ 1.00 -11.01 1.00
+ .00 4.56 .00
+ 1.00 -15.35 1.00
+ .00 15.21 .00
+ .00 5.34 1.00
+ 1.00 -21.55 1.00
+ .00 10.12 1.00
+ .00 -.73 1.00
+ .00 15.28 1.00
+ .00 11.08 1.00
+ 1.00 -8.24 .00
+ .00 2.46 .00
+ .00 9.60 .00
+ .00 11.24 .00
+ .00 14.13 1.00
+ .00 19.72 1.00
+ .00 5.58 .00
+ .00 26.23 1.00
+ .00 7.25 .00
+ 1.00 -.79 .00
+ .00 6.24 .00
+ 1.00 1.16 .00
+ 1.00 -7.89 1.00
+ 1.00 -1.86 1.00
+ 1.00 -10.80 1.00
+ 1.00 -5.51 .00
+ .00 7.51 .00
+ .00 11.18 .00
+ .00 8.73 .00
+ 1.00 -11.21 1.00
+ 1.00 -13.24 .00
+ .00 19.34 .00
+ .00 9.32 1.00
+ .00 17.97 1.00
+ 1.00 -1.56 1.00
+ 1.00 -3.13 .00
+ .00 3.98 .00
+ .00 -1.21 1.00
+ .00 2.37 .00
+ 1.00 -18.03 1.00
+])
+
+dnl Syntax under test: read y, x1 and cat0 from data.txt, then fit y on x1
+dnl and cat0 with cat0 declared as categorical.
+AT_DATA([miss.sps], [dnl
+data list notable file='data.txt' list /y x1 cat0*.
+
+logistic regression y with x1 cat0
+ /categorical = cat0.
+])
+
+dnl Baseline run on the unmodified data; the CSV output is kept in file1.
+AT_CHECK([pspp -O format=csv miss.sps > file1], [0], [ignore])
+
+dnl Append a case with a missing categorical.
+AT_CHECK([echo '1 34 .' >> data.txt], [0], [ignore])
+
+dnl Second run, now on the extended data; the CSV output is kept in file2.
+AT_CHECK([pspp -O format=csv miss.sps > file2], [0], [ignore])
+
+dnl diff exits with status 1 because the two outputs differ.  The only
+dnl difference expected is in the case-count rows: the appended case is
+dnl reported as missing, 1 of 101, while 100 cases are still included in
+dnl the analysis.  Every other line of output must be unchanged.
+AT_CHECK([diff file1 file2], [1], [dnl
+8,10c8,10
+< Included in Analysis,100,100.00
+< Missing Cases,0,.00
+< Total,100,100.00
+---
+> Included in Analysis,100,99.01
+> Missing Cases,1,.99
+> Total,101,100.00
+])
+
+AT_CLEANUP
+
+
+dnl Check that the confidence intervals are properly reported.
+dnl Use an example with categorical predictors, because that case was
+dnl buggy at one point.  The data in this example comes from:
+dnl http://people.ysu.edu/~gchang/SPSSE/SPSS_lab2Regression.pdf
+AT_SETUP([LOGISTIC REGRESSION confidence interval])
+
+AT_DATA([ci.sps], [dnl
+set FORMAT=F20.3
+data list notable list /disease age sciostat sector savings *.
+begin data.
+0 33 1 1 1
+0 35 1 1 1
+0 6 1 1 0
+0 60 1 1 1
+1 18 3 1 0
+0 26 3 1 0
+0 6 3 1 0
+1 31 2 1 1
+1 26 2 1 0
+0 37 2 1 0
+0 23 1 1 0
+0 23 1 1 0
+0 27 1 1 1
+1 9 1 1 1
+1 37 1 2 1
+1 22 1 2 1
+1 67 1 2 1
+0 8 1 2 1
+1 6 1 2 1
+1 15 1 2 1
+1 21 2 2 1
+1 32 2 2 1
+1 16 1 2 1
+0 11 2 2 0
+0 14 3 2 0
+0 9 2 2 0
+0 18 2 2 0
+0 2 3 1 0
+0 61 3 1 1
+0 20 3 1 0
+0 16 3 1 0
+0 9 2 1 0
+0 35 2 1 1
+0 4 1 1 1
+0 44 3 2 0
+1 11 3 2 0
+0 3 2 2 1
+0 6 3 2 0
+1 17 2 2 0
+0 1 3 2 1
+1 53 2 2 1
+1 13 1 2 0
+0 24 1 2 0
+1 70 1 2 1
+1 16 3 2 1
+0 12 2 2 1
+1 20 3 2 1
+0 65 3 2 1
+1 40 2 2 0
+1 38 2 2 1
+1 68 2 2 1
+1 74 1 2 1
+1 14 1 2 1
+1 27 1 2 1
+0 31 1 2 1
+0 18 1 2 1
+0 39 1 2 0
+0 50 1 2 1
+0 31 1 2 1
+0 61 1 2 1
+0 18 3 1 0
+0 5 3 1 0
+0 2 3 1 1
+0 16 3 1 0
+1 59 3 1 1
+0 22 3 1 0
+0 24 1 1 1
+0 30 1 1 1
+0 46 1 1 1
+0 28 1 1 0
+0 27 1 1 1
+1 27 1 1 0
+0 28 1 1 1
+1 52 1 1 1
+0 11 3 1 1
+0 6 2 1 1
+0 46 3 1 0
+1 20 2 1 1
+0 3 1 1 1
+0 18 2 1 0
+0 25 2 1 0
+0 6 3 1 1
+1 65 3 1 1
+0 51 3 1 1
+0 39 2 1 1
+0 8 1 1 1
+0 8 2 1 0
+0 14 3 1 0
+0 6 3 1 0
+0 6 3 1 1
+0 7 3 1 0
+0 4 3 1 0
+0 8 3 1 0
+0 9 2 1 0
+1 32 3 1 0
+0 19 3 1 0
+0 11 3 1 0
+0 35 3 1 0
+0 16 1 1 0
+0 1 1 1 1
+0 6 1 1 1
+0 27 1 1 1
+0 25 1 1 1
+0 18 1 1 0
+0 37 3 1 0
+1 33 3 1 0
+0 27 2 1 0
+0 2 1 1 0
+0 8 2 1 0
+0 5 1 1 0
+0 1 1 1 1
+0 32 1 1 0
+1 25 1 1 1
+0 15 1 2 0
+0 15 1 2 1
+0 26 1 2 1
+1 42 1 2 1
+0 7 1 2 1
+0 2 1 2 0
+1 65 1 2 1
+0 33 2 2 1
+1 8 2 2 0
+0 30 2 2 0
+0 5 3 2 0
+0 15 3 2 0
+1 60 3 2 1
+1 13 3 2 1
+0 70 3 1 1
+0 5 3 1 0
+0 3 3 1 1
+0 50 2 1 1
+0 6 2 1 0
+0 12 2 1 1
+1 39 3 2 0
+0 15 2 2 1
+1 35 2 2 0
+0 2 2 2 1
+0 17 3 2 0
+1 43 3 2 1
+0 30 2 2 1
+0 11 1 2 1
+1 39 1 2 1
+0 32 1 2 1
+0 17 1 2 1
+0 3 3 2 1
+0 7 3 2 0
+0 2 2 2 0
+1 64 2 2 1
+1 13 1 2 2
+1 15 2 2 1
+0 48 2 2 1
+0 23 1 2 1
+1 48 1 2 0
+0 25 1 2 1
+0 12 1 2 1
+1 46 1 2 1
+0 79 1 2 1
+0 56 1 2 1
+0 8 1 2 1
+1 29 3 1 0
+1 35 3 1 0
+1 11 3 1 0
+0 69 3 1 1
+1 21 3 1 0
+0 13 3 1 0
+0 21 1 1 1
+1 32 1 1 1
+1 24 1 1 0
+0 24 1 1 1
+0 73 1 1 1
+0 42 1 1 1
+1 34 1 1 1
+0 30 2 1 0
+0 7 2 1 0
+1 29 3 1 0
+1 22 3 1 0
+0 38 2 1 1
+0 13 2 1 1
+0 12 2 1 1
+0 42 3 1 0
+1 17 3 1 0
+0 21 3 1 1
+0 34 1 1 1
+0 1 3 1 0
+0 14 2 1 0
+0 16 2 1 0
+0 9 3 1 0
+0 53 3 1 0
+0 27 3 1 0
+0 15 3 1 0
+0 9 3 1 0
+0 4 2 1 1
+0 10 3 1 1
+0 31 3 1 0
+0 85 3 1 1
+0 24 2 1 0
+end data.
+
+logistic regression
+ disease WITH age sciostat sector savings
+ /categorical = sciostat sector
+ /print = ci(95).
+])
+
+dnl Run ci.sps and compare the complete CSV output verbatim.  With
+dnl /print = ci requested at 95, the Variables in the Equation table carries
+dnl Lower and Upper columns under a 95% CI for Exp of B heading.  The parent
+dnl rows of the categorical variables, sciostat and sector, and the Constant
+dnl row leave those two columns empty.
+AT_CHECK([pspp -O format=csv ci.sps], [0], [dnl
+Table: Dependent Variable Encoding
+Original Value,Internal Value
+.000,0
+1.000,1
+
+Table: Case Processing Summary
+Unweighted Cases,N,Percent
+Included in Analysis,196,100.000
+Missing Cases,0,.000
+Total,196,100.000
+
+note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
+
+Table: Model Summary
+Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+,211.195,.120,.172
+
+Table: Categorical Variables' Codings
+,,,Parameter coding,
+,,Frequency,(1),(2)
+sciostat,1.000,77,1,0
+,2.000,49,0,1
+,3.000,70,0,0
+sector,1.000,117,1,
+,2.000,79,0,
+
+Table: Classification Table
+,,,Predicted,,
+,,,disease,,"Percentage
+Correct"
+,Observed,,.000,1.000,
+Step 1,disease,.000,131,8,94.245
+,,1.000,41,16,28.070
+,Overall Percentage,,,,75.000
+
+Table: Variables in the Equation
+,,,,,,,,95% CI for Exp(B),
+,,B,S.E.,Wald,df,Sig.,Exp(B),Lower,Upper
+Step 1,age,.027,.009,8.647,1,.003,1.027,1.009,1.045
+,savings,.061,.386,.025,1,.874,1.063,.499,2.264
+,sciostat,,,.440,2,.803,,,
+,sciostat(1),-.278,.434,.409,1,.522,.757,.323,1.775
+,sciostat(2),-.219,.459,.227,1,.634,.803,.327,1.976
+,sector,,,11.974,1,.001,,,
+,sector(1),-1.235,.357,11.974,1,.001,.291,.145,.586
+,Constant,-.814,.452,3.246,1,.072,.443,,
+])
+
+AT_CLEANUP
+