dnl of the 2nd and 3rd variables. We use this for weight testing.
AT_SETUP([LOGISTIC REGRESSION basic test])
+AT_KEYWORDS([categorical categoricals])
LOGIT_TEST_DATA
.
])
-AT_CHECK([pspp -O format=csv lr-data.sps], [0],
- [dnl
-Table: Dependent Variable Encoding
+AT_CHECK([pspp -o pspp.csv -o pspp.txt lr-data.sps], [0], [dnl
+note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001
+])
+AT_CHECK([cat pspp.csv], [0], [Table: Dependent Variable Encoding
Original Value,Internal Value
-1.000,0
-2.000,1
+1.000,.000
+2.000,1.000
Table: Case Processing Summary
Unweighted Cases,N,Percent
-Included in Analysis,66,100.000
-Missing Cases,0,.000
-Total,66,100.000
+Included in Analysis,66,100.0%
+Missing Cases,0,.0%
+Total,66,100.0%
note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001
Table: Model Summary
-Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
-,37.323,.455,.659
+Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+1,37.323,.455,.659
Table: Classification Table
,,,Predicted,,
-,,,outcome,,"Percentage
-Correct"
+,,,outcome,,Percentage Correct
,Observed,,1.000,2.000,
-Step 1,outcome,1.000,43,5,89.583
-,,2.000,4,14,77.778
-,Overall Percentage,,,,86.364
+Step 1,outcome,1.000,43,5,89.6%
+,,2.000,4,14,77.8%
+,Overall Percentage,,,,86.4%
Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
Step 1,survrate,-.081,.019,17.756,1,.000,.922
,Constant,2.684,.811,10.941,1,.001,14.639
])
-
-
AT_CLEANUP
AT_SETUP([LOGISTIC REGRESSION missing values])
+AT_KEYWORDS([categorical categoricals])
LOGIT_TEST_DATA
dnl Only the summary information should be different
AT_CHECK([diff run0 run1], [1], [dnl
8,10c8,10
-< Included in Analysis,66,100.000
-< Missing Cases,0,.000
-< Total,66,100.000
+< Included in Analysis,66,100.0%
+< Missing Cases,0,.0%
+< Total,66,100.0%
---
-> Included in Analysis,66,94.286
-> Missing Cases,4,5.714
-> Total,70,100.000
+> Included in Analysis,66,94.3%
+> Missing Cases,4,5.7%
+> Total,70,100.0%
])
AT_CLEANUP
dnl To do this, the same data set is used, one weighted, one not.
dnl The weighted dataset omits certain cases which are identical
AT_SETUP([LOGISTIC REGRESSION weights])
+AT_KEYWORDS([categorical categoricals])
LOGIT_TEST_DATA
dnl this displays the unweighted totals.
AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
8c8
-< Included in Analysis,66,100.000
+< Included in Analysis,66,100.0%
---
-> Included in Analysis,63,100.000
+> Included in Analysis,63,100.0%
10c10
-< Total,66,100.000
+< Total,66,100.0%
---
-> Total,63,100.000
-23,24c23,24
-< Step 1,outcome,1.000,43,5,89.583
-< ,,2.000,4,14,77.778
+> Total,63,100.0%
+22,23c22,23
+< Step 1,outcome,1.000,43,5,89.6%
+< ,,2.000,4,14,77.8%
---
-> Step 1,outcome,1.000,43.000,5.000,89.583
-> ,,2.000,4.000,14.000,77.778
+> Step 1,outcome,1.000,43.000,5.000,89.6%
+> ,,2.000,4.000,14.000,77.8%
])
dnl The results this produces are very similar to those
dnl in the example at http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
AT_SETUP([LOGISTIC REGRESSION without constant])
+AT_KEYWORDS([categorical categoricals])
AT_DATA([non-const.sps], [dnl
set format=F20.3.
logistic regression female with constant /noconst.
])
-AT_CHECK([pspp -O format=csv non-const.sps], [0],
- [dnl
+AT_CHECK([pspp -O format=csv non-const.sps], [0], [dnl
Table: Dependent Variable Encoding
Original Value,Internal Value
-.00,0
-1.00,1
+.00,.000
+1.00,1.000
Table: Case Processing Summary
Unweighted Cases,N,Percent
-Included in Analysis,200,100.000
-Missing Cases,0,.000
-Total,200,100.000
+Included in Analysis,200,100.0%
+Missing Cases,0,.0%
+Total,200,100.0%
note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001
Table: Model Summary
-Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
-,275.637,.008,.011
+Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+1,275.637,.008,.011
Table: Classification Table
,,,Predicted,,
-,,,female,,"Percentage
-Correct"
+,,,female,,Percentage Correct
,Observed,,.00,1.00,
-Step 1,female,.00,0,91,.000
-,,1.00,0,109,100.000
-,Overall Percentage,,,,54.500
+Step 1,female,.00,0,91,.0%
+,,1.00,0,109,100.0%
+,Overall Percentage,,,,54.5%
Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
dnl Check that if somebody passes a dependent variable which is not dichotomous,
dnl then an error is raised.
AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])
+AT_KEYWORDS([categorical categoricals])
AT_DATA([non-dich.sps], [dnl
data list notable list /y x1 x2 x3 x4.
dnl variable. This example was inspired by that at:
dnl http://www.ats.ucla.edu/stat/spss/dae/logit.htm
AT_SETUP([LOGISTIC REGRESSION with categorical])
+AT_KEYWORDS([categorical categoricals])
AT_DATA([lr-cat.data], [dnl
620 3.07 2 4
.
])
-AT_CHECK([pspp -O format=csv lr-cat.sps], [0],
- [dnl
+AT_CHECK([pspp -O format=csv lr-cat.sps], [0], [dnl
Table: Dependent Variable Encoding
Original Value,Internal Value
-4.000,0
-9.000,1
+4.000,.000
+9.000,1.000
Table: Case Processing Summary
Unweighted Cases,N,Percent
-Included in Analysis,400,100.000
-Missing Cases,0,.000
-Total,400,100.000
+Included in Analysis,400,100.0%
+Missing Cases,0,.0%
+Total,400,100.0%
note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
Table: Model Summary
-Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
-,458.517,.098,.138
+Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+1,458.517,.098,.138
Table: Categorical Variables' Codings
-,,,Parameter coding,,
-,,Frequency,(1),(2),(3)
+,,Frequency,Parameter coding,,
+,,,(1),(2),(3)
bcat,1.000,61,1,0,0
,2.000,151,0,1,0
,3.000,121,0,0,1
Table: Classification Table
,,,Predicted,,
-,,,y,,"Percentage
-Correct"
+,,,y,,Percentage Correct
,Observed,,4.000,9.000,
-Step 1,y,4.000,254,19,93.040
-,,9.000,97,30,23.622
-,Overall Percentage,,,,71.000
+Step 1,y,4.000,254,19,93.0%
+,,9.000,97,30,23.6%
+,Overall Percentage,,,,71.0%
Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
,bcat(3),.211,.393,.289,1,.591,1.235
,Constant,-5.541,1.138,23.709,1,.000,.004
])
-
AT_CLEANUP
dnl This example is inspired by http://www.ats.ucla.edu/stat/spss/output/logistic.htm
AT_SETUP([LOGISTIC REGRESSION with cat var 2])
+AT_KEYWORDS([categorical categoricals])
AT_DATA([lr-cat2.data], [dnl
60.00 1.00 8.00 50.00
])
-AT_CHECK([pspp -O format=csv stringcat.sps], [0],
- [dnl
+AT_CHECK([pspp -O format=csv stringcat.sps], [0], [dnl
Table: Dependent Variable Encoding
Original Value,Internal Value
-.000,0
-1.000,1
+.000,.000
+1.000,1.000
Table: Case Processing Summary
Unweighted Cases,N,Percent
-Included in Analysis,200,100.000
-Missing Cases,0,.000
-Total,200,100.000
+Included in Analysis,200,100.0%
+Missing Cases,0,.0%
+Total,200,100.0%
note: Estimation terminated at iteration number 5 because parameter estimates changed by less than 0.001
Table: Model Summary
-Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
-,165.701,.280,.408
+Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+1,165.701,.280,.408
Table: Categorical Variables' Codings
-,,,Parameter coding,
-,,Frequency,(1),(2)
+,,Frequency,Parameter coding,
+,,,(1),(2)
ses,a,47,1,0
,b,95,0,1
,c,58,0,0
Table: Classification Table
,,,Predicted,,
-,,,honcomp,,"Percentage
-Correct"
+,,,honcomp,,Percentage Correct
,Observed,,.000,1.000,
-Step 1,honcomp,.000,132,15,89.796
-,,1.000,26,27,50.943
-,Overall Percentage,,,,79.500
+Step 1,honcomp,.000,132,15,89.8%
+,,1.000,26,27,50.9%
+,Overall Percentage,,,,79.5%
Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
dnl Check that it doesn't crash if a categorical variable
dnl has only one distinct value
AT_SETUP([LOGISTIC REGRESSION identical categories])
+AT_KEYWORDS([categorical categoricals])
AT_DATA([crash.sps], [dnl
data list notable list /y x1 x2*.
dnl Test that missing values on the categorical predictors are treated
dnl properly.
AT_SETUP([LOGISTIC REGRESSION missing categoricals])
+AT_KEYWORDS([categorical categoricals])
AT_DATA([data.txt], [dnl
.00 3.69 .00
AT_CHECK([diff file1 file2], [1], [dnl
8,10c8,10
-< Included in Analysis,100,100.00
-< Missing Cases,0,.00
-< Total,100,100.00
+< Included in Analysis,100,100.0%
+< Missing Cases,0,.0%
+< Total,100,100.0%
---
-> Included in Analysis,100,99.01
-> Missing Cases,1,.99
-> Total,101,100.00
+> Included in Analysis,100,99.0%
+> Missing Cases,1,1.0%
+> Total,101,100.0%
])
AT_CLEANUP
dnl one point. The data in this example comes from:
dnl http://people.ysu.edu/~gchang/SPSSE/SPSS_lab2Regression.pdf
AT_SETUP([LOGISTIC REGRESSION confidence interval])
+AT_KEYWORDS([categorical categoricals])
AT_DATA([ci.sps], [dnl
set FORMAT=F20.3
AT_CHECK([pspp -O format=csv ci.sps], [0], [dnl
Table: Dependent Variable Encoding
Original Value,Internal Value
-.000,0
-1.000,1
+.000,.000
+1.000,1.000
Table: Case Processing Summary
Unweighted Cases,N,Percent
-Included in Analysis,196,100.000
-Missing Cases,0,.000
-Total,196,100.000
+Included in Analysis,196,100.0%
+Missing Cases,0,.0%
+Total,196,100.0%
note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
Table: Model Summary
-Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
-,211.195,.120,.172
+Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+1,211.195,.120,.172
Table: Categorical Variables' Codings
-,,,Parameter coding,
-,,Frequency,(1),(2)
+,,Frequency,Parameter coding,
+,,,(1),(2)
sciostat,1.000,77,1,0
,2.000,49,0,1
,3.000,70,0,0
Table: Classification Table
,,,Predicted,,
-,,,disease,,"Percentage
-Correct"
+,,,disease,,Percentage Correct
,Observed,,.000,1.000,
-Step 1,disease,.000,131,8,94.245
-,,1.000,41,16,28.070
-,Overall Percentage,,,,75.000
+Step 1,disease,.000,131,8,94.2%
+,,1.000,41,16,28.1%
+,Overall Percentage,,,,75.0%
Table: Variables in the Equation
-,,,,,,,,95% CI for Exp(B),
-,,B,S.E.,Wald,df,Sig.,Exp(B),Lower,Upper
+,,B,S.E.,Wald,df,Sig.,Exp(B),95% CI for Exp(B),
+,,,,,,,,Lower,Upper
Step 1,age,.027,.009,8.647,1,.003,1.027,1.009,1.045
,savings,.061,.386,.025,1,.874,1.063,.499,2.264
,sciostat,,,.440,2,.803,,,