1 AT_BANNER([LOGISTIC REGRESSION])
3 dnl These examples are adapted from
4 dnl http://www.uvm.edu/~dhowell/gradstat/psych341/lectures/Logistic%20Regression/LogisticReg1.html
8 m4_define([LOGIT_TEST_DATA],
9 [AT_DATA([lr-data.txt], dnl
10 105.00 1.00 33.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
11 106.00 1.00 50.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
12 107.00 1.00 91.00 3.00 2.00 .28 15.00 7.00 .00897 -1.00905 1
13 108.00 1.00 90.00 3.00 2.00 .20 2.00 2.00 .00972 -1.00982 1
14 109.00 1.00 70.00 3.00 3.00 .38 23.00 27.00 .04745 -1.04981 1
15 111.00 2.00 31.00 2.00 2.00 .00 19.00 10.00 .54159 1.84640 1
16 112.00 1.00 91.00 2.00 3.00 .18 6.00 16.00 .00897 -1.00905 1
17 113.00 1.00 81.00 3.00 2.00 .00 3.00 9.00 .01998 -1.02039 1
18 114.00 2.00 15.00 1.00 2.00 .13 19.00 13.00 .81241 1.23090 1
19 116.00 2.00 1.00 1.00 2.00 .88 15.00 7.00 .93102 1.07410 1
20 117.00 1.00 93.00 3.00 2.00 .18 9.00 15.00 .00764 -1.00770 1
21 118.00 2.00 14.00 1.00 3.00 .15 23.00 18.00 .82447 1.21289 1
22 120.00 1.00 91.00 2.00 2.00 .43 17.00 14.00 .00897 -1.00905 1
23 121.00 1.00 55.00 3.00 2.00 .69 20.00 14.00 .14409 -1.16834 1
24 122.00 1.00 70.00 2.00 3.00 .03 .00 6.00 .04745 -1.04981 1
25 123.00 1.00 25.00 2.00 2.00 .45 4.00 10.00 .65789 -2.92301 1
26 125.00 1.00 91.00 2.00 2.00 .13 .00 3.00 .00897 -1.00905 1
27 126.00 1.00 91.00 3.00 3.00 .23 4.00 6.00 .00897 -1.00905 1
28 127.00 1.00 91.00 3.00 2.00 .00 8.00 8.00 .00897 -1.00905 1
29 128.00 2.00 13.00 2.00 2.00 .65 16.00 14.00 .83592 1.19629 1
30 129.00 1.00 50.00 2.00 2.00 .25 20.00 23.00 .20168 -1.25264 1
31 135.00 1.00 90.00 3.00 3.00 .03 5.00 12.00 .00972 -1.00982 1
32 138.00 1.00 70.00 3.00 3.00 .10 1.00 6.00 .04745 -1.04981 1
33 139.00 2.00 19.00 3.00 3.00 .10 11.00 12.00 .75787 1.31949 1
34 149.00 2.00 50.00 3.00 2.00 .03 .00 .00 .20168 4.95826 1
35 204.00 1.00 50.00 3.00 1.00 .13 .00 1.00 .20168 -1.25264 1
36 205.00 1.00 91.00 3.00 3.00 .72 16.00 18.00 .00897 -1.00905 1
37 206.00 2.00 24.00 1.00 1.00 .10 5.00 21.00 .67592 1.47947 1
38 207.00 1.00 80.00 3.00 3.00 .13 6.00 7.00 .02164 -1.02212 1
39 208.00 1.00 87.00 2.00 2.00 .18 9.00 20.00 .01237 -1.01253 1
40 209.00 1.00 70.00 2.00 2.00 .53 15.00 12.00 .04745 -1.04981 1
41 211.00 1.00 55.00 2.00 1.00 .33 8.00 5.00 .14409 -1.16834 1
42 212.00 1.00 56.00 3.00 1.00 .30 6.00 20.00 .13436 -1.15522 1
43 214.00 1.00 54.00 2.00 2.00 .15 .00 16.00 .15439 -1.18258 1
44 215.00 1.00 71.00 3.00 3.00 .35 12.00 12.00 .04391 -1.04592 1
45 217.00 2.00 36.00 1.00 1.00 .10 12.00 8.00 .44049 2.27020 1
46 218.00 1.00 91.00 2.00 2.00 .05 11.00 25.00 .00897 -1.00905 1
47 219.00 1.00 91.00 2.00 2.00 1.23 11.00 24.00 .00897 -1.00905 1
48 220.00 1.00 91.00 2.00 3.00 .08 8.00 11.00 .00897 -1.00905 1
49 221.00 1.00 91.00 2.00 2.00 .33 5.00 11.00 .00897 -1.00905 1
50 222.00 2.00 36.00 2.00 1.00 .18 5.00 3.00 .44049 2.27020 1
51 223.00 1.00 70.00 2.00 3.00 .18 14.00 3.00 .04745 -1.04981 1
52 224.00 1.00 91.00 2.00 2.00 .43 2.00 10.00 .00897 -1.00905 1
53 225.00 1.00 55.00 2.00 1.00 .18 6.00 11.00 .14409 -1.16834 1
54 229.00 2.00 75.00 2.00 2.00 .40 30.00 25.00 .03212 31.12941 1
55 232.00 1.00 91.00 3.00 2.00 .15 6.00 3.00 .00897 -1.00905 1
56 233.00 1.00 70.00 2.00 1.00 .00 11.00 8.00 .04745 -1.04981 1
57 234.00 1.00 54.00 3.00 2.00 .10 .00 .00 .15439 -1.18258 1
58 237.00 1.00 70.00 3.00 2.00 .18 5.00 25.00 .04745 -1.04981 1
59 241.00 1.00 19.00 2.00 3.00 .33 13.00 9.00 .75787 -4.12995 1
60 304.00 2.00 18.00 2.00 2.00 .26 25.00 6.00 .77245 1.29458 1
61 305.00 1.00 88.00 3.00 2.00 1.35 17.00 29.00 .01142 -1.01155 1
62 306.00 1.00 70.00 2.00 3.00 .63 14.00 33.00 .04745 -1.04981 1
63 307.00 1.00 85.00 2.00 2.00 2.65 18.00 14.00 .01452 -1.01474 1
64 308.00 1.00 13.00 2.00 2.00 .23 5.00 5.00 .83592 -6.09442 1
65 309.00 2.00 13.00 2.00 2.00 .23 7.00 17.00 .83592 1.19629 1
66 311.00 2.00 1.00 2.00 2.00 .50 20.00 14.00 .93102 1.07410 1
67 315.00 1.00 19.00 2.00 3.00 .18 1.00 11.00 .75787 -4.12995 1
68 316.00 1.00 88.00 2.00 2.00 .38 12.00 11.00 .01142 -1.01155 2
69 318.00 1.00 88.00 3.00 2.00 .03 5.00 5.00 .01142 -1.01155 3
70 319.00 2.00 18.00 2.00 3.00 .30 15.00 16.00 .77245 1.29458 1
71 321.00 2.00 15.00 2.00 2.00 .63 15.00 18.00 .81241 1.23090 1
72 322.00 1.00 88.00 3.00 2.00 .40 18.00 15.00 .01142 -1.01155 1
73 325.00 2.00 18.00 2.00 3.00 1.00 28.00 18.00 .77245 1.29458 1
74 329.00 1.00 88.00 3.00 2.00 .03 7.00 11.00 .01142 -1.01155 4
75 332.00 2.00 2.00 2.00 2.00 .05 8.00 9.00 .92562 1.08036 1
78 dnl Note: In the above data, cases 305, 316, 318 and 329 have identical values
79 dnl of the 2nd and 3rd variables. We use this for weight testing.
81 AT_SETUP([LOGISTIC REGRESSION basic test])
85 AT_DATA([lr-data.sps], [dnl
88 data list notable file='lr-data.txt'
89 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
92 variables = outcome with survrate
96 AT_CHECK([pspp -O format=csv lr-data.sps], [0],
98 Table: Dependent Variable Encoding
99 Original Value,Internal Value
103 Table: Case Processing Summary
104 Unweighted Cases,N,Percent
105 Included in Analysis,66,100.000
109 note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001
112 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
115 Table: Variables in the Equation
116 ,,B,S.E.,Wald,df,Sig.,Exp(B)
117 Step 1,survrate,-.081,.019,17.756,1,.000,.922
118 ,Constant,2.684,.811,10.941,1,.001,14.639
124 AT_SETUP([LOGISTIC REGRESSION missing values])
128 AT_DATA([lr-data.sps], [dnl
131 data list notable file='lr-data.txt'
132 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
134 missing values survrate (999) avoid (44444).
137 variables = outcome with survrate avoid
141 AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore])
143 cat >> lr-data.txt << HERE
144 105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
145 106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
146 107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1
149 AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore])
151 dnl Only the summary information should be different
152 AT_CHECK([diff run0 run1], [1], [dnl
154 < Included in Analysis,66,100.000
155 < Missing Cases,0,.000
158 > Included in Analysis,66,95.652
159 > Missing Cases,3,4.348
167 dnl Check that a weighted dataset is interpreted correctly.
168 dnl To do this, the same data set is analysed twice: once weighted, once not.
169 dnl The weighted dataset omits certain cases which are identical.
170 AT_SETUP([LOGISTIC REGRESSION weights])
174 AT_DATA([lr-data-unweighted.sps], [dnl
177 data list notable file='lr-data.txt'
178 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
181 variables = outcome with survrate
185 AT_DATA([lr-data-weighted.sps], [dnl
188 data list notable file='lr-data.txt'
189 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
193 * Omit duplicate cases.
194 select if id <> 305 and id <> 316 and id <> 318.
197 variables = outcome with survrate
202 AT_CHECK([pspp -O format=csv lr-data-unweighted.sps > unweighted-result], [0], [ignore])
203 AT_CHECK([pspp -O format=csv lr-data-weighted.sps > weighted-result], [0], [ignore])
205 dnl The only difference should be the summary information, since
206 dnl this displays the unweighted totals.
207 AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
209 < Included in Analysis,66,100.000
211 > Included in Analysis,63,100.000
222 dnl Check that the /NOCONST option works as intended.
223 dnl The results it produces are very similar to those
224 dnl of the example at http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
225 AT_SETUP([LOGISTIC REGRESSION without constant])
227 AT_DATA([non-const.sps], [dnl
232 compute female = (#i > 91).
238 compute constant = 1.
240 logistic regression female with constant /noconst.
243 AT_CHECK([pspp -O format=csv non-const.sps], [0],
245 Table: Dependent Variable Encoding
246 Original Value,Internal Value
250 Table: Case Processing Summary
251 Unweighted Cases,N,Percent
252 Included in Analysis,200,100.000
256 note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001
259 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
262 Table: Variables in the Equation
263 ,,B,S.E.,Wald,df,Sig.,Exp(B)
264 Step 1,constant,.180,.142,1.616,1,.204,1.198
271 dnl Check that if somebody passes a dependent variable which is not dichotomous,
272 dnl then an error is raised.
273 AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])
275 AT_DATA([non-dich.sps], [dnl
276 data list notable list /y x1 x2 x3 x4.
283 logistic regression y with x1 x2 x3 x4.
286 AT_CHECK([pspp -O format=csv non-dich.sps], [1],
288 error: Dependent variable's values are not dichotomous.
295 dnl An example to check the behaviour of LOGISTIC REGRESSION with a categorical
296 dnl variable. This example was inspired by the one at:
297 dnl http://www.ats.ucla.edu/stat/spss/dae/logit.htm
298 AT_SETUP([LOGISTIC REGRESSION with categorical])
300 AT_DATA([lr-cat.data], [dnl
703 AT_DATA([lr-cat.sps], [dnl
706 data list notable list file='lr-cat.data' /b1 b2 bcat y.
714 AT_CHECK([pspp -O format=csv lr-cat.sps], [0],
716 Table: Dependent Variable Encoding
717 Original Value,Internal Value
721 Table: Case Processing Summary
722 Unweighted Cases,N,Percent
723 Included in Analysis,400,100.000
727 note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
730 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
733 Table: Categorical Variables' Codings
734 ,,,Parameter coding,,
735 ,,Frequency,(1),(2),(3)
741 Table: Variables in the Equation
742 ,,B,S.E.,Wald,df,Sig.,Exp(B)
743 Step 1,b1,.002,.001,4.284,1,.038,1.002
744 ,b2,.804,.332,5.872,1,.015,2.235
745 ,bcat,,,20.895,3,.000,
746 ,bcat(1),1.551,.418,13.788,1,.000,4.718
747 ,bcat(2),.876,.367,5.706,1,.017,2.401
748 ,bcat(3),.211,.393,.289,1,.591,1.235
749 ,Constant,-5.541,1.138,23.709,1,.000,.004
756 dnl This example is inspired by http://www.ats.ucla.edu/stat/spss/output/logistic.htm
757 AT_SETUP([LOGISTIC REGRESSION with cat var 2])
759 AT_DATA([lr-cat2.data], [dnl
760 60.00 1.00 8.00 50.00
762 57.00 1.00 7.00 53.00
770 68.00 1.00 9.00 69.00
774 57.00 1.00 7.00 61.00
775 55.00 1.00 8.00 50.00
778 50.00 1.00 9.00 66.00
782 47.00 1.00 7.00 34.00
794 68.00 1.00 9.00 69.00
796 63.00 1.00 9.00 61.00
797 65.00 1.00 9.00 61.00
798 63.00 1.00 9.00 53.00
802 52.00 1.00 7.00 56.00
804 47.00 1.00 7.00 53.00
806 50.00 1.00 8.00 55.00
817 68.00 1.00 9.00 55.00
818 47.00 1.00 8.00 50.00
826 55.00 1.00 9.00 49.00
827 68.00 1.00 8.00 50.00
828 52.00 1.00 9.00 63.00
831 66.00 1.00 9.00 61.00
832 65.00 1.00 7.00 58.00
834 68.00 1.00 7.00 59.00
835 60.00 1.00 9.00 61.00
837 57.00 1.00 7.00 54.00
846 63.00 1.00 7.00 63.00
848 57.00 1.00 8.00 63.00
855 65.00 1.00 9.00 63.00
860 63.00 1.00 9.00 55.00
869 47.00 1.00 9.00 69.00
874 50.00 1.00 7.00 63.00
877 73.00 1.00 9.00 61.00
882 57.00 1.00 8.00 55.00
883 53.00 1.00 8.00 57.00
887 57.00 1.00 8.00 58.00
897 73.00 1.00 8.00 69.00
898 71.00 1.00 9.00 58.00
900 63.00 1.00 7.00 54.00
906 65.00 1.00 8.00 55.00
907 76.00 1.00 9.00 67.00
908 71.00 1.00 8.00 66.00
910 47.00 1.00 9.00 63.00
913 54.00 1.00 9.00 55.00
914 55.00 1.00 8.00 58.00
916 55.00 1.00 9.00 63.00
925 65.00 1.00 9.00 66.00
929 63.00 1.00 8.00 72.00
935 73.00 1.00 9.00 58.00
937 63.00 1.00 9.00 69.00
939 65.00 1.00 9.00 66.00
940 73.00 1.00 8.00 63.00
949 60.00 1.00 9.00 50.00
951 73.00 1.00 9.00 55.00
952 52.00 1.00 8.00 47.00
962 AT_DATA([stringcat.sps], [dnl
964 data list notable file='lr-cat2.data' list /read honcomp wiz science *.
967 recode wiz (7 = "a") (8 = "b") (9 = "c") into ses.
969 logistic regression honcomp with read science ses
974 AT_CHECK([pspp -O format=csv stringcat.sps], [0],
976 Table: Dependent Variable Encoding
977 Original Value,Internal Value
981 Table: Case Processing Summary
982 Unweighted Cases,N,Percent
983 Included in Analysis,200,100.000
987 note: Estimation terminated at iteration number 5 because parameter estimates changed by less than 0.001
990 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
993 Table: Categorical Variables' Codings
1000 Table: Variables in the Equation
1001 ,,B,S.E.,Wald,df,Sig.,Exp(B)
1002 Step 1,read,.098,.025,15.199,1,.000,1.103
1003 ,science,.066,.027,5.867,1,.015,1.068
1004 ,ses,,,6.690,2,.035,
1005 ,ses(1),.058,.532,.012,1,.913,1.060
1006 ,ses(2),-1.013,.444,5.212,1,.022,.363
1007 ,Constant,-9.561,1.662,33.113,1,.000,.000
1013 dnl Check that it doesn't crash if a categorical variable
1014 dnl has only one distinct value
1015 AT_SETUP([LOGISTIC REGRESSION identical categories])
1017 AT_DATA([crash.sps], [dnl
1018 data list notable list /y x1 x2*.
1024 logistic regression y with x1 x2
1028 AT_CHECK([pspp -O format=csv crash.sps], [1], [ignore])
1033 dnl Test that missing values on the categorical predictors are treated
1035 AT_SETUP([LOGISTIC REGRESSION missing categoricals])
1037 AT_DATA([data.txt], [dnl
1140 AT_DATA([miss.sps], [dnl
1141 data list notable file='data.txt' list /y x1 cat0*.
1143 logistic regression y with x1 cat0
1144 /categorical = cat0.
1147 AT_CHECK([pspp -O format=csv miss.sps > file1], [0], [ignore])
1149 dnl Append a case with a missing categorical.
1150 AT_CHECK([echo '1 34 .' >> data.txt], [0], [ignore])
1152 AT_CHECK([pspp -O format=csv miss.sps > file2], [0], [ignore])
1154 AT_CHECK([diff file1 file2], [1], [dnl
1156 < Included in Analysis,100,100.00
1157 < Missing Cases,0,.00
1160 > Included in Analysis,100,99.01
1161 > Missing Cases,1,.99