1 AT_BANNER([LOGISTIC REGRESSION])
3 dnl These examples are adapted from
4 dnl http://www.uvm.edu/~dhowell/gradstat/psych341/lectures/Logistic%20Regression/LogisticReg1.html
dnl LOGIT_TEST_DATA creates the data file lr-data.txt that the tests below read.
dnl Each row has eleven whitespace-separated fields; the DATA LIST commands in
dnl the tests name them, in order:
dnl   id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w
8 m4_define([LOGIT_TEST_DATA],
9 [AT_DATA([lr-data.txt], dnl
10 105.00 1.00 33.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
11 106.00 1.00 50.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
12 107.00 1.00 91.00 3.00 2.00 .28 15.00 7.00 .00897 -1.00905 1
13 108.00 1.00 90.00 3.00 2.00 .20 2.00 2.00 .00972 -1.00982 1
14 109.00 1.00 70.00 3.00 3.00 .38 23.00 27.00 .04745 -1.04981 1
15 111.00 2.00 31.00 2.00 2.00 .00 19.00 10.00 .54159 1.84640 1
16 112.00 1.00 91.00 2.00 3.00 .18 6.00 16.00 .00897 -1.00905 1
17 113.00 1.00 81.00 3.00 2.00 .00 3.00 9.00 .01998 -1.02039 1
18 114.00 2.00 15.00 1.00 2.00 .13 19.00 13.00 .81241 1.23090 1
19 116.00 2.00 1.00 1.00 2.00 .88 15.00 7.00 .93102 1.07410 1
20 117.00 1.00 93.00 3.00 2.00 .18 9.00 15.00 .00764 -1.00770 1
21 118.00 2.00 14.00 1.00 3.00 .15 23.00 18.00 .82447 1.21289 1
22 120.00 1.00 91.00 2.00 2.00 .43 17.00 14.00 .00897 -1.00905 1
23 121.00 1.00 55.00 3.00 2.00 .69 20.00 14.00 .14409 -1.16834 1
24 122.00 1.00 70.00 2.00 3.00 .03 .00 6.00 .04745 -1.04981 1
25 123.00 1.00 25.00 2.00 2.00 .45 4.00 10.00 .65789 -2.92301 1
26 125.00 1.00 91.00 2.00 2.00 .13 .00 3.00 .00897 -1.00905 1
27 126.00 1.00 91.00 3.00 3.00 .23 4.00 6.00 .00897 -1.00905 1
28 127.00 1.00 91.00 3.00 2.00 .00 8.00 8.00 .00897 -1.00905 1
29 128.00 2.00 13.00 2.00 2.00 .65 16.00 14.00 .83592 1.19629 1
30 129.00 1.00 50.00 2.00 2.00 .25 20.00 23.00 .20168 -1.25264 1
31 135.00 1.00 90.00 3.00 3.00 .03 5.00 12.00 .00972 -1.00982 1
32 138.00 1.00 70.00 3.00 3.00 .10 1.00 6.00 .04745 -1.04981 1
33 139.00 2.00 19.00 3.00 3.00 .10 11.00 12.00 .75787 1.31949 1
34 149.00 2.00 50.00 3.00 2.00 .03 .00 .00 .20168 4.95826 1
35 204.00 1.00 50.00 3.00 1.00 .13 .00 1.00 .20168 -1.25264 1
36 205.00 1.00 91.00 3.00 3.00 .72 16.00 18.00 .00897 -1.00905 1
37 206.00 2.00 24.00 1.00 1.00 .10 5.00 21.00 .67592 1.47947 1
38 207.00 1.00 80.00 3.00 3.00 .13 6.00 7.00 .02164 -1.02212 1
39 208.00 1.00 87.00 2.00 2.00 .18 9.00 20.00 .01237 -1.01253 1
40 209.00 1.00 70.00 2.00 2.00 .53 15.00 12.00 .04745 -1.04981 1
41 211.00 1.00 55.00 2.00 1.00 .33 8.00 5.00 .14409 -1.16834 1
42 212.00 1.00 56.00 3.00 1.00 .30 6.00 20.00 .13436 -1.15522 1
43 214.00 1.00 54.00 2.00 2.00 .15 .00 16.00 .15439 -1.18258 1
44 215.00 1.00 71.00 3.00 3.00 .35 12.00 12.00 .04391 -1.04592 1
45 217.00 2.00 36.00 1.00 1.00 .10 12.00 8.00 .44049 2.27020 1
46 218.00 1.00 91.00 2.00 2.00 .05 11.00 25.00 .00897 -1.00905 1
47 219.00 1.00 91.00 2.00 2.00 1.23 11.00 24.00 .00897 -1.00905 1
48 220.00 1.00 91.00 2.00 3.00 .08 8.00 11.00 .00897 -1.00905 1
49 221.00 1.00 91.00 2.00 2.00 .33 5.00 11.00 .00897 -1.00905 1
50 222.00 2.00 36.00 2.00 1.00 .18 5.00 3.00 .44049 2.27020 1
51 223.00 1.00 70.00 2.00 3.00 .18 14.00 3.00 .04745 -1.04981 1
52 224.00 1.00 91.00 2.00 2.00 .43 2.00 10.00 .00897 -1.00905 1
53 225.00 1.00 55.00 2.00 1.00 .18 6.00 11.00 .14409 -1.16834 1
54 229.00 2.00 75.00 2.00 2.00 .40 30.00 25.00 .03212 31.12941 1
55 232.00 1.00 91.00 3.00 2.00 .15 6.00 3.00 .00897 -1.00905 1
56 233.00 1.00 70.00 2.00 1.00 .00 11.00 8.00 .04745 -1.04981 1
57 234.00 1.00 54.00 3.00 2.00 .10 .00 .00 .15439 -1.18258 1
58 237.00 1.00 70.00 3.00 2.00 .18 5.00 25.00 .04745 -1.04981 1
59 241.00 1.00 19.00 2.00 3.00 .33 13.00 9.00 .75787 -4.12995 1
60 304.00 2.00 18.00 2.00 2.00 .26 25.00 6.00 .77245 1.29458 1
61 305.00 1.00 88.00 3.00 2.00 1.35 17.00 29.00 .01142 -1.01155 1
62 306.00 1.00 70.00 2.00 3.00 .63 14.00 33.00 .04745 -1.04981 1
63 307.00 1.00 85.00 2.00 2.00 2.65 18.00 14.00 .01452 -1.01474 1
64 308.00 1.00 13.00 2.00 2.00 .23 5.00 5.00 .83592 -6.09442 1
65 309.00 2.00 13.00 2.00 2.00 .23 7.00 17.00 .83592 1.19629 1
66 311.00 2.00 1.00 2.00 2.00 .50 20.00 14.00 .93102 1.07410 1
67 315.00 1.00 19.00 2.00 3.00 .18 1.00 11.00 .75787 -4.12995 1
68 316.00 1.00 88.00 2.00 2.00 .38 12.00 11.00 .01142 -1.01155 2
69 318.00 1.00 88.00 3.00 2.00 .03 5.00 5.00 .01142 -1.01155 3
70 319.00 2.00 18.00 2.00 3.00 .30 15.00 16.00 .77245 1.29458 1
71 321.00 2.00 15.00 2.00 2.00 .63 15.00 18.00 .81241 1.23090 1
72 322.00 1.00 88.00 3.00 2.00 .40 18.00 15.00 .01142 -1.01155 1
73 325.00 2.00 18.00 2.00 3.00 1.00 28.00 18.00 .77245 1.29458 1
74 329.00 1.00 88.00 3.00 2.00 .03 7.00 11.00 .01142 -1.01155 4
75 332.00 2.00 2.00 2.00 2.00 .05 8.00 9.00 .92562 1.08036 1
78 dnl Note: In the above data, cases 305, 316, 318 and 329 have identical values
79 dnl of the 2nd and 3rd variables. We use this for weight testing.
dnl Basic test: fit outcome on survrate from lr-data.txt and compare pspp's
dnl CSV output with the expected tables and the iteration note.
81 AT_SETUP([LOGISTIC REGRESSION basic test])
85 AT_DATA([lr-data.sps], [dnl
88 data list notable file='lr-data.txt'
89 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
92 variables = outcome with survrate
96 AT_CHECK([pspp -O format=csv lr-data.sps], [0],
98 Table: Dependent Variable Encoding
99 Original Value,Internal Value
103 Table: Case Processing Summary
104 Unweighted Cases,N,Percent
105 Included in Analysis,66,100.000
109 note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001
112 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
115 Table: Variables in the Equation
116 ,,B,S.E.,Wald,df,Sig.,Exp(B)
117 Step 1,survrate,-.081,.019,17.756,1,.000,.922
118 ,Constant,2.684,.811,10.941,1,.001,14.639
dnl Missing-value handling: 999 (survrate) and 44444 (avoid) are declared as
dnl missing values.  The analysis is run once on the original data (run0) and
dnl once after three extra cases carrying those values have been appended
dnl (run1); the two outputs are then compared with diff.
124 AT_SETUP([LOGISTIC REGRESSION missing values])
128 AT_DATA([lr-data.sps], [dnl
131 data list notable file='lr-data.txt'
132 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
134 missing values survrate (999) avoid (44444).
137 variables = outcome with survrate avoid
141 AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore])
dnl Append three cases that each contain a declared missing value: two with
dnl survrate = 999 and one with avoid = 44444.
143 cat >> lr-data.txt << HERE
144 105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
145 106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
146 107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1
149 AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore])
151 dnl Only the summary information should be different
152 AT_CHECK([diff run0 run1], [1], [dnl
154 < Included in Analysis,66,100.000
155 < Missing Cases,0,.000
158 > Included in Analysis,66,95.652
159 > Missing Cases,3,4.348
167 dnl Check that a weighted dataset is interpreted correctly.
168 dnl To do this, the same data set is analysed twice: once unweighted, once weighted.
169 dnl The weighted run omits certain cases which are duplicates of another case.
dnl lr-data-unweighted.sps fits outcome on survrate using every case;
dnl lr-data-weighted.sps first drops cases 305, 316 and 318 and fits the same
dnl model (presumably with the data weighted by w -- the weighting command is
dnl not visible in this excerpt, TODO confirm).  The two CSV outputs are then
dnl compared with diff.
170 AT_SETUP([LOGISTIC REGRESSION weights])
174 AT_DATA([lr-data-unweighted.sps], [dnl
177 data list notable file='lr-data.txt'
178 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
181 variables = outcome with survrate
185 AT_DATA([lr-data-weighted.sps], [dnl
188 data list notable file='lr-data.txt'
189 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
193 * Omit duplicate cases.
194 select if id <> 305 and id <> 316 and id <> 318.
197 variables = outcome with survrate
202 AT_CHECK([pspp -O format=csv lr-data-unweighted.sps > unweighted-result], [0], [ignore])
203 AT_CHECK([pspp -O format=csv lr-data-weighted.sps > weighted-result], [0], [ignore])
205 dnl The only difference should be the summary information, since
206 dnl this displays the unweighted totals.
207 AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
209 < Included in Analysis,66,100.000
211 > Included in Analysis,63,100.000
222 dnl Check that the /NOCONST option works as intended.
223 dnl The results this produces are very similar to those
224 dnl in the example at http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
dnl The syntax computes female = (#i > 91) and a variable named constant that
dnl is always 1, then regresses female on constant with /NOCONST; the expected
dnl coefficient table has a row for the variable constant.
225 AT_SETUP([LOGISTIC REGRESSION without constant])
227 AT_DATA([non-const.sps], [dnl
232 compute female = (#i > 91).
238 compute constant = 1.
240 logistic regression female with constant /noconst.
243 AT_CHECK([pspp -O format=csv non-const.sps], [0],
245 Table: Dependent Variable Encoding
246 Original Value,Internal Value
250 Table: Case Processing Summary
251 Unweighted Cases,N,Percent
252 Included in Analysis,200,100.000
256 note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001
259 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
262 Table: Variables in the Equation
263 ,,B,S.E.,Wald,df,Sig.,Exp(B)
264 Step 1,constant,.180,.142,1.616,1,.204,1.198
271 dnl Check that if somebody passes a dependent variable which is not dichotomous,
272 dnl then an error is raised.
dnl pspp is expected to exit with status 1 and to report that the dependent
dnl variable's values are not dichotomous.
273 AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])
275 AT_DATA([non-dich.sps], [dnl
276 data list notable list /y x1 x2 x3 x4.
283 logistic regression y with x1 x2 x3 x4.
286 AT_CHECK([pspp -O format=csv non-dich.sps], [1],
288 error: Dependent variable's values are not dichotomous.