2 AT_BANNER([LOGISTIC REGRESSION])
4 dnl These examples are adapted from
5 dnl http://www.uvm.edu/~dhowell/gradstat/psych341/lectures/Logistic%20Regression/LogisticReg1.html
9 m4_define([LOGIT_TEST_DATA],
10 [AT_DATA([lr-data.txt], dnl
11 105.00 1.00 33.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
12 106.00 1.00 50.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
13 107.00 1.00 91.00 3.00 2.00 .28 15.00 7.00 .00897 -1.00905 1
14 108.00 1.00 90.00 3.00 2.00 .20 2.00 2.00 .00972 -1.00982 1
15 109.00 1.00 70.00 3.00 3.00 .38 23.00 27.00 .04745 -1.04981 1
16 111.00 2.00 31.00 2.00 2.00 .00 19.00 10.00 .54159 1.84640 1
17 112.00 1.00 91.00 2.00 3.00 .18 6.00 16.00 .00897 -1.00905 1
18 113.00 1.00 81.00 3.00 2.00 .00 3.00 9.00 .01998 -1.02039 1
19 114.00 2.00 15.00 1.00 2.00 .13 19.00 13.00 .81241 1.23090 1
20 116.00 2.00 1.00 1.00 2.00 .88 15.00 7.00 .93102 1.07410 1
21 117.00 1.00 93.00 3.00 2.00 .18 9.00 15.00 .00764 -1.00770 1
22 118.00 2.00 14.00 1.00 3.00 .15 23.00 18.00 .82447 1.21289 1
23 120.00 1.00 91.00 2.00 2.00 .43 17.00 14.00 .00897 -1.00905 1
24 121.00 1.00 55.00 3.00 2.00 .69 20.00 14.00 .14409 -1.16834 1
25 122.00 1.00 70.00 2.00 3.00 .03 .00 6.00 .04745 -1.04981 1
26 123.00 1.00 25.00 2.00 2.00 .45 4.00 10.00 .65789 -2.92301 1
27 125.00 1.00 91.00 2.00 2.00 .13 .00 3.00 .00897 -1.00905 1
28 126.00 1.00 91.00 3.00 3.00 .23 4.00 6.00 .00897 -1.00905 1
29 127.00 1.00 91.00 3.00 2.00 .00 8.00 8.00 .00897 -1.00905 1
30 128.00 2.00 13.00 2.00 2.00 .65 16.00 14.00 .83592 1.19629 1
31 129.00 1.00 50.00 2.00 2.00 .25 20.00 23.00 .20168 -1.25264 1
32 135.00 1.00 90.00 3.00 3.00 .03 5.00 12.00 .00972 -1.00982 1
33 138.00 1.00 70.00 3.00 3.00 .10 1.00 6.00 .04745 -1.04981 1
34 139.00 2.00 19.00 3.00 3.00 .10 11.00 12.00 .75787 1.31949 1
35 149.00 2.00 50.00 3.00 2.00 .03 .00 .00 .20168 4.95826 1
36 204.00 1.00 50.00 3.00 1.00 .13 .00 1.00 .20168 -1.25264 1
37 205.00 1.00 91.00 3.00 3.00 .72 16.00 18.00 .00897 -1.00905 1
38 206.00 2.00 24.00 1.00 1.00 .10 5.00 21.00 .67592 1.47947 1
39 207.00 1.00 80.00 3.00 3.00 .13 6.00 7.00 .02164 -1.02212 1
40 208.00 1.00 87.00 2.00 2.00 .18 9.00 20.00 .01237 -1.01253 1
41 209.00 1.00 70.00 2.00 2.00 .53 15.00 12.00 .04745 -1.04981 1
42 211.00 1.00 55.00 2.00 1.00 .33 8.00 5.00 .14409 -1.16834 1
43 212.00 1.00 56.00 3.00 1.00 .30 6.00 20.00 .13436 -1.15522 1
44 214.00 1.00 54.00 2.00 2.00 .15 .00 16.00 .15439 -1.18258 1
45 215.00 1.00 71.00 3.00 3.00 .35 12.00 12.00 .04391 -1.04592 1
46 217.00 2.00 36.00 1.00 1.00 .10 12.00 8.00 .44049 2.27020 1
47 218.00 1.00 91.00 2.00 2.00 .05 11.00 25.00 .00897 -1.00905 1
48 219.00 1.00 91.00 2.00 2.00 1.23 11.00 24.00 .00897 -1.00905 1
49 220.00 1.00 91.00 2.00 3.00 .08 8.00 11.00 .00897 -1.00905 1
50 221.00 1.00 91.00 2.00 2.00 .33 5.00 11.00 .00897 -1.00905 1
51 222.00 2.00 36.00 2.00 1.00 .18 5.00 3.00 .44049 2.27020 1
52 223.00 1.00 70.00 2.00 3.00 .18 14.00 3.00 .04745 -1.04981 1
53 224.00 1.00 91.00 2.00 2.00 .43 2.00 10.00 .00897 -1.00905 1
54 225.00 1.00 55.00 2.00 1.00 .18 6.00 11.00 .14409 -1.16834 1
55 229.00 2.00 75.00 2.00 2.00 .40 30.00 25.00 .03212 31.12941 1
56 232.00 1.00 91.00 3.00 2.00 .15 6.00 3.00 .00897 -1.00905 1
57 233.00 1.00 70.00 2.00 1.00 .00 11.00 8.00 .04745 -1.04981 1
58 234.00 1.00 54.00 3.00 2.00 .10 .00 .00 .15439 -1.18258 1
59 237.00 1.00 70.00 3.00 2.00 .18 5.00 25.00 .04745 -1.04981 1
60 241.00 1.00 19.00 2.00 3.00 .33 13.00 9.00 .75787 -4.12995 1
61 304.00 2.00 18.00 2.00 2.00 .26 25.00 6.00 .77245 1.29458 1
62 305.00 1.00 88.00 3.00 2.00 1.35 17.00 29.00 .01142 -1.01155 1
63 306.00 1.00 70.00 2.00 3.00 .63 14.00 33.00 .04745 -1.04981 1
64 307.00 1.00 85.00 2.00 2.00 2.65 18.00 14.00 .01452 -1.01474 1
65 308.00 1.00 13.00 2.00 2.00 .23 5.00 5.00 .83592 -6.09442 1
66 309.00 2.00 13.00 2.00 2.00 .23 7.00 17.00 .83592 1.19629 1
67 311.00 2.00 1.00 2.00 2.00 .50 20.00 14.00 .93102 1.07410 1
68 315.00 1.00 19.00 2.00 3.00 .18 1.00 11.00 .75787 -4.12995 1
69 316.00 1.00 88.00 2.00 2.00 .38 12.00 11.00 .01142 -1.01155 2
70 318.00 1.00 88.00 3.00 2.00 .03 5.00 5.00 .01142 -1.01155 3
71 319.00 2.00 18.00 2.00 3.00 .30 15.00 16.00 .77245 1.29458 1
72 321.00 2.00 15.00 2.00 2.00 .63 15.00 18.00 .81241 1.23090 1
73 322.00 1.00 88.00 3.00 2.00 .40 18.00 15.00 .01142 -1.01155 1
74 325.00 2.00 18.00 2.00 3.00 1.00 28.00 18.00 .77245 1.29458 1
75 329.00 1.00 88.00 3.00 2.00 .03 7.00 11.00 .01142 -1.01155 4
76 332.00 2.00 2.00 2.00 2.00 .05 8.00 9.00 .92562 1.08036 1
79 dnl Note: In the above data cases 305, 316, 318 and 329 have identical values
80 dnl of the 2nd and 3rd variables. We use this for weight testing.
82 AT_SETUP([LOGISTIC REGRESSION basic test])
86 AT_DATA([lr-data.sps], [dnl
89 data list notable file='lr-data.txt'
90 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
93 variables = outcome with survrate
97 AT_CHECK([pspp -O format=csv lr-data.sps], [0],
99 Table: Dependent Variable Encoding
100 Original Value,Internal Value
104 Table: Case Processing Summary
105 Unweighted Cases,N,Percent
106 Included in Analysis,66,100.000
110 note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001
113 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
116 Table: Variables in the Equation
117 ,,B,S.E.,Wald,df,Sig.,Exp(B)
118 Step 1,survrate,-.081,.019,17.756,1,.000,.922
119 ,Constant,2.684,.811,10.941,1,.001,14.639
125 AT_SETUP([LOGISTIC REGRESSION missing values])
129 AT_DATA([lr-data.sps], [dnl
132 data list notable file='lr-data.txt'
133 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
135 missing values survrate (999) avoid (44444).
138 variables = outcome with survrate avoid
142 AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore])
144 cat >> lr-data.txt << HERE
145 105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
146 106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
147 107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1
150 AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore])
152 dnl Only the summary information should be different
153 AT_CHECK([diff run0 run1], [1], [dnl
155 < Included in Analysis,66,100.000
156 < Missing Cases,0,.000
159 > Included in Analysis,66,95.652
160 > Missing Cases,3,4.348
168 dnl Check that a weighted dataset is interpreted correctly
169 dnl To do this, the same data set is used, one weighted, one not.
170 dnl The weighted dataset omits certain cases which are identical
171 AT_SETUP([LOGISTIC REGRESSION weights])
175 AT_DATA([lr-data-unweighted.sps], [dnl
178 data list notable file='lr-data.txt'
179 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
182 variables = outcome with survrate
186 AT_DATA([lr-data-weighted.sps], [dnl
189 data list notable file='lr-data.txt'
190 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
194 * Omit duplicate cases.
195 select if id <> 305 and id <> 316 and id <> 318.
198 variables = outcome with survrate
203 AT_CHECK([pspp -O format=csv lr-data-unweighted.sps > unweighted-result], [0], [ignore])
204 AT_CHECK([pspp -O format=csv lr-data-weighted.sps > weighted-result], [0], [ignore])
206 dnl The only difference should be the summary information, since
207 dnl this displays the unweighted totals.
208 AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
210 < Included in Analysis,66,100.000
212 > Included in Analysis,63,100.000
223 dnl Check that the /NOCONST option works as intended.
224 dnl The results this produces are very similar to those
225 dnl at the example in http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
226 AT_SETUP([LOGISTIC REGRESSION without constant])
228 AT_DATA([non-const.sps], [dnl
233 compute female = (#i > 91).
239 compute constant = 1.
241 logistic regression female with constant /noconst.
244 AT_CHECK([pspp -O format=csv non-const.sps], [0],
246 Table: Dependent Variable Encoding
247 Original Value,Internal Value
251 Table: Case Processing Summary
252 Unweighted Cases,N,Percent
253 Included in Analysis,200,100.000
257 note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001
260 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
263 Table: Variables in the Equation
264 ,,B,S.E.,Wald,df,Sig.,Exp(B)
265 Step 1,constant,.180,.142,1.616,1,.204,1.198
272 dnl Check that if somebody passes a dependent variable which is not dichotomous,
273 dnl then an error is raised.
274 AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])
276 AT_DATA([non-dich.sps], [dnl
277 data list notable list /y x1 x2 x3 x4.
284 logistic regression y with x1 x2 x3 x4.
287 AT_CHECK([pspp -O format=csv non-dich.sps], [1],
289 error: Dependent variable's values are not dichotomous.
296 dnl An example to check the behaviour of LOGISTIC REGRESSION with a categorical
297 dnl variable. This example was inspired by the one at:
298 dnl http://www.ats.ucla.edu/stat/spss/dae/logit.htm
299 AT_SETUP([LOGISTIC REGRESSION with categorical])
301 AT_DATA([lr-cat.data], [dnl
704 AT_DATA([lr-cat.sps], [dnl
707 data list notable list file='lr-cat.data' /b1 b2 bcat y.
715 AT_CHECK([pspp -O format=csv lr-cat.sps], [0],
717 Table: Dependent Variable Encoding
718 Original Value,Internal Value
722 Table: Case Processing Summary
723 Unweighted Cases,N,Percent
724 Included in Analysis,400,100.000
728 note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
731 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
734 Table: Categorical Variables' Codings
735 ,,,Parameter coding,,
736 ,,Frequency,(1),(2),(3)
742 Table: Variables in the Equation
743 ,,B,S.E.,Wald,df,Sig.,Exp(B)
744 Step 1,b1,.002,.001,4.284,1,.038,1.002
745 ,b2,.804,.332,5.872,1,.015,2.235
746 ,bcat,,,20.895,3,.000,
747 ,bcat(1),1.551,.418,13.788,1,.000,4.718
748 ,bcat(2),.876,.367,5.706,1,.017,2.401
749 ,bcat(3),.211,.393,.289,1,.591,1.235
750 ,Constant,-5.541,1.138,23.709,1,.000,.004
757 dnl This example is inspired by http://www.ats.ucla.edu/stat/spss/output/logistic.htm
758 AT_SETUP([LOGISTIC REGRESSION with cat var 2])
760 AT_DATA([lr-cat2.data], [dnl
761 60.00 1.00 8.00 50.00
763 57.00 1.00 7.00 53.00
771 68.00 1.00 9.00 69.00
775 57.00 1.00 7.00 61.00
776 55.00 1.00 8.00 50.00
779 50.00 1.00 9.00 66.00
783 47.00 1.00 7.00 34.00
795 68.00 1.00 9.00 69.00
797 63.00 1.00 9.00 61.00
798 65.00 1.00 9.00 61.00
799 63.00 1.00 9.00 53.00
803 52.00 1.00 7.00 56.00
805 47.00 1.00 7.00 53.00
807 50.00 1.00 8.00 55.00
818 68.00 1.00 9.00 55.00
819 47.00 1.00 8.00 50.00
827 55.00 1.00 9.00 49.00
828 68.00 1.00 8.00 50.00
829 52.00 1.00 9.00 63.00
832 66.00 1.00 9.00 61.00
833 65.00 1.00 7.00 58.00
835 68.00 1.00 7.00 59.00
836 60.00 1.00 9.00 61.00
838 57.00 1.00 7.00 54.00
847 63.00 1.00 7.00 63.00
849 57.00 1.00 8.00 63.00
856 65.00 1.00 9.00 63.00
861 63.00 1.00 9.00 55.00
870 47.00 1.00 9.00 69.00
875 50.00 1.00 7.00 63.00
878 73.00 1.00 9.00 61.00
883 57.00 1.00 8.00 55.00
884 53.00 1.00 8.00 57.00
888 57.00 1.00 8.00 58.00
898 73.00 1.00 8.00 69.00
899 71.00 1.00 9.00 58.00
901 63.00 1.00 7.00 54.00
907 65.00 1.00 8.00 55.00
908 76.00 1.00 9.00 67.00
909 71.00 1.00 8.00 66.00
911 47.00 1.00 9.00 63.00
914 54.00 1.00 9.00 55.00
915 55.00 1.00 8.00 58.00
917 55.00 1.00 9.00 63.00
926 65.00 1.00 9.00 66.00
930 63.00 1.00 8.00 72.00
936 73.00 1.00 9.00 58.00
938 63.00 1.00 9.00 69.00
940 65.00 1.00 9.00 66.00
941 73.00 1.00 8.00 63.00
950 60.00 1.00 9.00 50.00
952 73.00 1.00 9.00 55.00
953 52.00 1.00 8.00 47.00
963 AT_DATA([stringcat.sps], [dnl
965 data list notable file='lr-cat2.data' list /read honcomp wiz science *.
968 recode wiz (7 = "a") (8 = "b") (9 = "c") into ses.
970 logistic regression honcomp with read science ses
975 AT_CHECK([pspp -O format=csv stringcat.sps], [0],
977 Table: Dependent Variable Encoding
978 Original Value,Internal Value
982 Table: Case Processing Summary
983 Unweighted Cases,N,Percent
984 Included in Analysis,200,100.000
988 note: Estimation terminated at iteration number 5 because parameter estimates changed by less than 0.001
991 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
994 Table: Categorical Variables' Codings
1001 Table: Variables in the Equation
1002 ,,B,S.E.,Wald,df,Sig.,Exp(B)
1003 Step 1,read,.098,.025,15.199,1,.000,1.103
1004 ,science,.066,.027,5.867,1,.015,1.068
1005 ,ses,,,6.690,2,.035,
1006 ,ses(1),.058,.532,.012,1,.913,1.060
1007 ,ses(2),-1.013,.444,5.212,1,.022,.363
1008 ,Constant,-9.561,1.662,33.113,1,.000,.000