1 AT_BANNER([LOGISTIC REGRESSION])
3 dnl These examples are adapted from
4 dnl http://www.uvm.edu/~dhowell/gradstat/psych341/lectures/Logistic%20Regression/LogisticReg1.html
8 m4_define([LOGIT_TEST_DATA],
9 [AT_DATA([lr-data.txt], dnl
10 105.00 1.00 33.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
11 106.00 1.00 50.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
12 107.00 1.00 91.00 3.00 2.00 .28 15.00 7.00 .00897 -1.00905 1
13 108.00 1.00 90.00 3.00 2.00 .20 2.00 2.00 .00972 -1.00982 1
14 109.00 1.00 70.00 3.00 3.00 .38 23.00 27.00 .04745 -1.04981 1
15 111.00 2.00 31.00 2.00 2.00 .00 19.00 10.00 .54159 1.84640 1
16 112.00 1.00 91.00 2.00 3.00 .18 6.00 16.00 .00897 -1.00905 1
17 113.00 1.00 81.00 3.00 2.00 .00 3.00 9.00 .01998 -1.02039 1
18 114.00 2.00 15.00 1.00 2.00 .13 19.00 13.00 .81241 1.23090 1
19 116.00 2.00 1.00 1.00 2.00 .88 15.00 7.00 .93102 1.07410 1
20 117.00 1.00 93.00 3.00 2.00 .18 9.00 15.00 .00764 -1.00770 1
21 118.00 2.00 14.00 1.00 3.00 .15 23.00 18.00 .82447 1.21289 1
22 120.00 1.00 91.00 2.00 2.00 .43 17.00 14.00 .00897 -1.00905 1
23 121.00 1.00 55.00 3.00 2.00 .69 20.00 14.00 .14409 -1.16834 1
24 122.00 1.00 70.00 2.00 3.00 .03 .00 6.00 .04745 -1.04981 1
25 123.00 1.00 25.00 2.00 2.00 .45 4.00 10.00 .65789 -2.92301 1
26 125.00 1.00 91.00 2.00 2.00 .13 .00 3.00 .00897 -1.00905 1
27 126.00 1.00 91.00 3.00 3.00 .23 4.00 6.00 .00897 -1.00905 1
28 127.00 1.00 91.00 3.00 2.00 .00 8.00 8.00 .00897 -1.00905 1
29 128.00 2.00 13.00 2.00 2.00 .65 16.00 14.00 .83592 1.19629 1
30 129.00 1.00 50.00 2.00 2.00 .25 20.00 23.00 .20168 -1.25264 1
31 135.00 1.00 90.00 3.00 3.00 .03 5.00 12.00 .00972 -1.00982 1
32 138.00 1.00 70.00 3.00 3.00 .10 1.00 6.00 .04745 -1.04981 1
33 139.00 2.00 19.00 3.00 3.00 .10 11.00 12.00 .75787 1.31949 1
34 149.00 2.00 50.00 3.00 2.00 .03 .00 .00 .20168 4.95826 1
35 204.00 1.00 50.00 3.00 1.00 .13 .00 1.00 .20168 -1.25264 1
36 205.00 1.00 91.00 3.00 3.00 .72 16.00 18.00 .00897 -1.00905 1
37 206.00 2.00 24.00 1.00 1.00 .10 5.00 21.00 .67592 1.47947 1
38 207.00 1.00 80.00 3.00 3.00 .13 6.00 7.00 .02164 -1.02212 1
39 208.00 1.00 87.00 2.00 2.00 .18 9.00 20.00 .01237 -1.01253 1
40 209.00 1.00 70.00 2.00 2.00 .53 15.00 12.00 .04745 -1.04981 1
41 211.00 1.00 55.00 2.00 1.00 .33 8.00 5.00 .14409 -1.16834 1
42 212.00 1.00 56.00 3.00 1.00 .30 6.00 20.00 .13436 -1.15522 1
43 214.00 1.00 54.00 2.00 2.00 .15 .00 16.00 .15439 -1.18258 1
44 215.00 1.00 71.00 3.00 3.00 .35 12.00 12.00 .04391 -1.04592 1
45 217.00 2.00 36.00 1.00 1.00 .10 12.00 8.00 .44049 2.27020 1
46 218.00 1.00 91.00 2.00 2.00 .05 11.00 25.00 .00897 -1.00905 1
47 219.00 1.00 91.00 2.00 2.00 1.23 11.00 24.00 .00897 -1.00905 1
48 220.00 1.00 91.00 2.00 3.00 .08 8.00 11.00 .00897 -1.00905 1
49 221.00 1.00 91.00 2.00 2.00 .33 5.00 11.00 .00897 -1.00905 1
50 222.00 2.00 36.00 2.00 1.00 .18 5.00 3.00 .44049 2.27020 1
51 223.00 1.00 70.00 2.00 3.00 .18 14.00 3.00 .04745 -1.04981 1
52 224.00 1.00 91.00 2.00 2.00 .43 2.00 10.00 .00897 -1.00905 1
53 225.00 1.00 55.00 2.00 1.00 .18 6.00 11.00 .14409 -1.16834 1
54 229.00 2.00 75.00 2.00 2.00 .40 30.00 25.00 .03212 31.12941 1
55 232.00 1.00 91.00 3.00 2.00 .15 6.00 3.00 .00897 -1.00905 1
56 233.00 1.00 70.00 2.00 1.00 .00 11.00 8.00 .04745 -1.04981 1
57 234.00 1.00 54.00 3.00 2.00 .10 .00 .00 .15439 -1.18258 1
58 237.00 1.00 70.00 3.00 2.00 .18 5.00 25.00 .04745 -1.04981 1
59 241.00 1.00 19.00 2.00 3.00 .33 13.00 9.00 .75787 -4.12995 1
60 304.00 2.00 18.00 2.00 2.00 .26 25.00 6.00 .77245 1.29458 1
61 305.00 1.00 88.00 3.00 2.00 1.35 17.00 29.00 .01142 -1.01155 1
62 306.00 1.00 70.00 2.00 3.00 .63 14.00 33.00 .04745 -1.04981 1
63 307.00 1.00 85.00 2.00 2.00 2.65 18.00 14.00 .01452 -1.01474 1
64 308.00 1.00 13.00 2.00 2.00 .23 5.00 5.00 .83592 -6.09442 1
65 309.00 2.00 13.00 2.00 2.00 .23 7.00 17.00 .83592 1.19629 1
66 311.00 2.00 1.00 2.00 2.00 .50 20.00 14.00 .93102 1.07410 1
67 315.00 1.00 19.00 2.00 3.00 .18 1.00 11.00 .75787 -4.12995 1
68 316.00 1.00 88.00 2.00 2.00 .38 12.00 11.00 .01142 -1.01155 2
69 318.00 1.00 88.00 3.00 2.00 .03 5.00 5.00 .01142 -1.01155 3
70 319.00 2.00 18.00 2.00 3.00 .30 15.00 16.00 .77245 1.29458 1
71 321.00 2.00 15.00 2.00 2.00 .63 15.00 18.00 .81241 1.23090 1
72 322.00 1.00 88.00 3.00 2.00 .40 18.00 15.00 .01142 -1.01155 1
73 325.00 2.00 18.00 2.00 3.00 1.00 28.00 18.00 .77245 1.29458 1
74 329.00 1.00 88.00 3.00 2.00 .03 7.00 11.00 .01142 -1.01155 4
75 332.00 2.00 2.00 2.00 2.00 .05 8.00 9.00 .92562 1.08036 1
78 dnl Note: In the above data, cases 305, 316, 318 and 329 have identical values
79 dnl of the 2nd and 3rd variables (outcome and survrate). We use this for weight testing.
81 AT_SETUP([LOGISTIC REGRESSION basic test])
85 AT_DATA([lr-data.sps], [dnl
88 data list notable file='lr-data.txt'
89 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
92 variables = outcome with survrate
96 AT_CHECK([pspp -O format=csv lr-data.sps], [0],
98 Table: Dependent Variable Encoding
99 Original Value,Internal Value
103 Table: Case Processing Summary
104 Unweighted Cases,N,Percent
105 Included in Analysis,66,100.000
109 note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001
112 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
115 Table: Classification Table
117 ,,,outcome,,"Percentage
119 ,Observed,,1.000,2.000,
120 Step 1,outcome,1.000,43,5,89.583
122 ,Overall Percentage,,,,86.364
124 Table: Variables in the Equation
125 ,,B,S.E.,Wald,df,Sig.,Exp(B)
126 Step 1,survrate,-.081,.019,17.756,1,.000,.922
127 ,Constant,2.684,.811,10.941,1,.001,14.639
133 AT_SETUP([LOGISTIC REGRESSION missing values])
137 AT_DATA([lr-data.sps], [dnl
140 data list notable file='lr-data.txt'
141 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
143 missing values survrate (999) avoid (44444).
146 variables = outcome with survrate avoid
150 AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore])
152 cat >> lr-data.txt << HERE
153 105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
154 106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
155 107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1
158 AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore])
160 dnl Only the summary information should be different
161 AT_CHECK([diff run0 run1], [1], [dnl
163 < Included in Analysis,66,100.000
164 < Missing Cases,0,.000
167 > Included in Analysis,66,95.652
168 > Missing Cases,3,4.348
176 dnl Check that a weighted dataset is interpreted correctly.
177 dnl To do this, the same data set is analysed twice: once weighted, once not.
178 dnl The weighted run omits certain cases which are identical to one another.
179 AT_SETUP([LOGISTIC REGRESSION weights])
183 AT_DATA([lr-data-unweighted.sps], [dnl
186 data list notable file='lr-data.txt'
187 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
190 variables = outcome with survrate
194 AT_DATA([lr-data-weighted.sps], [dnl
197 data list notable file='lr-data.txt'
198 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
202 * Omit duplicate cases.
203 select if id <> 305 and id <> 316 and id <> 318.
206 variables = outcome with survrate
211 AT_CHECK([pspp -O format=csv lr-data-unweighted.sps > unweighted-result], [0], [ignore])
212 AT_CHECK([pspp -O format=csv lr-data-weighted.sps > weighted-result], [0], [ignore])
214 dnl The only differences should be the summary information (which displays
215 dnl the unweighted totals) and the count format in the classification table.
216 AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
218 < Included in Analysis,66,100.000
220 > Included in Analysis,63,100.000
226 < Step 1,outcome,1.000,43,5,89.583
227 < ,,2.000,4,14,77.778
229 > Step 1,outcome,1.000,43.000,5.000,89.583
230 > ,,2.000,4.000,14.000,77.778
237 dnl Check that the /NOCONST option works as intended.
238 dnl The results it produces are very similar to those
239 dnl of the example at http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
240 AT_SETUP([LOGISTIC REGRESSION without constant])
242 AT_DATA([non-const.sps], [dnl
247 compute female = (#i > 91).
253 compute constant = 1.
255 logistic regression female with constant /noconst.
258 AT_CHECK([pspp -O format=csv non-const.sps], [0],
260 Table: Dependent Variable Encoding
261 Original Value,Internal Value
265 Table: Case Processing Summary
266 Unweighted Cases,N,Percent
267 Included in Analysis,200,100.000
271 note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001
274 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
277 Table: Classification Table
279 ,,,female,,"Percentage
282 Step 1,female,.00,0,91,.000
284 ,Overall Percentage,,,,54.500
286 Table: Variables in the Equation
287 ,,B,S.E.,Wald,df,Sig.,Exp(B)
288 Step 1,constant,.180,.142,1.616,1,.204,1.198
295 dnl Check that if somebody passes a dependent variable which is not dichotomous,
296 dnl then an error is raised.
297 AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])
299 AT_DATA([non-dich.sps], [dnl
300 data list notable list /y x1 x2 x3 x4.
307 logistic regression y with x1 x2 x3 x4.
310 AT_CHECK([pspp -O format=csv non-dich.sps], [1],
312 error: Dependent variable's values are not dichotomous.
319 dnl An example to check the behaviour of LOGISTIC REGRESSION with a categorical
320 dnl variable. This example was inspired by the one at:
321 dnl http://www.ats.ucla.edu/stat/spss/dae/logit.htm
322 AT_SETUP([LOGISTIC REGRESSION with categorical])
324 AT_DATA([lr-cat.data], [dnl
727 AT_DATA([lr-cat.sps], [dnl
730 data list notable list file='lr-cat.data' /b1 b2 bcat y.
738 AT_CHECK([pspp -O format=csv lr-cat.sps], [0],
740 Table: Dependent Variable Encoding
741 Original Value,Internal Value
745 Table: Case Processing Summary
746 Unweighted Cases,N,Percent
747 Included in Analysis,400,100.000
751 note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
754 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
757 Table: Categorical Variables' Codings
758 ,,,Parameter coding,,
759 ,,Frequency,(1),(2),(3)
765 Table: Classification Table
769 ,Observed,,4.000,9.000,
770 Step 1,y,4.000,254,19,93.040
772 ,Overall Percentage,,,,71.000
774 Table: Variables in the Equation
775 ,,B,S.E.,Wald,df,Sig.,Exp(B)
776 Step 1,b1,.002,.001,4.284,1,.038,1.002
777 ,b2,.804,.332,5.872,1,.015,2.235
778 ,bcat,,,20.895,3,.000,
779 ,bcat(1),1.551,.418,13.788,1,.000,4.718
780 ,bcat(2),.876,.367,5.706,1,.017,2.401
781 ,bcat(3),.211,.393,.289,1,.591,1.235
782 ,Constant,-5.541,1.138,23.709,1,.000,.004
789 dnl This example is inspired by http://www.ats.ucla.edu/stat/spss/output/logistic.htm
790 AT_SETUP([LOGISTIC REGRESSION with cat var 2])
792 AT_DATA([lr-cat2.data], [dnl
793 60.00 1.00 8.00 50.00
795 57.00 1.00 7.00 53.00
803 68.00 1.00 9.00 69.00
807 57.00 1.00 7.00 61.00
808 55.00 1.00 8.00 50.00
811 50.00 1.00 9.00 66.00
815 47.00 1.00 7.00 34.00
827 68.00 1.00 9.00 69.00
829 63.00 1.00 9.00 61.00
830 65.00 1.00 9.00 61.00
831 63.00 1.00 9.00 53.00
835 52.00 1.00 7.00 56.00
837 47.00 1.00 7.00 53.00
839 50.00 1.00 8.00 55.00
850 68.00 1.00 9.00 55.00
851 47.00 1.00 8.00 50.00
859 55.00 1.00 9.00 49.00
860 68.00 1.00 8.00 50.00
861 52.00 1.00 9.00 63.00
864 66.00 1.00 9.00 61.00
865 65.00 1.00 7.00 58.00
867 68.00 1.00 7.00 59.00
868 60.00 1.00 9.00 61.00
870 57.00 1.00 7.00 54.00
879 63.00 1.00 7.00 63.00
881 57.00 1.00 8.00 63.00
888 65.00 1.00 9.00 63.00
893 63.00 1.00 9.00 55.00
902 47.00 1.00 9.00 69.00
907 50.00 1.00 7.00 63.00
910 73.00 1.00 9.00 61.00
915 57.00 1.00 8.00 55.00
916 53.00 1.00 8.00 57.00
920 57.00 1.00 8.00 58.00
930 73.00 1.00 8.00 69.00
931 71.00 1.00 9.00 58.00
933 63.00 1.00 7.00 54.00
939 65.00 1.00 8.00 55.00
940 76.00 1.00 9.00 67.00
941 71.00 1.00 8.00 66.00
943 47.00 1.00 9.00 63.00
946 54.00 1.00 9.00 55.00
947 55.00 1.00 8.00 58.00
949 55.00 1.00 9.00 63.00
958 65.00 1.00 9.00 66.00
962 63.00 1.00 8.00 72.00
968 73.00 1.00 9.00 58.00
970 63.00 1.00 9.00 69.00
972 65.00 1.00 9.00 66.00
973 73.00 1.00 8.00 63.00
982 60.00 1.00 9.00 50.00
984 73.00 1.00 9.00 55.00
985 52.00 1.00 8.00 47.00
995 AT_DATA([stringcat.sps], [dnl
997 data list notable file='lr-cat2.data' list /read honcomp wiz science *.
1000 recode wiz (7 = "a") (8 = "b") (9 = "c") into ses.
1002 logistic regression honcomp with read science ses
1007 AT_CHECK([pspp -O format=csv stringcat.sps], [0],
1009 Table: Dependent Variable Encoding
1010 Original Value,Internal Value
1014 Table: Case Processing Summary
1015 Unweighted Cases,N,Percent
1016 Included in Analysis,200,100.000
1017 Missing Cases,0,.000
1020 note: Estimation terminated at iteration number 5 because parameter estimates changed by less than 0.001
1022 Table: Model Summary
1023 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
1026 Table: Categorical Variables' Codings
1027 ,,,Parameter coding,
1033 Table: Classification Table
1035 ,,,honcomp,,"Percentage
1037 ,Observed,,.000,1.000,
1038 Step 1,honcomp,.000,132,15,89.796
1039 ,,1.000,26,27,50.943
1040 ,Overall Percentage,,,,79.500
1042 Table: Variables in the Equation
1043 ,,B,S.E.,Wald,df,Sig.,Exp(B)
1044 Step 1,read,.098,.025,15.199,1,.000,1.103
1045 ,science,.066,.027,5.867,1,.015,1.068
1046 ,ses,,,6.690,2,.035,
1047 ,ses(1),.058,.532,.012,1,.913,1.060
1048 ,ses(2),-1.013,.444,5.212,1,.022,.363
1049 ,Constant,-9.561,1.662,33.113,1,.000,.000
1055 dnl Check that it doesn't crash if a categorical variable
1056 dnl has only one distinct value
1057 AT_SETUP([LOGISTIC REGRESSION identical categories])
1059 AT_DATA([crash.sps], [dnl
1060 data list notable list /y x1 x2*.
1066 logistic regression y with x1 x2
1070 AT_CHECK([pspp -O format=csv crash.sps], [1], [ignore])
1075 dnl Test that missing values on the categorical predictors are treated
1077 AT_SETUP([LOGISTIC REGRESSION missing categoricals])
1079 AT_DATA([data.txt], [dnl
1182 AT_DATA([miss.sps], [dnl
1183 data list notable file='data.txt' list /y x1 cat0*.
1185 logistic regression y with x1 cat0
1186 /categorical = cat0.
1189 AT_CHECK([pspp -O format=csv miss.sps > file1], [0], [ignore])
1191 dnl Append a case with a missing categorical.
1192 AT_CHECK([echo '1 34 .' >> data.txt], [0], [ignore])
1194 AT_CHECK([pspp -O format=csv miss.sps > file2], [0], [ignore])
1196 AT_CHECK([diff file1 file2], [1], [dnl
1198 < Included in Analysis,100,100.00
1199 < Missing Cases,0,.00
1202 > Included in Analysis,100,99.01
1203 > Missing Cases,1,.99