AT_BANNER([LOGISTIC REGRESSION])

dnl The examples below are adapted from
dnl http://www.uvm.edu/~dhowell/gradstat/psych341/lectures/Logistic%20Regression/LogisticReg1.html

dnl LOGIT_TEST_DATA expands to an AT_DATA call that creates lr-data.txt.
dnl Each of the 66 rows holds 11 whitespace-separated fields, which the
dnl tests below read, in order, as:
dnl   id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w
m4_define([LOGIT_TEST_DATA],
  [AT_DATA([lr-data.txt], dnl
105.00 1.00 33.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
106.00 1.00 50.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
107.00 1.00 91.00 3.00 2.00 .28 15.00 7.00 .00897 -1.00905 1
108.00 1.00 90.00 3.00 2.00 .20 2.00 2.00 .00972 -1.00982 1
109.00 1.00 70.00 3.00 3.00 .38 23.00 27.00 .04745 -1.04981 1
111.00 2.00 31.00 2.00 2.00 .00 19.00 10.00 .54159 1.84640 1
112.00 1.00 91.00 2.00 3.00 .18 6.00 16.00 .00897 -1.00905 1
113.00 1.00 81.00 3.00 2.00 .00 3.00 9.00 .01998 -1.02039 1
114.00 2.00 15.00 1.00 2.00 .13 19.00 13.00 .81241 1.23090 1
116.00 2.00 1.00 1.00 2.00 .88 15.00 7.00 .93102 1.07410 1
117.00 1.00 93.00 3.00 2.00 .18 9.00 15.00 .00764 -1.00770 1
118.00 2.00 14.00 1.00 3.00 .15 23.00 18.00 .82447 1.21289 1
120.00 1.00 91.00 2.00 2.00 .43 17.00 14.00 .00897 -1.00905 1
121.00 1.00 55.00 3.00 2.00 .69 20.00 14.00 .14409 -1.16834 1
122.00 1.00 70.00 2.00 3.00 .03 .00 6.00 .04745 -1.04981 1
123.00 1.00 25.00 2.00 2.00 .45 4.00 10.00 .65789 -2.92301 1
125.00 1.00 91.00 2.00 2.00 .13 .00 3.00 .00897 -1.00905 1
126.00 1.00 91.00 3.00 3.00 .23 4.00 6.00 .00897 -1.00905 1
127.00 1.00 91.00 3.00 2.00 .00 8.00 8.00 .00897 -1.00905 1
128.00 2.00 13.00 2.00 2.00 .65 16.00 14.00 .83592 1.19629 1
129.00 1.00 50.00 2.00 2.00 .25 20.00 23.00 .20168 -1.25264 1
135.00 1.00 90.00 3.00 3.00 .03 5.00 12.00 .00972 -1.00982 1
138.00 1.00 70.00 3.00 3.00 .10 1.00 6.00 .04745 -1.04981 1
139.00 2.00 19.00 3.00 3.00 .10 11.00 12.00 .75787 1.31949 1
149.00 2.00 50.00 3.00 2.00 .03 .00 .00 .20168 4.95826 1
204.00 1.00 50.00 3.00 1.00 .13 .00 1.00 .20168 -1.25264 1
205.00 1.00 91.00 3.00 3.00 .72 16.00 18.00 .00897 -1.00905 1
206.00 2.00 24.00 1.00 1.00 .10 5.00 21.00 .67592 1.47947 1
207.00 1.00 80.00 3.00 3.00 .13 6.00 7.00 .02164 -1.02212 1
208.00 1.00 87.00 2.00 2.00 .18 9.00 20.00 .01237 -1.01253 1
209.00 1.00 70.00 2.00 2.00 .53 15.00 12.00 .04745 -1.04981 1
211.00 1.00 55.00 2.00 1.00 .33 8.00 5.00 .14409 -1.16834 1
212.00 1.00 56.00 3.00 1.00 .30 6.00 20.00 .13436 -1.15522 1
214.00 1.00 54.00 2.00 2.00 .15 .00 16.00 .15439 -1.18258 1
215.00 1.00 71.00 3.00 3.00 .35 12.00 12.00 .04391 -1.04592 1
217.00 2.00 36.00 1.00 1.00 .10 12.00 8.00 .44049 2.27020 1
218.00 1.00 91.00 2.00 2.00 .05 11.00 25.00 .00897 -1.00905 1
219.00 1.00 91.00 2.00 2.00 1.23 11.00 24.00 .00897 -1.00905 1
220.00 1.00 91.00 2.00 3.00 .08 8.00 11.00 .00897 -1.00905 1
221.00 1.00 91.00 2.00 2.00 .33 5.00 11.00 .00897 -1.00905 1
222.00 2.00 36.00 2.00 1.00 .18 5.00 3.00 .44049 2.27020 1
223.00 1.00 70.00 2.00 3.00 .18 14.00 3.00 .04745 -1.04981 1
224.00 1.00 91.00 2.00 2.00 .43 2.00 10.00 .00897 -1.00905 1
225.00 1.00 55.00 2.00 1.00 .18 6.00 11.00 .14409 -1.16834 1
229.00 2.00 75.00 2.00 2.00 .40 30.00 25.00 .03212 31.12941 1
232.00 1.00 91.00 3.00 2.00 .15 6.00 3.00 .00897 -1.00905 1
233.00 1.00 70.00 2.00 1.00 .00 11.00 8.00 .04745 -1.04981 1
234.00 1.00 54.00 3.00 2.00 .10 .00 .00 .15439 -1.18258 1
237.00 1.00 70.00 3.00 2.00 .18 5.00 25.00 .04745 -1.04981 1
241.00 1.00 19.00 2.00 3.00 .33 13.00 9.00 .75787 -4.12995 1
304.00 2.00 18.00 2.00 2.00 .26 25.00 6.00 .77245 1.29458 1
305.00 1.00 88.00 3.00 2.00 1.35 17.00 29.00 .01142 -1.01155 1
306.00 1.00 70.00 2.00 3.00 .63 14.00 33.00 .04745 -1.04981 1
307.00 1.00 85.00 2.00 2.00 2.65 18.00 14.00 .01452 -1.01474 1
308.00 1.00 13.00 2.00 2.00 .23 5.00 5.00 .83592 -6.09442 1
309.00 2.00 13.00 2.00 2.00 .23 7.00 17.00 .83592 1.19629 1
311.00 2.00 1.00 2.00 2.00 .50 20.00 14.00 .93102 1.07410 1
315.00 1.00 19.00 2.00 3.00 .18 1.00 11.00 .75787 -4.12995 1
316.00 1.00 88.00 2.00 2.00 .38 12.00 11.00 .01142 -1.01155 2
318.00 1.00 88.00 3.00 2.00 .03 5.00 5.00 .01142 -1.01155 3
319.00 2.00 18.00 2.00 3.00 .30 15.00 16.00 .77245 1.29458 1
321.00 2.00 15.00 2.00 2.00 .63 15.00 18.00 .81241 1.23090 1
322.00 1.00 88.00 3.00 2.00 .40 18.00 15.00 .01142 -1.01155 1
325.00 2.00 18.00 2.00 3.00 1.00 28.00 18.00 .77245 1.29458 1
329.00 1.00 88.00 3.00 2.00 .03 7.00 11.00 .01142 -1.01155 4
332.00 2.00 2.00 2.00 2.00 .05 8.00 9.00 .92562 1.08036 1
)])

dnl Note: in the data above, cases 305, 316, 318 and 329 have identical
dnl values of the 2nd and 3rd variables.  The weights test relies on this.


dnl Regress outcome on survrate alone and compare the complete CSV output.
AT_SETUP([LOGISTIC REGRESSION basic test])

LOGIT_TEST_DATA

AT_DATA([lr-data.sps], [dnl
set format = F12.3.
set decimal dot.
data list notable file='lr-data.txt' list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.

logistic regression
	variables = outcome with survrate
	.
])

AT_CHECK([pspp -O format=csv lr-data.sps], [0], [dnl
Table: Dependent Variable Encoding
Original Value,Internal Value
1.000,0
2.000,1

Table: Case Processing Summary
Unweighted Cases,N,Percent
Included in Analysis,66,100.000
Missing Cases,0,.000
Total,66,100.000

note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001

Table: Model Summary
Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
,37.323,.455,.659

Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
Step 1,survrate,-.081,.019,17.756,1,.000,.922
,Constant,2.684,.811,10.941,1,.001,14.639
])

AT_CLEANUP


dnl Run the same script twice, appending three extra cases to the data
dnl file in between, and compare the two outputs.
AT_SETUP([LOGISTIC REGRESSION missing values])

LOGIT_TEST_DATA

AT_DATA([lr-data.sps], [dnl
set format = F12.3.
set decimal dot.
data list notable file='lr-data.txt' list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.

missing values survrate (999) avoid (44444).

logistic regression
	variables = outcome with survrate avoid
	.
])

dnl First run: the data file as LOGIT_TEST_DATA wrote it.
AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore])

dnl Append three cases: two with survrate = 999 and one with
dnl avoid = 44444, the values the script declares as missing.
cat >> lr-data.txt << HERE
105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1
HERE

dnl Second run: the data file with the three extra cases.
AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore])

dnl Only the summary information should be different
AT_CHECK([diff run0 run1], [1], [dnl
8,10c8,10
< Included in Analysis,66,100.000
< Missing Cases,0,.000
< Total,66,100.000
---
> Included in Analysis,66,95.652
> Missing Cases,3,4.348
> Total,69,100.000
])

AT_CLEANUP


dnl Check that a weighted dataset is interpreted correctly.
dnl The same data file is analysed twice: once unweighted with every
dnl case, and once weighted by w with cases 305, 316 and 318 filtered
dnl out.  Case 329, which stays in, carries w = 4 in the data file.
AT_SETUP([LOGISTIC REGRESSION weights])

LOGIT_TEST_DATA

AT_DATA([lr-data-unweighted.sps], [dnl
set format = F12.3.
set decimal dot.
data list notable file='lr-data.txt' list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.

logistic regression
	variables = outcome with survrate
	.
])

AT_DATA([lr-data-weighted.sps], [dnl
set format = F12.3.
set decimal dot.
data list notable file='lr-data.txt' list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.

weight by w.

* Omit duplicate cases.
select if id <> 305 and id <> 316 and id <> 318.

logistic regression
	variables = outcome with survrate
	.
])

AT_CHECK([pspp -O format=csv lr-data-unweighted.sps > unweighted-result], [0], [ignore])
AT_CHECK([pspp -O format=csv lr-data-weighted.sps > weighted-result], [0], [ignore])

dnl The only difference should be the summary information, since
dnl this displays the unweighted totals.
AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
8c8
< Included in Analysis,66,100.000
---
> Included in Analysis,63,100.000
10c10
< Total,66,100.000
---
> Total,63,100.000
])

AT_CLEANUP


dnl Check that the /NOCONST option works as intended.
dnl The results this produces are very similar to those
dnl at the example in http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
dnl
dnl The script generates 200 cases with INPUT PROGRAM; female is the
dnl value of (#i > 91), and the only predictor is a variable that is
dnl always 1, fitted with /noconst.
AT_SETUP([LOGISTIC REGRESSION without constant])

AT_DATA([non-const.sps], [dnl
set format=F20.3.

input program.
loop #i = 1 to 200.
compute female = (#i > 91).
end case.
end loop.
end file.
end input program.

compute constant = 1.

logistic regression female with constant /noconst.
])

AT_CHECK([pspp -O format=csv non-const.sps], [0], [dnl
Table: Dependent Variable Encoding
Original Value,Internal Value
.00,0
1.00,1

Table: Case Processing Summary
Unweighted Cases,N,Percent
Included in Analysis,200,100.000
Missing Cases,0,.000
Total,200,100.000

note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001

Table: Model Summary
Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
,275.637,.008,.011

Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
Step 1,constant,.180,.142,1.616,1,.204,1.198
])

AT_CLEANUP


dnl Check that if somebody passes a dependent variable which is not
dnl dichotomous, then an error is raised.  Here y takes the three
dnl distinct values 1, 0 and 2, and pspp is expected to exit with status 1.
AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])

AT_DATA([non-dich.sps], [dnl
data list notable list /y x1 x2 x3 x4.
begin data.
1 2 3 4 5
0 2 3 4 8
2 3 4 5 6
end data.

logistic regression y with x1 x2 x3 x4.
])

AT_CHECK([pspp -O format=csv non-dich.sps], [1], [dnl
error: Dependent variable's values are not dichotomous.
])

AT_CLEANUP