AT_BANNER([LOGISTIC REGRESSION]) dnl These examples are adapted from dnl http://www.uvm.edu/~dhowell/gradstat/psych341/lectures/Logistic%20Regression/LogisticReg1.html m4_define([LOGIT_TEST_DATA], [AT_DATA([lr-data.txt], dnl 105.00 1.00 33.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1 106.00 1.00 50.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1 107.00 1.00 91.00 3.00 2.00 .28 15.00 7.00 .00897 -1.00905 1 108.00 1.00 90.00 3.00 2.00 .20 2.00 2.00 .00972 -1.00982 1 109.00 1.00 70.00 3.00 3.00 .38 23.00 27.00 .04745 -1.04981 1 111.00 2.00 31.00 2.00 2.00 .00 19.00 10.00 .54159 1.84640 1 112.00 1.00 91.00 2.00 3.00 .18 6.00 16.00 .00897 -1.00905 1 113.00 1.00 81.00 3.00 2.00 .00 3.00 9.00 .01998 -1.02039 1 114.00 2.00 15.00 1.00 2.00 .13 19.00 13.00 .81241 1.23090 1 116.00 2.00 1.00 1.00 2.00 .88 15.00 7.00 .93102 1.07410 1 117.00 1.00 93.00 3.00 2.00 .18 9.00 15.00 .00764 -1.00770 1 118.00 2.00 14.00 1.00 3.00 .15 23.00 18.00 .82447 1.21289 1 120.00 1.00 91.00 2.00 2.00 .43 17.00 14.00 .00897 -1.00905 1 121.00 1.00 55.00 3.00 2.00 .69 20.00 14.00 .14409 -1.16834 1 122.00 1.00 70.00 2.00 3.00 .03 .00 6.00 .04745 -1.04981 1 123.00 1.00 25.00 2.00 2.00 .45 4.00 10.00 .65789 -2.92301 1 125.00 1.00 91.00 2.00 2.00 .13 .00 3.00 .00897 -1.00905 1 126.00 1.00 91.00 3.00 3.00 .23 4.00 6.00 .00897 -1.00905 1 127.00 1.00 91.00 3.00 2.00 .00 8.00 8.00 .00897 -1.00905 1 128.00 2.00 13.00 2.00 2.00 .65 16.00 14.00 .83592 1.19629 1 129.00 1.00 50.00 2.00 2.00 .25 20.00 23.00 .20168 -1.25264 1 135.00 1.00 90.00 3.00 3.00 .03 5.00 12.00 .00972 -1.00982 1 138.00 1.00 70.00 3.00 3.00 .10 1.00 6.00 .04745 -1.04981 1 139.00 2.00 19.00 3.00 3.00 .10 11.00 12.00 .75787 1.31949 1 149.00 2.00 50.00 3.00 2.00 .03 .00 .00 .20168 4.95826 1 204.00 1.00 50.00 3.00 1.00 .13 .00 1.00 .20168 -1.25264 1 205.00 1.00 91.00 3.00 3.00 .72 16.00 18.00 .00897 -1.00905 1 206.00 2.00 24.00 1.00 1.00 .10 5.00 21.00 .67592 1.47947 1 207.00 1.00 80.00 3.00 3.00 .13 6.00 7.00 .02164 -1.02212 1 208.00 1.00 87.00 2.00 2.00 .18 9.00 20.00 .01237 -1.01253 1 209.00 1.00 70.00 2.00 2.00 .53 15.00 12.00 .04745 -1.04981 1 211.00 1.00 55.00 2.00 1.00 .33 8.00 5.00 .14409 -1.16834 1 212.00 1.00 56.00 3.00 1.00 .30 6.00 20.00 .13436 -1.15522 1 214.00 1.00 54.00 2.00 2.00 .15 .00 16.00 .15439 -1.18258 1 215.00 1.00 71.00 3.00 3.00 .35 12.00 12.00 .04391 -1.04592 1 217.00 2.00 36.00 1.00 1.00 .10 12.00 8.00 .44049 2.27020 1 218.00 1.00 91.00 2.00 2.00 .05 11.00 25.00 .00897 -1.00905 1 219.00 1.00 91.00 2.00 2.00 1.23 11.00 24.00 .00897 -1.00905 1 220.00 1.00 91.00 2.00 3.00 .08 8.00 11.00 .00897 -1.00905 1 221.00 1.00 91.00 2.00 2.00 .33 5.00 11.00 .00897 -1.00905 1 222.00 2.00 36.00 2.00 1.00 .18 5.00 3.00 .44049 2.27020 1 223.00 1.00 70.00 2.00 3.00 .18 14.00 3.00 .04745 -1.04981 1 224.00 1.00 91.00 2.00 2.00 .43 2.00 10.00 .00897 -1.00905 1 225.00 1.00 55.00 2.00 1.00 .18 6.00 11.00 .14409 -1.16834 1 229.00 2.00 75.00 2.00 2.00 .40 30.00 25.00 .03212 31.12941 1 232.00 1.00 91.00 3.00 2.00 .15 6.00 3.00 .00897 -1.00905 1 233.00 1.00 70.00 2.00 1.00 .00 11.00 8.00 .04745 -1.04981 1 234.00 1.00 54.00 3.00 2.00 .10 .00 .00 .15439 -1.18258 1 237.00 1.00 70.00 3.00 2.00 .18 5.00 25.00 .04745 -1.04981 1 241.00 1.00 19.00 2.00 3.00 .33 13.00 9.00 .75787 -4.12995 1 304.00 2.00 18.00 2.00 2.00 .26 25.00 6.00 .77245 1.29458 1 305.00 1.00 88.00 3.00 2.00 1.35 17.00 29.00 .01142 -1.01155 1 306.00 1.00 70.00 2.00 3.00 .63 14.00 33.00 .04745 -1.04981 1 307.00 1.00 85.00 2.00 2.00 2.65 18.00 14.00 .01452 -1.01474 1 308.00 1.00 13.00 2.00 2.00 .23 5.00 5.00 
.83592 -6.09442 1 309.00 2.00 13.00 2.00 2.00 .23 7.00 17.00 .83592 1.19629 1 311.00 2.00 1.00 2.00 2.00 .50 20.00 14.00 .93102 1.07410 1 315.00 1.00 19.00 2.00 3.00 .18 1.00 11.00 .75787 -4.12995 1 316.00 1.00 88.00 2.00 2.00 .38 12.00 11.00 .01142 -1.01155 2 318.00 1.00 88.00 3.00 2.00 .03 5.00 5.00 .01142 -1.01155 3 319.00 2.00 18.00 2.00 3.00 .30 15.00 16.00 .77245 1.29458 1 321.00 2.00 15.00 2.00 2.00 .63 15.00 18.00 .81241 1.23090 1 322.00 1.00 88.00 3.00 2.00 .40 18.00 15.00 .01142 -1.01155 1 325.00 2.00 18.00 2.00 3.00 1.00 28.00 18.00 .77245 1.29458 1 329.00 1.00 88.00 3.00 2.00 .03 7.00 11.00 .01142 -1.01155 4 332.00 2.00 2.00 2.00 2.00 .05 8.00 9.00 .92562 1.08036 1 )]) dnl Note: In the above data cases 305, 316 318 and 329 have identical values dnl of the 2nd and 3rd variables. We use this for weight testing. AT_SETUP([LOGISTIC REGRESSION basic test]) LOGIT_TEST_DATA AT_DATA([lr-data.sps], [dnl set format = F12.3. set decimal dot. data list notable file='lr-data.txt' list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *. logistic regression variables = outcome with survrate . ]) AT_CHECK([pspp -O format=csv lr-data.sps], [0], [dnl Table: Dependent Variable Encoding Original Value,Internal Value 1.000,0 2.000,1 Table: Case Processing Summary Unweighted Cases,N,Percent Included in Analysis,66,100.000 Missing Cases,0,.000 Total,66,100.000 note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001 Table: Model Summary Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square ,37.323,.455,.659 Table: Classification Table ,,,Predicted,, ,,,outcome,,"Percentage Correct" ,Observed,,1.000,2.000, Step 1,outcome,1.000,43,5,89.583 ,,2.000,4,14,77.778 ,Overall Percentage,,,,86.364 Table: Variables in the Equation ,,B,S.E.,Wald,df,Sig.,Exp(B) Step 1,survrate,-.081,.019,17.756,1,.000,.922 ,Constant,2.684,.811,10.941,1,.001,14.639 ]) AT_CLEANUP AT_SETUP([LOGISTIC REGRESSION missing values]) LOGIT_TEST_DATA AT_DATA([lr-data.sps], [dnl set format = F12.3. set decimal dot. data list notable file='lr-data.txt' list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *. missing values survrate (999) avoid (44444) outcome (99). logistic regression variables = outcome with survrate avoid . ]) AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore]) dnl Append some cases with missing values into the data. cat >> lr-data.txt << HERE 105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1 106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1 107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1 108.00 99 5.00 3.00 2.00 .28 4 34 .00897 -1.00905 1 HERE AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore]) dnl Only the summary information should be different AT_CHECK([diff run0 run1], [1], [dnl 8,10c8,10 < Included in Analysis,66,100.000 < Missing Cases,0,.000 < Total,66,100.000 --- > Included in Analysis,66,94.286 > Missing Cases,4,5.714 > Total,70,100.000 ]) AT_CLEANUP dnl Check that a weighted dataset is interpreted correctly dnl To do this, the same data set is used, one weighted, one not. dnl The weighted dataset omits certain cases which are identical AT_SETUP([LOGISTIC REGRESSION weights]) LOGIT_TEST_DATA AT_DATA([lr-data-unweighted.sps], [dnl set format = F12.3. set decimal dot. data list notable file='lr-data.txt' list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *. logistic regression variables = outcome with survrate . 
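
* For reference, the command above fits the standard binary logistic model
* P(outcome = 2) = 1 / (1 + exp(-(B0 + B1*survrate))), outcome = 2 being the
* value coded internally as 1; the Exp(B) values reported by LOGISTIC
* REGRESSION are exp(B).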
])

AT_DATA([lr-data-weighted.sps], [dnl
set format = F12.3.
set decimal dot.
data list notable file='lr-data.txt' list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.

weight by w.

* Omit duplicate cases.
select if id <> 305 and id <> 316 and id <> 318.

logistic regression variables = outcome with survrate .
])

AT_CHECK([pspp -O format=csv lr-data-unweighted.sps > unweighted-result], [0], [ignore])
AT_CHECK([pspp -O format=csv lr-data-weighted.sps > weighted-result], [0], [ignore])

dnl The only difference should be the summary information, since
dnl this displays the unweighted totals.
AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
8c8
< Included in Analysis,66,100.000
---
> Included in Analysis,63,100.000
10c10
< Total,66,100.000
---
> Total,63,100.000
23,24c23,24
< Step 1,outcome,1.000,43,5,89.583
< ,,2.000,4,14,77.778
---
> Step 1,outcome,1.000,43.000,5.000,89.583
> ,,2.000,4.000,14.000,77.778
])

AT_CLEANUP

dnl Check that the /NOCONST option works as intended.
dnl The results this produces are very similar to those in
dnl the example at http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
AT_SETUP([LOGISTIC REGRESSION without constant])

AT_DATA([non-const.sps], [dnl
set format=F20.3.

input program.
loop #i = 1 to 200.
compute female = (#i > 91).
end case.
end loop.
end file.
end input program.

compute constant = 1.

logistic regression female with constant /noconst.
])

AT_CHECK([pspp -O format=csv non-const.sps], [0], [dnl
Table: Dependent Variable Encoding
Original Value,Internal Value
.00,0
1.00,1

Table: Case Processing Summary
Unweighted Cases,N,Percent
Included in Analysis,200,100.000
Missing Cases,0,.000
Total,200,100.000

note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001

Table: Model Summary
Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
,275.637,.008,.011

Table: Classification Table
,,,Predicted,,
,,,female,,"Percentage Correct"
,Observed,,.00,1.00,
Step 1,female,.00,0,91,.000
,,1.00,0,109,100.000
,Overall Percentage,,,,54.500

Table: Variables in the Equation
,,B,S.E.,Wald,df,Sig.,Exp(B)
Step 1,constant,.180,.142,1.616,1,.204,1.198
])

AT_CLEANUP

dnl Check that if somebody passes a dependent variable which is not dichotomous,
dnl then an error is raised.
AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])

AT_DATA([non-dich.sps], [dnl
data list notable list /y x1 x2 x3 x4.
begin data.
1 2 3 4 5
0 2 3 4 8
2 3 4 5 6
end data.

logistic regression y with x1 x2 x3 x4.
])

AT_CHECK([pspp -O format=csv non-dich.sps], [1], [dnl
error: Dependent variable's values are not dichotomous.
])

AT_CLEANUP

dnl An example to check the behaviour of LOGISTIC REGRESSION with a categorical
dnl variable.
This examṕle was inspired from that at: dnl http://www.ats.ucla.edu/stat/spss/dae/logit.htm AT_SETUP([LOGISTIC REGRESSION with categorical]) AT_DATA([lr-cat.data], [dnl 620 3.07 2 4 800 4.00 3 9 580 3.40 2 4 600 3.13 2 4 540 2.70 2 4 660 3.31 4 4 480 3.58 1 9 620 4.00 1 9 680 3.98 2 9 580 3.40 4 4 760 3.35 3 4 700 3.72 2 4 460 3.64 1 9 540 3.28 3 4 680 3.48 3 4 740 3.31 1 4 460 3.77 3 4 740 3.54 1 4 600 3.63 3 4 620 3.05 2 4 560 3.04 3 4 520 2.70 3 4 640 3.35 3 4 620 3.58 2 4 660 3.70 4 9 500 2.86 4 4 640 3.50 2 4 720 4.00 3 4 720 3.94 3 4 400 3.65 2 4 800 2.90 2 4 520 2.90 3 4 440 3.24 4 4 580 3.51 2 4 500 3.31 3 4 440 3.22 1 4 540 3.17 1 9 420 3.02 1 4 780 3.22 2 9 440 3.13 4 4 800 3.66 1 9 580 3.32 2 9 480 2.67 2 9 700 4.00 1 9 740 2.97 2 9 700 3.83 2 4 640 3.93 2 4 800 3.90 2 4 400 3.38 2 4 700 3.52 2 4 680 3.00 4 9 540 3.20 1 4 580 4.00 2 4 780 4.00 2 9 220 2.83 3 4 580 3.20 2 9 580 3.50 2 4 620 3.30 1 4 520 3.65 4 9 600 3.38 3 9 660 3.77 3 4 580 2.86 4 9 580 3.46 2 9 560 3.36 3 4 740 4.00 3 9 480 3.44 3 4 640 3.19 4 9 600 3.54 1 9 540 3.38 4 4 500 2.81 3 4 360 2.56 3 4 460 3.15 4 4 460 2.63 2 4 440 2.76 2 4 740 3.62 4 4 380 3.38 2 4 640 3.63 1 9 800 3.73 1 4 660 3.67 2 4 760 3.00 2 9 420 2.96 1 4 740 3.74 4 4 800 3.75 2 4 620 3.40 2 4 660 3.67 3 9 400 3.35 3 4 680 3.14 2 4 660 3.47 3 9 660 3.63 2 9 420 3.41 4 4 660 4.00 1 4 680 3.70 2 4 620 3.23 3 9 520 3.35 3 4 500 4.00 3 4 400 3.36 2 4 700 3.56 1 9 540 3.81 1 9 520 2.68 3 9 540 3.50 2 4 700 4.00 2 4 600 3.64 3 9 800 3.31 3 4 520 3.29 1 4 580 3.69 1 4 380 3.43 3 4 560 3.19 3 4 760 2.81 1 9 540 3.13 2 4 660 3.14 2 9 520 3.81 1 9 680 3.19 4 4 540 3.78 4 4 500 3.57 3 4 660 3.49 2 4 340 3.00 2 9 400 3.15 2 9 420 3.92 4 4 760 3.35 2 9 700 2.94 2 4 540 3.04 1 4 780 3.87 4 4 560 3.78 2 4 700 3.82 3 4 400 2.93 3 4 440 3.45 2 9 800 3.47 3 4 340 3.15 3 4 520 4.00 1 9 520 3.15 3 4 600 2.98 2 9 420 2.69 2 4 460 3.44 2 4 620 3.71 1 9 480 3.13 2 4 580 3.40 3 4 540 3.39 3 9 540 3.94 3 4 440 2.98 3 4 380 3.59 4 4 500 2.97 4 4 340 2.92 3 4 440 3.15 2 4 600 3.48 2 4 420 2.67 3 4 460 3.07 2 4 460 3.45 3 9 480 3.39 4 4 480 2.78 3 4 720 3.42 2 9 680 3.67 2 9 800 3.89 2 4 360 3.00 3 4 620 3.17 2 9 700 3.52 4 9 540 3.19 2 4 580 3.30 2 4 800 4.00 3 9 660 3.33 2 4 380 3.34 3 4 720 3.84 3 4 600 3.59 2 4 500 3.03 3 4 640 3.81 2 4 540 3.49 1 9 680 3.85 3 9 540 3.84 2 9 460 2.93 3 4 380 2.94 3 4 620 3.22 2 4 740 3.37 4 4 620 4.00 2 4 800 3.74 1 9 400 3.31 3 4 540 3.46 4 4 620 3.18 2 9 480 2.91 1 9 300 2.84 2 9 440 2.48 4 4 640 2.79 2 4 400 3.23 4 9 680 3.46 2 9 620 3.37 1 9 700 3.92 2 4 620 3.37 2 9 620 3.63 2 4 620 3.95 3 9 560 2.52 2 4 520 2.62 2 4 600 3.35 2 4 700 4.00 1 4 640 3.67 3 4 640 4.00 3 4 520 2.93 4 4 620 3.21 4 4 680 3.99 3 4 660 3.34 3 4 700 3.45 3 4 560 3.36 1 9 800 2.78 2 4 500 3.88 4 4 700 3.65 2 4 680 3.76 3 9 660 3.07 3 4 580 3.46 4 4 460 2.87 2 4 600 3.31 4 4 620 3.94 4 4 400 3.05 2 4 800 3.43 2 9 600 3.58 1 9 580 3.36 2 4 540 3.16 3 4 500 2.71 2 4 600 3.28 3 4 600 2.82 4 4 460 3.58 2 4 520 2.85 3 4 740 3.52 4 9 500 3.95 4 4 560 3.61 3 4 620 3.45 2 9 640 3.51 2 4 660 3.44 2 9 660 2.91 3 9 540 3.28 1 4 560 2.98 1 9 800 3.97 1 4 720 3.77 3 4 720 3.64 1 9 480 3.71 4 9 680 3.34 2 4 680 3.11 2 4 540 2.81 3 4 620 3.75 2 9 540 3.12 1 4 560 3.48 2 9 720 3.40 3 4 680 3.90 1 4 640 3.76 3 4 560 3.16 1 4 520 3.30 2 9 640 3.12 3 4 580 3.57 3 4 540 3.55 4 9 780 3.63 4 9 600 3.89 1 9 800 4.00 1 9 580 3.29 4 4 360 3.27 3 4 800 4.00 2 9 640 3.52 4 4 720 3.45 4 4 580 3.06 2 4 580 3.02 2 4 500 3.60 3 9 580 3.12 3 9 600 2.82 4 4 620 3.99 3 4 700 4.00 3 4 480 
4.00 2 4 560 2.95 2 4 560 4.00 3 4 560 2.65 3 9 400 3.08 2 4 480 2.62 2 9 640 3.86 3 4 480 3.57 2 4 540 3.51 2 4 380 3.33 4 4 680 3.64 3 4 400 3.51 3 4 340 2.90 1 4 700 3.08 2 4 480 3.02 1 9 600 3.15 2 9 780 3.80 3 9 520 3.74 2 9 520 3.51 2 4 640 3.73 3 4 560 3.32 4 4 620 2.85 2 4 700 3.28 1 4 760 4.00 1 9 800 3.60 2 4 580 3.34 2 4 540 3.77 2 9 640 3.17 2 4 540 3.02 4 4 680 3.08 4 4 680 3.31 2 4 680 2.96 3 9 700 2.88 2 4 580 3.77 4 4 540 3.49 2 9 700 3.56 2 9 600 3.56 2 9 560 3.59 2 4 640 2.94 2 9 560 3.33 4 4 620 3.69 3 4 680 3.27 2 9 460 3.14 3 4 500 3.53 4 4 620 3.33 3 4 600 3.62 3 4 500 3.01 4 4 740 3.34 4 4 560 3.69 3 9 620 3.95 3 9 740 3.86 2 9 800 3.53 1 9 620 3.78 3 4 700 3.27 2 4 540 3.78 2 9 700 3.65 2 4 800 3.22 1 9 560 3.59 2 9 800 3.15 4 4 520 3.90 3 9 520 3.74 4 9 480 2.55 1 4 800 4.00 4 4 620 3.09 4 4 560 3.49 4 4 500 3.17 3 4 480 3.40 2 4 460 2.98 1 4 580 3.58 1 9 640 3.30 2 4 480 3.45 2 4 440 3.17 2 4 660 3.32 1 4 500 3.08 3 4 660 3.94 2 4 720 3.31 1 4 460 3.64 3 9 500 2.93 4 4 800 3.54 3 4 580 2.93 2 4 620 3.61 1 9 500 2.98 3 4 660 4.00 2 9 560 3.24 4 4 560 2.42 2 4 580 3.80 2 4 500 3.23 4 4 680 2.42 1 9 580 3.46 3 4 800 3.91 3 4 700 2.90 4 4 520 3.12 2 4 300 2.92 4 4 560 3.43 3 4 620 3.63 3 4 500 2.79 4 4 360 3.14 1 4 640 3.94 2 9 460 3.99 3 9 300 3.01 3 4 520 2.73 2 4 600 3.47 2 9 580 3.25 1 4 520 3.10 4 4 620 3.43 3 4 380 2.91 4 4 660 3.59 3 4 660 3.95 2 9 540 3.33 3 4 740 4.00 3 4 640 3.38 3 4 600 3.89 3 4 720 3.88 3 4 580 4.00 3 4 420 2.26 4 4 520 4.00 2 9 800 3.70 1 9 700 4.00 1 9 480 3.43 2 4 660 3.45 4 4 520 3.25 3 4 560 2.71 3 4 600 3.32 2 4 580 2.88 2 4 660 3.88 2 9 600 3.22 1 4 580 4.00 1 4 660 3.60 3 9 500 3.35 2 4 520 2.98 2 4 660 3.49 2 9 560 3.07 2 4 500 3.13 2 9 720 3.50 3 9 440 3.39 2 9 640 3.95 2 9 380 3.61 3 4 800 3.05 2 9 520 3.19 3 9 600 3.40 3 4 ]) AT_DATA([lr-cat.sps], [dnl set format=F20.3. data list notable list file='lr-cat.data' /b1 b2 bcat y. logistic regression y with b1 b2 bcat /categorical = bcat . 
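
* For reference, /categorical recodes bcat into indicator contrasts with the
* highest category (4.000) as the reference, so bcat(1), bcat(2) and bcat(3)
* in the output correspond to categories 1.000, 2.000 and 3.000 respectively.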
]) AT_CHECK([pspp -O format=csv lr-cat.sps], [0], [dnl Table: Dependent Variable Encoding Original Value,Internal Value 4.000,0 9.000,1 Table: Case Processing Summary Unweighted Cases,N,Percent Included in Analysis,400,100.000 Missing Cases,0,.000 Total,400,100.000 note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001 Table: Model Summary Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square ,458.517,.098,.138 Table: Categorical Variables' Codings ,,,Parameter coding,, ,,Frequency,(1),(2),(3) bcat,1.000,61,1,0,0 ,2.000,151,0,1,0 ,3.000,121,0,0,1 ,4.000,67,0,0,0 Table: Classification Table ,,,Predicted,, ,,,y,,"Percentage Correct" ,Observed,,4.000,9.000, Step 1,y,4.000,254,19,93.040 ,,9.000,97,30,23.622 ,Overall Percentage,,,,71.000 Table: Variables in the Equation ,,B,S.E.,Wald,df,Sig.,Exp(B) Step 1,b1,.002,.001,4.284,1,.038,1.002 ,b2,.804,.332,5.872,1,.015,2.235 ,bcat,,,20.895,3,.000, ,bcat(1),1.551,.418,13.788,1,.000,4.718 ,bcat(2),.876,.367,5.706,1,.017,2.401 ,bcat(3),.211,.393,.289,1,.591,1.235 ,Constant,-5.541,1.138,23.709,1,.000,.004 ]) AT_CLEANUP dnl This example is inspired by http://www.ats.ucla.edu/stat/spss/output/logistic.htm AT_SETUP([LOGISTIC REGRESSION with cat var 2]) AT_DATA([lr-cat2.data], [dnl 60.00 1.00 8.00 50.00 47.00 .00 9.00 42.00 57.00 1.00 7.00 53.00 60.00 .00 8.00 53.00 68.00 .00 8.00 66.00 63.00 .00 8.00 55.00 65.00 .00 8.00 63.00 52.00 .00 8.00 61.00 34.00 .00 9.00 42.00 37.00 .00 8.00 39.00 68.00 1.00 9.00 69.00 60.00 .00 9.00 61.00 44.00 .00 9.00 58.00 42.00 .00 8.00 47.00 57.00 1.00 7.00 61.00 55.00 1.00 8.00 50.00 55.00 .00 9.00 58.00 44.00 .00 8.00 63.00 50.00 1.00 9.00 66.00 44.00 .00 8.00 39.00 55.00 .00 8.00 58.00 44.00 .00 8.00 50.00 47.00 1.00 7.00 34.00 48.00 .00 8.00 44.00 45.00 .00 7.00 31.00 43.00 .00 8.00 50.00 39.00 .00 8.00 42.00 63.00 .00 9.00 50.00 47.00 .00 8.00 58.00 42.00 .00 7.00 50.00 50.00 .00 9.00 36.00 47.00 .00 7.00 33.00 60.00 .00 9.00 61.00 47.00 .00 7.00 42.00 68.00 1.00 9.00 69.00 52.00 .00 8.00 54.00 63.00 1.00 9.00 61.00 65.00 1.00 9.00 61.00 63.00 1.00 9.00 53.00 57.00 .00 8.00 51.00 34.00 .00 8.00 36.00 50.00 .00 8.00 39.00 52.00 1.00 7.00 56.00 45.00 .00 7.00 34.00 47.00 1.00 7.00 53.00 34.00 .00 7.00 39.00 50.00 1.00 8.00 55.00 60.00 .00 9.00 58.00 63.00 .00 8.00 58.00 35.00 .00 7.00 51.00 50.00 .00 8.00 58.00 68.00 .00 8.00 63.00 41.00 .00 9.00 34.00 47.00 .00 8.00 47.00 76.00 .00 9.00 64.00 44.00 .00 8.00 44.00 36.00 .00 9.00 50.00 68.00 1.00 9.00 55.00 47.00 1.00 8.00 50.00 50.00 .00 7.00 53.00 68.00 .00 8.00 74.00 39.00 .00 7.00 44.00 50.00 .00 8.00 55.00 52.00 .00 9.00 61.00 47.00 .00 8.00 53.00 39.00 .00 7.00 47.00 55.00 1.00 9.00 49.00 68.00 1.00 8.00 50.00 52.00 1.00 9.00 63.00 55.00 .00 8.00 58.00 57.00 .00 8.00 55.00 66.00 1.00 9.00 61.00 65.00 1.00 7.00 58.00 42.00 .00 7.00 42.00 68.00 1.00 7.00 59.00 60.00 1.00 9.00 61.00 52.00 .00 8.00 55.00 57.00 1.00 7.00 54.00 42.00 .00 9.00 50.00 42.00 .00 8.00 47.00 57.00 .00 8.00 50.00 47.00 .00 7.00 45.00 44.00 .00 7.00 40.00 43.00 .00 9.00 55.00 31.00 .00 8.00 39.00 37.00 .00 7.00 33.00 63.00 1.00 7.00 63.00 47.00 .00 8.00 39.00 57.00 1.00 8.00 63.00 52.00 .00 8.00 44.00 44.00 .00 7.00 35.00 52.00 .00 7.00 55.00 55.00 .00 7.00 69.00 52.00 .00 8.00 53.00 55.00 .00 9.00 61.00 65.00 1.00 9.00 63.00 55.00 .00 8.00 44.00 63.00 .00 7.00 65.00 44.00 .00 7.00 39.00 47.00 .00 7.00 36.00 63.00 1.00 9.00 55.00 68.00 .00 8.00 66.00 34.00 .00 8.00 39.00 47.00 .00 9.00 50.00 50.00 .00 9.00 58.00 63.00 .00 8.00 66.00 44.00 
.00 7.00 34.00 44.00 .00 8.00 50.00 50.00 .00 8.00 53.00 47.00 1.00 9.00 69.00 65.00 .00 9.00 58.00 57.00 .00 8.00 47.00 39.00 .00 8.00 39.00 47.00 .00 8.00 53.00 50.00 1.00 7.00 63.00 50.00 .00 8.00 50.00 63.00 .00 9.00 53.00 73.00 1.00 9.00 61.00 44.00 .00 7.00 47.00 47.00 .00 8.00 42.00 47.00 .00 8.00 58.00 36.00 .00 7.00 61.00 57.00 1.00 8.00 55.00 53.00 1.00 8.00 57.00 63.00 .00 7.00 66.00 50.00 .00 8.00 34.00 47.00 .00 9.00 48.00 57.00 1.00 8.00 58.00 39.00 .00 8.00 53.00 42.00 .00 8.00 42.00 42.00 .00 9.00 31.00 42.00 .00 8.00 72.00 46.00 .00 8.00 44.00 55.00 .00 8.00 42.00 42.00 .00 8.00 47.00 50.00 .00 8.00 44.00 44.00 .00 9.00 39.00 73.00 1.00 8.00 69.00 71.00 1.00 9.00 58.00 50.00 .00 9.00 49.00 63.00 1.00 7.00 54.00 42.00 .00 8.00 36.00 47.00 .00 7.00 42.00 39.00 .00 9.00 26.00 63.00 .00 8.00 58.00 50.00 .00 8.00 55.00 65.00 1.00 8.00 55.00 76.00 1.00 9.00 67.00 71.00 1.00 8.00 66.00 39.00 .00 9.00 47.00 47.00 1.00 9.00 63.00 60.00 .00 7.00 50.00 63.00 .00 9.00 55.00 54.00 1.00 9.00 55.00 55.00 1.00 8.00 58.00 57.00 .00 8.00 61.00 55.00 1.00 9.00 63.00 42.00 .00 7.00 50.00 50.00 .00 8.00 44.00 55.00 .00 8.00 42.00 42.00 .00 7.00 50.00 34.00 .00 8.00 39.00 65.00 .00 9.00 46.00 52.00 .00 7.00 58.00 44.00 .00 8.00 39.00 65.00 1.00 9.00 66.00 47.00 .00 8.00 42.00 41.00 .00 7.00 39.00 68.00 .00 9.00 63.00 63.00 1.00 8.00 72.00 52.00 .00 8.00 53.00 57.00 .00 8.00 50.00 68.00 .00 8.00 55.00 42.00 .00 8.00 56.00 47.00 .00 8.00 48.00 73.00 1.00 9.00 58.00 39.00 .00 8.00 50.00 63.00 1.00 9.00 69.00 60.00 .00 8.00 55.00 65.00 1.00 9.00 66.00 73.00 1.00 8.00 63.00 52.00 .00 8.00 55.00 36.00 .00 8.00 42.00 28.00 .00 7.00 44.00 47.00 .00 8.00 44.00 57.00 .00 7.00 47.00 34.00 .00 7.00 29.00 47.00 .00 9.00 66.00 57.00 .00 8.00 58.00 60.00 1.00 9.00 50.00 50.00 .00 9.00 47.00 73.00 1.00 9.00 55.00 52.00 1.00 8.00 47.00 55.00 .00 8.00 53.00 47.00 .00 8.00 53.00 50.00 .00 8.00 61.00 61.00 .00 7.00 44.00 52.00 .00 9.00 53.00 47.00 .00 7.00 40.00 47.00 .00 7.00 50.00 ]) AT_DATA([stringcat.sps], [dnl set format=F20.3. data list notable file='lr-cat2.data' list /read honcomp wiz science *. string ses(a1). recode wiz (7 = "a") (8 = "b") (9 = "c") into ses. logistic regression honcomp with read science ses /categorical = ses. ]) AT_CHECK([pspp -O format=csv stringcat.sps], [0], [dnl Table: Dependent Variable Encoding Original Value,Internal Value .000,0 1.000,1 Table: Case Processing Summary Unweighted Cases,N,Percent Included in Analysis,200,100.000 Missing Cases,0,.000 Total,200,100.000 note: Estimation terminated at iteration number 5 because parameter estimates changed by less than 0.001 Table: Model Summary Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square ,165.701,.280,.408 Table: Categorical Variables' Codings ,,,Parameter coding, ,,Frequency,(1),(2) ses,a,47,1,0 ,b,95,0,1 ,c,58,0,0 Table: Classification Table ,,,Predicted,, ,,,honcomp,,"Percentage Correct" ,Observed,,.000,1.000, Step 1,honcomp,.000,132,15,89.796 ,,1.000,26,27,50.943 ,Overall Percentage,,,,79.500 Table: Variables in the Equation ,,B,S.E.,Wald,df,Sig.,Exp(B) Step 1,read,.098,.025,15.199,1,.000,1.103 ,science,.066,.027,5.867,1,.015,1.068 ,ses,,,6.690,2,.035, ,ses(1),.058,.532,.012,1,.913,1.060 ,ses(2),-1.013,.444,5.212,1,.022,.363 ,Constant,-9.561,1.662,33.113,1,.000,.000 ]) AT_CLEANUP dnl Check that it doesn't crash if a categorical variable dnl has only one distinct value AT_SETUP([LOGISTIC REGRESSION identical categories]) AT_DATA([crash.sps], [dnl data list notable list /y x1 x2*. 
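* Note that x2 below takes only the single value 1, so its indicator coding
* yields no usable contrast; the test only requires a clean error exit
* (status 1) rather than a crash.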
begin data 0 1 1 1 2 1 end data. logistic regression y with x1 x2 /categorical = x2. ]) AT_CHECK([pspp -O format=csv crash.sps], [1], [ignore]) AT_CLEANUP dnl Test that missing values on the categorical predictors are treated dnl properly. AT_SETUP([LOGISTIC REGRESSION missing categoricals]) AT_DATA([data.txt], [dnl .00 3.69 .00 .00 1.16 1.00 1.00 -12.99 .00 .00 2.97 1.00 .00 20.48 .00 .00 4.90 .00 1.00 -4.38 .00 .00 -1.69 1.00 1.00 -5.71 .00 1.00 -14.28 .00 .00 9.00 .00 .00 2.89 1.00 .00 13.51 1.00 .00 23.32 1.00 .00 2.31 1.00 .00 -2.07 1.00 1.00 -4.52 1.00 1.00 -5.83 .00 1.00 -1.91 .00 1.00 -11.12 1.00 .00 -1.51 .00 .00 6.59 1.00 .00 19.28 1.00 .00 5.94 .00 .00 8.21 1.00 .00 8.11 1.00 .00 2.49 .00 .00 9.62 .00 1.00 -20.74 1.00 .00 -1.41 1.00 .00 15.15 1.00 .00 9.39 .00 1.00 -15.14 1.00 1.00 -5.86 .00 1.00 -11.64 1.00 1.00 -14.36 .00 1.00 -8.95 1.00 1.00 -16.42 1.00 1.00 -1.04 1.00 .00 12.89 1.00 .00 -7.08 1.00 .00 4.87 1.00 .00 11.53 1.00 1.00 -6.24 1.00 .00 1.25 1.00 .00 4.39 1.00 .00 3.17 .00 .00 19.39 1.00 .00 13.03 1.00 .00 2.43 .00 1.00 -14.73 1.00 .00 8.25 1.00 1.00 -13.28 1.00 .00 5.27 1.00 1.00 -3.46 1.00 .00 13.81 1.00 .00 1.35 1.00 1.00 -3.94 1.00 .00 20.73 1.00 1.00 -15.40 .00 1.00 -11.01 1.00 .00 4.56 .00 1.00 -15.35 1.00 .00 15.21 .00 .00 5.34 1.00 1.00 -21.55 1.00 .00 10.12 1.00 .00 -.73 1.00 .00 15.28 1.00 .00 11.08 1.00 1.00 -8.24 .00 .00 2.46 .00 .00 9.60 .00 .00 11.24 .00 .00 14.13 1.00 .00 19.72 1.00 .00 5.58 .00 .00 26.23 1.00 .00 7.25 .00 1.00 -.79 .00 .00 6.24 .00 1.00 1.16 .00 1.00 -7.89 1.00 1.00 -1.86 1.00 1.00 -10.80 1.00 1.00 -5.51 .00 .00 7.51 .00 .00 11.18 .00 .00 8.73 .00 1.00 -11.21 1.00 1.00 -13.24 .00 .00 19.34 .00 .00 9.32 1.00 .00 17.97 1.00 1.00 -1.56 1.00 1.00 -3.13 .00 .00 3.98 .00 .00 -1.21 1.00 .00 2.37 .00 1.00 -18.03 1.00 ]) AT_DATA([miss.sps], [dnl data list notable file='data.txt' list /y x1 cat0*. logistic regression y with x1 cat0 /categorical = cat0. ]) AT_CHECK([pspp -O format=csv miss.sps > file1], [0], [ignore]) dnl Append a case with a missing categorical. AT_CHECK([echo '1 34 .' >> data.txt], [0], [ignore]) AT_CHECK([pspp -O format=csv miss.sps > file2], [0], [ignore]) AT_CHECK([diff file1 file2], [1], [dnl 8,10c8,10 < Included in Analysis,100,100.00 < Missing Cases,0,.00 < Total,100,100.00 --- > Included in Analysis,100,99.01 > Missing Cases,1,.99 > Total,101,100.00 ]) AT_CLEANUP dnl Check that the confidence intervals are properly reported. dnl Use an example with categoricals, because that was buggy at dnl one point. The data in this example comes from: dnl http://people.ysu.edu/~gchang/SPSSE/SPSS_lab2Regression.pdf AT_SETUP([LOGISTIC REGRESSION confidence interval]) AT_DATA([ci.sps], [dnl set FORMAT=F20.3 data list notable list /disease age sciostat sector savings *. begin data. 
0 33 1 1 1 0 35 1 1 1 0 6 1 1 0 0 60 1 1 1 1 18 3 1 0 0 26 3 1 0 0 6 3 1 0 1 31 2 1 1 1 26 2 1 0 0 37 2 1 0 0 23 1 1 0 0 23 1 1 0 0 27 1 1 1 1 9 1 1 1 1 37 1 2 1 1 22 1 2 1 1 67 1 2 1 0 8 1 2 1 1 6 1 2 1 1 15 1 2 1 1 21 2 2 1 1 32 2 2 1 1 16 1 2 1 0 11 2 2 0 0 14 3 2 0 0 9 2 2 0 0 18 2 2 0 0 2 3 1 0 0 61 3 1 1 0 20 3 1 0 0 16 3 1 0 0 9 2 1 0 0 35 2 1 1 0 4 1 1 1 0 44 3 2 0 1 11 3 2 0 0 3 2 2 1 0 6 3 2 0 1 17 2 2 0 0 1 3 2 1 1 53 2 2 1 1 13 1 2 0 0 24 1 2 0 1 70 1 2 1 1 16 3 2 1 0 12 2 2 1 1 20 3 2 1 0 65 3 2 1 1 40 2 2 0 1 38 2 2 1 1 68 2 2 1 1 74 1 2 1 1 14 1 2 1 1 27 1 2 1 0 31 1 2 1 0 18 1 2 1 0 39 1 2 0 0 50 1 2 1 0 31 1 2 1 0 61 1 2 1 0 18 3 1 0 0 5 3 1 0 0 2 3 1 1 0 16 3 1 0 1 59 3 1 1 0 22 3 1 0 0 24 1 1 1 0 30 1 1 1 0 46 1 1 1 0 28 1 1 0 0 27 1 1 1 1 27 1 1 0 0 28 1 1 1 1 52 1 1 1 0 11 3 1 1 0 6 2 1 1 0 46 3 1 0 1 20 2 1 1 0 3 1 1 1 0 18 2 1 0 0 25 2 1 0 0 6 3 1 1 1 65 3 1 1 0 51 3 1 1 0 39 2 1 1 0 8 1 1 1 0 8 2 1 0 0 14 3 1 0 0 6 3 1 0 0 6 3 1 1 0 7 3 1 0 0 4 3 1 0 0 8 3 1 0 0 9 2 1 0 1 32 3 1 0 0 19 3 1 0 0 11 3 1 0 0 35 3 1 0 0 16 1 1 0 0 1 1 1 1 0 6 1 1 1 0 27 1 1 1 0 25 1 1 1 0 18 1 1 0 0 37 3 1 0 1 33 3 1 0 0 27 2 1 0 0 2 1 1 0 0 8 2 1 0 0 5 1 1 0 0 1 1 1 1 0 32 1 1 0 1 25 1 1 1 0 15 1 2 0 0 15 1 2 1 0 26 1 2 1 1 42 1 2 1 0 7 1 2 1 0 2 1 2 0 1 65 1 2 1 0 33 2 2 1 1 8 2 2 0 0 30 2 2 0 0 5 3 2 0 0 15 3 2 0 1 60 3 2 1 1 13 3 2 1 0 70 3 1 1 0 5 3 1 0 0 3 3 1 1 0 50 2 1 1 0 6 2 1 0 0 12 2 1 1 1 39 3 2 0 0 15 2 2 1 1 35 2 2 0 0 2 2 2 1 0 17 3 2 0 1 43 3 2 1 0 30 2 2 1 0 11 1 2 1 1 39 1 2 1 0 32 1 2 1 0 17 1 2 1 0 3 3 2 1 0 7 3 2 0 0 2 2 2 0 1 64 2 2 1 1 13 1 2 2 1 15 2 2 1 0 48 2 2 1 0 23 1 2 1 1 48 1 2 0 0 25 1 2 1 0 12 1 2 1 1 46 1 2 1 0 79 1 2 1 0 56 1 2 1 0 8 1 2 1 1 29 3 1 0 1 35 3 1 0 1 11 3 1 0 0 69 3 1 1 1 21 3 1 0 0 13 3 1 0 0 21 1 1 1 1 32 1 1 1 1 24 1 1 0 0 24 1 1 1 0 73 1 1 1 0 42 1 1 1 1 34 1 1 1 0 30 2 1 0 0 7 2 1 0 1 29 3 1 0 1 22 3 1 0 0 38 2 1 1 0 13 2 1 1 0 12 2 1 1 0 42 3 1 0 1 17 3 1 0 0 21 3 1 1 0 34 1 1 1 0 1 3 1 0 0 14 2 1 0 0 16 2 1 0 0 9 3 1 0 0 53 3 1 0 0 27 3 1 0 0 15 3 1 0 0 9 3 1 0 0 4 2 1 1 0 10 3 1 1 0 31 3 1 0 0 85 3 1 1 0 24 2 1 0 end data. logistic regression disease WITH age sciostat sector savings /categorical = sciostat sector /print = ci(95). ]) AT_CHECK([pspp -O format=csv ci.sps], [0], [dnl Table: Dependent Variable Encoding Original Value,Internal Value .000,0 1.000,1 Table: Case Processing Summary Unweighted Cases,N,Percent Included in Analysis,196,100.000 Missing Cases,0,.000 Total,196,100.000 note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001 Table: Model Summary Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square ,211.195,.120,.172 Table: Categorical Variables' Codings ,,,Parameter coding, ,,Frequency,(1),(2) sciostat,1.000,77,1,0 ,2.000,49,0,1 ,3.000,70,0,0 sector,1.000,117,1, ,2.000,79,0, Table: Classification Table ,,,Predicted,, ,,,disease,,"Percentage Correct" ,Observed,,.000,1.000, Step 1,disease,.000,131,8,94.245 ,,1.000,41,16,28.070 ,Overall Percentage,,,,75.000 Table: Variables in the Equation ,,,,,,,,95% CI for Exp(B), ,,B,S.E.,Wald,df,Sig.,Exp(B),Lower,Upper Step 1,age,.027,.009,8.647,1,.003,1.027,1.009,1.045 ,savings,.061,.386,.025,1,.874,1.063,.499,2.264 ,sciostat,,,.440,2,.803,,, ,sciostat(1),-.278,.434,.409,1,.522,.757,.323,1.775 ,sciostat(2),-.219,.459,.227,1,.634,.803,.327,1.976 ,sector,,,11.974,1,.001,,, ,sector(1),-1.235,.357,11.974,1,.001,.291,.145,.586 ,Constant,-.814,.452,3.246,1,.072,.443,, ]) AT_CLEANUP
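
dnl For reference when updating the expected values above: the Wald statistic
dnl is (B / S.E.)^2 and the 95% confidence bounds for Exp(B) are
dnl exp(B +/- 1.96 * S.E.).  For example, sector(1) gives
dnl (-1.235 / .357)^2 = 11.97 and age gives exp(.027 +/- 1.96 * .009) =
dnl (1.009, 1.046), which reproduce the tabled 11.974 and (1.009, 1.045) up to
dnl the rounding of the displayed B and S.E.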