X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=tests%2Flanguage%2Fstats%2Flogistic.at;h=6952167fa0b58490156132038f10c2af01d3a3c8;hb=672c9b7e1c60763d279ecc781b7bf939b8bab4b4;hp=3d7ae76c38de9de5ac8635edea76e58799fc3b37;hpb=a965acea74a93eba5c78c17c6595eef9a776a464;p=pspp diff --git a/tests/language/stats/logistic.at b/tests/language/stats/logistic.at index 3d7ae76c38..6952167fa0 100644 --- a/tests/language/stats/logistic.at +++ b/tests/language/stats/logistic.at @@ -95,6 +95,7 @@ dnl Note: In the above data cases 305, 316 318 and 329 have identical values dnl of the 2nd and 3rd variables. We use this for weight testing. AT_SETUP([LOGISTIC REGRESSION basic test]) +AT_KEYWORDS([categorical categoricals]) LOGIT_TEST_DATA @@ -109,44 +110,43 @@ logistic regression . ]) -AT_CHECK([pspp -O format=csv lr-data.sps], [0], - [dnl -Table: Dependent Variable Encoding +AT_CHECK([pspp -o pspp.csv -o pspp.txt lr-data.sps], [0], [dnl +note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001 +]) +AT_CHECK([cat pspp.csv], [0], [Table: Dependent Variable Encoding Original Value,Internal Value -1.000,0 -2.000,1 +1.000,.000 +2.000,1.000 Table: Case Processing Summary Unweighted Cases,N,Percent -Included in Analysis,66,100.000 -Missing Cases,0,.000 -Total,66,100.000 +Included in Analysis,66,100.0% +Missing Cases,0,.0% +Total,66,100.0% note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001 Table: Model Summary -Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square -,37.323,.455,.659 +Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square +1,37.323,.455,.659 Table: Classification Table ,,,Predicted,, -,,,outcome,,"Percentage -Correct" +,,,outcome,,Percentage Correct ,Observed,,1.000,2.000, -Step 1,outcome,1.000,43,5,89.583 -,,2.000,4,14,77.778 -,Overall Percentage,,,,86.364 +Step 1,outcome,1.000,43,5,89.6% +,,2.000,4,14,77.8% +,Overall Percentage,,,,86.4% Table: Variables in the Equation ,,B,S.E.,Wald,df,Sig.,Exp(B) Step 1,survrate,-.081,.019,17.756,1,.000,.922 ,Constant,2.684,.811,10.941,1,.001,14.639 ]) - - AT_CLEANUP AT_SETUP([LOGISTIC REGRESSION missing values]) +AT_KEYWORDS([categorical categoricals]) LOGIT_TEST_DATA @@ -178,13 +178,13 @@ AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore]) dnl Only the summary information should be different AT_CHECK([diff run0 run1], [1], [dnl 8,10c8,10 -< Included in Analysis,66,100.000 -< Missing Cases,0,.000 -< Total,66,100.000 +< Included in Analysis,66,100.0% +< Missing Cases,0,.0% +< Total,66,100.0% --- -> Included in Analysis,66,94.286 -> Missing Cases,4,5.714 -> Total,70,100.000 +> Included in Analysis,66,94.3% +> Missing Cases,4,5.7% +> Total,70,100.0% ]) AT_CLEANUP @@ -195,6 +195,7 @@ dnl Check that a weighted dataset is interpreted correctly dnl To do this, the same data set is used, one weighted, one not. dnl The weighted dataset omits certain cases which are identical AT_SETUP([LOGISTIC REGRESSION weights]) +AT_KEYWORDS([categorical categoricals]) LOGIT_TEST_DATA @@ -233,19 +234,19 @@ dnl The only difference should be the summary information, since dnl this displays the unweighted totals. AT_CHECK([diff unweighted-result weighted-result], [1], [dnl 8c8 -< Included in Analysis,66,100.000 +< Included in Analysis,66,100.0% --- -> Included in Analysis,63,100.000 +> Included in Analysis,63,100.0% 10c10 -< Total,66,100.000 +< Total,66,100.0% --- -> Total,63,100.000 -23,24c23,24 -< Step 1,outcome,1.000,43,5,89.583 -< ,,2.000,4,14,77.778 +> Total,63,100.0% +22,23c22,23 +< Step 1,outcome,1.000,43,5,89.6% +< ,,2.000,4,14,77.8% --- -> Step 1,outcome,1.000,43.000,5.000,89.583 -> ,,2.000,4.000,14.000,77.778 +> Step 1,outcome,1.000,43.000,5.000,89.6% +> ,,2.000,4.000,14.000,77.8% ]) @@ -256,6 +257,7 @@ dnl Check that the /NOCONST option works as intended. dnl The results this produces are very similar to those dnl at the example in http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm AT_SETUP([LOGISTIC REGRESSION without constant]) +AT_KEYWORDS([categorical categoricals]) AT_DATA([non-const.sps], [dnl set format=F20.3. @@ -273,33 +275,31 @@ compute constant = 1. logistic regression female with constant /noconst. ]) -AT_CHECK([pspp -O format=csv non-const.sps], [0], - [dnl +AT_CHECK([pspp -O format=csv non-const.sps], [0], [dnl Table: Dependent Variable Encoding Original Value,Internal Value -.00,0 -1.00,1 +.00,.000 +1.00,1.000 Table: Case Processing Summary Unweighted Cases,N,Percent -Included in Analysis,200,100.000 -Missing Cases,0,.000 -Total,200,100.000 +Included in Analysis,200,100.0% +Missing Cases,0,.0% +Total,200,100.0% note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001 Table: Model Summary -Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square -,275.637,.008,.011 +Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square +1,275.637,.008,.011 Table: Classification Table ,,,Predicted,, -,,,female,,"Percentage -Correct" +,,,female,,Percentage Correct ,Observed,,.00,1.00, -Step 1,female,.00,0,91,.000 -,,1.00,0,109,100.000 -,Overall Percentage,,,,54.500 +Step 1,female,.00,0,91,.0% +,,1.00,0,109,100.0% +,Overall Percentage,,,,54.5% Table: Variables in the Equation ,,B,S.E.,Wald,df,Sig.,Exp(B) @@ -313,6 +313,7 @@ AT_CLEANUP dnl Check that if somebody passes a dependent variable which is not dichtomous, dnl then an error is raised. AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var]) +AT_KEYWORDS([categorical categoricals]) AT_DATA([non-dich.sps], [dnl data list notable list /y x1 x2 x3 x4. @@ -338,6 +339,7 @@ dnl An example to check the behaviour of LOGISTIC REGRESSION with a categorical dnl variable. This examṕle was inspired from that at: dnl http://www.ats.ucla.edu/stat/spss/dae/logit.htm AT_SETUP([LOGISTIC REGRESSION with categorical]) +AT_KEYWORDS([categorical categoricals]) AT_DATA([lr-cat.data], [dnl 620 3.07 2 4 @@ -753,28 +755,27 @@ logistic regression . ]) -AT_CHECK([pspp -O format=csv lr-cat.sps], [0], - [dnl +AT_CHECK([pspp -O format=csv lr-cat.sps], [0], [dnl Table: Dependent Variable Encoding Original Value,Internal Value -4.000,0 -9.000,1 +4.000,.000 +9.000,1.000 Table: Case Processing Summary Unweighted Cases,N,Percent -Included in Analysis,400,100.000 -Missing Cases,0,.000 -Total,400,100.000 +Included in Analysis,400,100.0% +Missing Cases,0,.0% +Total,400,100.0% note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001 Table: Model Summary -Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square -,458.517,.098,.138 +Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square +1,458.517,.098,.138 Table: Categorical Variables' Codings -,,,Parameter coding,, -,,Frequency,(1),(2),(3) +,,Frequency,Parameter coding,, +,,,(1),(2),(3) bcat,1.000,61,1,0,0 ,2.000,151,0,1,0 ,3.000,121,0,0,1 @@ -782,12 +783,11 @@ bcat,1.000,61,1,0,0 Table: Classification Table ,,,Predicted,, -,,,y,,"Percentage -Correct" +,,,y,,Percentage Correct ,Observed,,4.000,9.000, -Step 1,y,4.000,254,19,93.040 -,,9.000,97,30,23.622 -,Overall Percentage,,,,71.000 +Step 1,y,4.000,254,19,93.0% +,,9.000,97,30,23.6% +,Overall Percentage,,,,71.0% Table: Variables in the Equation ,,B,S.E.,Wald,df,Sig.,Exp(B) @@ -799,13 +799,13 @@ Step 1,b1,.002,.001,4.284,1,.038,1.002 ,bcat(3),.211,.393,.289,1,.591,1.235 ,Constant,-5.541,1.138,23.709,1,.000,.004 ]) - AT_CLEANUP dnl This example is inspired by http://www.ats.ucla.edu/stat/spss/output/logistic.htm AT_SETUP([LOGISTIC REGRESSION with cat var 2]) +AT_KEYWORDS([categorical categoricals]) AT_DATA([lr-cat2.data], [dnl 60.00 1.00 8.00 50.00 @@ -1022,40 +1022,38 @@ logistic regression honcomp with read science ses ]) -AT_CHECK([pspp -O format=csv stringcat.sps], [0], - [dnl +AT_CHECK([pspp -O format=csv stringcat.sps], [0], [dnl Table: Dependent Variable Encoding Original Value,Internal Value -.000,0 -1.000,1 +.000,.000 +1.000,1.000 Table: Case Processing Summary Unweighted Cases,N,Percent -Included in Analysis,200,100.000 -Missing Cases,0,.000 -Total,200,100.000 +Included in Analysis,200,100.0% +Missing Cases,0,.0% +Total,200,100.0% note: Estimation terminated at iteration number 5 because parameter estimates changed by less than 0.001 Table: Model Summary -Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square -,165.701,.280,.408 +Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square +1,165.701,.280,.408 Table: Categorical Variables' Codings -,,,Parameter coding, -,,Frequency,(1),(2) +,,Frequency,Parameter coding, +,,,(1),(2) ses,a,47,1,0 ,b,95,0,1 ,c,58,0,0 Table: Classification Table ,,,Predicted,, -,,,honcomp,,"Percentage -Correct" +,,,honcomp,,Percentage Correct ,Observed,,.000,1.000, -Step 1,honcomp,.000,132,15,89.796 -,,1.000,26,27,50.943 -,Overall Percentage,,,,79.500 +Step 1,honcomp,.000,132,15,89.8% +,,1.000,26,27,50.9% +,Overall Percentage,,,,79.5% Table: Variables in the Equation ,,B,S.E.,Wald,df,Sig.,Exp(B) @@ -1073,6 +1071,7 @@ AT_CLEANUP dnl Check that it doesn't crash if a categorical variable dnl has only one distinct value AT_SETUP([LOGISTIC REGRESSION identical categories]) +AT_KEYWORDS([categorical categoricals]) AT_DATA([crash.sps], [dnl data list notable list /y x1 x2*. @@ -1093,6 +1092,7 @@ AT_CLEANUP dnl Test that missing values on the categorical predictors are treated dnl properly. AT_SETUP([LOGISTIC REGRESSION missing categoricals]) +AT_KEYWORDS([categorical categoricals]) AT_DATA([data.txt], [dnl .00 3.69 .00 @@ -1213,13 +1213,13 @@ AT_CHECK([pspp -O format=csv miss.sps > file2], [0], [ignore]) AT_CHECK([diff file1 file2], [1], [dnl 8,10c8,10 -< Included in Analysis,100,100.00 -< Missing Cases,0,.00 -< Total,100,100.00 +< Included in Analysis,100,100.0% +< Missing Cases,0,.0% +< Total,100,100.0% --- -> Included in Analysis,100,99.01 -> Missing Cases,1,.99 -> Total,101,100.00 +> Included in Analysis,100,99.0% +> Missing Cases,1,1.0% +> Total,101,100.0% ]) AT_CLEANUP @@ -1230,6 +1230,7 @@ dnl Use an example with categoricals, because that was buggy at dnl one point. The data in this example comes from: dnl http://people.ysu.edu/~gchang/SPSSE/SPSS_lab2Regression.pdf AT_SETUP([LOGISTIC REGRESSION confidence interval]) +AT_KEYWORDS([categorical categoricals]) AT_DATA([ci.sps], [dnl set FORMAT=F20.3 @@ -1442,24 +1443,24 @@ logistic regression AT_CHECK([pspp -O format=csv ci.sps], [0], [dnl Table: Dependent Variable Encoding Original Value,Internal Value -.000,0 -1.000,1 +.000,.000 +1.000,1.000 Table: Case Processing Summary Unweighted Cases,N,Percent -Included in Analysis,196,100.000 -Missing Cases,0,.000 -Total,196,100.000 +Included in Analysis,196,100.0% +Missing Cases,0,.0% +Total,196,100.0% note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001 Table: Model Summary -Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square -,211.195,.120,.172 +Step,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square +1,211.195,.120,.172 Table: Categorical Variables' Codings -,,,Parameter coding, -,,Frequency,(1),(2) +,,Frequency,Parameter coding, +,,,(1),(2) sciostat,1.000,77,1,0 ,2.000,49,0,1 ,3.000,70,0,0 @@ -1468,16 +1469,15 @@ sector,1.000,117,1, Table: Classification Table ,,,Predicted,, -,,,disease,,"Percentage -Correct" +,,,disease,,Percentage Correct ,Observed,,.000,1.000, -Step 1,disease,.000,131,8,94.245 -,,1.000,41,16,28.070 -,Overall Percentage,,,,75.000 +Step 1,disease,.000,131,8,94.2% +,,1.000,41,16,28.1% +,Overall Percentage,,,,75.0% Table: Variables in the Equation -,,,,,,,,95% CI for Exp(B), -,,B,S.E.,Wald,df,Sig.,Exp(B),Lower,Upper +,,B,S.E.,Wald,df,Sig.,Exp(B),95% CI for Exp(B), +,,,,,,,,Lower,Upper Step 1,age,.027,.009,8.647,1,.003,1.027,1.009,1.045 ,savings,.061,.386,.025,1,.874,1.063,.499,2.264 ,sciostat,,,.440,2,.803,,,