X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=tests%2Flanguage%2Fstats%2Fregression.at;h=f3b295adb6aa0c81eaf2d3fa349d9f0a5aed5fa4;hb=8603f5581522bca588e17a2155aff1bec3b0de1b;hp=11ce07fa7aa672f6a8cc6137084313d5f6749cd8;hpb=65a73ff3836bd26f54ad9862959bef918a24b3fc;p=pspp diff --git a/tests/language/stats/regression.at b/tests/language/stats/regression.at index 11ce07fa7a..f3b295adb6 100644 --- a/tests/language/stats/regression.at +++ b/tests/language/stats/regression.at @@ -1,8 +1,9 @@ -AT_BANNER([REGRESSION]) +AT_BANNER([LINEAR REGRESSION]) -AT_SETUP([REGRESSION]) +AT_SETUP([LINEAR REGRESSION - basic]) AT_DATA([regression.sps], [dnl -data list list / v0 to v2. +set format = F22.3. +data list notable list / v0 to v2. begin data 0.65377128 7.735648 -23.97588 -0.13087553 6.142625 -19.63854 @@ -18,48 +19,169 @@ end data regression /variables=v0 v1 v2 /statistics defaults /dependent=v2 /method=enter /save=pred resid. list. ]) -AT_CHECK([pspp -o pspp.csv regression.sps]) -AT_CHECK([cat pspp.csv], [0], [dnl -Table: Reading free-form data from INLINE. -Variable,Format -v0,F8.0 -v1,F8.0 -v2,F8.0 -Table: Model Summary +AT_CHECK([pspp -O format=csv regression.sps], [0], [dnl +Table: Model Summary (v2) ,R,R Square,Adjusted R Square,Std. Error of the Estimate -,.97,.94,.93,1.34 +,.971,.942,.925,1.337 -Table: ANOVA +Table: ANOVA (v2) ,,Sum of Squares,df,Mean Square,F,Significance -,Regression,202.75,2,101.38,56.75,.00 -,Residual,12.50,7,1.79,, -,Total,215.26,9,,, +,Regression,202.753,2,101.376,56.754,.000 +,Residual,12.504,7,1.786,, +,Total,215.256,9,,, -Table: Coefficients +Table: Coefficients (v2) ,,B,Std. Error,Beta,t,Significance -,(Constant),2.19,2.36,.00,.93,.52 -,v0,1.81,1.05,.17,1.72,.12 -,v1,-3.43,.33,-1.03,-10.33,.00 +,(Constant),2.191,2.357,.000,.930,.380 +,v0,1.813,1.053,.171,1.722,.129 +,v1,-3.427,.332,-1.026,-10.334,.000 ,,,,,, Table: Data List v0,v1,v2,RES1,PRED1 -.65,7.74,-23.98,-.84,-23.13 --.13,6.14,-19.64,-.54,-19.10 -.35,7.65,-25.27,-1.87,-23.40 -.69,6.13,-16.57,.97,-17.54 --.07,8.25,-25.80,.40,-26.20 --.34,6.03,-17.57,1.53,-19.10 -.76,9.83,-28.36,1.77,-30.13 --.47,5.34,-16.80,.18,-16.97 --.06,8.84,-29.26,-1.05,-28.21 -.56,6.20,-18.58,-.54,-18.04 +.654,7.736,-23.976,-.84,-23.13 +-.131,6.143,-19.639,-.54,-19.10 +.349,7.651,-25.266,-1.87,-23.40 +.692,6.125,-16.571,.97,-17.54 +-.074,8.246,-25.800,.40,-26.20 +-.344,6.032,-17.567,1.53,-19.10 +.760,9.832,-28.360,1.77,-30.13 +-.470,5.344,-16.795,.18,-16.97 +-.061,8.838,-29.257,-1.05,-28.21 +.562,6.200,-18.582,-.54,-18.04 ]) AT_CLEANUP + +# Test to ensure that the /SAVE subcommand works properly when SPLIT is active +AT_SETUP([LINEAR REGRESSION - SAVE vs SPLITS]) + +# Generate some test data based on a linear model +AT_DATA([gen-data.sps], [dnl +set seed = 1. +input program. +loop #c = 1 to 20. + compute x0 = rv.normal (0,1). + compute x1 = rv.normal (0,2). + compute err = rv.normal (0,0.1). + compute y = 4 - 2 * x0 + 3 * x1 + err. + compute g = (#c > 10). + end case. +end loop. +end file. +end input program. + +print outfile='regdata.txt' /g x0 x1 y err *. +execute. +]) + +AT_CHECK([pspp -O format=csv gen-data.sps], [0], [ignore]) + +# Use our test data to create a predictor and a residual variable +# for G == 0 +AT_DATA([regression0.sps], [dnl +data list notable file='regdata.txt' list /g x0 x1 y err *. + +select if (g = 0). + +regression + /variables = x0 x1 + /dependent = y + /statistics = all + /save = pred resid. + . + +print outfile='outdata-g0.txt' /g x0 x1 y err res1 pred1 *. +execute. +]) + + +AT_CHECK([pspp -O format=csv regression0.sps], [0], [ignore]) + +# Use our test data to create a predictor and a residual variable +# for G == 1 +AT_DATA([regression1.sps], [dnl +data list notable file='regdata.txt' list /g x0 x1 y err *. + +select if (g = 1). + +regression + /variables = x0 x1 + /dependent = y + /statistics = all + /save = pred resid. + . + +print outfile='outdata-g1.txt' /g x0 x1 y err res1 pred1 *. +execute. +]) + + +AT_CHECK([pspp -O format=csv regression1.sps], [0], [ignore]) + +# Use our test data to create a predictor and a residual variable +# The data is split on G +AT_DATA([regression-split.sps], [dnl +data list notable file='regdata.txt' list /g x0 x1 y err *. + +split file by g. + +regression + /variables = x0 x1 + /dependent = y + /statistics = all + /save = pred resid. + . + +print outfile='outdata-split.txt' /g x0 x1 y err res1 pred1 *. +execute. +]) + +AT_CHECK([pspp -O format=csv regression-split.sps], [0], [ignore]) + +# The concatenation of G==0 and G==1 should be identical to the SPLIT data +AT_CHECK([cat outdata-g0.txt outdata-g1.txt | diff outdata-split.txt - ], [0], []) + +AT_CLEANUP + + +# Test that the procedure behaves sensibly when presented with +# multiple dependent variables +AT_SETUP([LINEAR REGRESSION multiple dependent variables]) +AT_DATA([regression.sps], [dnl +set seed = 2. +input program. +loop #c = 1 to 200. + compute x0 = rv.normal (0, 1). + compute x1 = rv.normal (0, 2). + compute err = rv.normal (0, 0.8). + compute y = 2 - 1.5 * x0 + 8.4 * x1 + err. + compute ycopy = y. + end case. +end loop. +end file. +end input program. + +regression + /variables = x0 x1 + /dependent = y ycopy + /statistics = all +]) + +AT_CHECK([pspp -O format=csv regression.sps > output], [0], [ignore]) + + +AT_CHECK([head -16 output > first], [0], []) +AT_CHECK([tail -16 output > second], [0], []) + +AT_CHECK([sed -e 's/ycopy/y/g' second | diff first -], [0], []) + + +AT_CLEANUP + # Tests the QR decomposition used by the REGRESSION command. -AT_SETUP([REGRESSION test of QR decomposition]) +AT_SETUP([LINEAR REGRESSION test of QR decomposition]) AT_DATA([regression.sps], [dnl data list list / v0 to v1. begin data @@ -1573,20 +1695,120 @@ Variable,Format v0,F8.0 v1,F8.0 -Table: Model Summary +Table: Model Summary (v0) ,R,R Square,Adjusted R Square,Std. Error of the Estimate ,.05,.00,.00,8.11 -Table: ANOVA +Table: ANOVA (v0) ,,Sum of Squares,df,Mean Square,F,Significance ,Regression,235.23,1,235.23,3.58,.06 ,Residual,98438.40,1498,65.71,, ,Total,98673.63,1499,,, -Table: Coefficients +Table: Coefficients (v0) ,,B,Std. Error,Beta,t,Significance -,(Constant),1.24,.42,.00,2.95,.21 +,(Constant),1.24,.42,.00,2.95,.00 ,v1,1.37,.72,.05,1.89,.06 ,,,,,, ]) AT_CLEANUP + +AT_SETUP([LINEAR REGRESSION no crash on all missing]) +AT_DATA([regcrash.sps], [dnl +data list list /x * y. +begin data. + . . + . . + . . + . . + . . + . . + . . + . . + . . + . . +end data. + + +regression /variables=x y /dependent=y. +]) + +AT_CHECK([pspp -o pspp.csv regcrash.sps], [1], [ignore], [ignore]) + +AT_CLEANUP + + + +AT_SETUP([LINEAR REGRESSION missing dependent variable]) + +dnl Test for a bug where missing values in the dependent variable were not being +dnl ignored like they should have been. +AT_DATA([reg-mdv-ref.sps], [dnl +data list notable list / v0 to v2. +begin data + 0.65377128 7.735648 -23.97588 +-0.13087553 6.142625 -19.63854 + 0.34880368 7.651430 -25.26557 + 0.69249021 6.125125 -16.57090 +-0.07368178 8.245789 -25.80001 +-0.34404919 6.031540 -17.56743 + 0.75981559 9.832291 -28.35977 +-0.46958313 5.343832 -16.79548 +-0.06108490 8.838262 -29.25689 + 0.56154863 6.200189 -18.58219 +end data +regression /variables=v0 v1 + /statistics defaults + /dependent=v2 + /method=enter. +]) + +AT_CHECK([pspp -o pspp-ref.csv reg-mdv-ref.sps]) + +AT_DATA([reg-mdv.sps], [dnl +data list notable list / v0 to v2. +begin data + 0.65377128 7.735648 -23.97588 +-0.13087553 6.142625 -19.63854 + 0.34880368 7.651430 -25.26557 + 0.69249021 6.125125 -16.57090 +-0.07368178 8.245789 -25.80001 +-0.34404919 6.031540 -17.56743 + 0.75981559 9.832291 -28.35977 +-0.46958313 5.343832 -16.79548 +-0.06108490 8.838262 -29.25689 + 0.56154863 6.200189 -18.58219 + 0.5 8 9 +end data + +missing values v2 (9). + +regression /variables=v0 v1 + /statistics defaults + /dependent=v2 + /method=enter. +]) + +AT_CHECK([pspp -o pspp.csv reg-mdv.sps]) + +AT_CHECK([diff pspp.csv pspp-ref.csv]) + + +AT_CLEANUP + +AT_SETUP([LINEAR REGRESSION with invalid syntax (and empty dataset)]) + +AT_DATA([ss.sps], [dnl +data list notable list / v0 to v2. +begin data +end data. + +regression /variables=v0 v1 + /statistics r coeff anova + /dependent=v2 + /method=enter v2. +]) + +AT_CHECK([pspp ss.sps], [1], [ignore]) + +AT_CLEANUP