From abb90d86c649cef5489d8cad88d29ee042e65552 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 27 Jun 2015 07:42:08 +0200 Subject: [PATCH] Paired samples T-TEST: fix bug calculating correlation coefficient. The value did not properly consider the weights of the data. Thanks to Douglas Bonett for reporting this. --- NEWS | 4 +++ src/language/stats/t-test-paired.c | 2 +- tests/language/stats/t-test.at | 43 ++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index efa5a36c1f..6ff6af7ed7 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,10 @@ Changes since 0.8.5: * The graphical user interface uses Gtk+ version 3 instead of version 2. Accordingly, it has a somewhat different look and feel. + * A bug, where the correlation coefficient in the paired samples t-test + procedure was incorrectly calculated when presented with weighted data, + has been fixed. + Changes from 0.8.4 to 0.8.5: * The FREQUENCIES and CROSSTABS commands can now generate barcharts. diff --git a/src/language/stats/t-test-paired.c b/src/language/stats/t-test-paired.c index 933b0b3553..5cf4a9f29e 100644 --- a/src/language/stats/t-test-paired.c +++ b/src/language/stats/t-test-paired.c @@ -132,7 +132,7 @@ paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *r moments_pass_two (pp->mom0, val0->f, w); moments_pass_two (pp->mom1, val1->f, w); moments_pass_two (pp->mom_diff, val0->f - val1->f, w); - pp->sum_of_prod += val0->f * val1->f; + pp->sum_of_prod += val0->f * val1->f * w; } } casereader_destroy (r); diff --git a/tests/language/stats/t-test.at b/tests/language/stats/t-test.at index 14251c1b01..efd69e5da3 100644 --- a/tests/language/stats/t-test.at +++ b/tests/language/stats/t-test.at @@ -170,6 +170,49 @@ AT_CHECK([pspp -o missing.csv missing.sps]) AT_CHECK([cat missing.csv], [0], [expout]) AT_CLEANUP + +dnl Tests for a bug in the paired samples T test when weighted +dnl Thanks to Douglas Bonett for reporting this. +AT_SETUP([T-TEST weighted paired bug]) +AT_DATA([t-test.sps], [dnl +DATA LIST notable LIST /x y w *. +BEGIN DATA. +1 1 255 +1 2 43 +1 3 216 +2 1 3 +2 2 1 +2 3 12 +END DATA. + +WEIGHT BY w. + +T-TEST + PAIRS = y WITH x (PAIRED) + /MISSING=ANALYSIS + /CRITERIA=CIN(0.95). +]) + +AT_CHECK([pspp -O format=csv t-test.sps], [0], [dnl +Table: Paired Sample Statistics +,,Mean,N,Std. Deviation,S.E. Mean +Pair 1,y,1.94,530.00,.96,.04 +,x,1.03,530.00,.17,.01 + +Table: Paired Samples Correlations +,,N,Correlation,Sig. +Pair 1,y & x,530.00,.11,.008 + +Table: Paired Samples Test +,,Paired Differences,,,,,,, +,,,,,95% Confidence Interval of the Difference,,,, +,,Mean,Std. Deviation,Std. Error Mean,Lower,Upper,t,df,Sig. (2-tailed) +Pair 1,y - x,.91,.95,.04,.83,.99,22.07,529.00,.000 +]) + +AT_CLEANUP + + dnl Tests for a bug in the paired samples T test. dnl Thanks to Mike Griffiths for reporting this problem. AT_SETUP([T-TEST /PAIRS bug]) -- 2.30.2