From 4d3a7b39fb21058ba0d5637c48f0abf7a666f018 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Fri, 5 Feb 2016 13:00:35 +0100 Subject: [PATCH] Correct error in Mann-Whitney test Fixes bug #47041 --- NEWS | 4 ++++ src/language/stats/mann-whitney.c | 31 +++++++++++++++++++++++++++++-- tests/language/stats/npar.at | 6 +++++- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 0318b285c7..dde55318f7 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,10 @@ Changes from 0.8.5 to 0.9.0: - Rendering of the variable and data sheets in right-to-left locales now works properly. + - The Mann-Whitney test, when run on a dataset containing values + of the group variable other than those defining the test groups, + would incorrectly calculate the ranks. This has been corrected. + * The IMPORTCASES subcommand on GET DATA is now ignored, for compatibility. diff --git a/src/language/stats/mann-whitney.c b/src/language/stats/mann-whitney.c index 81fe033af5..f752b463a2 100644 --- a/src/language/stats/mann-whitney.c +++ b/src/language/stats/mann-whitney.c @@ -54,6 +54,26 @@ static void show_ranks_box (const struct n_sample_test *nst, const struct mw *mw static void show_statistics_box (const struct n_sample_test *nst, const struct mw *mw, bool exact); + +static bool +belongs_to_test (const struct ccase *c, void *aux) +{ + const struct n_sample_test *nst = aux; + + const union value *group = case_data (c, nst->indep_var); + const size_t group_var_width = var_get_width (nst->indep_var); + + if ( value_equal (group, &nst->val1, group_var_width)) + return true; + + if ( value_equal (group, &nst->val2, group_var_width)) + return true; + + return false; +} + + + void mann_whitney_execute (const struct dataset *ds, struct casereader *input, @@ -79,9 +99,16 @@ mann_whitney_execute (const struct dataset *ds, struct casereader *rr; struct ccase *c; const struct variable *var = nst->vars[i]; + + struct casereader *reader = + casereader_create_filter_func (casereader_clone 
(input), + belongs_to_test, + NULL, + CONST_CAST (struct n_sample_test *, nst), + NULL); + - struct casereader *reader = - sort_execute_1var (casereader_clone (input), var); + reader = sort_execute_1var (reader, var); rr = casereader_create_append_rank (reader, var, dict_get_weight (dict), diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at index 3f62014762..eec7433c34 100644 --- a/tests/language/stats/npar.at +++ b/tests/language/stats/npar.at @@ -1008,7 +1008,11 @@ begin data. 85 0 76 1 145 1 -24 1 +24 1 +1 4 +-4 5 +34 5 +21 4 end data. NPAR TESTS -- 2.30.2