From 3c916f0eb51c047746deb363db03471370a5007b Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 1 Jan 2022 09:56:21 +0100 Subject: [PATCH] Fixed misleading results in the Kruskal-Wallis test Fixed misleading results if the lower bound was in fact higher than the upper bound. --- NEWS | 7 ++++- doc/statistics.texi | 7 ++--- src/language/stats/kruskal-wallis.c | 24 ++++++++++++++--- src/language/stats/kruskal-wallis.h | 4 +-- tests/language/stats/npar.at | 40 ++++++++++++++++++++++++++++- 5 files changed, 71 insertions(+), 11 deletions(-) diff --git a/NEWS b/NEWS index 791b9c1ac3..fba936463a 100644 --- a/NEWS +++ b/NEWS @@ -1,11 +1,16 @@ PSPP NEWS -- history of user-visible changes. -Copyright (C) 1996-2000, 2008-2016, 2017, 2019, 2020, 2021 Free Software Foundation, Inc. +Copyright (C) 1996-2000, 2008-2016, 2017, 2019, 2020, 2021, + 2022 Free Software Foundation, Inc. See the end for copying conditions. Please send PSPP bug reports to bug-gnu-pspp@gnu.org. Changes from 1.4.1 to 1.5.3: + * In the Kruskal-Wallis test, a misleading result could occur + if the lower bound specified by the user was in fact higher + than the upper bound specified. This has been fixed. + * The DEFINE, MATRIX, MCONVERT, and MATRIX DATA commands are now implemented. diff --git a/doc/statistics.texi b/doc/statistics.texi index 2f74f883fd..01976e27c9 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -1634,9 +1634,10 @@ arbitrary number of populations. It does not assume normality. The data to be compared are specified by @var{var_list}. The categorical variable determining the groups to which the data belongs is given by @var{var}. The limits @var{lower} and -@var{upper} specify the valid range of @var{var}. Any cases for -which @var{var} falls outside [@var{lower}, @var{upper}] are -ignored. +@var{upper} specify the valid range of @var{var}. +If @var{upper} is smaller than @var{lower}, PSPP will assume their values +to be reversed. 
Any cases for which @var{var} falls outside +[@var{lower}, @var{upper}] are ignored. The mean rank of each group as well as the chi-squared value and significance of the test are printed. diff --git a/src/language/stats/kruskal-wallis.c b/src/language/stats/kruskal-wallis.c index 62c49e4848..6d54bae782 100644 --- a/src/language/stats/kruskal-wallis.c +++ b/src/language/stats/kruskal-wallis.c @@ -1,5 +1,5 @@ /* Pspp - a program for statistical analysis. - Copyright (C) 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2010, 2011, 2022 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,16 +44,32 @@ #define N_(msgid) msgid #define _(msgid) gettext (msgid) -/* Returns true iff the independent variable lies in the range [nst->val1, nst->val2] */ +/* Returns true iff the independent variable lies between nst->val1 and nst->val2 */ static bool include_func (const struct ccase *c, void *aux) { const struct n_sample_test *nst = aux; - if (0 < value_compare_3way (&nst->val1, case_data (c, nst->indep_var), var_get_width (nst->indep_var))) + const union value *smaller = 0; + const union value *larger = 0; + int x = value_compare_3way (&nst->val1, &nst->val2, var_get_width (nst->indep_var)); + if (x < 0) + { + smaller = &nst->val1; + larger = &nst->val2; + } + else + { + smaller = &nst->val2; + larger = &nst->val1; + } + + if (0 < value_compare_3way (smaller, case_data (c, nst->indep_var), + var_get_width (nst->indep_var))) return false; - if (0 > value_compare_3way (&nst->val2, case_data (c, nst->indep_var), var_get_width (nst->indep_var))) + if (0 > value_compare_3way (larger, case_data (c, nst->indep_var), + var_get_width (nst->indep_var))) return false; return true; diff --git a/src/language/stats/kruskal-wallis.h b/src/language/stats/kruskal-wallis.h index 1e16007f37..7adc312996 100644 --- a/src/language/stats/kruskal-wallis.h +++ 
b/src/language/stats/kruskal-wallis.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2010, 2011, 2022 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ struct kruskal_wallis_test { - struct two_sample_test parent; + struct n_sample_test parent; }; struct casereader; diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at index be770ab1b1..93624925ae 100644 --- a/tests/language/stats/npar.at +++ b/tests/language/stats/npar.at @@ -1,5 +1,5 @@ dnl PSPP - a program for statistical analysis. -dnl Copyright (C) 2017 Free Software Foundation, Inc. +dnl Copyright (C) 2017, 2022 Free Software Foundation, Inc. dnl dnl This program is free software: you can redistribute it and/or modify dnl it under the terms of the GNU General Public License as published by @@ -759,6 +759,44 @@ AT_CHECK([pspp -o pspp2.csv kw-missing-group.sps]) dnl The result should be the same as before AT_CHECK([diff pspp.csv pspp2.csv], [0]) +dnl Reverse the order of the group values +AT_DATA([kw-reverse-group.sps], [dnl +set format = F9.3. + +data list notable list /gv * xscore *. +begin data +1 96 +1 128 +1 83 +1 61 +1 101 +2 82 +2 124 +2 132 +2 135 +2 109 +3 115 +3 149 +3 166 +3 147 +end data. + +value label /gv + 1 "timed out" + 2 "hit wicket" + 3 "handled the ball". + +npar tests + /kruskal-wallis xscore by gv (3, 1) + /missing=exclude + . +]) + +AT_CHECK([pspp -o pspp2.csv kw-reverse-group.sps]) + +dnl The result should be the same as before +AT_CHECK([diff pspp.csv pspp2.csv], [0]) + AT_CLEANUP -- 2.30.2