1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/stats/mann-whitney.h"
21 #include <gsl/gsl_cdf.h>
23 #include "data/case.h"
24 #include "data/casereader.h"
25 #include "data/dataset.h"
26 #include "data/dictionary.h"
27 #include "data/format.h"
28 #include "data/variable.h"
29 #include "libpspp/cast.h"
30 #include "libpspp/misc.h"
31 #include "math/sort.h"
32 #include "output/pivot-table.h"
35 #define N_(msgid) msgid
36 #define _(msgid) gettext (msgid)
38 /* Calculates the adjustment necessary for tie compensation */
40 distinct_callback (double v UNUSED, casenumber t, double w UNUSED, void *aux)
42 double *tiebreaker = aux;
44 *tiebreaker += (pow3 (t) - t) / 12.0;
52 double u; /* The Mann-Whitney U statistic */
53 double w; /* The Wilcoxon Rank Sum W statistic */
57 static void show_ranks_box (const struct n_sample_test *, const struct mw *);
58 static void show_statistics_box (const struct n_sample_test *,
64 belongs_to_test (const struct ccase *c, void *aux)
66 const struct n_sample_test *nst = aux;
68 const union value *group = case_data (c, nst->indep_var);
69 const size_t group_var_width = var_get_width (nst->indep_var);
71 if (value_equal (group, &nst->val1, group_var_width))
74 if (value_equal (group, &nst->val2, group_var_width))
83 mann_whitney_execute (const struct dataset *ds,
84 struct casereader *input,
85 enum mv_class exclude,
86 const struct npar_test *test,
91 const struct dictionary *dict = dataset_dict (ds);
92 const struct n_sample_test *nst = UP_CAST (test, const struct n_sample_test, parent);
94 const struct caseproto *proto = casereader_get_proto (input);
95 size_t rank_idx = caseproto_get_n_widths (proto);
97 struct mw *mw = XCALLOC (nst->n_vars, struct mw);
99 for (i = 0; i < nst->n_vars; ++i)
101 double tiebreaker = 0.0;
103 enum rank_error rerr = 0;
104 struct casereader *rr;
106 const struct variable *var = nst->vars[i];
108 struct casereader *reader =
109 casereader_create_filter_func (casereader_clone (input),
112 CONST_CAST (struct n_sample_test *, nst),
115 reader = casereader_create_filter_missing (reader, &var, 1,
119 reader = sort_execute_1var (reader, var);
121 rr = casereader_create_append_rank (reader, var,
122 dict_get_weight (dict),
124 distinct_callback, &tiebreaker);
126 for (; (c = casereader_read (rr)); case_unref (c))
128 const union value *group = case_data (c, nst->indep_var);
129 const size_t group_var_width = var_get_width (nst->indep_var);
130 const double rank = case_data_idx (c, rank_idx)->f;
132 if (value_equal (group, &nst->val1, group_var_width))
134 mw[i].rank_sum[0] += rank;
135 mw[i].n[0] += dict_get_case_weight (dict, c, &warn);
137 else if (value_equal (group, &nst->val2, group_var_width))
139 mw[i].rank_sum[1] += rank;
140 mw[i].n[1] += dict_get_case_weight (dict, c, &warn);
143 casereader_destroy (rr);
148 struct mw *mwv = &mw[i];
150 mwv->u = mwv->n[0] * mwv->n[1] ;
151 mwv->u += mwv->n[0] * (mwv->n[0] + 1) / 2.0;
152 mwv->u -= mwv->rank_sum[0];
154 mwv->w = mwv->rank_sum[1];
155 if (mwv->u > mwv->n[0] * mwv->n[1] / 2.0)
157 mwv->u = mwv->n[0] * mwv->n[1] - mwv->u;
158 mwv->w = mwv->rank_sum[0];
160 mwv->z = mwv->u - mwv->n[0] * mwv->n[1] / 2.0;
161 n = mwv->n[0] + mwv->n[1];
162 denominator = pow3(n) - n;
164 denominator -= tiebreaker;
165 denominator *= mwv->n[0] * mwv->n[1];
166 denominator /= n * (n - 1);
168 mwv->z /= sqrt (denominator);
171 casereader_destroy (input);
173 show_ranks_box (nst, mw);
174 show_statistics_box (nst, mw);
180 show_ranks_box (const struct n_sample_test *nst, const struct mw *mwv)
182 struct pivot_table *table = pivot_table_create (N_("Ranks"));
184 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
185 N_("N"), PIVOT_RC_COUNT,
186 N_("Mean Rank"), PIVOT_RC_OTHER,
187 N_("Sum of Ranks"), PIVOT_RC_OTHER);
189 struct pivot_dimension *indep = pivot_dimension_create__ (
190 table, PIVOT_AXIS_ROW, pivot_value_new_variable (nst->indep_var));
191 pivot_category_create_leaf (indep->root,
192 pivot_value_new_var_value (nst->indep_var,
194 pivot_category_create_leaf (indep->root,
195 pivot_value_new_var_value (nst->indep_var,
197 pivot_category_create_leaves (indep->root, N_("Total"));
199 struct pivot_dimension *dep = pivot_dimension_create (
200 table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
202 for (size_t i = 0 ; i < nst->n_vars ; ++i)
204 const struct mw *mw = &mwv[i];
206 int dep_idx = pivot_category_create_leaf (
207 dep->root, pivot_value_new_variable (nst->vars[i]));
219 { 0, 2, mw->n[0] + mw->n[1] },
222 { 1, 0, mw->rank_sum[0] / mw->n[0] },
223 { 1, 1, mw->rank_sum[1] / mw->n[1] },
226 { 2, 0, mw->rank_sum[0] },
227 { 2, 1, mw->rank_sum[1] },
230 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
232 const struct entry *e = &entries[j];
233 pivot_table_put3 (table, e->stat_idx, e->indep_idx, dep_idx,
234 pivot_value_new_number (e->x));
238 pivot_table_submit (table);
242 show_statistics_box (const struct n_sample_test *nst, const struct mw *mwv)
244 struct pivot_table *table = pivot_table_create (N_("Test Statistics"));
246 pivot_dimension_create (
247 table, PIVOT_AXIS_COLUMN, N_("Statistics"),
248 _("Mann-Whitney U"), PIVOT_RC_OTHER,
249 _("Wilcoxon W"), PIVOT_RC_OTHER,
250 _("Z"), PIVOT_RC_OTHER,
251 _("Asymp. Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE);
253 struct pivot_dimension *variables = pivot_dimension_create (
254 table, PIVOT_AXIS_ROW, N_("Variables"));
256 for (size_t i = 0 ; i < nst->n_vars ; ++i)
258 const struct mw *mw = &mwv[i];
260 int row = pivot_category_create_leaf (
261 variables->root, pivot_value_new_variable (nst->vars[i]));
267 2.0 * gsl_cdf_ugaussian_P (mw->z),
269 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
270 pivot_table_put2 (table, i, row, pivot_value_new_number (entries[i]));
273 pivot_table_submit (table);