1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "language/stats/mann-whitney.h"

#include <gsl/gsl_cdf.h>
#include <stdlib.h>

#include "data/case.h"
#include "data/casereader.h"
#include "data/dataset.h"
#include "data/dictionary.h"
#include "data/format.h"
#include "data/variable.h"
#include "libpspp/cast.h"
#include "libpspp/misc.h"
#include "math/sort.h"
#include "output/tab.h"
34 /* Calculates the adjustment necessary for tie compensation */
36 distinct_callback (double v UNUSED, casenumber t, double w UNUSED, void *aux)
38 double *tiebreaker = aux;
40 *tiebreaker += (pow3 (t) - t) / 12.0;
/* Results of the test for a single dependent variable.  Index 0 of the
   two-element arrays refers to the first group, index 1 to the second. */
struct mw
{
  double rank_sum[2];   /* Sum of the ranks in each group */
  double n[2];          /* Sum of the case weights in each group */

  double u;  /* The Mann-Whitney U statistic */
  double w;  /* The Wilcoxon Rank Sum W statistic */
  double z;  /* U standardized for the normal approximation */
};
/* Output routines; both take the per-variable results array MW. */
static void show_ranks_box (const struct n_sample_test *nst,
                            const struct mw *mw);
static void show_statistics_box (const struct n_sample_test *nst,
                                 const struct mw *mw,
                                 bool exact);
59 belongs_to_test (const struct ccase *c, void *aux)
61 const struct n_sample_test *nst = aux;
63 const union value *group = case_data (c, nst->indep_var);
64 const size_t group_var_width = var_get_width (nst->indep_var);
66 if ( value_equal (group, &nst->val1, group_var_width))
69 if ( value_equal (group, &nst->val2, group_var_width))
78 mann_whitney_execute (const struct dataset *ds,
79 struct casereader *input,
80 enum mv_class exclude,
81 const struct npar_test *test,
86 const struct dictionary *dict = dataset_dict (ds);
87 const struct n_sample_test *nst = UP_CAST (test, const struct n_sample_test, parent);
89 const struct caseproto *proto = casereader_get_proto (input);
90 size_t rank_idx = caseproto_get_n_widths (proto);
92 struct mw *mw = xcalloc (nst->n_vars, sizeof *mw);
94 for (i = 0; i < nst->n_vars; ++i)
96 double tiebreaker = 0.0;
98 enum rank_error rerr = 0;
99 struct casereader *rr;
101 const struct variable *var = nst->vars[i];
103 struct casereader *reader =
104 casereader_create_filter_func (casereader_clone (input),
107 CONST_CAST (struct n_sample_test *, nst),
111 reader = sort_execute_1var (reader, var);
113 rr = casereader_create_append_rank (reader, var,
114 dict_get_weight (dict),
116 distinct_callback, &tiebreaker);
118 for (; (c = casereader_read (rr)); case_unref (c))
120 const union value *val = case_data (c, var);
121 const union value *group = case_data (c, nst->indep_var);
122 const size_t group_var_width = var_get_width (nst->indep_var);
123 const double rank = case_data_idx (c, rank_idx)->f;
125 if ( var_is_value_missing (var, val, exclude))
128 if ( value_equal (group, &nst->val1, group_var_width))
130 mw[i].rank_sum[0] += rank;
131 mw[i].n[0] += dict_get_case_weight (dict, c, &warn);
133 else if ( value_equal (group, &nst->val2, group_var_width))
135 mw[i].rank_sum[1] += rank;
136 mw[i].n[1] += dict_get_case_weight (dict, c, &warn);
139 casereader_destroy (rr);
144 struct mw *mwv = &mw[i];
146 mwv->u = mwv->n[0] * mwv->n[1] ;
147 mwv->u += mwv->n[0] * (mwv->n[0] + 1) / 2.0;
148 mwv->u -= mwv->rank_sum[0];
150 mwv->w = mwv->rank_sum[1];
151 if ( mwv->u > mwv->n[0] * mwv->n[1] / 2.0)
153 mwv->u = mwv->n[0] * mwv->n[1] - mwv->u;
154 mwv->w = mwv->rank_sum[0];
156 mwv->z = mwv->u - mwv->n[0] * mwv->n[1] / 2.0;
157 n = mwv->n[0] + mwv->n[1];
158 denominator = pow3(n) - n;
160 denominator -= tiebreaker;
161 denominator *= mwv->n[0] * mwv->n[1];
162 denominator /= n * (n - 1);
164 mwv->z /= sqrt (denominator);
167 casereader_destroy (input);
169 show_ranks_box (nst, mw);
170 show_statistics_box (nst, mw, exact);
/* Shorthand: passes MSGID to gettext. */
#define _(msgid) gettext (msgid)
181 show_ranks_box (const struct n_sample_test *nst, const struct mw *mwv)
184 const int row_headers = 1;
185 const int column_headers = 2;
186 struct tab_table *table =
187 tab_create (row_headers + 7, column_headers + nst->n_vars);
189 struct string g1str, g2str;;
190 ds_init_empty (&g1str);
191 var_append_value_name (nst->indep_var, &nst->val1, &g1str);
193 ds_init_empty (&g2str);
194 var_append_value_name (nst->indep_var, &nst->val2, &g2str);
196 tab_headers (table, row_headers, 0, column_headers, 0);
198 tab_title (table, _("Ranks"));
200 /* Vertical lines inside the box */
201 tab_box (table, 1, 0, -1, TAL_1,
202 row_headers, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
204 /* Box around the table */
205 tab_box (table, TAL_2, TAL_2, -1, -1,
206 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
208 tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
209 tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
211 tab_hline (table, TAL_1, row_headers, tab_nc (table) -1, 1);
213 tab_text (table, 1, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g1str));
214 tab_text (table, 2, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g2str));
215 tab_text (table, 3, 1, TAT_TITLE | TAB_CENTER, _("Total"));
216 tab_joint_text (table, 1, 0, 3, 0,
217 TAT_TITLE | TAB_CENTER, _("N"));
218 tab_vline (table, TAL_2, 4, 0, tab_nr (table) - 1);
220 tab_text (table, 4, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g1str));
221 tab_text (table, 5, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g2str));
222 tab_joint_text (table, 4, 0, 5, 0,
223 TAT_TITLE | TAB_CENTER, _("Mean Rank"));
224 tab_vline (table, TAL_2, 6, 0, tab_nr (table) - 1);
226 tab_text (table, 6, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g1str));
227 tab_text (table, 7, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g2str));
228 tab_joint_text (table, 6, 0, 7, 0,
229 TAT_TITLE | TAB_CENTER, _("Sum of Ranks"));
234 for (i = 0 ; i < nst->n_vars ; ++i)
236 const struct mw *mw = &mwv[i];
237 tab_text (table, 0, column_headers + i, TAT_TITLE,
238 var_to_string (nst->vars[i]));
240 tab_double (table, 1, column_headers + i, 0,
241 mw->n[0], NULL, RC_OTHER);
243 tab_double (table, 2, column_headers + i, 0,
244 mw->n[1], NULL, RC_OTHER);
246 tab_double (table, 3, column_headers + i, 0,
247 mw->n[1] + mw->n[0], NULL, RC_OTHER);
250 tab_double (table, 4, column_headers + i, 0,
251 mw->rank_sum[0] / mw->n[0], NULL, RC_OTHER);
253 tab_double (table, 5, column_headers + i, 0,
254 mw->rank_sum[1] / mw->n[1], NULL, RC_OTHER);
257 tab_double (table, 6, column_headers + i, 0,
258 mw->rank_sum[0], NULL, RC_OTHER);
260 tab_double (table, 7, column_headers + i, 0,
261 mw->rank_sum[1], NULL, RC_OTHER);
268 show_statistics_box (const struct n_sample_test *nst, const struct mw *mwv, bool exact)
271 const int row_headers = 1;
272 const int column_headers = 1;
273 struct tab_table *table =
274 tab_create (row_headers + (exact ? 6 : 4), column_headers + nst->n_vars);
276 tab_headers (table, row_headers, 0, column_headers, 0);
278 tab_title (table, _("Test Statistics"));
280 /* Vertical lines inside the box */
281 tab_box (table, 1, 0, -1, TAL_1,
282 row_headers, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
284 /* Box around the table */
285 tab_box (table, TAL_2, TAL_2, -1, -1,
286 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
288 tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
289 tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
291 tab_text (table, 1, 0, TAT_TITLE | TAB_CENTER, _("Mann-Whitney U"));
292 tab_text (table, 2, 0, TAT_TITLE | TAB_CENTER, _("Wilcoxon W"));
293 tab_text (table, 3, 0, TAT_TITLE | TAB_CENTER, _("Z"));
294 tab_text (table, 4, 0, TAT_TITLE | TAB_CENTER, _("Asymp. Sig. (2-tailed)"));
298 tab_text (table, 5, 0, TAT_TITLE | TAB_CENTER, _("Exact Sig. (2-tailed)"));
299 tab_text (table, 6, 0, TAT_TITLE | TAB_CENTER, _("Point Probability"));
302 for (i = 0 ; i < nst->n_vars ; ++i)
304 const struct mw *mw = &mwv[i];
306 tab_text (table, 0, column_headers + i, TAT_TITLE,
307 var_to_string (nst->vars[i]));
309 tab_double (table, 1, column_headers + i, 0,
310 mw->u, NULL, RC_OTHER);
312 tab_double (table, 2, column_headers + i, 0,
313 mw->w, NULL, RC_OTHER);
315 tab_double (table, 3, column_headers + i, 0,
316 mw->z, NULL, RC_OTHER);
318 tab_double (table, 4, column_headers + i, 0,
319 2.0 * gsl_cdf_ugaussian_P (mw->z), NULL, RC_PVALUE);