X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fmann-whitney.c;h=e117cc366cbfd24aefacd5631eca45a1a81c2e0c;hb=ad4f9604cc877272fe504234e8b9078a7ce76e17;hp=94cb1ceb60b42efef44b0d96e3206eeea30f2cf2;hpb=d6259145c57dbef83bbe9fc0d90d35ed14a95a96;p=pspp diff --git a/src/language/stats/mann-whitney.c b/src/language/stats/mann-whitney.c index 94cb1ceb60..e117cc366c 100644 --- a/src/language/stats/mann-whitney.c +++ b/src/language/stats/mann-whitney.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,20 +16,20 @@ #include -#include "mann-whitney.h" +#include "language/stats/mann-whitney.h" #include -#include -#include - -#include -#include -#include -#include -#include -#include -#include +#include "data/case.h" +#include "data/casereader.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/variable.h" +#include "libpspp/cast.h" +#include "libpspp/misc.h" +#include "math/sort.h" +#include "output/tab.h" /* Calculates the adjustment necessary for tie compensation */ static void @@ -45,23 +45,42 @@ struct mw double rank_sum[2]; double n[2]; - double u; /* The Mann-Whitney U statistic */ double w; /* The Wilcoxon Rank Sum W statistic */ - double z; + double z; }; static void show_ranks_box (const struct n_sample_test *nst, const struct mw *mw); static void show_statistics_box (const struct n_sample_test *nst, const struct mw *mw, bool exact); + +static bool +belongs_to_test (const struct ccase *c, void *aux) +{ + const struct n_sample_test *nst = aux; + + const union value *group = case_data (c, nst->indep_var); + const size_t group_var_width = var_get_width (nst->indep_var); + + if ( value_equal (group, &nst->val1, group_var_width)) + return true; + + if ( value_equal (group, &nst->val2, group_var_width)) + return true; + + return false; +} + + + void mann_whitney_execute (const struct dataset *ds, struct casereader *input, enum mv_class exclude, const struct npar_test *test, bool exact, - double timer) + double timer UNUSED) { int i; const struct dictionary *dict = dataset_dict (ds); @@ -80,9 +99,19 @@ mann_whitney_execute (const struct dataset *ds, struct casereader *rr; struct ccase *c; const struct variable *var = nst->vars[i]; - + struct casereader *reader = - sort_execute_1var (casereader_clone (input), var); + casereader_create_filter_func (casereader_clone (input), + belongs_to_test, + NULL, + CONST_CAST (struct n_sample_test *, nst), + NULL); + + reader = casereader_create_filter_missing (reader, &var, 1, + exclude, + NULL, NULL); + + reader = sort_execute_1var (reader, var); rr = casereader_create_append_rank (reader, var, dict_get_weight (dict), @@ -91,14 +120,10 @@ mann_whitney_execute (const struct dataset *ds, for (; (c = casereader_read (rr)); case_unref (c)) { - const union value *val = case_data (c, var); const union value *group = case_data (c, nst->indep_var); const size_t group_var_width = var_get_width (nst->indep_var); const double rank = case_data_idx (c, rank_idx)->f; - if ( var_is_value_missing (var, val, exclude)) - continue; - if ( value_equal (group, &nst->val1, group_var_width)) { mw[i].rank_sum[0] += rank; @@ -134,7 +159,7 @@ mann_whitney_execute (const struct dataset *ds, denominator -= tiebreaker; denominator *= mwv->n[0] * mwv->n[1]; denominator /= n * (n - 1); - + mwv->z /= sqrt (denominator); } } @@ -148,7 +173,6 @@ mann_whitney_execute (const struct dataset *ds, -#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -161,6 +185,13 @@ show_ranks_box (const struct n_sample_test *nst, const struct mw *mwv) struct tab_table *table = tab_create (row_headers + 7, column_headers + nst->n_vars); + struct string g1str, g2str;; + ds_init_empty (&g1str); + var_append_value_name (nst->indep_var, &nst->val1, &g1str); + + ds_init_empty (&g2str); + var_append_value_name (nst->indep_var, &nst->val2, &g2str); + tab_headers (table, row_headers, 0, column_headers, 0); tab_title (table, _("Ranks")); @@ -178,24 +209,27 @@ show_ranks_box (const struct n_sample_test *nst, const struct mw *mwv) tab_hline (table, TAL_1, row_headers, tab_nc (table) -1, 1); - tab_text (table, 1, 1, TAT_TITLE | TAB_CENTER, _("group1")); - tab_text (table, 2, 1, TAT_TITLE | TAB_CENTER, _("group2")); + tab_text (table, 1, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g1str)); + tab_text (table, 2, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g2str)); tab_text (table, 3, 1, TAT_TITLE | TAB_CENTER, _("Total")); tab_joint_text (table, 1, 0, 3, 0, TAT_TITLE | TAB_CENTER, _("N")); tab_vline (table, TAL_2, 4, 0, tab_nr (table) - 1); - tab_text (table, 4, 1, TAT_TITLE | TAB_CENTER, _("group1")); - tab_text (table, 5, 1, TAT_TITLE | TAB_CENTER, _("group2")); + tab_text (table, 4, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g1str)); + tab_text (table, 5, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g2str)); tab_joint_text (table, 4, 0, 5, 0, TAT_TITLE | TAB_CENTER, _("Mean Rank")); tab_vline (table, TAL_2, 6, 0, tab_nr (table) - 1); - tab_text (table, 6, 1, TAT_TITLE | TAB_CENTER, _("group1")); - tab_text (table, 7, 1, TAT_TITLE | TAB_CENTER, _("group2")); + tab_text (table, 6, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g1str)); + tab_text (table, 7, 1, TAT_TITLE | TAB_CENTER, ds_cstr (&g2str)); tab_joint_text (table, 6, 0, 7, 0, TAT_TITLE | TAB_CENTER, _("Sum of Ranks")); + ds_destroy (&g1str); + ds_destroy (&g2str); + for (i = 0 ; i < nst->n_vars ; ++i) { const struct mw *mw = &mwv[i]; @@ -203,27 +237,27 @@ show_ranks_box (const struct n_sample_test *nst, const struct mw *mwv) var_to_string (nst->vars[i])); tab_double (table, 1, column_headers + i, 0, - mw->n[0], 0); + mw->n[0], NULL, RC_OTHER); tab_double (table, 2, column_headers + i, 0, - mw->n[1], 0); + mw->n[1], NULL, RC_OTHER); tab_double (table, 3, column_headers + i, 0, - mw->n[1] + mw->n[0], 0); + mw->n[1] + mw->n[0], NULL, RC_OTHER); /* Mean Ranks */ tab_double (table, 4, column_headers + i, 0, - mw->rank_sum[0] / mw->n[0], 0); + mw->rank_sum[0] / mw->n[0], NULL, RC_OTHER); tab_double (table, 5, column_headers + i, 0, - mw->rank_sum[1] / mw->n[1], 0); + mw->rank_sum[1] / mw->n[1], NULL, RC_OTHER); /* Sum of Ranks */ tab_double (table, 6, column_headers + i, 0, - mw->rank_sum[0], 0); + mw->rank_sum[0], NULL, RC_OTHER); tab_double (table, 7, column_headers + i, 0, - mw->rank_sum[1], 0); + mw->rank_sum[1], NULL, RC_OTHER); } tab_submit (table); @@ -258,7 +292,7 @@ show_statistics_box (const struct n_sample_test *nst, const struct mw *mwv, bool tab_text (table, 3, 0, TAT_TITLE | TAB_CENTER, _("Z")); tab_text (table, 4, 0, TAT_TITLE | TAB_CENTER, _("Asymp. Sig. (2-tailed)")); - if (exact) + if (exact) { tab_text (table, 5, 0, TAT_TITLE | TAB_CENTER, _("Exact Sig. (2-tailed)")); tab_text (table, 6, 0, TAT_TITLE | TAB_CENTER, _("Point Probability")); @@ -272,16 +306,16 @@ show_statistics_box (const struct n_sample_test *nst, const struct mw *mwv, bool var_to_string (nst->vars[i])); tab_double (table, 1, column_headers + i, 0, - mw->u, 0); + mw->u, NULL, RC_OTHER); tab_double (table, 2, column_headers + i, 0, - mw->w, 0); + mw->w, NULL, RC_OTHER); tab_double (table, 3, column_headers + i, 0, - mw->z, 0); + mw->z, NULL, RC_OTHER); tab_double (table, 4, column_headers + i, 0, - 2.0 * gsl_cdf_ugaussian_P (mw->z), 0); + 2.0 * gsl_cdf_ugaussian_P (mw->z), NULL, RC_PVALUE); } tab_submit (table);