X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flanguage%2Fstats%2Fmann-whitney.c;h=e117cc366cbfd24aefacd5631eca45a1a81c2e0c;hb=ad4f9604cc877272fe504234e8b9078a7ce76e17;hp=4db82f8e904be5d7bf2a18460b319bc1b37717a1;hpb=81579d9e9f994fb2908f50af41c3eb033d216e58;p=pspp diff --git a/src/language/stats/mann-whitney.c b/src/language/stats/mann-whitney.c index 4db82f8e90..e117cc366c 100644 --- a/src/language/stats/mann-whitney.c +++ b/src/language/stats/mann-whitney.c @@ -22,9 +22,9 @@ #include "data/case.h" #include "data/casereader.h" +#include "data/dataset.h" #include "data/dictionary.h" #include "data/format.h" -#include "data/procedure.h" #include "data/variable.h" #include "libpspp/cast.h" #include "libpspp/misc.h" @@ -47,13 +47,33 @@ struct mw double u; /* The Mann-Whitney U statistic */ double w; /* The Wilcoxon Rank Sum W statistic */ - double z; + double z; }; static void show_ranks_box (const struct n_sample_test *nst, const struct mw *mw); static void show_statistics_box (const struct n_sample_test *nst, const struct mw *mw, bool exact); + +static bool +belongs_to_test (const struct ccase *c, void *aux) +{ + const struct n_sample_test *nst = aux; + + const union value *group = case_data (c, nst->indep_var); + const size_t group_var_width = var_get_width (nst->indep_var); + + if ( value_equal (group, &nst->val1, group_var_width)) + return true; + + if ( value_equal (group, &nst->val2, group_var_width)) + return true; + + return false; +} + + + void mann_whitney_execute (const struct dataset *ds, struct casereader *input, @@ -79,9 +99,19 @@ mann_whitney_execute (const struct dataset *ds, struct casereader *rr; struct ccase *c; const struct variable *var = nst->vars[i]; - + struct casereader *reader = - sort_execute_1var (casereader_clone (input), var); + casereader_create_filter_func (casereader_clone (input), + belongs_to_test, + NULL, + CONST_CAST (struct n_sample_test *, nst), + NULL); + + reader = casereader_create_filter_missing (reader, &var, 1, + exclude, + NULL, NULL); + + reader = sort_execute_1var (reader, var); rr = casereader_create_append_rank (reader, var, dict_get_weight (dict), @@ -90,14 +120,10 @@ mann_whitney_execute (const struct dataset *ds, for (; (c = casereader_read (rr)); case_unref (c)) { - const union value *val = case_data (c, var); const union value *group = case_data (c, nst->indep_var); const size_t group_var_width = var_get_width (nst->indep_var); const double rank = case_data_idx (c, rank_idx)->f; - if ( var_is_value_missing (var, val, exclude)) - continue; - if ( value_equal (group, &nst->val1, group_var_width)) { mw[i].rank_sum[0] += rank; @@ -133,7 +159,7 @@ mann_whitney_execute (const struct dataset *ds, denominator -= tiebreaker; denominator *= mwv->n[0] * mwv->n[1]; denominator /= n * (n - 1); - + mwv->z /= sqrt (denominator); } } @@ -211,27 +237,27 @@ show_ranks_box (const struct n_sample_test *nst, const struct mw *mwv) var_to_string (nst->vars[i])); tab_double (table, 1, column_headers + i, 0, - mw->n[0], 0); + mw->n[0], NULL, RC_OTHER); tab_double (table, 2, column_headers + i, 0, - mw->n[1], 0); + mw->n[1], NULL, RC_OTHER); tab_double (table, 3, column_headers + i, 0, - mw->n[1] + mw->n[0], 0); + mw->n[1] + mw->n[0], NULL, RC_OTHER); /* Mean Ranks */ tab_double (table, 4, column_headers + i, 0, - mw->rank_sum[0] / mw->n[0], 0); + mw->rank_sum[0] / mw->n[0], NULL, RC_OTHER); tab_double (table, 5, column_headers + i, 0, - mw->rank_sum[1] / mw->n[1], 0); + mw->rank_sum[1] / mw->n[1], NULL, RC_OTHER); /* Sum of Ranks */ tab_double (table, 6, column_headers + i, 0, - mw->rank_sum[0], 0); + mw->rank_sum[0], NULL, RC_OTHER); tab_double (table, 7, column_headers + i, 0, - mw->rank_sum[1], 0); + mw->rank_sum[1], NULL, RC_OTHER); } tab_submit (table); @@ -266,7 +292,7 @@ show_statistics_box (const struct n_sample_test *nst, const struct mw *mwv, bool tab_text (table, 3, 0, TAT_TITLE | TAB_CENTER, _("Z")); tab_text (table, 4, 0, TAT_TITLE | TAB_CENTER, _("Asymp. Sig. (2-tailed)")); - if (exact) + if (exact) { tab_text (table, 5, 0, TAT_TITLE | TAB_CENTER, _("Exact Sig. (2-tailed)")); tab_text (table, 6, 0, TAT_TITLE | TAB_CENTER, _("Point Probability")); @@ -280,16 +306,16 @@ show_statistics_box (const struct n_sample_test *nst, const struct mw *mwv, bool var_to_string (nst->vars[i])); tab_double (table, 1, column_headers + i, 0, - mw->u, 0); + mw->u, NULL, RC_OTHER); tab_double (table, 2, column_headers + i, 0, - mw->w, 0); + mw->w, NULL, RC_OTHER); tab_double (table, 3, column_headers + i, 0, - mw->z, 0); + mw->z, NULL, RC_OTHER); tab_double (table, 4, column_headers + i, 0, - 2.0 * gsl_cdf_ugaussian_P (mw->z), 0); + 2.0 * gsl_cdf_ugaussian_P (mw->z), NULL, RC_PVALUE); } tab_submit (table);