X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fmann-whitney.c;h=2301cd13c53ed207643e4314408c19fc4cb88171;hb=8a0397328b6230fd49724e1c6d91a5a545d2fb4b;hp=94cb1ceb60b42efef44b0d96e3206eeea30f2cf2;hpb=d6259145c57dbef83bbe9fc0d90d35ed14a95a96;p=pspp diff --git a/src/language/stats/mann-whitney.c b/src/language/stats/mann-whitney.c index 94cb1ceb60..2301cd13c5 100644 --- a/src/language/stats/mann-whitney.c +++ b/src/language/stats/mann-whitney.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,20 +16,24 @@ #include -#include "mann-whitney.h" +#include "language/stats/mann-whitney.h" #include -#include -#include +#include "data/case.h" +#include "data/casereader.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/variable.h" +#include "libpspp/cast.h" +#include "libpspp/misc.h" +#include "math/sort.h" +#include "output/pivot-table.h" -#include -#include -#include -#include -#include -#include -#include +#include "gettext.h" +#define N_(msgid) msgid +#define _(msgid) gettext (msgid) /* Calculates the adjustment necessary for tie compensation */ static void @@ -45,14 +49,34 @@ struct mw double rank_sum[2]; double n[2]; - double u; /* The Mann-Whitney U statistic */ double w; /* The Wilcoxon Rank Sum W statistic */ - double z; + double z; }; -static void show_ranks_box (const struct n_sample_test *nst, const struct mw *mw); -static void show_statistics_box (const struct n_sample_test *nst, const struct mw *mw, bool exact); +static void show_ranks_box (const struct n_sample_test *, const struct mw *); +static void show_statistics_box (const struct n_sample_test *, + const struct mw *); + + + +static bool +belongs_to_test (const struct ccase *c, void *aux) +{ + const struct n_sample_test *nst = aux; + + const union value *group = case_data (c, nst->indep_var); + const size_t group_var_width = var_get_width (nst->indep_var); + + if (value_equal (group, &nst->val1, group_var_width)) + return true; + + if (value_equal (group, &nst->val2, group_var_width)) + return true; + + return false; +} + void @@ -60,8 +84,8 @@ mann_whitney_execute (const struct dataset *ds, struct casereader *input, enum mv_class exclude, const struct npar_test *test, - bool exact, - double timer) + bool exact UNUSED, + double timer UNUSED) { int i; const struct dictionary *dict = dataset_dict (ds); @@ -70,7 +94,7 @@ mann_whitney_execute (const struct dataset *ds, const struct caseproto *proto = casereader_get_proto (input); size_t rank_idx = caseproto_get_n_widths (proto); - struct mw *mw = xcalloc (nst->n_vars, sizeof *mw); + struct mw *mw = XCALLOC (nst->n_vars, struct mw); for (i = 0; i < nst->n_vars; ++i) { @@ -80,9 +104,19 @@ mann_whitney_execute (const struct dataset *ds, struct casereader *rr; struct ccase *c; const struct variable *var = nst->vars[i]; - + struct casereader *reader = - sort_execute_1var (casereader_clone (input), var); + casereader_create_filter_func (casereader_clone (input), + belongs_to_test, + NULL, + CONST_CAST (struct n_sample_test *, nst), + NULL); + + reader = casereader_create_filter_missing (reader, &var, 1, + exclude, + NULL, NULL); + + reader = sort_execute_1var (reader, var); rr = casereader_create_append_rank (reader, var, dict_get_weight (dict), @@ -91,20 +125,16 @@ mann_whitney_execute (const struct dataset *ds, for (; (c = casereader_read (rr)); case_unref (c)) { - const union value *val = case_data (c, var); const union value *group = case_data (c, nst->indep_var); const size_t group_var_width = var_get_width (nst->indep_var); - const double rank = case_data_idx (c, rank_idx)->f; - - if ( var_is_value_missing (var, val, exclude)) - continue; + const double rank = case_num_idx (c, rank_idx); - if ( value_equal (group, &nst->val1, group_var_width)) + if (value_equal (group, &nst->val1, group_var_width)) { mw[i].rank_sum[0] += rank; mw[i].n[0] += dict_get_case_weight (dict, c, &warn); } - else if ( value_equal (group, &nst->val2, group_var_width)) + else if (value_equal (group, &nst->val2, group_var_width)) { mw[i].rank_sum[1] += rank; mw[i].n[1] += dict_get_case_weight (dict, c, &warn); @@ -122,7 +152,7 @@ mann_whitney_execute (const struct dataset *ds, mwv->u -= mwv->rank_sum[0]; mwv->w = mwv->rank_sum[1]; - if ( mwv->u > mwv->n[0] * mwv->n[1] / 2.0) + if (mwv->u > mwv->n[0] * mwv->n[1] / 2.0) { mwv->u = mwv->n[0] * mwv->n[1] - mwv->u; mwv->w = mwv->rank_sum[0]; @@ -134,155 +164,111 @@ mann_whitney_execute (const struct dataset *ds, denominator -= tiebreaker; denominator *= mwv->n[0] * mwv->n[1]; denominator /= n * (n - 1); - + mwv->z /= sqrt (denominator); } } casereader_destroy (input); show_ranks_box (nst, mw); - show_statistics_box (nst, mw, exact); + show_statistics_box (nst, mw); free (mw); } - - -#include -#include "gettext.h" -#define _(msgid) gettext (msgid) - static void show_ranks_box (const struct n_sample_test *nst, const struct mw *mwv) { - int i; - const int row_headers = 1; - const int column_headers = 2; - struct tab_table *table = - tab_create (row_headers + 7, column_headers + nst->n_vars); - - tab_headers (table, row_headers, 0, column_headers, 0); - - tab_title (table, _("Ranks")); - - /* Vertical lines inside the box */ - tab_box (table, 1, 0, -1, TAL_1, - row_headers, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); - - /* Box around the table */ - tab_box (table, TAL_2, TAL_2, -1, -1, - 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); - - tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers); - tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); - - tab_hline (table, TAL_1, row_headers, tab_nc (table) -1, 1); - - tab_text (table, 1, 1, TAT_TITLE | TAB_CENTER, _("group1")); - tab_text (table, 2, 1, TAT_TITLE | TAB_CENTER, _("group2")); - tab_text (table, 3, 1, TAT_TITLE | TAB_CENTER, _("Total")); - tab_joint_text (table, 1, 0, 3, 0, - TAT_TITLE | TAB_CENTER, _("N")); - tab_vline (table, TAL_2, 4, 0, tab_nr (table) - 1); - - tab_text (table, 4, 1, TAT_TITLE | TAB_CENTER, _("group1")); - tab_text (table, 5, 1, TAT_TITLE | TAB_CENTER, _("group2")); - tab_joint_text (table, 4, 0, 5, 0, - TAT_TITLE | TAB_CENTER, _("Mean Rank")); - tab_vline (table, TAL_2, 6, 0, tab_nr (table) - 1); - - tab_text (table, 6, 1, TAT_TITLE | TAB_CENTER, _("group1")); - tab_text (table, 7, 1, TAT_TITLE | TAB_CENTER, _("group2")); - tab_joint_text (table, 6, 0, 7, 0, - TAT_TITLE | TAB_CENTER, _("Sum of Ranks")); - - for (i = 0 ; i < nst->n_vars ; ++i) + struct pivot_table *table = pivot_table_create (N_("Ranks")); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("N"), PIVOT_RC_COUNT, + N_("Mean Rank"), PIVOT_RC_OTHER, + N_("Sum of Ranks"), PIVOT_RC_OTHER); + + struct pivot_dimension *indep = pivot_dimension_create__ ( + table, PIVOT_AXIS_ROW, pivot_value_new_variable (nst->indep_var)); + pivot_category_create_leaf (indep->root, + pivot_value_new_var_value (nst->indep_var, + &nst->val1)); + pivot_category_create_leaf (indep->root, + pivot_value_new_var_value (nst->indep_var, + &nst->val2)); + pivot_category_create_leaves (indep->root, N_("Total")); + + struct pivot_dimension *dep = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Dependent Variables")); + + for (size_t i = 0 ; i < nst->n_vars ; ++i) { const struct mw *mw = &mwv[i]; - tab_text (table, 0, column_headers + i, TAT_TITLE, - var_to_string (nst->vars[i])); - - tab_double (table, 1, column_headers + i, 0, - mw->n[0], 0); - - tab_double (table, 2, column_headers + i, 0, - mw->n[1], 0); - - tab_double (table, 3, column_headers + i, 0, - mw->n[1] + mw->n[0], 0); - - /* Mean Ranks */ - tab_double (table, 4, column_headers + i, 0, - mw->rank_sum[0] / mw->n[0], 0); - - tab_double (table, 5, column_headers + i, 0, - mw->rank_sum[1] / mw->n[1], 0); - /* Sum of Ranks */ - tab_double (table, 6, column_headers + i, 0, - mw->rank_sum[0], 0); - - tab_double (table, 7, column_headers + i, 0, - mw->rank_sum[1], 0); + int dep_idx = pivot_category_create_leaf ( + dep->root, pivot_value_new_variable (nst->vars[i])); + + struct entry + { + int stat_idx; + int indep_idx; + double x; + } + entries[] = { + /* N. */ + { 0, 0, mw->n[0] }, + { 0, 1, mw->n[1] }, + { 0, 2, mw->n[0] + mw->n[1] }, + + /* Mean Rank. */ + { 1, 0, mw->rank_sum[0] / mw->n[0] }, + { 1, 1, mw->rank_sum[1] / mw->n[1] }, + + /* Sum of Ranks. */ + { 2, 0, mw->rank_sum[0] }, + { 2, 1, mw->rank_sum[1] }, + }; + + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + { + const struct entry *e = &entries[j]; + pivot_table_put3 (table, e->stat_idx, e->indep_idx, dep_idx, + pivot_value_new_number (e->x)); + } } - tab_submit (table); + pivot_table_submit (table); } static void -show_statistics_box (const struct n_sample_test *nst, const struct mw *mwv, bool exact) +show_statistics_box (const struct n_sample_test *nst, const struct mw *mwv) { - int i; - const int row_headers = 1; - const int column_headers = 1; - struct tab_table *table = - tab_create (row_headers + (exact ? 6 : 4), column_headers + nst->n_vars); - - tab_headers (table, row_headers, 0, column_headers, 0); - - tab_title (table, _("Test Statistics")); + struct pivot_table *table = pivot_table_create (N_("Test Statistics")); - /* Vertical lines inside the box */ - tab_box (table, 1, 0, -1, TAL_1, - row_headers, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); + pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Statistics"), + _("Mann-Whitney U"), PIVOT_RC_OTHER, + _("Wilcoxon W"), PIVOT_RC_OTHER, + _("Z"), PIVOT_RC_OTHER, + _("Asymp. Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE); - /* Box around the table */ - tab_box (table, TAL_2, TAL_2, -1, -1, - 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); + struct pivot_dimension *variables = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Variables")); - tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers); - tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); - - tab_text (table, 1, 0, TAT_TITLE | TAB_CENTER, _("Mann-Whitney U")); - tab_text (table, 2, 0, TAT_TITLE | TAB_CENTER, _("Wilcoxon W")); - tab_text (table, 3, 0, TAT_TITLE | TAB_CENTER, _("Z")); - tab_text (table, 4, 0, TAT_TITLE | TAB_CENTER, _("Asymp. Sig. (2-tailed)")); - - if (exact) - { - tab_text (table, 5, 0, TAT_TITLE | TAB_CENTER, _("Exact Sig. (2-tailed)")); - tab_text (table, 6, 0, TAT_TITLE | TAB_CENTER, _("Point Probability")); - } - - for (i = 0 ; i < nst->n_vars ; ++i) + for (size_t i = 0 ; i < nst->n_vars ; ++i) { const struct mw *mw = &mwv[i]; - tab_text (table, 0, column_headers + i, TAT_TITLE, - var_to_string (nst->vars[i])); - - tab_double (table, 1, column_headers + i, 0, - mw->u, 0); - - tab_double (table, 2, column_headers + i, 0, - mw->w, 0); - - tab_double (table, 3, column_headers + i, 0, - mw->z, 0); - - tab_double (table, 4, column_headers + i, 0, - 2.0 * gsl_cdf_ugaussian_P (mw->z), 0); + int row = pivot_category_create_leaf ( + variables->root, pivot_value_new_variable (nst->vars[i])); + + double entries[] = { + mw->u, + mw->w, + mw->z, + 2.0 * gsl_cdf_ugaussian_P (mw->z), + }; + for (size_t i = 0; i < sizeof entries / sizeof *entries; i++) + pivot_table_put2 (table, i, row, pivot_value_new_number (entries[i])); } - tab_submit (table); + pivot_table_submit (table); }