X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flanguage%2Fstats%2Fkruskal-wallis.c;h=65ce0868ad722cdc0184183ca0fac94f2a30056f;hb=047d4a8e14cdbb50258bf8c5634db24a88767f24;hp=ea8fe8ceebdf5c01c8de67a94dc7a1eb7fba5a3b;hpb=d8ffe656444590f1199f62ac3ebdb9b7692923c2;p=pspp diff --git a/src/language/stats/kruskal-wallis.c b/src/language/stats/kruskal-wallis.c index ea8fe8ceeb..65ce0868ad 100644 --- a/src/language/stats/kruskal-wallis.c +++ b/src/language/stats/kruskal-wallis.c @@ -1,5 +1,5 @@ /* Pspp - a program for statistical analysis. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,23 +22,23 @@ #include #include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -#include "minmax.h" -#include "xalloc.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/subcase.h" +#include "data/variable.h" +#include "libpspp/assertion.h" +#include "libpspp/hmap.h" +#include "libpspp/bt.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "math/sort.h" +#include "output/tab.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" /* Returns true iff the independent variable lies in the range [nst->val1, nst->val2] */ @@ -60,12 +60,27 @@ include_func (const struct ccase *c, void *aux) struct rank_entry { struct hmap_node node; + struct bt_node btn; union value group; double sum_of_ranks; double n; }; + +static int +compare_rank_entries_3way (const struct bt_node *a, + const struct bt_node *b, + const void *aux) +{ + const struct variable *var = aux; + const struct rank_entry *rea = bt_data (a, struct rank_entry, btn); + const struct rank_entry *reb = bt_data (b, struct rank_entry, btn); + + return value_compare_3way (&rea->group, &reb->group, var_get_width (var)); +} + + /* Return the entry with the key GROUP or null if there is no such entry */ static struct rank_entry * find_rank_entry (const struct hmap *map, const union value *group, size_t width) @@ -78,7 +93,7 @@ find_rank_entry (const struct hmap *map, const union value *group, size_t width) if (0 == value_compare_3way (group, &re->group, width)) return re; } - + return re; } @@ -122,7 +137,7 @@ kruskal_wallis_execute (const struct dataset *ds, struct kw *kw = xcalloc (nst->n_vars, sizeof *kw); /* If the independent variable is missing, then we ignore the case */ - input = casereader_create_filter_missing (input, + input = casereader_create_filter_missing (input, &nst->indep_var, 1, exclude, NULL, NULL); @@ -130,7 +145,7 @@ kruskal_wallis_execute (const struct dataset *ds, input = casereader_create_filter_weight (input, dict, &warn, NULL); /* Remove all those cases which are outside the range (val1, val2) */ - input = casereader_create_filter_func (input, include_func, NULL, + input = casereader_create_filter_func (input, include_func, NULL, CONST_CAST (struct n_sample_test *, nst), NULL); proto = casereader_get_proto (input); @@ -152,7 +167,7 @@ kruskal_wallis_execute (const struct dataset *ds, exclude, NULL, NULL); - rr = casereader_create_append_rank (r, + rr = casereader_create_append_rank (r, nst->vars[i], dict_get_weight (dict), &rerr, @@ -163,7 +178,7 @@ kruskal_wallis_execute (const struct dataset *ds, { const union value *group = case_data (c, nst->indep_var); const size_t group_var_width = var_get_width (nst->indep_var); - struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width); + struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width); if ( NULL == rank) { @@ -178,7 +193,7 @@ kruskal_wallis_execute (const struct dataset *ds, rank->n += dict_get_case_weight (dict, c, &warn); /* If this assertion fires, then either the data wasn't sorted or some other - problem occured */ + problem occurred */ assert (rerr == 0); } @@ -197,14 +212,14 @@ kruskal_wallis_execute (const struct dataset *ds, total_n_groups ++; } kw[i].h *= 12 / (n * ( n + 1)); - kw[i].h -= 3 * (n + 1) ; + kw[i].h -= 3 * (n + 1) ; kw[i].h /= 1 - tiebreaker/ (pow3 (n) - n); } } casereader_destroy (input); - + show_ranks_box (nst, kw, total_n_groups); show_sig_box (nst, kw); @@ -224,7 +239,6 @@ kruskal_wallis_execute (const struct dataset *ds, } -#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -251,7 +265,7 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_grou tab_box (table, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); - tab_text (table, 1, 0, TAT_TITLE, + tab_text (table, 1, 0, TAT_TITLE, var_to_string (nst->indep_var) ); @@ -266,31 +280,48 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_grou for (i = 0 ; i < nst->n_vars ; ++i) { int tot = 0; - const struct rank_entry *re; + struct rank_entry *re_x; + struct bt_node *bt_n = NULL; + struct bt bt; if (i > 0) tab_hline (table, TAL_1, 0, tab_nc (table) -1, row); - + tab_text (table, 0, row, TAT_TITLE, var_to_string (nst->vars[i])); - HMAP_FOR_EACH (re, const struct rank_entry, node, &kw[i].map) + /* Sort the rank entries, by iteratin the hash and putting the entries + into a binary tree. */ + bt_init (&bt, compare_rank_entries_3way, nst->vars[i]); + HMAP_FOR_EACH (re_x, struct rank_entry, node, &kw[i].map) { + bt_insert (&bt, &re_x->btn); + } + + /* Report the rank entries in sorted order. */ + for (bt_n = bt_first (&bt); + bt_n != NULL; + bt_n = bt_next (&bt, bt_n) ) + { + const struct rank_entry *re = + bt_data (bt_n, const struct rank_entry, btn); + struct string str; ds_init_empty (&str); var_append_value_name (nst->indep_var, &re->group, &str); tab_text (table, 1, row, TAB_LEFT, ds_cstr (&str)); - tab_double (table, 2, row, TAB_LEFT, re->n, &F_8_0); - tab_double (table, 3, row, TAB_LEFT, re->sum_of_ranks / re->n, 0); + tab_double (table, 2, row, TAB_LEFT, re->n, NULL, RC_INTEGER); + tab_double (table, 3, row, TAB_LEFT, re->sum_of_ranks / re->n, NULL, RC_OTHER); tot += re->n; row++; ds_destroy (&str); } + tab_double (table, 2, row, TAB_LEFT, - tot, &F_8_0); + tot, NULL, RC_INTEGER); tab_text (table, 1, row++, TAB_LEFT, _("Total")); } @@ -331,19 +362,19 @@ show_sig_box (const struct n_sample_test *nst, const struct kw *kw) for (i = 0 ; i < nst->n_vars; ++i) { const double df = hmap_count (&kw[i].map) - 1; - tab_text (table, column_headers + 1 + i, 0, TAT_TITLE, + tab_text (table, column_headers + 1 + i, 0, TAT_TITLE, var_to_string (nst->vars[i]) ); tab_double (table, column_headers + 1 + i, 1, 0, - kw[i].h, 0); + kw[i].h, NULL, RC_OTHER); tab_double (table, column_headers + 1 + i, 2, 0, - df, &F_8_0); + df, NULL, RC_INTEGER); tab_double (table, column_headers + 1 + i, 3, 0, gsl_cdf_chisq_Q (kw[i].h, df), - 0); + NULL, RC_PVALUE); } tab_submit (table);