X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fkruskal-wallis.c;h=a9c269efedebfb7afaae9e86b910defeca8bcd23;hb=0e0cabc772b5f3a416e1e4e1dc021e196ac2c443;hp=ecab0348d1ae16986f85e905ed19e0bd43fe4d50;hpb=2be9bee9da6a2ce27715e58128569594319abfa2;p=pspp diff --git a/src/language/stats/kruskal-wallis.c b/src/language/stats/kruskal-wallis.c index ecab0348d1..a9c269efed 100644 --- a/src/language/stats/kruskal-wallis.c +++ b/src/language/stats/kruskal-wallis.c @@ -31,6 +31,7 @@ #include "data/variable.h" #include "libpspp/assertion.h" #include "libpspp/hmap.h" +#include "libpspp/bt.h" #include "libpspp/message.h" #include "libpspp/misc.h" #include "math/sort.h" @@ -59,12 +60,27 @@ include_func (const struct ccase *c, void *aux) struct rank_entry { struct hmap_node node; + struct bt_node btn; union value group; double sum_of_ranks; double n; }; + +static int +compare_rank_entries_3way (const struct bt_node *a, + const struct bt_node *b, + const void *aux) +{ + const struct variable *var = aux; + const struct rank_entry *rea = bt_data (a, struct rank_entry, btn); + const struct rank_entry *reb = bt_data (b, struct rank_entry, btn); + + return value_compare_3way (&rea->group, &reb->group, var_get_width (var)); +} + + /* Return the entry with the key GROUP or null if there is no such entry */ static struct rank_entry * find_rank_entry (const struct hmap *map, const union value *group, size_t width) @@ -77,7 +93,7 @@ find_rank_entry (const struct hmap *map, const union value *group, size_t width) if (0 == value_compare_3way (group, &re->group, width)) return re; } - + return re; } @@ -121,7 +137,7 @@ kruskal_wallis_execute (const struct dataset *ds, struct kw *kw = xcalloc (nst->n_vars, sizeof *kw); /* If the independent variable is missing, then we ignore the case */ - input = casereader_create_filter_missing (input, + input = casereader_create_filter_missing (input, &nst->indep_var, 1, exclude, NULL, NULL); @@ -129,7 +145,7 @@ kruskal_wallis_execute (const struct dataset *ds, input = casereader_create_filter_weight (input, dict, &warn, NULL); /* Remove all those cases which are outside the range (val1, val2) */ - input = casereader_create_filter_func (input, include_func, NULL, + input = casereader_create_filter_func (input, include_func, NULL, CONST_CAST (struct n_sample_test *, nst), NULL); proto = casereader_get_proto (input); @@ -151,7 +167,7 @@ kruskal_wallis_execute (const struct dataset *ds, exclude, NULL, NULL); - rr = casereader_create_append_rank (r, + rr = casereader_create_append_rank (r, nst->vars[i], dict_get_weight (dict), &rerr, @@ -162,7 +178,7 @@ kruskal_wallis_execute (const struct dataset *ds, { const union value *group = case_data (c, nst->indep_var); const size_t group_var_width = var_get_width (nst->indep_var); - struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width); + struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width); if ( NULL == rank) { @@ -196,14 +212,14 @@ kruskal_wallis_execute (const struct dataset *ds, total_n_groups ++; } kw[i].h *= 12 / (n * ( n + 1)); - kw[i].h -= 3 * (n + 1) ; + kw[i].h -= 3 * (n + 1) ; kw[i].h /= 1 - tiebreaker/ (pow3 (n) - n); } } casereader_destroy (input); - + show_ranks_box (nst, kw, total_n_groups); show_sig_box (nst, kw); @@ -249,7 +265,7 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_grou tab_box (table, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); - tab_text (table, 1, 0, TAT_TITLE, + tab_text (table, 1, 0, TAT_TITLE, var_to_string (nst->indep_var) ); @@ -264,31 +280,48 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_grou for (i = 0 ; i < nst->n_vars ; ++i) { int tot = 0; - const struct rank_entry *re; + struct rank_entry *re_x; + struct bt_node *bt_n = NULL; + struct bt bt; if (i > 0) tab_hline (table, TAL_1, 0, tab_nc (table) -1, row); - + tab_text (table, 0, row, TAT_TITLE, var_to_string (nst->vars[i])); - HMAP_FOR_EACH (re, const struct rank_entry, node, &kw[i].map) + /* Sort the rank entries, by iteratin the hash and putting the entries + into a binary tree. */ + bt_init (&bt, compare_rank_entries_3way, nst->vars[i]); + HMAP_FOR_EACH (re_x, struct rank_entry, node, &kw[i].map) { + bt_insert (&bt, &re_x->btn); + } + + /* Report the rank entries in sorted order. */ + for (bt_n = bt_first (&bt); + bt_n != NULL; + bt_n = bt_next (&bt, bt_n) ) + { + const struct rank_entry *re = + bt_data (bt_n, const struct rank_entry, btn); + struct string str; ds_init_empty (&str); var_append_value_name (nst->indep_var, &re->group, &str); tab_text (table, 1, row, TAB_LEFT, ds_cstr (&str)); - tab_double (table, 2, row, TAB_LEFT, re->n, &F_8_0); - tab_double (table, 3, row, TAB_LEFT, re->sum_of_ranks / re->n, 0); + tab_double (table, 2, row, TAB_LEFT, re->n, NULL, RC_INTEGER); + tab_double (table, 3, row, TAB_LEFT, re->sum_of_ranks / re->n, NULL, RC_OTHER); tot += re->n; row++; ds_destroy (&str); } + tab_double (table, 2, row, TAB_LEFT, - tot, &F_8_0); + tot, NULL, RC_INTEGER); tab_text (table, 1, row++, TAB_LEFT, _("Total")); } @@ -329,19 +362,19 @@ show_sig_box (const struct n_sample_test *nst, const struct kw *kw) for (i = 0 ; i < nst->n_vars; ++i) { const double df = hmap_count (&kw[i].map) - 1; - tab_text (table, column_headers + 1 + i, 0, TAT_TITLE, + tab_text (table, column_headers + 1 + i, 0, TAT_TITLE, var_to_string (nst->vars[i]) ); tab_double (table, column_headers + 1 + i, 1, 0, - kw[i].h, 0); + kw[i].h, NULL, RC_OTHER); tab_double (table, column_headers + 1 + i, 2, 0, - df, &F_8_0); + df, NULL, RC_INTEGER); tab_double (table, column_headers + 1 + i, 3, 0, gsl_cdf_chisq_Q (kw[i].h, df), - 0); + NULL, RC_PVALUE); } tab_submit (table);