X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flanguage%2Fstats%2Fkruskal-wallis.c;h=a9c269efedebfb7afaae9e86b910defeca8bcd23;hb=0e0cabc772b5f3a416e1e4e1dc021e196ac2c443;hp=420b1a038d89f33cac8c6ce8fb5ca9a1aab7dcf7;hpb=c8b02c29026c095ce912faf5fdba7e29b42cb135;p=pspp diff --git a/src/language/stats/kruskal-wallis.c b/src/language/stats/kruskal-wallis.c index 420b1a038d..a9c269efed 100644 --- a/src/language/stats/kruskal-wallis.c +++ b/src/language/stats/kruskal-wallis.c @@ -1,5 +1,5 @@ /* Pspp - a program for statistical analysis. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,25 +22,26 @@ #include #include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -#include "minmax.h" -#include "xalloc.h" - - +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/subcase.h" +#include "data/variable.h" +#include "libpspp/assertion.h" +#include "libpspp/hmap.h" +#include "libpspp/bt.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "math/sort.h" +#include "output/tab.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" + + +/* Returns true iff the independent variable lies in the range [nst->val1, nst->val2] */ static bool include_func (const struct ccase *c, void *aux) { @@ -59,12 +60,28 @@ include_func (const struct ccase *c, void *aux) struct rank_entry { struct hmap_node node; + struct bt_node btn; union value group; double sum_of_ranks; double n; }; + +static int +compare_rank_entries_3way (const struct bt_node *a, + const struct bt_node *b, + const void *aux) +{ + const struct variable *var = aux; + const struct rank_entry *rea = bt_data (a, struct rank_entry, btn); + const struct rank_entry *reb = bt_data (b, struct rank_entry, btn); + + return value_compare_3way (&rea->group, &reb->group, var_get_width (var)); +} + + +/* Return the entry with the key GROUP or null if there is no such entry */ static struct rank_entry * find_rank_entry (const struct hmap *map, const union value *group, size_t width) { @@ -76,10 +93,11 @@ find_rank_entry (const struct hmap *map, const union value *group, size_t width) if (0 == value_compare_3way (group, &re->group, width)) return re; } - + return re; } +/* Calculates the adjustment necessary for tie compensation */ static void distinct_callback (double v UNUSED, casenumber t, double w UNUSED, void *aux) { @@ -119,7 +137,7 @@ kruskal_wallis_execute (const struct dataset *ds, struct kw *kw = xcalloc (nst->n_vars, sizeof *kw); /* If the independent variable is missing, then we ignore the case */ - input = casereader_create_filter_missing (input, + input = casereader_create_filter_missing (input, &nst->indep_var, 1, exclude, NULL, NULL); @@ -127,7 +145,8 @@ kruskal_wallis_execute (const struct dataset *ds, input = casereader_create_filter_weight (input, dict, &warn, NULL); /* Remove all those cases which are outside the range (val1, val2) */ - input = casereader_create_filter_func (input, include_func, NULL, nst, NULL); + input = casereader_create_filter_func (input, include_func, NULL, + CONST_CAST (struct n_sample_test *, nst), NULL); proto = casereader_get_proto (input); rank_idx = caseproto_get_n_widths (proto); @@ -148,7 +167,7 @@ kruskal_wallis_execute (const struct dataset *ds, exclude, NULL, NULL); - rr = casereader_create_append_rank (r, + rr = casereader_create_append_rank (r, nst->vars[i], dict_get_weight (dict), &rerr, @@ -159,7 +178,7 @@ kruskal_wallis_execute (const struct dataset *ds, { const union value *group = case_data (c, nst->indep_var); const size_t group_var_width = var_get_width (nst->indep_var); - struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width); + struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width); if ( NULL == rank) { @@ -180,6 +199,7 @@ kruskal_wallis_execute (const struct dataset *ds, casereader_destroy (rr); + /* Calculate the value of h */ { struct rank_entry *mre; double n = 0.0; @@ -192,22 +212,33 @@ kruskal_wallis_execute (const struct dataset *ds, total_n_groups ++; } kw[i].h *= 12 / (n * ( n + 1)); - kw[i].h -= 3 * (n + 1) ; + kw[i].h -= 3 * (n + 1) ; kw[i].h /= 1 - tiebreaker/ (pow3 (n) - n); } } casereader_destroy (input); - + show_ranks_box (nst, kw, total_n_groups); show_sig_box (nst, kw); + /* Cleanup allocated memory */ + for (i = 0 ; i < nst->n_vars; ++i) + { + struct rank_entry *mre, *next; + HMAP_FOR_EACH_SAFE (mre, next, struct rank_entry, node, &kw[i].map) + { + hmap_delete (&kw[i].map, &mre->node); + free (mre); + } + hmap_destroy (&kw[i].map); + } + free (kw); } -#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -215,6 +246,7 @@ kruskal_wallis_execute (const struct dataset *ds, static void show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_groups) { + int row; int i; const int row_headers = 2; const int column_headers = 1; @@ -233,7 +265,7 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_grou tab_box (table, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); - tab_text (table, 1, 0, TAT_TITLE, + tab_text (table, 1, 0, TAT_TITLE, var_to_string (nst->indep_var) ); @@ -244,36 +276,53 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_grou tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); - int x = column_headers; + row = column_headers; for (i = 0 ; i < nst->n_vars ; ++i) { int tot = 0; - const struct rank_entry *re; + struct rank_entry *re_x; + struct bt_node *bt_n = NULL; + struct bt bt; if (i > 0) - tab_hline (table, TAL_1, 0, tab_nc (table) -1, x); - - tab_text (table, 0, x, + tab_hline (table, TAL_1, 0, tab_nc (table) -1, row); + + tab_text (table, 0, row, TAT_TITLE, var_to_string (nst->vars[i])); - HMAP_FOR_EACH (re, const struct rank_entry, node, &kw[i].map) + /* Sort the rank entries, by iteratin the hash and putting the entries + into a binary tree. */ + bt_init (&bt, compare_rank_entries_3way, nst->vars[i]); + HMAP_FOR_EACH (re_x, struct rank_entry, node, &kw[i].map) { + bt_insert (&bt, &re_x->btn); + } + + /* Report the rank entries in sorted order. */ + for (bt_n = bt_first (&bt); + bt_n != NULL; + bt_n = bt_next (&bt, bt_n) ) + { + const struct rank_entry *re = + bt_data (bt_n, const struct rank_entry, btn); + struct string str; ds_init_empty (&str); var_append_value_name (nst->indep_var, &re->group, &str); - tab_text (table, 1, x, TAB_LEFT, ds_cstr (&str)); - tab_double (table, 2, x, TAB_LEFT, re->n, &F_8_0); - tab_double (table, 3, x, TAB_LEFT, re->sum_of_ranks / re->n, 0); + tab_text (table, 1, row, TAB_LEFT, ds_cstr (&str)); + tab_double (table, 2, row, TAB_LEFT, re->n, NULL, RC_INTEGER); + tab_double (table, 3, row, TAB_LEFT, re->sum_of_ranks / re->n, NULL, RC_OTHER); tot += re->n; - x++; + row++; ds_destroy (&str); } - tab_double (table, 2, x, TAB_LEFT, - tot, &F_8_0); - tab_text (table, 1, x++, TAB_LEFT, _("Total")); + + tab_double (table, 2, row, TAB_LEFT, + tot, NULL, RC_INTEGER); + tab_text (table, 1, row++, TAB_LEFT, _("Total")); } tab_submit (table); @@ -313,19 +362,19 @@ show_sig_box (const struct n_sample_test *nst, const struct kw *kw) for (i = 0 ; i < nst->n_vars; ++i) { const double df = hmap_count (&kw[i].map) - 1; - tab_text (table, column_headers + 1 + i, 0, TAT_TITLE, + tab_text (table, column_headers + 1 + i, 0, TAT_TITLE, var_to_string (nst->vars[i]) ); tab_double (table, column_headers + 1 + i, 1, 0, - kw[i].h, 0); + kw[i].h, NULL, RC_OTHER); tab_double (table, column_headers + 1 + i, 2, 0, - df, &F_8_0); + df, NULL, RC_INTEGER); tab_double (table, column_headers + 1 + i, 3, 0, gsl_cdf_chisq_Q (kw[i].h, df), - 0); + NULL, RC_PVALUE); } tab_submit (table);