X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fkruskal-wallis.c;h=65ce7865a3f83ba9baea594756f2ca80aa32e4b3;hb=refs%2Fheads%2Fcenter-titles;hp=51ee291f95651be59c024cf4c57c49c8528e3769;hpb=471d0e518a94de5305b4697f25641464761ef910;p=pspp diff --git a/src/language/stats/kruskal-wallis.c b/src/language/stats/kruskal-wallis.c index 51ee291f95..65ce7865a3 100644 --- a/src/language/stats/kruskal-wallis.c +++ b/src/language/stats/kruskal-wallis.c @@ -1,5 +1,5 @@ /* Pspp - a program for statistical analysis. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,25 +22,26 @@ #include #include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -#include "minmax.h" -#include "xalloc.h" - - +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/subcase.h" +#include "data/variable.h" +#include "libpspp/assertion.h" +#include "libpspp/hmap.h" +#include "libpspp/bt.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "math/sort.h" +#include "output/tab.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" + + +/* Returns true iff the independent variable lies in the range [nst->val1, nst->val2] */ static bool include_func (const struct ccase *c, void *aux) { @@ -59,12 +60,28 @@ include_func (const struct ccase *c, void *aux) struct rank_entry { struct hmap_node node; + struct bt_node btn; union value group; double sum_of_ranks; double n; }; + +static int +compare_rank_entries_3way (const struct bt_node *a, + const struct bt_node *b, + const void *aux) +{ + const struct variable *var = aux; + const struct rank_entry *rea = bt_data (a, struct 
rank_entry, btn); + const struct rank_entry *reb = bt_data (b, struct rank_entry, btn); + + return value_compare_3way (&rea->group, &reb->group, var_get_width (var)); +} + + +/* Return the entry with the key GROUP or null if there is no such entry */ static struct rank_entry * find_rank_entry (const struct hmap *map, const union value *group, size_t width) { @@ -80,6 +97,7 @@ find_rank_entry (const struct hmap *map, const union value *group, size_t width) return re; } +/* Calculates the adjustment necessary for tie compensation */ static void distinct_callback (double v UNUSED, casenumber t, double w UNUSED, void *aux) { @@ -181,6 +199,7 @@ kruskal_wallis_execute (const struct dataset *ds, casereader_destroy (rr); + /* Calculate the value of h */ { struct rank_entry *mre; double n = 0.0; @@ -204,6 +223,7 @@ kruskal_wallis_execute (const struct dataset *ds, show_ranks_box (nst, kw, total_n_groups); show_sig_box (nst, kw); + /* Cleanup allocated memory */ for (i = 0 ; i < nst->n_vars; ++i) { struct rank_entry *mre, *next; @@ -219,7 +239,6 @@ kruskal_wallis_execute (const struct dataset *ds, } -#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -261,7 +280,9 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_grou for (i = 0 ; i < nst->n_vars ; ++i) { int tot = 0; - const struct rank_entry *re; + struct rank_entry *re_x; + struct bt_node *bt_n = NULL; + struct bt bt; if (i > 0) tab_hline (table, TAL_1, 0, tab_nc (table) -1, row); @@ -269,23 +290,38 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_grou tab_text (table, 0, row, TAT_TITLE, var_to_string (nst->vars[i])); - HMAP_FOR_EACH (re, const struct rank_entry, node, &kw[i].map) + /* Sort the rank entries, by iterating the hash and putting the entries + into a binary tree. 
*/ + bt_init (&bt, compare_rank_entries_3way, nst->vars[i]); + HMAP_FOR_EACH (re_x, struct rank_entry, node, &kw[i].map) { + bt_insert (&bt, &re_x->btn); + } + + /* Report the rank entries in sorted order. */ + for (bt_n = bt_first (&bt); + bt_n != NULL; + bt_n = bt_next (&bt, bt_n) ) + { + const struct rank_entry *re = + bt_data (bt_n, const struct rank_entry, btn); + struct string str; ds_init_empty (&str); - + var_append_value_name (nst->indep_var, &re->group, &str); - + tab_text (table, 1, row, TAB_LEFT, ds_cstr (&str)); - tab_double (table, 2, row, TAB_LEFT, re->n, &F_8_0); - tab_double (table, 3, row, TAB_LEFT, re->sum_of_ranks / re->n, 0); - + tab_double (table, 2, row, TAB_LEFT, re->n, NULL, RC_INTEGER); + tab_double (table, 3, row, TAB_LEFT, re->sum_of_ranks / re->n, NULL, RC_OTHER); + tot += re->n; row++; ds_destroy (&str); } + tab_double (table, 2, row, TAB_LEFT, - tot, &F_8_0); + tot, NULL, RC_INTEGER); tab_text (table, 1, row++, TAB_LEFT, _("Total")); } @@ -331,14 +367,14 @@ show_sig_box (const struct n_sample_test *nst, const struct kw *kw) ); tab_double (table, column_headers + 1 + i, 1, 0, - kw[i].h, 0); + kw[i].h, NULL, RC_OTHER); tab_double (table, column_headers + 1 + i, 2, 0, - df, &F_8_0); + df, NULL, RC_INTEGER); tab_double (table, column_headers + 1 + i, 3, 0, gsl_cdf_chisq_Q (kw[i].h, df), - 0); + NULL, RC_PVALUE); } tab_submit (table);