X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fkruskal-wallis.c;h=cea302bcb9f29db66f6becedb028ec328edd2af8;hb=9cbe8f135bb944db20c630e991d995af0027d058;hp=63ede78a34377c0a9d5bdcf460922a736af5379a;hpb=758bb90bac15899ca7f18a55dc4f90e1345c7179;p=pspp diff --git a/src/language/stats/kruskal-wallis.c b/src/language/stats/kruskal-wallis.c index 63ede78a34..cea302bcb9 100644 --- a/src/language/stats/kruskal-wallis.c +++ b/src/language/stats/kruskal-wallis.c @@ -1,5 +1,5 @@ /* Pspp - a program for statistical analysis. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,25 +22,26 @@ #include #include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -#include "minmax.h" -#include "xalloc.h" - - +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/subcase.h" +#include "data/variable.h" +#include "libpspp/assertion.h" +#include "libpspp/hmap.h" +#include "libpspp/bt.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "math/sort.h" +#include "output/tab.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" + + +/* Returns true iff the independent variable lies in the range [nst->val1, nst->val2] */ static bool include_func (const struct ccase *c, void *aux) { @@ -59,12 +60,28 @@ include_func (const struct ccase *c, void *aux) struct rank_entry { struct hmap_node node; + struct bt_node btn; union value group; double sum_of_ranks; double n; }; + +static int +compare_rank_entries_3way (const struct bt_node *a, + const struct bt_node *b, + const void *aux) +{ + const struct variable *var = aux; + const struct rank_entry *rea = bt_data (a, 
struct rank_entry, btn); + const struct rank_entry *reb = bt_data (b, struct rank_entry, btn); + + return value_compare_3way (&rea->group, &reb->group, var_get_width (var)); +} + + +/* Return the entry with the key GROUP or null if there is no such entry */ static struct rank_entry * find_rank_entry (const struct hmap *map, const union value *group, size_t width) { @@ -80,6 +97,7 @@ find_rank_entry (const struct hmap *map, const union value *group, size_t width) return re; } +/* Calculates the adjustment necessary for tie compensation */ static void distinct_callback (double v UNUSED, casenumber t, double w UNUSED, void *aux) { @@ -127,7 +145,8 @@ kruskal_wallis_execute (const struct dataset *ds, input = casereader_create_filter_weight (input, dict, &warn, NULL); /* Remove all those cases which are outside the range (val1, val2) */ - input = casereader_create_filter_func (input, include_func, NULL, nst, NULL); + input = casereader_create_filter_func (input, include_func, NULL, + CONST_CAST (struct n_sample_test *, nst), NULL); proto = casereader_get_proto (input); rank_idx = caseproto_get_n_widths (proto); @@ -180,6 +199,7 @@ kruskal_wallis_execute (const struct dataset *ds, casereader_destroy (rr); + /* Calculate the value of h */ { struct rank_entry *mre; double n = 0.0; @@ -203,6 +223,7 @@ kruskal_wallis_execute (const struct dataset *ds, show_ranks_box (nst, kw, total_n_groups); show_sig_box (nst, kw); + /* Cleanup allocated memory */ for (i = 0 ; i < nst->n_vars; ++i) { struct rank_entry *mre, *next; @@ -218,7 +239,6 @@ kruskal_wallis_execute (const struct dataset *ds, } -#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -226,6 +246,7 @@ kruskal_wallis_execute (const struct dataset *ds, static void show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_groups) { + int row; int i; const int row_headers = 2; const int column_headers = 1; @@ -255,36 +276,53 @@ show_ranks_box (const struct n_sample_test *nst, const struct kw 
*kw, int n_grou tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); - int x = column_headers; + row = column_headers; for (i = 0 ; i < nst->n_vars ; ++i) { int tot = 0; - const struct rank_entry *re; + struct rank_entry *re_x; + struct bt_node *bt_n = NULL; + struct bt bt; if (i > 0) - tab_hline (table, TAL_1, 0, tab_nc (table) -1, x); + tab_hline (table, TAL_1, 0, tab_nc (table) -1, row); - tab_text (table, 0, x, + tab_text (table, 0, row, TAT_TITLE, var_to_string (nst->vars[i])); - HMAP_FOR_EACH (re, const struct rank_entry, node, &kw[i].map) + /* Sort the rank entries by iterating the hash and putting the entries + into a binary tree. */ + bt_init (&bt, compare_rank_entries_3way, nst->vars[i]); + HMAP_FOR_EACH (re_x, struct rank_entry, node, &kw[i].map) { + bt_insert (&bt, &re_x->btn); + } + + /* Report the rank entries in sorted order. */ + for (bt_n = bt_first (&bt); + bt_n != NULL; + bt_n = bt_next (&bt, bt_n) ) + { + const struct rank_entry *re = + bt_data (bt_n, const struct rank_entry, btn); + struct string str; ds_init_empty (&str); - + var_append_value_name (nst->indep_var, &re->group, &str); - - tab_text (table, 1, x, TAB_LEFT, ds_cstr (&str)); - tab_double (table, 2, x, TAB_LEFT, re->n, &F_8_0); - tab_double (table, 3, x, TAB_LEFT, re->sum_of_ranks / re->n, 0); - + + tab_text (table, 1, row, TAB_LEFT, ds_cstr (&str)); + tab_double (table, 2, row, TAB_LEFT, re->n, &F_8_0); + tab_double (table, 3, row, TAB_LEFT, re->sum_of_ranks / re->n, 0); + tot += re->n; - x++; + row++; ds_destroy (&str); } - tab_double (table, 2, x, TAB_LEFT, + + tab_double (table, 2, row, TAB_LEFT, tot, &F_8_0); - tab_text (table, 1, x++, TAB_LEFT, _("Total")); + tab_text (table, 1, row++, TAB_LEFT, _("Total")); } tab_submit (table);