X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fkruskal-wallis.c;h=6d54bae78208d74b98db28007cf831d3cbc37c94;hb=06ac0c2d623e9ae54c3e22a2d6740219ab357b21;hp=63ede78a34377c0a9d5bdcf460922a736af5379a;hpb=758bb90bac15899ca7f18a55dc4f90e1345c7179;p=pspp diff --git a/src/language/stats/kruskal-wallis.c b/src/language/stats/kruskal-wallis.c index 63ede78a34..6d54bae782 100644 --- a/src/language/stats/kruskal-wallis.c +++ b/src/language/stats/kruskal-wallis.c @@ -1,5 +1,5 @@ /* Pspp - a program for statistical analysis. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011, 2022 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,34 +22,54 @@ #include #include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -#include "minmax.h" -#include "xalloc.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/subcase.h" +#include "data/variable.h" +#include "libpspp/assertion.h" +#include "libpspp/hmap.h" +#include "libpspp/bt.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "math/sort.h" +#include "output/pivot-table.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" +#include "gettext.h" +#define N_(msgid) msgid +#define _(msgid) gettext (msgid) +/* Returns true iff the independent variable lies between nst->val1 and nst->val2 */ static bool include_func (const struct ccase *c, void *aux) { const struct n_sample_test *nst = aux; - if (0 < value_compare_3way (&nst->val1, case_data (c, nst->indep_var), var_get_width (nst->indep_var))) + const union value *smaller = 0; + const union value *larger = 0; + int x = value_compare_3way (&nst->val1, &nst->val2, var_get_width (nst->indep_var)); + if (x < 0) + { + smaller = &nst->val1; + larger = &nst->val2; + } + else + { + smaller = &nst->val2; + larger = &nst->val1; + } + + if (0 < value_compare_3way (smaller, case_data (c, nst->indep_var), + var_get_width (nst->indep_var))) return false; - if (0 > value_compare_3way (&nst->val2, case_data (c, nst->indep_var), var_get_width (nst->indep_var))) + if (0 > value_compare_3way (larger, case_data (c, nst->indep_var), + var_get_width (nst->indep_var))) return false; return true; @@ -59,12 +79,28 @@ include_func (const struct ccase *c, void *aux) struct rank_entry { struct hmap_node node; + struct bt_node btn; union value group; double sum_of_ranks; double n; }; + +static int +compare_rank_entries_3way (const struct bt_node *a, + const struct bt_node *b, + const void *aux) +{ + const struct variable *var = aux; + const struct rank_entry *rea = BT_DATA (a, struct rank_entry, btn); + const struct rank_entry *reb = BT_DATA (b, struct rank_entry, btn); + + return value_compare_3way (&rea->group, &reb->group, var_get_width (var)); +} + + +/* Return the entry with the key GROUP or null if there is no such entry */ static struct rank_entry * find_rank_entry (const struct hmap *map, const union value *group, size_t width) { @@ -76,10 +112,11 @@ find_rank_entry (const struct hmap *map, const union value *group, size_t width) if (0 == value_compare_3way (group, &re->group, width)) return re; } - + return re; } +/* Calculates the adjustment necessary for tie compensation */ static void distinct_callback (double v UNUSED, casenumber t, double w UNUSED, void *aux) { @@ -95,8 +132,8 @@ struct kw double h; }; -static void show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_groups); -static void show_sig_box (const struct n_sample_test *nst, const struct kw *kw); +static void show_ranks_box (const struct n_sample_test *, const struct kw *); +static void show_sig_box (const struct n_sample_test *, const struct kw *); void kruskal_wallis_execute (const struct dataset *ds, @@ -116,10 +153,10 @@ kruskal_wallis_execute (const struct dataset *ds, int total_n_groups = 0.0; - struct kw *kw = xcalloc (nst->n_vars, sizeof *kw); + struct kw *kw = XCALLOC (nst->n_vars, struct kw); /* If the independent variable is missing, then we ignore the case */ - input = casereader_create_filter_missing (input, + input = casereader_create_filter_missing (input, &nst->indep_var, 1, exclude, NULL, NULL); @@ -127,7 +164,8 @@ kruskal_wallis_execute (const struct dataset *ds, input = casereader_create_filter_weight (input, dict, &warn, NULL); /* Remove all those cases which are outside the range (val1, val2) */ - input = casereader_create_filter_func (input, include_func, NULL, nst, NULL); + input = casereader_create_filter_func (input, include_func, NULL, + CONST_CAST (struct n_sample_test *, nst), NULL); proto = casereader_get_proto (input); rank_idx = caseproto_get_n_widths (proto); @@ -148,7 +186,7 @@ kruskal_wallis_execute (const struct dataset *ds, exclude, NULL, NULL); - rr = casereader_create_append_rank (r, + rr = casereader_create_append_rank (r, nst->vars[i], dict_get_weight (dict), &rerr, @@ -159,9 +197,9 @@ kruskal_wallis_execute (const struct dataset *ds, { const union value *group = case_data (c, nst->indep_var); const size_t group_var_width = var_get_width (nst->indep_var); - struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width); + struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width); - if ( NULL == rank) + if (NULL == rank) { rank = xzalloc (sizeof *rank); value_clone (&rank->group, group, group_var_width); @@ -170,16 +208,17 @@ kruskal_wallis_execute (const struct dataset *ds, value_hash (&rank->group, group_var_width, 0)); } - rank->sum_of_ranks += case_data_idx (c, rank_idx)->f; + rank->sum_of_ranks += case_num_idx (c, rank_idx); rank->n += dict_get_case_weight (dict, c, &warn); /* If this assertion fires, then either the data wasn't sorted or some other - problem occured */ + problem occurred */ assert (rerr == 0); } casereader_destroy (rr); + /* Calculate the value of h */ { struct rank_entry *mre; double n = 0.0; @@ -191,18 +230,19 @@ kruskal_wallis_execute (const struct dataset *ds, total_n_groups ++; } - kw[i].h *= 12 / (n * ( n + 1)); - kw[i].h -= 3 * (n + 1) ; + kw[i].h *= 12 / (n * (n + 1)); + kw[i].h -= 3 * (n + 1) ; kw[i].h /= 1 - tiebreaker/ (pow3 (n) - n); } } casereader_destroy (input); - - show_ranks_box (nst, kw, total_n_groups); + + show_ranks_box (nst, kw); show_sig_box (nst, kw); + /* Cleanup allocated memory */ for (i = 0 ; i < nst->n_vars; ++i) { struct rank_entry *mre, *next; @@ -216,128 +256,79 @@ kruskal_wallis_execute (const struct dataset *ds, free (kw); } - -#include -#include "gettext.h" -#define _(msgid) gettext (msgid) - - static void -show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_groups) +show_ranks_box (const struct n_sample_test *nst, const struct kw *kw) { - int i; - const int row_headers = 2; - const int column_headers = 1; - struct tab_table *table = - tab_create (row_headers + 2, column_headers + n_groups + nst->n_vars); + struct pivot_table *table = pivot_table_create (N_("Ranks")); - tab_headers (table, row_headers, 0, column_headers, 0); + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("N"), PIVOT_RC_INTEGER, + N_("Mean Rank"), PIVOT_RC_OTHER); - tab_title (table, _("Ranks")); + struct pivot_dimension *variables = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Variables")); - /* Vertical lines inside the box */ - tab_box (table, 1, 0, -1, TAL_1, - row_headers, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); - - /* Box around the table */ - tab_box (table, TAL_2, TAL_2, -1, -1, - 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); - - tab_text (table, 1, 0, TAT_TITLE, - var_to_string (nst->indep_var) - ); - - tab_text (table, 3, 0, 0, _("Mean Rank")); - tab_text (table, 2, 0, 0, _("N")); - - tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers); - tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); - - - int x = column_headers; - for (i = 0 ; i < nst->n_vars ; ++i) + for (size_t i = 0 ; i < nst->n_vars ; ++i) { + /* Sort the rank entries, by iteratin the hash and putting the entries + into a binary tree. */ + struct bt bt = BT_INITIALIZER(compare_rank_entries_3way, nst->vars[i]); + struct rank_entry *re_x; + HMAP_FOR_EACH (re_x, struct rank_entry, node, &kw[i].map) + bt_insert (&bt, &re_x->btn); + + /* Report the rank entries in sorted order. */ + struct pivot_category *group = pivot_category_create_group__ ( + variables->root, pivot_value_new_variable (nst->vars[i])); int tot = 0; const struct rank_entry *re; - - if (i > 0) - tab_hline (table, TAL_1, 0, tab_nc (table) -1, x); - - tab_text (table, 0, x, - TAT_TITLE, var_to_string (nst->vars[i])); - - HMAP_FOR_EACH (re, const struct rank_entry, node, &kw[i].map) - { - struct string str; - ds_init_empty (&str); - + BT_FOR_EACH (re, struct rank_entry, btn, &bt) + { + struct string str = DS_EMPTY_INITIALIZER; var_append_value_name (nst->indep_var, &re->group, &str); + int row = pivot_category_create_leaf ( + group, pivot_value_new_user_text_nocopy (ds_steal_cstr (&str))); - tab_text (table, 1, x, TAB_LEFT, ds_cstr (&str)); - tab_double (table, 2, x, TAB_LEFT, re->n, &F_8_0); - tab_double (table, 3, x, TAB_LEFT, re->sum_of_ranks / re->n, 0); + double entries[] = { re->n, re->sum_of_ranks / re->n }; + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + pivot_table_put2 (table, j, row, + pivot_value_new_number (entries[j])); tot += re->n; - x++; - ds_destroy (&str); } - tab_double (table, 2, x, TAB_LEFT, - tot, &F_8_0); - tab_text (table, 1, x++, TAB_LEFT, _("Total")); + + int row = pivot_category_create_leaves (group, N_("Total")); + pivot_table_put2 (table, 0, row, pivot_value_new_number (tot)); } - tab_submit (table); + pivot_table_submit (table); } - static void show_sig_box (const struct n_sample_test *nst, const struct kw *kw) { - int i; - const int row_headers = 1; - const int column_headers = 1; - struct tab_table *table = - tab_create (row_headers + nst->n_vars * 2, column_headers + 3); - - tab_headers (table, row_headers, 0, column_headers, 0); - - tab_title (table, _("Test Statistics")); - - tab_text (table, 0, column_headers, - TAT_TITLE | TAB_LEFT , _("Chi-Square")); - - tab_text (table, 0, 1 + column_headers, - TAT_TITLE | TAB_LEFT, _("df")); - - tab_text (table, 0, 2 + column_headers, - TAT_TITLE | TAB_LEFT, _("Asymp. Sig.")); + struct pivot_table *table = pivot_table_create (N_("Test Statistics")); - /* Box around the table */ - tab_box (table, TAL_2, TAL_2, -1, -1, - 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); + pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Statistics"), + N_("Chi-Square"), PIVOT_RC_OTHER, + N_("df"), PIVOT_RC_INTEGER, + N_("Asymp. Sig."), PIVOT_RC_SIGNIFICANCE); + struct pivot_dimension *variables = pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Variables")); - tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers); - tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); - - for (i = 0 ; i < nst->n_vars; ++i) + for (size_t i = 0 ; i < nst->n_vars; ++i) { - const double df = hmap_count (&kw[i].map) - 1; - tab_text (table, column_headers + 1 + i, 0, TAT_TITLE, - var_to_string (nst->vars[i]) - ); - - tab_double (table, column_headers + 1 + i, 1, 0, - kw[i].h, 0); - - tab_double (table, column_headers + 1 + i, 2, 0, - df, &F_8_0); - - tab_double (table, column_headers + 1 + i, 3, 0, - gsl_cdf_chisq_Q (kw[i].h, df), - 0); + int col = pivot_category_create_leaf ( + variables->root, pivot_value_new_variable (nst->vars[i])); + + double df = hmap_count (&kw[i].map) - 1; + double sig = gsl_cdf_chisq_Q (kw[i].h, df); + double entries[] = { kw[i].h, df, sig }; + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + pivot_table_put2 (table, j, col, pivot_value_new_number (entries[j])); } - tab_submit (table); + pivot_table_submit (table); }