/* Pspp - a program for statistical analysis.
- Copyright (C) 2010 Free Software Foundation, Inc.
+ Copyright (C) 2010, 2011, 2022 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <gsl/gsl_cdf.h>
#include <math.h>
-#include <data/casereader.h>
-#include <data/casewriter.h>
-#include <data/dictionary.h>
-#include <data/format.h>
-#include <data/procedure.h>
-#include <data/subcase.h>
-#include <data/variable.h>
-
-#include <libpspp/assertion.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/hmap.h>
-#include <math/sort.h>
-
-
-#include "minmax.h"
-#include "xalloc.h"
+#include "data/casereader.h"
+#include "data/casewriter.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/subcase.h"
+#include "data/variable.h"
+#include "libpspp/assertion.h"
+#include "libpspp/hmap.h"
+#include "libpspp/bt.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "math/sort.h"
+#include "output/pivot-table.h"
+
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
+#include "gettext.h"
+#define N_(msgid) msgid
+#define _(msgid) gettext (msgid)
+/* Returns true iff the independent variable lies between nst->val1 and nst->val2 */
static bool
include_func (const struct ccase *c, void *aux)
{
const struct n_sample_test *nst = aux;
- if (0 < value_compare_3way (&nst->val1, case_data (c, nst->indep_var), var_get_width (nst->indep_var)))
+ const union value *smaller = 0;
+ const union value *larger = 0;
+ int x = value_compare_3way (&nst->val1, &nst->val2, var_get_width (nst->indep_var));
+ if (x < 0)
+ {
+ smaller = &nst->val1;
+ larger = &nst->val2;
+ }
+ else
+ {
+ smaller = &nst->val2;
+ larger = &nst->val1;
+ }
+
+ if (0 < value_compare_3way (smaller, case_data (c, nst->indep_var),
+ var_get_width (nst->indep_var)))
return false;
- if (0 > value_compare_3way (&nst->val2, case_data (c, nst->indep_var), var_get_width (nst->indep_var)))
+ if (0 > value_compare_3way (larger, case_data (c, nst->indep_var),
+ var_get_width (nst->indep_var)))
return false;
return true;
struct rank_entry
{
struct hmap_node node;
+ struct bt_node btn;
union value group;
double sum_of_ranks;
double n;
};
+
+static int
+compare_rank_entries_3way (const struct bt_node *a,
+ const struct bt_node *b,
+ const void *aux)
+{
+ const struct variable *var = aux;
+ const struct rank_entry *rea = BT_DATA (a, struct rank_entry, btn);
+ const struct rank_entry *reb = BT_DATA (b, struct rank_entry, btn);
+
+ return value_compare_3way (&rea->group, &reb->group, var_get_width (var));
+}
+
+
+/* Return the entry with the key GROUP or null if there is no such entry */
static struct rank_entry *
find_rank_entry (const struct hmap *map, const union value *group, size_t width)
{
if (0 == value_compare_3way (group, &re->group, width))
return re;
}
-
+
return re;
}
+/* Calculates the adjustment necessary for tie compensation */
static void
distinct_callback (double v UNUSED, casenumber t, double w UNUSED, void *aux)
{
double h;
};
-static void show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_groups);
-static void show_sig_box (const struct n_sample_test *nst, const struct kw *kw);
+static void show_ranks_box (const struct n_sample_test *, const struct kw *);
+static void show_sig_box (const struct n_sample_test *, const struct kw *);
void
kruskal_wallis_execute (const struct dataset *ds,
int total_n_groups = 0.0;
- struct kw *kw = xcalloc (nst->n_vars, sizeof *kw);
+ struct kw *kw = XCALLOC (nst->n_vars, struct kw);
/* If the independent variable is missing, then we ignore the case */
- input = casereader_create_filter_missing (input,
+ input = casereader_create_filter_missing (input,
&nst->indep_var, 1,
exclude,
NULL, NULL);
input = casereader_create_filter_weight (input, dict, &warn, NULL);
/* Remove all those cases which are outside the range (val1, val2) */
- input = casereader_create_filter_func (input, include_func, NULL, nst, NULL);
+ input = casereader_create_filter_func (input, include_func, NULL,
+ CONST_CAST (struct n_sample_test *, nst), NULL);
proto = casereader_get_proto (input);
rank_idx = caseproto_get_n_widths (proto);
exclude,
NULL, NULL);
- rr = casereader_create_append_rank (r,
+ rr = casereader_create_append_rank (r,
nst->vars[i],
dict_get_weight (dict),
&rerr,
{
const union value *group = case_data (c, nst->indep_var);
const size_t group_var_width = var_get_width (nst->indep_var);
- struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width);
+ struct rank_entry *rank = find_rank_entry (&kw[i].map, group, group_var_width);
- if ( NULL == rank)
+ if (NULL == rank)
{
rank = xzalloc (sizeof *rank);
value_clone (&rank->group, group, group_var_width);
value_hash (&rank->group, group_var_width, 0));
}
- rank->sum_of_ranks += case_data_idx (c, rank_idx)->f;
+ rank->sum_of_ranks += case_num_idx (c, rank_idx);
rank->n += dict_get_case_weight (dict, c, &warn);
/* If this assertion fires, then either the data wasn't sorted or some other
- problem occured */
+ problem occurred */
assert (rerr == 0);
}
casereader_destroy (rr);
+ /* Calculate the value of h */
{
struct rank_entry *mre;
double n = 0.0;
total_n_groups ++;
}
- kw[i].h *= 12 / (n * ( n + 1));
- kw[i].h -= 3 * (n + 1) ;
+ kw[i].h *= 12 / (n * (n + 1));
+ kw[i].h -= 3 * (n + 1) ;
kw[i].h /= 1 - tiebreaker/ (pow3 (n) - n);
}
}
casereader_destroy (input);
-
- show_ranks_box (nst, kw, total_n_groups);
+
+ show_ranks_box (nst, kw);
show_sig_box (nst, kw);
+ /* Cleanup allocated memory */
+ for (i = 0 ; i < nst->n_vars; ++i)
+ {
+ struct rank_entry *mre, *next;
+ HMAP_FOR_EACH_SAFE (mre, next, struct rank_entry, node, &kw[i].map)
+ {
+ hmap_delete (&kw[i].map, &mre->node);
+ free (mre);
+ }
+ hmap_destroy (&kw[i].map);
+ }
+
free (kw);
}
-
\f
-#include <output/tab.h>
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
-
-
static void
-show_ranks_box (const struct n_sample_test *nst, const struct kw *kw, int n_groups)
+show_ranks_box (const struct n_sample_test *nst, const struct kw *kw)
{
- int i;
- const int row_headers = 2;
- const int column_headers = 1;
- struct tab_table *table =
- tab_create (row_headers + 2, column_headers + n_groups + nst->n_vars);
-
- tab_headers (table, row_headers, 0, column_headers, 0);
-
- tab_title (table, _("Ranks"));
-
- /* Vertical lines inside the box */
- tab_box (table, 1, 0, -1, TAL_1,
- row_headers, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
-
- /* Box around the table */
- tab_box (table, TAL_2, TAL_2, -1, -1,
- 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
+ struct pivot_table *table = pivot_table_create (N_("Ranks"));
- tab_text (table, 1, 0, TAT_TITLE,
- var_to_string (nst->indep_var)
- );
+ pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
+ N_("N"), PIVOT_RC_INTEGER,
+ N_("Mean Rank"), PIVOT_RC_OTHER);
- tab_text (table, 3, 0, 0, _("Mean Rank"));
- tab_text (table, 2, 0, 0, _("N"));
+ struct pivot_dimension *variables = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, N_("Variables"));
- tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
- tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
-
-
- int x = column_headers;
- for (i = 0 ; i < nst->n_vars ; ++i)
+ for (size_t i = 0 ; i < nst->n_vars ; ++i)
{
+ /* Sort the rank entries, by iteratin the hash and putting the entries
+ into a binary tree. */
+ struct bt bt = BT_INITIALIZER(compare_rank_entries_3way, nst->vars[i]);
+ struct rank_entry *re_x;
+ HMAP_FOR_EACH (re_x, struct rank_entry, node, &kw[i].map)
+ bt_insert (&bt, &re_x->btn);
+
+ /* Report the rank entries in sorted order. */
+ struct pivot_category *group = pivot_category_create_group__ (
+ variables->root, pivot_value_new_variable (nst->vars[i]));
int tot = 0;
const struct rank_entry *re;
-
- if (i > 0)
- tab_hline (table, TAL_1, 0, tab_nc (table) -1, x);
-
- tab_text (table, 0, x,
- TAT_TITLE, var_to_string (nst->vars[i]));
-
- HMAP_FOR_EACH (re, const struct rank_entry, node, &kw[i].map)
- {
- struct string str;
- ds_init_empty (&str);
-
+ BT_FOR_EACH (re, struct rank_entry, btn, &bt)
+ {
+ struct string str = DS_EMPTY_INITIALIZER;
var_append_value_name (nst->indep_var, &re->group, &str);
+ int row = pivot_category_create_leaf (
+ group, pivot_value_new_user_text_nocopy (ds_steal_cstr (&str)));
- tab_text (table, 1, x, TAB_LEFT, ds_cstr (&str));
- tab_double (table, 2, x, TAB_LEFT, re->n, &F_8_0);
- tab_double (table, 3, x, TAB_LEFT, re->sum_of_ranks / re->n, 0);
+ double entries[] = { re->n, re->sum_of_ranks / re->n };
+ for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
+ pivot_table_put2 (table, j, row,
+ pivot_value_new_number (entries[j]));
tot += re->n;
- x++;
- ds_destroy (&str);
}
- tab_double (table, 2, x, TAB_LEFT,
- tot, &F_8_0);
- tab_text (table, 1, x++, TAB_LEFT, _("Total"));
+
+ int row = pivot_category_create_leaves (group, N_("Total"));
+ pivot_table_put2 (table, 0, row, pivot_value_new_number (tot));
}
- tab_submit (table);
+ pivot_table_submit (table);
}
-
static void
show_sig_box (const struct n_sample_test *nst, const struct kw *kw)
{
- int i;
- const int row_headers = 1;
- const int column_headers = 1;
- struct tab_table *table =
- tab_create (row_headers + nst->n_vars * 2, column_headers + 3);
-
- tab_headers (table, row_headers, 0, column_headers, 0);
-
- tab_title (table, _("Test Statistics"));
-
- tab_text (table, 0, column_headers,
- TAT_TITLE | TAB_LEFT , _("Chi-Square"));
-
- tab_text (table, 0, 1 + column_headers,
- TAT_TITLE | TAB_LEFT, _("df"));
+ struct pivot_table *table = pivot_table_create (N_("Test Statistics"));
- tab_text (table, 0, 2 + column_headers,
- TAT_TITLE | TAB_LEFT, _("Asymp. Sig."));
+ pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Statistics"),
+ N_("Chi-Square"), PIVOT_RC_OTHER,
+ N_("df"), PIVOT_RC_INTEGER,
+ N_("Asymp. Sig."), PIVOT_RC_SIGNIFICANCE);
- /* Box around the table */
- tab_box (table, TAL_2, TAL_2, -1, -1,
- 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
+ struct pivot_dimension *variables = pivot_dimension_create (
+ table, PIVOT_AXIS_COLUMN, N_("Variables"));
-
- tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
- tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
-
- for (i = 0 ; i < nst->n_vars; ++i)
+ for (size_t i = 0 ; i < nst->n_vars; ++i)
{
- const double df = hmap_count (&kw[i].map) - 1;
- tab_text (table, column_headers + 1 + i, 0, TAT_TITLE,
- var_to_string (nst->vars[i])
- );
-
- tab_double (table, column_headers + 1 + i, 1, 0,
- kw[i].h, 0);
-
- tab_double (table, column_headers + 1 + i, 2, 0,
- df, &F_8_0);
-
- tab_double (table, column_headers + 1 + i, 3, 0,
- gsl_cdf_chisq_Q (kw[i].h, df),
- 0);
+ int col = pivot_category_create_leaf (
+ variables->root, pivot_value_new_variable (nst->vars[i]));
+
+ double df = hmap_count (&kw[i].map) - 1;
+ double sig = gsl_cdf_chisq_Q (kw[i].h, df);
+ double entries[] = { kw[i].h, df, sig };
+ for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
+ pivot_table_put2 (table, j, col, pivot_value_new_number (entries[j]));
}
- tab_submit (table);
+ pivot_table_submit (table);
}