From 76e1b0aef6b1cc3b911c4833565109eaa424f9a5 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Wed, 6 Jul 2011 20:56:42 +0200 Subject: [PATCH] Categoricals: Replace single value by a case, so that interactions can be supported --- src/language/stats/oneway.c | 20 ++++++----- src/math/categoricals.c | 67 +++++++++++++++++++++---------------- src/math/categoricals.h | 1 + src/math/interaction.c | 19 +++++++++++ src/math/interaction.h | 2 ++ 5 files changed, 72 insertions(+), 37 deletions(-) diff --git a/src/language/stats/oneway.c b/src/language/stats/oneway.c index 447bdad1..96713280 100644 --- a/src/language/stats/oneway.c +++ b/src/language/stats/oneway.c @@ -39,6 +39,7 @@ #include "linreg/sweep.h" #include "tukey/tukey.h" #include "math/categoricals.h" +#include "math/interaction.h" #include "math/covariance.h" #include "math/levene.h" #include "math/moments.h" @@ -651,7 +652,7 @@ run_oneway (const struct oneway_spec *cmd, for (v = 0; v < cmd->n_vars; ++v) { - struct interaction *inter = interaction_create (cmd->indep_var); + const struct interaction *inter = interaction_create (cmd->indep_var); ws.vws[v].cat = categoricals_create (&inter, 1, cmd->wv, cmd->exclude, makeit, updateit, CONST_CAST (struct variable *, @@ -1022,7 +1023,7 @@ show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace struct string vstr; - const union value *gval = categoricals_get_value_by_category (cats, count); + const struct ccase *gcc = categoricals_get_case_by_category (cats, count); const struct descriptive_data *dd = categoricals_get_user_data_by_category (cats, count); moments1_calculate (dd->mom, &n, &mean, &variance, NULL, NULL); @@ -1032,7 +1033,7 @@ show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace ds_init_empty (&vstr); - var_append_value_name (cmd->indep_var, gval, &vstr); + var_append_value_name (cmd->indep_var, case_data (gcc, cmd->indep_var), &vstr); tab_text (t, 1, row + count, TAB_LEFT | TAT_TITLE, @@ -1232,13 +1233,13 @@ show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspa ++count, coeffi = ll_next (coeffi)) { const struct categoricals *cats = covariance_get_categoricals (cov); - const union value *val = categoricals_get_value_by_category (cats, count); + const struct ccase *gcc = categoricals_get_case_by_category (cats, count); struct coeff_node *coeffn = ll_data (coeffi, struct coeff_node, ll); struct string vstr; ds_init_empty (&vstr); - var_append_value_name (cmd->indep_var, val, &vstr); + var_append_value_name (cmd->indep_var, case_data (gcc, cmd->indep_var), &vstr); tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, ds_cstr (&vstr)); @@ -1519,10 +1520,11 @@ show_comparisons (const struct oneway_spec *cmd, const struct oneway_workspace * struct string vstr; int j; struct descriptive_data *dd_i = categoricals_get_user_data_by_category (cat, i); - const union value *gval = categoricals_get_value_by_category (cat, i); + const struct ccase *gcc = categoricals_get_case_by_category (cat, i); + ds_init_empty (&vstr); - var_append_value_name (cmd->indep_var, gval, &vstr); + var_append_value_name (cmd->indep_var, case_data (gcc, cmd->indep_var), &vstr); if ( i != 0) tab_hline (t, TAL_1, 1, n_cols - 1, r); @@ -1540,8 +1542,8 @@ show_comparisons (const struct oneway_spec *cmd, const struct oneway_workspace * continue; ds_clear (&vstr); - gval = categoricals_get_value_by_category (cat, j); - var_append_value_name (cmd->indep_var, gval, &vstr); + const struct ccase *cc = categoricals_get_case_by_category (cat, j); + var_append_value_name (cmd->indep_var, case_data (cc, cmd->indep_var), &vstr); tab_text (t, 2, r + rx, TAB_LEFT | TAT_TITLE, ds_cstr (&vstr)); moments1_calculate (dd_j->mom, &weight_j, &mean_j, &var_j, 0, 0); diff --git a/src/math/categoricals.c b/src/math/categoricals.c index 2d65f8ae..16bb46f6 100644 --- a/src/math/categoricals.c +++ b/src/math/categoricals.c @@ -34,7 +34,7 @@ struct value_node { struct hmap_node node; /* Node in hash map. */ - union value value; /* The value being labeled. */ + struct ccase *ccase; double cc; /* The total of the weights of cases with this value */ void *user_data; /* A pointer to data which the caller can store stuff */ @@ -49,6 +49,7 @@ struct var_params struct hmap map; const struct variable *var; + const struct interaction *iact; int base_subscript_short; int base_subscript_long; @@ -72,7 +73,7 @@ compare_value_node (const void *vn1_, const void *vn2_, const void *aux) const struct value_node * const *vn2 = vn2_; const struct var_params *vp = aux; - return value_compare_3way (&(*vn1)->value, &(*vn2)->value, var_get_width (vp->var)); + return interaction_case_cmp_3way (vp->iact, (*vn1)->ccase, (*vn2)->ccase); } @@ -195,23 +196,21 @@ categoricals_dump (const struct categoricals *cat) #endif - static struct value_node * -lookup_value (const struct hmap *map, const struct variable *var, const union value *val) +lookup_case (const struct hmap *map, const struct interaction *iact, const struct ccase *c) { - struct value_node *foo; - unsigned int width = var_get_width (var); - size_t hash = value_hash (val, width, 0); + struct value_node *nn; + size_t hash = interaction_case_hash (iact, c); - HMAP_FOR_EACH_WITH_HASH (foo, struct value_node, node, hash, map) + HMAP_FOR_EACH_WITH_HASH (nn, struct value_node, node, hash, map) { - if (value_equal (val, &foo->value, width)) + if (interaction_case_equal (iact, c, nn->ccase)) break; fprintf (stderr, "Warning: Hash table collision\n"); } - return foo; + return nn; } @@ -246,6 +245,7 @@ categoricals_create (const struct interaction **inter, size_t n_inter, { hmap_init (&cat->vp[i].map); cat->vp[i].var = inter[i]->vars[0]; + cat->vp[i].iact = inter[i]; } return cat; @@ -265,24 +265,27 @@ categoricals_update (struct categoricals *cat, const struct ccase *c) for (i = 0 ; i < cat->n_vp; ++i) { + const struct interaction *iact = cat->vp[i].iact; const struct variable *var = cat->vp[i].var; - unsigned int width = var_get_width (var); + const union value *val = case_data (c, var); size_t hash; struct value_node *node ; +#if XXX if ( var_is_value_missing (var, val, cat->exclude)) continue; +#endif - hash = value_hash (val, width, 0); - node = lookup_value (&cat->vp[i].map, var, val); + hash = interaction_case_hash (iact, c); + node = lookup_case (&cat->vp[i].map, iact, c); if ( NULL == node) { + int width = var_get_width (var); node = pool_malloc (cat->pool, sizeof *node); - value_init (&node->value, width); - value_copy (&node->value, val, width); + node->ccase = case_ref (c); node->cc = 0.0; hmap_insert (&cat->vp[i].map, &node->node, hash); @@ -409,15 +412,25 @@ categoricals_get_variable_by_subscript (const struct categoricals *cat, int subs return cat->vp[index].var; } -/* Return the value corresponding to SUBSCRIPT */ -static const union value * -categoricals_get_value_by_subscript (const struct categoricals *cat, int subscript) +/* Return the interaction corresponding to SUBSCRIPT */ +static const struct interaction * +categoricals_get_interaction_by_subscript (const struct categoricals *cat, int subscript) +{ + int index = reverse_variable_lookup_short (cat, subscript); + + return cat->vp[index].iact; +} + + +/* Return the case corresponding to SUBSCRIPT */ +static const struct ccase * +categoricals_get_case_by_subscript (const struct categoricals *cat, int subscript) { int vindex = reverse_variable_lookup_short (cat, subscript); const struct var_params *vp = &cat->vp[vindex]; const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_short]; - return &vn->value; + return vn->ccase; } @@ -447,12 +460,11 @@ double categoricals_get_binary_by_subscript (const struct categoricals *cat, int subscript, const struct ccase *c) { - const struct variable *var = categoricals_get_variable_by_subscript (cat, subscript); - int width = var_get_width (var); + const struct interaction *iact = categoricals_get_interaction_by_subscript (cat, subscript); - const union value *val = case_data (c, var); + const struct ccase *c2 = categoricals_get_case_by_subscript (cat, subscript); - return value_equal (val, categoricals_get_value_by_subscript (cat, subscript), width); + return interaction_case_equal (iact, c, c2); } @@ -463,16 +475,15 @@ categoricals_get_n_variables (const struct categoricals *cat) } - -/* Return the value corresponding to SUBSCRIPT */ -const union value * -categoricals_get_value_by_category (const struct categoricals *cat, int subscript) +/* Return a case containing the set of values corresponding to SUBSCRIPT */ +const struct ccase * +categoricals_get_case_by_category (const struct categoricals *cat, int subscript) { int vindex = reverse_variable_lookup_long (cat, subscript); const struct var_params *vp = &cat->vp[vindex]; const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_long]; - return &vn->value; + return vn->ccase; } diff --git a/src/math/categoricals.h b/src/math/categoricals.h index 6ab451d7..2cd55b5e 100644 --- a/src/math/categoricals.h +++ b/src/math/categoricals.h @@ -93,6 +93,7 @@ const union value * categoricals_get_value_by_category (const struct categorical void * categoricals_get_user_data_by_category (const struct categoricals *cat, int category); +const struct ccase * categoricals_get_case_by_category (const struct categoricals *cat, int subscript); #endif diff --git a/src/math/interaction.c b/src/math/interaction.c index aa242500..afdc3277 100644 --- a/src/math/interaction.c +++ b/src/math/interaction.c @@ -130,6 +130,25 @@ interaction_case_equal (const struct interaction *iact, const struct ccase *c1, return same; } + +int +interaction_case_cmp_3way (const struct interaction *iact, const struct ccase *c1, const struct ccase *c2) +{ + int i; + int result = 0; + + for (i = 0; i < iact->n_vars; ++i) + { + const struct variable *var = iact->vars[i]; + result = value_compare_3way (case_data (c1, var), case_data (c2, var), var_get_width (var)); + if (result != 0) + break; + } + + return result; +} + + bool interaction_case_is_missing (const struct interaction *iact, const struct ccase *c, enum mv_class exclude) { diff --git a/src/math/interaction.h b/src/math/interaction.h index 96c2940a..cd38ae22 100644 --- a/src/math/interaction.h +++ b/src/math/interaction.h @@ -43,5 +43,7 @@ struct ccase; unsigned int interaction_case_hash (const struct interaction *, const struct ccase *); bool interaction_case_equal (const struct interaction *, const struct ccase *, const struct ccase *); bool interaction_case_is_missing (const struct interaction *, const struct ccase *, enum mv_class); +int interaction_case_cmp_3way (const struct interaction *, const struct ccase *, const struct ccase *); + #endif -- 2.30.2