X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fcategoricals.c;h=58015c9088c891653329765a9cfc215f67b6b9e8;hb=207ea79ecdd7500ec0ecaf3d7a79488ba78ca560;hp=f40ae10b5d7dca7714cf8aea4e88314792bea795;hpb=840f7bace2423e6d240320ab308f0fbaa8c559f1;p=pspp-builds.git diff --git a/src/math/categoricals.c b/src/math/categoricals.c index f40ae10b..58015c90 100644 --- a/src/math/categoricals.c +++ b/src/math/categoricals.c @@ -17,6 +17,7 @@ #include #include "math/categoricals.h" +#include "math/interaction.h" #include @@ -33,7 +34,7 @@ struct value_node { struct hmap_node node; /* Node in hash map. */ - union value value; /* The value being labeled. */ + struct ccase *ccase; double cc; /* The total of the weights of cases with this value */ void *user_data; /* A pointer to data which the caller can store stuff */ @@ -47,9 +48,10 @@ struct var_params /* A map indexed by a union values */ struct hmap map; - const struct variable *var; + const struct interaction *iact; - int base_subscript; + int base_subscript_short; + int base_subscript_long; /* The number of distinct values of this variable */ int n_cats; @@ -70,7 +72,7 @@ compare_value_node (const void *vn1_, const void *vn2_, const void *aux) const struct value_node * const *vn2 = vn2_; const struct var_params *vp = aux; - return value_compare_3way (&(*vn1)->value, &(*vn2)->value, var_get_width (vp->var)); + return interaction_case_cmp_3way (vp->iact, (*vn1)->ccase, (*vn2)->ccase); } @@ -79,7 +81,6 @@ struct categoricals /* The weight variable */ const struct variable *wv; - /* An array of var_params */ struct var_params *vp; @@ -90,8 +91,13 @@ struct categoricals In the absence of missing values, this will be equal to N_VP */ size_t n_vars; - /* A map to enable the lookup of variables indexed by subscript */ - int *reverse_variable_map; + /* A map to enable the lookup of variables indexed by subscript. + This map considers only the N - 1 of the N variables. + */ + int *reverse_variable_map_short; + + /* Like the above, but uses all N variables */ + int *reverse_variable_map_long; size_t n_cats_total; @@ -141,7 +147,7 @@ categoricals_dump (const struct categoricals *cat) int x; printf ("\n%s (%d) CC=%g n_cats=%d:\n", - var_get_name (vp->var), vp->base_subscript, vp->cc, vp->n_cats); + var_get_name (vp->var), vp->base_subscript_long, vp->cc, vp->n_cats); printf ("Reverse map\n"); for (x = 0 ; x < vp->n_cats; ++x) @@ -175,37 +181,39 @@ categoricals_dump (const struct categoricals *cat) printf ("Number of non-empty categorical variables: %d\n", cat->n_vars); printf ("Total number of categories: %d\n", cat->n_cats_total); - printf ("\nReverse variable map:\n"); - + printf ("\nReverse variable map (short):\n"); for (v = 0 ; v < cat->n_cats_total - cat->n_vars; ++v) - printf ("%d ", cat->reverse_variable_map[v]); + printf ("%d ", cat->reverse_variable_map_short[v]); + + printf ("\nReverse variable map (long):\n"); + for (v = 0 ; v < cat->n_cats_total; ++v) + printf ("%d ", cat->reverse_variable_map_long[v]); + printf ("\n"); } #endif - static struct value_node * -lookup_value (const struct hmap *map, const struct variable *var, const union value *val) +lookup_case (const struct hmap *map, const struct interaction *iact, const struct ccase *c) { - struct value_node *foo; - unsigned int width = var_get_width (var); - size_t hash = value_hash (val, width, 0); + struct value_node *nn; + size_t hash = interaction_case_hash (iact, c); - HMAP_FOR_EACH_WITH_HASH (foo, struct value_node, node, hash, map) + HMAP_FOR_EACH_WITH_HASH (nn, struct value_node, node, hash, map) { - if (value_equal (val, &foo->value, width)) + if (interaction_case_equal (iact, c, nn->ccase)) break; fprintf (stderr, "Warning: Hash table collision\n"); } - return foo; + return nn; } struct categoricals * -categoricals_create (const struct variable *const *v, size_t n_vars, +categoricals_create (const struct interaction **inter, size_t n_inter, const struct variable *wv, enum mv_class exclude, user_data_create_func *udf, update_func *update, void *aux1, void *aux2 @@ -214,11 +222,12 @@ categoricals_create (const struct variable *const *v, size_t n_vars, size_t i; struct categoricals *cat = xmalloc (sizeof *cat); - cat->n_vp = n_vars; + cat->n_vp = n_inter; cat->wv = wv; cat->n_cats_total = 0; cat->n_vars = 0; - cat->reverse_variable_map = NULL; + cat->reverse_variable_map_short = NULL; + cat->reverse_variable_map_long = NULL; cat->pool = pool_create (); cat->exclude = exclude; cat->update = update; @@ -233,7 +242,7 @@ categoricals_create (const struct variable *const *v, size_t n_vars, for (i = 0 ; i < cat->n_vp; ++i) { hmap_init (&cat->vp[i].map); - cat->vp[i].var = v[i]; + cat->vp[i].iact = inter[i]; } return cat; @@ -248,28 +257,26 @@ categoricals_update (struct categoricals *cat, const struct ccase *c) const double weight = cat->wv ? case_data (c, cat->wv)->f : 1.0; - assert (NULL == cat->reverse_variable_map); + assert (NULL == cat->reverse_variable_map_short); + assert (NULL == cat->reverse_variable_map_long); for (i = 0 ; i < cat->n_vp; ++i) { - const struct variable *var = cat->vp[i].var; - unsigned int width = var_get_width (var); - const union value *val = case_data (c, var); + const struct interaction *iact = cat->vp[i].iact; size_t hash; struct value_node *node ; - if ( var_is_value_missing (var, val, cat->exclude)) + if ( interaction_case_is_missing (iact, c, cat->exclude)) continue; - hash = value_hash (val, width, 0); - node = lookup_value (&cat->vp[i].map, var, val); + hash = interaction_case_hash (iact, c); + node = lookup_case (&cat->vp[i].map, iact, c); if ( NULL == node) { node = pool_malloc (cat->pool, sizeof *node); - value_init (&node->value, width); - value_copy (&node->value, val, width); + node->ccase = case_ref (c); node->cc = 0.0; hmap_insert (&cat->vp[i].map, &node->node, hash); @@ -288,7 +295,7 @@ categoricals_update (struct categoricals *cat, const struct ccase *c) cat->vp[i].cc += weight; if (cat->update) - cat->update (node->user_data, cat->exclude, cat->wv, var, c, cat->aux1, cat->aux2); + cat->update (node->user_data, cat->exclude, cat->wv, NULL, c, cat->aux1, cat->aux2); } } @@ -308,7 +315,7 @@ categoricals_total (const struct categoricals *cat) } -/* This function must be called *before* any call to categoricals_get_*_by subscript an +/* This function must be called *before* any call to categoricals_get_*_by subscript and *after* all calls to categoricals_update */ void categoricals_done (const struct categoricals *cat_) @@ -321,10 +328,15 @@ categoricals_done (const struct categoricals *cat_) */ struct categoricals *cat = CONST_CAST (struct categoricals *, cat_); int v; - int idx = 0; - cat->reverse_variable_map = pool_calloc (cat->pool, - cat->n_cats_total - cat->n_vars, - sizeof *cat->reverse_variable_map); + int idx_short = 0; + int idx_long = 0; + cat->reverse_variable_map_short = pool_calloc (cat->pool, + cat->n_cats_total - cat->n_vars, + sizeof *cat->reverse_variable_map_short); + + cat->reverse_variable_map_long = pool_calloc (cat->pool, + cat->n_cats_total, + sizeof *cat->reverse_variable_map_long); for (v = 0 ; v < cat->n_vp; ++v) { @@ -335,7 +347,8 @@ categoricals_done (const struct categoricals *cat_) vp->reverse_value_map = pool_calloc (cat->pool, n_cats_total, sizeof *vp->reverse_value_map); - vp->base_subscript = idx; + vp->base_subscript_short = idx_short; + vp->base_subscript_long = idx_long; for (node = hmap_first (&vp->map); node; node = hmap_next (&vp->map, node)) { @@ -347,10 +360,12 @@ categoricals_done (const struct categoricals *cat_) sort (vp->reverse_value_map, vp->n_cats, sizeof (const struct value_node *), compare_value_node, vp); - /* Populate the reverse variable map. - */ + /* Populate the reverse variable maps. */ for (i = 0; i < vp->n_cats - 1; ++i) - cat->reverse_variable_map[idx++] = v; + cat->reverse_variable_map_short[idx_short++] = v; + + for (i = 0; i < vp->n_cats; ++i) + cat->reverse_variable_map_long[idx_long++] = v; } assert (cat->n_vars <= cat->n_vp); @@ -358,41 +373,62 @@ categoricals_done (const struct categoricals *cat_) static int -reverse_variable_lookup (const struct categoricals *cat, int subscript) +reverse_variable_lookup_short (const struct categoricals *cat, int subscript) { - assert (cat->reverse_variable_map); + assert (cat->reverse_variable_map_short); assert (subscript >= 0); assert (subscript < cat->n_cats_total - cat->n_vars); - return cat->reverse_variable_map[subscript]; + return cat->reverse_variable_map_short[subscript]; +} + +static int +reverse_variable_lookup_long (const struct categoricals *cat, int subscript) +{ + assert (cat->reverse_variable_map_long); + assert (subscript >= 0); + assert (subscript < cat->n_cats_total); + + return cat->reverse_variable_map_long[subscript]; } + /* Return the categorical variable corresponding to SUBSCRIPT */ const struct variable * categoricals_get_variable_by_subscript (const struct categoricals *cat, int subscript) { - int index = reverse_variable_lookup (cat, subscript); + int index = reverse_variable_lookup_short (cat, subscript); + + return cat->vp[index].iact->vars[0]; +} + +/* Return the interaction corresponding to SUBSCRIPT */ +const struct interaction * +categoricals_get_interaction_by_subscript (const struct categoricals *cat, int subscript) +{ + int index = reverse_variable_lookup_short (cat, subscript); - return cat->vp[index].var; + return cat->vp[index].iact; } -/* Return the value corresponding to SUBSCRIPT */ -const union value * -categoricals_get_value_by_subscript (const struct categoricals *cat, int subscript) + +/* Return the case corresponding to SUBSCRIPT */ +static const struct ccase * +categoricals_get_case_by_subscript (const struct categoricals *cat, int subscript) { - int vindex = reverse_variable_lookup (cat, subscript); + int vindex = reverse_variable_lookup_short (cat, subscript); const struct var_params *vp = &cat->vp[vindex]; - const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript]; + const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_short]; - return &vn->value; + return vn->ccase; } double categoricals_get_weight_by_subscript (const struct categoricals *cat, int subscript) { - int vindex = reverse_variable_lookup (cat, subscript); + int vindex = reverse_variable_lookup_short (cat, subscript); const struct var_params *vp = &cat->vp[vindex]; return vp->cc; @@ -401,10 +437,10 @@ categoricals_get_weight_by_subscript (const struct categoricals *cat, int subscr double categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript) { - int vindex = reverse_variable_lookup (cat, subscript); + int vindex = reverse_variable_lookup_short (cat, subscript); const struct var_params *vp = &cat->vp[vindex]; - const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript]; + const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_short]; return vn->cc; } @@ -415,12 +451,11 @@ double categoricals_get_binary_by_subscript (const struct categoricals *cat, int subscript, const struct ccase *c) { - const struct variable *var = categoricals_get_variable_by_subscript (cat, subscript); - int width = var_get_width (var); + const struct interaction *iact = categoricals_get_interaction_by_subscript (cat, subscript); - const union value *val = case_data (c, var); + const struct ccase *c2 = categoricals_get_case_by_subscript (cat, subscript); - return value_equal (val, categoricals_get_value_by_subscript (cat, subscript), width); + return interaction_case_equal (iact, c, c2); } @@ -431,13 +466,24 @@ categoricals_get_n_variables (const struct categoricals *cat) } +/* Return a case containing the set of values corresponding to SUBSCRIPT */ +const struct ccase * +categoricals_get_case_by_category (const struct categoricals *cat, int subscript) +{ + int vindex = reverse_variable_lookup_long (cat, subscript); + const struct var_params *vp = &cat->vp[vindex]; + const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_long]; + + return vn->ccase; +} + void * -categoricals_get_user_data_by_subscript (const struct categoricals *cat, int subscript) +categoricals_get_user_data_by_category (const struct categoricals *cat, int subscript) { - int vindex = reverse_variable_lookup (cat, subscript); + int vindex = reverse_variable_lookup_long (cat, subscript); const struct var_params *vp = &cat->vp[vindex]; - const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript]; + const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_long]; return vn->user_data; }