X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Fmath%2Fcategoricals.c;h=218ac6c180701c78ea424eaa25b6a46fa3f8c1e6;hb=dc32b2366bbb293a81b57a9a19285188db28c881;hp=78944fca44803aabe3da896e8ac23e3f430257b0;hpb=ed38ada34331b3b1e0167c350b375a3fb38099a2;p=pspp-builds.git diff --git a/src/math/categoricals.c b/src/math/categoricals.c index 78944fca..218ac6c1 100644 --- a/src/math/categoricals.c +++ b/src/math/categoricals.c @@ -17,6 +17,7 @@ #include #include "math/categoricals.h" +#include "math/interaction.h" #include @@ -33,7 +34,7 @@ struct value_node { struct hmap_node node; /* Node in hash map. */ - union value value; /* The value being labeled. */ + struct ccase *ccase; double cc; /* The total of the weights of cases with this value */ void *user_data; /* A pointer to data which the caller can store stuff */ @@ -42,12 +43,12 @@ struct value_node Can be used as an index into an array */ }; -struct var_params +struct interact_params { /* A map indexed by a union values */ struct hmap map; - const struct variable *var; + const struct interaction *iact; int base_subscript_short; int base_subscript_long; @@ -69,9 +70,9 @@ compare_value_node (const void *vn1_, const void *vn2_, const void *aux) { const struct value_node * const *vn1 = vn1_; const struct value_node * const *vn2 = vn2_; - const struct var_params *vp = aux; + const struct interact_params *vp = aux; - return value_compare_3way (&(*vn1)->value, &(*vn2)->value, var_get_width (vp->var)); + return interaction_case_cmp_3way (vp->iact, (*vn1)->ccase, (*vn2)->ccase); } @@ -80,15 +81,14 @@ struct categoricals /* The weight variable */ const struct variable *wv; + /* An array of interact_params */ + struct interact_params *iap; - /* An array of var_params */ - struct var_params *vp; - - /* The size of VP. (ie, the number of variables involved.) */ - size_t n_vp; + /* The size of IAP. (ie, the number of interactions involved.) */ + size_t n_iap; /* The number of categorical variables which contain entries. - In the absence of missing values, this will be equal to N_VP */ + In the absence of missing values, this will be equal to N_IAP */ size_t n_vars; /* A map to enable the lookup of variables indexed by subscript. @@ -124,8 +124,18 @@ categoricals_destroy ( struct categoricals *cat) int i; if (cat != NULL) { - for (i = 0 ; i < cat->n_vp; ++i) - hmap_destroy (&cat->vp[i].map); + for (i = 0 ; i < cat->n_iap; ++i) + { + struct hmap *map = &cat->iap[i].map; + struct value_node *nn; + + HMAP_FOR_EACH (nn, struct value_node, node, map) + { + case_unref (nn->ccase); + } + + hmap_destroy (map); + } pool_destroy (cat->pool); free (cat); @@ -139,16 +149,21 @@ categoricals_dump (const struct categoricals *cat) { int v; - for (v = 0 ; v < cat->n_vp; ++v) + for (v = 0 ; v < cat->n_iap; ++v) { - const struct var_params *vp = &cat->vp[v]; + const struct interact_params *vp = &cat->iap[v]; const struct hmap *m = &vp->map; struct hmap_node *node ; int x; - + struct string str ; + ds_init_empty (&str); + + interaction_to_string (vp->iact, &str); printf ("\n%s (%d) CC=%g n_cats=%d:\n", - var_get_name (vp->var), vp->base_subscript_long, vp->cc, vp->n_cats); + ds_cstr (&str), + vp->base_subscript_long, vp->cc, vp->n_cats); +#if 0 printf ("Reverse map\n"); for (x = 0 ; x < vp->n_cats; ++x) { @@ -172,12 +187,13 @@ categoricals_dump (const struct categoricals *cat) vn->subscript, vn->cc); ds_destroy (&s); } +#endif } - assert (cat->n_vars <= cat->n_vp); + assert (cat->n_vars <= cat->n_iap); printf ("\n"); - printf ("Number of categorical variables: %d\n", cat->n_vp); + printf ("Number of interactions: %d\n", cat->n_iap); printf ("Number of non-empty categorical variables: %d\n", cat->n_vars); printf ("Total number of categories: %d\n", cat->n_cats_total); @@ -194,28 +210,26 @@ categoricals_dump (const struct categoricals *cat) #endif - static struct value_node * -lookup_value (const struct hmap *map, const struct variable *var, const union value *val) +lookup_case (const struct hmap *map, const struct interaction *iact, const struct ccase *c) { - struct value_node *foo; - unsigned int width = var_get_width (var); - size_t hash = value_hash (val, width, 0); + struct value_node *nn; + size_t hash = interaction_case_hash (iact, c); - HMAP_FOR_EACH_WITH_HASH (foo, struct value_node, node, hash, map) + HMAP_FOR_EACH_WITH_HASH (nn, struct value_node, node, hash, map) { - if (value_equal (val, &foo->value, width)) + if (interaction_case_equal (iact, c, nn->ccase)) break; fprintf (stderr, "Warning: Hash table collision\n"); } - return foo; + return nn; } struct categoricals * -categoricals_create (const struct variable *const *v, size_t n_vars, +categoricals_create (const struct interaction **inter, size_t n_inter, const struct variable *wv, enum mv_class exclude, user_data_create_func *udf, update_func *update, void *aux1, void *aux2 @@ -224,7 +238,7 @@ categoricals_create (const struct variable *const *v, size_t n_vars, size_t i; struct categoricals *cat = xmalloc (sizeof *cat); - cat->n_vp = n_vars; + cat->n_iap = n_inter; cat->wv = wv; cat->n_cats_total = 0; cat->n_vars = 0; @@ -239,12 +253,12 @@ categoricals_create (const struct variable *const *v, size_t n_vars, cat->aux2 = aux2; - cat->vp = pool_calloc (cat->pool, cat->n_vp, sizeof *cat->vp); + cat->iap = pool_calloc (cat->pool, cat->n_iap, sizeof *cat->iap); - for (i = 0 ; i < cat->n_vp; ++i) + for (i = 0 ; i < cat->n_iap; ++i) { - hmap_init (&cat->vp[i].map); - cat->vp[i].var = v[i]; + hmap_init (&cat->iap[i].map); + cat->iap[i].iact = inter[i]; } return cat; @@ -262,45 +276,42 @@ categoricals_update (struct categoricals *cat, const struct ccase *c) assert (NULL == cat->reverse_variable_map_short); assert (NULL == cat->reverse_variable_map_long); - for (i = 0 ; i < cat->n_vp; ++i) + for (i = 0 ; i < cat->n_iap; ++i) { - const struct variable *var = cat->vp[i].var; - unsigned int width = var_get_width (var); - const union value *val = case_data (c, var); + const struct interaction *iact = cat->iap[i].iact; size_t hash; struct value_node *node ; - if ( var_is_value_missing (var, val, cat->exclude)) + if ( interaction_case_is_missing (iact, c, cat->exclude)) continue; - hash = value_hash (val, width, 0); - node = lookup_value (&cat->vp[i].map, var, val); + hash = interaction_case_hash (iact, c); + node = lookup_case (&cat->iap[i].map, iact, c); if ( NULL == node) { node = pool_malloc (cat->pool, sizeof *node); - value_init (&node->value, width); - value_copy (&node->value, val, width); + node->ccase = case_ref (c); node->cc = 0.0; - hmap_insert (&cat->vp[i].map, &node->node, hash); + hmap_insert (&cat->iap[i].map, &node->node, hash); cat->n_cats_total++; - if ( 0 == cat->vp[i].n_cats) + if ( 0 == cat->iap[i].n_cats) cat->n_vars++; - node->subscript = cat->vp[i].n_cats++ ; + node->subscript = cat->iap[i].n_cats++ ; if (cat->user_data_create) node->user_data = cat->user_data_create (cat->aux1, cat->aux2); } node->cc += weight; - cat->vp[i].cc += weight; + cat->iap[i].cc += weight; if (cat->update) - cat->update (node->user_data, cat->exclude, cat->wv, var, c, cat->aux1, cat->aux2); + cat->update (node->user_data, cat->exclude, cat->wv, NULL, c, cat->aux1, cat->aux2); } } @@ -308,7 +319,7 @@ categoricals_update (struct categoricals *cat, const struct ccase *c) size_t categoricals_n_count (const struct categoricals *cat, size_t n) { - return hmap_count (&cat->vp[n].map); + return hmap_count (&cat->iap[n].map); } @@ -343,10 +354,10 @@ categoricals_done (const struct categoricals *cat_) cat->n_cats_total, sizeof *cat->reverse_variable_map_long); - for (v = 0 ; v < cat->n_vp; ++v) + for (v = 0 ; v < cat->n_iap; ++v) { int i; - struct var_params *vp = &cat->vp[v]; + struct interact_params *vp = &cat->iap[v]; int n_cats_total = categoricals_n_count (cat, v); struct hmap_node *node ; @@ -373,7 +384,7 @@ categoricals_done (const struct categoricals *cat_) cat->reverse_variable_map_long[idx_long++] = v; } - assert (cat->n_vars <= cat->n_vp); + assert (cat->n_vars <= cat->n_iap); } @@ -398,25 +409,25 @@ reverse_variable_lookup_long (const struct categoricals *cat, int subscript) } - -/* Return the categorical variable corresponding to SUBSCRIPT */ -const struct variable * -categoricals_get_variable_by_subscript (const struct categoricals *cat, int subscript) +/* Return the interaction corresponding to SUBSCRIPT */ +const struct interaction * +categoricals_get_interaction_by_subscript (const struct categoricals *cat, int subscript) { int index = reverse_variable_lookup_short (cat, subscript); - return cat->vp[index].var; + return cat->iap[index].iact; } -/* Return the value corresponding to SUBSCRIPT */ -static const union value * -categoricals_get_value_by_subscript (const struct categoricals *cat, int subscript) + +/* Return the case corresponding to SUBSCRIPT */ +static const struct ccase * +categoricals_get_case_by_subscript (const struct categoricals *cat, int subscript) { int vindex = reverse_variable_lookup_short (cat, subscript); - const struct var_params *vp = &cat->vp[vindex]; + const struct interact_params *vp = &cat->iap[vindex]; const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_short]; - return &vn->value; + return vn->ccase; } @@ -424,7 +435,7 @@ double categoricals_get_weight_by_subscript (const struct categoricals *cat, int subscript) { int vindex = reverse_variable_lookup_short (cat, subscript); - const struct var_params *vp = &cat->vp[vindex]; + const struct interact_params *vp = &cat->iap[vindex]; return vp->cc; } @@ -433,7 +444,7 @@ double categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript) { int vindex = reverse_variable_lookup_short (cat, subscript); - const struct var_params *vp = &cat->vp[vindex]; + const struct interact_params *vp = &cat->iap[vindex]; const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_short]; return vn->cc; @@ -446,12 +457,11 @@ double categoricals_get_binary_by_subscript (const struct categoricals *cat, int subscript, const struct ccase *c) { - const struct variable *var = categoricals_get_variable_by_subscript (cat, subscript); - int width = var_get_width (var); + const struct interaction *iact = categoricals_get_interaction_by_subscript (cat, subscript); - const union value *val = case_data (c, var); + const struct ccase *c2 = categoricals_get_case_by_subscript (cat, subscript); - return value_equal (val, categoricals_get_value_by_subscript (cat, subscript), width); + return interaction_case_equal (iact, c, c2); } @@ -462,16 +472,15 @@ categoricals_get_n_variables (const struct categoricals *cat) } - -/* Return the value corresponding to SUBSCRIPT */ -const union value * -categoricals_get_value_by_category (const struct categoricals *cat, int subscript) +/* Return a case containing the set of values corresponding to SUBSCRIPT */ +const struct ccase * +categoricals_get_case_by_category (const struct categoricals *cat, int subscript) { int vindex = reverse_variable_lookup_long (cat, subscript); - const struct var_params *vp = &cat->vp[vindex]; + const struct interact_params *vp = &cat->iap[vindex]; const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_long]; - return &vn->value; + return vn->ccase; } @@ -479,7 +488,7 @@ void * categoricals_get_user_data_by_category (const struct categoricals *cat, int subscript) { int vindex = reverse_variable_lookup_long (cat, subscript); - const struct var_params *vp = &cat->vp[vindex]; + const struct interact_params *vp = &cat->iap[vindex]; const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript_long]; return vn->user_data;