X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fcategoricals.c;h=c544f97e98000ade273bd78785bd9a9a4b924e1b;hb=43b96ca660fb4c57674f1625aad48b83c0170de0;hp=4b5176322f798212073ffc6fab95e312789acf92;hpb=27c1ad9a254ab0f90b4f4b63960f6ba847bda62a;p=pspp diff --git a/src/math/categoricals.c b/src/math/categoricals.c index 4b5176322f..c544f97e98 100644 --- a/src/math/categoricals.c +++ b/src/math/categoricals.c @@ -61,19 +61,29 @@ struct var_params struct categoricals { + /* The weight variable */ const struct variable *wv; - size_t n_vars; /* An array of var_params */ struct var_params *vp; + /* The size of VP. (ie, the number of variables involved.) */ + size_t n_vp; + + /* The number of categorical variables which contain entries. + In the absence of missing values, this will be equal to N_VP */ + size_t n_vars; + /* A map to enable the lookup of variables indexed by subscript */ int *reverse_variable_map; size_t n_cats_total; struct pool *pool; + + /* Missing values to be excluded */ + enum mv_class exclude; }; @@ -81,30 +91,32 @@ void categoricals_destroy ( struct categoricals *cat) { int i; - for (i = 0 ; i < cat->n_vars; ++i) - hmap_destroy (&cat->vp[i].map); - - pool_destroy (cat->pool); - free (cat); + if (cat != NULL) + { + for (i = 0 ; i < cat->n_vp; ++i) + hmap_destroy (&cat->vp[i].map); + + pool_destroy (cat->pool); + free (cat); + } } +#if 0 void categoricals_dump (const struct categoricals *cat) { int v; - for (v = 0 ; v < cat->n_vars; ++v) + for (v = 0 ; v < cat->n_vp; ++v) { const struct var_params *vp = &cat->vp[v]; const struct hmap *m = &vp->map; - // size_t width = var_get_width (vp->var); struct hmap_node *node ; int x; - printf ("\n%s (%d) CC=%g:\n", var_get_name (vp->var), vp->base_subscript, vp->cc); - - assert (vp->reverse_value_map); + printf ("\n%s (%d) CC=%g n_cats=%d:\n", + var_get_name (vp->var), vp->base_subscript, vp->cc, vp->n_cats); printf ("Reverse map\n"); for (x = 0 ; x < vp->n_cats; ++x) @@ -121,8 +133,8 @@ categoricals_dump (const struct categoricals *cat) for (node = hmap_first (m); node; node = hmap_next (m, node)) { struct string s; - ds_init_empty (&s); const struct value_node *vn = HMAP_DATA (node, struct value_node, node); + ds_init_empty (&s); var_append_value_name (vp->var, &vn->value, &s); printf ("Value: %s; Index %d; CC %g\n", ds_cstr (&s), @@ -130,7 +142,21 @@ categoricals_dump (const struct categoricals *cat) ds_destroy (&s); } } + + assert (cat->n_vars <= cat->n_vp); + + printf ("\n"); + printf ("Number of categorical variables: %d\n", cat->n_vp); + printf ("Number of non-empty categorical variables: %d\n", cat->n_vars); + printf ("Total number of categories: %d\n", cat->n_cats_total); + + printf ("\nReverse variable map:\n"); + + for (v = 0 ; v < cat->n_cats_total; ++v) + printf ("%d ", cat->reverse_variable_map[v]); + printf ("\n"); } +#endif @@ -155,20 +181,23 @@ lookup_value (const struct hmap *map, const struct variable *var, const union va struct categoricals * -categoricals_create (const struct variable **v, size_t n_vars, const struct variable *wv) +categoricals_create (const struct variable **v, size_t n_vars, + const struct variable *wv, enum mv_class exclude) { size_t i; struct categoricals *cat = xmalloc (sizeof *cat); - cat->n_vars = n_vars; + cat->n_vp = n_vars; cat->wv = wv; cat->n_cats_total = 0; + cat->n_vars = 0; cat->reverse_variable_map = NULL; cat->pool = pool_create (); + cat->exclude = exclude; - cat->vp = pool_calloc (cat->pool, n_vars, sizeof *cat->vp); + cat->vp = pool_calloc (cat->pool, cat->n_vp, sizeof *cat->vp); - for (i = 0 ; i < cat->n_vars; ++i) + for (i = 0 ; i < cat->n_vp; ++i) { hmap_init (&cat->vp[i].map); cat->vp[i].var = v[i]; @@ -188,15 +217,19 @@ categoricals_update (struct categoricals *cat, const struct ccase *c) assert (NULL == cat->reverse_variable_map); - for (i = 0 ; i < cat->n_vars; ++i) + for (i = 0 ; i < cat->n_vp; ++i) { const struct variable *var = cat->vp[i].var; unsigned int width = var_get_width (var); const union value *val = case_data (c, var); - size_t hash = value_hash (val, width, 0); + size_t hash; + struct value_node *node ; - struct value_node *node = lookup_value (&cat->vp[i].map, var, val); + if ( var_is_value_missing (var, val, cat->exclude)) + continue; + hash = value_hash (val, width, 0); + node = lookup_value (&cat->vp[i].map, var, val); if ( NULL == node) { @@ -207,7 +240,11 @@ categoricals_update (struct categoricals *cat, const struct ccase *c) node->cc = 0.0; hmap_insert (&cat->vp[i].map, &node->node, hash); - cat->n_cats_total ++; + cat->n_cats_total++; + + if ( 0 == cat->vp[i].n_cats) + cat->n_vars++; + node->subscript = cat->vp[i].n_cats++ ; } @@ -224,19 +261,6 @@ categoricals_n_count (const struct categoricals *cat, size_t n) } -/* Return the index for value VAL in the Nth variable */ -int -categoricals_index (const struct categoricals *cat, size_t n, const union value *val) -{ - struct value_node *vn = lookup_value (&cat->vp[n].map, cat->vp[n].var, val); - - if ( vn == NULL) - return -1; - - return vn->subscript; -} - - /* Return the total number of categories */ size_t categoricals_total (const struct categoricals *cat) @@ -258,9 +282,11 @@ categoricals_done (struct categoricals *cat) */ int v; int idx = 0; - cat->reverse_variable_map = pool_calloc (cat->pool, cat->n_cats_total, sizeof *cat->reverse_variable_map); + cat->reverse_variable_map = pool_calloc (cat->pool, + cat->n_cats_total, + sizeof *cat->reverse_variable_map); - for (v = 0 ; v < cat->n_vars; ++v) + for (v = 0 ; v < cat->n_vp; ++v) { int i; struct var_params *vp = &cat->vp[v]; @@ -277,32 +303,41 @@ categoricals_done (struct categoricals *cat) vp->reverse_value_map[vn->subscript] = vn; } + /* Populate the reverse variable map. + */ for (i = 0; i < vp->n_cats; ++i) cat->reverse_variable_map[idx++] = v; } + + assert (cat->n_vars <= cat->n_vp); } +static int +reverse_variable_lookup (const struct categoricals *cat, int subscript) +{ + assert (cat->reverse_variable_map); + assert (subscript >= 0); + assert (subscript < cat->n_cats_total); + + return cat->reverse_variable_map[subscript]; +} + /* Return the categorical variable corresponding to SUBSCRIPT */ const struct variable * categoricals_get_variable_by_subscript (const struct categoricals *cat, int subscript) { - int index; - - assert (cat->reverse_variable_map); - - index = cat->reverse_variable_map[subscript]; + int index = reverse_variable_lookup (cat, subscript); return cat->vp[index].var; } - /* Return the value corresponding to SUBSCRIPT */ const union value * categoricals_get_value_by_subscript (const struct categoricals *cat, int subscript) { - int vindex = cat->reverse_variable_map[subscript]; + int vindex = reverse_variable_lookup (cat, subscript); const struct var_params *vp = &cat->vp[vindex]; const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript]; @@ -310,6 +345,26 @@ categoricals_get_value_by_subscript (const struct categoricals *cat, int subscri } +double +categoricals_get_weight_by_subscript (const struct categoricals *cat, int subscript) +{ + int vindex = reverse_variable_lookup (cat, subscript); + const struct var_params *vp = &cat->vp[vindex]; + + return vp->cc; +} + +double +categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript) +{ + int vindex = reverse_variable_lookup (cat, subscript); + const struct var_params *vp = &cat->vp[vindex]; + + const struct value_node *vn = vp->reverse_value_map [subscript - vp->base_subscript]; + return vn->cc; +} + + /* Returns unity if the value in case C at SUBSCRIPT is equal to the category for that subscript */ double @@ -323,3 +378,10 @@ categoricals_get_binary_by_subscript (const struct categoricals *cat, int subscr return value_equal (val, categoricals_get_value_by_subscript (cat, subscript), width); } + + +size_t +categoricals_get_n_variables (const struct categoricals *cat) +{ + return cat->n_vars; +}