+size_t
+categoricals_df_total (const struct categoricals *cat)
+{
+ return cat->df_sum;
+}
+
+bool
+categoricals_is_complete (const struct categoricals *cat)
+{
+ return (NULL != cat->reverse_variable_map_short);
+}
+
+
+/* This function must be called *before* any call to categoricals_get_*_by subscript and
+ *after* all calls to categoricals_update */
+bool
+categoricals_done (const struct categoricals *cat_)
+{
+ /* Implementation Note: Whilst this function is O(n) in cat->n_cats_total, in most
+ uses it will be more efficient that using a tree based structure, since it
+ is called only once, and means that subsequent lookups will be O(1).
+
+ 1 call of O(n) + 10^9 calls of O(1) is better than 10^9 calls of O(log n).
+ */
+ struct categoricals *cat = CONST_CAST (struct categoricals *, cat_);
+ int v;
+ int i;
+ int idx_short = 0;
+ int idx_long = 0;
+ cat->df_sum = 0;
+ cat->n_cats_total = 0;
+
+ /* Calculate the degrees of freedom, and the number of categories */
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ int df = 1;
+ const struct interaction *iact = cat->iap[i].iact;
+
+ cat->iap[i].df_prod = iact->n_vars ? xcalloc (iact->n_vars, sizeof (int)) : NULL;
+
+ cat->iap[i].n_cats = 1;
+
+ for (v = 0 ; v < iact->n_vars; ++v)
+ {
+ const struct variable *var = iact->vars[v];
+
+ struct variable_node *vn = lookup_variable (&cat->varmap, var, hash_pointer (var, 0));
+
+ if (hmap_count (&vn->valmap) == 0)
+ return false;
+
+ cat->iap[i].df_prod[v] = df * (hmap_count (&vn->valmap) - 1);
+ df = cat->iap[i].df_prod[v];
+
+ cat->iap[i].n_cats *= hmap_count (&vn->valmap);
+ }
+
+ assert (v == iact->n_vars);
+ if (v > 0)
+ cat->df_sum += cat->iap[i].df_prod [v - 1];
+
+ cat->n_cats_total += cat->iap[i].n_cats;
+ }
+
+
+ cat->reverse_variable_map_short = pool_calloc (cat->pool,
+ cat->df_sum,
+ sizeof *cat->reverse_variable_map_short);
+
+ cat->reverse_variable_map_long = pool_calloc (cat->pool,
+ cat->n_cats_total,
+ sizeof *cat->reverse_variable_map_long);
+
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ struct interaction_value *ivn = NULL;
+ int x = 0;
+ int ii;
+ struct interact_params *iap = &cat->iap[i];
+
+ iap->base_subscript_short = idx_short;
+ iap->base_subscript_long = idx_long;
+
+ iap->reverse_interaction_value_map = pool_calloc (cat->pool, iap->n_cats,
+ sizeof *iap->reverse_interaction_value_map);
+
+ HMAP_FOR_EACH (ivn, struct interaction_value, node, &iap->ivmap)
+ {
+ iap->reverse_interaction_value_map[x++] = ivn;
+
+ }
+
+ assert (x <= iap->n_cats);
+
+ /* For some purposes (eg CONTRASTS in ONEWAY) the values need to be sorted */
+ sort (iap->reverse_interaction_value_map, x, sizeof (*iap->reverse_interaction_value_map),
+ compare_interaction_value_3way, iap);
+
+ /* Fill the remaining values with null */
+ for (ii = x ; ii < iap->n_cats; ++ii)
+ iap->reverse_interaction_value_map[ii] = NULL;
+
+ /* Populate the reverse variable maps. */
+ if (iap->df_prod)
+ {
+ for (ii = 0; ii < iap->df_prod [iap->iact->n_vars - 1]; ++ii)
+ cat->reverse_variable_map_short[idx_short++] = i;
+ }
+
+ for (ii = 0; ii < iap->n_cats; ++ii)
+ cat->reverse_variable_map_long[idx_long++] = i;
+ }
+
+ assert (cat->n_vars <= cat->n_iap);
+
+ categoricals_dump (cat);
+
+ /* Tally up the sums for all the encodings */
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ int x, y;
+ struct interact_params *iap = &cat->iap[i];
+ const struct interaction *iact = iap->iact;
+
+ const int df = iap->df_prod ? iap->df_prod [iact->n_vars - 1] : 0;
+
+ iap->enc_sum = xcalloc (df, sizeof (*(iap->enc_sum)));
+
+ for (y = 0; y < hmap_count (&iap->ivmap); ++y)
+ {
+ struct interaction_value *iv = iap->reverse_interaction_value_map[y];
+ for (x = iap->base_subscript_short; x < iap->base_subscript_short + df ;++x)
+ {
+ const double bin = categoricals_get_code_for_case (cat, x, iv->ccase); \
+ iap->enc_sum [x - iap->base_subscript_short] += bin * iv->cc;
+ }
+ if (cat->payload && cat->payload->destroy)
+ cat->payload->destroy (cat->aux1, cat->aux2, iv->user_data);
+ }
+ }
+
+ return true;
+}
+
+
+static int
+reverse_variable_lookup_short (const struct categoricals *cat, int subscript)
+{
+ assert (cat->reverse_variable_map_short);
+ assert (subscript >= 0);
+ assert (subscript < cat->df_sum);
+
+ return cat->reverse_variable_map_short[subscript];
+}
+
+static int
+reverse_variable_lookup_long (const struct categoricals *cat, int subscript)
+{
+ assert (cat->reverse_variable_map_long);
+ assert (subscript >= 0);
+ assert (subscript < cat->n_cats_total);
+
+ return cat->reverse_variable_map_long[subscript];
+}
+
+
+/* Return the interaction corresponding to SUBSCRIPT */
+const struct interaction *
+categoricals_get_interaction_by_subscript (const struct categoricals *cat, int subscript)
+{
+ int index = reverse_variable_lookup_short (cat, subscript);
+
+ return cat->iap[index].iact;
+}
+
+double
+categoricals_get_weight_by_subscript (const struct categoricals *cat, int subscript)
+{
+ int vindex = reverse_variable_lookup_short (cat, subscript);
+ const struct interact_params *vp = &cat->iap[vindex];
+
+ return vp->cc;
+}
+
+double
+categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript)
+{
+ int vindex = reverse_variable_lookup_short (cat, subscript);
+ const struct interact_params *vp = &cat->iap[vindex];
+
+ return vp->enc_sum[subscript - vp->base_subscript_short];
+}
+
+/* Returns unity if the value in case C at SUBSCRIPT is equal to the category
+ for that subscript */
+double
+categoricals_get_code_for_case (const struct categoricals *cat, int subscript,
+ const struct ccase *c)
+{
+ const struct interaction *iact = categoricals_get_interaction_by_subscript (cat, subscript);
+
+ const int i = reverse_variable_lookup_short (cat, subscript);
+
+ const int base_index = cat->iap[i].base_subscript_short;
+
+ int v;
+ double result = 1.0;
+
+ const struct interact_params *iap = &cat->iap[i];
+
+ double dfp = 1.0;
+ for (v = 0; v < iact->n_vars; ++v)
+ {
+ const struct variable *var = iact->vars[v];
+
+ const union value *val = case_data (c, var);
+ const int width = var_get_width (var);
+ const struct variable_node *vn = lookup_variable (&cat->varmap, var, hash_pointer (var, 0));
+
+ const unsigned int hash = value_hash (val, width, 0);
+ const struct value_node *valn = lookup_value (&vn->valmap, val, hash, width);
+
+ double bin = 1.0;
+
+ const double df = iap->df_prod[v] / dfp;
+
+ /* Translate the subscript into an index for the individual variable */
+ const int index = ((subscript - base_index) % iap->df_prod[v] ) / dfp;
+ dfp = iap->df_prod [v];
+
+#if EFFECTS_CODING
+ if ( valn->index == df )
+ bin = -1.0;
+ else
+#endif
+ if ( valn->index != index )
+ bin = 0;
+
+ result *= bin;
+ }
+
+ return result;