+/* Return the total number of categories */
+size_t
+categoricals_n_total (const struct categoricals *cat)
+{
+ if (!categoricals_is_complete (cat))
+ return 0;
+
+ return cat->n_cats_total;
+}
+
+size_t
+categoricals_df_total (const struct categoricals *cat)
+{
+ if (NULL == cat)
+ return 0;
+
+ return cat->df_sum;
+}
+
+bool
+categoricals_is_complete (const struct categoricals *cat)
+{
+ return (NULL != cat->reverse_variable_map_short);
+}
+
+
+/* This function must be called *before* any call to categoricals_get_*_by subscript and
+ *after* all calls to categoricals_update */
+void
+categoricals_done (const struct categoricals *cat_)
+{
+ /* Implementation Note: Whilst this function is O(n) in cat->n_cats_total, in most
+ uses it will be more efficient that using a tree based structure, since it
+ is called only once, and means that subsequent lookups will be O(1).
+
+ 1 call of O(n) + 10^9 calls of O(1) is better than 10^9 calls of O(log n).
+ */
+ struct categoricals *cat = CONST_CAST (struct categoricals *, cat_);
+ int v;
+ int i;
+ int idx_short = 0;
+ int idx_long = 0;
+
+ if (NULL == cat)
+ return;
+
+ cat->df_sum = 0;
+ cat->n_cats_total = 0;
+
+ /* Calculate the degrees of freedom, and the number of categories */
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ int df = 1;
+ const struct interaction *iact = cat->iap[i].iact;
+
+ cat->iap[i].df_prod = iact->n_vars ? xcalloc (iact->n_vars, sizeof (int)) : NULL;
+
+ cat->iap[i].n_cats = 1;
+
+ for (v = 0 ; v < iact->n_vars; ++v)
+ {
+ int x;
+ const struct variable *var = iact->vars[v];
+
+ struct variable_node *vn = lookup_variable (&cat->varmap, var, hash_pointer (var, 0));
+
+ struct value_node *valnd = NULL;
+ struct value_node **array ;
+
+ assert (vn->n_vals == hmap_count (&vn->valmap));
+
+ if (vn->n_vals == 0)
+ {
+ cat->sane = false;
+ return;
+ }
+
+ /* Sort the VALMAP here */
+ array = xcalloc (sizeof *array, vn->n_vals);
+ x = 0;
+ HMAP_FOR_EACH (valnd, struct value_node, node, &vn->valmap)
+ {
+ /* Note: This loop is probably superfluous, it could be done in the
+ update stage (at the expense of a realloc) */
+ array[x++] = valnd;
+ }
+
+ sort (array, vn->n_vals, sizeof (*array),
+ compare_value_node_3way, vn);
+
+ for (x = 0; x < vn->n_vals; ++x)
+ {
+ struct value_node *vvv = array[x];
+ vvv->index = x;
+ }
+ free (array);
+
+ cat->iap[i].df_prod[v] = df * (vn->n_vals - 1);
+ df = cat->iap[i].df_prod[v];
+
+ cat->iap[i].n_cats *= vn->n_vals;
+ }
+
+ if (v > 0)
+ cat->df_sum += cat->iap[i].df_prod [v - 1];
+
+ cat->n_cats_total += cat->iap[i].n_cats;
+ }
+
+
+ cat->reverse_variable_map_short = pool_calloc (cat->pool,
+ cat->df_sum,
+ sizeof *cat->reverse_variable_map_short);
+
+ cat->reverse_variable_map_long = pool_calloc (cat->pool,
+ cat->n_cats_total,
+ sizeof *cat->reverse_variable_map_long);
+
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ struct interaction_value *ivn = NULL;
+ int x = 0;
+ int ii;
+ struct interact_params *iap = &cat->iap[i];
+
+ iap->base_subscript_short = idx_short;
+ iap->base_subscript_long = idx_long;
+
+ iap->reverse_interaction_value_map = pool_calloc (cat->pool, iap->n_cats,
+ sizeof *iap->reverse_interaction_value_map);
+
+ HMAP_FOR_EACH (ivn, struct interaction_value, node, &iap->ivmap)
+ {
+ iap->reverse_interaction_value_map[x++] = ivn;
+ }
+
+ assert (x <= iap->n_cats);
+
+ /* For some purposes (eg CONTRASTS in ONEWAY) the values need to be sorted */
+ sort (iap->reverse_interaction_value_map, x, sizeof (*iap->reverse_interaction_value_map),
+ compare_interaction_value_3way, iap);
+
+ /* Fill the remaining values with null */
+ for (ii = x ; ii < iap->n_cats; ++ii)
+ iap->reverse_interaction_value_map[ii] = NULL;
+
+ /* Populate the reverse variable maps. */
+ if (iap->df_prod)
+ {
+ for (ii = 0; ii < iap->df_prod [iap->iact->n_vars - 1]; ++ii)
+ cat->reverse_variable_map_short[idx_short++] = i;
+ }
+
+ for (ii = 0; ii < iap->n_cats; ++ii)
+ cat->reverse_variable_map_long[idx_long++] = i;
+ }
+
+ assert (cat->n_vars <= cat->n_iap);
+
+ categoricals_dump (cat);
+
+ /* Tally up the sums for all the encodings */
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ int x, y;
+ struct interact_params *iap = &cat->iap[i];
+ const struct interaction *iact = iap->iact;
+
+ const int df = iap->df_prod ? iap->df_prod [iact->n_vars - 1] : 0;
+
+ iap->enc_sum = xcalloc (df, sizeof (*(iap->enc_sum)));
+
+ for (y = 0; y < hmap_count (&iap->ivmap); ++y)
+ {
+ struct interaction_value *iv = iap->reverse_interaction_value_map[y];
+ for (x = iap->base_subscript_short; x < iap->base_subscript_short + df ;++x)
+ {
+ const double bin = categoricals_get_effects_code_for_case (cat, x, iv->ccase);
+ iap->enc_sum [x - iap->base_subscript_short] += bin * iv->cc;
+ }
+ if (cat->payload && cat->payload->calculate)
+ cat->payload->calculate (cat->aux1, cat->aux2, iv->user_data);
+ }
+ }
+
+ cat->sane = true;
+}
+
+
+static int
+reverse_variable_lookup_short (const struct categoricals *cat, int subscript)
+{
+ assert (cat->reverse_variable_map_short);
+ assert (subscript >= 0);
+ assert (subscript < cat->df_sum);
+
+ return cat->reverse_variable_map_short[subscript];
+}
+
+static int
+reverse_variable_lookup_long (const struct categoricals *cat, int subscript)
+{
+ assert (cat->reverse_variable_map_long);
+ assert (subscript >= 0);
+ assert (subscript < cat->n_cats_total);
+
+ return cat->reverse_variable_map_long[subscript];
+}
+
+
+/* Return the interaction corresponding to SUBSCRIPT */
+const struct interaction *
+categoricals_get_interaction_by_subscript (const struct categoricals *cat, int subscript)
+{
+ int index = reverse_variable_lookup_short (cat, subscript);
+
+ return cat->iap[index].iact;
+}
+
+double
+categoricals_get_weight_by_subscript (const struct categoricals *cat, int subscript)
+{
+ int vindex = reverse_variable_lookup_short (cat, subscript);
+ const struct interact_params *vp = &cat->iap[vindex];
+
+ return vp->cc;
+}
+
+double
+categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript)
+{
+ int vindex = reverse_variable_lookup_short (cat, subscript);
+ const struct interact_params *vp = &cat->iap[vindex];
+
+ return vp->enc_sum[subscript - vp->base_subscript_short];
+}
+
+
+/* Returns unity if the value in case C at SUBSCRIPT is equal to the category
+ for that subscript */
+static double
+categoricals_get_code_for_case (const struct categoricals *cat, int subscript,
+ const struct ccase *c,
+ bool effects_coding)
+{
+ const struct interaction *iact = categoricals_get_interaction_by_subscript (cat, subscript);
+
+ const int i = reverse_variable_lookup_short (cat, subscript);
+
+ const int base_index = cat->iap[i].base_subscript_short;
+
+ int v;
+ double result = 1.0;
+
+ const struct interact_params *iap = &cat->iap[i];
+
+ double dfp = 1.0;
+ for (v = 0; v < iact->n_vars; ++v)
+ {
+ const struct variable *var = iact->vars[v];
+
+ const union value *val = case_data (c, var);
+ const int width = var_get_width (var);
+ const struct variable_node *vn = lookup_variable (&cat->varmap, var, hash_pointer (var, 0));
+
+ const unsigned int hash = value_hash (val, width, 0);
+ const struct value_node *valn = lookup_value (&vn->valmap, val, hash, width);
+
+ double bin = 1.0;
+
+ const double df = iap->df_prod[v] / dfp;
+
+ /* Translate the subscript into an index for the individual variable */
+ const int index = ((subscript - base_index) % iap->df_prod[v] ) / dfp;
+ dfp = iap->df_prod [v];
+
+ if (effects_coding && valn->index == df )
+ bin = -1.0;
+ else if ( valn->index != index )
+ bin = 0;
+
+ result *= bin;
+ }
+
+ return result;
+}
+
+
+/* Returns unity if the value in case C at SUBSCRIPT is equal to the category
+ for that subscript */
+double
+categoricals_get_dummy_code_for_case (const struct categoricals *cat, int subscript,
+ const struct ccase *c)
+{
+ return categoricals_get_code_for_case (cat, subscript, c, false);
+}
+
+/* Returns unity if the value in case C at SUBSCRIPT is equal to the category
+ for that subscript.
+ Else if it is the last category, return -1.
+ Otherwise return 0.
+ */
+double
+categoricals_get_effects_code_for_case (const struct categoricals *cat, int subscript,
+ const struct ccase *c)
+{
+ return categoricals_get_code_for_case (cat, subscript, c, true);