+ /* Implementation Note: Whilst this function is O(n) in cat->n_cats_total, in most
+ uses it will be more efficient than using a tree-based structure, since it
+ is called only once, and means that subsequent lookups will be O(1).
+
+ 1 call of O(n) + 10^9 calls of O(1) is better than 10^9 calls of O(log n).
+ */
+ struct categoricals *cat = CONST_CAST (struct categoricals *, cat_);
+ int v;
+ int i;
+ int idx_short = 0;
+ int idx_long = 0;
+ cat->df_sum = 0;
+ cat->n_cats_total = 0;
+
+ /* Calculate the degrees of freedom, and the number of categories */
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ int df = 1;
+ const struct interaction *iact = cat->iap[i].iact;
+
+ cat->iap[i].df_prod = xcalloc (iact->n_vars, sizeof (int));
+
+ cat->iap[i].n_cats = 1;
+
+ for (v = 0 ; v < iact->n_vars; ++v)
+ {
+ const struct variable *var = iact->vars[v];
+
+ struct variable_node *vn = lookup_variable (&cat->varmap, var, hash_pointer (var, 0));
+
+ cat->iap[i].df_prod[v] = df * (hmap_count (&vn->valmap) - 1);
+ df = cat->iap[i].df_prod[v];
+
+ cat->iap[i].n_cats *= hmap_count (&vn->valmap);
+ }
+
+ cat->df_sum += cat->iap[i].df_prod [v - 1];
+ cat->n_cats_total += cat->iap[i].n_cats;
+ }
+
+
+ cat->reverse_variable_map_short = pool_calloc (cat->pool,
+ cat->df_sum,
+ sizeof *cat->reverse_variable_map_short);
+
+ cat->reverse_variable_map_long = pool_calloc (cat->pool,
+ cat->n_cats_total,
+ sizeof *cat->reverse_variable_map_long);
+
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ struct interaction_value *ivn = NULL;
+ int x = 0;
+ int ii;
+ struct interact_params *iap = &cat->iap[i];
+
+ iap->base_subscript_short = idx_short;
+ iap->base_subscript_long = idx_long;
+
+ iap->reverse_interaction_value_map = pool_calloc (cat->pool, iap->n_cats,
+ sizeof *iap->reverse_interaction_value_map);
+
+ HMAP_FOR_EACH (ivn, struct interaction_value, node, &iap->ivmap)
+ {
+ iap->reverse_interaction_value_map[x++] = ivn;
+ }
+
+ assert (x <= iap->n_cats);
+
+ /* For some purposes (eg CONTRASTS in ONEWAY) the values need to be sorted */
+ sort (iap->reverse_interaction_value_map, x, sizeof (*iap->reverse_interaction_value_map),
+ compare_interaction_value_3way, iap);
+
+ /* Fill the remaining values with null */
+ for (ii = x ; ii < iap->n_cats; ++ii)
+ iap->reverse_interaction_value_map[ii] = NULL;
+
+ /* Populate the reverse variable maps. */
+ for (ii = 0; ii < iap->df_prod [iap->iact->n_vars - 1]; ++ii)
+ cat->reverse_variable_map_short[idx_short++] = i;
+
+ for (ii = 0; ii < iap->n_cats; ++ii)
+ cat->reverse_variable_map_long[idx_long++] = i;
+ }
+
+ assert (cat->n_vars <= cat->n_iap);
+
+ // categoricals_dump (cat);
+
+ /* Tally up the sums for all the encodings */
+ for (i = 0 ; i < cat->n_iap; ++i)
+ {
+ int x, y;
+ struct interact_params *iap = &cat->iap[i];
+ const struct interaction *iact = iap->iact;
+
+ const int df = iap->df_prod [iact->n_vars - 1];
+
+ iap->enc_sum = xcalloc (df, sizeof (*(iap->enc_sum)));
+
+ for (y = 0; y < hmap_count (&iap->ivmap); ++y)
+ {
+ struct interaction_value *iv = iap->reverse_interaction_value_map[y];
+ for (x = iap->base_subscript_short; x < iap->base_subscript_short + df ;++x)
+ {
+ const double bin = categoricals_get_binary_by_subscript (cat, x, iv->ccase); \
+ iap->enc_sum [x - iap->base_subscript_short] += bin * iv->cc;
+ }
+ }
+ }
+}
+
+
+/* Looks up SUBSCRIPT in CAT->reverse_variable_map_short and returns
+   the entry stored there, which callers use as an index into CAT->iap.
+   SUBSCRIPT must lie in the range [0, CAT->df_sum). */
+static int
+reverse_variable_lookup_short (const struct categoricals *cat, int subscript)
+{
+  int iap_idx;
+
+  /* The map must already exist and SUBSCRIPT must fall inside it. */
+  assert (cat->reverse_variable_map_short);
+  assert (subscript >= 0);
+  assert (subscript < cat->df_sum);
+
+  iap_idx = cat->reverse_variable_map_short[subscript];
+  return iap_idx;
+}
+
+/* Looks up SUBSCRIPT in CAT->reverse_variable_map_long and returns
+   the entry stored there.  SUBSCRIPT must lie in the range
+   [0, CAT->n_cats_total). */
+static int
+reverse_variable_lookup_long (const struct categoricals *cat, int subscript)
+{
+  int iap_idx;
+
+  /* The map must already exist and SUBSCRIPT must fall inside it. */
+  assert (cat->reverse_variable_map_long);
+  assert (subscript >= 0);
+  assert (subscript < cat->n_cats_total);
+
+  iap_idx = cat->reverse_variable_map_long[subscript];
+  return iap_idx;
+}
+
+
+/* Returns the interaction to which SUBSCRIPT belongs.  SUBSCRIPT is
+   resolved through the short reverse variable map of CAT, so it is
+   subject to the range checks in reverse_variable_lookup_short. */
+const struct interaction *
+categoricals_get_interaction_by_subscript (const struct categoricals *cat, int subscript)
+{
+  const struct interact_params *iap
+    = &cat->iap[reverse_variable_lookup_short (cat, subscript)];
+
+  return iap->iact;
+}
+
+/* Return the case corresponding to SUBSCRIPT */
+static const struct ccase *
+categoricals_get_case_by_subscript (const struct categoricals *cat, int subscript)
+{
+ int vindex = reverse_variable_lookup_short (cat, subscript);
+ const struct interact_params *vp = &cat->iap[vindex];
+ const struct interaction_value *vn = vp->reverse_interaction_value_map [subscript - vp->base_subscript_short];