struct interact_params
{
- /* A map of cases indexed by a interaction_value */
+ /* An example of each interaction that appears in the data, like a frequency
+ table for 'iact'. By construction, the number of elements must be less
+ than or equal to 'n_cats'.
+
+ categoricals_update() updates 'ivmap' case-by-case, then
+ categoricals_done() dumps 'ivmap' into 'reverse_interaction_value_map' and
+ sorts it. */
struct hmap ivmap;
+ struct interaction_value **reverse_interaction_value_map;
const struct interaction *iact;
int base_subscript_short;
int base_subscript_long;
- /* The number of distinct values of this interaction */
+ /* Product of hmap_count(&varnodes[*]->valmap), that is, the maximum number
+ of distinct values of this interaction. */
int n_cats;
/* An array of integers df_n * df_{n-1} * df_{n-2} ... */
double *enc_sum;
- /* A map of interaction_values indexed by subscript */
- struct interaction_value **reverse_interaction_value_map;
-
+ /* Sum of reverse_interaction_value_map[*]->cc. */
double cc;
};
/* An array of interact_params */
struct interact_params *iap;
+ size_t n_iap;
/* Map whose members are the union of the variables which comprise IAP */
struct hmap varmap;
- /* The size of IAP. (ie, the number of interactions involved.) */
- size_t n_iap;
-
/* The number of categorical variables which contain entries.
In the absence of missing values, this will be equal to N_IAP */
size_t n_vars;
- size_t df_sum;
-
/* A map to enable the lookup of variables indexed by subscript.
This map considers only the N - 1 of the N variables.
*/
- int *reverse_variable_map_short;
+ int *reverse_variable_map_short; /* 'df_sum' elements. */
+ size_t df_sum;
/* Like the above, but uses all N variables */
- int *reverse_variable_map_long;
-
+ int *reverse_variable_map_long; /* 'n_cats_total' elements. */
size_t n_cats_total;
struct pool *pool;
return iv;
}
+/* Returns true iff CAT is sane, that is, if it is complete and has at least
+ one value. */
bool
categoricals_sane (const struct categoricals *cat)
{
  /* Report the 'sane' flag stored in CAT, without modifying CAT. */
  const bool is_sane = cat->sane;

  return is_sane;
}
+/* Creates and returns a new categoricals object whose variables come from the
+ N_INTER interactions objects in the array starting at INTER. (The INTER
+ objects must outlive the categoricals object because it uses them
+ internally.)
+
+ FCTR_EXCL determines which cases are listwise ignored by
+ categoricals_update(). */
struct categoricals *
categoricals_create (struct interaction *const*inter, size_t n_inter,
const struct variable *wv, enum mv_class fctr_excl)
}
}
-/* Return the number of categories (distinct values) for interction N */
+/* Return the number of categories (distinct values) for interaction N in
+ CAT. */
size_t
categoricals_n_count (const struct categoricals *cat, size_t n)
{
}
+/* Returns the number of degrees of freedom for interaction N within CAT. */
size_t
categoricals_df (const struct categoricals *cat, size_t n)
{
}
-/* Return the total number of categories */
+/* Return the total number of categories across all interactions in CAT. */
size_t
categoricals_n_total (const struct categoricals *cat)
{
  /* The total is read straight from CAT's 'n_cats_total' member; nothing is
     recomputed here. */
  const size_t n_total = cat->n_cats_total;

  return n_total;
}
+/* Returns the total degrees of freedom for CAT. */
size_t
categoricals_df_total (const struct categoricals *cat)
{
  /* The total is read straight from CAT's 'df_sum' member; nothing is
     recomputed here. */
  const size_t df_total = cat->df_sum;

  return df_total;
}
+/* Returns true iff categoricals_done() has been called for CAT. */
bool
categoricals_is_complete (const struct categoricals *cat)
{
}
-/* This function must be called *before* any call to categoricals_get_*_by subscript and
- *after* all calls to categoricals_update */
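+/* This function must be called (once) before any call to the *_by_subscript or
+ *_by_category functions, but AFTER any calls to categoricals_update. This
+ function returns void, so it cannot itself report failure; presumably the
+ caller should check categoricals_sane() afterward, and if that is false then
+ no calls to *_by_subscript or *_by_category are allowed (TODO confirm). */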
void
categoricals_done (const struct categoricals *cat_)
{
union value ;
+/* Categoricals.
+
+ A categorical variable has a finite and usually small number of possible
+ values. The categoricals data structure organizes an array of interactions
+ among categorical variables, that is, a set of sets of categorical
+ variables. (Both levels of "set" are ordered.)
+
+ The life cycle of a categoricals object looks like this:
+
+ 1. Create it with categoricals_create(). This fixes the set of interactions
+ and other parameters.
+
+ 2. Pass all of the desired cases through the object with
+ categoricals_update().
+
+ 3. Finalize the object with categoricals_done(). Only at this point may
+ most of the categoricals query functions be called.
+
+ 4. Use the categoricals object as desired.
+
+ 5. Destroy the object with categoricals_destroy().
+*/
+
+/* Creating and destroying categoricals. */
struct categoricals *categoricals_create (struct interaction *const*, size_t n_int,
const struct variable *wv,
enum mv_class fctr_excl);
-
void categoricals_destroy (struct categoricals *);
+/* Updating categoricals. */
void categoricals_update (struct categoricals *cat, const struct ccase *c);
+void categoricals_done (const struct categoricals *cat);
+bool categoricals_is_complete (const struct categoricals *cat);
+
+/* Counting categories.
+ A variable's number of categories is the number of unique values observed in
+ the data passed to categoricals_update().
+ An interaction's number of categories is the number of observed unique
+ values of its variables, which will often be less than the product of its
+ variables' numbers of categories.
+
+ A categorical object's number of categories is the sum of its interactions'
+ categories. */
/* Return the number of categories (distinct values) for interaction N */
size_t categoricals_n_count (const struct categoricals *cat, size_t n);
+size_t categoricals_n_total (const struct categoricals *cat);
-size_t categoricals_df (const struct categoricals *cat, size_t n);
+/* Degrees of freedom.
-/* Return the total number of categories */
-size_t categoricals_n_total (const struct categoricals *cat);
+ A categorical variable with N_CATS categories has N_CATS - 1 degrees of
+ freedom.
-/* Return the total degrees of freedom */
-size_t categoricals_df_total (const struct categoricals *cat);
+ An interaction's degrees of freedom is the product of its variables' degrees
+ of freedom.
+ A categorical object's degrees of freedom is the sum of its interactions'
+ degrees of freedom. */
+size_t categoricals_df (const struct categoricals *cat, size_t n);
+size_t categoricals_df_total (const struct categoricals *cat);
-/*
- Return the total number of variables which participated in these categoricals.
- Due to the possibility of missing values, this is NOT necessarily
- equal to the number of variables passed in when the object was
- created.
-*/
size_t categoricals_get_n_variables (const struct categoricals *cat);
-bool categoricals_is_complete (const struct categoricals *cat);
-
+/* Sanity. */
+bool categoricals_sane (const struct categoricals *cat);
-/*
- Must be called (once) before any call to the *_by_subscript or *_by_category
- functions, but AFTER any calls to categoricals_update.
- If this function returns false, then no calls to _by_subscript or *_by_category
- are allowed.
-*/
-void categoricals_done (const struct categoricals *cat);
+/* "Short map".
-bool categoricals_sane (const struct categoricals *cat);
+ These look up an interaction within a categoricals object on the basis of a
+ "subscript". Interaction 0 with DF_0 degrees of freedom is assigned
+ subscripts [0, DF_0 - 1], interaction 1 with DF_1 degrees of freedom is
+ assigned subscripts [DF_0, DF_0 + DF_1 - 1], and so on. The subscripts
+ passed in must be in the range [0, DF_SUM - 1] where DF_SUM is the total
+ number of degrees of freedom for the object, as returned by
+ categoricals_df_total().
+ These functions are intended for covariance matrix routines, where normally
+ 1 less than the total number of distinct values of each categorical variable
+ should be considered.
-/*
- The *_by_subscript functions use the short map.
- Their intended use is by covariance matrix routines, where normally 1 less than
- the total number of distinct values of each categorical variable should
- be considered.
- */
+ These functions may be used on an object only after calling
+ categoricals_done().
+*/
double categoricals_get_weight_by_subscript (const struct categoricals *cat, int subscript);
const struct interaction *categoricals_get_interaction_by_subscript (const struct categoricals *cat, int subscript);
double categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript);
-
-/* Returns unity if the value in case C at SUBSCRIPT is equal to the category
- for that subscript */
double
categoricals_get_dummy_code_for_case (const struct categoricals *cat, int subscript,
const struct ccase *c);
-
-/* Returns unity if the value in case C at SUBSCRIPT is equal to the category
- for that subscript.
- Else if it is the last category, return -1.
- Otherwise return 0.
- */
double
categoricals_get_effects_code_for_case (const struct categoricals *cat, int subscript,
const struct ccase *c);
-/* These use the long map. Useful for descriptive statistics. */
+/* "Long map".
+
+ These look up an interaction within a categoricals object on the basis of a
+ "category index". Interaction 0 in CAT with CAT_0 categories has indexes
+ [0, CAT_0 - 1], interaction 1 with CAT_1 categories has indexes [CAT_0,
+ CAT_0 + CAT_1 - 1], and so on. The indexes passed in must be in the range
+ [0, CAT_TOTAL - 1] where CAT_TOTAL is the total number of categories for the
+ object, as returned by categoricals_n_total().
+ These functions are useful for descriptive statistics.
+ These functions may be used on an object only after calling
+ categoricals_done().
+*/
const struct ccase *
categoricals_get_case_by_category_real (const struct categoricals *cat, int iact, int n);
-
void *
categoricals_get_user_data_by_category_real (const struct categoricals *cat, int iact, int n);
-
-
void * categoricals_get_user_data_by_category (const struct categoricals *cat, int category);
-
const struct ccase * categoricals_get_case_by_category (const struct categoricals *cat, int subscript);
-
/* A pair of client-supplied callbacks, handed to a categoricals object via
   categoricals_set_payload() together with the AUX1 and AUX2 arguments.
   NOTE(review): when and how often the categoricals code invokes these
   callbacks is not visible here -- confirm against the implementation. */
struct payload
{
/* Receives AUX1 and AUX2 and returns an opaque pointer; presumably this is
   the value later passed to 'destroy' as USER_DATA (TODO confirm). */
void* (*create) (const void *aux1, void *aux2);
/* Receives AUX1, AUX2 and a USER_DATA pointer; presumably releases whatever
   'create' returned (TODO confirm). */
void (*destroy) (const void *aux1, void *aux2, void *user_data);
};
-
void categoricals_set_payload (struct categoricals *cats, const struct payload *p, const void *aux1, void *aux2);
bool categoricals_isbalanced (const struct categoricals *cat);