From 468d19460a8bc3fc9541cde13db5d88e02da71f2 Mon Sep 17 00:00:00 2001 From: Jason H Stover Date: Wed, 28 Jan 2009 16:29:38 -0500 Subject: [PATCH] interaction.c: Introduced interaction_variable and interaction_value structs and associated functions. covariance-matrix.c: Altered covariance_accumulate_listwise, covariance_accumulate_pairwise, and update_* functions to handle interactions. glm.q: Added temporary NULL and 0 as final args to call to covariance_matrix_accumulate. --- src/language/stats/glm.q | 2 +- src/math/covariance-matrix.c | 109 +++++++++++++++++------ src/math/covariance-matrix.h | 3 +- src/math/interaction.c | 167 ++++++++++++++++++++++++++++++++--- src/math/interaction.h | 15 +++- 5 files changed, 255 insertions(+), 41 deletions(-) diff --git a/src/language/stats/glm.q b/src/language/stats/glm.q index 07ee7ab5..26b90365 100644 --- a/src/language/stats/glm.q +++ b/src/language/stats/glm.q @@ -243,7 +243,7 @@ run_glm (struct casereader *input, /* Accumulate the covariance matrix. 
*/ - covariance_matrix_accumulate (cov, c); + covariance_matrix_accumulate (cov, c, NULL, 0); n_data++; } covariance_matrix_compute (cov); diff --git a/src/math/covariance-matrix.c b/src/math/covariance-matrix.c index 4f14a7c6..e1b612af 100644 --- a/src/math/covariance-matrix.c +++ b/src/math/covariance-matrix.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -65,10 +64,13 @@ struct covariance_matrix int n_pass; int missing_handling; enum mv_class missing_value; - void (*accumulate) (struct covariance_matrix *, const struct ccase *); + void (*accumulate) (struct covariance_matrix *, const struct ccase *, + const struct interaction_variable **, size_t); void (*update_moments) (struct covariance_matrix *, size_t, double); }; + + static struct hsh_table *covariance_hsh_create (size_t); static hsh_hash_func covariance_accumulator_hash; static unsigned int hash_numeric_alpha (const struct variable *, @@ -94,9 +96,13 @@ static struct covariance_accumulator *get_new_covariance_accumulator (const value *); static void covariance_accumulate_listwise (struct covariance_matrix *, - const struct ccase *); + const struct ccase *, + const struct interaction_variable **, + size_t); static void covariance_accumulate_pairwise (struct covariance_matrix *, - const struct ccase *); + const struct ccase *, + const struct interaction_variable **, + size_t); struct covariance_matrix * covariance_matrix_init (size_t n_variables, @@ -228,7 +234,7 @@ column_iterate (struct design_matrix *cov, const struct variable *v, col += i; y = -1.0 * cat_get_category_count (i, v) / ssize; tmp_val = cat_subscript_to_value (i, v); - if (compare_values_short (tmp_val, val1, v)) + if (!compare_values_short (tmp_val, val1, v)) { y += -1.0; } @@ -263,7 +269,7 @@ covariance_pass_two (struct design_matrix *cov, double mean1, double mean2, row += i; x = -1.0 * cat_get_category_count (i, v1) / ssize; tmp_val = cat_subscript_to_value (i, v1); - if (compare_values_short 
(tmp_val, val1, v1)) + if (!compare_values_short (tmp_val, val1, v1)) { x += 1.0; } @@ -414,7 +420,7 @@ match_nodes (const struct covariance_accumulator *c, } if (var_is_alpha (v1) && var_is_alpha (v2)) { - if (compare_values_short (val1, c->val1, v1)) + if (!compare_values_short (val1, c->val1, v1)) { if (!compare_values_short (val2, c->val2, v2)) { @@ -491,13 +497,13 @@ update_product (const struct variable *v1, const struct variable *v2, return 0.0; } static double -update_sum (const struct variable *var, const union value *val) +update_sum (const struct variable *var, const union value *val, double weight) { assert (var != NULL); assert (val != NULL); if (var_is_alpha (var)) { - return 1.0; + return weight; } return val->f; } @@ -526,20 +532,27 @@ get_covariance_variables (const struct covariance_matrix *cov) return cov->v_variables; } + static void update_hash_entry (struct hsh_table *c, const struct variable *v1, const struct variable *v2, - const union value *val1, const union value *val2) + const union value *val1, const union value *val2, + const struct interaction_value *i_val1, + const struct interaction_value *i_val2) { struct covariance_accumulator *ca; struct covariance_accumulator *new_entry; + double iv_f1; + double iv_f2; - + iv_f1 = interaction_value_get_nonzero_entry (i_val1); + iv_f2 = interaction_value_get_nonzero_entry (i_val2); ca = get_new_covariance_accumulator (v1, v2, val1, val2); ca->dot_product = update_product (ca->v1, ca->v2, ca->val1, ca->val2); - ca->sum1 = update_sum (ca->v1, ca->val1); - ca->sum2 = update_sum (ca->v2, ca->val2); + ca->dot_product *= iv_f1 * iv_f2; + ca->sum1 = update_sum (ca->v1, ca->val1, iv_f1); + ca->sum2 = update_sum (ca->v2, ca->val2, iv_f2); ca->ssize = 1.0; new_entry = hsh_insert (c, ca); if (new_entry != NULL) @@ -569,13 +582,17 @@ update_hash_entry (struct hsh_table *c, */ static void covariance_accumulate_pairwise (struct covariance_matrix *cov, - const struct ccase *ccase) + const struct ccase *ccase, + 
const struct interaction_variable **i_var, + size_t n_intr) { size_t i; size_t j; const union value *val1; const union value *val2; const struct variable **v_variables; + struct interaction_value *i_val1 = NULL; + struct interaction_value *i_val2 = NULL; assert (cov != NULL); assert (ccase != NULL); @@ -585,7 +602,15 @@ covariance_accumulate_pairwise (struct covariance_matrix *cov, for (i = 0; i < cov->n_variables; ++i) { - val1 = case_data (ccase, v_variables[i]); + if (is_interaction (v_variables[i], i_var, n_intr)) + { + i_val1 = interaction_case_data (ccase, v_variables[i], i_var, n_intr); + val1 = interaction_value_get (i_val1); + } + else + { + val1 = case_data (ccase, v_variables[i]); + } if (!var_is_value_missing (v_variables[i], val1, cov->missing_value)) { cat_value_update (v_variables[i], val1); @@ -594,15 +619,23 @@ covariance_accumulate_pairwise (struct covariance_matrix *cov, for (j = i; j < cov->n_variables; j++) { - val2 = case_data (ccase, v_variables[j]); + if (is_interaction (v_variables[j], i_var, n_intr)) + { + i_val2 = interaction_case_data (ccase, v_variables[j], i_var, n_intr); + val2 = interaction_value_get (i_val2); + } + else + { + val2 = case_data (ccase, v_variables[j]); + } if (!var_is_value_missing (v_variables[j], val2, cov->missing_value)) { update_hash_entry (cov->ca, v_variables[i], v_variables[j], - val1, val2); + val1, val2, i_val1, i_val2); if (j != i) update_hash_entry (cov->ca, v_variables[j], - v_variables[i], val2, val1); + v_variables[i], val2, val1, i_val2, i_val1); } } } @@ -626,13 +659,17 @@ covariance_accumulate_pairwise (struct covariance_matrix *cov, */ static void covariance_accumulate_listwise (struct covariance_matrix *cov, - const struct ccase *ccase) + const struct ccase *ccase, + const struct interaction_variable **i_var, + size_t n_intr) { size_t i; size_t j; const union value *val1; const union value *val2; const struct variable **v_variables; + struct interaction_value *i_val1 = NULL; + struct 
interaction_value *i_val2 = NULL; assert (cov != NULL); assert (ccase != NULL); @@ -642,19 +679,35 @@ covariance_accumulate_listwise (struct covariance_matrix *cov, for (i = 0; i < cov->n_variables; ++i) { - val1 = case_data (ccase, v_variables[i]); + if (is_interaction (v_variables[i], i_var, n_intr)) + { + i_val1 = interaction_case_data (ccase, v_variables[i], i_var, n_intr); + val1 = interaction_value_get (i_val1); + } + else + { + val1 = case_data (ccase, v_variables[i]); + } cat_value_update (v_variables[i], val1); if (var_is_numeric (v_variables[i])) cov->update_moments (cov, i, val1->f); for (j = i; j < cov->n_variables; j++) { - val2 = case_data (ccase, v_variables[j]); + if (is_interaction (v_variables[j], i_var, n_intr)) + { + i_val2 = interaction_case_data (ccase, v_variables[j], i_var, n_intr); + val2 = interaction_value_get (i_val2); + } + else + { + val2 = case_data (ccase, v_variables[j]); + } update_hash_entry (cov->ca, v_variables[i], v_variables[j], - val1, val2); + val1, val2, i_val1, i_val2); if (j != i) update_hash_entry (cov->ca, v_variables[j], v_variables[i], - val2, val1); + val2, val1, i_val2, i_val1); } } } @@ -663,13 +716,13 @@ covariance_accumulate_listwise (struct covariance_matrix *cov, Call this function during the data pass. Each case will be added to a hash containing all values of the covariance matrix. After the data have been passed, call covariance_matrix_compute to put the - values in the struct covariance_matrix. + values in the struct covariance_matrix. 
*/ void covariance_matrix_accumulate (struct covariance_matrix *cov, - const struct ccase *ccase) + const struct ccase *ccase, void **aux, size_t n_intr) { - cov->accumulate (cov, ccase); + cov->accumulate (cov, ccase, (const struct interaction_variable **) aux, n_intr); } static void @@ -690,7 +743,7 @@ covariance_matrix_insert (struct design_matrix *cov, { i = 0; tmp_val = cat_subscript_to_value (i, v1); - while (!compare_values_short (tmp_val, val1, v1)) + while (compare_values_short (tmp_val, val1, v1)) { i++; tmp_val = cat_subscript_to_value (i, v1); @@ -705,7 +758,7 @@ covariance_matrix_insert (struct design_matrix *cov, col = design_matrix_var_to_column (cov, v2); i = 0; tmp_val = cat_subscript_to_value (i, v1); - while (!compare_values_short (tmp_val, val1, v1)) + while (compare_values_short (tmp_val, val1, v1)) { i++; tmp_val = cat_subscript_to_value (i, v1); diff --git a/src/math/covariance-matrix.h b/src/math/covariance-matrix.h index 33a5d750..b692e7e8 100644 --- a/src/math/covariance-matrix.h +++ b/src/math/covariance-matrix.h @@ -22,6 +22,7 @@ #define COVARIANCE_MATRIX_H #include +#include struct moments1; struct ccase; @@ -53,6 +54,6 @@ struct covariance_matrix *covariance_matrix_init (size_t, int, int, enum mv_class); void covariance_matrix_free (struct covariance_matrix *); void covariance_matrix_accumulate (struct covariance_matrix *, - const struct ccase *); + const struct ccase *, void **, size_t); struct design_matrix *covariance_to_design (const struct covariance_matrix *); #endif diff --git a/src/math/interaction.c b/src/math/interaction.c index ff2c5357..33da8423 100644 --- a/src/math/interaction.c +++ b/src/math/interaction.c @@ -44,15 +44,24 @@ struct interaction_variable int n_vars; const struct variable **members; struct variable *intr; + size_t n_alpha; }; struct interaction_value { const struct interaction_variable *intr; - union value *strings; /* Concatenation of the string values in this interaction's value. 
*/ + union value *val; /* Concatenation of the string values in this + interaction's value, or the product of a bunch + of numeric values for a purely numeric + interaction. + */ double f; /* Product of the numerical values in this interaction's value. */ }; +/* + An interaction_variable has type alpha if any of its members have type + alpha. Otherwise, its type is numeric. + */ struct interaction_variable * interaction_variable_create (const struct variable **vars, int n_vars) { @@ -62,17 +71,26 @@ interaction_variable_create (const struct variable **vars, int n_vars) if (n_vars > 0) { result = xmalloc (sizeof (*result)); + result->n_alpha = 0; result->members = xnmalloc (n_vars, sizeof (*result->members)); result->intr = var_create_internal (0); result->n_vars = n_vars; for (i = 0; i < n_vars; i++) { result->members[i] = vars[i]; + if (var_is_alpha (vars[i])) + { + result->n_alpha++; + } } } + /* + VAR_SET_WIDTH sets the type of the variable. + */ + var_set_width (result->intr, MAX_SHORT_STRING * result->n_alpha + 1); + return result; } - void interaction_variable_destroy (struct interaction_variable *iv) { var_destroy (iv->intr); @@ -80,12 +98,41 @@ void interaction_variable_destroy (struct interaction_variable *iv) free (iv); } +/* + Get one of the member variables. + */ +const struct variable * +interaction_variable_get_member (const struct interaction_variable *iv, size_t i) +{ + return iv->members[i]; +} + size_t -interaction_variable_get_n_vars (const struct interaction_variable *iv) +interaction_get_n_vars (const struct interaction_variable *iv) { return (iv == NULL) ? 0 : iv->n_vars; } +size_t +interaction_get_n_alpha (const struct interaction_variable *iv) +{ + return iv->n_alpha; +} + +size_t +interaction_get_n_numeric (const struct interaction_variable *iv) +{ + return (interaction_get_n_vars (iv) - interaction_get_n_alpha (iv)); +} + +/* + Get the interaction variable itself. 
+ */ +const struct variable * +interaction_variable_get_var (const struct interaction_variable *iv) +{ + return iv->intr; +} /* Given list of values, compute the value of the corresponding interaction. This "value" is not stored as the typical vector of @@ -97,6 +144,7 @@ struct interaction_value * interaction_value_create (const struct interaction_variable *var, const union value **vals) { struct interaction_value *result = NULL; + const struct variable *member; size_t i; size_t n_vars; @@ -104,31 +152,130 @@ interaction_value_create (const struct interaction_variable *var, const union va { result = xmalloc (sizeof (*result)); result->intr = var; - n_vars = interaction_variable_get_n_vars (var); - result->strings = value_create (n_vars * MAX_SHORT_STRING + 1); + n_vars = interaction_get_n_vars (var); + result->val = value_create (n_vars * MAX_SHORT_STRING + 1); result->f = 1.0; for (i = 0; i < n_vars; i++) { - if (var_is_alpha (var->members[i])) + member = interaction_variable_get_member (var, i); + + if (var_is_value_missing (member, vals[i], MV_ANY)) { - strncat (result->strings->s, vals[i]->s, MAX_SHORT_STRING); + value_set_missing (result->val, MAX_SHORT_STRING); + result->f = SYSMIS; + break; } - else if (var_is_numeric (var->members[i])) + else { - result->f *= vals[i]->f; + if (var_is_alpha (var->members[i])) + { + strncat (result->val->s, vals[i]->s, MAX_SHORT_STRING); + } + else if (var_is_numeric (var->members[i])) + { + result->f *= vals[i]->f; + } } } + if (interaction_get_n_alpha (var) == 0) + { + /* + If there are no categorical variables, then the + interaction consists of only numeric data. In this case, + code that uses this interaction_value will see the union + member as the numeric value. If we were to store that + numeric value in result->f as well, the calling code may + inadvertently square this value by multiplying by + result->val->f. 
Such multiplication would be correct for an + interaction consisting of both categorical and numeric + data, but a mistake for purely numerical interactions. To + avoid the error, we set result->f to 1.0 for numeric + interactions. + */ + result->val->f = result->f; + result->f = 1.0; + } } return result; } +union value * +interaction_value_get (const struct interaction_value *val) +{ + return val->val; +} + +/* + Returns the numeric value of the non-zero entry for the vector + corresponding to this interaction. Do not use this function to get + the numeric value of a purely numeric interaction. Instead, use the + union value * returned by interaction_value_get. + */ +double +interaction_value_get_nonzero_entry (const struct interaction_value *val) +{ + if (val != NULL) + return val->f; + return 1.0; +} + void interaction_value_destroy (struct interaction_value *val) { if (val != NULL) { - free (val->strings); + free (val->val); free (val); } } +/* + Return a value from a variable that is an interaction. 
+ */ +struct interaction_value * +interaction_case_data (const struct ccase *ccase, const struct variable *var, + const struct interaction_variable **intr_vars, size_t n_intr) +{ + size_t i; + size_t n_vars; + const struct interaction_variable *iv; + const struct variable *intr; + const struct variable *member; + const union value **vals = NULL; + + for (i = 0; i < n_intr; i++) + { + iv = intr_vars[i]; + intr = interaction_variable_get_var (iv); + if (var_get_dict_index (intr) == var_get_dict_index (var)) + { + break; + } + } + n_vars = interaction_get_n_vars (iv); + vals = xnmalloc (n_vars, sizeof (*vals)); + for (i = 0; i < n_vars; i++) + { + member = interaction_variable_get_member (iv, i); + vals[i] = case_data (ccase, member); + } + return interaction_value_create (iv, vals); +} + +bool +is_interaction (const struct variable *var, const struct interaction_variable **iv, size_t n_intr) +{ + size_t i; + const struct variable *intr; + + for (i = 0; i < n_intr; i++) + { + intr = interaction_variable_get_var (iv[i]); + if (var_get_dict_index (intr) == var_get_dict_index (var)) + { + return true; + } + } + return false; +} + diff --git a/src/math/interaction.h b/src/math/interaction.h index bc1e63a4..73b440be 100644 --- a/src/math/interaction.h +++ b/src/math/interaction.h @@ -16,6 +16,8 @@ #ifndef INTERACTION_H #define INTERACTION_H +#include + struct interaction_variable; struct interaction_value; @@ -24,5 +26,16 @@ void interaction_variable_destroy (struct interaction_variable *); struct interaction_value * interaction_value_create (const struct interaction_variable *, const union value **); void interaction_value_destroy (struct interaction_value *); size_t interaction_variable_get_n_vars (const struct interaction_variable *); - +double interaction_value_get_nonzero_entry (const struct interaction_value *); +union value * interaction_value_get (const struct interaction_value *); +const struct variable * interaction_variable_get_var (const struct 
interaction_variable *); +size_t interaction_get_n_numeric (const struct interaction_variable *); +size_t interaction_get_n_alpha (const struct interaction_variable *); +size_t interaction_get_n_vars (const struct interaction_variable *); +const struct variable * interaction_variable_get_member (const struct interaction_variable *, size_t); +bool is_interaction (const struct variable *, const struct interaction_variable **, size_t); +struct interaction_value * +interaction_case_data (const struct ccase *, const struct variable *, + const struct interaction_variable **, size_t); +double interaction_value_get_nonzero_entry (const struct interaction_value *); #endif -- 2.30.2