From: Jason H Stover Date: Tue, 16 Jun 2009 16:20:57 +0000 (-0400) Subject: Renamed interaction_variable_get_var to interaction_get_variable. X-Git-Tag: build37~67 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp-builds.git;a=commitdiff_plain;h=8829e1f148c279db0f19b9e3bd746ace07d2d7f1 Renamed interaction_variable_get_var to interaction_get_variable. Renamed interaction_variable_get_member to interaction_get_member. Split update_hash_entry into update_hash_entry and update_hash_entry_intr for interactions. inner_intr_loop: New function. covariance_accumulate_pairwise: Loop separately over variables, then interactions. interaction_variable_create: Make interactions type alpha when appropriate. interaction_value_create: Use value_resize to avoid copying more data than necessary into new interaction_value. --- diff --git a/src/math/covariance-matrix.c b/src/math/covariance-matrix.c index 2a00a56d..89660ba9 100644 --- a/src/math/covariance-matrix.c +++ b/src/math/covariance-matrix.c @@ -61,7 +61,9 @@ struct covariance_matrix struct moments1 **m1; struct moments **m; const struct variable **v_variables; + const struct interaction_variable **interactions; size_t n_variables; + size_t n_intr; int n_pass; int missing_handling; enum mv_class missing_value; @@ -119,6 +121,7 @@ covariance_matrix_init (size_t n_variables, result->ca = covariance_hsh_create (&result->n_variables); result->m = NULL; result->m1 = NULL; + result->n_intr = 0; result->missing_handling = missing_handling; result->missing_value = missing_value; result->accumulate = (result->missing_handling == LISTWISE) ? @@ -147,6 +150,14 @@ covariance_matrix_init (size_t n_variables, return result; } +void +covariance_interaction_set (struct covariance_matrix *cov, + const struct interaction_variable **intr, size_t n_intr) +{ + cov->interactions = intr; + cov->n_intr = n_intr; +} + static size_t get_n_rows (size_t n_variables, const struct variable *v_variables[]) { @@ -177,6 +188,44 @@ covariance_matrix_create (size_t n_variables, return design_matrix_create (n_variables, v_variables, n_rows); } +static size_t +get_n_rows_s (const struct variable *var) +{ + size_t result = 0; + if (var_is_numeric (var)) + { + result++; + } + else + { + result += cat_get_n_categories (var) - 1; + } + return result; +} +static struct design_matrix * +covariance_matrix_create_s (struct covariance_matrix *cov) +{ + struct variable **v_variables; + size_t n_variables; + size_t n_rows = 0; + size_t i; + size_t j; + + n_variables = cov->n_variables + cov->n_intr; + v_variables = xnmalloc (n_variables, sizeof (*v_variables)); + for (i = 0; i < cov->n_variables; i++) + { + v_variables[i] = cov->v_variables[i]; + n_rows += get_n_rows_s (v_variables[i]); + } + for (j = 0; j < cov->n_intr; j++) + { + v_variables[i + j] = interaction_get_variable (cov->interactions[j]); + n_rows += get_n_rows_s (v_variables[i]); + } + return design_matrix_create (n_variables, v_variables, n_rows); +} + static void update_moments1 (struct covariance_matrix *cov, size_t i, double x) { @@ -550,12 +599,12 @@ get_covariance_variables (const struct covariance_matrix *cov) } static void -update_hash_entry (struct hsh_table *c, - const struct variable *v1, - const struct variable *v2, - const union value *val1, const union value *val2, - const struct interaction_value *i_val1, - const struct interaction_value *i_val2) +update_hash_entry_intr (struct hsh_table *c, + const struct variable *v1, + const struct variable *v2, + const union value *val1, const union value *val2, + const struct interaction_value *i_val1, + const struct interaction_value *i_val2) { struct covariance_accumulator *ca; struct covariance_accumulator *new_entry; @@ -588,6 +637,56 @@ update_hash_entry (struct hsh_table *c, } } +static void +update_hash_entry (struct hsh_table *c, + const struct variable *v1, + const struct variable *v2, + const union value *val1, const union value *val2) +{ + struct covariance_accumulator *ca; + struct covariance_accumulator *new_entry; + + ca = get_new_covariance_accumulator (v1, v2, val1, val2); + ca->dot_product = update_product (ca->v1, ca->v2, ca->val1, ca->val2); + ca->sum1 = update_sum (ca->v1, ca->val1, 1.0); + ca->sum2 = update_sum (ca->v2, ca->val2, 1.0); + ca->ssize = 1.0; + new_entry = hsh_insert (c, ca); + + if (new_entry != NULL) + { + new_entry->dot_product += ca->dot_product; + new_entry->ssize += 1.0; + new_entry->sum1 += ca->sum1; + new_entry->sum2 += ca->sum2; + /* + If DOT_PRODUCT is null, CA was not already in the hash + hable, so we don't free it because it was just inserted. + If DOT_PRODUCT was not null, CA is already in the hash table. + Unnecessary now, it must be freed here. + */ + free (ca); + } +} + +static void +inner_intr_loop (struct covariance_matrix *cov, const struct ccase *ccase, const struct variable *var1, + const union value *val1, const struct interaction_variable **i_var, + const struct interaction_value *i_val1, size_t j) +{ + struct variable *var2; + union value *val2; + struct interaction_value *i_val2; + + var2 = interaction_get_variable (i_var[j]); + i_val2 = interaction_case_data (ccase, i_var[j]); + val2 = interaction_value_get (i_val2); + + if (!var_is_value_missing (var2, val2, cov->missing_value)) + { + update_hash_entry_intr (cov->ca, var1, var2, val1, val2, i_val1, i_val2); + } +} /* Compute the covariance matrix in a single data-pass. Cases with missing values are dropped pairwise, in other words, only if one of @@ -608,6 +707,8 @@ covariance_accumulate_pairwise (struct covariance_matrix *cov, const union value *val1; const union value *val2; const struct variable **v_variables; + const struct variable *var1; + const struct variable *var2; struct interaction_value *i_val1 = NULL; struct interaction_value *i_val2 = NULL; @@ -619,39 +720,42 @@ covariance_accumulate_pairwise (struct covariance_matrix *cov, for (i = 0; i < cov->n_variables; ++i) { - if (is_interaction (v_variables[i], i_var, n_intr)) - { - i_val1 = interaction_case_data (ccase, v_variables[i], i_var, n_intr); - val1 = interaction_value_get (i_val1); - } - else - { - val1 = case_data (ccase, v_variables[i]); - } - if (!var_is_value_missing (v_variables[i], val1, cov->missing_value)) + var1 = v_variables[i]; + val1 = case_data (ccase, var1); + if (!var_is_value_missing (var1, val1, cov->missing_value)) { - cat_value_update (v_variables[i], val1); - if (var_is_numeric (v_variables[i])) + cat_value_update (var1, val1); + if (var_is_numeric (var1)) cov->update_moments (cov, i, val1->f); for (j = i; j < cov->n_variables; j++) { - if (is_interaction (v_variables[j], i_var, n_intr)) - { - i_val2 = interaction_case_data (ccase, v_variables[j], i_var, n_intr); - val2 = interaction_value_get (i_val2); - } - else - { - val2 = case_data (ccase, v_variables[j]); - } + var2 = v_variables[j]; + val2 = case_data (ccase, var2); if (!var_is_value_missing - (v_variables[j], val2, cov->missing_value)) + (var2, val2, cov->missing_value)) { - update_hash_entry (cov->ca, v_variables[i], v_variables[j], - val1, val2, i_val1, i_val2); + update_hash_entry (cov->ca, var1, var2, val1, val2); } } + for (j = 0; j < cov->n_intr; j++) + { + inner_intr_loop (cov, ccase, var1, val1, i_var, i_val1, j); + } + } + } + for (i = 0; i < cov->n_intr; i++) + { + var1 = interaction_get_variable (i_var[i]); + i_val1 = interaction_case_data (ccase, i_var[i]); + val1 = interaction_value_get (i_val1); + cat_value_update (var1, val1); + if (!var_is_value_missing (var1, val1, cov->missing_value)) + { + for (j = i; j < cov->n_intr; j++) + { + inner_intr_loop (cov, ccase, var1, val1, i_var, i_val1, j); + } } } } @@ -693,32 +797,15 @@ covariance_accumulate_listwise (struct covariance_matrix *cov, for (i = 0; i < cov->n_variables; ++i) { - if (is_interaction (v_variables[i], i_var, n_intr)) - { - i_val1 = interaction_case_data (ccase, v_variables[i], i_var, n_intr); - val1 = interaction_value_get (i_val1); - } - else - { - val1 = case_data (ccase, v_variables[i]); - } + val1 = case_data (ccase, v_variables[i]); cat_value_update (v_variables[i], val1); if (var_is_numeric (v_variables[i])) cov->update_moments (cov, i, val1->f); for (j = i; j < cov->n_variables; j++) { - if (is_interaction (v_variables[j], i_var, n_intr)) - { - i_val2 = interaction_case_data (ccase, v_variables[j], i_var, n_intr); - val2 = interaction_value_get (i_val2); - } - else - { - val2 = case_data (ccase, v_variables[j]); - } update_hash_entry (cov->ca, v_variables[i], v_variables[j], - val1, val2, i_val1, i_val2); + val1, val2); } } } @@ -829,15 +916,18 @@ get_sum (const struct covariance_matrix *cov, size_t i) else { k = 0; - while (var_get_dict_index (cov->v_variables[k]) != var_get_dict_index (var)) + while (cov->v_variables[k] != var && k < cov->n_variables) { k++; } - moments1_calculate (cov->m1[k], &n, &mean, NULL, NULL, NULL); - return mean * n; + if (k < cov->n_variables) + { + moments1_calculate (cov->m1[k], &n, &mean, NULL, NULL, NULL); + return mean * n; + } } } - + return 0.0; } static void @@ -868,8 +958,8 @@ covariance_accumulator_to_matrix (struct covariance_matrix *cov) struct covariance_accumulator *entry; struct hsh_iterator iter; - cov->cov = covariance_matrix_create (cov->n_variables, cov->v_variables); - cov->ssize = covariance_matrix_create (cov->n_variables, cov->v_variables); + cov->cov = covariance_matrix_create_s (cov); + cov->ssize = covariance_matrix_create_s (cov); entry = hsh_first (cov->ca, &iter); while (entry != NULL) { diff --git a/src/math/covariance-matrix.h b/src/math/covariance-matrix.h index 24ce791c..c16e5cbc 100644 --- a/src/math/covariance-matrix.h +++ b/src/math/covariance-matrix.h @@ -40,10 +40,9 @@ enum { LISTWISE, PAIRWISE }; -struct design_matrix *covariance_matrix_create (size_t, - const struct variable *[]); +struct design_matrix *covariance_matrix_create (size_t, const struct variable *[]); -void covariance_matrix_destroy (struct covariance_matrix *cov); +void covariance_matrix_destroy (struct covariance_matrix *); void covariance_pass_two (struct design_matrix *, double, double, double, const struct variable *, const struct variable *, const union value *, @@ -57,4 +56,6 @@ void covariance_matrix_accumulate (struct covariance_matrix *, const struct ccase *, void **, size_t); struct design_matrix *covariance_to_design (const struct covariance_matrix *); double covariance_matrix_get_element (const struct covariance_matrix *, size_t, size_t); +void covariance_interaction_set (struct covariance_matrix *, + const struct interaction_variable **, size_t); #endif diff --git a/src/math/interaction.c b/src/math/interaction.c index 133d7d7c..556123d4 100644 --- a/src/math/interaction.c +++ b/src/math/interaction.c @@ -67,6 +67,7 @@ interaction_variable_create (const struct variable **vars, int n_vars) { struct interaction_variable *result = NULL; size_t i; + int width = 0; if (n_vars > 0) { @@ -80,10 +81,11 @@ interaction_variable_create (const struct variable **vars, int n_vars) if (var_is_alpha (vars[i])) { result->n_alpha++; + width = 1; } } } - result->intr = var_create_internal (0, 0); + result->intr = var_create_internal (0, width); return result; } @@ -98,7 +100,7 @@ void interaction_variable_destroy (struct interaction_variable *iv) Get one of the member variables. */ const struct variable * -interaction_variable_get_member (const struct interaction_variable *iv, size_t i) +interaction_get_member (const struct interaction_variable *iv, size_t i) { return iv->members[i]; } @@ -122,10 +124,10 @@ interaction_get_n_numeric (const struct interaction_variable *iv) } /* - Get the interaction varibale itself. + Get the interaction variable itself. */ const struct variable * -interaction_variable_get_var (const struct interaction_variable *iv) +interaction_get_variable (const struct interaction_variable *iv) { return iv->intr; } @@ -146,20 +148,19 @@ interaction_value_create (const struct interaction_variable *var, const union va if (var != NULL) { - int val_width; + int val_width = 1; char *val; result = xmalloc (sizeof (*result)); result->intr = var; n_vars = interaction_get_n_vars (var); - val_width = n_vars * MAX_SHORT_STRING + 1; value_init (&result->val, val_width); val = value_str_rw (&result->val, val_width); val[0] = '\0'; result->f = 1.0; for (i = 0; i < n_vars; i++) { - member = interaction_variable_get_member (var, i); + member = interaction_get_member (var, i); if (var_is_value_missing (member, vals[i], MV_ANY)) { @@ -172,7 +173,9 @@ interaction_value_create (const struct interaction_variable *var, const union va if (var_is_alpha (var->members[i])) { int w = var_get_width (var->members[i]); - strncat (val, value_str (vals[i], w), MAX_SHORT_STRING); + value_resize (result, val_width, val_width + w); + strncat (val, value_str (vals[i], w), w); + val = value_str_rw (&result->val, val_width); } else if (var_is_numeric (var->members[i])) { @@ -239,32 +242,24 @@ interaction_value_destroy (struct interaction_value *val) Return a value from a variable that is an interaction. */ struct interaction_value * -interaction_case_data (const struct ccase *ccase, const struct variable *var, - const struct interaction_variable **intr_vars, size_t n_intr) +interaction_case_data (const struct ccase *ccase, const struct interaction_variable *iv) { size_t i; size_t n_vars; - const struct interaction_variable *iv = NULL; const struct variable *intr; const struct variable *member; const union value **vals = NULL; - for (i = 0; i < n_intr; i++) - { - iv = intr_vars[i]; - intr = interaction_variable_get_var (iv); - if (var_get_dict_index (intr) == var_get_dict_index (var)) - { - break; - } - } n_vars = interaction_get_n_vars (iv); + intr = interaction_get_variable (iv); vals = xnmalloc (n_vars, sizeof (*vals)); + for (i = 0; i < n_vars; i++) - { - member = interaction_variable_get_member (iv, i); - vals[i] = case_data (ccase, member); - } + { + member = interaction_get_member (iv, i); + vals[i] = case_data (ccase, member); + } + return interaction_value_create (iv, vals); } @@ -276,7 +271,7 @@ is_interaction (const struct variable *var, const struct interaction_variable ** for (i = 0; i < n_intr; i++) { - intr = interaction_variable_get_var (iv[i]); + intr = interaction_get_variable (iv[i]); if (var_get_dict_index (intr) == var_get_dict_index (var)) { return true; diff --git a/src/math/interaction.h b/src/math/interaction.h index 66025b66..995d0684 100644 --- a/src/math/interaction.h +++ b/src/math/interaction.h @@ -28,14 +28,13 @@ void interaction_value_destroy (struct interaction_value *); size_t interaction_variable_get_n_vars (const struct interaction_variable *); double interaction_value_get_nonzero_entry (const struct interaction_value *); const union value *interaction_value_get (const struct interaction_value *); -const struct variable * interaction_variable_get_var (const struct interaction_variable *); +const struct variable * interaction_get_variable (const struct interaction_variable *); size_t interaction_get_n_numeric (const struct interaction_variable *); size_t interaction_get_n_alpha (const struct interaction_variable *); size_t interaction_get_n_vars (const struct interaction_variable *); -const struct variable * interaction_variable_get_member (const struct interaction_variable *, size_t); +const struct variable * interaction_get_member (const struct interaction_variable *, size_t); bool is_interaction (const struct variable *, const struct interaction_variable **, size_t); struct interaction_value * -interaction_case_data (const struct ccase *, const struct variable *, - const struct interaction_variable **, size_t); +interaction_case_data (const struct ccase *, const struct interaction_variable *); double interaction_value_get_nonzero_entry (const struct interaction_value *); #endif