X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fcat.c;h=eb5e0ee234e7aaae7a52cf08d5034881c511fae4;hb=17896e38991000673747aa115b3f7aeb9a57a40f;hp=934a4d31c9d50a8f3cbc74e8155d4af6a3e1d4ec;hpb=494a50b0d023c55b61caf33e3887379332166d07;p=pspp diff --git a/src/cat.c b/src/cat.c index 934a4d31c9..eb5e0ee234 100644 --- a/src/cat.c +++ b/src/cat.c @@ -49,35 +49,34 @@ #include #define N_INITIAL_CATEGORIES 1 -#define CR_COLUMN_NOT_FOUND -1 -#define CR_VALUE_NOT_FOUND -2 -#define CR_INDEX_NOT_FOUND -3 +#define CAT_COLUMN_NOT_FOUND -1 +#define CAT_VALUE_NOT_FOUND -2 +#define CAT_INDEX_NOT_FOUND -3 -static gsl_vector_const_view cr_value_to_vector (const union value *, - struct recoded_categorical *); - -struct recoded_categorical * -cr_recoded_categorical_create (const struct variable *v) +void +cat_stored_values_create (struct variable *v) { - struct recoded_categorical *rc; - - rc = xmalloc (sizeof (*rc)); - rc->v = v; - rc->n_categories = 0; - rc->n_allocated_categories = N_INITIAL_CATEGORIES; - rc->vals = (union value **) xmalloc (N_INITIAL_CATEGORIES * - sizeof (*rc->vals)); - - return rc; + if (v->obs_vals == NULL) + { + v->obs_vals = xmalloc (sizeof (*v->obs_vals)); + v->obs_vals->n_categories = 0; + v->obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES; + v->obs_vals->vals = + xnmalloc (N_INITIAL_CATEGORIES, sizeof *v->obs_vals->vals); + } } void -cr_recoded_categorical_destroy (struct recoded_categorical *r) +cat_stored_values_destroy (struct variable *v) { - free (r->vals); - free (r); + assert (v != NULL); + if (v->obs_vals != NULL) + { + free (v->obs_vals); + } } +#if 0 struct recoded_categorical_array * cr_recoded_cat_ar_create (int n_variables, struct variable *v_variables[]) { @@ -86,7 +85,7 @@ cr_recoded_cat_ar_create (int n_variables, struct variable *v_variables[]) struct recoded_categorical_array *ca; struct variable *v; - ca = (struct recoded_categorical_array *) xmalloc (sizeof (*ca)); + ca = xmalloc (sizeof *ca); for (i = 0; i < n_variables; i++) { v = v_variables[i]; @@ -96,7 +95,7 @@ cr_recoded_cat_ar_create (int n_variables, struct variable *v_variables[]) } } ca->n_vars = n_categoricals; - ca->a = xmalloc (n_categoricals * sizeof (*(ca->a))); + ca->a = xnmalloc (n_categoricals, sizeof *ca->a); for (i = 0; i < n_categoricals; i++) { *(ca->a + i) = cr_recoded_categorical_create (v_variables[i]); @@ -117,52 +116,65 @@ cr_free_recoded_array (struct recoded_categorical_array *r) } return rc; } - +#endif static size_t -cr_value_find (struct recoded_categorical *rc, const union value *v) +cat_value_find (const struct variable *v, const union value *val) { size_t i; - const union value *val; + const union value *candidate; - for (i = 0; i < rc->n_categories; i++) + assert (val != NULL); + assert (v != NULL); + assert (v->obs_vals != NULL); + for (i = 0; i < v->obs_vals->n_categories; i++) { - val = *(rc->vals + i); - if (!compare_values (val, v, rc->v->width)) + candidate = v->obs_vals->vals + i; + assert (candidate != NULL); + if (!compare_values (candidate, val, v->width)) { return i; } } - return CR_VALUE_NOT_FOUND; + return CAT_VALUE_NOT_FOUND; } /* Add the new value unless it is already present. */ void -cr_value_update (struct recoded_categorical *rc, const union value *v) +cat_value_update (struct variable *v, const union value *val) { - if (cr_value_find (rc, v) == CR_VALUE_NOT_FOUND) + struct cat_vals *cv; + + if (v->type == ALPHA) { - if (rc->n_categories >= rc->n_allocated_categories) + assert (val != NULL); + assert (v != NULL); + cv = v->obs_vals; + if (cat_value_find (v, val) == CAT_VALUE_NOT_FOUND) { - rc->n_allocated_categories *= 2; - rc->vals = (union value **) - xrealloc (rc->vals, rc->n_allocated_categories - * sizeof (*(rc->vals))); + if (cv->n_categories >= cv->n_allocated_categories) + { + cv->n_allocated_categories *= 2; + cv->vals = xnrealloc (cv->vals, + cv->n_allocated_categories, + sizeof *cv->vals); + } + cv->vals[cv->n_categories] = *val; + cv->n_categories++; } - *(rc->vals + rc->n_categories) = v; - rc->n_categories++; } } /* - Create a set of gsl_matrix's, each of whose rows correspond to - values of a categorical variable. Since n categories have n-1 - degrees of freedom, the gsl_matrix is n-by-(n-1), with the first - category encoded as the zero vector. + Create a gsl_matrix, whose rows correspond to values of a + categorical variable. Since n categories have n-1 degrees of + freedom, the gsl_matrix is n-by-(n-1), with the first category + encoded as the zero vector. */ +#if 0 void -cr_create_value_matrices (struct recoded_categorical_array *r) +cat_create_value_matrix (struct variable *v) { size_t i; size_t row; @@ -170,31 +182,41 @@ cr_create_value_matrices (struct recoded_categorical_array *r) size_t n_rows; size_t n_cols; - for (i = 0; i < r->n_vars; i++) + assert (v != NULL); + if (v->type == ALPHA) { - n_rows = (*(r->a + i))->n_categories; - n_cols = (*(r->a + i))->n_categories - 1; - (*(r->a + i))->m = gsl_matrix_calloc (n_rows, n_cols); + assert (v->rc != NULL); + n_rows = v->rc->n_categories; + n_cols = v->rc->n_categories - 1; + v->rc->m = gsl_matrix_calloc (n_rows, n_cols); for (row = 1; row < n_rows; row++) { col = row - 1; - gsl_matrix_set ((*(r->a + i))->m, row, col, 1.0); + gsl_matrix_set (v->rc->m, row, col, 1.0); } } } - -static size_t -cr_value_to_subscript (const union value *val, struct recoded_categorical *cr) +#endif +/* + Return the subscript of the binary vector corresponding + to this value. + */ +size_t +cat_value_to_subscript (const union value *val, struct variable *v) { - const union value *v; + const union value *val2; size_t subscript; int different; - subscript = cr->n_categories - 1; + assert (v != NULL); + assert (val != NULL); + assert (v->obs_vals != NULL); + subscript = v->obs_vals->n_categories - 1; while (subscript > 0) { - v = *(cr->vals + subscript); - different = compare_values (val, v, cr->v->width); + val2 = v->obs_vals->vals + subscript; + assert (val2 != NULL); + different = compare_values (val, val2, v->width); if (!different) { return subscript; @@ -204,12 +226,13 @@ cr_value_to_subscript (const union value *val, struct recoded_categorical *cr) return subscript; } -static const union value * -cr_subscript_to_value (const size_t s, struct recoded_categorical *cr) +union value * +cat_subscript_to_value (const size_t s, struct variable *v) { - if (s < cr->n_categories) + assert (v->obs_vals != NULL); + if (s < v->obs_vals->n_categories) { - return cr->vals[s]; + return (v->obs_vals->vals + s); } else { @@ -217,23 +240,24 @@ cr_subscript_to_value (const size_t s, struct recoded_categorical *cr) } } +#if 0 /* Return the row of the matrix corresponding to the value v. */ -static gsl_vector_const_view -cr_value_to_vector (const union value * v, struct recoded_categorical * cr) +static gsl_vector_view +cr_value_to_vector (const union value *v, struct recoded_categorical *cr) { size_t row; row = cr_value_to_subscript (v, cr); - return gsl_matrix_const_row (cr->m, row); + return gsl_matrix_row (cr->m, row); } - +#endif /* Which element of a vector is equal to the value x? */ static size_t -cr_which_element_eq (const gsl_vector * vec, double x) +cat_which_element_eq (const gsl_vector * vec, double x) { size_t i; @@ -244,10 +268,10 @@ cr_which_element_eq (const gsl_vector * vec, double x) return i; } } - return CR_VALUE_NOT_FOUND; + return CAT_VALUE_NOT_FOUND; } static int -cr_is_zero_vector (const gsl_vector * vec) +cat_is_zero_vector (const gsl_vector * vec) { size_t i; @@ -268,23 +292,24 @@ cr_is_zero_vector (const gsl_vector * vec) when i is between 1 and cr->n_categories - 1 and i is 0 otherwise. */ -const union value * -cr_vector_to_value (const gsl_vector * vec, struct recoded_categorical *cr) +union value * +cr_vector_to_value (const gsl_vector * vec, struct variable *v) { size_t i; - i = cr_which_element_eq (vec, 1.0); - if (i != CR_VALUE_NOT_FOUND) + i = cat_which_element_eq (vec, 1.0); + if (i != CAT_VALUE_NOT_FOUND) { - return cr_subscript_to_value (i + 1, cr); + return cat_subscript_to_value (i + 1, v); } - if (cr_is_zero_vector (vec)) + if (cat_is_zero_vector (vec)) { - return cr_subscript_to_value (0, cr); + return cat_subscript_to_value (0, v); } return NULL; } +#if 0 /* Given a variable, return a pointer to its recoded structure. BUSTED IN HERE. @@ -306,69 +331,45 @@ cr_var_to_recoded_categorical (const struct variable *v, } return NULL; } - +#endif struct design_matrix * design_matrix_create (int n_variables, const struct variable *v_variables[], - struct recoded_categorical_array *ca, const size_t n_data) { struct design_matrix *dm; - struct design_matrix_var *tmp; - struct recoded_categorical *rc; const struct variable *v; size_t i; size_t n_cols = 0; size_t col; - dm = xmalloc (sizeof (*dm)); - dm->vars = xmalloc (n_variables * sizeof (struct variable *)); + dm = xmalloc (sizeof *dm); + dm->vars = xnmalloc (n_variables, sizeof *dm->vars); dm->n_vars = n_variables; for (i = 0; i < n_variables; i++) { v = v_variables[i]; + assert ((dm->vars + i) != NULL); + (dm->vars + i)->v = v; /* Allows us to look up the variable from + the design matrix. */ + (dm->vars + i)->first_column = n_cols; if (v->type == NUMERIC) { n_cols++; + (dm->vars + i)->last_column = n_cols; } else if (v->type == ALPHA) { - assert (ca != NULL); - rc = cr_var_to_recoded_categorical (v, ca); - assert (rc != NULL); - rc->first_column = n_cols; - rc->last_column = rc->first_column + rc->n_categories - 2; - n_cols += rc->n_categories - 1; + assert (v->obs_vals != NULL); + (dm->vars + i)->last_column = + (dm->vars + i)->first_column + v->obs_vals->n_categories - 2; + n_cols += v->obs_vals->n_categories - 1; } } dm->m = gsl_matrix_calloc (n_data, n_cols); - dm->vars = xmalloc (dm->n_vars * sizeof (*(dm->vars))); - assert (dm->vars != NULL); col = 0; - for (i = 0; i < n_variables; i++) - { - v = v_variables[i]; - (dm->vars[i]).v = v; - if (v->type == NUMERIC) - { - tmp = &(dm->vars[col]); - tmp->v = v; - tmp->first_column = col; - col++; - } - else if (v->type == ALPHA) - { - assert (ca != NULL); - rc = cr_var_to_recoded_categorical (v, ca); - assert (rc != NULL); - tmp = &(dm->vars[col]); - tmp->v = v; - tmp->last_column = rc->last_column; - col = rc->last_column + 1; - } - } return dm; } @@ -384,7 +385,7 @@ design_matrix_destroy (struct design_matrix *dm) Return the index of the variable for the given column. */ -static const size_t +static size_t design_matrix_col_to_var_index (const struct design_matrix *dm, size_t col) { size_t i; @@ -396,29 +397,27 @@ design_matrix_col_to_var_index (const struct design_matrix *dm, size_t col) if (v.first_column <= col && col <= v.last_column) return (v.v)->index; } - return CR_INDEX_NOT_FOUND; + return CAT_INDEX_NOT_FOUND; } /* Return a pointer to the variable whose values are stored in column col. */ -const struct variable * +struct variable * design_matrix_col_to_var (const struct design_matrix *dm, size_t col) { size_t index; size_t i; struct design_matrix_var dmv; - const struct variable *v; index = design_matrix_col_to_var_index (dm, col); for (i = 0; i < dm->n_vars; i++) { dmv = dm->vars[i]; - v = (dmv.v)->index; - if (v->index == index) + if ((dmv.v)->index == index) { - return v; + return (struct variable *) dmv.v; } } return NULL; @@ -433,7 +432,7 @@ cmp_dm_var_index (const struct design_matrix_var *dmv, size_t index) } /* - Return the number of the first column storing the + Return the number of the first column which holds the values for variable v. */ size_t @@ -451,34 +450,55 @@ design_matrix_var_to_column (const struct design_matrix * dm, return tmp.first_column; } } - return CR_COLUMN_NOT_FOUND; + return CAT_COLUMN_NOT_FOUND; +} + +/* Last column. */ +static size_t +dm_var_to_last_column (const struct design_matrix *dm, + const struct variable *v) +{ + size_t i; + struct design_matrix_var tmp; + + for (i = 0; i < dm->n_vars; i++) + { + tmp = dm->vars[i]; + if (cmp_dm_var_index (&tmp, v->index)) + { + return tmp.last_column; + } + } + return CAT_COLUMN_NOT_FOUND; } /* Set the appropriate value in the design matrix, whether that value is from a categorical or numeric - variable. + variable. For a categorical variable, only the usual + binary encoding is allowed. */ void design_matrix_set_categorical (struct design_matrix *dm, size_t row, const struct variable *var, - const union value *val, - struct recoded_categorical *rc) + const union value *val) { size_t col; - double x; + size_t is_one; + size_t fc; + size_t lc; + double entry; assert (var->type == ALPHA); - gsl_vector_const_view vec = cr_value_to_vector (val, rc); - - /* - Copying values here is not the 'most efficient' way, - but it will work even if we change the vector encoding later. - */ - for (col = rc->first_column; col <= rc->last_column; col++) + fc = design_matrix_var_to_column (dm, var); + lc = dm_var_to_last_column (dm, var); + assert (lc != CAT_COLUMN_NOT_FOUND); + assert (fc != CAT_COLUMN_NOT_FOUND); + is_one = fc + cat_value_find (var, val); + for (col = fc; col <= lc; col++) { - x = gsl_vector_get (&vec.vector, col); - gsl_matrix_set (dm->m, row, col, x); + entry = (col == is_one) ? 1.0 : 0.0; + gsl_matrix_set (dm->m, row, col, entry); } } void @@ -489,6 +509,6 @@ design_matrix_set_numeric (struct design_matrix *dm, size_t row, assert (var->type == NUMERIC); col = design_matrix_var_to_column ((const struct design_matrix *) dm, var); - assert (col != CR_COLUMN_NOT_FOUND); + assert (col != CAT_COLUMN_NOT_FOUND); gsl_matrix_set (dm->m, row, col, val->f); }