#include <gsl/gsl_matrix.h>
#define N_INITIAL_CATEGORIES 1
-#define CR_COLUMN_NOT_FOUND -1
-#define CR_VALUE_NOT_FOUND -2
-#define CR_INDEX_NOT_FOUND -3
+#define CAT_COLUMN_NOT_FOUND -1
+#define CAT_VALUE_NOT_FOUND -2
+#define CAT_INDEX_NOT_FOUND -3
-static gsl_vector_view cr_value_to_vector (const union value *,
- struct recoded_categorical *);
-
-struct recoded_categorical *
-cr_recoded_categorical_create (const struct variable *v)
+void
+cat_stored_values_create (struct variable *v)
{
- struct recoded_categorical *rc;
-
- rc = xmalloc (sizeof (*rc));
- rc->v = v;
- rc->n_categories = 0;
- rc->n_allocated_categories = N_INITIAL_CATEGORIES;
- rc->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *rc->vals);
-
- return rc;
+ if (v->obs_vals == NULL)
+ {
+ v->obs_vals = xmalloc (sizeof (*v->obs_vals));
+ v->obs_vals->n_categories = 0;
+ v->obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES;
+ v->obs_vals->vals =
+ xnmalloc (N_INITIAL_CATEGORIES, sizeof *v->obs_vals->vals);
+ }
}
void
-cr_recoded_categorical_destroy (struct recoded_categorical *r)
+cat_stored_values_destroy (struct variable *v)
{
- free (r->vals);
- free (r);
+ assert (v != NULL);
+ if (v->obs_vals != NULL)
+ {
+ free (v->obs_vals);
+ }
}
+#if 0
struct recoded_categorical_array *
cr_recoded_cat_ar_create (int n_variables, struct variable *v_variables[])
{
}
return rc;
}
-
+#endif
static size_t
-cr_value_find (struct recoded_categorical *rc, const union value *v)
+cat_value_find (const struct variable *v, const union value *val)
{
size_t i;
- const union value *val;
+ const union value *candidate;
- for (i = 0; i < rc->n_categories; i++)
+ assert (val != NULL);
+ assert (v != NULL);
+ assert (v->obs_vals != NULL);
+ for (i = 0; i < v->obs_vals->n_categories; i++)
{
- val = rc->vals + i;
- if (!compare_values (val, v, rc->v->width))
+ candidate = v->obs_vals->vals + i;
+ assert (candidate != NULL);
+ if (!compare_values (candidate, val, v->width))
{
return i;
}
}
- return CR_VALUE_NOT_FOUND;
+ return CAT_VALUE_NOT_FOUND;
}
/*
Add the new value unless it is already present.
*/
void
-cr_value_update (struct recoded_categorical *rc, const union value *v)
+cat_value_update (struct variable *v, const union value *val)
{
- if (cr_value_find (rc, v) == CR_VALUE_NOT_FOUND)
+ struct cat_vals *cv;
+
+ if (v->type == ALPHA)
{
- if (rc->n_categories >= rc->n_allocated_categories)
+ assert (val != NULL);
+ assert (v != NULL);
+ cv = v->obs_vals;
+ if (cat_value_find (v, val) == CAT_VALUE_NOT_FOUND)
{
- rc->n_allocated_categories *= 2;
- rc->vals = xnrealloc (rc->vals,
- rc->n_allocated_categories, sizeof *rc->vals);
+ if (cv->n_categories >= cv->n_allocated_categories)
+ {
+ cv->n_allocated_categories *= 2;
+ cv->vals = xnrealloc (cv->vals,
+ cv->n_allocated_categories,
+ sizeof *cv->vals);
+ }
+ cv->vals[cv->n_categories] = *val;
+ cv->n_categories++;
}
- rc->vals[rc->n_categories] = *v;
- rc->n_categories++;
}
}
/*
- Create a set of gsl_matrix's, each of whose rows correspond to
- values of a categorical variable. Since n categories have n-1
- degrees of freedom, the gsl_matrix is n-by-(n-1), with the first
- category encoded as the zero vector.
+ Create a gsl_matrix, whose rows correspond to values of a
+ categorical variable. Since n categories have n-1 degrees of
+ freedom, the gsl_matrix is n-by-(n-1), with the first category
+ encoded as the zero vector.
*/
+#if 0
void
-cr_create_value_matrices (struct recoded_categorical_array *r)
+cat_create_value_matrix (struct variable *v)
{
size_t i;
size_t row;
size_t n_rows;
size_t n_cols;
- for (i = 0; i < r->n_vars; i++)
+ assert (v != NULL);
+ if (v->type == ALPHA)
{
- n_rows = (*(r->a + i))->n_categories;
- n_cols = (*(r->a + i))->n_categories - 1;
- (*(r->a + i))->m = gsl_matrix_calloc (n_rows, n_cols);
+ assert (v->rc != NULL);
+ n_rows = v->rc->n_categories;
+ n_cols = v->rc->n_categories - 1;
+ v->rc->m = gsl_matrix_calloc (n_rows, n_cols);
for (row = 1; row < n_rows; row++)
{
col = row - 1;
- gsl_matrix_set ((*(r->a + i))->m, row, col, 1.0);
+ gsl_matrix_set (v->rc->m, row, col, 1.0);
}
}
}
-
-static size_t
-cr_value_to_subscript (const union value *val, struct recoded_categorical *cr)
+#endif
+/*
+ Return the subscript of the binary vector corresponding
+ to this value.
+ */
+size_t
+cat_value_to_subscript (const union value *val, struct variable *v)
{
- const union value *v;
+ const union value *val2;
size_t subscript;
int different;
- subscript = cr->n_categories - 1;
+ assert (v != NULL);
+ assert (val != NULL);
+ assert (v->obs_vals != NULL);
+ subscript = v->obs_vals->n_categories - 1;
while (subscript > 0)
{
- v = cr->vals + subscript;
- different = compare_values (val, v, cr->v->width);
+ val2 = v->obs_vals->vals + subscript;
+ assert (val2 != NULL);
+ different = compare_values (val, val2, v->width);
if (!different)
{
return subscript;
return subscript;
}
-static union value *
-cr_subscript_to_value (const size_t s, struct recoded_categorical *cr)
+union value *
+cat_subscript_to_value (const size_t s, struct variable *v)
{
- if (s < cr->n_categories)
+ assert (v->obs_vals != NULL);
+ if (s < v->obs_vals->n_categories)
{
- return (cr->vals + s);
+ return (v->obs_vals->vals + s);
}
else
{
}
}
+#if 0
/*
Return the row of the matrix corresponding
to the value v.
row = cr_value_to_subscript (v, cr);
return gsl_matrix_row (cr->m, row);
}
-
+#endif
/*
Which element of a vector is equal to the value x?
*/
static size_t
-cr_which_element_eq (const gsl_vector * vec, double x)
+cat_which_element_eq (const gsl_vector * vec, double x)
{
size_t i;
return i;
}
}
- return CR_VALUE_NOT_FOUND;
+ return CAT_VALUE_NOT_FOUND;
}
static int
-cr_is_zero_vector (const gsl_vector * vec)
+cat_is_zero_vector (const gsl_vector * vec)
{
size_t i;
i is 0 otherwise.
*/
union value *
-cr_vector_to_value (const gsl_vector * vec, struct recoded_categorical *cr)
+cr_vector_to_value (const gsl_vector * vec, struct variable *v)
{
size_t i;
- i = cr_which_element_eq (vec, 1.0);
- if (i != CR_VALUE_NOT_FOUND)
+ i = cat_which_element_eq (vec, 1.0);
+ if (i != CAT_VALUE_NOT_FOUND)
{
- return cr_subscript_to_value (i + 1, cr);
+ return cat_subscript_to_value (i + 1, v);
}
- if (cr_is_zero_vector (vec))
+ if (cat_is_zero_vector (vec))
{
- return cr_subscript_to_value (0, cr);
+ return cat_subscript_to_value (0, v);
}
return NULL;
}
+#if 0
/*
Given a variable, return a pointer to its recoded
structure. BUSTED IN HERE.
}
return NULL;
}
-
+#endif
struct design_matrix *
design_matrix_create (int n_variables,
const struct variable *v_variables[],
- struct recoded_categorical_array *ca,
const size_t n_data)
{
struct design_matrix *dm;
- struct design_matrix_var *tmp;
- struct recoded_categorical *rc;
const struct variable *v;
size_t i;
size_t n_cols = 0;
for (i = 0; i < n_variables; i++)
{
v = v_variables[i];
+ assert ((dm->vars + i) != NULL);
+ (dm->vars + i)->v = v; /* Allows us to look up the variable from
+ the design matrix. */
+ (dm->vars + i)->first_column = n_cols;
if (v->type == NUMERIC)
{
n_cols++;
+ (dm->vars + i)->last_column = n_cols;
}
else if (v->type == ALPHA)
{
- assert (ca != NULL);
- rc = cr_var_to_recoded_categorical (v, ca);
- assert (rc != NULL);
- rc->first_column = n_cols;
- rc->last_column = rc->first_column + rc->n_categories - 2;
- n_cols += rc->n_categories - 1;
+ assert (v->obs_vals != NULL);
+ (dm->vars + i)->last_column =
+ (dm->vars + i)->first_column + v->obs_vals->n_categories - 2;
+ n_cols += v->obs_vals->n_categories - 1;
}
}
dm->m = gsl_matrix_calloc (n_data, n_cols);
- dm->vars = xnmalloc (dm->n_vars, sizeof *dm->vars);
- assert (dm->vars != NULL);
col = 0;
- for (i = 0; i < n_variables; i++)
- {
- v = v_variables[i];
- (dm->vars[i]).v = v;
- if (v->type == NUMERIC)
- {
- tmp = &(dm->vars[col]);
- tmp->v = v;
- tmp->first_column = col;
- tmp->last_column = col;
- col++;
- }
- else if (v->type == ALPHA)
- {
- assert (ca != NULL);
- rc = cr_var_to_recoded_categorical (v, ca);
- assert (rc != NULL);
- tmp = &(dm->vars[col]);
- tmp->v = v;
- tmp->last_column = rc->last_column;
- col = rc->last_column + 1;
- }
- }
return dm;
}
if (v.first_column <= col && col <= v.last_column)
return (v.v)->index;
}
- return CR_INDEX_NOT_FOUND;
+ return CAT_INDEX_NOT_FOUND;
}
/*
}
/*
- Return the number of the first column storing the
+ Return the number of the first column which holds the
values for variable v.
*/
size_t
return tmp.first_column;
}
}
- return CR_COLUMN_NOT_FOUND;
+ return CAT_COLUMN_NOT_FOUND;
+}
+
+/* Last column. */
+static size_t
+dm_var_to_last_column (const struct design_matrix *dm,
+ const struct variable *v)
+{
+ size_t i;
+ struct design_matrix_var tmp;
+
+ for (i = 0; i < dm->n_vars; i++)
+ {
+ tmp = dm->vars[i];
+ if (cmp_dm_var_index (&tmp, v->index))
+ {
+ return tmp.last_column;
+ }
+ }
+ return CAT_COLUMN_NOT_FOUND;
}
/*
Set the appropriate value in the design matrix,
whether that value is from a categorical or numeric
- variable.
+ variable. For a categorical variable, only the usual
+ binary encoding is allowed.
*/
void
design_matrix_set_categorical (struct design_matrix *dm, size_t row,
const struct variable *var,
- const union value *val,
- struct recoded_categorical *rc)
+ const union value *val)
{
size_t col;
- double x;
+ size_t is_one;
+ size_t fc;
+ size_t lc;
+ double entry;
assert (var->type == ALPHA);
- const gsl_vector_view vec = cr_value_to_vector (val, rc);
-
- /*
- Copying values here is not the 'most efficient' way,
- but it will work even if we change the vector encoding later.
- */
- for (col = rc->first_column; col <= rc->last_column; col++)
+ fc = design_matrix_var_to_column (dm, var);
+ lc = dm_var_to_last_column (dm, var);
+ assert (lc != CAT_COLUMN_NOT_FOUND);
+ assert (fc != CAT_COLUMN_NOT_FOUND);
+ is_one = fc + cat_value_find (var, val);
+ for (col = fc; col <= lc; col++)
{
- x = gsl_vector_get (&vec.vector, col);
- gsl_matrix_set (dm->m, row, col, x);
+ entry = (col == is_one) ? 1.0 : 0.0;
+ gsl_matrix_set (dm->m, row, col, entry);
}
}
void
assert (var->type == NUMERIC);
col = design_matrix_var_to_column ((const struct design_matrix *) dm, var);
- assert (col != CR_COLUMN_NOT_FOUND);
+ assert (col != CAT_COLUMN_NOT_FOUND);
gsl_matrix_set (dm->m, row, col, val->f);
}