/* PSPP - a program for statistical analysis.
Copyright (C) 2005, 2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/*
Functions and data structures to store values of a categorical
variable, and to recode those values into binary vectors.
For some statistical models, it is necessary to change each value
of a categorical variable to a vector with binary entries. These
vectors are then stored as sub-rows within a matrix during
model-fitting. For example, we need functions and data structures to map a
value, say 'a', of a variable named 'cat_var', to a vector, say (0
1 0 0 0), and vice versa. We also need to be able to map the
vector back to the value 'a', and if the vector is a sub-row of a
matrix, we need to know which sub-row corresponds to the variable
'cat_var'.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* Sentinel meaning "no such value or subscript".  Functions in this
   file that return size_t yield it converted to size_t.  The
   expansion is parenthesized so that it stays a single operand
   wherever the macro is used. */
#define CAT_VALUE_NOT_FOUND (-1)
/* Number of slots allocated when value storage is first created. */
#define N_INITIAL_CATEGORIES 1
/*
  Observed values of a categorical variable, together with a count
  of cases for each value.
*/
struct cat_vals
{
  /* Distinct values stored so far; entries 0 through
     n_categories - 1 are in use. */
  union value *vals;

  /* Number of distinct values currently stored in vals. */
  size_t n_categories;

  /* Number of entries allocated in vals and in value_counts. */
  size_t n_allocated_categories;

  /* value_counts[i] is the number of cases for which the variable
     had the value vals[i].  This is necessary for computing
     covariance matrices. */
  size_t *value_counts;
};
/*
  Allocate empty value storage and attach it to V with
  var_set_obs_vals.  Nothing is done when var_has_obs_vals already
  reports true for V.
*/
void
cat_stored_values_create (const struct variable *v)
{
  struct cat_vals *fresh;

  if (var_has_obs_vals (v))
    return;

  fresh = xmalloc (sizeof *fresh);
  fresh->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *fresh->vals);
  fresh->value_counts = xnmalloc (N_INITIAL_CATEGORIES,
                                  sizeof *fresh->value_counts);
  fresh->n_allocated_categories = N_INITIAL_CATEGORIES;
  fresh->n_categories = 0;      /* No values have been observed yet. */
  var_set_obs_vals (v, fresh);
}
/*
  Free OBS_VALS and the arrays it holds.  A null OBS_VALS is
  ignored.
*/
void
cat_stored_values_destroy (struct cat_vals *obs_vals)
{
  if (obs_vals == NULL)
    return;

  /* The arrays are released only when a nonzero capacity is
     recorded. */
  if (obs_vals->n_allocated_categories != 0)
    {
      free (obs_vals->value_counts);
      free (obs_vals->vals);
    }
  free (obs_vals);
}
/*
  Return the subscript at which VAL is stored among the observed
  values of V, comparing with value_equal at the width of V.
  Returns CAT_VALUE_NOT_FOUND (converted to size_t) if VAL is not
  stored.
*/
size_t
cat_value_find (const struct variable *v, const union value *val)
{
  struct cat_vals *stored = var_get_obs_vals (v);
  size_t idx = 0;

  while (idx < stored->n_categories)
    {
      const union value *entry = &stored->vals[idx];

      assert (entry != NULL);
      if (value_equal (entry, val, var_get_width (v)))
        return idx;
      idx++;
    }
  return CAT_VALUE_NOT_FOUND;
}
/*
  Record one more occurrence of VAL for V.

  If VAL is already stored, its count is incremented.  Otherwise VAL
  is appended with a count of 1, enlarging the arrays first when
  they are full.  Nothing is done unless var_is_alpha reports true
  for V.
*/
void
cat_value_update (const struct variable *v, const union value *val)
{
  struct cat_vals *cv;
  size_t i;

  if (!var_is_alpha (v))
    return;

  cv = var_get_obs_vals (v);
  i = cat_value_find (v, val);
  if (i != CAT_VALUE_NOT_FOUND)
    {
      cv->value_counts[i]++;
      return;
    }

  if (cv->n_categories >= cv->n_allocated_categories)
    {
      /* Doubling a capacity of zero leaves it at zero, and the
         stores below would then write past the end of the arrays,
         so start from a single slot in that case.
         cat_stored_values_destroy does not free the arrays when the
         capacity is zero, so the old pointers are likewise not
         handed to the reallocator in that state. */
      size_t old_capacity = cv->n_allocated_categories;
      size_t new_capacity = old_capacity > 0 ? 2 * old_capacity : 1;

      cv->vals = xnrealloc (old_capacity > 0 ? cv->vals : NULL,
                            new_capacity, sizeof *cv->vals);
      cv->value_counts = xnrealloc (old_capacity > 0
                                    ? cv->value_counts : NULL,
                                    new_capacity,
                                    sizeof *cv->value_counts);
      cv->n_allocated_categories = new_capacity;
    }

  /* NOTE(review): VAL is copied by plain assignment; if union value
     can refer to separately allocated storage, that storage would be
     shared with the caller -- confirm against its definition. */
  cv->vals[cv->n_categories] = *val;
  cv->value_counts[cv->n_categories] = 1;
  cv->n_categories++;
}
/*
  Return the number of cases counted for the category with
  subscript S of V.  Returns CAT_VALUE_NOT_FOUND (converted to
  size_t) when S is not less than the number of categories.
*/
size_t
cat_get_category_count (const size_t s, const struct variable *v)
{
  struct cat_vals *stored = var_get_obs_vals (v);
  size_t limit = cat_get_n_categories (v);

  if (s >= limit)
    return CAT_VALUE_NOT_FOUND;
  return stored->value_counts[s];
}
/*
  Return a pointer to the stored value with subscript S of V, or a
  null pointer when S is not less than the number of categories.
*/
const union value *
cat_subscript_to_value (const size_t s, const struct variable *v)
{
  struct cat_vals *stored = var_get_obs_vals (v);

  if (s >= stored->n_categories)
    return NULL;
  return &stored->vals[s];
}
/*
Return the number of categories of a categorical variable.
*/
size_t
cat_get_n_categories (const struct variable *v)
{
return var_get_obs_vals (v)->n_categories;
}
/*
  Return true if VAR is not numeric and VAL is its first stored
  category (subscript 0), false otherwise.

  If VAR is categorical with d categories, its first category should
  correspond to the origin in d-dimensional Euclidean space.
*/
bool
cat_is_origin (const struct variable *var, const union value *val)
{
  /* The lookup is skipped entirely for numeric variables. */
  return !var_is_numeric (var) && cat_value_find (var, val) == 0;
}