1 /* PSPP - binary encodings for categorical variables.
2 Copyright (C) 2005 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 Functions and data structures to store values of a categorical
21 variable, and to recode those values into binary vectors.
23 For some statistical models, it is necessary to change each value
24 of a categorical variable to a vector with binary entries. These
25 vectors are then stored as sub-rows within a matrix during
26 model-fitting. For example, we need functions and data structures to map a
27 value, say 'a', of a variable named 'cat_var', to a vector, say (0
28 1 0 0 0), and vice versa. We also need to be able to map the
29 vector back to the value 'a', and if the vector is a sub-row of a
30 matrix, we need to know which sub-row corresponds to the variable 'cat_var'.
39 #include <libpspp/alloc.h>
40 #include <libpspp/message.h>
/* Sentinel returned by cat_value_find when no stored value matches.
   Parenthesized so that the negative literal cannot combine with
   neighbouring tokens after macro expansion. */
#define CAT_VALUE_NOT_FOUND (-2)

/* Number of value slots allocated when a store of observed values is
   first created. */
#define N_INITIAL_CATEGORIES 1
/*
  This structure contains the observed values of a
  categorical variable.
 */
struct cat_vals
{
  union value *vals;              /* Array of the distinct values stored
                                     so far. */
  size_t n_categories;            /* Number of entries of VALS in use. */
  size_t n_allocated_categories;  /* Number of entries allocated in VALS.
                                     This is used only while values are
                                     being collected, to decide when
                                     VALS must grow. */
};
65 cat_stored_values_create (const struct variable *v)
67 if (!var_has_obs_vals (v))
69 struct cat_vals *obs_vals = xmalloc (sizeof *obs_vals);
71 obs_vals->n_categories = 0;
72 obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES;
73 obs_vals->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->vals);
74 var_set_obs_vals (v, obs_vals);
79 cat_stored_values_destroy (struct cat_vals *obs_vals)
83 if (obs_vals->n_allocated_categories > 0)
84 free (obs_vals->vals);
90 Which subscript corresponds to val?
93 cat_value_find (const struct variable *v, const union value *val)
95 struct cat_vals *obs_vals = var_get_obs_vals (v);
97 const union value *candidate;
99 for (i = 0; i < obs_vals->n_categories; i++)
101 candidate = obs_vals->vals + i;
102 assert (candidate != NULL);
103 if (!compare_values (candidate, val, var_get_width (v)))
108 return CAT_VALUE_NOT_FOUND;
112 Add the new value unless it is already present.
115 cat_value_update (const struct variable *v, const union value *val)
117 if (var_is_alpha (v))
119 struct cat_vals *cv = var_get_obs_vals (v);
120 if (cat_value_find (v, val) == CAT_VALUE_NOT_FOUND)
122 if (cv->n_categories >= cv->n_allocated_categories)
124 cv->n_allocated_categories *= 2;
125 cv->vals = xnrealloc (cv->vals,
126 cv->n_allocated_categories,
129 cv->vals[cv->n_categories] = *val;
136 cat_subscript_to_value (const size_t s, const struct variable *v)
138 struct cat_vals *obs_vals = var_get_obs_vals (v);
139 return s < obs_vals->n_categories ? obs_vals->vals + s : NULL;
143 Return the number of categories of a categorical variable.
146 cat_get_n_categories (const struct variable *v)
148 return var_get_obs_vals (v)->n_categories;