1 /* PSPP - binary encodings for categorical variables.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 Written by Jason H Stover <jason@sakla.net>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Functions and data structures to store values of a categorical
22 variable, and to recode those values into binary vectors.
24 For some statistical models, it is necessary to change each value
25 of a categorical variable to a vector with binary entries. These
26 vectors are then stored as sub-rows within a matrix during
27 model-fitting. For example, we need functions and data structures to map a
28 value, say 'a', of a variable named 'cat_var', to a vector, say (0
29 1 0 0 0), and vice versa. We also need to be able to map the
30 vector back to the value 'a', and if the vector is a sub-row of a
31 matrix, we need to know which sub-row corresponds to the variable
42 #include <libpspp/alloc.h>
43 #include <libpspp/message.h>
44 #include "cat-routines.h"
/* Number of value slots requested when a variable's category store is
   first created (used both as the recorded capacity and as the element
   count passed to xnmalloc in cat_stored_values_create). */
47 #define N_INITIAL_CATEGORIES 1
/* Set up the store of observed category values for V.
   The visible code only acts when v->obs_vals is NULL: it allocates the
   store, records zero categories, and records a capacity of
   N_INITIAL_CATEGORIES.
   NOTE(review): the return type, the braces and the left-hand side of
   the final allocation are not visible in this view -- confirm against
   the full file. */
50 cat_stored_values_create (struct variable *v)
52 if (v->obs_vals == NULL)
54 v->obs_vals = xmalloc (sizeof (*v->obs_vals));
55 v->obs_vals->n_categories = 0;
56 v->obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES;
/* Room for N_INITIAL_CATEGORIES elements of the vals array; presumably
   the result is stored in v->obs_vals->vals -- TODO confirm. */
58 xnmalloc (N_INITIAL_CATEGORIES, sizeof *v->obs_vals->vals);
/* Release the value array held by V's category store.
   When v->obs_vals is non-NULL and its recorded capacity is positive,
   the vals array is freed.
   NOTE(review): the lines after the visible ones are missing from this
   view; whether v->obs_vals itself is also released there cannot be
   confirmed from here. */
63 cat_stored_values_destroy (struct variable *v)
67 if (v->obs_vals != NULL)
69 if (v->obs_vals->n_allocated_categories > 0)
71 free (v->obs_vals->vals);
/* Clear the pointer so the freed array is not referenced again. */
72 v->obs_vals->vals = NULL;
80 Which subscript corresponds to val?
/* Search the values recorded for V for one equal to VAL.
   V must already have a category store (asserted).  The stored values
   are scanned in order from index 0 up to n_categories, each compared
   to VAL with compare_values () at the width of V.  When the scan ends
   without a match, CAT_VALUE_NOT_FOUND is returned.
   NOTE(review): the statement executed on a match is not visible in
   this view; presumably it returns the index i -- confirm. */
83 cat_value_find (const struct variable *v, const union value *val)
86 const union value *candidate;
90 assert (v->obs_vals != NULL);
91 for (i = 0; i < v->obs_vals->n_categories; i++)
93 candidate = v->obs_vals->vals + i;
94 assert (candidate != NULL);
/* A zero result from compare_values () selects this candidate. */
95 if (!compare_values (candidate, val, var_get_width (v)))
100 return CAT_VALUE_NOT_FOUND;
104 Add the new value unless it is already present.
/* Record VAL as a category of V unless it is already stored.
   The visible code only acts when var_is_alpha (v) is true.  If
   cat_value_find () reports CAT_VALUE_NOT_FOUND, the array is enlarged
   when full and VAL is copied into slot n_categories.
   NOTE(review): the declaration and assignment of cv, the final
   xnrealloc argument and any increment of n_categories are not visible
   in this view; cv is presumably v->obs_vals -- confirm. */
107 cat_value_update (struct variable *v, const union value *val)
111 if (var_is_alpha (v))
113 assert (val != NULL);
116 if (cat_value_find (v, val) == CAT_VALUE_NOT_FOUND)
/* Array is full: grow before writing past the last allocated slot. */
118 if (cv->n_categories >= cv->n_allocated_categories)
/* Doubling leaves a capacity of 0 unchanged; cat_stored_values_create
   starts the capacity at N_INITIAL_CATEGORIES, which is 1. */
120 cv->n_allocated_categories *= 2;
121 cv->vals = xnrealloc (cv->vals,
122 cv->n_allocated_categories,
/* Struct copy of *val into the first unused slot. */
125 cv->vals[cv->n_categories] = *val;
/* Map subscript S to the value stored at that position for V.
   V must already have a category store (asserted).  When S is below
   n_categories the result is the address of element S inside the
   store's own vals array, not a copy.
   NOTE(review): the handling of an out-of-range S is not visible in
   this view -- confirm against the full file. */
132 cat_subscript_to_value (const size_t s, struct variable *v)
134 assert (v->obs_vals != NULL);
135 if (s < v->obs_vals->n_categories)
137 return (v->obs_vals->vals + s);
146 Return the number of categories of a categorical variable.
/* Reads n_categories straight from V's category store.  Unlike
   cat_value_find and cat_subscript_to_value, nothing here asserts that
   v->obs_vals is non-NULL, so the caller must make sure the store
   exists before calling. */
149 cat_get_n_categories (const struct variable *v)
151 return v->obs_vals->n_categories;