1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2005, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 Functions and data structures to store values of a categorical
19 variable, and to recode those values into binary vectors.
21 For some statistical models, it is necessary to change each value
22 of a categorical variable to a vector with binary entries. These
23 vectors are then stored as sub-rows within a matrix during
24 model-fitting. For example, we need functions and data structures to map a
25 value, say 'a', of a variable named 'cat_var', to a vector, say (0
26 1 0 0 0), and vice versa. We also need to be able to map the
27 vector back to the value 'a', and if the vector is a sub-row of a
28 matrix, we need to know which sub-row corresponds to the variable
34 #include <data/category.h>
35 #include <data/value.h>
36 #include <data/variable.h>
37 #include <gl/xalloc.h>
38 #include <libpspp/message.h>
/* Sentinel used in this file as the "no such category" result:
   cat_value_find returns it when no stored value matches, and
   cat_get_category_count returns it for an out-of-range subscript.
   NOTE(review): the expansion is an unparenthesized negative literal, and
   the return types of those functions are not visible in this listing; if
   they are unsigned, the sentinel converts to the type's maximum value --
   confirm against the full source. */
42 #define CAT_VALUE_NOT_FOUND -1
/* Number of slots given to the vals and value_counts arrays when a
   variable's store is first created in cat_stored_values_create. */
44 #define N_INITIAL_CATEGORIES 1
47 This structure contains the observed values of a
/* NOTE(review): the declaration enclosing these two members, and the ends
   of both member comments, are not present in this listing.
   n_allocated_categories is the current capacity of the arrays: it is set to
   N_INITIAL_CATEGORIES in cat_stored_values_create and doubled in
   cat_value_update before the arrays are reallocated.  value_counts is
   indexed by category subscript: cat_value_update stores 1 for a value it
   has just added and increments the entry for a value it found. */
54 size_t n_allocated_categories; /* This is used only during
55 initialization to keep
56 track of the number of
59 size_t *value_counts; /* Element i stores the number of cases for which
60 the categorical variable has that corresponding
61 value. This is necessary for computing covariance
/* Give V a store for its observed values, unless it already has one.

   Following the var_has_obs_vals (V) test, a struct cat_vals is allocated
   with zero categories recorded and room for N_INITIAL_CATEGORIES entries in
   both the vals and value_counts arrays, and is attached to V with
   var_set_obs_vals.

   NOTE(review): this listing omits lines of the original file (the return
   type and the braces are not shown); confirm the exact nesting there.
   xmalloc and xnmalloc presumably come from <gl/xalloc.h> and are used here
   without a NULL check -- confirm they cannot return NULL. */
67 cat_stored_values_create (const struct variable *v)
69 if (!var_has_obs_vals (v))
71 struct cat_vals *obs_vals = xmalloc (sizeof *obs_vals);
73 obs_vals->n_categories = 0;
74 obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES;
/* Both arrays get the same initial capacity; cat_value_update later grows
   them together. */
75 obs_vals->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->vals);
76 obs_vals->value_counts = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->value_counts);
77 var_set_obs_vals (v, obs_vals);
/* Release the vals and value_counts arrays held by OBS_VALS.

   The frees follow a check that n_allocated_categories is greater than
   zero.

   NOTE(review): lines of the original file are missing from this listing
   around this function, so whether OBS_VALS is tested for NULL, or is itself
   freed, cannot be seen here -- confirm against the full source. */
82 cat_stored_values_destroy (struct cat_vals *obs_vals)
86 if (obs_vals->n_allocated_categories > 0)
88 free (obs_vals->vals);
89 free (obs_vals->value_counts);
96 Which subscript corresponds to val?
/* Search the values stored for V for one equal to VAL.

   The stored values are visited in subscript order, from 0 up to
   n_categories - 1, and each is compared with VAL by value_equal using the
   width reported by var_get_width (V).  When the loop finishes without the
   match branch ending the function, CAT_VALUE_NOT_FOUND is returned.

   NOTE(review): the statement executed on a match, the declaration of i and
   the return type are not present in this listing.  cat_value_update uses
   the result as an index into value_counts, so presumably the matching
   subscript is returned -- confirm against the full source. */
99 cat_value_find (const struct variable *v, const union value *val)
101 struct cat_vals *obs_vals = var_get_obs_vals (v);
103 const union value *candidate;
105 for (i = 0; i < obs_vals->n_categories; i++)
/* Address of the i-th stored value. */
107 candidate = obs_vals->vals + i;
108 assert (candidate != NULL);
109 if (value_equal (candidate, val, var_get_width (v)))
114 return CAT_VALUE_NOT_FOUND;
118 Add the new value unless it is already present. Increment the count.
/* Record one occurrence of VAL for V.

   The work below follows a var_is_alpha (V) test.  VAL is looked up with
   cat_value_find.  If it is not found, the arrays are grown when
   n_categories has reached n_allocated_categories, and VAL is then copied
   into slot n_categories with a count of 1.  The final statement increments
   the count at the subscript that was found.

   NOTE(review): this listing omits lines of the original file: the braces
   and any else keyword, the declaration of i, the last argument of the first
   xnrealloc call, and whatever advances n_categories after a new value is
   stored.  Confirm those against the full source. */
121 cat_value_update (const struct variable *v, const union value *val)
123 if (var_is_alpha (v))
126 struct cat_vals *cv = var_get_obs_vals (v);
127 i = cat_value_find (v, val);
128 if (i == CAT_VALUE_NOT_FOUND)
130 if (cv->n_categories >= cv->n_allocated_categories)
/* Doubling always enlarges the arrays because the capacity starts at
   N_INITIAL_CATEGORIES, which is 1, so it is never zero here. */
132 cv->n_allocated_categories *= 2;
133 cv->vals = xnrealloc (cv->vals,
134 cv->n_allocated_categories,
136 cv->value_counts = xnrealloc (cv->value_counts, cv->n_allocated_categories,
137 sizeof *cv->value_counts);
/* The union is copied by plain assignment into the first unused slot. */
139 cv->vals[cv->n_categories] = *val;
140 cv->value_counts[cv->n_categories] = 1;
145 cv->value_counts[i]++;
150 Return the count for the sth category.
/* Return the number of cases counted for category subscript S of V.

   When S is below cat_get_n_categories (V), the result is value_counts[S]
   from V's store; otherwise it is CAT_VALUE_NOT_FOUND.

   NOTE(review): the return type and the declaration of n_categories are not
   present in this listing.  If the return type is unsigned, the sentinel
   cannot be told apart from a very large count -- confirm against the full
   source. */
153 cat_get_category_count (const size_t s, const struct variable *v)
155 struct cat_vals *tmp;
158 tmp = var_get_obs_vals (v);
159 n_categories = cat_get_n_categories (v);
160 if (s < n_categories)
162 return tmp->value_counts[s];
164 return CAT_VALUE_NOT_FOUND;
/* Map category subscript S of V back to its stored value.

   Returns the address of element S of V's vals array when S is below
   n_categories, and NULL otherwise.  The result points into the store's own
   array rather than at a copy; presumably a later cat_value_update that
   reallocates the array invalidates it -- confirm.

   NOTE(review): the return type is not present in this listing. */
168 cat_subscript_to_value (const size_t s, const struct variable *v)
170 struct cat_vals *obs_vals = var_get_obs_vals (v);
171 return s < obs_vals->n_categories ? obs_vals->vals + s : NULL;
175 Return the number of categories of a categorical variable.
/* Return the n_categories field of V's store of observed values.

   The store returned by var_get_obs_vals (V) is dereferenced without a NULL
   check, so V is expected to have a store already.

   NOTE(review): the return type is not present in this listing. */
178 cat_get_n_categories (const struct variable *v)
180 return var_get_obs_vals (v)->n_categories;
184 If VAR is categorical with d categories, its first category should
185 correspond to the origin in d-dimensional Euclidean space.
188 cat_is_origin (const struct variable *var, const union value *val)
190 if (var_is_numeric (var))
194 if (cat_value_find (var, val) == 0)