X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fcategory.c;fp=src%2Fdata%2Fcategory.c;h=0000000000000000000000000000000000000000;hb=f550aee00a62fe1d8baf62d83cd7efef6cc2ee92;hp=968dd4c52da8369e629fb2797faa056db4b67227;hpb=18ef561271ad1b619f62d994e3dc2286958532f1;p=pspp-builds.git diff --git a/src/data/category.c b/src/data/category.c deleted file mode 100644 index 968dd4c5..00000000 --- a/src/data/category.c +++ /dev/null @@ -1,199 +0,0 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2005, 2009 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* - Functions and data structures to store values of a categorical - variable, and to recode those values into binary vectors. - - For some statistical models, it is necessary to change each value - of a categorical variable to a vector with binary entries. These - vectors are then stored as sub-rows within a matrix during - model-fitting. For example, we need functions and data strucutres to map a - value, say 'a', of a variable named 'cat_var', to a vector, say (0 - 1 0 0 0), and vice versa. We also need to be able to map the - vector back to the value 'a', and if the vector is a sub-row of a - matrix, we need to know which sub-row corresponds to the variable - 'cat_var'. -*/ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#define CAT_VALUE_NOT_FOUND -1 - -#define N_INITIAL_CATEGORIES 1 - -/* - This structure contains the observed values of a - categorical variable. - */ -struct cat_vals -{ - union value *vals; - size_t n_categories; - size_t n_allocated_categories; /* This is used only during - initialization to keep - track of the number of - values stored. - */ - size_t *value_counts; /* Element i stores the number of cases for which - the categorical variable has that corresponding - value. This is necessary for computing covariance - matrices. - */ -}; - -void -cat_stored_values_create (const struct variable *v) -{ - if (!var_has_obs_vals (v)) - { - struct cat_vals *obs_vals = xmalloc (sizeof *obs_vals); - - obs_vals->n_categories = 0; - obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES; - obs_vals->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->vals); - obs_vals->value_counts = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->value_counts); - var_set_obs_vals (v, obs_vals); - } -} - -void -cat_stored_values_destroy (struct cat_vals *obs_vals) -{ - if (obs_vals != NULL) - { - if (obs_vals->n_allocated_categories > 0) - { - free (obs_vals->vals); - free (obs_vals->value_counts); - } - free (obs_vals); - } -} - -/* - Which subscript corresponds to val? - */ -size_t -cat_value_find (const struct variable *v, const union value *val) -{ - struct cat_vals *obs_vals = var_get_obs_vals (v); - size_t i; - const union value *candidate; - - for (i = 0; i < obs_vals->n_categories; i++) - { - candidate = obs_vals->vals + i; - assert (candidate != NULL); - if (value_equal (candidate, val, var_get_width (v))) - { - return i; - } - } - return CAT_VALUE_NOT_FOUND; -} - -/* - Add the new value unless it is already present. Increment the count. - */ -void -cat_value_update (const struct variable *v, const union value *val) -{ - if (var_is_alpha (v)) - { - size_t i; - struct cat_vals *cv = var_get_obs_vals (v); - i = cat_value_find (v, val); - if (i == CAT_VALUE_NOT_FOUND) - { - if (cv->n_categories >= cv->n_allocated_categories) - { - cv->n_allocated_categories *= 2; - cv->vals = xnrealloc (cv->vals, - cv->n_allocated_categories, - sizeof *cv->vals); - cv->value_counts = xnrealloc (cv->value_counts, cv->n_allocated_categories, - sizeof *cv->value_counts); - } - cv->vals[cv->n_categories] = *val; - cv->value_counts[cv->n_categories] = 1; - cv->n_categories++; - } - else - { - cv->value_counts[i]++; - } - } -} -/* - Return the count for the sth category. - */ -size_t -cat_get_category_count (const size_t s, const struct variable *v) -{ - struct cat_vals *tmp; - size_t n_categories; - - tmp = var_get_obs_vals (v); - n_categories = cat_get_n_categories (v); - if (s < n_categories) - { - return tmp->value_counts[s]; - } - return CAT_VALUE_NOT_FOUND; -} - -const union value * -cat_subscript_to_value (const size_t s, const struct variable *v) -{ - struct cat_vals *obs_vals = var_get_obs_vals (v); - return s < obs_vals->n_categories ? obs_vals->vals + s : NULL; -} - -/* - Return the number of categories of a categorical variable. - */ -size_t -cat_get_n_categories (const struct variable *v) -{ - return var_get_obs_vals (v)->n_categories; -} - -/* - If VAR is categorical with d categories, its first category should - correspond to the origin in d-dimensional Euclidean space. - */ -bool -cat_is_origin (const struct variable *var, const union value *val) -{ - if (var_is_numeric (var)) - { - return false; - } - if (cat_value_find (var, val) == 0) - { - return true; - } - return false; -}