X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fcategory.c;h=3aaf5c552cd903897566eb869aa97b0c95e06f2d;hb=124dea11f9542304e35bef92b7f3a46d5afca4d7;hp=55d7ca8b05addbe392424f1114755d9e4629645d;hpb=a19b858e0ac3c69e4a28c0ca6d8674427268a863;p=pspp-builds.git diff --git a/src/data/category.c b/src/data/category.c index 55d7ca8b..3aaf5c55 100644 --- a/src/data/category.c +++ b/src/data/category.c @@ -1,21 +1,18 @@ -/* PSPP - binary encodings for categorical variables. +/* PSPP - a program for statistical analysis. Copyright (C) 2005 Free Software Foundation, Inc. - Written by Jason H Stover . - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Functions and data structures to store values of a categorical @@ -32,36 +29,66 @@ 'cat_var'. 
*/ #include -#include -#include + +#include +#include +#include +#include +#include #include -#include "category.h" -#include "cat-routines.h" +#include #include -#include "variable.h" + +#define CAT_VALUE_NOT_FOUND -1 #define N_INITIAL_CATEGORIES 1 +/* + This structure contains the observed values of a + categorical variable. + */ +struct cat_vals +{ + union value *vals; + size_t n_categories; + size_t n_allocated_categories; /* This is used only during + initialization to keep + track of the number of + values stored. + */ + size_t *value_counts; /* Element i stores the number of cases for which + the categorical variable has that corresponding + value. This is necessary for computing covariance + matrices. + */ +}; + void -cat_stored_values_create (struct variable *v) +cat_stored_values_create (const struct variable *v) { - if (v->obs_vals == NULL) + if (!var_has_obs_vals (v)) { - v->obs_vals = xmalloc (sizeof (*v->obs_vals)); - v->obs_vals->n_categories = 0; - v->obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES; - v->obs_vals->vals = - xnmalloc (N_INITIAL_CATEGORIES, sizeof *v->obs_vals->vals); + struct cat_vals *obs_vals = xmalloc (sizeof *obs_vals); + + obs_vals->n_categories = 0; + obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES; + obs_vals->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->vals); + obs_vals->value_counts = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->value_counts); + var_set_obs_vals (v, obs_vals); } } void -cat_stored_values_destroy (struct variable *v) +cat_stored_values_destroy (struct cat_vals *obs_vals) { - assert (v != NULL); - if (v->obs_vals != NULL) + if (obs_vals != NULL) { - free (v->obs_vals); + if (obs_vals->n_allocated_categories > 0) + { + free (obs_vals->vals); + free (obs_vals->value_counts); + } + free (obs_vals); } } @@ -71,17 +98,15 @@ cat_stored_values_destroy (struct variable *v) size_t cat_value_find (const struct variable *v, const union value *val) { + struct cat_vals *obs_vals = 
var_get_obs_vals (v); size_t i; const union value *candidate; - assert (val != NULL); - assert (v != NULL); - assert (v->obs_vals != NULL); - for (i = 0; i < v->obs_vals->n_categories; i++) + for (i = 0; i < obs_vals->n_categories; i++) { - candidate = v->obs_vals->vals + i; + candidate = obs_vals->vals + i; assert (candidate != NULL); - if (!compare_values (candidate, val, v->width)) + if (!compare_values_short (candidate, val, v)) { return i; } @@ -90,19 +115,17 @@ cat_value_find (const struct variable *v, const union value *val) } /* - Add the new value unless it is already present. + Add the new value unless it is already present. Increment the count. */ void -cat_value_update (struct variable *v, const union value *val) +cat_value_update (const struct variable *v, const union value *val) { - struct cat_vals *cv; - - if (v->type == ALPHA) + if (var_is_alpha (v)) { - assert (val != NULL); - assert (v != NULL); - cv = v->obs_vals; - if (cat_value_find (v, val) == CAT_VALUE_NOT_FOUND) + size_t i; + struct cat_vals *cv = var_get_obs_vals (v); + i = cat_value_find (v, val); + if (i == CAT_VALUE_NOT_FOUND) { if (cv->n_categories >= cv->n_allocated_categories) { @@ -110,33 +133,67 @@ cat_value_update (struct variable *v, const union value *val) cv->vals = xnrealloc (cv->vals, cv->n_allocated_categories, sizeof *cv->vals); + cv->value_counts = xnrealloc (cv->value_counts, cv->n_allocated_categories, + sizeof *cv->value_counts); } cv->vals[cv->n_categories] = *val; + cv->value_counts[cv->n_categories] = 1; cv->n_categories++; } + else + { + cv->value_counts[i]++; + } } } - -union value * -cat_subscript_to_value (const size_t s, struct variable *v) +/* + Return the count for the sth category. 
+ */ +size_t +cat_get_category_count (const size_t s, const struct variable *v) { - assert (v->obs_vals != NULL); - if (s < v->obs_vals->n_categories) - { - return (v->obs_vals->vals + s); - } - else + struct cat_vals *tmp; + size_t n_categories; + + tmp = var_get_obs_vals (v); + n_categories = cat_get_n_categories (v); + if (s < n_categories) { - return NULL; + return tmp->value_counts[s]; } + return CAT_VALUE_NOT_FOUND; +} + +const union value * +cat_subscript_to_value (const size_t s, const struct variable *v) +{ + struct cat_vals *obs_vals = var_get_obs_vals (v); + return s < obs_vals->n_categories ? obs_vals->vals + s : NULL; } /* Return the number of categories of a categorical variable. */ -size_t +size_t cat_get_n_categories (const struct variable *v) { - return v->obs_vals->n_categories; + return var_get_obs_vals (v)->n_categories; } +/* + If VAR is categorical with d categories, its first category should + correspond to the origin in d-dimensional Euclidean space. + */ +bool +cat_is_origin (const struct variable *var, const union value *val) +{ + if (var_is_numeric (var)) + { + return false; + } + if (cat_value_find (var, val) == 0) + { + return true; + } + return false; +}