X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fcategory.c;h=968dd4c52da8369e629fb2797faa056db4b67227;hb=5c3291dc396b795696e94f47780308fd7ace6fc4;hp=d320cea2a108f955475b413fd0cabbe54173691a;hpb=338fb2a2e84df6427a2fdee6769421f57d5666d8;p=pspp-builds.git diff --git a/src/data/category.c b/src/data/category.c index d320cea2..968dd4c5 100644 --- a/src/data/category.c +++ b/src/data/category.c @@ -1,21 +1,18 @@ -/* PSPP - binary encodings for categorical variables. - Copyright (C) 2005 Free Software Foundation, Inc. - Written by Jason H Stover . +/* PSPP - a program for statistical analysis. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ /* Functions and data structures to store values of a categorical @@ -33,29 +30,50 @@ */ #include -#include "category.h" - #include +#include +#include +#include +#include +#include #include #include -#include -#include -#include "cat-routines.h" -#include "value.h" -#include "variable.h" +#define CAT_VALUE_NOT_FOUND -1 #define N_INITIAL_CATEGORIES 1 +/* + This structure contains the observed values of a + categorical variable. + */ +struct cat_vals +{ + union value *vals; + size_t n_categories; + size_t n_allocated_categories; /* This is used only during + initialization to keep + track of the number of + values stored. + */ + size_t *value_counts; /* Element i stores the number of cases for which + the categorical variable has that corresponding + value. This is necessary for computing covariance + matrices. + */ +}; + void -cat_stored_values_create (struct variable *v) +cat_stored_values_create (const struct variable *v) { if (!var_has_obs_vals (v)) { struct cat_vals *obs_vals = xmalloc (sizeof *obs_vals); + obs_vals->n_categories = 0; obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES; obs_vals->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->vals); + obs_vals->value_counts = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->value_counts); var_set_obs_vals (v, obs_vals); } } @@ -63,10 +81,13 @@ cat_stored_values_create (struct variable *v) void cat_stored_values_destroy (struct cat_vals *obs_vals) { - if (obs_vals != NULL) + if (obs_vals != NULL) { if (obs_vals->n_allocated_categories > 0) - free (obs_vals->vals); + { + free (obs_vals->vals); + free (obs_vals->value_counts); + } free (obs_vals); } } @@ -85,7 +106,7 @@ cat_value_find (const struct variable *v, const union value *val) { candidate = obs_vals->vals + i; assert (candidate != NULL); - if (!compare_values (candidate, val, var_get_width (v))) + if (value_equal (candidate, val, var_get_width (v))) { return i; } @@ -94,15 +115,17 @@ cat_value_find (const struct variable *v, const union value *val) } /* - Add the new value unless it is already present. + Add the new value unless it is already present. Increment the count. */ void -cat_value_update (struct variable *v, const union value *val) +cat_value_update (const struct variable *v, const union value *val) { if (var_is_alpha (v)) { + size_t i; struct cat_vals *cv = var_get_obs_vals (v); - if (cat_value_find (v, val) == CAT_VALUE_NOT_FOUND) + i = cat_value_find (v, val); + if (i == CAT_VALUE_NOT_FOUND) { if (cv->n_categories >= cv->n_allocated_categories) { @@ -110,15 +133,39 @@ cat_value_update (struct variable *v, const union value *val) cv->vals = xnrealloc (cv->vals, cv->n_allocated_categories, sizeof *cv->vals); + cv->value_counts = xnrealloc (cv->value_counts, cv->n_allocated_categories, + sizeof *cv->value_counts); } cv->vals[cv->n_categories] = *val; + cv->value_counts[cv->n_categories] = 1; cv->n_categories++; } + else + { + cv->value_counts[i]++; + } + } +} +/* + Return the count for the sth category. + */ +size_t +cat_get_category_count (const size_t s, const struct variable *v) +{ + struct cat_vals *tmp; + size_t n_categories; + + tmp = var_get_obs_vals (v); + n_categories = cat_get_n_categories (v); + if (s < n_categories) + { + return tmp->value_counts[s]; } + return CAT_VALUE_NOT_FOUND; } -union value * -cat_subscript_to_value (const size_t s, struct variable *v) +const union value * +cat_subscript_to_value (const size_t s, const struct variable *v) { struct cat_vals *obs_vals = var_get_obs_vals (v); return s < obs_vals->n_categories ? obs_vals->vals + s : NULL; @@ -127,9 +174,26 @@ cat_subscript_to_value (const size_t s, struct variable *v) /* Return the number of categories of a categorical variable. */ -size_t +size_t cat_get_n_categories (const struct variable *v) { return var_get_obs_vals (v)->n_categories; } +/* + If VAR is categorical with d categories, its first category should + correspond to the origin in d-dimensional Euclidean space. + */ +bool +cat_is_origin (const struct variable *var, const union value *val) +{ + if (var_is_numeric (var)) + { + return false; + } + if (cat_value_find (var, val) == 0) + { + return true; + } + return false; +}