1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 #ifndef _CATEGORICALS__
19 #define _CATEGORICALS__
22 #include "data/missing-values.h"
33 A categorical variable has a finite and usually small number of possible
34 values. The categoricals data structure organizes an array of interactions
35 among categorical variables, that is, a set of sets of categorical
36 variables. (Both levels of "set" are ordered.)
38 The life cycle of a categoricals object looks like this:
40 1. Create it with categoricals_create(). This fixes the set of interactions
43 2. Pass all of the desired cases through the object with
44 categoricals_update().
46 3. Finalize the object with categoricals_done(). Only at this point may
47 most of the categoricals query functions be called.
49 4. Use the categoricals object as desired.
51 5. Destroy the object with categoricals_destroy().
54 /* Creating and destroying categoricals. */
/* Creates and returns a new categoricals object for an array of N_INT
   interactions.  Per the life-cycle notes in this header, creation fixes the
   set of interactions.  Destroy the result with categoricals_destroy().

   NOTE(review): WV is presumably the weight variable and FCTR_EXCL the class
   of missing values to exclude for factor variables -- confirm against the
   implementation. */
55 struct categoricals *categoricals_create (struct interaction *const*, size_t n_int,
56 const struct variable *wv,
57 enum mv_class fctr_excl);
/* Destroys a categoricals object; this is the final step of the life cycle
   described in this header. */
58 void categoricals_destroy (struct categoricals *);
60 /* Updating categoricals. */
/* Passes case C through CAT.  Per the life-cycle notes in this header, all of
   the desired cases should be passed through before categoricals_done() is
   called. */
61 void categoricals_update (struct categoricals *cat, const struct ccase *c);
/* Finalizes CAT.  Only after this call may most of the categoricals query
   functions be used. */
62 void categoricals_done (const struct categoricals *cat);
/* NOTE(review): presumably reports whether CAT has been finalized with
   categoricals_done() -- confirm against the implementation. */
63 bool categoricals_is_complete (const struct categoricals *cat);
65 /* Counting categories.
67 A variable's number of categories is the number of unique values observed in
68 the data passed to categoricals_update().
70 An interaction's number of categories is the number of observed unique
71 values of its variables, which will often be less than the product of its
72 variables' numbers of categories.
74 A categorical object's number of categories is the sum of its interactions' numbers of categories. */
76 /* Return the number of categories (distinct values) for variable N.
   NOTE(review): the counting notes in this header define a category count
   both per variable and per interaction -- confirm whether N indexes a
   variable or an interaction within CAT. */
77 size_t categoricals_n_count (const struct categoricals *cat, size_t n);
/* Returns the total number of categories for CAT.  Category indexes passed to
   the by-category lookup functions must be less than this value. */
78 size_t categoricals_n_total (const struct categoricals *cat);
80 /* Degrees of freedom.
82 A categorical variable with N_CATS categories has N_CATS - 1 degrees of freedom.
85 An interaction's degrees of freedom is the product of its variables' degrees of freedom.
88 A categorical object's degrees of freedom is the sum of its interactions'
89 degrees of freedom. */
/* Returns a degrees-of-freedom count selected by N within CAT.
   NOTE(review): N presumably indexes an interaction of CAT -- confirm. */
90 size_t categoricals_df (const struct categoricals *cat, size_t n);
/* Returns the total number of degrees of freedom for CAT, that is, the sum of
   its interactions' degrees of freedom.  Subscripts passed to the
   by-subscript lookup functions must be less than this value. */
91 size_t categoricals_df_total (const struct categoricals *cat);
/* NOTE(review): presumably returns the number of distinct variables used
   across all of CAT's interactions -- confirm against the implementation. */
93 size_t categoricals_get_n_variables (const struct categoricals *cat);
/* NOTE(review): the criteria for a "sane" object are not stated in this
   header -- see the implementation before relying on the result. */
96 bool categoricals_sane (const struct categoricals *cat);
100 These look up an interaction within a categoricals object on the basis of a
101 "subscript". Interaction 0 with DF_0 degrees of freedom is assigned
102 subscripts [0, DF_0 - 1], interaction 1 with DF_1 degrees of freedom is
103 assigned subscripts [DF_0, DF_0 + DF_1 - 1], and so on. The subscripts
104 passed in must be in the range [0, DF_SUM - 1] where DF_SUM is the total
105 number of degrees of freedom for the object, as returned by
106 categoricals_df_total().
108 These functions are intended for covariance matrix routines, where normally
109 1 less than the total number of distinct values of each categorical variable
110 should be considered.
112 These functions may be used on an object only after calling categoricals_done().
/* Looks up a weight by SUBSCRIPT, which must be in the range
   [0, categoricals_df_total() - 1].
   NOTE(review): presumably the accumulated case weight for the category that
   SUBSCRIPT designates -- confirm against the implementation. */
115 double categoricals_get_weight_by_subscript (const struct categoricals *cat, int subscript);
/* Looks up the interaction within CAT to which SUBSCRIPT is assigned.
   SUBSCRIPT must be in the range [0, categoricals_df_total() - 1]. */
116 const struct interaction *categoricals_get_interaction_by_subscript (const struct categoricals *cat, int subscript);
/* Looks up a sum by SUBSCRIPT, which must be in the range
   [0, categoricals_df_total() - 1].
   NOTE(review): what is being summed is not stated in this header -- confirm
   against the implementation. */
118 double categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript);
120 categoricals_get_dummy_code_for_case (const struct categoricals *cat, int subscript,
121 const struct ccase *c);
123 categoricals_get_effects_code_for_case (const struct categoricals *cat, int subscript,
124 const struct ccase *c);
129 These look up an interaction within a categoricals object on the basis of a
130 "category index". Interaction 0 in CAT with CAT_0 categories has indexes
131 [0, CAT_0 - 1], interaction 1 with CAT_1 categories has indexes [CAT_0,
132 CAT_0 + CAT_1 - 1], and so on. The indexes passed in must be in the range
133 [0, CAT_TOTAL - 1] where CAT_TOTAL is the total number of categories for the
134 object, as returned by categoricals_n_total().
136 These functions are useful for descriptive statistics.
138 These functions may be used on an object only after calling categoricals_done().
142 categoricals_get_case_by_category_real (const struct categoricals *cat, int iact, int n);
144 categoricals_get_user_data_by_category_real (const struct categoricals *cat, int iact, int n);
/* Looks up user data by CATEGORY, a category index in the range
   [0, categoricals_n_total() - 1].
   NOTE(review): presumably this is the data produced by the payload's
   `create' callback -- confirm against the implementation. */
145 void * categoricals_get_user_data_by_category (const struct categoricals *cat, int category);
/* Looks up a case by category index.
   NOTE(review): the parameter is named SUBSCRIPT, but going by the function
   name it is presumably a category index in the range
   [0, categoricals_n_total() - 1] rather than a degrees-of-freedom
   subscript -- confirm. */
146 const struct ccase * categoricals_get_case_by_category (const struct categoricals *cat, int subscript);
150 void* (*create) (const void *aux1, void *aux2);
151 void (*update) (const void *aux1, void *aux2, void *user_data, const struct ccase *, double weight);
152 void (*calculate) (const void *aux1, void *aux2, void *user_data);
153 void (*destroy) (const void *aux1, void *aux2, void *user_data);
/* Associates payload P, together with AUX1 and AUX2, with CATS.
   NOTE(review): AUX1 and AUX2 are presumably handed back unchanged to P's
   create/update/calculate/destroy callbacks, whose signatures take matching
   parameters -- confirm against the implementation. */
156 void categoricals_set_payload (struct categoricals *cats, const struct payload *p, const void *aux1, void *aux2);
/* NOTE(review): presumably reports whether the design recorded in CAT is
   balanced (every category observed equally often) -- confirm against the
   implementation. */
158 bool categoricals_isbalanced (const struct categoricals *cat);