1 /* PSPP - linear regression.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 Written by Jason H Stover <jason@sakla.net>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Functions and data structures to recode categorical variables into
22 vectors and sub-rows of matrices.
24 To fit many types of statistical models, it is necessary
25 to change each value of a categorical variable to a vector with binary
26 entries. These vectors are then stored as sub-rows within a matrix
27 during model-fitting. We need functions and data structures to,
28 e.g., map a value, say 'a', of a variable named 'cat_var', to a
29 vector, say (0 1 0 0 0), and vice versa. We also need to be able
30 to map the vector back to the value 'a', and if the vector is a
31 sub-row of a matrix, we need to know which sub-row corresponds to
32 the variable 'cat_var'.
34 The data structures defined here will be placed in the variable
35 structure in the future. When that happens, the useful code
36 in this file will be that which refers to design matrices.
42 #include <gsl/gsl_matrix.h>
45 This structure contains the observed values of a
52 size_t n_allocated_categories; /* This is used only during
53 initialization to keep
54 track of the number of
60 There are usually multiple categorical variables to recode. Get rid
61 of this structure immediately when the variable structure has been
62 modified to contain the binary encoding.
64 struct recoded_categorical_array
66 struct recoded_categorical **a;
70 The design matrix structure holds the design
71 matrix and an array to tell us which columns
72 correspond to which variables. This structure
73 is not restricted to categorical variables, and
74 perhaps should be moved to its own module.
77 struct design_matrix_var
79 int first_column; /* First column for this variable in
80 the design_matrix. If this variable
81 is categorical, its values are
82 stored in multiple, contiguous
83 columns, as dictated by its vector
84 encoding in the variable's struct
88 const struct variable *v;
93 struct design_matrix_var *vars; /* Element i corresponds to
94 the variable whose values
95 are stored in at least one
97 variable is categorical
99 categories, its values are
101 contiguous columns. The
102 variable's values are then
103 stored in the columns
/* Declaration only: takes a const gsl_vector * and a struct variable *
   and returns a pointer to a union value.
   NOTE(review): going by the overview comment at the top of this file,
   this presumably maps a binary-encoded vector back to the categorical
   value it stands for; ownership of the returned pointer is not visible
   from this header -- confirm against the definition. */
111 union value *cat_vector_to_value (const gsl_vector *, struct variable *);
/* Declaration only: takes a struct variable * and returns nothing.
   NOTE(review): presumably sets up storage for the observed categorical
   values of the given variable, and is paired by name with
   cat_stored_values_destroy -- confirm against the definition. */
113 void cat_stored_values_create (struct variable *);
/* Declaration only: takes a struct variable * and a const union value *
   and returns nothing.
   NOTE(review): presumably records the given value as an observed
   category of the variable -- confirm against the definition. */
115 void cat_value_update (struct variable *, const union value *);
/* Declaration only: takes a struct recoded_categorical_array * and
   returns an int.
   NOTE(review): presumably releases the array and its elements; the
   meaning of the int result is not visible from this header -- confirm
   against the definition. */
117 int cat_free_recoded_array (struct recoded_categorical_array *);
119 struct recoded_categorical_array *cr_recoded_cat_ar_create (int,
/* Declaration only: takes a struct variable * and returns nothing.
   NOTE(review): presumably builds the recoded (binary-vector)
   representation for the given categorical variable -- confirm against
   the definition. */
123 void cat_recoded_categorical_create (struct variable *);
/* Declaration only: takes a struct variable * and returns nothing.
   NOTE(review): presumably creates the matrix whose rows encode the
   variable's categories as binary vectors -- confirm against the
   definition. */
125 void cat_create_value_matrix (struct variable *);
127 struct recoded_categorical *cat_var_to_recoded_categorical (const struct
130 recoded_categorical_array
133 struct design_matrix *design_matrix_create (int, const struct variable *[],
/* Declaration only: takes a struct design_matrix * and returns nothing.
   NOTE(review): presumably frees a design matrix obtained from
   design_matrix_create -- confirm against the definition. */
136 void design_matrix_destroy (struct design_matrix *);
/* Declaration only: takes a struct design_matrix *, a size_t, a
   const struct variable * and a const union value *; returns nothing.
   NOTE(review): presumably stores the binary encoding of the given
   categorical value in the variable's columns of the design matrix,
   with the size_t selecting the row -- confirm against the definition. */
138 void design_matrix_set_categorical (struct design_matrix *, size_t,
139 const struct variable *,
140 const union value *);
/* Declaration only: takes a struct design_matrix *, a size_t, a
   const struct variable * and a const union value *; returns nothing.
   NOTE(review): presumably stores the given numeric value in the
   variable's column of the design matrix, with the size_t selecting
   the row -- confirm against the definition. */
142 void design_matrix_set_numeric (struct design_matrix *, size_t,
143 const struct variable *, const union value *);
/* Declaration only: takes a const struct design_matrix * and a
   const struct variable *, and returns a size_t.
   NOTE(review): presumably returns the first design-matrix column that
   holds the given variable (cf. first_column in struct
   design_matrix_var); behaviour for a variable not in the matrix is not
   visible from this header -- confirm against the definition. */
145 size_t design_matrix_var_to_column (const struct design_matrix *,
146 const struct variable *);
148 struct variable *design_matrix_col_to_var (const struct design_matrix *,
152 design_matrix_set (struct design_matrix *, size_t,
153 const struct variable *, const union value *,
154 struct recoded_categorical *);
/* Declaration only: takes a struct variable * and returns nothing.
   NOTE(review): presumably releases the storage set up by
   cat_stored_values_create for the given variable -- confirm against
   the definition. */
156 void cat_stored_values_destroy (struct variable *);