1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2005 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 Create design matrices for procedures that need them.
22 #include "design-matrix.h"
29 #include <libpspp/message.h>
30 #include <data/variable.h>
31 #include <data/category.h>
32 #include <data/value.h>
34 #include <gsl/gsl_machine.h>
35 #include <gsl/gsl_vector.h>
36 #include <gsl/gsl_matrix.h>
/* Sentinel values for failed lookups.  DM_COLUMN_NOT_FOUND is what the
   column lookups in this file return when a variable is not part of the
   design matrix.  The expansions are parenthesized so that the negative
   literal cannot combine with neighbouring operators where it is used. */
#define DM_COLUMN_NOT_FOUND (-1)
#define DM_INDEX_NOT_FOUND (-3)
44 struct design_matrix *
45 design_matrix_create (int n_variables,
46 const struct variable *v_variables[],
49 struct design_matrix *dm;
50 const struct variable *v;
55 dm = xmalloc (sizeof *dm);
56 dm->vars = xnmalloc (n_variables, sizeof *dm->vars);
57 dm->n_cases = xnmalloc (n_variables, sizeof (*dm->n_cases));
58 dm->n_vars = n_variables;
60 for (i = 0; i < n_variables; i++)
64 assert ((dm->vars + i) != NULL);
65 (dm->vars + i)->v = v; /* Allows us to look up the variable from
67 (dm->vars + i)->first_column = n_cols;
68 if (var_is_numeric (v))
70 (dm->vars + i)->last_column = n_cols;
73 else if (var_is_alpha (v))
75 size_t n_categories = cat_get_n_categories (v);
76 (dm->vars + i)->last_column =
77 (dm->vars + i)->first_column + n_categories - 2;
78 n_cols += n_categories - 1;
81 dm->m = gsl_matrix_calloc (n_data, n_cols);
89 design_matrix_destroy (struct design_matrix *dm)
92 gsl_matrix_free (dm->m);
98 Return the index of the variable for the
101 const struct variable *
102 design_matrix_col_to_var (const struct design_matrix *dm, size_t col)
105 struct design_matrix_var v;
107 for (i = 0; i < dm->n_vars; i++)
110 if (v.first_column <= col && col <= v.last_column)
117 Return the number of the first column which holds the
118 values for variable v.
121 design_matrix_var_to_column (const struct design_matrix * dm,
122 const struct variable * v)
125 struct design_matrix_var tmp;
127 for (i = 0; i < dm->n_vars; i++)
132 return tmp.first_column;
135 return DM_COLUMN_NOT_FOUND;
140 dm_var_to_last_column (const struct design_matrix *dm,
141 const struct variable *v)
144 struct design_matrix_var tmp;
146 for (i = 0; i < dm->n_vars; i++)
151 return tmp.last_column;
154 return DM_COLUMN_NOT_FOUND;
158 Set the appropriate value in the design matrix,
159 whether that value is from a categorical or numeric
160 variable. For a categorical variable, only the usual
161 binary encoding is allowed.
164 design_matrix_set_categorical (struct design_matrix *dm, size_t row,
165 const struct variable *var,
166 const union value *val)
174 assert (var_is_alpha (var));
175 fc = design_matrix_var_to_column (dm, var);
176 lc = dm_var_to_last_column (dm, var);
177 assert (lc != DM_COLUMN_NOT_FOUND);
178 assert (fc != DM_COLUMN_NOT_FOUND);
179 is_one = fc + cat_value_find (var, val);
180 for (col = fc; col <= lc; col++)
182 entry = (col == is_one) ? 1.0 : 0.0;
183 gsl_matrix_set (dm->m, row, col, entry);
188 design_matrix_set_numeric (struct design_matrix *dm, size_t row,
189 const struct variable *var, const union value *val)
193 assert (var_is_numeric (var));
194 col = design_matrix_var_to_column ((const struct design_matrix *) dm, var);
195 assert (col != DM_COLUMN_NOT_FOUND);
196 gsl_matrix_set (dm->m, row, col, val->f);
199 struct design_matrix *
200 design_matrix_clone (const struct design_matrix *dm)
202 struct design_matrix *result;
206 result = xmalloc (sizeof *result);
207 result->vars = xnmalloc (dm->n_vars, sizeof *dm->vars);
208 result->n_vars = dm->n_vars;
209 result->m = gsl_matrix_alloc (dm->m->size1, dm->m->size2);
211 gsl_matrix_memcpy (result->m, dm->m);
212 for (i = 0; i < result->n_vars; i++)
214 result->vars[i] = dm->vars[i];
220 Increment the number of cases for V.
223 design_matrix_increment_case_count (struct design_matrix *dm, const struct variable *v)
227 assert (dm->n_cases != NULL);
229 i = design_matrix_var_to_column (dm, v);
234 Set the number of cases for V.
237 design_matrix_set_case_count (struct design_matrix *dm, const struct variable *v, size_t n)
241 assert (dm->n_cases != NULL);
243 i = design_matrix_var_to_column (dm, v);
248 Get the number of cases for V.
251 design_matrix_get_case_count (const struct design_matrix *dm, const struct variable *v)
255 assert (dm->n_cases != NULL);
257 i = design_matrix_var_to_column (dm, v);
258 return dm->n_cases[i];
262 design_matrix_get_n_cols (const struct design_matrix *d)
268 design_matrix_get_n_rows (const struct design_matrix *d)
274 design_matrix_get_element (const struct design_matrix *d, size_t row, size_t col)
276 return (gsl_matrix_get (d->m, row, col));
280 design_matrix_set_element (const struct design_matrix *d, size_t row, size_t col, double x)
282 gsl_matrix_set (d->m, row, col, x);
286 Return the subscript of the column of the design matrix
287 corresponding to VAL. If VAR is categorical with d categories, its
288 first category should correspond to the origin in d-dimensional
289 Euclidean space, so there is no subscript for this value.
/* NOTE(review): this listing omits several short lines of this function
   (its return type, local declarations, braces and the statements that
   produce the return value), so only the visible steps are described.

   RESULT starts out as the first column of VAR, as reported by
   design_matrix_var_to_column.  For a variable for which var_is_alpha
   holds, the value is first tested with cat_is_origin; what happens for
   the origin is not visible here -- TODO confirm against the full source.
   The visible statement after that test advances RESULT by
   cat_value_find (VAR, VAL) - 1. */
292 dm_get_exact_subscript (const struct design_matrix *dm, const struct variable *var,
293 const union value *val)
297 result = design_matrix_var_to_column (dm, var);
298 if (var_is_alpha (var))
300 if (cat_is_origin (var, val))
304 result += cat_value_find (var, val) - 1;