X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fdesign-matrix.c;h=8f125c58b1e734d3b5d38ce1262e1e4477280edd;hb=124dea11f9542304e35bef92b7f3a46d5afca4d7;hp=8678f56ca9fa959f9f3acef1d42f97f57eba3974;hpb=97d4f38945476834fd7fce612b663f19f2b291f8;p=pspp-builds.git diff --git a/src/math/design-matrix.c b/src/math/design-matrix.c index 8678f56c..8f125c58 100644 --- a/src/math/design-matrix.c +++ b/src/math/design-matrix.c @@ -1,21 +1,18 @@ -/* PSPP - Creates design-matrices. +/* PSPP - a program for statistical analysis. Copyright (C) 2005 Free Software Foundation, Inc. - Written by Jason H Stover . - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ /* Create design matrices for procedures that need them. @@ -29,69 +26,20 @@ #include #include -#include #include #include #include +#include #include #include #include +#include "xalloc.h" + #define DM_COLUMN_NOT_FOUND -1 #define DM_INDEX_NOT_FOUND -3 -/* - Which element of a vector is equal to the value x? - */ -static size_t -cat_which_element_eq (const gsl_vector * vec, double x) -{ - size_t i; - - for (i = 0; i < vec->size; i++) - { - if (fabs (gsl_vector_get (vec, i) - x) < GSL_DBL_EPSILON) - { - return i; - } - } - return CAT_VALUE_NOT_FOUND; -} -static int -cat_is_zero_vector (const gsl_vector * vec) -{ - size_t i; - - for (i = 0; i < vec->size; i++) - { - if (gsl_vector_get (vec, i) != 0.0) - { - return 0; - } - } - return 1; -} - -/* - Return the value of v corresponding to the vector vec. - */ -union value * -cat_vector_to_value (const gsl_vector * vec, struct variable *v) -{ - size_t i; - - i = cat_which_element_eq (vec, 1.0); - if (i != CAT_VALUE_NOT_FOUND) - { - return cat_subscript_to_value (i + 1, v); - } - if (cat_is_zero_vector (vec)) - { - return cat_subscript_to_value (0, v); - } - return NULL; -} struct design_matrix * design_matrix_create (int n_variables, @@ -106,31 +54,34 @@ design_matrix_create (int n_variables, dm = xmalloc (sizeof *dm); dm->vars = xnmalloc (n_variables, sizeof *dm->vars); + dm->n_cases = xnmalloc (n_variables, sizeof (*dm->n_cases)); dm->n_vars = n_variables; for (i = 0; i < n_variables; i++) { + dm->n_cases[i] = 0; v = v_variables[i]; assert ((dm->vars + i) != NULL); (dm->vars + i)->v = v; /* Allows us to look up the variable from the design matrix. */ (dm->vars + i)->first_column = n_cols; - if (v->type == NUMERIC) + if (var_is_numeric (v)) { (dm->vars + i)->last_column = n_cols; n_cols++; } - else if (v->type == ALPHA) + else if (var_is_alpha (v)) { - assert (v->obs_vals != NULL); + size_t n_categories = cat_get_n_categories (v); (dm->vars + i)->last_column = - (dm->vars + i)->first_column + v->obs_vals->n_categories - 2; - n_cols += v->obs_vals->n_categories - 1; + (dm->vars + i)->first_column + n_categories - 2; + n_cols += n_categories - 1; } } dm->m = gsl_matrix_calloc (n_data, n_cols); col = 0; + return dm; } @@ -139,6 +90,7 @@ design_matrix_destroy (struct design_matrix *dm) { free (dm->vars); gsl_matrix_free (dm->m); + free (dm->n_cases); free (dm); } @@ -146,8 +98,8 @@ design_matrix_destroy (struct design_matrix *dm) Return the index of the variable for the given column. */ -static size_t -design_matrix_col_to_var_index (const struct design_matrix *dm, size_t col) +const struct variable * +design_matrix_col_to_var (const struct design_matrix *dm, size_t col) { size_t i; struct design_matrix_var v; @@ -156,42 +108,11 @@ design_matrix_col_to_var_index (const struct design_matrix *dm, size_t col) { v = dm->vars[i]; if (v.first_column <= col && col <= v.last_column) - return (v.v)->index; - } - return DM_INDEX_NOT_FOUND; -} - -/* - Return a pointer to the variable whose values - are stored in column col. - */ -struct variable * -design_matrix_col_to_var (const struct design_matrix *dm, size_t col) -{ - size_t index; - size_t i; - struct design_matrix_var dmv; - - index = design_matrix_col_to_var_index (dm, col); - for (i = 0; i < dm->n_vars; i++) - { - dmv = dm->vars[i]; - if ((dmv.v)->index == index) - { - return (struct variable *) dmv.v; - } + return v.v; } return NULL; } -static size_t -cmp_dm_var_index (const struct design_matrix_var *dmv, size_t index) -{ - if (dmv->v->index == index) - return 1; - return 0; -} - /* Return the number of the first column which holds the values for variable v. @@ -206,7 +127,7 @@ design_matrix_var_to_column (const struct design_matrix * dm, for (i = 0; i < dm->n_vars; i++) { tmp = dm->vars[i]; - if (cmp_dm_var_index (&tmp, v->index)) + if (tmp.v == v) { return tmp.first_column; } @@ -225,7 +146,7 @@ dm_var_to_last_column (const struct design_matrix *dm, for (i = 0; i < dm->n_vars; i++) { tmp = dm->vars[i]; - if (cmp_dm_var_index (&tmp, v->index)) + if (tmp.v == v) { return tmp.last_column; } @@ -234,7 +155,7 @@ dm_var_to_last_column (const struct design_matrix *dm, } /* - Set the appropriate value in the design matrix, + Set the appropriate value in the design matrix, whether that value is from a categorical or numeric variable. For a categorical variable, only the usual binary encoding is allowed. @@ -250,7 +171,7 @@ design_matrix_set_categorical (struct design_matrix *dm, size_t row, size_t lc; double entry; - assert (var->type == ALPHA); + assert (var_is_alpha (var)); fc = design_matrix_var_to_column (dm, var); lc = dm_var_to_last_column (dm, var); assert (lc != DM_COLUMN_NOT_FOUND); @@ -262,14 +183,125 @@ design_matrix_set_categorical (struct design_matrix *dm, size_t row, gsl_matrix_set (dm->m, row, col, entry); } } + void design_matrix_set_numeric (struct design_matrix *dm, size_t row, const struct variable *var, const union value *val) { size_t col; - assert (var->type == NUMERIC); + assert (var_is_numeric (var)); col = design_matrix_var_to_column ((const struct design_matrix *) dm, var); assert (col != DM_COLUMN_NOT_FOUND); gsl_matrix_set (dm->m, row, col, val->f); } + +struct design_matrix * +design_matrix_clone (const struct design_matrix *dm) +{ + struct design_matrix *result; + size_t i; + + assert (dm != NULL); + result = xmalloc (sizeof *result); + result->vars = xnmalloc (dm->n_vars, sizeof *dm->vars); + result->n_vars = dm->n_vars; + result->m = gsl_matrix_alloc (dm->m->size1, dm->m->size2); + + gsl_matrix_memcpy (result->m, dm->m); + for (i = 0; i < result->n_vars; i++) + { + result->vars[i] = dm->vars[i]; + } + return result; +} + +/* + Increment the number of cases for V. + */ +void +design_matrix_increment_case_count (struct design_matrix *dm, const struct variable *v) +{ + size_t i; + assert (dm != NULL); + assert (dm->n_cases != NULL); + assert (v != NULL); + i = design_matrix_var_to_column (dm, v); + dm->n_cases[i]++; +} + +/* + Set the number of cases for V. + */ +void +design_matrix_set_case_count (struct design_matrix *dm, const struct variable *v, size_t n) +{ + size_t i; + assert (dm != NULL); + assert (dm->n_cases != NULL); + assert (v != NULL); + i = design_matrix_var_to_column (dm, v); + dm->n_cases[i] = n; +} + +/* + Get the number of cases for V. + */ +size_t +design_matrix_get_case_count (const struct design_matrix *dm, const struct variable *v) +{ + size_t i; + assert (dm != NULL); + assert (dm->n_cases != NULL); + assert (v != NULL); + i = design_matrix_var_to_column (dm, v); + return dm->n_cases[i]; +} + +size_t +design_matrix_get_n_cols (const struct design_matrix *d) +{ + return d->m->size2; +} + +size_t +design_matrix_get_n_rows (const struct design_matrix *d) +{ + return d->m->size1; +} + +double +design_matrix_get_element (const struct design_matrix *d, size_t row, size_t col) +{ + return (gsl_matrix_get (d->m, row, col)); +} + +void +design_matrix_set_element (const struct design_matrix *d, size_t row, size_t col, double x) +{ + gsl_matrix_set (d->m, row, col, x); +} + +/* + Return the subscript of the column of the design matrix + corresponding to VAL. If VAR is categorical with d categories, its + first category should correspond to the origin in d-dimensional + Euclidean space, so there is no subscript for this value. + */ +size_t +dm_get_exact_subscript (const struct design_matrix *dm, const struct variable *var, + const union value *val) +{ + size_t result; + + result = design_matrix_var_to_column (dm, var); + if (var_is_alpha (var)) + { + if (cat_is_origin (var, val)) + { + return -1u; + } + result += cat_value_find (var, val) - 1; + } + return result; +}