From 685407ad62911e5edb1ec093a01ec9e46563af44 Mon Sep 17 00:00:00 2001 From: Jason Stover Date: Tue, 15 Jul 2008 22:54:36 -0400 Subject: [PATCH] Added functions to compute covariance matrix. --- src/data/category.c | 26 +++++-- src/math/ChangeLog | 4 ++ src/math/automake.mk | 2 + src/math/covariance-matrix.c | 129 +++++++++++++++++++++++++++++++++++ src/math/covariance-matrix.h | 34 +++++++++ 5 files changed, 191 insertions(+), 4 deletions(-) create mode 100644 src/math/covariance-matrix.c create mode 100644 src/math/covariance-matrix.h diff --git a/src/data/category.c b/src/data/category.c index b0424d9f..33078ce2 100644 --- a/src/data/category.c +++ b/src/data/category.c @@ -41,7 +41,7 @@ #include "xalloc.h" -#define CAT_VALUE_NOT_FOUND -2 +#define CAT_VALUE_NOT_FOUND -1 #define N_INITIAL_CATEGORIES 1 @@ -58,6 +58,11 @@ struct cat_vals track of the number of values stored. */ + size_t *value_counts; /* Element i stores the number of cases for which + the categorical variable has that corresponding + value. This is necessary for computing covariance + matrices. + */ }; void @@ -70,6 +75,7 @@ cat_stored_values_create (const struct variable *v) obs_vals->n_categories = 0; obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES; obs_vals->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->vals); + obs_vals->value_counts = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->value_counts); var_set_obs_vals (v, obs_vals); } } @@ -80,7 +86,10 @@ cat_stored_values_destroy (struct cat_vals *obs_vals) if (obs_vals != NULL) { if (obs_vals->n_allocated_categories > 0) - free (obs_vals->vals); + { + free (obs_vals->vals); + free (obs_vals->value_counts); + } free (obs_vals); } } @@ -108,15 +117,17 @@ cat_value_find (const struct variable *v, const union value *val) } /* - Add the new value unless it is already present. + Add the new value unless it is already present. Increment the count. */ void cat_value_update (const struct variable *v, const union value *val) { if (var_is_alpha (v)) { + size_t i; struct cat_vals *cv = var_get_obs_vals (v); - if (cat_value_find (v, val) == CAT_VALUE_NOT_FOUND) + i = cat_value_find (v, val); + if (i == CAT_VALUE_NOT_FOUND) { if (cv->n_categories >= cv->n_allocated_categories) { @@ -124,10 +135,17 @@ cat_value_update (const struct variable *v, const union value *val) cv->vals = xnrealloc (cv->vals, cv->n_allocated_categories, sizeof *cv->vals); + cv->value_counts = xnrealloc (cv->value_counts, cv->n_allocated_categories, + sizeof *cv->value_counts); } cv->vals[cv->n_categories] = *val; + cv->value_counts[cv->n_categories] = 1; cv->n_categories++; } + else + { + cv->value_counts[i]++; + } } } diff --git a/src/math/ChangeLog b/src/math/ChangeLog index 24620580..20c1c054 100644 --- a/src/math/ChangeLog +++ b/src/math/ChangeLog @@ -1,3 +1,7 @@ +2008-07-15 Jason Stover + + * covariance-matrix.c (covariance_pass_one): New file, new function. + 2008-06-21 Jason Stover * linreg.c (pspp_linreg): Accept a struct design_matrix. Use new diff --git a/src/math/automake.mk b/src/math/automake.mk index cff7f960..2c25686b 100644 --- a/src/math/automake.mk +++ b/src/math/automake.mk @@ -11,6 +11,8 @@ src_math_libpspp_math_a_SOURCES = \ src/math/chart-geometry.h \ src/math/coefficient.c \ src/math/coefficient.h \ + src/math/covariance-matrix.c \ + src/math/covariance-matrix.h \ src/math/group.c src/math/group.h \ src/math/group-proc.h \ src/math/histogram.c src/math/histogram.h \ diff --git a/src/math/covariance-matrix.c b/src/math/covariance-matrix.c new file mode 100644 index 00000000..85998dfe --- /dev/null +++ b/src/math/covariance-matrix.c @@ -0,0 +1,129 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* + Create and update the values in the covariance matrix. +*/ +#include +#include +#include +#include +#include "covariance-matrix.h" +#include "moments.h" + +/* + The covariances are stored in a DESIGN_MATRIX structure. + */ +struct design_matrix * +covariance_matrix_create (int n_variables, const struct variable *v_variables[]) +{ + return design_matrix_create (n_variables, v_variables, (size_t) n_variables); +} + +void covariance_matrix_destroy (struct design_matrix *x) +{ + design_matrix_destroy (x); +} + +/* + Update the covariance matrix with the new entries, assuming that V1 + is categorical and V2 is numeric. + */ +static void +covariance_update_categorical_numeric (struct design_matrix *cov, double mean, + double weight, double ssize, const struct variable *v1, + const struct variable *v2, const union value *val1, const union value *val2) +{ + double x; + size_t i; + size_t col; + size_t row; + + assert (var_is_alpha (v1)); + assert (var_is_numeric (v2)); + + row = design_matrix_var_to_column (cov, v1); + col = design_matrix_var_to_column (cov, v2); + for (i = 0; i < cat_get_n_categories (v1); i++) + { + row += i; + x = -1.0 * cat_get_n_categories (v1) / ssize; + if (i == cat_value_find (v1, val1)) + { + x += 1.0; + } + assert (val2 != NULL); + gsl_matrix_set (cov->m, row, col, (val2->f - mean) * x * weight); + } +} + +/* + Call this function in the first data pass. The central moments are + MEAN1 and MEAN2. Any categorical variables should already have their + values summarized in in its OBS_VALS element. + */ +void covariance_pass_one (struct design_matrix *cov, double mean1, double mean2, + double weight, double ssize, const struct variable *v1, + const struct variable *v2, const union value *val1, const union value *val2) +{ + size_t row; + size_t col; + size_t i; + double x; + double y; + + if (var_is_alpha (v1)) + { + if (var_is_numeric (v2)) + { + covariance_update_categorical_numeric (cov, mean2, weight, ssize, v1, + v2, val1, val2); + } + else + { + row = design_matrix_var_to_column (cov, v1); + col = design_matrix_var_to_column (cov, v2); + for (i = 0; i < cat_get_n_categories (v2); i++) + { + col += i; + y = -1.0 * cat_get_n_categories (v2) / ssize; + if (i == cat_value_find (v2, val2)) + { + y += 1.0; + } + gsl_matrix_set (cov->m, row, col, x * y * weight); + gsl_matrix_set (cov->m, col, row, x * y * weight); + } + } + } + else if (var_is_alpha (v2)) + { + covariance_update_categorical_numeric (cov, mean1, weight, ssize, v2, + v1, val2, val1); + } + else + { + /* + Both variables are numeric. + */ + row = design_matrix_var_to_column (cov, v1); + col = design_matrix_var_to_column (cov, v2); + x = (val1->f - mean1) * (val2->f - mean2) * weight; + gsl_matrix_set (cov->m, row, col, x); + gsl_matrix_set (cov->m, col, row, x); + } +} + diff --git a/src/math/covariance-matrix.h b/src/math/covariance-matrix.h new file mode 100644 index 00000000..eee5e16a --- /dev/null +++ b/src/math/covariance-matrix.h @@ -0,0 +1,34 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* + Create covariance matrices for procedures that need them. + */ + +#ifndef COVARIANCE_MATRIX_H +#define COVARIANCE_MATRIX_H + +#include "design-matrix.h" + +struct design_matrix * +covariance_matrix_create (int, const struct variable *[]); + +void covariance_matrix_destroy (struct design_matrix *); + +void covariance_pass_one (struct design_matrix *, double, double, + double, double, const struct variable *, + const struct variable *, const union value *, const union value *); +#endif -- 2.30.2