#include "xalloc.h"
-#define CAT_VALUE_NOT_FOUND -2
+#define CAT_VALUE_NOT_FOUND -1
#define N_INITIAL_CATEGORIES 1
track of the number of
values stored.
*/
+ size_t *value_counts; /* Element i stores the number of cases for which
+ the categorical variable has that corresponding
+ value. This is necessary for computing covariance
+ matrices.
+ */
};
void
obs_vals->n_categories = 0;
obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES;
obs_vals->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->vals);
+ obs_vals->value_counts = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->value_counts);
var_set_obs_vals (v, obs_vals);
}
}
if (obs_vals != NULL)
{
if (obs_vals->n_allocated_categories > 0)
- free (obs_vals->vals);
+ {
+ free (obs_vals->vals);
+ free (obs_vals->value_counts);
+ }
free (obs_vals);
}
}
}
/*
- Add the new value unless it is already present.
+ Record one observation of VAL for the variable V.  Nothing happens
+ unless var_is_alpha (V) holds.  If cat_value_find reports that VAL is
+ not yet stored, VAL is appended to the list of observed values with
+ a count of 1; otherwise the count kept for the matching entry is
+ incremented.
+ NOTE(review): as shown in this hunk, both arrays are reallocated to
+ n_allocated_categories elements without that number being increased
+ first.  Confirm that the capacity is grown in lines not visible here.
*/
void
cat_value_update (const struct variable *v, const union value *val)
{
if (var_is_alpha (v))
{
+ size_t i;
struct cat_vals *cv = var_get_obs_vals (v);
- if (cat_value_find (v, val) == CAT_VALUE_NOT_FOUND)
+ i = cat_value_find (v, val);
+ if (i == CAT_VALUE_NOT_FOUND)
{
if (cv->n_categories >= cv->n_allocated_categories)
{
cv->vals = xnrealloc (cv->vals,
cv->n_allocated_categories,
sizeof *cv->vals);
+ cv->value_counts = xnrealloc (cv->value_counts, cv->n_allocated_categories,
+ sizeof *cv->value_counts);
}
cv->vals[cv->n_categories] = *val;
+ cv->value_counts[cv->n_categories] = 1;
cv->n_categories++;
}
+ else
+ {
+ cv->value_counts[i]++;
+ }
}
}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/*
+ Create and update the values in the covariance matrix.
+*/
+#include <assert.h>
+#include <config.h>
+#include <data/variable.h>
+#include <data/value.h>
+#include "covariance-matrix.h"
+#include "moments.h"
+
+/*
+  Allocate the structure that holds the covariances.  The covariances
+  live in an ordinary DESIGN_MATRIX, so this simply forwards the
+  N_VARIABLES variables in V_VARIABLES to design_matrix_create, passing
+  N_VARIABLES once more (as a size_t) for its third argument.
+ */
+struct design_matrix *
+covariance_matrix_create (int n_variables,
+                          const struct variable *v_variables[])
+{
+  const size_t count = (size_t) n_variables;
+
+  return design_matrix_create (n_variables, v_variables, count);
+}
+
+/*
+  Dispose of the matrix X by handing it to design_matrix_destroy.
+ */
+void
+covariance_matrix_destroy (struct design_matrix *x)
+{
+  design_matrix_destroy (x);
+}
+
+/*
+  Update the covariance matrix with the new entries, assuming that V1
+  is categorical and V2 is numeric.
+
+  COV is the matrix to update, MEAN is subtracted from the numeric
+  value VAL2->f, and WEIGHT multiplies every entry written.  VAL1 is
+  the case's value of V1.  SSIZE divides the number of categories of V1
+  (presumably the sample size; confirm against callers).
+
+  One entry is written for each category of V1: category I goes to row
+  (first row of V1) + I, in the column of V2.  Only that cell is set;
+  the mirrored cell (column, row) is not touched here.
+
+  NOTE(review): the centring term uses the total number of categories
+  rather than a per-category count; check that this is intended.
+ */
+static void
+covariance_update_categorical_numeric (struct design_matrix *cov, double mean,
+                                       double weight, double ssize,
+                                       const struct variable *v1,
+                                       const struct variable *v2,
+                                       const union value *val1,
+                                       const union value *val2)
+{
+  size_t n_categories;
+  size_t observed;
+  size_t first_row;
+  size_t col;
+  size_t i;
+
+  assert (var_is_alpha (v1));
+  assert (var_is_numeric (v2));
+  assert (val2 != NULL);
+
+  first_row = design_matrix_var_to_column (cov, v1);
+  col = design_matrix_var_to_column (cov, v2);
+
+  /* Neither of these depends on the loop index, so look them up once. */
+  n_categories = cat_get_n_categories (v1);
+  observed = cat_value_find (v1, val1);
+
+  for (i = 0; i < n_categories; i++)
+    {
+      double x = -1.0 * n_categories / ssize;
+
+      if (i == observed)
+        {
+          x += 1.0;
+        }
+      /* Index from the fixed base.  Adding I to a running row would
+         visit rows base, base+1, base+3, base+6, ... instead. */
+      gsl_matrix_set (cov->m, first_row + i, col,
+                      (val2->f - mean) * x * weight);
+    }
+}
+
+/*
+  Call this function in the first data pass.  The central moments are
+  MEAN1 and MEAN2.  Any categorical variables should already have their
+  values summarized in their OBS_VALS element.
+
+  COV is the matrix to update.  VAL1 and VAL2 are the current case's
+  values of V1 and V2, and WEIGHT multiplies every entry written.
+  SSIZE divides the category counts in the categorical terms
+  (presumably the sample size; confirm against callers).
+
+  Three cases are handled:
+    - exactly one variable is categorical: the work is delegated to
+      covariance_update_categorical_numeric with the categorical
+      variable passed first;
+    - both are categorical: one entry, plus its mirror image, is written
+      for every pair (category of V1, category of V2);
+    - both are numeric: a single entry and its mirror image are written.
+ */
+void
+covariance_pass_one (struct design_matrix *cov, double mean1, double mean2,
+                     double weight, double ssize, const struct variable *v1,
+                     const struct variable *v2, const union value *val1,
+                     const union value *val2)
+{
+  size_t row;
+  size_t col;
+
+  if (var_is_alpha (v1))
+    {
+      if (var_is_numeric (v2))
+        {
+          covariance_update_categorical_numeric (cov, mean2, weight, ssize,
+                                                 v1, v2, val1, val2);
+        }
+      else
+        {
+          /*
+            Both variables are categorical.
+          */
+          size_t n1;
+          size_t n2;
+          size_t found1;
+          size_t found2;
+          size_t i;
+          size_t j;
+
+          row = design_matrix_var_to_column (cov, v1);
+          col = design_matrix_var_to_column (cov, v2);
+          n1 = cat_get_n_categories (v1);
+          n2 = cat_get_n_categories (v2);
+          found1 = cat_value_find (v1, val1);
+          found2 = cat_value_find (v2, val2);
+
+          for (i = 0; i < n1; i++)
+            {
+              /* The V1 factor is computed explicitly for each category;
+                 it must never be read before it is assigned. */
+              double x = -1.0 * n1 / ssize;
+
+              if (i == found1)
+                {
+                  x += 1.0;
+                }
+              for (j = 0; j < n2; j++)
+                {
+                  double y = -1.0 * n2 / ssize;
+
+                  if (j == found2)
+                    {
+                      y += 1.0;
+                    }
+                  /* Offsets are taken from the fixed bases ROW and COL
+                     so that consecutive categories land in consecutive
+                     cells. */
+                  gsl_matrix_set (cov->m, row + i, col + j, x * y * weight);
+                  gsl_matrix_set (cov->m, col + j, row + i, x * y * weight);
+                }
+            }
+        }
+    }
+  else if (var_is_alpha (v2))
+    {
+      covariance_update_categorical_numeric (cov, mean1, weight, ssize, v2,
+                                             v1, val2, val1);
+    }
+  else
+    {
+      /*
+        Both variables are numeric.
+      */
+      double product;
+
+      row = design_matrix_var_to_column (cov, v1);
+      col = design_matrix_var_to_column (cov, v2);
+      product = (val1->f - mean1) * (val2->f - mean2) * weight;
+      gsl_matrix_set (cov->m, row, col, product);
+      gsl_matrix_set (cov->m, col, row, product);
+    }
+}
+
+
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/*
+ Create covariance matrices for procedures that need them.
+ */
+
+#ifndef COVARIANCE_MATRIX_H
+#define COVARIANCE_MATRIX_H
+
+#include "design-matrix.h"
+
+/* Creates the matrix that will hold the covariances of the N_VARIABLES
+   variables in V_VARIABLES. */
+struct design_matrix *
+covariance_matrix_create (int n_variables,
+                          const struct variable *v_variables[]);
+
+/* Destroys a matrix X returned by covariance_matrix_create. */
+void covariance_matrix_destroy (struct design_matrix *x);
+
+/* Updates COV in the first data pass, for one case.  MEAN1 and MEAN2
+   are the central moments used for V1 and V2, WEIGHT multiplies every
+   entry written, and VAL1 and VAL2 are the case's values of V1 and V2.
+   SSIZE divides the category counts in the categorical terms.
+   Mind the order of the four double arguments. */
+void covariance_pass_one (struct design_matrix *cov,
+                          double mean1, double mean2,
+                          double weight, double ssize,
+                          const struct variable *v1,
+                          const struct variable *v2,
+                          const union value *val1,
+                          const union value *val2);
+#endif