1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "covariance.h"
20 #include <gl/xalloc.h>
22 #include <gsl/gsl_matrix.h>
23 #include <data/case.h>
24 #include <data/variable.h>
25 #include <libpspp/misc.h>
27 #define n_MOMENTS (MOMENT_VARIANCE + 1)
32 /* The variables for which the covariance matrix is to be calculated. */
34 const struct variable **vars;
36 /* Categorical variables. */
38 const struct variable **catvars;
40 /* Array containing number of categories per categorical variable. */
43 /* Dimension of the covariance matrix. */
46 /* The weight variable (or NULL if none) */
47 const struct variable *wv;
49 /* A set of matrices containing the 0th, 1st and 2nd moments */
52 /* The class of missing values to exclude */
53 enum mv_class exclude;
55 /* An array of doubles representing the covariance matrix.
56 Only the top triangle is included, and no diagonals */
63 /* Return a matrix containing the M th moments.
64 The matrix is of size NxN where N is the number of variables.
65 Each row represents the moments of a variable.
66 In the absence of missing values, the columns of this matrix will
67 be identical. If missing values are involved, then element (i,j)
68 is the moment of the i th variable, when paired with the j th variable.
71 covariance_moments (const struct covariance *cov, int m)
73 return cov->moments[m];
78 /* Create a covariance struct.
81 covariance_create (size_t n_vars, const struct variable **vars,
82 const struct variable *weight, enum mv_class exclude)
85 struct covariance *cov = xmalloc (sizeof *cov);
86 cov->vars = xmalloc (sizeof *cov->vars * n_vars);
92 for (i = 0; i < n_vars; ++i)
93 cov->vars[i] = vars[i];
95 cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
97 for (i = 0; i < n_MOMENTS; ++i)
98 cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
100 cov->exclude = exclude;
102 cov->n_cm = (n_vars * (n_vars - 1) ) / 2;
104 cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
110 Create a covariance struct for a two-pass algorithm. If categorical
111 variables are involed, the dimension cannot be know until after the
112 first data pass, so the actual covariances will not be allocated
116 covariance_2pass_create (size_t n_vars, const struct variable **vars,
117 size_t n_catvars, const struct variable **catvars,
118 const struct variable *weight, enum mv_class exclude)
121 struct covariance *cov = xmalloc (sizeof *cov);
122 cov->vars = xmalloc (sizeof *cov->vars * n_vars);
123 cov->catvars = xnmalloc (n_catvars, sizeof (*cov->catvars));
124 cov->n_categories = xnmalloc (n_catvars, sizeof (cov->n_categories));
127 cov->n_vars = n_vars;
128 cov->n_catvars = n_catvars;
130 for (i = 0; i < n_vars; ++i)
131 cov->vars[i] = vars[i];
133 for (i = 0; i < n_catvars; i++)
135 cov->catvars[i] = catvars[i];
136 cov->n_categories[i] = 0;
139 cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
141 cov->exclude = exclude;
146 /* Return an integer, which can be used to index
147 into COV->cm, to obtain the I, J th element
148 of the covariance matrix. If COV->cm does not
149 contain that element, then a negative value
153 cm_idx (const struct covariance *cov, int i, int j)
156 const int n2j = cov->n_vars - 2 - j;
157 const int nj = cov->n_vars - 2 ;
160 assert (j < cov->n_vars);
165 if (j >= cov->n_vars - 1)
172 as -= n2j * (n2j + 1) ;
179 /* Call this function for every case in the data set */
181 covariance_accumulate (struct covariance *cov, const struct ccase *c)
184 const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0;
186 for (i = 0 ; i < cov->n_vars; ++i)
188 const union value *val1 = case_data (c, cov->vars[i]);
190 if ( var_is_value_missing (cov->vars[i], val1, cov->exclude))
193 for (j = 0 ; j < cov->n_vars; ++j)
197 const union value *val2 = case_data (c, cov->vars[j]);
199 if ( var_is_value_missing (cov->vars[j], val2, cov->exclude))
202 idx = cm_idx (cov, i, j);
205 cov->cm [idx] += val1->f * val2->f * weight;
208 for (m = 0 ; m < n_MOMENTS; ++m)
210 double *x = gsl_matrix_ptr (cov->moments[m], i, j);
221 Allocate and return a gsl_matrix containing the covariances of the
225 cm_to_gsl (struct covariance *cov)
228 gsl_matrix *m = gsl_matrix_calloc (cov->n_vars, cov->n_vars);
230 /* Copy the non-diagonal elements from cov->cm */
231 for ( j = 0 ; j < cov->n_vars - 1; ++j)
233 for (i = j+1 ; i < cov->n_vars; ++i)
235 double x = cov->cm [cm_idx (cov, i, j)];
236 gsl_matrix_set (m, i, j, x);
237 gsl_matrix_set (m, j, i, x);
241 /* Copy the diagonal elements from cov->moments[2] */
242 for (j = 0 ; j < cov->n_vars ; ++j)
244 double sigma = gsl_matrix_get (cov->moments[2], j, j);
245 gsl_matrix_set (m, j, j, sigma);
254 Return a pointer to gsl_matrix containing the pairwise covariances.
255 The matrix remains owned by the COV object, and must not be freed.
256 Call this function only after all data have been accumulated.
259 covariance_calculate (struct covariance *cov)
264 for (m = 0; m < n_MOMENTS; ++m)
266 /* Divide the moments by the number of samples */
269 for (i = 0 ; i < cov->n_vars; ++i)
271 for (j = 0 ; j < cov->n_vars; ++j)
273 double *x = gsl_matrix_ptr (cov->moments[m], i, j);
274 *x /= gsl_matrix_get (cov->moments[0], i, j);
276 if ( m == MOMENT_VARIANCE)
277 *x -= pow2 (gsl_matrix_get (cov->moments[1], i, j));
283 /* Centre the moments */
284 for ( j = 0 ; j < cov->n_vars - 1; ++j)
286 for (i = j + 1 ; i < cov->n_vars; ++i)
288 double *x = &cov->cm [cm_idx (cov, i, j)];
290 *x /= gsl_matrix_get (cov->moments[0], i, j);
293 gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
295 gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
299 return cm_to_gsl (cov);
303 /* Destroy the COV object */
305 covariance_destroy (struct covariance *cov)
310 for (i = 0; i < n_MOMENTS; ++i)
311 gsl_matrix_free (cov->moments[i]);