1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "covariance.h"
20 #include <gl/xalloc.h>
22 #include <gsl/gsl_matrix.h>
23 #include <data/case.h>
24 #include <data/variable.h>
25 #include <libpspp/misc.h>
27 #define n_MOMENTS (MOMENT_VARIANCE + 1)
32 /* The variables for which the covariance matrix is to be calculated */
34 const struct variable **vars;
36 /* The weight variable (or NULL if none) */
37 const struct variable *wv;
39 /* A set of matrices containing the 0th, 1st and 2nd moments */
42 /* The class of missing values to exclude */
43 enum mv_class exclude;
45 /* An array of doubles representing the covariance matrix.
46 Only the top triangle is included, and no diagonals */
53 /* Return a matrix containing the M th moments.
54 The matrix is of size NxN where N is the number of variables.
55 Each row represents the moments of a variable.
56 In the absence of missing values, the columns of this matrix will
57 be identical. If missing values are involved, then element (i,j)
58 is the moment of the i th variable, when paired with the j th variable.
61 covariance_moments (const struct covariance *cov, int m)
63 return cov->moments[m];
68 /* Create a covariance struct */
70 covariance_create (size_t n_vars, const struct variable **vars,
71 const struct variable *weight, enum mv_class exclude)
74 struct covariance *cov = xmalloc (sizeof *cov);
75 cov->vars = xmalloc (sizeof *cov->vars * n_vars);
80 for (i = 0; i < n_vars; ++i)
81 cov->vars[i] = vars[i];
83 cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
85 for (i = 0; i < n_MOMENTS; ++i)
86 cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
88 cov->exclude = exclude;
90 cov->n_cm = (n_vars * (n_vars - 1) ) / 2;
92 cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
97 /* Return an integer, which can be used to index
98 into COV->cm, to obtain the I, J th element
99 of the covariance matrix. If COV->cm does not
100 contain that element, then a negative value
104 cm_idx (const struct covariance *cov, int i, int j)
107 const int n2j = cov->n_vars - 2 - j;
108 const int nj = cov->n_vars - 2 ;
111 assert (j < cov->n_vars);
116 if (j >= cov->n_vars - 1)
123 as -= n2j * (n2j + 1) ;
130 /* Call this function for every case in the data set */
132 covariance_accumulate (struct covariance *cov, const struct ccase *c)
135 const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0;
137 for (i = 0 ; i < cov->n_vars; ++i)
139 const union value *val1 = case_data (c, cov->vars[i]);
141 if ( var_is_value_missing (cov->vars[i], val1, cov->exclude))
144 for (j = 0 ; j < cov->n_vars; ++j)
148 const union value *val2 = case_data (c, cov->vars[j]);
150 if ( var_is_value_missing (cov->vars[j], val2, cov->exclude))
153 idx = cm_idx (cov, i, j);
156 cov->cm [idx] += val1->f * val2->f * weight;
159 for (m = 0 ; m < n_MOMENTS; ++m)
161 double *x = gsl_matrix_ptr (cov->moments[m], i, j);
172 Allocate and return a gsl_matrix containing the covariances of the
176 cm_to_gsl (struct covariance *cov)
179 gsl_matrix *m = gsl_matrix_calloc (cov->n_vars, cov->n_vars);
181 /* Copy the non-diagonal elements from cov->cm */
182 for ( j = 0 ; j < cov->n_vars - 1; ++j)
184 for (i = j+1 ; i < cov->n_vars; ++i)
186 double x = cov->cm [cm_idx (cov, i, j)];
187 gsl_matrix_set (m, i, j, x);
188 gsl_matrix_set (m, j, i, x);
192 /* Copy the diagonal elements from cov->moments[2] */
193 for (j = 0 ; j < cov->n_vars ; ++j)
195 double sigma = gsl_matrix_get (cov->moments[2], j, j);
196 gsl_matrix_set (m, j, j, sigma);
205 Return a pointer to gsl_matrix containing the pairwise covariances.
206 The matrix remains owned by the COV object, and must not be freed.
207 Call this function only after all data have been accumulated.
210 covariance_calculate (struct covariance *cov)
215 for (m = 0; m < n_MOMENTS; ++m)
217 /* Divide the moments by the number of samples */
220 for (i = 0 ; i < cov->n_vars; ++i)
222 for (j = 0 ; j < cov->n_vars; ++j)
224 double *x = gsl_matrix_ptr (cov->moments[m], i, j);
225 *x /= gsl_matrix_get (cov->moments[0], i, j);
227 if ( m == MOMENT_VARIANCE)
228 *x -= pow2 (gsl_matrix_get (cov->moments[1], i, j));
234 /* Centre the moments */
235 for ( j = 0 ; j < cov->n_vars - 1; ++j)
237 for (i = j + 1 ; i < cov->n_vars; ++i)
239 double *x = &cov->cm [cm_idx (cov, i, j)];
241 *x /= gsl_matrix_get (cov->moments[0], i, j);
244 gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
246 gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
250 return cm_to_gsl (cov);
254 /* Destroy the COV object */
256 covariance_destroy (struct covariance *cov)
261 for (i = 0; i < n_MOMENTS; ++i)
262 gsl_matrix_free (cov->moments[i]);