/* PSPP - a program for statistical analysis.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include <libpspp/assertion.h>
-#include "covariance.h"
-#include <gl/xalloc.h>
-#include "moments.h"
+#include "math/covariance.h"
+
#include <gsl/gsl_matrix.h>
-#include <data/case.h>
-#include <data/variable.h>
-#include <libpspp/misc.h>
-#include "categoricals.h"
+
+#include "data/case.h"
+#include "data/variable.h"
+#include "libpspp/assertion.h"
+#include "libpspp/misc.h"
+#include "math/categoricals.h"
+#include "math/interaction.h"
+#include "math/moments.h"
+
+#include "gl/xalloc.h"
#define n_MOMENTS (MOMENT_VARIANCE + 1)
gsl_matrix_set (out, i, j, x);
}
}
-
+
gsl_matrix_free (in);
return out;
struct covariance
{
+ /* True if the covariances are centerered. (ie Real covariances) */
+ bool centered;
+
/* The variables for which the covariance matrix is to be calculated. */
size_t n_vars;
- const struct variable **vars;
+ const struct variable *const *vars;
/* Categorical variables. */
struct categoricals *categoricals;
double *cm;
int n_cm;
- /* 1 for single pass algorithm;
+ /* 1 for single pass algorithm;
2 for double pass algorithm
*/
short passes;
/*
0 : No pass has been made
1 : First pass has been started
- 2 : Second pass has been
-
+ 2 : Second pass has been
+
IE: How many passes have been (partially) made. */
short state;
/* Flags indicating that the first case has been seen */
bool pass_one_first_case_seen;
bool pass_two_first_case_seen;
+
+ gsl_matrix *unnormalised;
};
be identical. If missing values are involved, then element (i,j)
is the moment of the i th variable, when paired with the j th variable.
*/
-const gsl_matrix *
+gsl_matrix *
covariance_moments (const struct covariance *cov, int m)
{
return cov->moments[m];
/* Create a covariance struct.
*/
struct covariance *
-covariance_1pass_create (size_t n_vars, const struct variable **vars,
- const struct variable *weight, enum mv_class exclude)
+covariance_1pass_create (size_t n_vars, const struct variable *const *vars,
+ const struct variable *weight, enum mv_class exclude,
+ bool centered)
{
size_t i;
- struct covariance *cov = xzalloc (sizeof *cov);
+ struct covariance *cov = XZALLOC (struct covariance);
+ cov->centered = centered;
cov->passes = 1;
cov->state = 0;
cov->pass_one_first_case_seen = cov->pass_two_first_case_seen = false;
-
+
cov->vars = vars;
cov->wv = weight;
cov->dim = n_vars;
cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
-
+
for (i = 0; i < n_MOMENTS; ++i)
cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
cov->exclude = exclude;
- cov->n_cm = (n_vars * (n_vars - 1) ) / 2;
+ cov->n_cm = (n_vars * (n_vars - 1)) / 2;
+
- if (cov->n_cm > 0)
- cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+ cov->cm = xcalloc (cov->n_cm, sizeof *cov->cm);
cov->categoricals = NULL;
return cov;
until then.
*/
struct covariance *
-covariance_2pass_create (size_t n_vars, const struct variable **vars,
- size_t n_catvars, const struct variable **catvars,
- const struct variable *wv, enum mv_class exclude)
+covariance_2pass_create (size_t n_vars, const struct variable *const *vars,
+ struct categoricals *cats,
+ const struct variable *wv, enum mv_class exclude,
+ bool centered)
{
size_t i;
struct covariance *cov = xmalloc (sizeof *cov);
+ cov->centered = centered;
cov->passes = 2;
cov->state = 0;
cov->pass_one_first_case_seen = cov->pass_two_first_case_seen = false;
-
+
cov->vars = vars;
cov->wv = wv;
cov->dim = n_vars;
cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
-
+
for (i = 0; i < n_MOMENTS; ++i)
cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
cov->n_cm = -1;
cov->cm = NULL;
- cov->categoricals = categoricals_create (catvars, n_catvars, wv, exclude);
+ cov->categoricals = cats;
+ cov->unnormalised = NULL;
return cov;
}
-/* Return an integer, which can be used to index
+/* Return an integer, which can be used to index
into COV->cm, to obtain the I, J th element
of the covariance matrix. If COV->cm does not
contain that element, then a negative value
int as;
const int n2j = cov->dim - 2 - j;
const int nj = cov->dim - 2 ;
-
+
assert (i >= 0);
assert (j < cov->dim);
- if ( i == 0)
+ if (i == 0)
return -1;
if (j >= cov->dim - 1)
return -1;
- if ( i <= j)
+ if (i <= j)
return -1 ;
as = nj * (nj + 1) ;
- as -= n2j * (n2j + 1) ;
+ as -= n2j * (n2j + 1) ;
as /= 2;
return i - 1 + as;
/*
- Returns true iff the variable corresponding to the Ith element of the covariance matrix
+ Returns true iff the variable corresponding to the Ith element of the covariance matrix
has a missing value for case C
*/
static bool
is_missing (const struct covariance *cov, int i, const struct ccase *c)
{
const struct variable *var = i < cov->n_vars ?
- cov->vars[i] :
- categoricals_get_variable_by_subscript (cov->categoricals, i - cov->n_vars);
+ cov->vars[i] :
+ categoricals_get_interaction_by_subscript (cov->categoricals, i - cov->n_vars)->vars[0];
const union value *val = case_data (c, var);
- return var_is_value_missing (var, val, cov->exclude);
+ return (var_is_value_missing (var, val) & cov->exclude) != 0;
}
static double
get_val (const struct covariance *cov, int i, const struct ccase *c)
{
- if ( i < cov->n_vars)
+ if (i < cov->n_vars)
{
const struct variable *var = cov->vars[i];
return val->f;
}
- return categoricals_get_binary_by_subscript (cov->categoricals, i - cov->n_vars, c);
+ return categoricals_get_effects_code_for_case (cov->categoricals, i - cov->n_vars, c);
}
#if 0
covariance_accumulate_pass1 (struct covariance *cov, const struct ccase *c)
{
size_t i, j, m;
- const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0;
+ const double weight = cov->wv ? case_num (c, cov->wv) : 1.0;
assert (cov->passes == 2);
if (!cov->pass_one_first_case_seen)
cov->state = 1;
}
- categoricals_update (cov->categoricals, c);
+ if (cov->categoricals)
+ categoricals_update (cov->categoricals, c);
for (i = 0 ; i < cov->dim; ++i)
{
double v1 = get_val (cov, i, c);
- if ( is_missing (cov, i, c))
+ if (is_missing (cov, i, c))
continue;
for (j = 0 ; j < cov->dim; ++j)
{
double pwr = 1.0;
- if ( is_missing (cov, j, c))
+ if (is_missing (cov, j, c))
continue;
for (m = 0 ; m <= MOMENT_MEAN; ++m)
covariance_accumulate_pass2 (struct covariance *cov, const struct ccase *c)
{
size_t i, j;
- const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0;
+ const double weight = cov->wv ? case_num (c, cov->wv) : 1.0;
assert (cov->passes == 2);
assert (cov->state >= 1);
assert (cov->state == 1);
cov->state = 2;
- cov->dim = cov->n_vars +
- categoricals_total (cov->categoricals) - categoricals_get_n_variables (cov->categoricals);
+ if (cov->categoricals)
+ categoricals_done (cov->categoricals);
+
+ cov->dim = cov->n_vars;
- cov->n_cm = (cov->dim * (cov->dim - 1) ) / 2;
- cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+ if (cov->categoricals)
+ cov->dim += categoricals_df_total (cov->categoricals);
+
+ cov->n_cm = (cov->dim * (cov->dim - 1)) / 2;
+ cov->cm = xcalloc (cov->n_cm, sizeof *cov->cm);
/* Grow the moment matrices so that they're large enough to accommodate the
categorical elements */
cov->moments[i] = resize_matrix (cov->moments[i], cov->dim);
}
- categoricals_done (cov->categoricals);
-
/* Populate the moments matrices with the categorical value elements */
for (i = cov->n_vars; i < cov->dim; ++i)
{
{
double v1 = get_val (cov, i, c);
- if ( is_missing (cov, i, c))
+ if (is_missing (cov, i, c))
continue;
for (j = 0 ; j < cov->dim; ++j)
const double s = pow2 (v1 - gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)) * weight;
- if ( is_missing (cov, j, c))
+ if (is_missing (cov, j, c))
continue;
{
*x += s;
}
- ss =
+ ss =
(v1 - gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
- *
+ *
(v2 - gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
* weight
;
covariance_accumulate (struct covariance *cov, const struct ccase *c)
{
size_t i, j, m;
- const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0;
+ const double weight = cov->wv ? case_num (c, cov->wv) : 1.0;
assert (cov->passes == 1);
- if ( !cov->pass_one_first_case_seen)
+ if (!cov->pass_one_first_case_seen)
{
- assert ( cov->state == 0);
+ assert (cov->state == 0);
cov->state = 1;
}
{
const union value *val1 = case_data (c, cov->vars[i]);
- if ( is_missing (cov, i, c))
+ if (is_missing (cov, i, c))
continue;
for (j = 0 ; j < cov->dim; ++j)
int idx;
const union value *val2 = case_data (c, cov->vars[j]);
- if ( is_missing (cov, j, c))
+ if (is_missing (cov, j, c))
continue;
idx = cm_idx (cov, i, j);
}
-/*
+/*
Allocate and return a gsl_matrix containing the covariances of the
data.
*/
gsl_matrix *m = gsl_matrix_calloc (cov->dim, cov->dim);
/* Copy the non-diagonal elements from cov->cm */
- for ( j = 0 ; j < cov->dim - 1; ++j)
+ for (j = 0 ; j < cov->dim - 1; ++j)
{
for (i = j+1 ; i < cov->dim; ++i)
{
}
-static const gsl_matrix *
+static gsl_matrix *
covariance_calculate_double_pass (struct covariance *cov)
{
size_t i, j;
*x /= gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
idx = cm_idx (cov, i, j);
- if ( idx >= 0)
+ if (idx >= 0)
{
x = &cov->cm [idx];
*x /= gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
return cm_to_gsl (cov);
}
-static const gsl_matrix *
+static gsl_matrix *
covariance_calculate_single_pass (struct covariance *cov)
{
size_t i, j;
for (m = 0; m < n_MOMENTS; ++m)
{
/* Divide the moments by the number of samples */
- if ( m > 0)
+ if (m > 0)
{
for (i = 0 ; i < cov->dim; ++i)
{
double *x = gsl_matrix_ptr (cov->moments[m], i, j);
*x /= gsl_matrix_get (cov->moments[0], i, j);
- if ( m == MOMENT_VARIANCE)
+ if (m == MOMENT_VARIANCE)
*x -= pow2 (gsl_matrix_get (cov->moments[1], i, j));
}
}
}
}
- /* Centre the moments */
- for ( j = 0 ; j < cov->dim - 1; ++j)
+ if (cov->centered)
{
- for (i = j + 1 ; i < cov->dim; ++i)
+ /* Centre the moments */
+ for (j = 0 ; j < cov->dim - 1; ++j)
{
- double *x = &cov->cm [cm_idx (cov, i, j)];
-
- *x /= gsl_matrix_get (cov->moments[0], i, j);
+ for (i = j + 1 ; i < cov->dim; ++i)
+ {
+ double *x = &cov->cm [cm_idx (cov, i, j)];
- *x -=
- gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
- *
- gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
+ *x /= gsl_matrix_get (cov->moments[0], i, j);
+
+ *x -=
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
+ *
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
+ }
}
}
}
-/*
- Return a pointer to gsl_matrix containing the pairwise covariances.
- The matrix remains owned by the COV object, and must not be freed.
- Call this function only after all data have been accumulated.
-*/
-const gsl_matrix *
+/* Return a pointer to gsl_matrix containing the pairwise covariances. The
+ caller owns the returned matrix and must free it when it is no longer
+ needed.
+
+ Call this function only after all data have been accumulated. */
+gsl_matrix *
covariance_calculate (struct covariance *cov)
{
- assert ( cov->state > 0 );
+ if (cov->state <= 0)
+ return NULL;
switch (cov->passes)
{
case 1:
- return covariance_calculate_single_pass (cov);
+ return covariance_calculate_single_pass (cov);
break;
case 2:
- return covariance_calculate_double_pass (cov);
+ return covariance_calculate_double_pass (cov);
break;
default:
NOT_REACHED ();
/*
Covariance computed without dividing by the sample size.
*/
-static const gsl_matrix *
+static gsl_matrix *
covariance_calculate_double_pass_unnormalized (struct covariance *cov)
{
- size_t i, j;
- for (i = 0 ; i < cov->dim; ++i)
- {
- for (j = 0 ; j < cov->dim; ++j)
- {
- int idx;
- double *x = gsl_matrix_ptr (cov->moments[MOMENT_VARIANCE], i, j);
-
- idx = cm_idx (cov, i, j);
- if ( idx >= 0)
- {
- x = &cov->cm [idx];
- }
- }
- }
-
return cm_to_gsl (cov);
}
-static const gsl_matrix *
+static gsl_matrix *
covariance_calculate_single_pass_unnormalized (struct covariance *cov)
{
size_t i, j;
- for (i = 0 ; i < cov->dim; ++i)
+ if (cov->centered)
{
- for (j = 0 ; j < cov->dim; ++j)
+ for (i = 0 ; i < cov->dim; ++i)
{
- double *x = gsl_matrix_ptr (cov->moments[MOMENT_VARIANCE], i, j);
- *x -= pow2 (gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
- / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ for (j = 0 ; j < cov->dim; ++j)
+ {
+ double *x = gsl_matrix_ptr (cov->moments[MOMENT_VARIANCE], i, j);
+ *x -= pow2 (gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
+ / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ }
}
- }
- for ( j = 0 ; j < cov->dim - 1; ++j)
- {
- for (i = j + 1 ; i < cov->dim; ++i)
+
+ for (j = 0 ; j < cov->dim - 1; ++j)
{
- double *x = &cov->cm [cm_idx (cov, i, j)];
-
- *x -=
- gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
- *
- gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i)
- / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ for (i = j + 1 ; i < cov->dim; ++i)
+ {
+ double *x = &cov->cm [cm_idx (cov, i, j)];
+
+ *x -=
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
+ *
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i)
+ / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ }
}
}
}
-/*
- Return a pointer to gsl_matrix containing the pairwise covariances.
- The matrix remains owned by the COV object, and must not be freed.
- Call this function only after all data have been accumulated.
-*/
+/* Return a pointer to gsl_matrix containing the pairwise covariances. The
+ returned matrix is owned by the structure, and must not be freed.
+
+ Call this function only after all data have been accumulated. */
const gsl_matrix *
covariance_calculate_unnormalized (struct covariance *cov)
{
- assert ( cov->state > 0 );
+ if (cov->state <= 0)
+ return NULL;
+
+ if (cov->unnormalised != NULL)
+ return cov->unnormalised;
switch (cov->passes)
{
case 1:
- return covariance_calculate_single_pass_unnormalized (cov);
+ cov->unnormalised = covariance_calculate_single_pass_unnormalized (cov);
break;
case 2:
- return covariance_calculate_double_pass_unnormalized (cov);
+ cov->unnormalised = covariance_calculate_double_pass_unnormalized (cov);
break;
default:
NOT_REACHED ();
}
+
+ return cov->unnormalised;
}
+/* Function to access the categoricals used by COV
+ The return value is owned by the COV
+*/
+const struct categoricals *
+covariance_get_categoricals (const struct covariance *cov)
+{
+ return cov->categoricals;
+}
/* Destroy the COV object */
for (i = 0; i < n_MOMENTS; ++i)
gsl_matrix_free (cov->moments[i]);
+ gsl_matrix_free (cov->unnormalised);
free (cov->moments);
free (cov->cm);
free (cov);
}
+
+size_t
+covariance_dim (const struct covariance * cov)
+{
+ return (cov->dim);
+}
+
+\f
+
+/*
+ Routines to assist debugging.
+ The following are not thoroughly tested and in certain respects
+ unreliable. They should only be
+ used for aids to development. Not as user accessible code.
+*/
+
+#include "libpspp/str.h"
+#include "output/pivot-table.h"
+#include "data/format.h"
+
+
+/* Create a table which can be populated with the encodings for
+ the covariance matrix COV */
+struct pivot_table *
+covariance_dump_enc_header (const struct covariance *cov)
+{
+ struct pivot_table *table = pivot_table_create ("Covariance Encoding");
+
+ struct pivot_dimension *factors = pivot_dimension_create (
+ table, PIVOT_AXIS_COLUMN, "Factor");
+ for (size_t i = 0 ; i < cov->n_vars; ++i)
+ pivot_category_create_leaf (factors->root,
+ pivot_value_new_variable (cov->vars[i]));
+ for (size_t i = 0, n = 0; i < cov->dim - cov->n_vars; n++)
+ {
+ const struct interaction *iact =
+ categoricals_get_interaction_by_subscript (cov->categoricals, i);
+
+ struct string str = DS_EMPTY_INITIALIZER;
+ interaction_to_string (iact, &str);
+ struct pivot_category *group = pivot_category_create_group__ (
+ factors->root,
+ pivot_value_new_user_text_nocopy (ds_steal_cstr (&str)));
+
+ int df = categoricals_df (cov->categoricals, n);
+ for (int j = 0; j < df; j++)
+ pivot_category_create_leaf_rc (group, pivot_value_new_integer (j),
+ PIVOT_RC_INTEGER);
+
+ i += df;
+ }
+
+ struct pivot_dimension *matrix = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, "Matrix", "Matrix");
+ matrix->hide_all_labels = true;
+
+ return table;
+}
+
+
+/*
+ Append table T, which should have been returned by covariance_dump_enc_header
+ with an entry corresponding to case C for the covariance matrix COV
+ */
+void
+covariance_dump_enc (const struct covariance *cov, const struct ccase *c,
+ struct pivot_table *table)
+{
+ int row = pivot_category_create_leaf (
+ table->dimensions[1]->root,
+ pivot_value_new_integer (table->dimensions[1]->n_leaves));
+
+ for (int i = 0 ; i < cov->dim; ++i)
+ pivot_table_put2 (
+ table, i, row, pivot_value_new_number (get_val (cov, i, c)));
+}