/* PSPP - a program for statistical analysis.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include <libpspp/assertion.h>
-#include "covariance.h"
-#include <gl/xalloc.h>
-#include "moments.h"
+#include "math/covariance.h"
+
#include <gsl/gsl_matrix.h>
-#include <data/case.h>
-#include <data/variable.h>
-#include <libpspp/misc.h>
-#include "categoricals.h"
+
+#include "data/case.h"
+#include "data/variable.h"
+#include "libpspp/assertion.h"
+#include "libpspp/misc.h"
+#include "math/categoricals.h"
+#include "math/interaction.h"
+#include "math/moments.h"
+
+#include "gl/xalloc.h"
#define n_MOMENTS (MOMENT_VARIANCE + 1)
gsl_matrix_set (out, i, j, x);
}
}
-
+
gsl_matrix_free (in);
return out;
struct covariance
{
+ /* True if the covariances are centered (i.e. real covariances). */
+ bool centered;
+
/* The variables for which the covariance matrix is to be calculated. */
size_t n_vars;
- const struct variable **vars;
+ const struct variable *const *vars;
/* Categorical variables. */
struct categoricals *categoricals;
double *cm;
int n_cm;
- /* 1 for single pass algorithm;
+ /* 1 for single pass algorithm;
2 for double pass algorithm
*/
short passes;
/*
0 : No pass has been made
1 : First pass has been started
- 2 : Second pass has been
-
+ 2 : Second pass has been started
+
IE: How many passes have been (partially) made. */
short state;
/* Flags indicating that the first case has been seen */
bool pass_one_first_case_seen;
bool pass_two_first_case_seen;
+
+ gsl_matrix *unnormalised;
};
/* Create a covariance struct.
*/
struct covariance *
-covariance_1pass_create (size_t n_vars, const struct variable **vars,
- const struct variable *weight, enum mv_class exclude)
+covariance_1pass_create (size_t n_vars, const struct variable *const *vars,
+ const struct variable *weight, enum mv_class exclude,
+ bool centered)
{
size_t i;
- struct covariance *cov = xmalloc (sizeof *cov);
+ struct covariance *cov = xzalloc (sizeof *cov);
+ cov->centered = centered;
cov->passes = 1;
cov->state = 0;
cov->pass_one_first_case_seen = cov->pass_two_first_case_seen = false;
-
+
cov->vars = vars;
cov->wv = weight;
cov->dim = n_vars;
cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
-
+
for (i = 0; i < n_MOMENTS; ++i)
cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
cov->n_cm = (n_vars * (n_vars - 1) ) / 2;
- cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+
+ cov->cm = xcalloc (cov->n_cm, sizeof *cov->cm);
cov->categoricals = NULL;
return cov;
until then.
*/
struct covariance *
-covariance_2pass_create (size_t n_vars, const struct variable **vars,
- size_t n_catvars, const struct variable **catvars,
- const struct variable *wv, enum mv_class exclude)
+covariance_2pass_create (size_t n_vars, const struct variable *const *vars,
+ struct categoricals *cats,
+ const struct variable *wv, enum mv_class exclude,
+ bool centered)
{
size_t i;
struct covariance *cov = xmalloc (sizeof *cov);
+ cov->centered = centered;
cov->passes = 2;
cov->state = 0;
cov->pass_one_first_case_seen = cov->pass_two_first_case_seen = false;
-
+
cov->vars = vars;
cov->wv = wv;
cov->dim = n_vars;
cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
-
+
for (i = 0; i < n_MOMENTS; ++i)
cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
cov->n_cm = -1;
cov->cm = NULL;
- cov->categoricals = categoricals_create (catvars, n_catvars, wv, exclude);
+ cov->categoricals = cats;
+ cov->unnormalised = NULL;
return cov;
}
-/* Return an integer, which can be used to index
+/* Return an integer, which can be used to index
into COV->cm, to obtain the I, J th element
of the covariance matrix. If COV->cm does not
contain that element, then a negative value
int as;
const int n2j = cov->dim - 2 - j;
const int nj = cov->dim - 2 ;
-
+
assert (i >= 0);
assert (j < cov->dim);
if (j >= cov->dim - 1)
return -1;
- if ( i <= j)
+ if ( i <= j)
return -1 ;
as = nj * (nj + 1) ;
- as -= n2j * (n2j + 1) ;
+ as -= n2j * (n2j + 1) ;
as /= 2;
return i - 1 + as;
/*
- Returns true iff the variable corresponding to the Ith element of the covariance matrix
+ Returns true iff the variable corresponding to the Ith element of the covariance matrix
has a missing value for case C
*/
static bool
is_missing (const struct covariance *cov, int i, const struct ccase *c)
{
const struct variable *var = i < cov->n_vars ?
- cov->vars[i] :
- categoricals_get_variable_by_subscript (cov->categoricals, i - cov->n_vars);
+ cov->vars[i] :
+ categoricals_get_interaction_by_subscript (cov->categoricals, i - cov->n_vars)->vars[0];
const union value *val = case_data (c, var);
return val->f;
}
- return categoricals_get_binary_by_subscript (cov->categoricals, i - cov->n_vars, c);
+ return categoricals_get_effects_code_for_case (cov->categoricals, i - cov->n_vars, c);
}
+#if 0
void
dump_matrix (const gsl_matrix *m)
{
printf ("\n");
}
}
+#endif
/* Call this function for every case in the data set */
void
cov->state = 1;
}
- categoricals_update (cov->categoricals, c);
+ if (cov->categoricals)
+ categoricals_update (cov->categoricals, c);
for (i = 0 ; i < cov->dim; ++i)
{
assert (cov->state == 1);
cov->state = 2;
- cov->dim = cov->n_vars +
- categoricals_total (cov->categoricals) - categoricals_get_n_variables (cov->categoricals);
+ if (cov->categoricals)
+ categoricals_done (cov->categoricals);
+
+ cov->dim = cov->n_vars;
+
+ if (cov->categoricals)
+ cov->dim += categoricals_df_total (cov->categoricals);
cov->n_cm = (cov->dim * (cov->dim - 1) ) / 2;
- cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+ cov->cm = xcalloc (cov->n_cm, sizeof *cov->cm);
/* Grow the moment matrices so that they're large enough to accommodate the
categorical elements */
cov->moments[i] = resize_matrix (cov->moments[i], cov->dim);
}
- categoricals_done (cov->categoricals);
-
/* Populate the moments matrices with the categorical value elements */
for (i = cov->n_vars; i < cov->dim; ++i)
{
*x += s;
}
- ss =
+ ss =
(v1 - gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
- *
+ *
(v2 - gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
* weight
;
}
-/*
+/*
Allocate and return a gsl_matrix containing the covariances of the
data.
*/
}
-static const gsl_matrix *
+static gsl_matrix *
covariance_calculate_double_pass (struct covariance *cov)
{
size_t i, j;
return cm_to_gsl (cov);
}
-static const gsl_matrix *
+static gsl_matrix *
covariance_calculate_single_pass (struct covariance *cov)
{
size_t i, j;
}
}
- /* Centre the moments */
- for ( j = 0 ; j < cov->dim - 1; ++j)
+ if (cov->centered)
{
- for (i = j + 1 ; i < cov->dim; ++i)
+ /* Centre the moments */
+ for ( j = 0 ; j < cov->dim - 1; ++j)
{
- double *x = &cov->cm [cm_idx (cov, i, j)];
-
- *x /= gsl_matrix_get (cov->moments[0], i, j);
+ for (i = j + 1 ; i < cov->dim; ++i)
+ {
+ double *x = &cov->cm [cm_idx (cov, i, j)];
- *x -=
- gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
- *
- gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
+ *x /= gsl_matrix_get (cov->moments[0], i, j);
+
+ *x -=
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
+ *
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
+ }
}
}
}
+/* Return a pointer to a gsl_matrix containing the pairwise covariances, or
+ NULL if no pass over the data has been started. The caller owns the
+ returned matrix and must free it when it is no longer needed.
-/*
- Return a pointer to gsl_matrix containing the pairwise covariances.
- The matrix remains owned by the COV object, and must not be freed.
- Call this function only after all data have been accumulated.
-*/
-const gsl_matrix *
+ Call this function only after all data have been accumulated. */
+gsl_matrix *
covariance_calculate (struct covariance *cov)
{
- assert ( cov->state > 0 );
+ if ( cov->state <= 0 )
+ return NULL;
switch (cov->passes)
{
case 1:
- return covariance_calculate_single_pass (cov);
+ return covariance_calculate_single_pass (cov);
break;
case 2:
- return covariance_calculate_double_pass (cov);
+ return covariance_calculate_double_pass (cov);
break;
default:
NOT_REACHED ();
}
}
+/*
+ Covariance computed without dividing by the sample size.
+
+ Two-pass variant: no further arithmetic is done on COV here; the result
+ of cm_to_gsl (COV) is returned unchanged.
+ NOTE(review): cm_to_gsl is defined outside this hunk; it presumably builds
+ a newly allocated gsl_matrix from COV's accumulators - confirm.
+ */
+static gsl_matrix *
+covariance_calculate_double_pass_unnormalized (struct covariance *cov)
+{
+ return cm_to_gsl (cov);
+}
+/* Single-pass variant of the unnormalized covariance calculation.
+
+ If COV->centered is set, COV's accumulators are first adjusted in place:
+ every element of the MOMENT_VARIANCE matrix has
+ pow2 (MOMENT_MEAN element) / (MOMENT_NONE element) subtracted from it, and
+ every COV->cm entry with I > J has
+ MOMENT_MEAN[I][J] * MOMENT_MEAN[J][I] / MOMENT_NONE[I][J] subtracted from it.
+ NOTE(review): this reads like the usual "sum of products minus product of
+ sums over N" centering, which presumes MOMENT_MEAN still holds raw sums
+ and MOMENT_NONE the sum of weights at this point - confirm against the
+ accumulation code.
+
+ Because the adjustment overwrites COV's own accumulators, running it more
+ than once on the same COV would subtract the correction again.
+
+ Returns the result of cm_to_gsl (COV). */
+static gsl_matrix *
+covariance_calculate_single_pass_unnormalized (struct covariance *cov)
+{
+ size_t i, j;
+ /* When COV is not centered, the accumulators are converted as they are. */
+ if (cov->centered)
+ {
+ for (i = 0 ; i < cov->dim; ++i)
+ {
+ for (j = 0 ; j < cov->dim; ++j)
+ {
+ double *x = gsl_matrix_ptr (cov->moments[MOMENT_VARIANCE], i, j);
+ *x -= pow2 (gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
+ / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ }
+ }
+ /* Same kind of correction for the cross products held in COV->cm (I > J only). */
+ for ( j = 0 ; j < cov->dim - 1; ++j)
+ {
+ for (i = j + 1 ; i < cov->dim; ++i)
+ {
+ double *x = &cov->cm [cm_idx (cov, i, j)];
+
+ *x -=
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
+ *
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i)
+ / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ }
+ }
+ }
+
+ return cm_to_gsl (cov);
+}
+/* Return a pointer to a gsl_matrix containing the pairwise covariances,
+ computed without dividing by the sample size. The returned matrix is
+ owned by COV (it is cached in COV->unnormalised and released by
+ covariance_destroy), and must not be freed by the caller.
+
+ Returns NULL if COV->state <= 0, i.e. no pass over the data has been
+ started.
+
+ Call this function only after all data have been accumulated. */
+const gsl_matrix *
+covariance_calculate_unnormalized (struct covariance *cov)
+{
+ if ( cov->state <= 0 )
+ return NULL;
+ /* Computed at most once: later calls hand back the cached matrix. */
+ if (cov->unnormalised != NULL)
+ return cov->unnormalised;
+
+ switch (cov->passes)
+ {
+ case 1:
+ cov->unnormalised = covariance_calculate_single_pass_unnormalized (cov);
+ break;
+ case 2:
+ cov->unnormalised = covariance_calculate_double_pass_unnormalized (cov);
+ break;
+ default:
+ NOT_REACHED ();
+ }
+
+ return cov->unnormalised;
+}
+
+/* Function to access the categoricals used by COV.
+ The return value is owned by COV and must not be destroyed by the caller
+ (covariance_destroy passes it to categoricals_destroy). It is NULL when
+ COV has no categoricals, as for objects made by covariance_1pass_create.
+*/
+const struct categoricals *
+covariance_get_categoricals (const struct covariance *cov)
+{
+ return cov->categoricals;
+}
+
/* Destroy the COV object */
void
covariance_destroy (struct covariance *cov)
{
size_t i;
- free (cov->vars);
+
categoricals_destroy (cov->categoricals);
for (i = 0; i < n_MOMENTS; ++i)
gsl_matrix_free (cov->moments[i]);
+ gsl_matrix_free (cov->unnormalised);
free (cov->moments);
free (cov->cm);
free (cov);
}
+/* Return COV->dim, the dimension used for COV's matrices. The creation
+ functions set it to the number of variables; when a two-pass calculation
+ moves from state 1 to state 2 it is recomputed as the number of variables
+ plus categoricals_df_total () if COV has categoricals. */
+size_t
+covariance_dim (const struct covariance * cov)
+{
+ return (cov->dim);
+}
+
+\f
+
+/*
+ Routines to assist debugging.
+ The following are not thoroughly tested and are in certain respects
+ unreliable. They should be used only as aids to development,
+ not as user-accessible code.
+*/
+
+#include "libpspp/str.h"
+#include "output/tab.h"
+#include "data/format.h"
+
+
+/* Create a table which can be populated with the encodings for
+ the covariance matrix COV.
+
+ The table is made with tab_create (COV->dim, LENGTH) and titled
+ "Covariance Encoding". The cells at (I, 0) receive one title per variable
+ in COV->vars, followed by one joined title per categorical interaction,
+ each spanning positions I to I + DF - 1 where DF is the value returned by
+ categoricals_df () for that interaction.
+
+ Returns the new table. Entries for individual cases can then be added
+ with covariance_dump_enc. */
+struct tab_table *
+covariance_dump_enc_header (const struct covariance *cov, int length)
+{
+ struct tab_table *t = tab_create (cov->dim, length);
+ int n;
+ int i;
+
+ tab_title (t, "Covariance Encoding");
+
+ tab_box (t,
+ TAL_2, TAL_2, 0, 0,
+ 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
+
+ tab_hline (t, TAL_2, 0, tab_nc (t) - 1, 1);
+
+ /* One title cell, followed by a vertical rule, per plain variable. */
+ for (i = 0 ; i < cov->n_vars; ++i)
+ {
+ tab_text (t, i, 0, TAT_TITLE, var_get_name (cov->vars[i]));
+ tab_vline (t, TAL_1, i + 1, 0, tab_nr (t) - 1);
+ }
+ /* I carries on from COV->n_vars; N counts the interactions handled so far. */
+ n = 0;
+ while (i < cov->dim)
+ {
+ struct string str;
+ int idx = i - cov->n_vars;
+ const struct interaction *iact =
+ categoricals_get_interaction_by_subscript (cov->categoricals, idx);
+ int df;
+
+ ds_init_empty (&str);
+ interaction_to_string (iact, &str);
+
+ df = categoricals_df (cov->categoricals, n);
+
+ tab_joint_text (t,
+ i, 0,
+ i + df - 1, 0,
+ TAT_TITLE, ds_cstr (&str));
+ /* NOTE(review): I + DF is a position along the title row but is compared with tab_nr (); tab_nc () may have been intended - confirm. */
+ if (i + df < tab_nr (t) - 1)
+ tab_vline (t, TAL_1, i + df, 0, tab_nr (t) - 1);
+
+ i += df;
+ n++;
+ ds_destroy (&str);
+ }
+
+ return t;
+}
+
+
+/*
+ Append to table T, which should have been returned by
+ covariance_dump_enc_header, an entry corresponding to case C for the
+ covariance matrix COV: one cell per element I < COV->dim, holding the value
+ of get_val (COV, I, C). Cells beyond the first COV->n_vars are written
+ with format F_8_0; the others pass NULL as the format.
+
+ NOTE(review): the row number lives in a function-local static counter that
+ is incremented on every call and never reset, so it keeps advancing even
+ when a different T or COV is passed, and nothing here checks it against
+ the size of T.
+ */
+void
+covariance_dump_enc (const struct covariance *cov, const struct ccase *c,
+ struct tab_table *t)
+{
+ static int row = 0;
+ int i;
+ ++row;
+ for (i = 0 ; i < cov->dim; ++i)
+ {
+ double v = get_val (cov, i, c);
+ tab_double (t, i, row, 0, v, i < cov->n_vars ? NULL : &F_8_0, RC_OTHER);
+ }
+}