#include <config.h>
+#include <math/covariance.h>
#include <math/design-matrix.h>
#include <gsl/gsl_matrix.h>
#include <data/casegrouper.h>
#include <output/table.h>
#include <libpspp/message.h>
#include <data/format.h>
+#include <math/moments.h>
#include <math.h>
#include "xalloc.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
-/* Returns the correlation matrix corresponding to the covariance
-matrix COV. The return value must be freed with gsl_matrix_free
-when no longer required.
-*/
-static gsl_matrix *
-covariance_to_correlation (const gsl_matrix *cov)
-{
- size_t r, c;
- gsl_matrix *corr = gsl_matrix_alloc (cov->size1, cov->size2);
-
- for (r = 0 ; r < cov->size1; ++r)
- {
- for (c = 0 ; c < cov->size2 ; ++c)
- {
- double x = gsl_matrix_get (cov, r, c);
- x /= sqrt (gsl_matrix_get (cov, r, r)
- * gsl_matrix_get (cov, c, c) );
- gsl_matrix_set (corr, r, c, x);
- }
- }
-
- return corr;
-}
static double
significance_of_correlation (double rho, double w)
tab_submit (t);
}
+
+static gsl_matrix *
+correlation_from_covariance (const gsl_matrix *cv, const gsl_matrix *v)
+{
+ size_t i, j;
+ gsl_matrix *corr = gsl_matrix_calloc (cv->size1, cv->size2);
+
+ for (i = 0 ; i < cv->size1; ++i)
+ {
+ for (j = 0 ; j < cv->size2; ++j)
+ {
+ double rho = gsl_matrix_get (cv, i, j);
+
+ rho /= sqrt (gsl_matrix_get (v, i, j))
+ *
+ sqrt (gsl_matrix_get (v, j, i));
+
+ gsl_matrix_set (corr, i, j, rho);
+ }
+ }
+
+ return corr;
+}
+
+
+
+
static void
run_corr (struct casereader *r, const struct corr_opts *opts, const struct corr *corr)
{
struct ccase *c;
- const struct design_matrix *cov_matrix;
+ const gsl_matrix *var_matrix;
const gsl_matrix *samples_matrix;
+ const gsl_matrix *cov_matrix;
+ gsl_matrix *corr_matrix;
+ struct covariance *cov = covariance_create (corr->n_vars_total, corr->vars,
+ opts->wv, opts->exclude);
for ( ; (c = casereader_read (r) ); case_unref (c))
{
-
+ covariance_accumulate (cov, c);
}
+ cov_matrix = covariance_calculate (cov);
+
+ samples_matrix = covariance_moments (cov, MOMENT_NONE);
+ var_matrix = covariance_moments (cov, MOMENT_VARIANCE);
+
+ corr_matrix = correlation_from_covariance (cov_matrix, var_matrix);
output_correlation (corr, opts,
- covariance_to_correlation (cov_matrix->m),
+ corr_matrix,
samples_matrix );
+
+ covariance_destroy (cov);
+ gsl_matrix_free (corr_matrix);
}
int
cmd_correlation (struct lexer *lexer, struct dataset *ds)
{
+ int i;
int n_all_vars = 0; /* Total number of variables involved in this command */
+ const struct variable **all_vars ;
const struct dictionary *dict = dataset_dict (ds);
bool ok = true;
}
- const struct variable **all_vars = xmalloc (sizeof (*all_vars) * n_all_vars);
- int i;
+ all_vars = xmalloc (sizeof (*all_vars) * n_all_vars);
{
/* FIXME: Using a hash here would make more sense */
r = casereader_create_filter_missing (r, all_vars, n_all_vars,
opts.exclude, NULL, NULL);
+
run_corr (r, &opts, &corr[i]);
casereader_destroy (r);
}
src/math/box-whisker.c src/math/box-whisker.h \
src/math/coefficient.c \
src/math/coefficient.h \
+ src/math/covariance.c \
+ src/math/covariance.h \
src/math/covariance-matrix.c \
src/math/covariance-matrix.h \
src/math/design-matrix.c src/math/design-matrix.h \
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "covariance.h"
+#include <gl/xalloc.h>
+#include "moments.h"
+#include <gsl/gsl_matrix.h>
+#include <data/case.h>
+#include <data/variable.h>
+#include <libpspp/misc.h>
+
+#define n_MOMENTS (MOMENT_VARIANCE + 1)
+
+
+struct covariance
+{
+ /* The variables for which the covariance matrix is to be calculated */
+ size_t n_vars;
+ const struct variable **vars;
+
+ /* The weight variable (or NULL if none) */
+ const struct variable *wv;
+
+ /* A set of matrices containing the 0th, 1st and 2nd moments */
+ gsl_matrix **moments;
+
+ /* The class of missing values to exclude */
+ enum mv_class exclude;
+
+ /* An array of doubles representing the covariance matrix.
+ Only the top triangle is included, and no diagonals */
+ double *cm;
+ int n_cm;
+};
+
+
+
+/* Return a matrix containing the M th moments.
+ The matrix is of size NxN where N is the number of variables.
+ Each row represents the moments of a variable.
+ In the absence of missing values, the columns of this matrix will
+ be identical. If missing values are involved, then element (i,j)
+ is the moment of the i th variable, when paired with the j th variable.
+ */
+const gsl_matrix *
+covariance_moments (const struct covariance *cov, int m)
+{
+ return cov->moments[m];
+}
+
+
+
+/* Create a covariance struct */
+struct covariance *
+covariance_create (size_t n_vars, const struct variable **vars,
+ const struct variable *weight, enum mv_class exclude)
+{
+ size_t i;
+ struct covariance *cov = xmalloc (sizeof *cov);
+ cov->vars = xmalloc (sizeof *cov->vars * n_vars);
+
+ cov->wv = weight;
+ cov->n_vars = n_vars;
+
+ for (i = 0; i < n_vars; ++i)
+ cov->vars[i] = vars[i];
+
+ cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
+
+ for (i = 0; i < n_MOMENTS; ++i)
+ cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
+
+ cov->exclude = exclude;
+
+ cov->n_cm = (n_vars * (n_vars - 1) ) / 2;
+
+ cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+
+ return cov;
+}
+
+/* Return an integer, which can be used to index
+ into COV->cm, to obtain the I, J th element
+ of the covariance matrix. If COV->cm does not
+ contain that element, then a negative value
+ will be returned.
+*/
+static int
+cm_idx (const struct covariance *cov, int i, int j)
+{
+ int as;
+ const int n2j = cov->n_vars - 2 - j;
+ const int nj = cov->n_vars - 2 ;
+
+ assert (i >= 0);
+ assert (j < cov->n_vars);
+
+ if ( i == 0)
+ return -1;
+
+ if (j >= cov->n_vars - 1)
+ return -1;
+
+ if ( i <= j)
+ return -1 ;
+
+ as = nj * (nj + 1) ;
+ as -= n2j * (n2j + 1) ;
+ as /= 2;
+
+ return i - 1 + as;
+}
+
+
+/* Call this function for every case in the data set */
+void
+covariance_accumulate (struct covariance *cov, const struct ccase *c)
+{
+ size_t i, j, m;
+ const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0;
+
+ for (i = 0 ; i < cov->n_vars; ++i)
+ {
+ const union value *val1 = case_data (c, cov->vars[i]);
+
+ if ( var_is_value_missing (cov->vars[i], val1, cov->exclude))
+ continue;
+
+ for (j = 0 ; j < cov->n_vars; ++j)
+ {
+ double pwr = 1.0;
+ int idx;
+ const union value *val2 = case_data (c, cov->vars[j]);
+
+ if ( var_is_value_missing (cov->vars[j], val2, cov->exclude))
+ continue;
+
+ idx = cm_idx (cov, i, j);
+ if (idx >= 0)
+ {
+ cov->cm [idx] += val1->f * val2->f;
+ }
+
+ for (m = 0 ; m < n_MOMENTS; ++m)
+ {
+ double *x = gsl_matrix_ptr (cov->moments[m], i, j);
+
+ *x += pwr * weight;
+ pwr *= val1->f;
+ }
+ }
+ }
+}
+
+
+/*
+ Allocate and return a gsl_matrix containing the covariances of the
+ data.
+*/
+static gsl_matrix *
+cm_to_gsl (struct covariance *cov)
+{
+ int i, j;
+ gsl_matrix *m = gsl_matrix_calloc (cov->n_vars, cov->n_vars);
+
+ /* Copy the non-diagonal elements from cov->cm */
+ for ( j = 0 ; j < cov->n_vars - 1; ++j)
+ {
+ for (i = j+1 ; i < cov->n_vars; ++i)
+ {
+ double x = cov->cm [cm_idx (cov, i, j)];
+ gsl_matrix_set (m, i, j, x);
+ gsl_matrix_set (m, j, i, x);
+ }
+ }
+
+ /* Copy the diagonal elements from cov->moments[2] */
+ for (j = 0 ; j < cov->n_vars ; ++j)
+ {
+ double sigma = gsl_matrix_get (cov->moments[2], j, j);
+ gsl_matrix_set (m, j, j, sigma);
+ }
+
+ return m;
+}
+
+
+
+/*
+ Return a pointer to gsl_matrix containing the pairwise covariances.
+ The matrix remains owned by the COV object, and must not be freed.
+ Call this function only after all data have been accumulated.
+*/
+const gsl_matrix *
+covariance_calculate (struct covariance *cov)
+{
+ size_t i, j;
+ size_t m;
+
+ for (m = 0; m < n_MOMENTS; ++m)
+ {
+ /* Divide the moments by the number of samples */
+ if ( m > 0)
+ {
+ for (i = 0 ; i < cov->n_vars; ++i)
+ {
+ for (j = 0 ; j < cov->n_vars; ++j)
+ {
+ double *x = gsl_matrix_ptr (cov->moments[m], i, j);
+ *x /= gsl_matrix_get (cov->moments[0], i, j);
+
+ if ( m == MOMENT_VARIANCE)
+ *x -= pow2 (gsl_matrix_get (cov->moments[1], i, j));
+ }
+ }
+ }
+ }
+
+ /* Centre the moments */
+ for ( j = 0 ; j < cov->n_vars - 1; ++j)
+ {
+ for (i = j + 1 ; i < cov->n_vars; ++i)
+ {
+ double *x = &cov->cm [cm_idx (cov, i, j)];
+
+ *x /= gsl_matrix_get (cov->moments[0], i, j);
+
+ *x -=
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
+ *
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
+ }
+ }
+
+ return cm_to_gsl (cov);
+}
+
+
+/* Destroy the COV object */
+void
+covariance_destroy (struct covariance *cov)
+{
+ size_t i;
+ free (cov->vars);
+
+ for (i = 0; i < n_MOMENTS; ++i)
+ gsl_matrix_free (cov->moments[i]);
+
+ free (cov->moments);
+ free (cov->cm);
+ free (cov);
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+
+#ifndef COVARIANCE_H
+#define COVARIANCE_H
+
+#include <stddef.h>
+
+#include <data/missing-values.h>
+#include <gsl/gsl_matrix.h>
+
+struct covariance;
+struct variable;
+struct ccase ;
+
+struct covariance * covariance_create (size_t n_vars, const struct variable **vars,
+ const struct variable *wv, enum mv_class excl);
+
+void covariance_accumulate (struct covariance *, const struct ccase *);
+
+const gsl_matrix * covariance_calculate (struct covariance *cov);
+
+void covariance_destroy (struct covariance *cov);
+
+const gsl_matrix *covariance_moments (const struct covariance *cov, int m);
+
+#endif
tests/command/beg-data.sh \
tests/command/bignum.sh \
tests/command/count.sh \
+ tests/command/correlation.sh \
tests/command/data-list.sh \
tests/command/do-if.sh \
tests/command/do-repeat.sh \
src/libpspp/float-format.c \
tests/dissect-sysfile.c
tests_dissect_sysfile_LDADD = gl/libgl.la @LIBINTL@
+tests_dissect_sysfile_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\"
EXTRA_DIST += \
$(dist_TESTS) \
--- /dev/null
+#!/bin/sh
+
+# This program tests the CORRELATIONS command
+
+TEMPDIR=/tmp/pspp-tst-$$
+TESTFILE=$TEMPDIR/`basename $0`.sps
+
+# ensure that top_srcdir and top_builddir are absolute
+if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi
+if [ -z "$top_builddir" ] ; then top_builddir=. ; fi
+top_srcdir=`cd $top_srcdir; pwd`
+top_builddir=`cd $top_builddir; pwd`
+
+PSPP=$top_builddir/src/ui/terminal/pspp
+
+STAT_CONFIG_PATH=$top_srcdir/config
+export STAT_CONFIG_PATH
+
+LANG=C
+export LANG
+
+
+cleanup()
+{
+ if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then
+ echo "NOT cleaning $TEMPDIR"
+ return ;
+ fi
+ rm -rf $TEMPDIR
+}
+
+
+fail()
+{
+ echo $activity
+ echo FAILED
+ cleanup;
+ exit 1;
+}
+
+
+no_result()
+{
+ echo $activity
+ echo NO RESULT;
+ cleanup;
+ exit 2;
+}
+
+pass()
+{
+ cleanup;
+ exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+activity="create program"
+cat << EOF > $TESTFILE
+set format = F11.3.
+data list notable list /foo * bar * wiz * bang *.
+begin data.
+1 0 3 1
+3 9 -50 5
+3 4 3 203
+4 -9 0 -4
+98 78 104 2
+3 50 -49 200
+. 4 4 4
+5 3 0 .
+end data.
+
+correlations
+ variables = foo bar wiz bang
+ /print nosig
+ /missing = listwise
+ .
+
+correlations
+ variables = bar wiz
+ /print nosig
+ /missing = listwise
+ .
+
+correlations
+ variables = foo bar wiz bang
+ /print nosig
+ /missing = pairwise
+ .
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="run program"
+$SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="compare results"
+perl -pi -e 's/^\s*$//g' $TEMPDIR/pspp.list
+cp $TEMPDIR/pspp.list /tmp
+diff -b $TEMPDIR/pspp.list - << EOF
+1.1 CORRELATIONS. Correlations
+#========================#=====#=====#=====#=====#
+# #foo |bar |wiz |bang #
+#----+-------------------#-----+-----+-----+-----#
+#foo |Pearson Correlation#1.000| .802| .890|-.308#
+# |Sig. (2-tailed) # | .055| .017| .553#
+#----+-------------------#-----+-----+-----+-----#
+#bar |Pearson Correlation# .802|1.000| .519| .118#
+# |Sig. (2-tailed) # .055| | .291| .824#
+#----+-------------------#-----+-----+-----+-----#
+#wiz |Pearson Correlation# .890| .519|1.000|-.344#
+# |Sig. (2-tailed) # .017| .291| | .505#
+#----+-------------------#-----+-----+-----+-----#
+#bang|Pearson Correlation#-.308| .118|-.344|1.000#
+# |Sig. (2-tailed) # .553| .824| .505| #
+#====#===================#=====#=====#=====#=====#
+2.1 CORRELATIONS. Correlations
+#=======================#=====#=====#
+# #bar |wiz #
+#---+-------------------#-----+-----#
+#bar|Pearson Correlation#1.000| .497#
+# |Sig. (2-tailed) # | .210#
+#---+-------------------#-----+-----#
+#wiz|Pearson Correlation# .497|1.000#
+# |Sig. (2-tailed) # .210| #
+#===#===================#=====#=====#
+3.1 CORRELATIONS. Correlations
+#========================#=====#=====#=====#=====#
+# #foo |bar |wiz |bang #
+#----+-------------------#-----+-----+-----+-----#
+#foo |Pearson Correlation#1.000| .805| .883|-.308#
+# |Sig. (2-tailed) # | .029| .008| .553#
+# |N # 7| 7| 7| 6#
+#----+-------------------#-----+-----+-----+-----#
+#bar |Pearson Correlation# .805|1.000| .497| .164#
+# |Sig. (2-tailed) # .029| | .210| .725#
+# |N # 7| 8| 8| 7#
+#----+-------------------#-----+-----+-----+-----#
+#wiz |Pearson Correlation# .883| .497|1.000|-.337#
+# |Sig. (2-tailed) # .008| .210| | .460#
+# |N # 7| 8| 8| 7#
+#----+-------------------#-----+-----+-----+-----#
+#bang|Pearson Correlation#-.308| .164|-.337|1.000#
+# |Sig. (2-tailed) # .553| .725| .460| #
+# |N # 6| 7| 7| 7#
+#====#===================#=====#=====#=====#=====#
+EOF
+if [ $? -ne 0 ] ; then fail ; fi
+
+pass;