--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "matrix-reader.h"
+
+#include <stdbool.h>
+
+#include <libpspp/message.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/dictionary.h>
+#include <data/variable.h>
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+
+/*
+This module interprets a "data matrix", typically generated by the command
+MATRIX DATA. The dictionary of such a matrix takes the form:
+
+ s_0, s_1, ... s_m, ROWTYPE_, VARNAME_, v_0, v_1, .... v_n
+
+where s_0, s_1 ... s_m are the variables defining the splits, and
+v_0, v_1 ... v_n are the continuous variables.
+
+m >= 0; n >= 0
+
+The variables ROWTYPE_ and VARNAME_ are of type A8,
+the variables s_x are of type F4.0 (although this reader accepts any type),
+and v_x are of any numeric type.
+
+The values of the ROWTYPE_ variable are in the set {MEAN, STDDEV, N, CORR, COV}
+and determine the purpose of that case.
+The values of the VARNAME_ variable must correspond to the names of the varibles
+in {v_0, v_1 ... v_n} and indicate the rows of the correlation or covariance
+matrices.
+
+
+
+A typical example is as follows:
+
+s_0 ROWTYPE_ VARNAME_ v_0 v_1 v_2
+
+0 MEAN 5.0000 4.0000 3.0000
+0 STDDEV 1.0000 2.0000 3.0000
+0 N 9.0000 9.0000 9.0000
+0 CORR V1 1.0000 .6000 .7000
+0 CORR V2 .6000 1.0000 .8000
+0 CORR V3 .7000 .8000 1.0000
+1 MEAN 9.0000 8.0000 7.0000
+1 STDDEV 5.0000 6.0000 7.0000
+1 N 9.0000 9.0000 9.0000
+1 CORR V1 1.0000 .4000 .3000
+1 CORR V2 .4000 1.0000 .2000
+1 CORR V3 .3000 .2000 1.0000
+
+*/
+
+struct matrix_reader
+{
+ const struct dictionary *dict;
+ const struct variable *varname;
+ const struct variable *rowtype;
+ struct casegrouper *grouper;
+
+ gsl_matrix *n_vectors;
+ gsl_matrix *mean_vectors;
+ gsl_matrix *var_vectors;
+
+ gsl_matrix *correlation;
+ gsl_matrix *covariance;
+};
+
+struct matrix_reader *
+create_matrix_reader_from_case_reader (const struct dictionary *dict, struct casereader *in_reader,
+ const struct variable ***vars, size_t *n_vars)
+{
+ struct matrix_reader *mr = xzalloc (sizeof *mr);
+
+ mr->dict = dict;
+ mr->varname = dict_lookup_var (dict, "varname_");
+ if (mr->varname == NULL)
+ {
+ msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_");
+ free (mr);
+ return NULL;
+ }
+
+ mr->rowtype = dict_lookup_var (dict, "rowtype_");
+ if (mr->rowtype == NULL)
+ {
+ msg (ME, _("Matrix dataset lacks a variable called %s."), "ROWTYPE_");
+ free (mr);
+ return NULL;
+ }
+
+ size_t dvarcnt;
+ const struct variable **dvars = NULL;
+ dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH);
+
+ if (n_vars)
+ *n_vars = dvarcnt - var_get_dict_index (mr->varname) - 1;
+
+ if (vars)
+ {
+ int i;
+ *vars = xcalloc (sizeof (struct variable **), *n_vars);
+
+ for (i = 0; i < *n_vars; ++i)
+ {
+ (*vars)[i] = dvars[i + var_get_dict_index (mr->varname) + 1];
+ }
+ }
+
+ /* All the variables before ROWTYPE_ (if any) are split variables */
+ mr->grouper = casegrouper_create_vars (in_reader, dvars, var_get_dict_index (mr->rowtype));
+
+ free (dvars);
+
+ return mr;
+}
+
+bool
+destroy_matrix_reader (struct matrix_reader *mr)
+{
+ if (mr == NULL)
+ return false;
+ bool ret = casegrouper_destroy (mr->grouper);
+ free (mr);
+ return ret;
+}
+
+
+bool
+next_matrix_from_reader (struct matrix_material *mm,
+ struct matrix_reader *mr,
+ const struct variable **vars, int n_vars)
+{
+ struct casereader *group;
+
+ gsl_matrix_free (mr->n_vectors);
+ gsl_matrix_free (mr->mean_vectors);
+ gsl_matrix_free (mr->var_vectors);
+ gsl_matrix_free (mr->correlation);
+ gsl_matrix_free (mr->covariance);
+
+ if (!casegrouper_get_next_group (mr->grouper, &group))
+ return false;
+
+ mr->n_vectors = gsl_matrix_alloc (n_vars, n_vars);
+ mr->mean_vectors = gsl_matrix_alloc (n_vars, n_vars);
+ mr->var_vectors = gsl_matrix_alloc (n_vars, n_vars);
+
+ mm->n = mr->n_vectors;
+ mm->mean_matrix = mr->mean_vectors;
+ mm->var_matrix = mr->var_vectors;
+
+ mr->correlation = NULL;
+ mr->covariance = NULL;
+
+ struct ccase *c;
+ int crow = 0;
+ for ( ; (c = casereader_read (group) ); case_unref (c))
+ {
+ const union value *uv = case_data (c, mr->rowtype);
+ int col, row;
+ for (col = 0; col < n_vars; ++col)
+ {
+ const struct variable *cv
+ = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col);
+ double x = case_data (c, cv)->f;
+ if (0 == strncasecmp ((char *)value_str (uv, 8), "N ", 8))
+ for (row = 0; row < n_vars; ++row)
+ gsl_matrix_set (mr->n_vectors, row, col, x);
+ else if (0 == strncasecmp ((char *) value_str (uv, 8), "MEAN ", 8))
+ for (row = 0; row < n_vars; ++row)
+ gsl_matrix_set (mr->mean_vectors, row, col, x);
+ else if (0 == strncasecmp ((char *) value_str (uv, 8), "STDDEV ", 8))
+ for (row = 0; row < n_vars; ++row)
+ gsl_matrix_set (mr->var_vectors, row, col, x * x);
+ }
+ if (0 == strncasecmp ((char *) value_str (uv, 8), "CORR ", 8))
+ {
+ if (mr->correlation == NULL)
+ mr->correlation = gsl_matrix_alloc (n_vars, n_vars);
+ for (col = 0; col < n_vars; ++col)
+ {
+ const struct variable *cv
+ = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col);
+ double x = case_data (c, cv)->f;
+ gsl_matrix_set (mr->correlation, crow, col, x);
+ }
+ crow++;
+ }
+ else if (0 == strncasecmp ((char *) value_str (uv, 8), "COV ", 8))
+ {
+ if (mr->covariance == NULL)
+ mr->covariance = gsl_matrix_alloc (n_vars, n_vars);
+ for (col = 0; col < n_vars; ++col)
+ {
+ const struct variable *cv
+ = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col);
+ double x = case_data (c, cv)->f;
+ gsl_matrix_set (mr->covariance, crow, col, x);
+ }
+ crow++;
+ }
+ }
+
+ casereader_destroy (group);
+
+ mm->cov = mr->covariance;
+ mm->corr = mr->correlation;
+
+ return true;
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef MATRIX_READER_H
+#define MATRIX_READER_H
+
+#include <gsl/gsl_matrix.h>
+#include <stdbool.h>
+
+struct matrix_material
+{
+ gsl_matrix *corr ; /* The correlation matrix */
+ gsl_matrix *cov ; /* The covariance matrix */
+
+ /* Moment matrices */
+ const gsl_matrix *n ; /* MOMENT 0 */
+ const gsl_matrix *mean_matrix; /* MOMENT 1 */
+ const gsl_matrix *var_matrix; /* MOMENT 2 */
+};
+
+struct dictionary;
+struct variable;
+struct casereader;
+
+
+struct matrix_reader;
+
+struct matrix_reader *create_matrix_reader_from_case_reader (const struct dictionary *dict,
+ struct casereader *in_reader,
+ const struct variable ***vars, size_t *n_vars);
+
+bool destroy_matrix_reader (struct matrix_reader *mr);
+
+bool next_matrix_from_reader (struct matrix_material *mm,
+ struct matrix_reader *mr,
+ const struct variable **vars, int n_vars);
+
+
+#endif