From b2b5300cd5deace18d4277c58dcbac7993693863 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 4 May 2017 17:25:49 +0200 Subject: [PATCH] New module matrix-reader Added a new module to help read the files created by MATRIX DATA --- src/language/data-io/automake.mk | 2 + src/language/data-io/matrix-reader.c | 233 +++++++++++++++++++++++++++ src/language/data-io/matrix-reader.h | 52 ++++++ 3 files changed, 287 insertions(+) create mode 100644 src/language/data-io/matrix-reader.c create mode 100644 src/language/data-io/matrix-reader.h diff --git a/src/language/data-io/automake.mk b/src/language/data-io/automake.mk index 1b3f5a9383..5579b81912 100644 --- a/src/language/data-io/automake.mk +++ b/src/language/data-io/automake.mk @@ -22,6 +22,8 @@ language_data_io_sources = \ src/language/data-io/print-space.c \ src/language/data-io/print.c \ src/language/data-io/matrix-data.c \ + src/language/data-io/matrix-reader.c \ + src/language/data-io/matrix-reader.h \ src/language/data-io/save-translate.c \ src/language/data-io/save.c \ src/language/data-io/trim.c \ diff --git a/src/language/data-io/matrix-reader.c b/src/language/data-io/matrix-reader.c new file mode 100644 index 0000000000..03df31c97d --- /dev/null +++ b/src/language/data-io/matrix-reader.c @@ -0,0 +1,233 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2017 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/
+
+/* NOTE(review): the <...> targets of the #include directives were missing
+   from this copy of the patch.  The header names below were reconstructed
+   from the functions this file calls; confirm them against the tree.  */
+#include <config.h>
+
+#include "matrix-reader.h"
+
+#include <stdbool.h>
+
+#include <libpspp/message.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/dictionary.h>
+#include <data/variable.h>
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+
+/*
+This module interprets a "data matrix", typically generated by the command
+MATRIX DATA.  The dictionary of such a matrix takes the form:
+
+ s_0, s_1, ... s_m, ROWTYPE_, VARNAME_, v_0, v_1, .... v_n
+
+where s_0, s_1 ... s_m are the variables defining the splits, and
+v_0, v_1 ... v_n are the continuous variables.
+
+m >= 0; n >= 0
+
+The variables ROWTYPE_ and VARNAME_ are of type A8,
+the variables s_x are of type F4.0 (although this reader accepts any type),
+and v_x are of any numeric type.
+
+The values of the ROWTYPE_ variable are in the set {MEAN, STDDEV, N, CORR, COV}
+and determine the purpose of that case.
+The values of the VARNAME_ variable must correspond to the names of the
+variables in {v_0, v_1 ... v_n} and indicate the rows of the correlation or
+covariance matrices.
+
+Note that this reader does not look at the value of VARNAME_: the CORR (or
+COV) cases of a split are taken, in the order in which they appear, to be
+rows 0, 1, ... of the matrix.
+
+
+A typical example is as follows:
+
+s_0  ROWTYPE_  VARNAME_     v_0      v_1      v_2
+
+0    MEAN                5.0000   4.0000   3.0000
+0    STDDEV              1.0000   2.0000   3.0000
+0    N                   9.0000   9.0000   9.0000
+0    CORR      V1        1.0000    .6000    .7000
+0    CORR      V2         .6000   1.0000    .8000
+0    CORR      V3         .7000    .8000   1.0000
+1    MEAN                9.0000   8.0000   7.0000
+1    STDDEV              5.0000   6.0000   7.0000
+1    N                   9.0000   9.0000   9.0000
+1    CORR      V1        1.0000    .4000    .3000
+1    CORR      V2         .4000   1.0000    .2000
+1    CORR      V3         .3000    .2000   1.0000
+
+*/
+
+/* Width, in bytes, of the value of the ROWTYPE_ variable.  */
+#define ROWTYPE_WIDTH 8
+
+/* The purposes that a case in a matrix dataset can have.  */
+enum row_kind
+  {
+    ROW_N,
+    ROW_MEAN,
+    ROW_STDDEV,
+    ROW_CORR,
+    ROW_COV,
+    ROW_OTHER                   /* Anything else; such cases are ignored.  */
+  };
+
+struct matrix_reader
+{
+  const struct dictionary *dict;   /* Dictionary describing the cases.  */
+  const struct variable *varname;  /* The VARNAME_ variable.  */
+  const struct variable *rowtype;  /* The ROWTYPE_ variable.  */
+  struct casegrouper *grouper;     /* Yields one group of cases per split.  */
+
+  /* Matrices built from the group most recently read.  The reader owns
+     them: they are released by the following call to
+     next_matrix_from_reader and by destroy_matrix_reader.  Each pointer is
+     null whenever no matrix is held.  */
+  gsl_matrix *n_vectors;
+  gsl_matrix *mean_vectors;
+  gsl_matrix *var_vectors;
+
+  gsl_matrix *correlation;
+  gsl_matrix *covariance;
+};
+
+/* Frees every matrix held by MR and resets the pointers to null, so that
+   calling this function again (or destroying MR later) cannot free the same
+   matrix twice.  */
+static void
+release_matrices (struct matrix_reader *mr)
+{
+  gsl_matrix_free (mr->n_vectors);
+  gsl_matrix_free (mr->mean_vectors);
+  gsl_matrix_free (mr->var_vectors);
+  gsl_matrix_free (mr->correlation);
+  gsl_matrix_free (mr->covariance);
+
+  mr->n_vectors = NULL;
+  mr->mean_vectors = NULL;
+  mr->var_vectors = NULL;
+  mr->correlation = NULL;
+  mr->covariance = NULL;
+}
+
+/* Returns the kind of case denoted by UV, the value of ROWTYPE_.  The
+   comparison ignores case.
+
+   Every keyword literal below is exactly ROWTYPE_WIDTH characters long,
+   padded on the right with spaces, because all ROWTYPE_WIDTH bytes of the
+   value take part in the comparison.  (This presumes that short string
+   values are padded with spaces -- confirm against the data layer.)  */
+static enum row_kind
+classify_row (const union value *uv)
+{
+  const char *s = (const char *) value_str (uv, ROWTYPE_WIDTH);
+
+  if (0 == strncasecmp (s, "N       ", ROWTYPE_WIDTH))
+    return ROW_N;
+  if (0 == strncasecmp (s, "MEAN    ", ROWTYPE_WIDTH))
+    return ROW_MEAN;
+  if (0 == strncasecmp (s, "STDDEV  ", ROWTYPE_WIDTH))
+    return ROW_STDDEV;
+  if (0 == strncasecmp (s, "CORR    ", ROWTYPE_WIDTH))
+    return ROW_CORR;
+  if (0 == strncasecmp (s, "COV     ", ROWTYPE_WIDTH))
+    return ROW_COV;
+
+  return ROW_OTHER;
+}
+
+/* Returns the variable that supplies column COL of the matrices: VARS[COL]
+   if the caller gave an explicit list, otherwise the COL'th variable after
+   VARNAME_ in the dictionary.  */
+static const struct variable *
+column_variable (const struct matrix_reader *mr,
+                 const struct variable **vars, int col)
+{
+  if (vars != NULL)
+    return vars[col];
+
+  return dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col);
+}
+
+/* Sets every element of column COL of the N_VARS x N_VARS matrix M to X.  */
+static void
+fill_column (gsl_matrix *m, int col, int n_vars, double x)
+{
+  int row;
+
+  for (row = 0; row < n_vars; ++row)
+    gsl_matrix_set (m, row, col, x);
+}
+
+/* Copies the N_VARS column values of case C into row ROW of M.  */
+static void
+store_row (gsl_matrix *m, int row, const struct ccase *c,
+           const struct matrix_reader *mr,
+           const struct variable **vars, int n_vars)
+{
+  int col;
+
+  for (col = 0; col < n_vars; ++col)
+    gsl_matrix_set (m, row, col,
+                    case_data (c, column_variable (mr, vars, col))->f);
+}
+
+/* Creates a matrix reader for the cases of IN_READER, which are described
+   by DICT.
+
+   DICT must contain variables named VARNAME_ and ROWTYPE_.  If either is
+   missing, an error is reported, IN_READER is left untouched and a null
+   pointer is returned.
+
+   The variables that precede ROWTYPE_ are treated as split variables; those
+   that follow VARNAME_ are the continuous variables.  If N_VARS is non-null,
+   *N_VARS receives the number of continuous variables.  If VARS is
+   non-null, *VARS receives a newly allocated array of them, which the caller
+   must free.  Either argument may be null independently of the other.
+
+   On success IN_READER is handed to a casegrouper.  The result must be
+   passed to destroy_matrix_reader when it is no longer needed.  */
+struct matrix_reader *
+create_matrix_reader_from_case_reader (const struct dictionary *dict, struct casereader *in_reader,
+                                       const struct variable ***vars, size_t *n_vars)
+{
+  struct matrix_reader *mr = xzalloc (sizeof *mr);
+
+  mr->dict = dict;
+  mr->varname = dict_lookup_var (dict, "varname_");
+  if (mr->varname == NULL)
+    {
+      msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_");
+      free (mr);
+      return NULL;
+    }
+
+  mr->rowtype = dict_lookup_var (dict, "rowtype_");
+  if (mr->rowtype == NULL)
+    {
+      msg (ME, _("Matrix dataset lacks a variable called %s."), "ROWTYPE_");
+      free (mr);
+      return NULL;
+    }
+
+  size_t dvarcnt;
+  const struct variable **dvars = NULL;
+  dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH);
+
+  /* The continuous variables start just after VARNAME_.  The count is kept
+     in a local so that VARS can be filled in even when the caller passes a
+     null N_VARS, and it is clamped so that it cannot wrap around.  */
+  const size_t first = var_get_dict_index (mr->varname) + 1;
+  const size_t n_continuous = dvarcnt > first ? dvarcnt - first : 0;
+
+  if (n_vars)
+    *n_vars = n_continuous;
+
+  if (vars)
+    {
+      *vars = xcalloc (n_continuous, sizeof **vars);
+
+      for (size_t i = 0; i < n_continuous; ++i)
+        (*vars)[i] = dvars[first + i];
+    }
+
+  /* All the variables before ROWTYPE_ (if any) are split variables */
+  mr->grouper = casegrouper_create_vars (in_reader, dvars, var_get_dict_index (mr->rowtype));
+
+  free (dvars);
+
+  return mr;
+}
+
+/* Destroys MR, including its casegrouper and any matrices it still holds.
+   Matrices obtained through next_matrix_from_reader must not be used
+   afterwards.
+
+   Returns the result of destroying the casegrouper, or false if MR is
+   null.  */
+bool
+destroy_matrix_reader (struct matrix_reader *mr)
+{
+  if (mr == NULL)
+    return false;
+  bool ret = casegrouper_destroy (mr->grouper);
+  release_matrices (mr);
+  free (mr);
+  return ret;
+}
+
+
+/* Reads the next split group from MR and stores its matrices in MM.
+
+   VARS lists the N_VARS variables that form the columns of the matrices.
+   VARS may be null, in which case the first N_VARS variables after VARNAME_
+   are used.
+
+   Returns false, leaving MM untouched, if there are no more groups.
+   Otherwise returns true, and:
+
+   - MM->n, MM->mean_matrix and MM->var_matrix are N_VARS x N_VARS matrices.
+     Each column holds, in every row, the value given for that variable by
+     the N, MEAN and STDDEV case respectively (STDDEV is squared to give a
+     variance).  A column is zero if the group has no such case.
+
+   - MM->corr and MM->cov are N_VARS x N_VARS matrices holding the CORR and
+     COV cases as consecutive rows, or null if the group has no such cases.
+     Rows without a corresponding case are zero; cases beyond the
+     N_VARS'th of either kind are ignored.
+
+   All of these matrices belong to MR.  They remain valid only until the
+   next call to this function or until MR is destroyed.  */
+bool
+next_matrix_from_reader (struct matrix_material *mm,
+                         struct matrix_reader *mr,
+                         const struct variable **vars, int n_vars)
+{
+  struct casereader *group;
+
+  /* Drop the previous group's matrices.  The pointers are reset as well, so
+     that returning early below leaves MR in a consistent state.  */
+  release_matrices (mr);
+
+  if (!casegrouper_get_next_group (mr->grouper, &group))
+    return false;
+
+  /* Zero-filled, so that a group lacking some kind of case yields zeros
+     rather than uninitialized memory.  */
+  mr->n_vectors = gsl_matrix_calloc (n_vars, n_vars);
+  mr->mean_vectors = gsl_matrix_calloc (n_vars, n_vars);
+  mr->var_vectors = gsl_matrix_calloc (n_vars, n_vars);
+
+  mm->n = mr->n_vectors;
+  mm->mean_matrix = mr->mean_vectors;
+  mm->var_matrix = mr->var_vectors;
+
+  /* CORR and COV cases are counted separately, so that a group containing
+     both kinds fills rows 0 ... N_VARS - 1 of each matrix.  */
+  int corr_row = 0;
+  int cov_row = 0;
+
+  struct ccase *c;
+  for ( ; (c = casereader_read (group)) != NULL; case_unref (c))
+    {
+      int col;
+
+      switch (classify_row (case_data (c, mr->rowtype)))
+        {
+        case ROW_N:
+          for (col = 0; col < n_vars; ++col)
+            fill_column (mr->n_vectors, col, n_vars,
+                         case_data (c, column_variable (mr, vars, col))->f);
+          break;
+
+        case ROW_MEAN:
+          for (col = 0; col < n_vars; ++col)
+            fill_column (mr->mean_vectors, col, n_vars,
+                         case_data (c, column_variable (mr, vars, col))->f);
+          break;
+
+        case ROW_STDDEV:
+          for (col = 0; col < n_vars; ++col)
+            {
+              double x = case_data (c, column_variable (mr, vars, col))->f;
+              fill_column (mr->var_vectors, col, n_vars, x * x);
+            }
+          break;
+
+        case ROW_CORR:
+          if (mr->correlation == NULL)
+            mr->correlation = gsl_matrix_calloc (n_vars, n_vars);
+          /* Surplus cases are dropped instead of being written beyond the
+             last row of the matrix.  */
+          if (corr_row < n_vars)
+            store_row (mr->correlation, corr_row++, c, mr, vars, n_vars);
+          break;
+
+        case ROW_COV:
+          if (mr->covariance == NULL)
+            mr->covariance = gsl_matrix_calloc (n_vars, n_vars);
+          if (cov_row < n_vars)
+            store_row (mr->covariance, cov_row++, c, mr, vars, n_vars);
+          break;
+
+        case ROW_OTHER:
+        default:
+          break;
+        }
+    }
+
+  casereader_destroy (group);
+
+  mm->cov = mr->covariance;
+  mm->corr = mr->correlation;
+
+  return true;
+}
diff --git a/src/language/data-io/matrix-reader.h b/src/language/data-io/matrix-reader.h
new file mode 100644
index 0000000000..7a651d7866
--- /dev/null
+++ b/src/language/data-io/matrix-reader.h
@@ -0,0 +1,52 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef MATRIX_READER_H
+#define MATRIX_READER_H
+
+/* NOTE(review): the <...> targets of the first two #include directives were
+   missing from this copy of the patch.  They were reconstructed from the
+   types used below (gsl_matrix, bool); confirm them against the tree.
+   <stddef.h> is added so that size_t is declared.  */
+#include <gsl/gsl_matrix.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+/* The matrices read from one split group of a matrix dataset.  They are
+   filled in by next_matrix_from_reader.  */
+struct matrix_material
+{
+  gsl_matrix *corr ;             /* The correlation matrix, or null if the
+                                    group has no CORR cases */
+  gsl_matrix *cov ;              /* The covariance matrix, or null if the
+                                    group has no COV cases */
+
+  /* Moment matrices.  In each of them every row is the same: column J holds
+     the value given for the J'th variable.  */
+  const gsl_matrix *n ;          /* MOMENT 0 */
+  const gsl_matrix *mean_matrix; /* MOMENT 1 */
+  const gsl_matrix *var_matrix;  /* MOMENT 2 (the square of STDDEV) */
+};
+
+struct dictionary;
+struct variable;
+struct casereader;
+
+
+struct matrix_reader;
+
+/* Creates a reader for the matrix dataset whose cases come from IN_READER
+   and are described by DICT.  Reports an error and returns a null pointer
+   if DICT lacks a VARNAME_ or a ROWTYPE_ variable.
+
+   If N_VARS is non-null, *N_VARS receives the number of variables that
+   follow VARNAME_ in DICT.  If VARS is non-null, *VARS receives a newly
+   allocated array of those variables, which the caller must free.  Callers
+   that pass VARS should pass N_VARS too, since the count is needed in order
+   to use the array.  */
+struct matrix_reader *create_matrix_reader_from_case_reader (const struct dictionary *dict,
+                                                             struct casereader *in_reader,
+                                                             const struct variable ***vars, size_t *n_vars);
+
+/* Destroys MR.  Returns the result of destroying its casegrouper, or false
+   if MR is null.  */
+bool destroy_matrix_reader (struct matrix_reader *mr);
+
+/* Reads the next split group from MR into MM.  VARS lists the N_VARS
+   variables that form the columns of the matrices; if VARS is null, the
+   N_VARS variables that follow VARNAME_ are used.
+
+   Returns true if a group was read, false if there are no more groups.
+   The matrices stored in MM belong to MR, which frees them when this
+   function is called again.  */
+bool next_matrix_from_reader (struct matrix_material *mm,
+                              struct matrix_reader *mr,
+                              const struct variable **vars, int n_vars);
+
+
+#endif
-- 
2.30.2