1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "matrix-reader.h"
23 #include <libpspp/hash-functions.h>
24 #include <libpspp/message.h>
25 #include <data/casegrouper.h>
26 #include <data/casereader.h>
27 #include <data/dictionary.h>
28 #include <data/variable.h>
31 #define _(msgid) gettext (msgid)
32 #define N_(msgid) msgid
36 This module interprets a "data matrix", typically generated by the command
37 MATRIX DATA. The dictionary of such a matrix takes the form:
39 s_0, s_1, ... s_m, ROWTYPE_, VARNAME_, v_0, v_1, .... v_n
41 where s_0, s_1 ... s_m are the variables defining the splits, and
42 v_0, v_1 ... v_n are the continuous variables.
46 The ROWTYPE_ variable is of type A8.
47 The VARNAME_ variable is a string type whose width is not predetermined.
48 The variables s_x are of type F4.0 (although this reader accepts any type),
49 and v_x are of any numeric type.
51 The values of the ROWTYPE_ variable are in the set {MEAN, STDDEV, N, CORR, COV}
52 and determine the purpose of that case.
53 The values of the VARNAME_ variable must correspond to the names of the variables
54 in {v_0, v_1 ... v_n} and indicate the rows of the correlation or covariance
59 A typical example is as follows:
61 s_0 ROWTYPE_ VARNAME_ v_0 v_1 v_2
63 0 MEAN 5.0000 4.0000 3.0000
64 0 STDDEV 1.0000 2.0000 3.0000
65 0 N 9.0000 9.0000 9.0000
66 0 CORR V1 1.0000 .6000 .7000
67 0 CORR V2 .6000 1.0000 .8000
68 0 CORR V3 .7000 .8000 1.0000
69 1 MEAN 9.0000 8.0000 7.0000
70 1 STDDEV 5.0000 6.0000 7.0000
71 1 N 9.0000 9.0000 9.0000
72 1 CORR V1 1.0000 .4000 .3000
73 1 CORR V2 .4000 1.0000 .2000
74 1 CORR V3 .3000 .2000 1.0000
/* The VARNAME_ variable of the matrix dictionary.  Its value in each case is
   hashed by next_matrix_from_reader. */
80 const struct variable *varname;
/* The ROWTYPE_ variable.  Its value in each case is compared against the
   row-type keywords (N, MEAN, STDDEV, CORR, COV). */
81 const struct variable *rowtype;
/* Splits the input cases into groups; created by
   create_matrix_reader_from_case_reader and destroyed by
   destroy_matrix_reader. */
82 struct casegrouper *grouper;
/* Per-group matrices.  next_matrix_from_reader frees the previous ones and
   allocates new ones on each call.  var_vectors receives the squares of the
   STDDEV values. */
84 gsl_matrix *n_vectors;
85 gsl_matrix *mean_vectors;
86 gsl_matrix *var_vectors;
/* Creates a matrix reader for the cases in IN_READER, described by DICT.

   DICT must contain string variables VARNAME_ and ROWTYPE_; an error message
   is issued if either is missing or is not a string variable.

   *N_VARS receives the number of variables that follow VARNAME_ in DICT and
   *VARS receives a newly allocated array of those variables.  The cases of
   IN_READER are grouped by the variables that precede ROWTYPE_. */
89 struct matrix_reader *
90 create_matrix_reader_from_case_reader (const struct dictionary *dict, struct casereader *in_reader,
91 const struct variable ***vars, size_t *n_vars)
93 struct matrix_reader *mr = xzalloc (sizeof *mr);
/* NOTE(review): the lookup uses the lower-case name while the messages print
   "VARNAME_"; presumably dict_lookup_var is case-insensitive -- confirm. */
95 mr->varname = dict_lookup_var (dict, "varname_");
96 if (mr->varname == NULL)
98 msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_");
103 if (!var_is_alpha (mr->varname))
105 msg (ME, _("Matrix dataset variable %s should be of string type."),
/* ROWTYPE_ is validated in the same way as VARNAME_ above. */
111 mr->rowtype = dict_lookup_var (dict, "rowtype_");
112 if (mr->rowtype == NULL)
114 msg (ME, _("Matrix dataset lacks a variable called %s."), "ROWTYPE_");
119 if (!var_is_alpha (mr->rowtype))
121 msg (ME, _("Matrix dataset variable %s should be of string type."),
128 const struct variable **dvars = NULL;
/* NOTE(review): DVARS is later indexed with dictionary indexes.  If the
   DC_SCRATCH argument causes variables to be left out of DVARS, those
   indexes would not line up -- confirm. */
129 dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH);
/* Everything after VARNAME_ is treated as a continuous variable. */
132 *n_vars = dvarcnt - var_get_dict_index (mr->varname) - 1;
/* NOTE(review): the element size is passed first and the count second, and
   the size is taken of `struct variable **' rather than of the element type
   `const struct variable *'.  Both are pointer sizes, so the byte count is
   presumably unaffected -- confirm against xcalloc's prototype. */
137 *vars = xcalloc (sizeof (struct variable **), *n_vars);
139 for (i = 0; i < *n_vars; ++i)
141 (*vars)[i] = dvars[i + var_get_dict_index (mr->varname) + 1];
145 /* All the variables before ROWTYPE_ (if any) are split variables */
146 mr->grouper = casegrouper_create_vars (in_reader, dvars, var_get_dict_index (mr->rowtype));
/* Tears down MR.  The case grouper created for MR is destroyed and the
   result of casegrouper_destroy is kept in RET. */
154 destroy_matrix_reader (struct matrix_reader *mr)
158 bool ret = casegrouper_destroy (mr->grouper);
165 Allocates MATRIX if necessary,
166 and populates row MROW, from the data in C corresponding to
167 variables in VARS. N_VARS is the length of VARS.
170 matrix_fill_row (gsl_matrix **matrix,
171 const struct ccase *c, int mrow,
172 const struct variable **vars, size_t n_vars)
/* The matrix is square: one row and one column per variable in VARS. */
176 *matrix = gsl_matrix_alloc (n_vars, n_vars);
178 for (col = 0; col < n_vars; ++col)
180 const struct variable *cv = vars [col];
/* Read the numeric member of the value that C holds for CV. */
181 double x = case_data (c, cv)->f;
/* Check that both indexes are inside the matrix before storing. */
182 assert (col < (*matrix)->size2);
183 assert (mrow < (*matrix)->size1);
184 gsl_matrix_set (*matrix, mrow, col, x);
/* Reads the next group of cases from MR's case grouper and stores the
   matrices it describes in MM, for the N_VARS variables in VARS.

   MM->n, MM->mean_matrix and MM->var_matrix point at matrices owned by MR.
   Those matrices are freed at the start of the following call, so the
   pointers are only usable until then. */
189 next_matrix_from_reader (struct matrix_material *mm,
190 struct matrix_reader *mr,
191 const struct variable **vars, int n_vars)
193 struct casereader *group;
/* Release the matrices handed out by the previous call. */
197 gsl_matrix_free (mr->n_vectors);
198 gsl_matrix_free (mr->mean_vectors);
199 gsl_matrix_free (mr->var_vectors);
/* Fetch the next group; the test is true when no group could be obtained. */
201 if (!casegrouper_get_next_group (mr->grouper, &group))
204 mr->n_vectors = gsl_matrix_alloc (n_vars, n_vars);
205 mr->mean_vectors = gsl_matrix_alloc (n_vars, n_vars);
206 mr->var_vectors = gsl_matrix_alloc (n_vars, n_vars);
208 mm->n = mr->n_vectors;
209 mm->mean_matrix = mr->mean_vectors;
210 mm->var_matrix = mr->var_vectors;
212 // FIXME: Make this into a hash table.
/* One slot per variable in VARS; presumably filled with the hash of each
   variable's name computed in the loop below -- confirm. */
213 unsigned long *table = xmalloc (sizeof (*table) * n_vars);
215 for (i = 0; i < n_vars; ++i)
/* The width of VARNAME_ does not change inside the loop.  The name is hashed
   over that width, the same width used for the VARNAME_ values below. */
217 const int w = var_get_width (mr->varname);
220 const char *name = var_get_name (vars[i]);
/* strncpy copies at most W bytes; a name of W or more bytes is truncated and
   left without a terminating NUL, a shorter one is padded with NULs.
   NOTE(review): the hash only matches a VARNAME_ value if that value is
   padded the same way -- confirm. */
221 strncpy (s, name, w);
222 unsigned long h = hash_bytes (s, w, 0);
/* Process every case of the group, releasing each case after use. */
227 for ( ; (c = casereader_read (group) ); case_unref (c))
229 const union value *uv = case_data (c, mr->rowtype);
/* N, MEAN and STDDEV cases: copy the value of each variable into every row
   of that variable's column.  STDDEV values are squared before storing. */
231 for (col = 0; col < n_vars; ++col)
233 const struct variable *cv = vars[col];
234 double x = case_data (c, cv)->f;
/* NOTE(review): ROWTYPE_ is documented as A8 and up to 8 bytes are compared.
   If the values are space-padded, these literals need to be padded to 8
   characters as well in order to compare equal -- confirm. */
235 if (0 == strncasecmp ((char *)value_str (uv, 8), "N ", 8))
236 for (row = 0; row < n_vars; ++row)
237 gsl_matrix_set (mr->n_vectors, row, col, x);
238 else if (0 == strncasecmp ((char *) value_str (uv, 8), "MEAN ", 8))
239 for (row = 0; row < n_vars; ++row)
240 gsl_matrix_set (mr->mean_vectors, row, col, x);
241 else if (0 == strncasecmp ((char *) value_str (uv, 8), "STDDEV ", 8))
242 for (row = 0; row < n_vars; ++row)
243 gsl_matrix_set (mr->var_vectors, row, col, x * x);
/* Hash this case's VARNAME_ value over the full width of VARNAME_.  The loop
   over I below presumably searches TABLE for it to find the row -- confirm. */
246 const union value *uvv = case_data (c, mr->varname);
247 const uint8_t *vs = value_str (uvv, var_get_width (mr->varname));
248 int w = var_get_width (mr->varname);
249 unsigned long h = hash_bytes (vs, w, 0);
252 for (i = 0; i < n_vars; ++i)
/* CORR and COV cases each supply row MROW of the corresponding matrix. */
264 if (0 == strncasecmp ((char *) value_str (uv, 8), "CORR ", 8))
266 matrix_fill_row (&mm->corr, c, mrow, vars, n_vars);
268 else if (0 == strncasecmp ((char *) value_str (uv, 8), "COV ", 8))
270 matrix_fill_row (&mm->cov, c, mrow, vars, n_vars);
/* The group's reader is no longer needed once all cases are consumed. */
274 casereader_destroy (group);