1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2017, 2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "matrix-reader.h"
24 #include "data/casegrouper.h"
25 #include "data/casereader.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/format.h"
30 #include "data/variable.h"
31 #include "language/command.h"
32 #include "libpspp/i18n.h"
33 #include "libpspp/message.h"
34 #include "libpspp/str.h"
35 #include "output/pivot-table.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) msgid
44 This module interprets a "data matrix", typically generated by the command
45 MATRIX DATA. The dictionary of such a matrix takes the form:
47 s_0, s_1, ... s_m, ROWTYPE_, VARNAME_, v_0, v_1, .... v_n
49 where s_0, s_1 ... s_m are the variables defining the splits, and
50 v_0, v_1 ... v_n are the continuous variables.
54 The ROWTYPE_ variable is of type A8.
55 The VARNAME_ variable is a string type whose width is not predetermined.
56 The variables s_x are of type F4.0 (although this reader accepts any type),
57 and v_x are of any numeric type.
59 The values of the ROWTYPE_ variable are in the set {MEAN, STDDEV, N, CORR, COV}
60 and determine the purpose of that case.
61 The values of the VARNAME_ variable must correspond to the names of the variables
62 in {v_0, v_1 ... v_n} and indicate the rows of the correlation or covariance
67 A typical example is as follows:
69 s_0 ROWTYPE_ VARNAME_ v_0 v_1 v_2
71 0 MEAN 5.0000 4.0000 3.0000
72 0 STDDEV 1.0000 2.0000 3.0000
73 0 N 9.0000 9.0000 9.0000
74 0 CORR V1 1.0000 .6000 .7000
75 0 CORR V2 .6000 1.0000 .8000
76 0 CORR V3 .7000 .8000 1.0000
77 1 MEAN 9.0000 8.0000 7.0000
78 1 STDDEV 5.0000 6.0000 7.0000
79 1 N 9.0000 9.0000 9.0000
80 1 CORR V1 1.0000 .4000 .3000
81 1 CORR V2 .4000 1.0000 .2000
82 1 CORR V3 .3000 .2000 1.0000
/* Releases the GSL matrices referenced by MM.  Each of the members corr,
   cov, n, mean_matrix and var_matrix is passed to gsl_matrix_free. */
87 matrix_material_uninit (struct matrix_material *mm)
89 gsl_matrix_free (mm->corr);
90 gsl_matrix_free (mm->cov);
91 gsl_matrix_free (mm->n);
92 gsl_matrix_free (mm->mean_matrix);
93 gsl_matrix_free (mm->var_matrix);
/* Members of the matrix reader state (accessed through `struct matrix_reader *'
   pointers elsewhere in this file). */
/* Dictionary of the matrix dataset; its encoding is consulted when the
   VARNAME_ values are recoded. */
98 const struct dictionary *dict;
/* The variable found by looking up "varname_" in the dictionary. */
99 const struct variable *varname;
/* The variable found by looking up "rowtype_" in the dictionary. */
100 const struct variable *rowtype;
/* Case grouper created over the input reader; it hands out one group of
   cases per call to casegrouper_get_next_group. */
101 struct casegrouper *grouper;
/* Creates a matrix reader for the matrix dataset described by DICT, whose
   cases are supplied by IN_READER.

   The dictionary must contain the variables VARNAME_ and ROWTYPE_, both of
   string type; an error message is issued for each requirement that is not
   met.  (The statements that follow each message are not visible in this
   view; presumably the reader is released and a null pointer returned --
   TODO confirm.)

   *N_VARS receives the number of dictionary variables that come after
   VARNAME_, and *VARS receives a newly allocated array holding those
   variables in dictionary order. */
104 struct matrix_reader *
105 create_matrix_reader_from_case_reader (const struct dictionary *dict, struct casereader *in_reader,
106 const struct variable ***vars, size_t *n_vars)
/* Zero-filled allocation, so every member starts out null. */
108 struct matrix_reader *mr = xzalloc (sizeof *mr);
/* NOTE(review): the lookup uses the lower-case spelling while the messages
   use "VARNAME_"; this presumably relies on dict_lookup_var ignoring case --
   confirm. */
110 mr->varname = dict_lookup_var (dict, "varname_");
112 if (mr->varname == NULL)
114 msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_");
119 if (!var_is_alpha (mr->varname))
121 msg (ME, _("Matrix dataset variable %s should be of string type."),
/* The same two checks are applied to ROWTYPE_. */
127 mr->rowtype = dict_lookup_var (dict, "rowtype_");
128 if (mr->rowtype == NULL)
130 msg (ME, _("Matrix dataset lacks a variable called %s."), "ROWTYPE_");
135 if (!var_is_alpha (mr->rowtype))
137 msg (ME, _("Matrix dataset variable %s should be of string type."),
/* Obtain the dictionary's variables as an array (DVARS, with DVARCNT
   entries). */
144 const struct variable **dvars = NULL;
145 dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH);
/* Everything positioned after VARNAME_ in the dictionary is counted. */
148 *n_vars = dvarcnt - var_get_dict_index (mr->varname) - 1;
/* NOTE(review): the element size is spelled sizeof (struct variable **)
   although the elements are `const struct variable *'.  Both are object
   pointers, so the size is expected to be the same, but `sizeof **vars'
   would state the intent directly. */
153 *vars = xcalloc (*n_vars, sizeof (struct variable **));
/* Copy the variables that follow VARNAME_ into the caller's array. */
155 for (i = 0; i < *n_vars; ++i)
157 (*vars)[i] = dvars[i + var_get_dict_index (mr->varname) + 1];
161 /* All the variables before ROWTYPE_ (if any) are split variables */
162 mr->grouper = casegrouper_create_vars (in_reader, dvars, var_get_dict_index (mr->rowtype));
/* Tears down MR.  The case grouper held by MR is destroyed and the boolean
   result of that operation is kept in RET.  (What is subsequently done with
   RET and with MR itself is not visible in this view.) */
170 destroy_matrix_reader (struct matrix_reader *mr)
174 bool ret = casegrouper_destroy (mr->grouper);
181 Allocates MATRIX if necessary,
182 and populates row MROW, from the data in C corresponding to
183 variables in VARS. N_VARS is the length of VARS.
/* Stores into row MROW of *MATRIX the numeric values that case C holds for
   each of the N_VARS variables in VARS; column COL receives the value of
   VARS[COL].

   A matrix allocated here is N_VARS by N_VARS and has every element preset
   to SYSMIS, so rows that are never filled remain SYSMIS.  (The condition
   guarding the allocation is not visible in this view.) */
186 matrix_fill_row (gsl_matrix **matrix,
187 const struct ccase *c, int mrow,
188 const struct variable **vars, size_t n_vars)
193 *matrix = gsl_matrix_alloc (n_vars, n_vars);
194 gsl_matrix_set_all (*matrix, SYSMIS);
197 for (col = 0; col < n_vars; ++col)
199 const struct variable *cv = vars [col];
200 double x = case_num (c, cv);
/* Both indexes are checked against the matrix dimensions before the
   element is written. */
201 assert (col < (*matrix)->size2);
202 assert (mrow < (*matrix)->size1);
203 gsl_matrix_set (*matrix, mrow, col, x);
/* Scans the N_VARS variables in VARS for one whose name equals VARNAME,
   comparing with strcasecmp (that is, without regard to case).  (The values
   produced for a match and for no match are not visible in this view.) */
208 find_varname (const struct variable **vars, int n_vars,
211 for (int i = 0; i < n_vars; i++)
212 if (!strcasecmp (var_get_name (vars[i]), varname))
/* Obtains the next group of cases from MR's case grouper and builds the
   matrix material for it in MM.  VARS holds the N_VARS variables to be
   analyzed.

   When the grouper has no further group, MM is set to MATRIX_MATERIAL_INIT.
   Otherwise MM receives freshly allocated, zero-filled N_VARS by N_VARS
   matrices for n, mean_matrix and var_matrix, and every case in the group
   is dispatched on its ROWTYPE_ value:

   - N, MEAN and STDDEV rows are written to n, mean_matrix and var_matrix
     respectively.

   - CORR and COV rows are written, by way of matrix_fill_row, to the row of
     corr or cov selected by the case's VARNAME_ value.

   A warning is issued for each of CORR and COV whose count of usable rows is
   nonzero but differs from N_VARS. */
218 next_matrix_from_reader (struct matrix_material *mm,
219 struct matrix_reader *mr,
220 const struct variable **vars, int n_vars)
222 struct casereader *group;
226 if (!casegrouper_get_next_group (mr->grouper, &group))
228 *mm = (struct matrix_material) MATRIX_MATERIAL_INIT;
232 *mm = (struct matrix_material) {
233 .n = gsl_matrix_calloc (n_vars, n_vars),
234 .mean_matrix = gsl_matrix_calloc (n_vars, n_vars),
235 .var_matrix = gsl_matrix_calloc (n_vars, n_vars),
/* Table that maps the ROWTYPE_ values "CORR" and "COV" to the matrix
   pointers in MM that they populate. */
245 struct matrix matrices[] = {
246 { .name = "CORR", .m = &mm->corr },
247 { .name = "COV", .m = &mm->cov },
249 enum { N_MATRICES = 2 };
/* Each case is unreferenced by the loop's increment expression once it has
   been handled. */
252 for (; (c = casereader_read (group)); case_unref (c))
/* Trailing spaces are removed from the ROWTYPE_ value before matching. */
254 struct substring rowtype = case_ss (c, mr->rowtype);
255 ss_rtrim (&rowtype, ss_cstr (CC_SPACES));
/* Choose the per-variable statistic matrix, if any, that this row type
   feeds. */
258 = (ss_equals_case (rowtype, ss_cstr ("N")) ? mm->n
259 : ss_equals_case (rowtype, ss_cstr ("MEAN")) ? mm->mean_matrix
260 : ss_equals_case (rowtype, ss_cstr ("STDDEV")) ? mm->var_matrix
/* The value for variable X is stored in every row of column X, so each
   column of the statistic matrix is constant. */
264 for (int x = 0; x < n_vars; ++x)
266 double n = case_num (c, vars[x]);
/* STDDEV rows get extra treatment here; the statement itself is not visible
   in this view (presumably the value is converted to a variance, given the
   name of the destination -- TODO confirm). */
267 if (v == mm->var_matrix)
269 for (int y = 0; y < n_vars; ++y)
270 gsl_matrix_set (v, y, x, n);
/* Otherwise look for a matching entry in MATRICES (CORR or COV). */
275 struct matrix *m = NULL;
276 for (size_t i = 0; i < N_MATRICES; i++)
277 if (ss_equals_case (rowtype, ss_cstr (matrices[i].name)))
/* The VARNAME_ value is passed through recode_string together with UTF8
   and the dictionary's encoding (presumably converting from the dictionary
   encoding to UTF-8 -- confirm the argument order), has its trailing spaces
   trimmed, and is then null-terminated at the trimmed length so that it can
   be handed to find_varname as a C string. */
284 struct substring varname_raw = case_ss (c, mr->varname);
285 struct substring varname = ss_cstr (
286 recode_string (UTF8, dict_get_encoding (mr->dict),
287 varname_raw.string, varname_raw.length));
288 ss_rtrim (&varname, ss_cstr (CC_SPACES));
289 varname.string[varname.length] = '\0';
/* Y is the result of searching VARS for the named variable; it is used as
   the row index to fill. */
291 int y = find_varname (vars, n_vars, varname.string);
295 matrix_fill_row (m->m, c, y, vars, n_vars);
/* The recoded name is released once the row has been handled. */
299 ss_dealloc (&varname);
302 casereader_destroy (group);
/* NOTE(review): good_rows and bad_rows are printed with %zu, so they are
   presumably size_t, while n_vars is an int; the comparison below therefore
   mixes signed and unsigned operands -- confirm this is harmless. */
304 for (size_t i = 0; i < N_MATRICES; i++)
305 if (matrices[i].good_rows && matrices[i].good_rows != n_vars)
306 msg (SW, _("%s matrix has %d columns but %zu rows named variables "
307 "to be analyzed (and %zu rows named unknown variables)."),
308 matrices[i].name, n_vars, matrices[i].good_rows,
309 matrices[i].bad_rows);
/* Debugging command.  Opens a matrix reader on the active dataset DS, reads
   the matrix material for every split group, and submits a pivot table
   titled "Debug Matrix Reader" that shows each statistic for each split.
   LEXER is unused. */
315 cmd_debug_matrix_read (struct lexer *lexer UNUSED, struct dataset *ds)
317 const struct variable **vars;
319 struct matrix_reader *mr = create_matrix_reader_from_case_reader (
320 dataset_dict (ds), proc_open (ds), &vars, &n_vars);
324 struct pivot_table *pt = pivot_table_create ("Debug Matrix Reader");
/* Display names for the statistics, indexed by the MM_* constants. */
334 const char *mm_stat_names[] = {
335 [MM_CORR] = "Correlation",
336 [MM_COV] = "Covariance",
339 [MM_STDDEV] = "Standard Deviation",
341 enum { N_STATS = sizeof mm_stat_names / sizeof *mm_stat_names };
/* Two dimensions are created first: "Row" on the row axis (i == 0) and
   "Column" on the column axis (i == 1). */
342 for (size_t i = 0; i < 2; i++)
344 struct pivot_dimension *d = pivot_dimension_create (
346 i ? PIVOT_AXIS_COLUMN : PIVOT_AXIS_ROW,
347 i ? "Column" : "Row");
/* A "Value" leaf is created ahead of the per-variable leaves.  (A line
   directly above this call is not visible in this view, so the leaf may be
   added conditionally -- TODO confirm.) */
349 pivot_category_create_leaf_rc (d->root, pivot_value_new_text ("Value"),
350 PIVOT_RC_CORRELATION);
/* One leaf per analysis variable. */
351 for (size_t j = 0; j < n_vars; j++)
352 pivot_category_create_leaf_rc (
353 d->root, pivot_value_new_variable (vars[j]), PIVOT_RC_CORRELATION);
/* Third dimension: one leaf per statistic name. */
356 struct pivot_dimension *stat = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
358 for (size_t i = 0; i < N_STATS; i++)
359 pivot_category_create_leaf (stat->root,
360 pivot_value_new_text (mm_stat_names[i]));
/* Fourth dimension: a leaf is added for each split group as it is read. */
362 struct pivot_dimension *split = pivot_dimension_create (
363 pt, PIVOT_AXIS_ROW, "Split");
367 struct matrix_material mm = MATRIX_MATERIAL_INIT;
368 while (next_matrix_from_reader (&mm, mr, vars, n_vars))
/* Splits are labelled with 1-based numbers. */
370 pivot_category_create_leaf (split->root,
371 pivot_value_new_integer (split_num + 1));
/* Matrices of the current split, indexed like mm_stat_names. */
373 const gsl_matrix *m[N_STATS] = {
377 [MM_MEAN] = mm.mean_matrix,
378 [MM_STDDEV] = mm.var_matrix,
381 for (size_t i = 0; i < N_STATS; i++)
/* Covariance and correlation are shown as full matrices; matrix row Y is
   placed at index Y + 1 of the first dimension. */
384 if (i == MM_COV || i == MM_CORR)
386 for (size_t y = 0; y < n_vars; y++)
387 for (size_t x = 0; x < n_vars; x++)
389 pt, y + 1, x, i, split_num,
390 pivot_value_new_number (gsl_matrix_get (m[i], y, x)));
/* Here only row 0 of the matrix is read and the value is placed at index 0
   of the first dimension.  (Lines between reading N and storing it are not
   visible in this view.) */
393 for (size_t x = 0; x < n_vars; x++)
395 double n = gsl_matrix_get (m[i], 0, x);
398 pivot_table_put4 (pt, 0, x, i, split_num,
399 pivot_value_new_number (n));
/* The matrices of this split are released before the next one is read. */
404 matrix_material_uninit (&mm);
406 pivot_table_submit (pt);
410 destroy_matrix_reader (mr);