From: Ben Pfaff Date: Mon, 27 Sep 2021 05:35:33 +0000 (-0700) Subject: Implement the MCONVERT command. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=aa93c33832ee3dd896062da88b88e7282437ea78 Implement the MCONVERT command. --- diff --git a/NEWS b/NEWS index 9277e682eb..811c4d9de2 100644 --- a/NEWS +++ b/NEWS @@ -6,7 +6,7 @@ Please send PSPP bug reports to bug-gnu-pspp@gnu.org. Changes from 1.4.1 to 1.5.3: - * The DEFINE command is now supported. + * The DEFINE and MCONVERT commands are now implemented. * The MATRIX DATA command is now fully implemented. diff --git a/doc/matrices.texi b/doc/matrices.texi index 26a29dbced..15d1e914c5 100644 --- a/doc/matrices.texi +++ b/doc/matrices.texi @@ -14,7 +14,8 @@ Some @pspp{} procedures work with matrices by producing numeric matrices that report results of data analysis, or by consuming matrices as a basis for further analysis. This chapter documents the format of data files that store these matrices and commands for -working with them. +working with them, as well as @pspp{}'s general-purpose facility for +matrix operations. @node Matrix Files @section Matrix Files @@ -57,9 +58,9 @@ order. This column is blank for vector data. @cmd{MATRIX DATA} makes variables, but at least 8 bytes. @item -One or more continuous variables. These are the variables whose data -was analyzed to produce the matrices. @cmd{MATRIX DATA} assigns -continuous variables format F10.4. +One or more numeric continuous variables. These are the variables +whose data was analyzed to produce the matrices. @cmd{MATRIX DATA} +assigns continuous variables format F10.4. @end enumerate Case weights are ignored in matrix files. @@ -572,3 +573,36 @@ BEGIN DATA. .7 .5 .4 1 END DATA. @end example + +@node MCONVERT +@section MCONVERT + +@display +MCONVERT + [[MATRIX=] + [IN(@{@samp{*}|'@var{file}'@})] + [OUT(@{@samp{*}|'@var{file}'@})]] + [/@{REPLACE,APPEND@}]. +@end display + +The @cmd{MCONVERT} command converts matrix data from a correlation +matrix and a vector of standard deviations into a covariance matrix, +or vice versa. + +By default, @cmd{MCONVERT} both reads and writes the active file. Use +the @cmd{MATRIX} subcommand to specify other files. To read a matrix +file, specify its name inside parentheses following @code{IN}. To +write a matrix file, specify its name inside parentheses following +@code{OUT}. Use @samp{*} to explicitly specify the active file for +input or output. + +When @cmd{MCONVERT} reads the input, by default it substitutes a +correlation matrix and a vector of standard deviations each time it +encounters a covariance matrix, and vice versa. Specify +@code{/APPEND} to instead have @cmd{MCONVERT} add the other form of +data without removing the existing data. Use @code{/REPLACE} to +explicitly request removing the existing data. + +The @cmd{MCONVERT} command requires its input to be a matrix file. +Use @cmd{MATRIX DATA} to convert text input into matrix file format. +@xref{MATRIX DATA}, for details. diff --git a/src/language/command.def b/src/language/command.def index f47c0f5866..9b3ff8a729 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -30,6 +30,7 @@ DEF_CMD (S_ANY, 0, "FINISH", cmd_finish) DEF_CMD (S_ANY, 0, "HOST", cmd_host) DEF_CMD (S_ANY, 0, "INCLUDE", cmd_include) DEF_CMD (S_ANY, 0, "INSERT", cmd_insert) +DEF_CMD (S_ANY, 0, "MCONVERT", cmd_mconvert) DEF_CMD (S_ANY, 0, "N OF CASES", cmd_n_of_cases) DEF_CMD (S_ANY, F_ABBREV, "N", cmd_n_of_cases) DEF_CMD (S_ANY, 0, "NEW FILE", cmd_new_file) @@ -210,7 +211,6 @@ UNIMPL_CMD ("LOGLINEAR", "General model fitting") UNIMPL_CMD ("MANOVA", "Multivariate analysis of variance") UNIMPL_CMD ("MAPS", "Geographical display") UNIMPL_CMD ("MATRIX", "Matrix processing") -UNIMPL_CMD ("MCONVERT", "Convert covariance/correlation matrices") UNIMPL_CMD ("MIXED", "Mixed linear models") UNIMPL_CMD ("MODEL CLOSE", "Close server connection") UNIMPL_CMD ("MODEL HANDLE", "Define server connection") diff --git a/src/language/data-io/automake.mk b/src/language/data-io/automake.mk index 819f267b63..4116d798f3 100644 --- a/src/language/data-io/automake.mk +++ b/src/language/data-io/automake.mk @@ -40,6 +40,7 @@ language_data_io_sources = \ src/language/data-io/matrix-data.c \ src/language/data-io/matrix-reader.c \ src/language/data-io/matrix-reader.h \ + src/language/data-io/mconvert.c \ src/language/data-io/save-translate.c \ src/language/data-io/save.c \ src/language/data-io/trim.c \ diff --git a/src/language/data-io/matrix-reader.c b/src/language/data-io/matrix-reader.c index cf7b524480..73d60c844d 100644 --- a/src/language/data-io/matrix-reader.c +++ b/src/language/data-io/matrix-reader.c @@ -23,12 +23,14 @@ #include "data/casegrouper.h" #include "data/casereader.h" +#include "data/casewriter.h" #include "data/data-out.h" #include "data/dataset.h" #include "data/dictionary.h" #include "data/format.h" #include "data/variable.h" #include "language/command.h" +#include "language/lexer/lexer.h" #include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/str.h" @@ -93,81 +95,90 @@ matrix_material_uninit (struct matrix_material *mm) gsl_matrix_free (mm->var_matrix); } -struct matrix_reader +static const struct variable * +find_matrix_string_var (const struct dictionary *dict, const char *name) { - const struct dictionary *dict; - const struct variable *varname; - const struct variable *rowtype; - struct casegrouper *grouper; -}; - -struct matrix_reader * -create_matrix_reader_from_case_reader (const struct dictionary *dict, struct casereader *in_reader, - const struct variable ***vars, size_t *n_vars) -{ - struct matrix_reader *mr = xzalloc (sizeof *mr); - - mr->varname = dict_lookup_var (dict, "varname_"); - mr->dict = dict; - if (mr->varname == NULL) + const struct variable *var = dict_lookup_var (dict, name); + if (var == NULL) { - msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_"); - free (mr); + msg (SE, _("Matrix dataset lacks a variable called %s."), name); return NULL; } - - if (!var_is_alpha (mr->varname)) + if (!var_is_alpha (var)) { - msg (ME, _("Matrix dataset variable %s should be of string type."), - "VARNAME_"); - free (mr); + msg (SE, _("Matrix dataset variable %s should be of string type."), name); return NULL; } + return var; +} - mr->rowtype = dict_lookup_var (dict, "rowtype_"); - if (mr->rowtype == NULL) +struct matrix_reader * +matrix_reader_create (const struct dictionary *dict, + struct casereader *in_reader) +{ + const struct variable *varname = find_matrix_string_var (dict, "VARNAME_"); + const struct variable *rowtype = find_matrix_string_var (dict, "ROWTYPE_"); + if (!varname || !rowtype) + return NULL; + + size_t varname_idx = var_get_dict_index (varname); + size_t rowtype_idx = var_get_dict_index (rowtype); + if (varname_idx < rowtype_idx) { - msg (ME, _("Matrix dataset lacks a variable called %s."), "ROWTYPE_"); - free (mr); + msg (SE, _("Variable %s must precede %s in matrix file dictionary."), + "ROWTYPE_", "VARNAME_"); return NULL; } - if (!var_is_alpha (mr->rowtype)) + for (size_t i = 0; i < dict_get_var_cnt (dict); i++) { - msg (ME, _("Matrix dataset variable %s should be of string type."), - "ROWTYPE_"); - free (mr); - return NULL; + const struct variable *v = dict_get_var (dict, i); + if (!var_is_numeric (v) && v != rowtype && v != varname) + { + msg (SE, _("Matrix dataset variable %s should be numeric."), + var_get_name (v)); + return NULL; + } } - size_t dvarcnt; - const struct variable **dvars = NULL; - dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH); - - if (n_vars) - *n_vars = dvarcnt - var_get_dict_index (mr->varname) - 1; - - if (vars) + size_t n_vars; + const struct variable **vars; + dict_get_vars (dict, &vars, &n_vars, DC_SCRATCH); + + /* Different kinds of variables. */ + size_t first_svar = 0; + size_t n_svars = rowtype_idx; + size_t first_fvar = rowtype_idx + 1; + size_t n_fvars = varname_idx - rowtype_idx - 1; + size_t first_cvar = varname_idx + 1; + size_t n_cvars = n_vars - varname_idx - 1; + if (!n_cvars) { - int i; - *vars = xcalloc (*n_vars, sizeof (struct variable **)); - - for (i = 0; i < *n_vars; ++i) - { - (*vars)[i] = dvars[i + var_get_dict_index (mr->varname) + 1]; - } + msg (SE, _("Matrix dataset does not have any continuous variables.")); + free (vars); + return NULL; } - /* All the variables before ROWTYPE_ (if any) are split variables */ - mr->grouper = casegrouper_create_vars (in_reader, dvars, var_get_dict_index (mr->rowtype)); - - free (dvars); + struct matrix_reader *mr = xmalloc (sizeof *mr); + *mr = (struct matrix_reader) { + .dict = dict, + .grouper = casegrouper_create_vars (in_reader, &vars[first_svar], n_svars), + .svars = xmemdup (vars + first_svar, n_svars * sizeof *mr->svars), + .n_svars = n_svars, + .rowtype = rowtype, + .fvars = xmemdup (vars + first_fvar, n_fvars * sizeof *mr->fvars), + .n_fvars = n_fvars, + .varname = varname, + .cvars = xmemdup (vars + first_cvar, n_cvars * sizeof *mr->cvars), + .n_cvars = n_cvars, + }; + free (vars); return mr; } bool -destroy_matrix_reader (struct matrix_reader *mr) +matrix_reader_destroy (struct matrix_reader *mr) { if (mr == NULL) return false; @@ -214,26 +225,43 @@ find_varname (const struct variable **vars, int n_vars, return -1; } -bool -next_matrix_from_reader (struct matrix_material *mm, - struct matrix_reader *mr, - const struct variable **vars, int n_vars) +struct substring +matrix_reader_get_string (const struct ccase *c, const struct variable *var) { - struct casereader *group; + struct substring s = case_ss (c, var); + ss_rtrim (&s, ss_cstr (CC_SPACES)); + return s; +} - assert (vars); +void +matrix_reader_set_string (struct ccase *c, const struct variable *var, + struct substring src) +{ + struct substring dst = case_ss (c, var); + for (size_t i = 0; i < dst.length; i++) + dst.string[i] = i < src.length ? src.string[i] : ' '; +} +bool +matrix_reader_next (struct matrix_material *mm, struct matrix_reader *mr, + struct casereader **groupp) +{ + struct casereader *group; if (!casegrouper_get_next_group (mr->grouper, &group)) { *mm = (struct matrix_material) MATRIX_MATERIAL_INIT; + if (groupp) + *groupp = NULL; return false; } - *mm = (struct matrix_material) { - .n = gsl_matrix_calloc (n_vars, n_vars), - .mean_matrix = gsl_matrix_calloc (n_vars, n_vars), - .var_matrix = gsl_matrix_calloc (n_vars, n_vars), - }; + if (groupp) + *groupp = casereader_clone (group); + + const struct variable **vars = mr->cvars; + size_t n_vars = mr->n_cvars; + + *mm = (struct matrix_material) { .n = NULL }; struct matrix { @@ -251,23 +279,25 @@ next_matrix_from_reader (struct matrix_material *mm, struct ccase *c; for (; (c = casereader_read (group)); case_unref (c)) { - struct substring rowtype = case_ss (c, mr->rowtype); - ss_rtrim (&rowtype, ss_cstr (CC_SPACES)); + struct substring rowtype = matrix_reader_get_string (c, mr->rowtype); - gsl_matrix *v - = (ss_equals_case (rowtype, ss_cstr ("N")) ? mm->n - : ss_equals_case (rowtype, ss_cstr ("MEAN")) ? mm->mean_matrix - : ss_equals_case (rowtype, ss_cstr ("STDDEV")) ? mm->var_matrix + gsl_matrix **v + = (ss_equals_case (rowtype, ss_cstr ("N")) ? &mm->n + : ss_equals_case (rowtype, ss_cstr ("MEAN")) ? &mm->mean_matrix + : ss_equals_case (rowtype, ss_cstr ("STDDEV")) ? &mm->var_matrix : NULL); if (v) { + if (!*v) + *v = gsl_matrix_calloc (n_vars, n_vars); + for (int x = 0; x < n_vars; ++x) { double n = case_num (c, vars[x]); - if (v == mm->var_matrix) + if (v == &mm->var_matrix) n *= n; for (int y = 0; y < n_vars; ++y) - gsl_matrix_set (v, y, x, n); + gsl_matrix_set (*v, y, x, n); } continue; } @@ -303,7 +333,7 @@ next_matrix_from_reader (struct matrix_material *mm, for (size_t i = 0; i < N_MATRICES; i++) if (matrices[i].good_rows && matrices[i].good_rows != n_vars) - msg (SW, _("%s matrix has %d columns but %zu rows named variables " + msg (SW, _("%s matrix has %zu columns but %zu rows named variables " "to be analyzed (and %zu rows named unknown variables)."), matrices[i].name, n_vars, matrices[i].good_rows, matrices[i].bad_rows); @@ -312,12 +342,21 @@ next_matrix_from_reader (struct matrix_material *mm, } int -cmd_debug_matrix_read (struct lexer *lexer UNUSED, struct dataset *ds) +cmd_debug_matrix_read (struct lexer *lexer, struct dataset *ds) { - const struct variable **vars; - size_t n_vars; - struct matrix_reader *mr = create_matrix_reader_from_case_reader ( - dataset_dict (ds), proc_open (ds), &vars, &n_vars); + if (lex_match_id (lexer, "NODATA")) + { + struct casereader *cr = casewriter_make_reader ( + mem_writer_create (dict_get_proto (dataset_dict (ds)))); + struct matrix_reader *mr = matrix_reader_create (dataset_dict (ds), cr); + if (!mr) + return CMD_FAILURE; + matrix_reader_destroy (mr); + return CMD_SUCCESS; + } + + struct matrix_reader *mr = matrix_reader_create (dataset_dict (ds), + proc_open (ds)); if (!mr) return CMD_FAILURE; @@ -348,9 +387,10 @@ cmd_debug_matrix_read (struct lexer *lexer UNUSED, struct dataset *ds) if (!i) pivot_category_create_leaf_rc (d->root, pivot_value_new_text ("Value"), PIVOT_RC_CORRELATION); - for (size_t j = 0; j < n_vars; j++) + for (size_t j = 0; j < mr->n_cvars; j++) pivot_category_create_leaf_rc ( - d->root, pivot_value_new_variable (vars[j]), PIVOT_RC_CORRELATION); + d->root, pivot_value_new_variable (mr->cvars[j]), + PIVOT_RC_CORRELATION); } struct pivot_dimension *stat = pivot_dimension_create (pt, PIVOT_AXIS_ROW, @@ -365,7 +405,7 @@ cmd_debug_matrix_read (struct lexer *lexer UNUSED, struct dataset *ds) int split_num = 0; struct matrix_material mm = MATRIX_MATERIAL_INIT; - while (next_matrix_from_reader (&mm, mr, vars, n_vars)) + while (matrix_reader_next (&mm, mr, NULL)) { pivot_category_create_leaf (split->root, pivot_value_new_integer (split_num + 1)); @@ -383,14 +423,14 @@ cmd_debug_matrix_read (struct lexer *lexer UNUSED, struct dataset *ds) { if (i == MM_COV || i == MM_CORR) { - for (size_t y = 0; y < n_vars; y++) - for (size_t x = 0; x < n_vars; x++) + for (size_t y = 0; y < mr->n_cvars; y++) + for (size_t x = 0; x < mr->n_cvars; x++) pivot_table_put4 ( pt, y + 1, x, i, split_num, pivot_value_new_number (gsl_matrix_get (m[i], y, x))); } else - for (size_t x = 0; x < n_vars; x++) + for (size_t x = 0; x < mr->n_cvars; x++) { double n = gsl_matrix_get (m[i], 0, x); if (i == MM_STDDEV) @@ -407,7 +447,6 @@ cmd_debug_matrix_read (struct lexer *lexer UNUSED, struct dataset *ds) proc_commit (ds); - destroy_matrix_reader (mr); - free (vars); + matrix_reader_destroy (mr); return CMD_SUCCESS; } diff --git a/src/language/data-io/matrix-reader.h b/src/language/data-io/matrix-reader.h index f3ac7000d5..5aec05e009 100644 --- a/src/language/data-io/matrix-reader.h +++ b/src/language/data-io/matrix-reader.h @@ -20,6 +20,28 @@ #include #include +struct casereader; +struct ccase; +struct dictionary; +struct matrix_reader; +struct variable; + +struct matrix_reader + { + const struct dictionary *dict; + struct casegrouper *grouper; + + /* Variables in 'dict'. */ + const struct variable **svars; /* Split variables. */ + size_t n_svars; + const struct variable *rowtype; /* ROWTYPE_. */ + const struct variable **fvars; /* Factor variables. */ + size_t n_fvars; + const struct variable *varname; /* VARNAME_. */ + const struct variable **cvars; /* Continuous variables. */ + size_t n_cvars; + }; + struct matrix_material { gsl_matrix *corr; /* The correlation matrix */ @@ -34,22 +56,18 @@ struct matrix_material #define MATRIX_MATERIAL_INIT { .corr = NULL } void matrix_material_uninit (struct matrix_material *); -struct dictionary; -struct variable; -struct casereader; - - -struct matrix_reader; +struct matrix_reader *matrix_reader_create (const struct dictionary *, + struct casereader *); -struct matrix_reader *create_matrix_reader_from_case_reader (const struct dictionary *dict, - struct casereader *in_reader, - const struct variable ***vars, size_t *n_vars); +bool matrix_reader_destroy (struct matrix_reader *mr); -bool destroy_matrix_reader (struct matrix_reader *mr); +bool matrix_reader_next (struct matrix_material *mm, struct matrix_reader *mr, + struct casereader **groupp); -bool next_matrix_from_reader (struct matrix_material *mm, - struct matrix_reader *mr, - const struct variable **vars, int n_vars); +struct substring matrix_reader_get_string (const struct ccase *, + const struct variable *); +void matrix_reader_set_string (struct ccase *, const struct variable *, + struct substring); #endif diff --git a/src/language/data-io/mconvert.c b/src/language/data-io/mconvert.c new file mode 100644 index 0000000000..0ea35f6d78 --- /dev/null +++ b/src/language/data-io/mconvert.c @@ -0,0 +1,235 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2021 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include + +#include "data/any-reader.h" +#include "data/any-writer.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/matrix-reader.h" +#include "language/lexer/lexer.h" +#include "language/command.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +int +cmd_mconvert (struct lexer *lexer, struct dataset *ds) +{ + bool append = false; + struct file_handle *in = NULL; + struct file_handle *out = NULL; + while (lex_token (lexer) != T_ENDCMD) + { + lex_match (lexer, T_SLASH); + + if (lex_match_id (lexer, "APPEND")) + append = true; + else if (lex_match_id (lexer, "REPLACE")) + append = false; + else + { + if (lex_match_id (lexer, "MATRIX")) + lex_match (lexer, T_EQUALS); + + struct file_handle **fhp = (lex_match_id (lexer, "IN") ? &in + : lex_match_id (lexer, "OUT") ? &out + : NULL); + if (!fhp) + { + lex_error_expecting (lexer, "IN", "OUT", "APPEND", "REPLACE"); + goto error; + } + if (!lex_force_match (lexer, T_LPAREN)) + goto error; + + fh_unref (*fhp); + if (lex_match (lexer, T_ASTERISK)) + *fhp = NULL; + else + { + *fhp = fh_parse (lexer, FH_REF_FILE, dataset_session (ds)); + if (!*fhp) + goto error; + } + + if (!lex_force_match (lexer, T_RPAREN)) + goto error; + } + } + + if (!in && !dataset_has_source (ds)) + { + msg (SE, _("No active file is defined and no external file is " + "specified on MATRIX=IN.")); + goto error; + } + + struct dictionary *d; + struct casereader *cr; + if (in) + { + cr = any_reader_open_and_decode (in, NULL, &d, NULL); + if (!cr) + goto error; + } + else + { + d = dict_clone (dataset_dict (ds)); + cr = proc_open (ds); + } + + struct matrix_reader *mr = matrix_reader_create (d, cr); + if (!mr) + { + casereader_destroy (cr); + dict_unref (d); + if (!in) + proc_commit (ds); + goto error; + } + + struct casewriter *cw; + if (out) + { + cw = any_writer_open (out, d); + if (!cw) + { + matrix_reader_destroy (mr); + casereader_destroy (cr); + dict_unref (d); + if (!in) + proc_commit (ds); + goto error; + } + } + else + cw = autopaging_writer_create (dict_get_proto (d)); + + for (;;) + { + struct matrix_material mm; + struct casereader *group; + if (!matrix_reader_next (&mm, mr, &group)) + break; + + bool add_corr = mm.cov && !mm.corr; + bool add_cov = mm.corr && !mm.cov && mm.var_matrix; + bool add_stddev = add_corr && !mm.var_matrix; + bool remove_corr = add_cov && !append; + bool remove_cov = add_corr && !append; + + struct ccase *model = casereader_peek (group, 0); + for (size_t i = 0; i < mr->n_fvars; i++) + *case_num_rw (model, mr->fvars[i]) = SYSMIS; + + for (;;) + { + struct ccase *c = casereader_read (group); + if (!c) + break; + + struct substring rowtype = matrix_reader_get_string (c, mr->rowtype); + if ((remove_cov && ss_equals_case (rowtype, ss_cstr ("COV"))) + || (remove_corr && ss_equals_case (rowtype, ss_cstr ("CORR")))) + case_unref (c); + else + casewriter_write (cw, c); + } + casereader_destroy (group); + + if (add_corr) + { + for (size_t y = 0; y < mr->n_cvars; y++) + { + struct ccase *c = case_clone (model); + for (size_t x = 0; x < mr->n_cvars; x++) + { + double d1 = gsl_matrix_get (mm.cov, x, x); + double d2 = gsl_matrix_get (mm.cov, y, y); + double cov = gsl_matrix_get (mm.cov, y, x); + *case_num_rw (c, mr->cvars[x]) = cov / sqrt (d1 * d2); + } + matrix_reader_set_string (c, mr->rowtype, ss_cstr ("CORR")); + matrix_reader_set_string (c, mr->varname, + ss_cstr (var_get_name (mr->cvars[y]))); + casewriter_write (cw, c); + } + } + + if (add_stddev) + { + struct ccase *c = case_clone (model); + for (size_t x = 0; x < mr->n_cvars; x++) + { + double variance = gsl_matrix_get (mm.cov, x, x); + *case_num_rw (c, mr->cvars[x]) = sqrt (variance); + } + matrix_reader_set_string (c, mr->rowtype, ss_cstr ("STDDEV")); + matrix_reader_set_string (c, mr->varname, ss_empty ()); + casewriter_write (cw, c); + } + + if (add_cov) + { + for (size_t y = 0; y < mr->n_cvars; y++) + { + struct ccase *c = case_clone (model); + for (size_t x = 0; x < mr->n_cvars; x++) + { + double d1 = gsl_matrix_get (mm.var_matrix, x, x); + double d2 = gsl_matrix_get (mm.var_matrix, y, y); + double corr = gsl_matrix_get (mm.corr, y, x); + *case_num_rw (c, mr->cvars[x]) = corr * sqrt (d1 * d2); + } + matrix_reader_set_string (c, mr->rowtype, ss_cstr ("COV")); + matrix_reader_set_string (c, mr->varname, + ss_cstr (var_get_name (mr->cvars[y]))); + casewriter_write (cw, c); + } + } + + case_unref (model); + } + + matrix_reader_destroy (mr); + if (!in) + proc_commit (ds); + if (out) + casewriter_destroy (cw); + else + { + dataset_set_dict (ds, dict_ref (d)); + dataset_set_source (ds, casewriter_make_reader (cw)); + } + + fh_unref (in); + fh_unref (out); + dict_unref (d); + return CMD_SUCCESS; + +error: + fh_unref (in); + fh_unref (out); + return CMD_FAILURE; +} + diff --git a/src/language/stats/factor.c b/src/language/stats/factor.c index 9fa0a8ad25..4feece0da0 100644 --- a/src/language/stats/factor.c +++ b/src/language/stats/factor.c @@ -1145,8 +1145,9 @@ cmd_factor (struct lexer *lexer, struct dataset *ds) if (! lex_force_match (lexer, T_RPAREN)) goto error; - mr = create_matrix_reader_from_case_reader (dict, matrix_reader, - &factor.vars, &factor.n_vars); + mr = matrix_reader_create (dict, matrix_reader); + factor.vars = xmemdup (mr->cvars, mr->n_cvars * sizeof *mr->cvars); + factor.n_vars = mr->n_cvars; } else { @@ -1177,6 +1178,13 @@ cmd_factor (struct lexer *lexer, struct dataset *ds) free (factor.vars); factor.vars = vars; factor.n_vars = n_vars; + + if (mr) + { + free (mr->cvars); + mr->cvars = xmemdup (vars, n_vars * sizeof *vars); + mr->n_cvars = n_vars; + } } else if (lex_match_id (lexer, "PLOT")) { @@ -1527,8 +1535,7 @@ cmd_factor (struct lexer *lexer, struct dataset *ds) { struct idata *id = idata_alloc (factor.n_vars); - while (next_matrix_from_reader (&id->mm, mr, - factor.vars, factor.n_vars)) + while (matrix_reader_next (&id->mm, mr, NULL)) { do_factor_by_matrix (&factor, id); @@ -1546,13 +1553,12 @@ cmd_factor (struct lexer *lexer, struct dataset *ds) if (! run_factor (ds, &factor)) goto error; - - destroy_matrix_reader (mr); + matrix_reader_destroy (mr); free (factor.vars); return CMD_SUCCESS; error: - destroy_matrix_reader (mr); + matrix_reader_destroy (mr); free (factor.vars); return CMD_FAILURE; } @@ -2005,9 +2011,10 @@ do_factor (const struct cmd_factor *factor, struct casereader *r) static void do_factor_by_matrix (const struct cmd_factor *factor, struct idata *idata) { - if (!idata->mm.cov && !idata->mm.corr) + if (!idata->mm.cov && !(idata->mm.corr && idata->mm.var_matrix)) { - msg (ME, _("The dataset has no complete covariance or correlation matrix.")); + msg (ME, _("The dataset has no covariance matrix or a " + "correlation matrix along with standard devications.")); return; } @@ -2055,7 +2062,8 @@ do_factor_by_matrix (const struct cmd_factor *factor, struct idata *idata) gsl_matrix_free (tmp); } - if (factor->print & PRINT_UNIVARIATE) + if (factor->print & PRINT_UNIVARIATE + && idata->mm.n && idata->mm.mean_matrix && idata->mm.var_matrix) { struct pivot_table *table = pivot_table_create ( N_("Descriptive Statistics")); @@ -2089,7 +2097,7 @@ do_factor_by_matrix (const struct cmd_factor *factor, struct idata *idata) pivot_table_submit (table); } - if (factor->print & PRINT_KMO) + if (factor->print & PRINT_KMO && idata->mm.n) { struct pivot_table *table = pivot_table_create ( N_("KMO and Bartlett's Test")); diff --git a/tests/automake.mk b/tests/automake.mk index 4d75aa84ab..2ddea5c8b1 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -358,6 +358,8 @@ TESTSUITE_AT = \ tests/language/data-io/list.at \ tests/language/data-io/match-files.at \ tests/language/data-io/matrix-data.at \ + tests/language/data-io/matrix-reader.at \ + tests/language/data-io/mconvert.at \ tests/language/data-io/print-space.at \ tests/language/data-io/print.at \ tests/language/data-io/save.at \ diff --git a/tests/language/data-io/matrix-data.at b/tests/language/data-io/matrix-data.at index e66d1aaa16..3c69db6cd4 100644 --- a/tests/language/data-io/matrix-data.at +++ b/tests/language/data-io/matrix-data.at @@ -561,7 +561,6 @@ Table: Debug Matrix Reader ,,var08,81.000,82.000,83.000,84.000,85.000,86.000,87.000,88.000,89.000 ,,var09,91.000,92.000,93.000,94.000,95.000,96.000,97.000,98.000,99.000 ,N,Value,1.000,2.000,3.000,4.000,5.000,6.000,7.000,8.000,9.000 -,Mean,Value,.000,.000,.000,.000,.000,.000,.000,.000,.000 ,Standard Deviation,Value,100.000,200.000,300.000,400.000,500.000,600.000,700.000,800.000,900.000 Table: Data List @@ -718,7 +717,6 @@ Table: Debug Matrix Reader ,,v4xxxxxxxxxxxxxxxxxxxxxzzzzzzzzzzzzzxxxxxxxxx,. ,. ,. ,. @&t@ ,N,Value,2.000,3.000,4.000,5.000 ,Mean,Value,1.000,2.000,3.000,4.000 -,Standard Deviation,Value,.000,.000,.000,.000 ]) AT_CLEANUP diff --git a/tests/language/data-io/matrix-reader.at b/tests/language/data-io/matrix-reader.at new file mode 100644 index 0000000000..5cf2e4822e --- /dev/null +++ b/tests/language/data-io/matrix-reader.at @@ -0,0 +1,54 @@ +AT_BANNER([Matrix reader]) + +AT_SETUP([Matrix reader - negative tests]) +AT_DATA([matrix-reader.sps], [dnl +DATA LIST LIST NOTABLE /s1 (f1) ROWTYPE_(a8) f1 (f1) VARNAME_ (a8) c1 to c5 (f8.2). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (f1) VARNAME_(a8) f1 (f1) ROWTYPE_ (a8) c1 to c5 (f8.2). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (f1) ROWTYPE_(f8) f1 (f1) VARNAME_ (a8) c1 to c5 (f8.2). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (f1) ROWTYPE_(a8) f1 (f1) VARNAME_ (f8) c1 to c5 (f8.2). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (f1) ROWTPYE_(a8) f1 (f1) VARNAME_ (a8) c1 to c5 (f8.2). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (f1) ROWTYPE_(a8) f1 (f1) VARANME_ (a8) c1 to c5 (f8.2). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (a1) ROWTYPE_(a8) f1 (f1) VARNAME_ (a8) c1 to c5 (f8.2). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (f1) ROWTYPE_(a8) f1 (a1) VARNAME_ (a8) c1 to c5 (f8.2). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (f1) ROWTYPE_(a8) f1 (f1) VARNAME_ (a8) c1 to c5 (a8). +DEBUG MATRIX READ NODATA. + +DATA LIST LIST NOTABLE /s1 (f1) ROWTYPE_(a8) f1 (f1) VARNAME_ (a8). +DEBUG MATRIX READ NODATA. +]) +AT_CHECK([pspp matrix-reader.sps --testing-mode -O format=csv], [1], [dnl +matrix-reader.sps:5: error: DEBUG MATRIX READ: Variable ROWTYPE_ must precede VARNAME_ in matrix file dictionary. + +matrix-reader.sps:8: error: DEBUG MATRIX READ: Matrix dataset variable ROWTYPE_ should be of string type. + +matrix-reader.sps:11: error: DEBUG MATRIX READ: Matrix dataset variable VARNAME_ should be of string type. + +matrix-reader.sps:14: error: DEBUG MATRIX READ: Matrix dataset lacks a variable called ROWTYPE_. + +matrix-reader.sps:17: error: DEBUG MATRIX READ: Matrix dataset lacks a variable called VARNAME_. + +matrix-reader.sps:20: error: DEBUG MATRIX READ: Matrix dataset variable s1 should be numeric. + +matrix-reader.sps:23: error: DEBUG MATRIX READ: Matrix dataset variable f1 should be numeric. + +matrix-reader.sps:26: error: DEBUG MATRIX READ: Matrix dataset variable c1 should be numeric. + +matrix-reader.sps:29: error: DEBUG MATRIX READ: Matrix dataset does not have any continuous variables. +]) +AT_CLEANUP \ No newline at end of file diff --git a/tests/language/data-io/mconvert.at b/tests/language/data-io/mconvert.at new file mode 100644 index 0000000000..fccd435f65 --- /dev/null +++ b/tests/language/data-io/mconvert.at @@ -0,0 +1,222 @@ +dnl PSPP - a program for statistical analysis. +dnl Copyright (C) 2021 Free Software Foundation, Inc. +dnl +dnl This program is free software: you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation, either version 3 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program. If not, see . +dnl +AT_BANNER([MCONVERT]) + +AT_SETUP([MCONVERT]) +AT_DATA([mconvert.sps], [dnl +MATRIX DATA VARIABLES=s ROWTYPE_ var01 TO var03/SPLIT s. +BEGIN DATA. +0 COV 1.0 +0 COV 1.0 16.0 +0 COV 8.1 18.0 81.0 +1 CORR 1 +1 CORR .25 1 +1 CORR .9 .5 1 +1 STDDEV 1 4 9 +END DATA. +FORMATS var01 TO var03(F5.2). +SPLIT FILE OFF. +MCONVERT. +LIST. +]) + +AT_CHECK([pspp -O format=csv mconvert.sps], [0], [dnl +Table: Data List +s,ROWTYPE_,VARNAME_,var01,var02,var03 +0,CORR,var01,1.00,.25,.90 +0,CORR,var02,.25,1.00,.50 +0,CORR,var03,.90,.50,1.00 +0,STDDEV,,1.00,4.00,9.00 +1,STDDEV,,1.00,4.00,9.00 +1,COV,var01,1.00,1.00,8.10 +1,COV,var02,1.00,16.00,18.00 +1,COV,var03,8.10,18.00,81.00 +]) +AT_CLEANUP + +AT_SETUP([MCONVERT from .sav file]) +AT_DATA([input.sps], [dnl +MATRIX DATA VARIABLES=s ROWTYPE_ var01 TO var03/SPLIT s. +BEGIN DATA. +0 COV 1.0 +0 COV 1.0 16.0 +0 COV 8.1 18.0 81.0 +1 CORR 1 +1 CORR .25 1 +1 CORR .9 .5 1 +1 STDDEV 1 4 9 +END DATA. +FORMATS var01 TO var03(F5.2). +SPLIT FILE OFF. +SAVE OUTFILE='input.sav'. +]) +AT_DATA([mconvert.sps], [dnl +MCONVERT MATRIX=IN('input.sav'). +LIST. +]) + +AT_CHECK([pspp -O format=csv input.sps]) +AT_CHECK([pspp -O format=csv mconvert.sps], [0], [dnl +Table: Data List +s,ROWTYPE_,VARNAME_,var01,var02,var03 +0,CORR,var01,1.00,.25,.90 +0,CORR,var02,.25,1.00,.50 +0,CORR,var03,.90,.50,1.00 +0,STDDEV,,1.00,4.00,9.00 +1,STDDEV,,1.00,4.00,9.00 +1,COV,var01,1.00,1.00,8.10 +1,COV,var02,1.00,16.00,18.00 +1,COV,var03,8.10,18.00,81.00 +]) +AT_CLEANUP + +AT_SETUP([MCONVERT to .sav file]) +AT_DATA([mconvert.sps], [dnl +MATRIX DATA VARIABLES=s ROWTYPE_ var01 TO var03/SPLIT s. +BEGIN DATA. +0 COV 1.0 +0 COV 1.0 16.0 +0 COV 8.1 18.0 81.0 +1 CORR 1 +1 CORR .25 1 +1 CORR .9 .5 1 +1 STDDEV 1 4 9 +END DATA. +FORMATS var01 TO var03(F5.2). +SPLIT FILE OFF. +MCONVERT/REPLACE/OUT('output.sav'). +LIST. +]) +AT_DATA([output.sps], [dnl +GET 'output.sav'. +LIST. +]) + +AT_CHECK([pspp -O format=csv mconvert.sps], [0], [dnl +Table: Data List +s,ROWTYPE_,VARNAME_,var01,var02,var03 +0,COV,var01,1.00,1.00,8.10 +0,COV,var02,1.00,16.00,18.00 +0,COV,var03,8.10,18.00,81.00 +1,CORR,var01,1.00,.25,.90 +1,CORR,var02,.25,1.00,.50 +1,CORR,var03,.90,.50,1.00 +1,STDDEV,,1.00,4.00,9.00 +]) +AT_CHECK([pspp -O format=csv output.sps], [0], [dnl +Table: Data List +s,ROWTYPE_,VARNAME_,var01,var02,var03 +0,CORR,var01,1.00,.25,.90 +0,CORR,var02,.25,1.00,.50 +0,CORR,var03,.90,.50,1.00 +0,STDDEV,,1.00,4.00,9.00 +1,STDDEV,,1.00,4.00,9.00 +1,COV,var01,1.00,1.00,8.10 +1,COV,var02,1.00,16.00,18.00 +1,COV,var03,8.10,18.00,81.00 +]) +AT_CLEANUP + +AT_SETUP([MCONVERT from .sav file to .sav file]) +AT_DATA([input.sps], [dnl +MATRIX DATA VARIABLES=s ROWTYPE_ var01 TO var03/SPLIT s. +BEGIN DATA. +0 COV 1.0 +0 COV 1.0 16.0 +0 COV 8.1 18.0 81.0 +1 CORR 1 +1 CORR .25 1 +1 CORR .9 .5 1 +1 STDDEV 1 4 9 +END DATA. +FORMATS var01 TO var03(F5.2). +SPLIT FILE OFF. +SAVE OUTFILE='input.sav'. +]) +AT_DATA([mconvert.sps], [dnl +MCONVERT MATRIX=IN('input.sav') OUT('output.sav')/REPLACE. +LIST. +]) +AT_DATA([output.sps], [dnl +GET 'output.sav'. +LIST. +]) + +AT_CHECK([pspp -O format=csv input.sps]) +AT_CHECK([pspp -O format=csv mconvert.sps], [1], [dnl +mconvert.sps:2: error: LIST: LIST is allowed only after the active dataset has been defined. +]) +AT_CHECK([pspp -O format=csv output.sps], [0], [dnl +Table: Data List +s,ROWTYPE_,VARNAME_,var01,var02,var03 +0,CORR,var01,1.00,.25,.90 +0,CORR,var02,.25,1.00,.50 +0,CORR,var03,.90,.50,1.00 +0,STDDEV,,1.00,4.00,9.00 +1,STDDEV,,1.00,4.00,9.00 +1,COV,var01,1.00,1.00,8.10 +1,COV,var02,1.00,16.00,18.00 +1,COV,var03,8.10,18.00,81.00 +]) +AT_CLEANUP + +AT_SETUP([MCONVERT with APPEND]) +AT_DATA([mconvert.sps], [dnl +MATRIX DATA VARIABLES=s ROWTYPE_ var01 TO var03/SPLIT s. +BEGIN DATA. +0 COV 1.0 +0 COV 1.0 16.0 +0 COV 8.1 18.0 81.0 +1 CORR 1 +1 CORR .25 1 +1 CORR .9 .5 1 +1 STDDEV 1 4 9 +END DATA. +FORMATS var01 TO var03(F5.2). +SPLIT FILE OFF. +MCONVERT/APPEND. +LIST. +]) + +AT_CHECK([pspp -O format=csv mconvert.sps], [0], [dnl +Table: Data List +s,ROWTYPE_,VARNAME_,var01,var02,var03 +0,COV,var01,1.00,1.00,8.10 +0,COV,var02,1.00,16.00,18.00 +0,COV,var03,8.10,18.00,81.00 +0,CORR,var01,1.00,.25,.90 +0,CORR,var02,.25,1.00,.50 +0,CORR,var03,.90,.50,1.00 +0,STDDEV,,1.00,4.00,9.00 +1,CORR,var01,1.00,.25,.90 +1,CORR,var02,.25,1.00,.50 +1,CORR,var03,.90,.50,1.00 +1,STDDEV,,1.00,4.00,9.00 +1,COV,var01,1.00,1.00,8.10 +1,COV,var02,1.00,16.00,18.00 +1,COV,var03,8.10,18.00,81.00 +]) +AT_CLEANUP + +AT_SETUP([MCONVERT negative test]) +AT_DATA([mconvert.sps], [MCONVERT. +]) +AT_CHECK([pspp mconvert.sps], [1], [dnl +mconvert.sps:1: error: MCONVERT: No active file is defined and no external file +is specified on MATRIX=IN. +]) +AT_CLEANUP \ No newline at end of file