From: John Darrington Date: Wed, 26 Apr 2017 17:06:53 +0000 (+0200) Subject: Added the MATRIX DATA procedure. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cc0b5800fcdde6126c4fc65b656f39c1459bf17c;p=pspp Added the MATRIX DATA procedure. --- diff --git a/NEWS b/NEWS index f3247652d0..dcdfb90bfe 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,8 @@ Please send PSPP bug reports to bug-gnu-pspp@gnu.org. Changes from 0.10.2 to 0.10.4: + * The MATRIX DATA command has been added. + * Some inappropriate properties in selection dialogs have been corrected. * The graphical user interface has a new menu: Edit|Options diff --git a/doc/data-io.texi b/doc/data-io.texi index cd0fc6386b..a5ba26f018 100644 --- a/doc/data-io.texi +++ b/doc/data-io.texi @@ -39,6 +39,7 @@ actually be read until a procedure is executed. * INPUT PROGRAM:: Support for complex input programs. * LIST:: List cases in the active dataset. * NEW FILE:: Clear the active dataset. +* MATRIX DATA:: Defining matrix material for procedures. * PRINT:: Display values in print formats. * PRINT EJECT:: Eject the current page then print. * PRINT SPACE:: Print blank lines. @@ -958,6 +959,125 @@ NEW FILE. @cmd{NEW FILE} command clears the dictionary and data from the current active dataset. +@node MATRIX DATA +@section MATRIX DATA +@vindex MATRIX DATA + +@display +MATRIX DATA + VARIABLES = @var{columns} + [eFILE='@var{file_name}'| INLINE @} + [/FORMAT= [@{LIST | FREE@}] + [@{UPPER | LOWER | FULL@}] + [@{DIAGONAL | NODIAGONAL@}]] + [/SPLIT= @var{split_variables}]. +@end display + +The @cmd{MATRIX DATA} command is used to input data in the form of matrices +which can subsequently be used by other commands. If the +@subcmd{FILE} is omitted or takes the value @samp{INLINE} then the command +should immediately followed by @cmd{BEGIN DATA}, @xref{BEGIN DATA}. + +There is one mandatory subcommand, @i{viz:} @subcmd{VARIABLES}, which defines +the @var{columns} of the matrix. +Normally, the @var{columns} should include an item called @samp{ROWTYPE_}. +The @samp{ROWTYPE_} column is used to specify the purpose of a row in the +matrix. + +@example +matrix data + variables = rowtype_ var01 TO var08. + +begin data. +mean 24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7 +sd 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5 +n 92 92 92 92 92 92 92 92 +corr 1.00 +corr .18 1.00 +corr -.22 -.17 1.00 +corr .36 .31 -.14 1.00 +corr .27 .16 -.12 .22 1.00 +corr .33 .15 -.17 .24 .21 1.00 +corr .50 .29 -.20 .32 .12 .38 1.00 +corr .17 .29 -.05 .20 .27 .20 .04 1.00 +end data. +@end example + +In the above example, the first three rows have ROWTYPE_ values of +@samp{mean}, @samp{sd}, and @samp{n}. These indicate that the rows +contain mean values, standard deviations and counts, respectively. +All subsequent rows have a ROWTYPE_ of @samp{corr} which indicates +that the values are correlation coefficients. + +Note that in this example, the upper right values of the @samp{corr} +values are blank, and in each case, the rightmost value is unity. +This is because, the +@subcmd{FORMAT} subcommand defaults to @samp{LOWER DIAGONAL}, +which indicates that only the lower triangle is provided in the data. +The opposite triangle is automatically inferred. One could instead +specify the upper triangle as follows: + + +@example +matrix data + variables = rowtype_ var01 TO var08 + /format = upper nodiagonal. + +begin data. +mean 24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7 +sd 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5 +n 92 92 92 92 92 92 92 92 +corr .17 .50 -.33 .27 .36 -.22 .18 +corr .29 .29 -.20 .32 .12 .38 +corr .05 .20 -.15 .16 .21 +corr .20 .32 -.17 .12 +corr .27 .12 -.24 +corr -.20 -.38 +corr .04 +end data. +@end example + +In this example the @samp{NODIAGONAL} keyword is used. Accordingly +the diagonal values of the matrix are omitted. This implies that +there is one less @samp{corr} line than there are variables. +If the @samp{FULL} option is passed to the @subcmd{FORMAT} subcommand, +then all the matrix elements must be provided, including the diagonal +elements. + +In the preceding examples, each matrix row has been specified on a +single line. If you pass the keyword @var{FREE} to @subcmd{FORMAT} +then the data may be data for several matrix rows may be specified on +the same line, or a single row may be split across lines. + +The @subcmd{SPLIT} is used to indicate that variables are to be +considered as split variables. For example, the following +defines two matrices using the variable @samp{S1} to distinguish +between them. + +@example +matrix data + variables = s1 rowtype_ var01 TO var04 + /split = s1 + /format = full diagonal. + +begin data +0 mean 34 35 36 37 +0 sd 22 11 55 66 +0 n 99 98 99 92 +0 corr 1 9 8 7 +0 corr 9 1 6 5 +0 corr 8 6 1 4 +0 corr 7 5 4 1 +1 mean 44 45 34 39 +1 sd 23 15 51 46 +1 n 98 34 87 23 +1 corr 1 2 3 4 +1 corr 2 1 5 6 +1 corr 3 5 1 7 +1 corr 4 6 7 1 +end data. +@end example + @node PRINT @section PRINT @vindex PRINT diff --git a/src/language/command.def b/src/language/command.def index 49becdf922..f2d5033185 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -52,6 +52,7 @@ DEF_CMD (S_INITIAL | S_DATA, 0, "GET DATA", cmd_get_data) DEF_CMD (S_INITIAL | S_DATA, 0, "IMPORT", cmd_import) DEF_CMD (S_INITIAL | S_DATA, 0, "INPUT PROGRAM", cmd_input_program) DEF_CMD (S_INITIAL | S_DATA, 0, "MATCH FILES", cmd_match_files) +DEF_CMD (S_INITIAL | S_DATA | S_INPUT_PROGRAM | S_FILE_TYPE, 0, "MATRIX DATA", cmd_matrix) DEF_CMD (S_INITIAL | S_DATA, 0, "UPDATE", cmd_update) DEF_CMD (S_INITIAL | S_DATA, 0, "DATASET ACTIVATE", cmd_dataset_activate) DEF_CMD (S_INITIAL | S_DATA, 0, "DATASET DECLARE", cmd_dataset_declare) @@ -206,7 +207,6 @@ UNIMPL_CMD ("LOGLINEAR", "General model fitting") UNIMPL_CMD ("MANOVA", "Multivariate analysis of variance") UNIMPL_CMD ("MAPS", "Geographical display") UNIMPL_CMD ("MATRIX", "Matrix processing") -UNIMPL_CMD ("MATRIX DATA", "Matrix data input") UNIMPL_CMD ("MCONVERT", "Convert covariance/correlation matrices") UNIMPL_CMD ("MIXED", "Mixed linear models") UNIMPL_CMD ("MODEL CLOSE", "Close server connection") diff --git a/src/language/data-io/automake.mk b/src/language/data-io/automake.mk index 770300ee77..1b3f5a9383 100644 --- a/src/language/data-io/automake.mk +++ b/src/language/data-io/automake.mk @@ -21,6 +21,7 @@ language_data_io_sources = \ src/language/data-io/placement-parser.h \ src/language/data-io/print-space.c \ src/language/data-io/print.c \ + src/language/data-io/matrix-data.c \ src/language/data-io/save-translate.c \ src/language/data-io/save.c \ src/language/data-io/trim.c \ diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 461289bc60..0b98f40a07 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -306,7 +306,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns); } else - data_parser_make_active_file (parser, ds, reader, dict); + data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL); fh_unref (fh); free (encoding); diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index 960505a5ce..63f9f0eda6 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -52,6 +52,7 @@ struct data_parser /* DP_DELIMITED parsers only. */ bool span; /* May cases span multiple records? */ bool empty_line_has_field; /* Does an empty line have an (empty) field? */ + bool warn_missing_fields; /* Should missing fields be considered errors? */ struct substring quotes; /* Characters that can quote separators. */ bool quote_escape; /* Doubled quote acts as escape? */ struct substring soft_seps; /* Two soft separators act like just one. */ @@ -92,6 +93,7 @@ data_parser_create (const struct dictionary *dict) parser->span = true; parser->empty_line_has_field = false; + parser->warn_missing_fields = true; ss_alloc_substring (&parser->quotes, ss_cstr ("\"'")); parser->quote_escape = false; ss_alloc_substring (&parser->soft_seps, ss_cstr (CC_SPACES)); @@ -186,6 +188,21 @@ data_parser_set_empty_line_has_field (struct data_parser *parser, parser->empty_line_has_field = empty_line_has_field; } + +/* If WARN_MISSING_FIELDS is true, configures PARSER to emit a warning + and cause an error condition when a missing field is encountered. + If WARN_MISSING_FIELDS is false, PARSER will silently fill such + fields with the system missing value. + + This setting affects parsing of DP_DELIMITED files only. */ +void +data_parser_set_warn_missing_fields (struct data_parser *parser, + bool warn_missing_fields) +{ + parser->warn_missing_fields = warn_missing_fields; +} + + /* Sets the characters that may be used for quoting field contents to QUOTES. If QUOTES is empty, quoting will be disabled. @@ -386,10 +403,11 @@ data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, *FIELD is set to the field content. The caller must not or destroy this constant string. - After parsing the field, sets the current position in the - record to just past the field and any trailing delimiter. - Returns 0 on failure or a 1-based column number indicating the - beginning of the field on success. */ + Sets *FIRST_COLUMN to the 1-based column number of the start of + the extracted field, and *LAST_COLUMN to the end of the extracted + field. + + Returns true on success, false on failure. */ static bool cut_field (const struct data_parser *parser, struct dfm_reader *reader, int *first_column, int *last_column, struct string *tmp, @@ -610,7 +628,7 @@ parse_delimited_no_span (const struct data_parser *parser, if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s)) { - if (f < end - 1 && settings_get_undefined ()) + if (f < end - 1 && settings_get_undefined () && parser->warn_missing_fields) msg (DW, _("Missing value(s) for all variables from %s onward. " "These will be filled with the system-missing value " "or blanks, as appropriate."), @@ -740,23 +758,35 @@ static const struct casereader_class data_parser_casereader_class; transferred to the dataset. */ void data_parser_make_active_file (struct data_parser *parser, struct dataset *ds, - struct dfm_reader *reader, - struct dictionary *dict) + struct dfm_reader *reader, + struct dictionary *dict, + struct casereader* (*func)(struct casereader *, + const struct dictionary *, + void *), + void *ud) { struct data_parser_casereader *r; - struct casereader *casereader; + struct casereader *casereader0; + struct casereader *casereader1; r = xmalloc (sizeof *r); r->parser = parser; r->reader = reader; r->proto = caseproto_ref (dict_get_proto (dict)); - casereader = casereader_create_sequential (NULL, r->proto, + casereader0 = casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, &data_parser_casereader_class, r); + + if (func) + casereader1 = func (casereader0, dict, ud); + else + casereader1 = casereader0; + dataset_set_dict (ds, dict); - dataset_set_source (ds, casereader); + dataset_set_source (ds, casereader1); } + static struct ccase * data_parser_casereader_read (struct casereader *reader UNUSED, void *r_) { diff --git a/src/language/data-io/data-parser.h b/src/language/data-io/data-parser.h index 560eed30a2..339783106d 100644 --- a/src/language/data-io/data-parser.h +++ b/src/language/data-io/data-parser.h @@ -52,6 +52,9 @@ void data_parser_set_span (struct data_parser *, bool may_cases_span_records); void data_parser_set_empty_line_has_field (struct data_parser *, bool empty_line_has_field); +void data_parser_set_warn_missing_fields (struct data_parser *parser, + bool warn_missing_fields); + void data_parser_set_quotes (struct data_parser *, struct substring); void data_parser_set_quote_escape (struct data_parser *, bool escape); void data_parser_set_soft_delimiters (struct data_parser *, struct substring); @@ -76,7 +79,13 @@ bool data_parser_parse (struct data_parser *, /* Uses for a configured parser. */ void data_parser_output_description (struct data_parser *, const struct file_handle *); +struct casereader; void data_parser_make_active_file (struct data_parser *, struct dataset *, - struct dfm_reader *, struct dictionary *); + struct dfm_reader *, struct dictionary *, + struct casereader* (*func)(struct casereader *, + const struct dictionary *, + void *), + void *ud); + #endif /* language/data-io/data-parser.h */ diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c index e34b86bc1a..a0608f6dd3 100644 --- a/src/language/data-io/get-data.c +++ b/src/language/data-io/get-data.c @@ -673,7 +673,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) if (reader == NULL) goto error; - data_parser_make_active_file (parser, ds, reader, dict); + data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL); fh_unref (fh); free (encoding); return CMD_SUCCESS; diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c new file mode 100644 index 0000000000..209ccb7af5 --- /dev/null +++ b/src/language/data-io/matrix-data.c @@ -0,0 +1,478 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2017 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "data/case.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/transformations.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/data-parser.h" +#include "language/data-io/data-reader.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/inpt-pgm.h" +#include "language/data-io/placement-parser.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" + +#include "gl/xsize.h" +#include "gl/xalloc.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +/* DATA LIST transformation data. */ +struct data_list_trns + { + struct data_parser *parser; /* Parser. */ + struct dfm_reader *reader; /* Data file reader. */ + struct variable *end; /* Variable specified on END subcommand. */ + }; + +static trns_free_func data_list_trns_free; +static trns_proc_func data_list_trns_proc; + +enum diagonal + { + DIAGONAL, + NO_DIAGONAL + }; + +enum triangle + { + LOWER, + UPPER, + FULL + }; + +struct matrix_format +{ + enum triangle triangle; + enum diagonal diagonal; + const struct variable *rowtype; + const struct variable *varname; + int n_continuous_vars; +}; + +/* +valid rowtype_ values: + CORR, + COV, + MAT, + + + MSE, + DFE, + MEAN, + STDDEV (or SD), + N_VECTOR (or N), + N_SCALAR, + N_MATRIX, + COUNT, + PROX. +*/ + +/* Sets the value of OUTCASE which corresponds to MFORMAT's varname variable + to the string STR. VAR must be of type string. + */ +static void +set_varname_column (struct ccase *outcase, const struct matrix_format *mformat, + const char *str, int len) +{ + const struct variable *var = mformat->varname; + uint8_t *s = value_str_rw (case_data_rw (outcase, var), len); + + strncpy ((char *) s, str, len); +} + + +static struct casereader * +preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux) +{ + struct matrix_format *mformat = aux; + const struct caseproto *proto = casereader_get_proto (casereader0); + struct casewriter *writer; + writer = autopaging_writer_create (proto); + + double *temp_matrix = + xcalloc (sizeof (*temp_matrix), + mformat->n_continuous_vars * mformat->n_continuous_vars); + + /* Make an initial pass to populate our temporary matrix */ + struct casereader *pass0 = casereader_clone (casereader0); + struct ccase *c; + int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0; + for (; (c = casereader_read (pass0)) != NULL; case_unref (c)) + { + int c_offset = (mformat->triangle == UPPER) ? row : 0; + if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL) + c_offset++; + const union value *v = case_data (c, mformat->rowtype); + const char *val = (const char *) value_str (v, 8); + if (0 == strncasecmp (val, "corr ", 8) || + 0 == strncasecmp (val, "cov ", 8)) + { + int col; + for (col = c_offset; col < mformat->n_continuous_vars; ++col) + { + const struct variable *var = + dict_get_var (dict, + 1 + col - c_offset + var_get_dict_index (mformat->varname)); + + double e = case_data (c, var)->f; + if (e == SYSMIS) + continue; + temp_matrix [col + mformat->n_continuous_vars * row] = e; + temp_matrix [row + mformat->n_continuous_vars * col] = e; + } + row++; + } + } + casereader_destroy (pass0); + + /* Now make a second pass to fill in the other triangle from our + temporary matrix */ + const int idx = var_get_dict_index (mformat->varname); + row = 0; + struct ccase *prev_case = NULL; + for (; (c = casereader_read (casereader0)) != NULL; prev_case = c) + { + case_unref (prev_case); + struct ccase *outcase = case_create (proto); + case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto)); + const union value *v = case_data (c, mformat->rowtype); + const char *val = (const char *) value_str (v, 8); + if (0 == strncasecmp (val, "corr ", 8) || + 0 == strncasecmp (val, "cov ", 8)) + { + int col; + const struct variable *var = dict_get_var (dict, idx + 1 + row); + set_varname_column (outcase, mformat, var_get_name (var), 8); + value_copy (case_data_rw (outcase, mformat->rowtype), v, 8); + + for (col = 0; col < mformat->n_continuous_vars; ++col) + { + union value *dest_val = + case_data_rw_idx (outcase, + 1 + col + var_get_dict_index (mformat->varname)); + dest_val->f = temp_matrix [col + mformat->n_continuous_vars * row]; + if (col == row && mformat->diagonal == NO_DIAGONAL) + dest_val->f = 1.0; + } + row++; + } + else + { + set_varname_column (outcase, mformat, " ", 8); + } + + casewriter_write (writer, outcase); + } + + /* If NODIAGONAL is specified, then a final case must be written */ + if (mformat->diagonal == NO_DIAGONAL) + { + int col; + struct ccase *outcase = case_create (proto); + + if (prev_case) + case_copy (outcase, 0, prev_case, 0, caseproto_get_n_widths (proto)); + + + const struct variable *var = dict_get_var (dict, idx + 1 + row); + set_varname_column (outcase, mformat, var_get_name (var), 8); + + for (col = 0; col < mformat->n_continuous_vars; ++col) + { + union value *dest_val = + case_data_rw_idx (outcase, 1 + col + + var_get_dict_index (mformat->varname)); + dest_val->f = temp_matrix [col + mformat->n_continuous_vars * row]; + if (col == row && mformat->diagonal == NO_DIAGONAL) + dest_val->f = 1.0; + } + + casewriter_write (writer, outcase); + } + + if (prev_case) + case_unref (prev_case); + + free (temp_matrix); + struct casereader *reader1 = casewriter_make_reader (writer); + casereader_destroy (casereader0); + return reader1; +} + +int +cmd_matrix (struct lexer *lexer, struct dataset *ds) +{ + struct dictionary *dict; + struct data_parser *parser; + struct dfm_reader *reader; + struct file_handle *fh = NULL; + char *encoding = NULL; + struct matrix_format mformat; + int i; + size_t n_names; + char **names = NULL; + + mformat.triangle = LOWER; + mformat.diagonal = DIAGONAL; + + dict = (in_input_program () + ? dataset_dict (ds) + : dict_create (get_default_encoding ())); + parser = data_parser_create (dict); + reader = NULL; + + data_parser_set_type (parser, DP_DELIMITED); + data_parser_set_warn_missing_fields (parser, false); + data_parser_set_span (parser, false); + + mformat.rowtype = dict_create_var (dict, "ROWTYPE_", 8); + mformat.varname = dict_create_var (dict, "VARNAME_", 8); + + mformat.n_continuous_vars = 0; + + if (! lex_force_match_id (lexer, "VARIABLES")) + goto error; + + lex_match (lexer, T_EQUALS); + + if (! parse_mixed_vars (lexer, dict, &names, &n_names, 0)) + { + int i; + for (i = 0; i < n_names; ++i) + free (names[i]); + free (names); + goto error; + } + + for (i = 0; i < n_names; ++i) + { + if (0 == strcasecmp (names[i], "ROWTYPE_")) + { + const struct fmt_spec fmt = fmt_for_input (FMT_A, 8, 0); + data_parser_add_delimited_field (parser, + &fmt, + var_get_case_index (mformat.rowtype), + "ROWTYPE_"); + } + else + { + const struct fmt_spec fmt = fmt_for_input (FMT_F, 10, 4); + struct variable *v = dict_create_var (dict, names[i], 0); + var_set_both_formats (v, &fmt); + data_parser_add_delimited_field (parser, + &fmt, + var_get_case_index (mformat.varname) + + ++mformat.n_continuous_vars, + names[i]); + } + } + for (i = 0; i < n_names; ++i) + free (names[i]); + free (names); + + while (lex_token (lexer) != T_ENDCMD) + { + if (! lex_force_match (lexer, T_SLASH)) + goto error; + + if (lex_match_id (lexer, "FORMAT")) + { + lex_match (lexer, T_EQUALS); + + while (lex_token (lexer) != T_SLASH && (lex_token (lexer) != T_ENDCMD)) + { + if (lex_match_id (lexer, "LIST")) + { + data_parser_set_span (parser, false); + } + else if (lex_match_id (lexer, "FREE")) + { + data_parser_set_span (parser, true); + } + else if (lex_match_id (lexer, "UPPER")) + { + mformat.triangle = UPPER; + } + else if (lex_match_id (lexer, "LOWER")) + { + mformat.triangle = LOWER; + } + else if (lex_match_id (lexer, "FULL")) + { + mformat.triangle = FULL; + } + else if (lex_match_id (lexer, "DIAGONAL")) + { + mformat.diagonal = DIAGONAL; + } + else if (lex_match_id (lexer, "NODIAGONAL")) + { + mformat.diagonal = NO_DIAGONAL; + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + } + else if (lex_match_id (lexer, "FILE")) + { + lex_match (lexer, T_EQUALS); + fh_unref (fh); + fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL); + if (fh == NULL) + goto error; + } + else if (lex_match_id (lexer, "SPLIT")) + { + lex_match (lexer, T_EQUALS); + struct variable **split_vars = NULL; + size_t n_split_vars; + if (! parse_variables (lexer, dict, &split_vars, &n_split_vars, 0)) + { + free (split_vars); + goto error; + } + int i; + for (i = 0; i < n_split_vars; ++i) + { + const struct fmt_spec fmt = fmt_for_input (FMT_F, 4, 0); + var_set_both_formats (split_vars[i], &fmt); + } + dict_reorder_vars (dict, split_vars, n_split_vars); + mformat.n_continuous_vars -= n_split_vars; + free (split_vars); + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + + if (mformat.diagonal == NO_DIAGONAL && mformat.triangle == FULL) + { + msg (SE, _("FORMAT = FULL and FORMAT = NODIAGONAL are mutually exclusive.")); + goto error; + } + + if (fh == NULL) + fh = fh_inline_file (); + fh_set_default_handle (fh); + + if (!data_parser_any_fields (parser)) + { + msg (SE, _("At least one variable must be specified.")); + goto error; + } + + if (lex_end_of_command (lexer) != CMD_SUCCESS) + goto error; + + reader = dfm_open_reader (fh, lexer, encoding); + if (reader == NULL) + goto error; + + if (in_input_program ()) + { + struct data_list_trns *trns = xmalloc (sizeof *trns); + trns->parser = parser; + trns->reader = reader; + trns->end = NULL; + add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns); + } + else + { + data_parser_make_active_file (parser, ds, reader, dict, preprocess, &mformat); + } + + fh_unref (fh); + free (encoding); + + return CMD_DATA_LIST; + + error: + data_parser_destroy (parser); + if (!in_input_program ()) + dict_destroy (dict); + fh_unref (fh); + free (encoding); + return CMD_CASCADING_FAILURE; +} + + +/* Input procedure. */ + +/* Destroys DATA LIST transformation TRNS. + Returns true if successful, false if an I/O error occurred. */ +static bool +data_list_trns_free (void *trns_) +{ + struct data_list_trns *trns = trns_; + data_parser_destroy (trns->parser); + dfm_close_reader (trns->reader); + free (trns); + return true; +} + +/* Handle DATA LIST transformation TRNS, parsing data into *C. */ +static int +data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) +{ + struct data_list_trns *trns = trns_; + int retval; + + *c = case_unshare (*c); + if (data_parser_parse (trns->parser, trns->reader, *c)) + retval = TRNS_CONTINUE; + else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1) + { + /* An I/O error, or encountering end of file for a second + time, should be escalated into a more serious error. */ + retval = TRNS_ERROR; + } + else + retval = TRNS_END_FILE; + + /* If there was an END subcommand handle it. */ + if (trns->end != NULL) + { + double *end = &case_data_rw (*c, trns->end)->f; + if (retval == TRNS_END_FILE) + { + *end = 1.0; + retval = TRNS_CONTINUE; + } + else + *end = 0.0; + } + + return retval; +} + diff --git a/tests/automake.mk b/tests/automake.mk index a736300c23..0f297f7ca0 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -304,6 +304,7 @@ TESTSUITE_AT = \ tests/language/data-io/inpt-pgm.at \ tests/language/data-io/list.at \ tests/language/data-io/match-files.at \ + tests/language/data-io/matrix-data.at \ tests/language/data-io/print-space.at \ tests/language/data-io/print.at \ tests/language/data-io/save.at \ diff --git a/tests/language/data-io/matrix-data.at b/tests/language/data-io/matrix-data.at new file mode 100644 index 0000000000..6cc1e67bb9 --- /dev/null +++ b/tests/language/data-io/matrix-data.at @@ -0,0 +1,237 @@ +AT_BANNER([MATRIX DATA]) + +AT_SETUP([Matrix data (lower file)]) + +AT_DATA([matrix-data.pspp], [dnl +matrix data + variables = rowtype_ + var01 TO var08 + /format = lower diagonal + /file = 'matrix.dat' + . + +list. +]) + +AT_DATA([matrix.dat], [dnl +mean 24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7 +sd 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5 +n 92 92 92 92 92 92 92 92 +corr 1.00 +corr .18 1.00 +corr -.22 -.17 1.00 +corr .36 .31 -.14 1.00 +corr .27 .16 -.12 .22 1.00 +corr .33 .15 -.17 .24 .21 1.00 +corr .50 .29 -.20 .32 .12 .38 1.00 +corr .17 .29 -.05 .20 .27 .20 .04 1.00 +]) + + +AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl +Table: Data List +ROWTYPE_,VARNAME_,var01,var02,var03,var04,var05,var06,var07,var08 +mean ,,24.3000,5.4000,69.7000,20.1000,13.4000,2.7000,27.9000,3.7000 +sd ,,5.7000,1.5000,23.5000,5.8000,2.8000,4.5000,5.4000,1.5000 +n ,,92.0000,92.0000,92.0000,92.0000,92.0000,92.0000,92.0000,92.0000 +corr ,var01,1.0000,.1800,-.2200,.3600,.2700,.3300,.5000,.1700 +corr ,var02,.1800,1.0000,-.1700,.3100,.1600,.1500,.2900,.2900 +corr ,var03,-.2200,-.1700,1.0000,-.1400,-.1200,-.1700,-.2000,-.0500 +corr ,var04,.3600,.3100,-.1400,1.0000,.2200,.2400,.3200,.2000 +corr ,var05,.2700,.1600,-.1200,.2200,1.0000,.2100,.1200,.2700 +corr ,var06,.3300,.1500,-.1700,.2400,.2100,1.0000,.3800,.2000 +corr ,var07,.5000,.2900,-.2000,.3200,.1200,.3800,1.0000,.0400 +corr ,var08,.1700,.2900,-.0500,.2000,.2700,.2000,.0400,1.0000 +]) +AT_CLEANUP + + + +AT_SETUP([Matrix data (upper)]) + +AT_DATA([matrix-data.pspp], [dnl +matrix data + variables = rowtype_ var01 var02 var03 var04 + /format = upper diagonal. + +begin data +mean 34 35 36 37 +sd 22 11 55 66 +n 100 101 102 103 +corr 1 9 8 7 +corr 1 6 5 +corr 1 4 +corr 1 +end data. + +list. +]) + +AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl +Table: Data List +ROWTYPE_,VARNAME_,var01,var02,var03,var04 +mean ,,34.0000,35.0000,36.0000,37.0000 +sd ,,22.0000,11.0000,55.0000,66.0000 +n ,,100.0000,101.0000,102.0000,103.0000 +corr ,var01,1.0000,9.0000,8.0000,7.0000 +corr ,var02,9.0000,1.0000,6.0000,5.0000 +corr ,var03,8.0000,6.0000,1.0000,4.0000 +corr ,var04,7.0000,5.0000,4.0000,1.0000 +]) + +AT_CLEANUP + +AT_SETUP([Matrix data (full)]) + +dnl Just for fun, this one is in a different case. +AT_DATA([matrix-data.pspp], [dnl +matrix data + variables = ROWTYPE_ var01 var02 var03 var04 + /format = full diagonal. + +begin data +MEAN 34 35 36 37 +SD 22 11 55 66 +N 100 101 102 103 +CORR 1 9 8 7 +CORR 9 1 6 5 +CORR 8 6 1 4 +CORR 7 5 4 1 +end data. + +list. +]) + + +AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl +Table: Data List +ROWTYPE_,VARNAME_,var01,var02,var03,var04 +MEAN ,,34.0000,35.0000,36.0000,37.0000 +SD ,,22.0000,11.0000,55.0000,66.0000 +N ,,100.0000,101.0000,102.0000,103.0000 +CORR ,var01,1.0000,9.0000,8.0000,7.0000 +CORR ,var02,9.0000,1.0000,6.0000,5.0000 +CORR ,var03,8.0000,6.0000,1.0000,4.0000 +CORR ,var04,7.0000,5.0000,4.0000,1.0000 +]) + +AT_CLEANUP + + +AT_SETUP([Matrix data (upper nodiagonal)]) + +AT_DATA([matrix-data.pspp], [dnl +matrix data + variables = rowtype_ var01 var02 var03 var04 + /format = upper nodiagonal. + +begin data +mean 34 35 36 37 +sd 22 11 55 66 +n 100 101 102 103 +corr 9 8 7 +corr 6 5 +corr 4 +end data. + +list. +]) + +AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl +Table: Data List +ROWTYPE_,VARNAME_,var01,var02,var03,var04 +mean ,,34.0000,35.0000,36.0000,37.0000 +sd ,,22.0000,11.0000,55.0000,66.0000 +n ,,100.0000,101.0000,102.0000,103.0000 +corr ,var01,1.0000,9.0000,8.0000,7.0000 +corr ,var02,9.0000,1.0000,6.0000,5.0000 +corr ,var03,8.0000,6.0000,1.0000,4.0000 +corr ,var04,7.0000,5.0000,4.0000,1.0000 +]) + +AT_CLEANUP + + +AT_SETUP([Matrix data (lower nodiagonal)]) + +AT_DATA([matrix-data.pspp], [dnl +matrix data + variables = rowtype_ var01 var02 var03 var04 + /format = lower nodiagonal. + +begin data +mean 34 35 36 37 +sd 22 11 55 66 +n 100 101 102 103 +corr 9 +corr 8 6 +corr 7 5 4 +end data. + +list. +]) + +AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl +Table: Data List +ROWTYPE_,VARNAME_,var01,var02,var03,var04 +mean ,,34.0000,35.0000,36.0000,37.0000 +sd ,,22.0000,11.0000,55.0000,66.0000 +n ,,100.0000,101.0000,102.0000,103.0000 +corr ,var01,1.0000,9.0000,8.0000,7.0000 +corr ,var02,9.0000,1.0000,6.0000,5.0000 +corr ,var03,8.0000,6.0000,1.0000,4.0000 +corr ,var04,7.0000,5.0000,4.0000,1.0000 +]) + +AT_CLEANUP + + +AT_SETUP([Matrix data split]) + +AT_DATA([matrix-data.pspp], [dnl +matrix data + variables = s1 s2 rowtype_ var01 TO var04 + /split = s1 s2 + /format = full diagonal. + +begin data +1 1 mean 34 35 36 37 +1 1 sd 22 11 55 66 +1 0 n 100 101 102 103 +1 0 corr 1 9 8 7 +0 1 corr 9 1 6 5 +0 1 corr 8 6 1 4 +0 0 corr 7 5 4 1 +end data. + +display dictionary. + +list. +]) + +AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl +Variable,Description,Position +s1,Format: F4.0,1 +s2,Format: F4.0,2 +ROWTYPE_,Format: A8,3 +VARNAME_,Format: A8,4 +var01,Format: F10.4,5 +var02,Format: F10.4,6 +var03,Format: F10.4,7 +var04,Format: F10.4,8 + +Table: Data List +s1,s2,ROWTYPE_,VARNAME_,var01,var02,var03,var04 +1,1,mean ,,34.0000,35.0000,36.0000,37.0000 +1,1,sd ,,22.0000,11.0000,55.0000,66.0000 +1,0,n ,,100.0000,101.0000,102.0000,103.0000 +1,0,corr ,var01,1.0000,9.0000,8.0000,7.0000 +0,1,corr ,var02,9.0000,1.0000,6.0000,5.0000 +0,1,corr ,var03,8.0000,6.0000,1.0000,4.0000 +0,0,corr ,var04,7.0000,5.0000,4.0000,1.0000 +]) + +AT_CLEANUP + + +