X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-data.c;h=752a0be3d26105c5930aa8e1b15b0b093ffcebd1;hb=b4e3d932f4dfbdf3e51c81b78daabb40e23528b2;hp=f958584b5403f7442984e8922e783b355ec0e851;hpb=480a0746507ce73d26f528b56dc3ed80195096e0;p=pspp diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index f958584b54..752a0be3d2 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -1,1990 +1,583 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. +/* PSPP - a program for statistical analysis. + Copyright (C) 2017 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "size_max.h" +#include "data/case.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/transformations.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/data-parser.h" +#include "language/data-io/data-reader.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/inpt-pgm.h" +#include "language/data-io/placement-parser.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" + +#include "gl/xsize.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) - -/* FIXME: /N subcommand not implemented. It should be pretty simple, - too. */ - -/* Different types of variables for MATRIX DATA procedure. Order is - important: these are used for sort keys. */ -enum + +/* DATA LIST transformation data. */ +struct data_list_trns { - MXD_SPLIT, /* SPLIT FILE variables. */ - MXD_ROWTYPE, /* ROWTYPE_. */ - MXD_FACTOR, /* Factor variables. */ - MXD_VARNAME, /* VARNAME_. */ - MXD_CONTINUOUS, /* Continuous variables. */ - - MXD_COUNT + struct data_parser *parser; /* Parser. */ + struct dfm_reader *reader; /* Data file reader. */ + struct variable *end; /* Variable specified on END subcommand. */ }; -/* Format type enums. */ -enum format_type +static trns_free_func data_list_trns_free; +static trns_proc_func data_list_trns_proc; + +enum diagonal { - LIST, - FREE + DIAGONAL, + NO_DIAGONAL }; -/* Matrix section enums. */ -enum matrix_section +enum triangle { LOWER, UPPER, FULL }; -/* Diagonal inclusion enums. */ -enum include_diagonal - { - DIAGONAL, - NODIAGONAL - }; - -/* CONTENTS types. */ -enum content_type - { - N_VECTOR, - N_SCALAR, - N_MATRIX, - MEAN, - STDDEV, - COUNT, - MSE, - DFE, - MAT, - COV, - CORR, - PROX, - - LPAREN, - RPAREN, - EOC - }; - -/* 0=vector, 1=matrix, 2=scalar. */ -static const int content_type[PROX + 1] = - { - 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, - }; - -/* Name of each content type. */ -static const char *const content_names[PROX + 1] = - { - "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE", - "DFE", "MAT", "COV", "CORR", "PROX", - }; - -/* A MATRIX DATA input program. */ -struct matrix_data_pgm - { - struct pool *container; /* Arena used for all allocations. */ - struct dfm_reader *reader; /* Data file to read. */ - - /* Format. */ - enum format_type fmt; /* LIST or FREE. */ - enum matrix_section section;/* LOWER or UPPER or FULL. */ - enum include_diagonal diag; /* DIAGONAL or NODIAGONAL. */ - - int explicit_rowtype; /* ROWTYPE_ specified explicitly in data? */ - struct variable *rowtype_, *varname_; /* ROWTYPE_, VARNAME_ variables. */ - - struct variable *single_split; /* Single SPLIT FILE variable. */ - - /* Factor variables. */ - size_t n_factors; /* Number of factor variables. */ - struct variable **factors; /* Factor variables. */ - int is_per_factor[PROX + 1]; /* Is there per-factor data? */ - - int cells; /* Number of cells, or -1 if none. */ - - int pop_n; /* Population N specified by user. */ - - /* CONTENTS subcommand. */ - int contents[EOC * 3 + 1]; /* Contents. */ - int n_contents; /* Number of entries. */ - - /* Continuous variables. */ - int n_continuous; /* Number of continuous variables. */ - int first_continuous; /* Index into dictionary of - first continuous variable. */ - }; +static const int ROWTYPE_WIDTH = 8; -/* Auxiliary data attached to MATRIX DATA variables. */ -struct mxd_var - { - int var_type; /* Variable type. */ - int sub_type; /* Subtype. */ - }; - -static const struct case_source_class matrix_data_with_rowtype_source_class; -static const struct case_source_class matrix_data_without_rowtype_source_class; +struct matrix_format +{ + enum triangle triangle; + enum diagonal diagonal; + const struct variable *rowtype; + const struct variable *varname; + int n_continuous_vars; + struct variable **split_vars; + size_t n_split_vars; +}; + +/* +valid rowtype_ values: + CORR, + COV, + MAT, + + + MSE, + DFE, + MEAN, + STDDEV (or SD), + N_VECTOR (or N), + N_SCALAR, + N_MATRIX, + COUNT, + PROX. +*/ + +/* Sets the value of OUTCASE which corresponds to VNAME + to the value STR. VNAME must be of type string. + */ +static void +set_varname_column (struct ccase *outcase, const struct variable *vname, + const char *str) +{ + int len = var_get_width (vname); + uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len); -static int compare_variables_by_mxd_var_type (const void *pa, - const void *pb); -static bool read_matrices_without_rowtype (struct dataset *ds, struct matrix_data_pgm *); -static bool read_matrices_with_rowtype (struct dataset *ds, struct matrix_data_pgm *); -static int string_to_content_type (const char *, int *); -static void attach_mxd_aux (struct variable *, int var_type, int sub_type); + strncpy ((char *) s, str, len); +} -int -cmd_matrix_data (struct lexer *lexer, struct dataset *ds) +static void +blank_varname_column (struct ccase *outcase, const struct variable *vname) { - struct pool *pool; - struct matrix_data_pgm *mx; - struct file_handle *fh = fh_inline_file (); - bool ok; - - unsigned seen = 0; - - discard_variables (ds); - - pool = pool_create (); - mx = pool_alloc (pool, sizeof *mx); - mx->container = pool; - mx->reader = NULL; - mx->fmt = LIST; - mx->section = LOWER; - mx->diag = DIAGONAL; - mx->explicit_rowtype = 0; - mx->rowtype_ = NULL; - mx->varname_ = NULL; - mx->single_split = NULL; - mx->n_factors = 0; - mx->factors = NULL; - memset (mx->is_per_factor, 0, sizeof mx->is_per_factor); - mx->cells = -1; - mx->pop_n = -1; - mx->n_contents = 0; - mx->n_continuous = 0; - mx->first_continuous = 0; - while (lex_token (lexer) != '.') - { - lex_match (lexer, '/'); + int len = var_get_width (vname); + uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len); - if (lex_match_id (lexer, "VARIABLES")) - { - char **v; - size_t nv; + memset (s, ' ', len); +} - if (seen & 1) - { - msg (SE, _("VARIABLES subcommand multiply specified.")); - goto lossage; - } - seen |= 1; - - lex_match (lexer, '='); - if (!parse_DATA_LIST_vars (lexer, &v, &nv, PV_NO_DUPLICATE)) - goto lossage; - - { - size_t i; - - for (i = 0; i < nv; i++) - if (!strcasecmp (v[i], "VARNAME_")) - { - msg (SE, _("VARNAME_ cannot be explicitly specified on " - "VARIABLES.")); - for (i = 0; i < nv; i++) - free (v[i]); - free (v); - goto lossage; - } - } - - { - size_t i; - - for (i = 0; i < nv; i++) - { - struct variable *new_var; - - if (strcasecmp (v[i], "ROWTYPE_")) - { - new_var = dict_create_var_assert (dataset_dict (ds), v[i], 0); - attach_mxd_aux (new_var, MXD_CONTINUOUS, i); - } - else - mx->explicit_rowtype = 1; - free (v[i]); - } - free (v); - } - - mx->rowtype_ = dict_create_var_assert (dataset_dict (ds), - "ROWTYPE_", 8); - attach_mxd_aux (mx->rowtype_, MXD_ROWTYPE, 0); - } - else if (lex_match_id (lexer, "FILE")) +static struct casereader * +preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux) +{ + struct matrix_format *mformat = aux; + const struct caseproto *proto = casereader_get_proto (casereader0); + struct casewriter *writer; + writer = autopaging_writer_create (proto); + struct ccase *prev_case = NULL; + double **matrices = NULL; + size_t n_splits = 0; + + const size_t sizeof_matrix = + sizeof (double) * mformat->n_continuous_vars * mformat->n_continuous_vars; + + + /* Make an initial pass to populate our temporary matrix */ + struct casereader *pass0 = casereader_clone (casereader0); + struct ccase *c; + unsigned int prev_split_hash = 1; + int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0; + for (; (c = casereader_read (pass0)) != NULL; case_unref (c)) + { + int s; + unsigned int split_hash = 0; + for (s = 0; s < mformat->n_split_vars; ++s) { - lex_match (lexer, '='); - fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE); - if (fh == NULL) - goto lossage; + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + split_hash = value_hash (sv, var_get_width (svar), split_hash); } - else if (lex_match_id (lexer, "FORMAT")) + + if (matrices == NULL || prev_split_hash != split_hash) { - lex_match (lexer, '='); + row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? + 1 : 0; - while (lex_token (lexer) == T_ID) - { - if (lex_match_id (lexer, "LIST")) - mx->fmt = LIST; - else if (lex_match_id (lexer, "FREE")) - mx->fmt = FREE; - else if (lex_match_id (lexer, "LOWER")) - mx->section = LOWER; - else if (lex_match_id (lexer, "UPPER")) - mx->section = UPPER; - else if (lex_match_id (lexer, "FULL")) - mx->section = FULL; - else if (lex_match_id (lexer, "DIAGONAL")) - mx->diag = DIAGONAL; - else if (lex_match_id (lexer, "NODIAGONAL")) - mx->diag = NODIAGONAL; - else - { - lex_error (lexer, _("in FORMAT subcommand")); - goto lossage; - } - } + n_splits++; + matrices = xrealloc (matrices, sizeof (double*) * n_splits); + matrices[n_splits - 1] = xmalloc (sizeof_matrix); } - else if (lex_match_id (lexer, "SPLIT")) - { - lex_match (lexer, '='); - if (seen & 2) - { - msg (SE, _("SPLIT subcommand multiply specified.")); - goto lossage; - } - seen |= 2; - - if (lex_token (lexer) != T_ID) + prev_split_hash = split_hash; + + int c_offset = (mformat->triangle == UPPER) ? row : 0; + if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL) + c_offset++; + const union value *v = case_data (c, mformat->rowtype); + const char *val = (const char *) value_str (v, ROWTYPE_WIDTH); + if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH)) + { + if (row >= mformat->n_continuous_vars) { - lex_error (lexer, _("in SPLIT subcommand")); - goto lossage; + msg (SE, + _("There are %d variable declared but the data has at least %d matrix rows."), + mformat->n_continuous_vars, row + 1); + goto error; } - - if (dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL - && (lex_look_ahead (lexer) == '.' || lex_look_ahead (lexer) == '/')) + int col; + for (col = c_offset; col < mformat->n_continuous_vars; ++col) { - if (!strcasecmp (lex_tokid (lexer), "ROWTYPE_") - || !strcasecmp (lex_tokid (lexer), "VARNAME_")) - { - msg (SE, _("Split variable may not be named ROWTYPE_ " - "or VARNAME_.")); - goto lossage; - } - - mx->single_split = dict_create_var_assert (dataset_dict (ds), - lex_tokid (lexer), 0); - attach_mxd_aux (mx->single_split, MXD_CONTINUOUS, 0); - lex_get (lexer); + const struct variable *var = + dict_get_var (dict, + 1 + col - c_offset + + var_get_dict_index (mformat->varname)); - dict_set_split_vars (dataset_dict (ds), &mx->single_split, 1); - } - else - { - struct variable **split; - size_t n; + double e = case_data (c, var)->f; + if (e == SYSMIS) + continue; - if (!parse_variables (lexer, dataset_dict (ds), - &split, &n, PV_NO_DUPLICATE)) - goto lossage; + /* Fill in the lower triangle */ + (matrices[n_splits-1])[col + mformat->n_continuous_vars * row] = e; - dict_set_split_vars (dataset_dict (ds), split, n); + if (mformat->triangle != FULL) + /* Fill in the upper triangle */ + (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e; } - - { - struct variable *const *split = dict_get_split_vars (dataset_dict (ds)); - size_t split_cnt = dict_get_split_cnt (dataset_dict (ds)); - int i; - - for (i = 0; i < split_cnt; i++) - { - struct mxd_var *mv = var_get_aux (split[i]); - if (mv->var_type != MXD_CONTINUOUS) - { - msg (SE, _("Split variable %s is already another type."), - lex_tokid (lexer)); - goto lossage; - } - var_clear_aux (split[i]); - attach_mxd_aux (split[i], MXD_SPLIT, i); - } - } + row++; } - else if (lex_match_id (lexer, "FACTORS")) + } + casereader_destroy (pass0); + + /* Now make a second pass to fill in the other triangle from our + temporary matrix */ + const int idx = var_get_dict_index (mformat->varname); + row = 0; + + prev_split_hash = 1; + n_splits = 0; + for (; (c = casereader_read (casereader0)) != NULL; prev_case = c) + { + int s; + unsigned int split_hash = 0; + for (s = 0; s < mformat->n_split_vars; ++s) { - lex_match (lexer, '='); - - if (seen & 4) - { - msg (SE, _("FACTORS subcommand multiply specified.")); - goto lossage; - } - seen |= 4; - - if (!parse_variables (lexer, dataset_dict (ds), &mx->factors, &mx->n_factors, - PV_NONE)) - goto lossage; - - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct variable *v = mx->factors[i]; - struct mxd_var *mv = var_get_aux (v); - if (mv->var_type != MXD_CONTINUOUS) - { - msg (SE, _("Factor variable %s is already another type."), - lex_tokid (lexer)); - goto lossage; - } - var_clear_aux (v); - attach_mxd_aux (v, MXD_FACTOR, i); - } - } + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + split_hash = value_hash (sv, var_get_width (svar), split_hash); } - else if (lex_match_id (lexer, "CELLS")) + if (prev_split_hash != split_hash) { - lex_match (lexer, '='); - - if (mx->cells != -1) - { - msg (SE, _("CELLS subcommand multiply specified.")); - goto lossage; - } + n_splits++; + row = 0; + } - if (!lex_is_integer (lexer) || lex_integer (lexer) < 1) - { - lex_error (lexer, _("expecting positive integer")); - goto lossage; - } + prev_split_hash = split_hash; - mx->cells = lex_integer (lexer); - lex_get (lexer); - } - else if (lex_match_id (lexer, "N")) + case_unref (prev_case); + struct ccase *outcase = case_create (proto); + case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto)); + const union value *v = case_data (c, mformat->rowtype); + const char *val = (const char *) value_str (v, ROWTYPE_WIDTH); + if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH)) { - lex_match (lexer, '='); + int col; + const struct variable *var = dict_get_var (dict, idx + 1 + row); + set_varname_column (outcase, mformat->varname, var_get_name (var)); + value_copy (case_data_rw (outcase, mformat->rowtype), v, ROWTYPE_WIDTH); - if (mx->pop_n != -1) + for (col = 0; col < mformat->n_continuous_vars; ++col) { - msg (SE, _("N subcommand multiply specified.")); - goto lossage; + union value *dest_val = + case_data_rw_idx (outcase, + 1 + col + var_get_dict_index (mformat->varname)); + dest_val->f = (matrices[n_splits - 1])[col + mformat->n_continuous_vars * row]; + if (col == row && mformat->diagonal == NO_DIAGONAL) + dest_val->f = 1.0; } - - if (!lex_is_integer (lexer) || lex_integer (lexer) < 1) - { - lex_error (lexer, _("expecting positive integer")); - goto lossage; - } - - mx->pop_n = lex_integer (lexer); - lex_get (lexer); + row++; } - else if (lex_match_id (lexer, "CONTENTS")) + else { - int inside_parens = 0; - unsigned collide = 0; - int item; - - if (seen & 8) - { - msg (SE, _("CONTENTS subcommand multiply specified.")); - goto lossage; - } - seen |= 8; - - lex_match (lexer, '='); - - { - int i; - - for (i = 0; i <= PROX; i++) - mx->is_per_factor[i] = 0; - } - - for (;;) - { - if (lex_match (lexer, '(')) - { - if (inside_parens) - { - msg (SE, _("Nested parentheses not allowed.")); - goto lossage; - } - inside_parens = 1; - item = LPAREN; - } - else if (lex_match (lexer, ')')) - { - if (!inside_parens) - { - msg (SE, _("Mismatched right parenthesis (`(').")); - goto lossage; - } - if (mx->contents[mx->n_contents - 1] == LPAREN) - { - msg (SE, _("Empty parentheses not allowed.")); - goto lossage; - } - inside_parens = 0; - item = RPAREN; - } - else - { - int content_type; - int collide_index; - - if (lex_token (lexer) != T_ID) - { - lex_error (lexer, _("in CONTENTS subcommand")); - goto lossage; - } - - content_type = string_to_content_type (lex_tokid (lexer), - &collide_index); - if (content_type == -1) - { - lex_error (lexer, _("in CONTENTS subcommand")); - goto lossage; - } - lex_get (lexer); - - if (collide & (1 << collide_index)) - { - msg (SE, _("Content multiply specified for %s."), - content_names[content_type]); - goto lossage; - } - collide |= (1 << collide_index); - - item = content_type; - mx->is_per_factor[item] = inside_parens; - } - mx->contents[mx->n_contents++] = item; - - if (lex_token (lexer) == '/' || lex_token (lexer) == '.') - break; - } + blank_varname_column (outcase, mformat->varname); + } - if (inside_parens) - { - msg (SE, _("Missing right parenthesis.")); - goto lossage; - } - mx->contents[mx->n_contents] = EOC; + /* Special case for SD and N_VECTOR: Rewrite as STDDEV and N respectively */ + if (0 == strncasecmp (val, "sd ", ROWTYPE_WIDTH)) + { + value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH, + (uint8_t *) "STDDEV", 6, ' '); } - else + else if (0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH)) { - lex_error (lexer, NULL); - goto lossage; + value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH, + (uint8_t *) "N", 1, ' '); } - } - - if (lex_token (lexer) != '.') - { - lex_error (lexer, _("expecting end of command")); - goto lossage; - } - - if (!(seen & 1)) - { - msg (SE, _("Missing VARIABLES subcommand.")); - goto lossage; - } - - if (!mx->n_contents && !mx->explicit_rowtype) - { - msg (SW, _("CONTENTS subcommand not specified: assuming file " - "contains only CORR matrix.")); - mx->contents[0] = CORR; - mx->contents[1] = EOC; - mx->n_contents = 0; + casewriter_write (writer, outcase); } - if (mx->n_factors && !mx->explicit_rowtype && mx->cells == -1) + /* If NODIAGONAL is specified, then a final case must be written */ + if (mformat->diagonal == NO_DIAGONAL) { - msg (SE, _("Missing CELLS subcommand. CELLS is required " - "when ROWTYPE_ is not given in the data and " - "factors are present.")); - goto lossage; - } - - if (mx->explicit_rowtype && mx->single_split) - { - msg (SE, _("Split file values must be present in the data when " - "ROWTYPE_ is present.")); - goto lossage; - } - - /* Create VARNAME_. */ - mx->varname_ = dict_create_var_assert (dataset_dict (ds), "VARNAME_", 8); - attach_mxd_aux (mx->varname_, MXD_VARNAME, 0); - - /* Sort the dictionary variables into the desired order for the - system file output. */ - { - struct variable **v; - size_t nv; - - dict_get_vars (dataset_dict (ds), &v, &nv, 0); - qsort (v, nv, sizeof *v, compare_variables_by_mxd_var_type); - dict_reorder_vars (dataset_dict (ds), v, nv); - free (v); - } - - /* Set formats. */ - { - static const struct fmt_spec fmt_tab[MXD_COUNT] = - { - {FMT_F, 4, 0}, - {FMT_A, 8, 0}, - {FMT_F, 4, 0}, - {FMT_A, 8, 0}, - {FMT_F, 10, 4}, - }; - - int i; - - mx->first_continuous = -1; - for (i = 0; i < dict_get_var_cnt (dataset_dict (ds)); i++) - { - struct variable *v = dict_get_var (dataset_dict (ds), i); - struct mxd_var *mv = var_get_aux (v); - int type = mv->var_type; - - assert (type >= 0 && type < MXD_COUNT); - var_set_both_formats (v, &fmt_tab[type]); - - if (type == MXD_CONTINUOUS) - mx->n_continuous++; - if (mx->first_continuous == -1 && type == MXD_CONTINUOUS) - mx->first_continuous = i; - } - } - - if (mx->n_continuous == 0) - { - msg (SE, _("No continuous variables specified.")); - goto lossage; - } - - mx->reader = dfm_open_reader (fh, lexer); - if (mx->reader == NULL) - goto lossage; - - if (mx->explicit_rowtype) - ok = read_matrices_with_rowtype (ds, mx); - else - ok = read_matrices_without_rowtype (ds, mx); - - dfm_close_reader (mx->reader); + int col; + struct ccase *outcase = case_create (proto); - pool_destroy (mx->container); + if (prev_case) + case_copy (outcase, 0, prev_case, 0, caseproto_get_n_widths (proto)); - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; + const struct variable *var = dict_get_var (dict, idx + 1 + row); + set_varname_column (outcase, mformat->varname, var_get_name (var)); -lossage: - discard_variables (ds); - free (mx->factors); - pool_destroy (mx->container); - return CMD_CASCADING_FAILURE; -} + for (col = 0; col < mformat->n_continuous_vars; ++col) + { + union value *dest_val = + case_data_rw_idx (outcase, 1 + col + + var_get_dict_index (mformat->varname)); + dest_val->f = (matrices[n_splits - 1]) [col + mformat->n_continuous_vars * row]; + if (col == row && mformat->diagonal == NO_DIAGONAL) + dest_val->f = 1.0; + } -/* Look up string S as a content-type name and return the - corresponding enumerated value, or -1 if there is no match. If - COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use - as a bit-index) which can be used for determining whether a related - statistic has already been used. */ -static int -string_to_content_type (const char *s, int *collide) -{ - static const struct - { - int value; - int collide; - const char *string; + casewriter_write (writer, outcase); } - *tp, - tab[] = - { - {N_VECTOR, 0, "N_VECTOR"}, - {N_VECTOR, 0, "N"}, - {N_SCALAR, 0, "N_SCALAR"}, - {N_MATRIX, 1, "N_MATRIX"}, - {MEAN, 2, "MEAN"}, - {STDDEV, 3, "STDDEV"}, - {STDDEV, 3, "SD"}, - {COUNT, 4, "COUNT"}, - {MSE, 5, "MSE"}, - {DFE, 6, "DFE"}, - {MAT, 7, "MAT"}, - {COV, 8, "COV"}, - {CORR, 9, "CORR"}, - {PROX, 10, "PROX"}, - {-1, -1, NULL}, - }; - - for (tp = tab; tp->value != -1; tp++) - if (!strcasecmp (s, tp->string)) - { - if (collide) - *collide = tp->collide; - - return tp->value; - } - return -1; -} -/* Compare two variables using p.mxd.var_type and p.mxd.sub_type - fields. */ -static int -compare_variables_by_mxd_var_type (const void *a_, const void *b_) -{ - struct variable *const *pa = a_; - struct variable *const *pb = b_; - const struct mxd_var *a = var_get_aux (*pa); - const struct mxd_var *b = var_get_aux (*pb); - - if (a->var_type != b->var_type) - return a->var_type > b->var_type ? 1 : -1; - else - return a->sub_type < b->sub_type ? -1 : a->sub_type > b->sub_type; -} -/* Attaches a struct mxd_var with the specific member values to - V. */ -static void -attach_mxd_aux (struct variable *v, int var_type, int sub_type) -{ - struct mxd_var *mv; - - assert (var_get_aux (v) == NULL); - mv = xmalloc (sizeof *mv); - mv->var_type = var_type; - mv->sub_type = sub_type; - var_attach_aux (v, mv, var_dtor_free); -} - -/* Matrix tokenizer. */ - -/* Matrix token types. */ -enum matrix_token_type - { - MNUM, /* Number. */ - MSTR /* String. */ - }; - -/* A MATRIX DATA parsing token. */ -struct matrix_token - { - enum matrix_token_type type; - double number; /* MNUM: token value. */ - char *string; /* MSTR: token string; not null-terminated. */ - int length; /* MSTR: tokstr length. */ - }; - -static int mget_token (struct matrix_token *, struct dfm_reader *); - -#if DEBUGGING -#define mget_token(TOKEN, READER) mget_token_dump(TOKEN, READER) + if (prev_case) + case_unref (prev_case); -static void -mdump_token (const struct matrix_token *token) -{ - switch (token->type) - { - case MNUM: - printf (" #%g", token->number); - break; - case MSTR: - printf (" '%.*s'", token->length, token->string); - break; - default: - NOT_REACHED (); - } - fflush (stdout); + int i; + for (i = 0 ; i < n_splits; ++i) + free (matrices[i]); + free (matrices); + struct casereader *reader1 = casewriter_make_reader (writer); + casereader_destroy (casereader0); + return reader1; + + +error: + if (prev_case) + case_unref (prev_case); + + for (i = 0 ; i < n_splits; ++i) + free (matrices[i]); + free (matrices); + casereader_destroy (casereader0); + return NULL; } -static int -mget_token_dump (struct matrix_token *token, struct dfm_reader *reader) +int +cmd_matrix (struct lexer *lexer, struct dataset *ds) { - int result = (mget_token) (token, reader); - mdump_token (token); - return result; -} -#endif + struct dictionary *dict; + struct data_parser *parser; + struct dfm_reader *reader; + struct file_handle *fh = NULL; + char *encoding = NULL; + struct matrix_format mformat; + int i; + size_t n_names; + char **names = NULL; -/* Return the current position in READER. */ -static const char * -context (struct dfm_reader *reader) -{ - static struct string buf = DS_EMPTY_INITIALIZER; + mformat.triangle = LOWER; + mformat.diagonal = DIAGONAL; + mformat.n_split_vars = 0; + mformat.split_vars = NULL; - ds_clear (&buf); - if (dfm_eof (reader)) - ds_assign_cstr (&buf, "at end of file"); - else - { - struct substring p; - - p = dfm_get_record (reader); - ss_ltrim (&p, ss_cstr (CC_SPACES)); - if (ss_is_empty (p)) - ds_assign_cstr (&buf, "at end of line"); - else - ds_put_format (&buf, "before `%.*s'", - (int) ss_cspan (p, ss_cstr (CC_SPACES)), ss_data (p)); - } - - return ds_cstr (&buf); -} + dict = (in_input_program () + ? dataset_dict (ds) + : dict_create (get_default_encoding ())); + parser = data_parser_create (dict); + reader = NULL; -/* Is there at least one token left in the data file? */ -static bool -another_token (struct dfm_reader *reader) -{ - for (;;) - { - struct substring p; - size_t space_cnt; - - if (dfm_eof (reader)) - return false; - - p = dfm_get_record (reader); - space_cnt = ss_span (p, ss_cstr (CC_SPACES)); - if (space_cnt < ss_length (p)) - { - dfm_forward_columns (reader, space_cnt); - return true; - } + data_parser_set_type (parser, DP_DELIMITED); + data_parser_set_warn_missing_fields (parser, false); + data_parser_set_span (parser, false); - dfm_forward_record (reader); - } - NOT_REACHED(); -} + mformat.rowtype = dict_create_var (dict, "ROWTYPE_", ROWTYPE_WIDTH); -/* Parse a MATRIX DATA token from READER into TOKEN. */ -static int -(mget_token) (struct matrix_token *token, struct dfm_reader *reader) -{ - struct substring line, p; - struct substring s; - int c; + mformat.n_continuous_vars = 0; + mformat.n_split_vars = 0; - if (!another_token (reader)) - return 0; + if (! lex_force_match_id (lexer, "VARIABLES")) + goto error; - line = p = dfm_get_record (reader); + lex_match (lexer, T_EQUALS); - /* Three types of fields: quoted with ', quoted with ", unquoted. */ - c = ss_first (p); - if (c == '\'' || c == '"') + if (! parse_mixed_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE)) { - ss_get_char (&p); - if (!ss_get_until (&p, c, &s)) - msg (SW, _("Scope of string exceeds line.")); - } - else - { - bool is_num = isdigit (c) || c == '.'; - const char *start = ss_data (p); - - for (;;) - { - c = ss_first (p); - if (strchr (CC_SPACES ",-+", c) != NULL) - break; - - if (isdigit (c)) - is_num = true; - if (strchr ("deDE", c) && strchr ("+-", ss_at (p, 1))) - { - is_num = true; - ss_advance (&p, 2); - } - else - ss_advance (&p, 1); - } - s = ss_buffer (start, ss_data (p) - start); - - if (is_num) - data_in (s, FMT_F, 0, - dfm_get_column (reader, ss_data (s)), - (union value *) &token->number, 0); - else - token->type = MSTR; + int i; + for (i = 0; i < n_names; ++i) + free (names[i]); + free (names); + goto error; } - token->string = ss_data (s); - token->length = ss_length (s); - - dfm_reread_record (reader, dfm_get_column (reader, ss_end (s))); - - return 1; -} -/* Forcibly skip the end of a line for content type CONTENT in - READER. */ -static int -force_eol (struct dfm_reader *reader, const char *content) -{ - struct substring p; - - if (dfm_eof (reader)) - return 0; - - p = dfm_get_record (reader); - if (ss_span (p, ss_cstr (CC_SPACES)) != ss_length (p)) + int longest_name = 0; + for (i = 0; i < n_names; ++i) { - msg (SE, _("End of line expected %s while reading %s."), - context (reader), content); - return 0; + maximize_int (&longest_name, strlen (names[i])); } - - dfm_forward_record (reader); - return 1; -} - -/* Back end, omitting ROWTYPE_. */ - -struct nr_aux_data - { - const struct dictionary *dict; /* The dictionary */ - struct matrix_data_pgm *mx; /* MATRIX DATA program. */ - double ***data; /* MATRIX DATA data. */ - double *factor_values; /* Factor values. */ - int max_cell_idx; /* Max-numbered cell that we have - read so far, plus one. */ - double *split_values; /* SPLIT FILE variable values. */ - }; -static bool nr_read_splits (struct nr_aux_data *, int compare); -static bool nr_read_factors (struct nr_aux_data *, int cell); -static bool nr_output_data (struct nr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static bool matrix_data_read_without_rowtype (struct case_source *source, - struct ccase *, - write_case_func *, - write_case_data); + mformat.varname = dict_create_var (dict, "VARNAME_", + 8 * DIV_RND_UP (longest_name, 8)); -/* Read from the data file and write it to the active file. - Returns true if successful, false if an I/O error occurred. */ -static bool -read_matrices_without_rowtype (struct dataset *ds, struct matrix_data_pgm *mx) -{ - struct nr_aux_data nr; - bool ok; - - if (mx->cells == -1) - mx->cells = 1; - - nr.mx = mx; - nr.dict = dataset_dict (ds); - nr.data = NULL; - nr.factor_values = xnmalloc (mx->n_factors * mx->cells, - sizeof *nr.factor_values); - nr.max_cell_idx = 0; - nr.split_values = xnmalloc (dict_get_split_cnt (dataset_dict (ds)), - sizeof *nr.split_values); - - proc_set_source (ds, create_case_source ( - &matrix_data_without_rowtype_source_class, &nr)); - - ok = procedure (ds, NULL, NULL); - - free (nr.split_values); - free (nr.factor_values); - - return ok; -} - -/* Mirror data across the diagonal of matrix CP which contains - CONTENT type data. */ -static void -fill_matrix (struct matrix_data_pgm *mx, int content, double *cp) -{ - int type = content_type[content]; - - if (type == 1 && mx->section != FULL) + for (i = 0; i < n_names; ++i) { - if (mx->diag == NODIAGONAL) + if (0 == strcasecmp (names[i], "ROWTYPE_")) { - const double fill = content == CORR ? 1.0 : SYSMIS; - int i; - - for (i = 0; i < mx->n_continuous; i++) - cp[i * (1 + mx->n_continuous)] = fill; + const struct fmt_spec fmt = fmt_for_input (FMT_A, 8, 0); + data_parser_add_delimited_field (parser, + &fmt, + var_get_case_index (mformat.rowtype), + "ROWTYPE_"); + } + else + { + const struct fmt_spec fmt = fmt_for_input (FMT_F, 10, 4); + struct variable *v = dict_create_var (dict, names[i], 0); + var_set_both_formats (v, &fmt); + data_parser_add_delimited_field (parser, + &fmt, + var_get_case_index (mformat.varname) + + ++mformat.n_continuous_vars, + names[i]); } - - { - int c, r; - - if (mx->section == LOWER) - { - int n_lines = mx->n_continuous; - if (mx->section != FULL && mx->diag == NODIAGONAL) - n_lines--; - - for (r = 1; r < n_lines; r++) - for (c = 0; c < r; c++) - cp[r + c * mx->n_continuous] = cp[c + r * mx->n_continuous]; - } - else - { - assert (mx->section == UPPER); - for (r = 1; r < mx->n_continuous; r++) - for (c = 0; c < r; c++) - cp[c + r * mx->n_continuous] = cp[r + c * mx->n_continuous]; - } - } - } - else if (type == 2) - { - int c; - - for (c = 1; c < mx->n_continuous; c++) - cp[c] = cp[0]; } -} + for (i = 0; i < n_names; ++i) + free (names[i]); + free (names); -/* Read data lines for content type CONTENT from the data file. - If PER_FACTOR is nonzero, then factor information is read from - the data file. Data is for cell number CELL. */ -static int -nr_read_data_lines (struct nr_aux_data *nr, - int per_factor, int cell, int content, int compare) -{ - struct matrix_data_pgm *mx = nr->mx; - const int type = content_type[content]; /* Content type. */ - int n_lines; /* Number of lines to parse from data file for this type. */ - double *cp; /* Current position in vector or matrix. */ - int i; - - if (type != 1) - n_lines = 1; - else + while (lex_token (lexer) != T_ENDCMD) { - n_lines = mx->n_continuous; - if (mx->section != FULL && mx->diag == NODIAGONAL) - n_lines--; - } - - cp = nr->data[content][cell]; - if (type == 1 && mx->section == LOWER && mx->diag == NODIAGONAL) - cp += mx->n_continuous; + if (! lex_force_match (lexer, T_SLASH)) + goto error; - for (i = 0; i < n_lines; i++) - { - int n_cols; - - if (!nr_read_splits (nr, 1)) - return 0; - if (per_factor && !nr_read_factors (nr, cell)) - return 0; - compare = 1; - - switch (type) + if (lex_match_id (lexer, "FORMAT")) { - case 0: - n_cols = mx->n_continuous; - break; - case 1: - switch (mx->section) + lex_match (lexer, T_EQUALS); + + while (lex_token (lexer) != T_SLASH && (lex_token (lexer) != T_ENDCMD)) { - case LOWER: - n_cols = i + 1; - break; - case UPPER: - cp += i; - n_cols = mx->n_continuous - i; - if (mx->diag == NODIAGONAL) + if (lex_match_id (lexer, "LIST")) + { + data_parser_set_span (parser, false); + } + else if (lex_match_id (lexer, "FREE")) { - n_cols--; - cp++; + data_parser_set_span (parser, true); + } + else if (lex_match_id (lexer, "UPPER")) + { + mformat.triangle = UPPER; + } + else if (lex_match_id (lexer, "LOWER")) + { + mformat.triangle = LOWER; + } + else if (lex_match_id (lexer, "FULL")) + { + mformat.triangle = FULL; + } + else if (lex_match_id (lexer, "DIAGONAL")) + { + mformat.diagonal = DIAGONAL; + } + else if (lex_match_id (lexer, "NODIAGONAL")) + { + mformat.diagonal = NO_DIAGONAL; + } + else + { + lex_error (lexer, NULL); + goto error; } - break; - case FULL: - n_cols = mx->n_continuous; - break; - default: - NOT_REACHED (); } - break; - case 2: - n_cols = 1; - break; - default: - NOT_REACHED (); } - - { - int j; - - for (j = 0; j < n_cols; j++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return 0; - if (token.type != MNUM) - { - msg (SE, _("expecting value for %s %s"), - var_get_name (dict_get_var (nr->dict, j)), - context (mx->reader)); - return 0; - } - - *cp++ = token.number; - } - if (mx->fmt != FREE - && !force_eol (mx->reader, content_names[content])) - return 0; - } - - if (mx->section == LOWER) - cp += mx->n_continuous - n_cols; - } - - fill_matrix (mx, content, nr->data[content][cell]); - - return 1; -} - -/* When ROWTYPE_ does not appear in the data, reads the matrices and - writes them to the output file. - Returns true if successful, false if an I/O error occurred. */ -static bool -matrix_data_read_without_rowtype (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) -{ - struct nr_aux_data *nr = source->aux; - struct matrix_data_pgm *mx = nr->mx; - - { - int *cp; - - nr->data = pool_nalloc (mx->container, PROX + 1, sizeof *nr->data); - - { - int i; - - for (i = 0; i <= PROX; i++) - nr->data[i] = NULL; - } - - for (cp = mx->contents; *cp != EOC; cp++) - if (*cp != LPAREN && *cp != RPAREN) + else if (lex_match_id (lexer, "FILE")) { - int per_factor = mx->is_per_factor[*cp]; - int n_entries; - - n_entries = mx->n_continuous; - if (content_type[*cp] == 1) - n_entries *= mx->n_continuous; - - { - int n_vectors = per_factor ? mx->cells : 1; - int i; - - nr->data[*cp] = pool_nalloc (mx->container, - n_vectors, sizeof **nr->data); - - for (i = 0; i < n_vectors; i++) - nr->data[*cp][i] = pool_nalloc (mx->container, - n_entries, sizeof ***nr->data); - } + lex_match (lexer, T_EQUALS); + fh_unref (fh); + fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL); + if (fh == NULL) + goto error; } - } - - for (;;) - { - int *bp, *ep, *np; - - if (!nr_read_splits (nr, 0)) - return true; - - for (bp = mx->contents; *bp != EOC; bp = np) + else if (lex_match_id (lexer, "SPLIT")) { - int per_factor; - - /* Trap the CONTENTS that we should parse in this pass - between bp and ep. Set np to the starting bp for next - iteration. */ - if (*bp == LPAREN) + lex_match (lexer, T_EQUALS); + if (! parse_variables (lexer, dict, &mformat.split_vars, &mformat.n_split_vars, 0)) { - ep = ++bp; - while (*ep != RPAREN) - ep++; - np = &ep[1]; - per_factor = 1; + free (mformat.split_vars); + goto error; } - else + int i; + for (i = 0; i < mformat.n_split_vars; ++i) { - ep = &bp[1]; - while (*ep != EOC && *ep != LPAREN) - ep++; - np = ep; - per_factor = 0; + const struct fmt_spec fmt = fmt_for_input (FMT_F, 4, 0); + var_set_both_formats (mformat.split_vars[i], &fmt); } - - { - int i; - - for (i = 0; i < (per_factor ? mx->cells : 1); i++) - { - int *cp; - - for (cp = bp; cp < ep; cp++) - if (!nr_read_data_lines (nr, per_factor, i, *cp, cp != bp)) - return true; - } - } + dict_reorder_vars (dict, mformat.split_vars, mformat.n_split_vars); + mformat.n_continuous_vars -= mformat.n_split_vars; + } + else + { + lex_error (lexer, NULL); + goto error; } - - if (!nr_output_data (nr, c, write_case, wc_data)) - return false; - - if (dict_get_split_cnt (nr->dict) == 0 - || !another_token (mx->reader)) - return true; } -} - -/* Read the split file variables. If COMPARE is 1, compares the - values read to the last values read and returns true if they're equal, - false otherwise. */ -static bool -nr_read_splits (struct nr_aux_data *nr, int compare) -{ - struct matrix_data_pgm *mx = nr->mx; - static int just_read = 0; /* FIXME: WTF? */ - size_t split_cnt; - size_t i; - if (compare && just_read) + if (mformat.diagonal == NO_DIAGONAL && mformat.triangle == FULL) { - just_read = 0; - return true; + msg (SE, _("FORMAT = FULL and FORMAT = NODIAGONAL are mutually exclusive.")); + goto error; } - - if (dict_get_split_vars (nr->dict) == NULL) - return true; - if (mx->single_split) + if (fh == NULL) + fh = fh_inline_file (); + fh_set_default_handle (fh); + + if (!data_parser_any_fields (parser)) { - if (!compare) - { - struct mxd_var *mv = var_get_aux (dict_get_split_vars (nr->dict)[0]); - nr->split_values[0] = ++mv->sub_type; - } - return true; + msg (SE, _("At least one variable must be specified.")); + goto error; } - if (!compare) - just_read = 1; + if (lex_end_of_command (lexer) != CMD_SUCCESS) + goto error; - split_cnt = dict_get_split_cnt (nr->dict); - for (i = 0; i < split_cnt; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return false; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting SPLIT FILE value %s."), - context (mx->reader)); - return false; - } + reader = dfm_open_reader (fh, lexer, encoding); + if (reader == NULL) + goto error; - if (!compare) - nr->split_values[i] = token.number; - else if (nr->split_values[i] != token.number) - { - msg (SE, _("Expecting value %g for %s."), - nr->split_values[i], - var_get_name (dict_get_split_vars (nr->dict)[i])); - return false; - } + if (in_input_program ()) + { + struct data_list_trns *trns = xmalloc (sizeof *trns); + trns->parser = parser; + trns->reader = reader; + trns->end = NULL; + add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns); } - - return true; -} - -/* Read the factors for cell CELL. If COMPARE is 1, compares the - values read to the last values read and returns true if they're equal, - false otherwise. */ -static bool -nr_read_factors (struct nr_aux_data *nr, int cell) -{ - struct matrix_data_pgm *mx = nr->mx; - bool compare; - - if (mx->n_factors == 0) - return true; - - assert (nr->max_cell_idx >= cell); - if (cell != nr->max_cell_idx) - compare = true; else { - compare = false; - nr->max_cell_idx++; + data_parser_make_active_file (parser, ds, reader, dict, preprocess, &mformat); } - - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return false; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting factor value %s."), - context (mx->reader)); - return false; - } - - if (!compare) - nr->factor_values[i + mx->n_factors * cell] = token.number; - else if (nr->factor_values[i + mx->n_factors * cell] != token.number) - { - msg (SE, _("Syntax error expecting value %g for %s %s."), - nr->factor_values[i + mx->n_factors * cell], - var_get_name (mx->factors[i]), context (mx->reader)); - return false; - } - } - } - return true; -} - -/* Write the contents of a cell having content type CONTENT and data - CP to the active file. - Returns true if successful, false if an I/O error occurred. */ -static bool -dump_cell_content (const struct dictionary *dict, - struct matrix_data_pgm *mx, int content, double *cp, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - int type = content_type[content]; + fh_unref (fh); + free (encoding); + free (mformat.split_vars); - { - buf_copy_str_rpad (case_data_rw (c, mx->rowtype_)->s, 8, - content_names[content]); - - if (type != 1) - memset (case_data_rw (c, mx->varname_)->s, ' ', 8); - } + return CMD_DATA_LIST; - { - int n_lines = (type == 1) ? mx->n_continuous : 1; - int i; - - for (i = 0; i < n_lines; i++) - { - int j; - - for (j = 0; j < mx->n_continuous; j++) - { - struct variable *v = dict_get_var (dict, mx->first_continuous + j); - case_data_rw (c, v)->f = *cp; - cp++; - } - if (type == 1) - buf_copy_str_rpad (case_data_rw (c, mx->varname_)->s, 8, - var_get_name ( - dict_get_var (dict, mx->first_continuous + i))); - if (!write_case (wc_data)) - return false; - } - } - return true; + error: + data_parser_destroy (parser); + if (!in_input_program ()) + dict_destroy (dict); + fh_unref (fh); + free (encoding); + free (mformat.split_vars); + return CMD_CASCADING_FAILURE; } -/* Finally dump out everything from nr_data[] to the output file. */ -static bool -nr_output_data (struct nr_aux_data *nr, struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - struct matrix_data_pgm *mx = nr->mx; - - { - struct variable *const *split; - size_t split_cnt; - size_t i; - - split_cnt = dict_get_split_cnt (nr->dict); - split = dict_get_split_vars (nr->dict); - for (i = 0; i < split_cnt; i++) - case_data_rw (c, split[i])->f = nr->split_values[i]; - } - - if (mx->n_factors) - { - int cell; - - for (cell = 0; cell < mx->cells; cell++) - { - { - size_t factor; - - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor])->f - = nr->factor_values[factor + cell * mx->n_factors]; - } - - { - int content; - - for (content = 0; content <= PROX; content++) - if (mx->is_per_factor[content]) - { - assert (nr->data[content] != NULL - && nr->data[content][cell] != NULL); - - if (!dump_cell_content (nr->dict, mx, - content, nr->data[content][cell], - c, write_case, wc_data)) - return false; - } - } - } - } - - { - int content; - - { - size_t factor; - - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor])->f = SYSMIS; - } - - for (content = 0; content <= PROX; content++) - if (!mx->is_per_factor[content] && nr->data[content] != NULL) - { - if (!dump_cell_content (nr->dict, mx, content, nr->data[content][0], - c, write_case, wc_data)) - return false; - } - } - - return true; -} -/* Back end, with ROWTYPE_. */ - -/* All the data for one set of factor values. */ -struct factor_data - { - double *factors; - int n_rows[PROX + 1]; - double *data[PROX + 1]; - struct factor_data *next; - }; - -/* With ROWTYPE_ auxiliary data. */ -struct wr_aux_data - { - const struct dictionary *dict; /* The dictionary */ - struct matrix_data_pgm *mx; /* MATRIX DATA program. */ - int content; /* Type of current row. */ - double *split_values; /* SPLIT FILE variable values. */ - struct factor_data *data; /* All the data. */ - struct factor_data *current; /* Current factor. */ - }; +/* Input procedure. */ -static bool wr_read_splits (struct wr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static bool wr_output_data (struct wr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static bool wr_read_rowtype (struct wr_aux_data *, - const struct matrix_token *, struct dfm_reader *); -static bool wr_read_factors (struct wr_aux_data *); -static bool wr_read_indeps (struct wr_aux_data *); -static bool matrix_data_read_with_rowtype (struct case_source *, - struct ccase *, - write_case_func *, - write_case_data); - -/* When ROWTYPE_ appears in the data, reads the matrices and writes - them to the output file. +/* Destroys DATA LIST transformation TRNS. Returns true if successful, false if an I/O error occurred. */ static bool -read_matrices_with_rowtype (struct dataset *ds, struct matrix_data_pgm *mx) -{ - struct wr_aux_data wr; - bool ok; - - wr.mx = mx; - wr.content = -1; - wr.split_values = NULL; - wr.data = NULL; - wr.current = NULL; - wr.dict = dataset_dict (ds); - mx->cells = 0; - - proc_set_source (ds, - create_case_source (&matrix_data_with_rowtype_source_class, - &wr)); - ok = procedure (ds, NULL, NULL); - - free (wr.split_values); - return ok; -} - -/* Read from the data file and write it to the active file. - Returns true if successful, false if an I/O error occurred. */ -static bool -matrix_data_read_with_rowtype (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) -{ - struct wr_aux_data *wr = source->aux; - struct matrix_data_pgm *mx = wr->mx; - - do - { - if (!wr_read_splits (wr, c, write_case, wc_data)) - return true; - - if (!wr_read_factors (wr)) - return true; - - if (!wr_read_indeps (wr)) - return true; - } - while (another_token (mx->reader)); - - return wr_output_data (wr, c, write_case, wc_data); -} - -/* Read the split file variables. If they differ from the previous - set of split variables then output the data. Returns success. */ -static bool -wr_read_splits (struct wr_aux_data *wr, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) +data_list_trns_free (void *trns_) { - struct matrix_data_pgm *mx = wr->mx; - bool compare; - size_t split_cnt; - - split_cnt = dict_get_split_cnt (wr->dict); - if (split_cnt == 0) - return true; - - if (wr->split_values) - compare = true; - else - { - compare = false; - wr->split_values = xnmalloc (split_cnt, sizeof *wr->split_values); - } - - { - bool different = false; - int i; - - for (i = 0; i < split_cnt; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return false; - if (token.type != MNUM) - { - msg (SE, _("Syntax error %s expecting SPLIT FILE value."), - context (mx->reader)); - return false; - } - - if (compare && wr->split_values[i] != token.number && !different) - { - if (!wr_output_data (wr, c, write_case, wc_data)) - return 0; - different = true; - mx->cells = 0; - } - wr->split_values[i] = token.number; - } - } - + struct data_list_trns *trns = trns_; + data_parser_destroy (trns->parser); + dfm_close_reader (trns->reader); + free (trns); return true; } -/* Compares doubles A and B, treating SYSMIS as greatest. */ +/* Handle DATA LIST transformation TRNS, parsing data into *C. */ static int -compare_doubles (const void *a_, const void *b_, const void *aux UNUSED) -{ - const double *a = a_; - const double *b = b_; - - if (*a == *b) - return 0; - else if (*a == SYSMIS) - return 1; - else if (*b == SYSMIS) - return -1; - else if (*a > *b) - return 1; - else - return -1; -} - -/* Return strcmp()-type comparison of the MX->n_factors factors at _A and - _B. Sort missing values toward the end. */ -static int -compare_factors (const void *a_, const void *b_, const void *mx_) -{ - const struct matrix_data_pgm *mx = mx_; - struct factor_data *const *pa = a_; - struct factor_data *const *pb = b_; - const double *a = (*pa)->factors; - const double *b = (*pb)->factors; - - return lexicographical_compare_3way (a, mx->n_factors, - b, mx->n_factors, - sizeof *a, - compare_doubles, NULL); -} - -/* Write out the data for the current split file to the active - file. - Returns true if successful, false if an I/O error occurred. */ -static bool -wr_output_data (struct wr_aux_data *wr, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - struct matrix_data_pgm *mx = wr->mx; - bool ok = true; - - { - struct variable *const *split; - size_t split_cnt; - size_t i; - - split_cnt = dict_get_split_cnt (wr->dict); - split = dict_get_split_vars (wr->dict); - for (i = 0; i < split_cnt; i++) - case_data_rw (c, split[i])->f = wr->split_values[i]; - } - - /* Sort the wr->data list. */ - { - struct factor_data **factors; - struct factor_data *iter; - int i; - - factors = xnmalloc (mx->cells, sizeof *factors); - - for (i = 0, iter = wr->data; iter; iter = iter->next, i++) - factors[i] = iter; - - sort (factors, mx->cells, sizeof *factors, compare_factors, mx); - - wr->data = factors[0]; - for (i = 0; i < mx->cells - 1; i++) - factors[i]->next = factors[i + 1]; - factors[mx->cells - 1]->next = NULL; - - free (factors); - } - - /* Write out records for every set of factor values. */ - { - struct factor_data *iter; - - for (iter = wr->data; iter; iter = iter->next) - { - { - size_t factor; - - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor])->f = iter->factors[factor]; - } - - { - int content; - - for (content = 0; content <= PROX; content++) - { - if (!iter->n_rows[content]) - continue; - - { - int type = content_type[content]; - int n_lines = (type == 1 - ? (mx->n_continuous - - (mx->section != FULL && mx->diag == NODIAGONAL)) - : 1); - - if (n_lines != iter->n_rows[content]) - { - msg (SE, _("Expected %d lines of data for %s content; " - "actually saw %d lines. No data will be " - "output for this content."), - n_lines, content_names[content], - iter->n_rows[content]); - continue; - } - } - - fill_matrix (mx, content, iter->data[content]); - - ok = dump_cell_content (wr->dict, mx, content, - iter->data[content], - c, write_case, wc_data); - if (!ok) - break; - } - } - } - } - - pool_destroy (mx->container); - mx->container = pool_create (); - - wr->data = wr->current = NULL; - - return ok; -} - -/* Sets ROWTYPE_ based on the given TOKEN read from READER. - Return success. */ -static bool -wr_read_rowtype (struct wr_aux_data *wr, - const struct matrix_token *token, - struct dfm_reader *reader) -{ - if (wr->content != -1) - { - msg (SE, _("Multiply specified ROWTYPE_ %s."), context (reader)); - return false; - } - if (token->type != MSTR) - { - msg (SE, _("Syntax error %s expecting ROWTYPE_ string."), - context (reader)); - return false; - } - - { - char s[16]; - char *cp; - - memcpy (s, token->string, MIN (15, token->length)); - s[MIN (15, token->length)] = 0; - - for (cp = s; *cp; cp++) - *cp = toupper ((unsigned char) *cp); - - wr->content = string_to_content_type (s, NULL); - } - - if (wr->content == -1) - { - msg (SE, _("Syntax error %s."), context (reader)); - return 0; - } - - return true; -} - -/* Read the factors for the current row. Select a set of factors and - point wr_current to it. */ -static bool -wr_read_factors (struct wr_aux_data *wr) +data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) { - struct matrix_data_pgm *mx = wr->mx; - double *factor_values = local_alloc (sizeof *factor_values * mx->n_factors); - - wr->content = -1; - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - goto lossage; - if (token.type == MSTR) - { - if (!wr_read_rowtype (wr, &token, mx->reader)) - goto lossage; - if (!mget_token (&token, mx->reader)) - goto lossage; - } - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting factor value %s."), - context (mx->reader)); - goto lossage; - } - - factor_values[i] = token.number; - } - } - if (wr->content == -1) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - goto lossage; - if (!wr_read_rowtype (wr, &token, mx->reader)) - goto lossage; - } - - /* Try the most recent factor first as a simple caching - mechanism. */ - if (wr->current) - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - if (factor_values[i] != wr->current->factors[i]) - goto cache_miss; - goto winnage; - } + struct data_list_trns *trns = trns_; + int retval; - /* Linear search through the list. */ -cache_miss: - { - struct factor_data *iter; - - for (iter = wr->data; iter; iter = iter->next) - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - if (factor_values[i] != iter->factors[i]) - goto next_item; - - wr->current = iter; - goto winnage; - - next_item: ; - } - } - - /* Not found. Make a new item. */ - { - struct factor_data *new = pool_alloc (mx->container, sizeof *new); - - new->factors = pool_nalloc (mx->container, - mx->n_factors, sizeof *new->factors); - + *c = case_unshare (*c); + if (data_parser_parse (trns->parser, trns->reader, *c)) + retval = TRNS_CONTINUE; + else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1) { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - new->factors[i] = factor_values[i]; - } - - { - int i; - - for (i = 0; i <= PROX; i++) - { - new->n_rows[i] = 0; - new->data[i] = NULL; - } + /* An I/O error, or encountering end of file for a second + time, should be escalated into a more serious error. */ + retval = TRNS_ERROR; } + else + retval = TRNS_END_FILE; - new->next = wr->data; - wr->data = wr->current = new; - mx->cells++; - } - -winnage: - local_free (factor_values); - return true; - -lossage: - local_free (factor_values); - return false; -} - -/* Read the independent variables into wr->current. */ -static bool -wr_read_indeps (struct wr_aux_data *wr) -{ - struct matrix_data_pgm *mx = wr->mx; - struct factor_data *c = wr->current; - const int type = content_type[wr->content]; - const int n_rows = c->n_rows[wr->content]; - double *cp; - int n_cols; - - /* Allocate room for data if necessary. */ - if (c->data[wr->content] == NULL) - { - int n_items = mx->n_continuous; - if (type == 1) - n_items *= mx->n_continuous; - - c->data[wr->content] = pool_nalloc (mx->container, - n_items, sizeof **c->data); - } - - cp = &c->data[wr->content][n_rows * mx->n_continuous]; - - /* Figure out how much to read from this line. */ - switch (type) + /* If there was an END subcommand handle it. */ + if (trns->end != NULL) { - case 0: - case 2: - if (n_rows > 0) - { - msg (SE, _("Duplicate specification for %s."), - content_names[wr->content]); - return false; - } - if (type == 0) - n_cols = mx->n_continuous; + double *end = &case_data_rw (*c, trns->end)->f; + if (retval == TRNS_END_FILE) + { + *end = 1.0; + retval = TRNS_CONTINUE; + } else - n_cols = 1; - break; - case 1: - if (n_rows >= mx->n_continuous - (mx->section != FULL && mx->diag == NODIAGONAL)) - { - msg (SE, _("Too many rows of matrix data for %s."), - content_names[wr->content]); - return false; - } - - switch (mx->section) - { - case LOWER: - n_cols = n_rows + 1; - if (mx->diag == NODIAGONAL) - cp += mx->n_continuous; - break; - case UPPER: - cp += n_rows; - n_cols = mx->n_continuous - n_rows; - if (mx->diag == NODIAGONAL) - { - n_cols--; - cp++; - } - break; - case FULL: - n_cols = mx->n_continuous; - break; - default: - NOT_REACHED (); - } - break; - default: - NOT_REACHED (); + *end = 0.0; } - c->n_rows[wr->content]++; - /* Read N_COLS items at CP. */ - { - int j; - - for (j = 0; j < n_cols; j++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return false; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting value for %s %s."), - var_get_name (dict_get_var (wr->dict, - mx->first_continuous + j)), - context (mx->reader)); - return false; - } - - *cp++ = token.number; - } - if (mx->fmt != FREE - && !force_eol (mx->reader, content_names[wr->content])) - return false; - } - - return true; + return retval; } - -/* Matrix source. */ - -static const struct case_source_class matrix_data_with_rowtype_source_class = - { - "MATRIX DATA", - NULL, - matrix_data_read_with_rowtype, - NULL, - }; - -static const struct case_source_class -matrix_data_without_rowtype_source_class = - { - "MATRIX DATA", - NULL, - matrix_data_read_without_rowtype, - NULL, - };