X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-data.c;h=f510ec25162d194273de45aa9572077a8a718af3;hb=339f1956cc727eda788638644ef93ab7852b31cd;hp=eb2d226f29341bbe4933f3219f8ca005c98bd941;hpb=0d7b430cd1cef60b963a54307622d6f4aa8efeb4;p=pspp diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index eb2d226f29..f510ec2516 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -1,2018 +1,1197 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 2017 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include + +#include "data/case.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/data-in.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/short-names.h" +#include "data/transformations.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/data-parser.h" +#include "language/data-io/data-reader.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/inpt-pgm.h" +#include "language/data-io/placement-parser.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/assertion.h" +#include "libpspp/i18n.h" +#include "libpspp/intern.h" +#include "libpspp/message.h" +#include "libpspp/str.h" + +#include "gl/c-ctype.h" +#include "gl/minmax.h" +#include "gl/xsize.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) - -/* FIXME: /N subcommand not implemented. It should be pretty simple, - too. */ - -/* Different types of variables for MATRIX DATA procedure. Order is - important: these are used for sort keys. */ -enum - { - MXD_SPLIT, /* SPLIT FILE variables. */ - MXD_ROWTYPE, /* ROWTYPE_. */ - MXD_FACTOR, /* Factor variables. */ - MXD_VARNAME, /* VARNAME_. */ - MXD_CONTINUOUS, /* Continuous variables. */ - - MXD_COUNT - }; - -/* Format type enums. */ -enum format_type - { - LIST, - FREE - }; - -/* Matrix section enums. */ -enum matrix_section - { - LOWER, - UPPER, - FULL - }; - -/* Diagonal inclusion enums. */ -enum include_diagonal - { - DIAGONAL, - NODIAGONAL - }; - -/* CONTENTS types. */ -enum content_type - { - N_VECTOR, - N_SCALAR, - N_MATRIX, - MEAN, - STDDEV, - COUNT, - MSE, - DFE, - MAT, - COV, - CORR, - PROX, - - LPAREN, - RPAREN, - EOC - }; - -/* 0=vector, 1=matrix, 2=scalar. */ -static const int content_type[PROX + 1] = + +#define ROWTYPES \ + /* Matrix row types. */ \ + RT(CORR, 2) \ + RT(COV, 2) \ + RT(MAT, 2) \ + RT(N_MATRIX, 2) \ + RT(PROX, 2) \ + \ + /* Vector row types. */ \ + RT(COUNT, 1) \ + RT(DFE, 1) \ + RT(MEAN, 1) \ + RT(MSE, 1) \ + RT(STDDEV, 1) \ + RT(N, 1) \ + \ + /* Scalar row types. */ \ + RT(N_SCALAR, 0) + +enum rowtype { - 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, +#define RT(NAME, DIMS) C_##NAME, + ROWTYPES +#undef RT }; -/* Name of each content type. */ -static const char *content_names[PROX + 1] = +enum { - "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE", - "DFE", "MAT", "COV", "CORR", "PROX", +#define RT(NAME, DIMS) +1 + N_ROWTYPES = ROWTYPES +#undef RT }; +verify (N_ROWTYPES < 32); -/* A MATRIX DATA input program. */ -struct matrix_data_pgm - { - struct pool *container; /* Arena used for all allocations. */ - struct dfm_reader *reader; /* Data file to read. */ - - /* Format. */ - enum format_type fmt; /* LIST or FREE. */ - enum matrix_section section;/* LOWER or UPPER or FULL. */ - enum include_diagonal diag; /* DIAGONAL or NODIAGONAL. */ - - int explicit_rowtype; /* ROWTYPE_ specified explicitly in data? */ - struct variable *rowtype_, *varname_; /* ROWTYPE_, VARNAME_ variables. */ - - struct variable *single_split; /* Single SPLIT FILE variable. */ - - /* Factor variables. */ - size_t n_factors; /* Number of factor variables. */ - struct variable **factors; /* Factor variables. */ - int is_per_factor[PROX + 1]; /* Is there per-factor data? */ - - int cells; /* Number of cells, or -1 if none. */ - - int pop_n; /* Population N specified by user. */ - - /* CONTENTS subcommand. */ - int contents[EOC * 3 + 1]; /* Contents. */ - int n_contents; /* Number of entries. */ - - /* Continuous variables. */ - int n_continuous; /* Number of continuous variables. */ - int first_continuous; /* Index into default_dict.var of - first continuous variable. */ +/* Returns the number of dimensions in the indexes for row type RT. A matrix + has 2 dimensions, a vector has 1, a scalar has 0. */ +static int +rowtype_dimensions (enum rowtype rt) +{ + static const int rowtype_dims[N_ROWTYPES] = { +#define RT(NAME, DIMS) [C_##NAME] = DIMS, + ROWTYPES +#undef RT }; + return rowtype_dims[rt]; +} -/* Auxiliary data attached to MATRIX DATA variables. */ -struct mxd_var - { - int var_type; /* Variable type. */ - int sub_type; /* Subtype. */ +static struct substring +rowtype_name (enum rowtype rt) +{ + static const struct substring rowtype_names[N_ROWTYPES] = { +#define RT(NAME, DIMS) [C_##NAME] = SS_LITERAL_INITIALIZER (#NAME), + ROWTYPES +#undef RT }; -static const struct case_source_class matrix_data_with_rowtype_source_class; -static const struct case_source_class matrix_data_without_rowtype_source_class; - -static int compare_variables_by_mxd_var_type (const void *pa, - const void *pb); -static bool read_matrices_without_rowtype (struct matrix_data_pgm *); -static bool read_matrices_with_rowtype (struct matrix_data_pgm *); -static int string_to_content_type (char *, int *); -static void attach_mxd_aux (struct variable *, int var_type, int sub_type); + return rowtype_names[rt]; +} -int -cmd_matrix_data (void) +static bool +rowtype_from_string (struct substring token, enum rowtype *rt) { - struct pool *pool; - struct matrix_data_pgm *mx; - struct file_handle *fh = fh_inline_file (); - bool ok; - - unsigned seen = 0; - - discard_variables (); - - pool = pool_create (); - mx = pool_alloc (pool, sizeof *mx); - mx->container = pool; - mx->reader = NULL; - mx->fmt = LIST; - mx->section = LOWER; - mx->diag = DIAGONAL; - mx->explicit_rowtype = 0; - mx->rowtype_ = NULL; - mx->varname_ = NULL; - mx->single_split = NULL; - mx->n_factors = 0; - mx->factors = NULL; - memset (mx->is_per_factor, 0, sizeof mx->is_per_factor); - mx->cells = -1; - mx->pop_n = -1; - mx->n_contents = 0; - mx->n_continuous = 0; - mx->first_continuous = 0; - while (token != '.') - { - lex_match ('/'); - - if (lex_match_id ("VARIABLES")) - { - char **v; - size_t nv; - - if (seen & 1) - { - msg (SE, _("VARIABLES subcommand multiply specified.")); - goto lossage; - } - seen |= 1; - - lex_match ('='); - if (!parse_DATA_LIST_vars (&v, &nv, PV_NO_DUPLICATE)) - goto lossage; - - { - size_t i; - - for (i = 0; i < nv; i++) - if (!strcasecmp (v[i], "VARNAME_")) - { - msg (SE, _("VARNAME_ cannot be explicitly specified on " - "VARIABLES.")); - for (i = 0; i < nv; i++) - free (v[i]); - free (v); - goto lossage; - } - } - - { - size_t i; - - for (i = 0; i < nv; i++) - { - struct variable *new_var; - - if (strcasecmp (v[i], "ROWTYPE_")) - { - new_var = dict_create_var_assert (default_dict, v[i], 0); - attach_mxd_aux (new_var, MXD_CONTINUOUS, i); - } - else - mx->explicit_rowtype = 1; - free (v[i]); - } - free (v); - } - - mx->rowtype_ = dict_create_var_assert (default_dict, - "ROWTYPE_", 8); - attach_mxd_aux (mx->rowtype_, MXD_ROWTYPE, 0); - } - else if (lex_match_id ("FILE")) - { - lex_match ('='); - fh = fh_parse (FH_REF_FILE | FH_REF_INLINE); - if (fh == NULL) - goto lossage; - } - else if (lex_match_id ("FORMAT")) - { - lex_match ('='); - - while (token == T_ID) - { - if (lex_match_id ("LIST")) - mx->fmt = LIST; - else if (lex_match_id ("FREE")) - mx->fmt = FREE; - else if (lex_match_id ("LOWER")) - mx->section = LOWER; - else if (lex_match_id ("UPPER")) - mx->section = UPPER; - else if (lex_match_id ("FULL")) - mx->section = FULL; - else if (lex_match_id ("DIAGONAL")) - mx->diag = DIAGONAL; - else if (lex_match_id ("NODIAGONAL")) - mx->diag = NODIAGONAL; - else - { - lex_error (_("in FORMAT subcommand")); - goto lossage; - } - } - } - else if (lex_match_id ("SPLIT")) - { - lex_match ('='); - - if (seen & 2) - { - msg (SE, _("SPLIT subcommand multiply specified.")); - goto lossage; - } - seen |= 2; - - if (token != T_ID) - { - lex_error (_("in SPLIT subcommand")); - goto lossage; - } - - if (dict_lookup_var (default_dict, tokid) == NULL - && (lex_look_ahead () == '.' || lex_look_ahead () == '/')) - { - if (!strcasecmp (tokid, "ROWTYPE_") - || !strcasecmp (tokid, "VARNAME_")) - { - msg (SE, _("Split variable may not be named ROWTYPE_ " - "or VARNAME_.")); - goto lossage; - } - - mx->single_split = dict_create_var_assert (default_dict, - tokid, 0); - attach_mxd_aux (mx->single_split, MXD_CONTINUOUS, 0); - lex_get (); - - dict_set_split_vars (default_dict, &mx->single_split, 1); - } - else - { - struct variable **split; - size_t n; - - if (!parse_variables (default_dict, &split, &n, PV_NO_DUPLICATE)) - goto lossage; - - dict_set_split_vars (default_dict, split, n); - } - - { - struct variable *const *split = dict_get_split_vars (default_dict); - size_t split_cnt = dict_get_split_cnt (default_dict); - int i; - - for (i = 0; i < split_cnt; i++) - { - struct mxd_var *mv = split[i]->aux; - assert (mv != NULL); - if (mv->var_type != MXD_CONTINUOUS) - { - msg (SE, _("Split variable %s is already another type."), - tokid); - goto lossage; - } - var_clear_aux (split[i]); - attach_mxd_aux (split[i], MXD_SPLIT, i); - } - } - } - else if (lex_match_id ("FACTORS")) - { - lex_match ('='); - - if (seen & 4) - { - msg (SE, _("FACTORS subcommand multiply specified.")); - goto lossage; - } - seen |= 4; - - if (!parse_variables (default_dict, &mx->factors, &mx->n_factors, - PV_NONE)) - goto lossage; - - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct variable *v = mx->factors[i]; - struct mxd_var *mv = v->aux; - assert (mv != NULL); - if (mv->var_type != MXD_CONTINUOUS) - { - msg (SE, _("Factor variable %s is already another type."), - tokid); - goto lossage; - } - var_clear_aux (v); - attach_mxd_aux (v, MXD_FACTOR, i); - } - } - } - else if (lex_match_id ("CELLS")) - { - lex_match ('='); - - if (mx->cells != -1) - { - msg (SE, _("CELLS subcommand multiply specified.")); - goto lossage; - } - - if (!lex_is_integer () || lex_integer () < 1) - { - lex_error (_("expecting positive integer")); - goto lossage; - } - - mx->cells = lex_integer (); - lex_get (); - } - else if (lex_match_id ("N")) - { - lex_match ('='); - - if (mx->pop_n != -1) - { - msg (SE, _("N subcommand multiply specified.")); - goto lossage; - } - - if (!lex_is_integer () || lex_integer () < 1) - { - lex_error (_("expecting positive integer")); - goto lossage; - } - - mx->pop_n = lex_integer (); - lex_get (); - } - else if (lex_match_id ("CONTENTS")) - { - int inside_parens = 0; - unsigned collide = 0; - int item; - - if (seen & 8) - { - msg (SE, _("CONTENTS subcommand multiply specified.")); - goto lossage; - } - seen |= 8; - - lex_match ('='); - - { - int i; - - for (i = 0; i <= PROX; i++) - mx->is_per_factor[i] = 0; - } - - for (;;) - { - if (lex_match ('(')) - { - if (inside_parens) - { - msg (SE, _("Nested parentheses not allowed.")); - goto lossage; - } - inside_parens = 1; - item = LPAREN; - } - else if (lex_match (')')) - { - if (!inside_parens) - { - msg (SE, _("Mismatched right parenthesis (`(').")); - goto lossage; - } - if (mx->contents[mx->n_contents - 1] == LPAREN) - { - msg (SE, _("Empty parentheses not allowed.")); - goto lossage; - } - inside_parens = 0; - item = RPAREN; - } - else - { - int content_type; - int collide_index; - - if (token != T_ID) - { - lex_error (_("in CONTENTS subcommand")); - goto lossage; - } - - content_type = string_to_content_type (tokid, - &collide_index); - if (content_type == -1) - { - lex_error (_("in CONTENTS subcommand")); - goto lossage; - } - lex_get (); - - if (collide & (1 << collide_index)) - { - msg (SE, _("Content multiply specified for %s."), - content_names[content_type]); - goto lossage; - } - collide |= (1 << collide_index); - - item = content_type; - mx->is_per_factor[item] = inside_parens; - } - mx->contents[mx->n_contents++] = item; - - if (token == '/' || token == '.') - break; - } + ss_trim (&token, ss_cstr (CC_SPACES)); + for (size_t i = 0; i < N_ROWTYPES; i++) + if (lex_id_match (rowtype_name (i), token)) + { + *rt = i; + return true; + } - if (inside_parens) - { - msg (SE, _("Missing right parenthesis.")); - goto lossage; - } - mx->contents[mx->n_contents] = EOC; - } - else - { - lex_error (NULL); - goto lossage; - } - } - - if (token != '.') + if (lex_id_match (ss_cstr ("N_VECTOR"), token)) { - lex_error (_("expecting end of command")); - goto lossage; + *rt = C_N; + return true; } - - if (!(seen & 1)) + else if (lex_id_match (ss_cstr ("SD"), token)) { - msg (SE, _("Missing VARIABLES subcommand.")); - goto lossage; + *rt = C_STDDEV; + return true; } - - if (!mx->n_contents && !mx->explicit_rowtype) - { - msg (SW, _("CONTENTS subcommand not specified: assuming file " - "contains only CORR matrix.")); - mx->contents[0] = CORR; - mx->contents[1] = EOC; - mx->n_contents = 0; - } - - if (mx->n_factors && !mx->explicit_rowtype && mx->cells == -1) - { - msg (SE, _("Missing CELLS subcommand. CELLS is required " - "when ROWTYPE_ is not given in the data and " - "factors are present.")); - goto lossage; - } - - if (mx->explicit_rowtype && mx->single_split) - { - msg (SE, _("Split file values must be present in the data when " - "ROWTYPE_ is present.")); - goto lossage; - } - - /* Create VARNAME_. */ - mx->varname_ = dict_create_var_assert (default_dict, "VARNAME_", 8); - attach_mxd_aux (mx->varname_, MXD_VARNAME, 0); - - /* Sort the dictionary variables into the desired order for the - system file output. */ - { - struct variable **v; - size_t nv; - - dict_get_vars (default_dict, &v, &nv, 0); - qsort (v, nv, sizeof *v, compare_variables_by_mxd_var_type); - dict_reorder_vars (default_dict, v, nv); - free (v); - } + return false; +} - /* Set formats. */ +static bool +rowtype_parse (struct lexer *lexer, enum rowtype *rt) +{ + bool parsed = (lex_token (lexer) == T_ID + && rowtype_from_string (lex_tokss (lexer), rt)); + if (parsed) + lex_get (lexer); + return parsed; +} + +struct matrix_format { - static const struct fmt_spec fmt_tab[MXD_COUNT] = + bool span; + enum triangle { - {FMT_F, 4, 0}, - {FMT_A, 8, 0}, - {FMT_F, 4, 0}, - {FMT_A, 8, 0}, - {FMT_F, 10, 4}, - }; - - int i; - - mx->first_continuous = -1; - for (i = 0; i < dict_get_var_cnt (default_dict); i++) + LOWER, + UPPER, + FULL + } + triangle; + enum diagonal { - struct variable *v = dict_get_var (default_dict, i); - struct mxd_var *mv = v->aux; - int type = mv->var_type; - - assert (type >= 0 && type < MXD_COUNT); - v->print = v->write = fmt_tab[type]; - - if (type == MXD_CONTINUOUS) - mx->n_continuous++; - if (mx->first_continuous == -1 && type == MXD_CONTINUOUS) - mx->first_continuous = i; + DIAGONAL, + NO_DIAGONAL } - } - - if (mx->n_continuous == 0) - { - msg (SE, _("No continuous variables specified.")); - goto lossage; - } - - mx->reader = dfm_open_reader (fh); - if (mx->reader == NULL) - goto lossage; - - if (mx->explicit_rowtype) - ok = read_matrices_with_rowtype (mx); - else - ok = read_matrices_without_rowtype (mx); - - dfm_close_reader (mx->reader); + diagonal; - pool_destroy (mx->container); + bool input_rowtype; + struct variable **input_vars; + size_t n_input_vars; - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; - -lossage: - discard_variables (); - free (mx->factors); - pool_destroy (mx->container); - return CMD_CASCADING_FAILURE; -} - -/* Look up string S as a content-type name and return the - corresponding enumerated value, or -1 if there is no match. If - COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use - as a bit-index) which can be used for determining whether a related - statistic has already been used. */ -static int -string_to_content_type (char *s, int *collide) -{ - static const struct - { - int value; - int collide; - const char *string; - } - *tp, - tab[] = - { - {N_VECTOR, 0, "N_VECTOR"}, - {N_VECTOR, 0, "N"}, - {N_SCALAR, 0, "N_SCALAR"}, - {N_MATRIX, 1, "N_MATRIX"}, - {MEAN, 2, "MEAN"}, - {STDDEV, 3, "STDDEV"}, - {STDDEV, 3, "SD"}, - {COUNT, 4, "COUNT"}, - {MSE, 5, "MSE"}, - {DFE, 6, "DFE"}, - {MAT, 7, "MAT"}, - {COV, 8, "COV"}, - {CORR, 9, "CORR"}, - {PROX, 10, "PROX"}, - {-1, -1, NULL}, - }; - - for (tp = tab; tp->value != -1; tp++) - if (!strcasecmp (s, tp->string)) + /* How to read matrices with each possible number of dimensions (0=scalar, + 1=vector, 2=matrix). */ + struct matrix_sched { - if (collide) - *collide = tp->collide; - - return tp->value; + /* Number of rows and columns in the matrix: (1,1) for a scalar, (1,n) for + a vector, (n,n) for a matrix. */ + int nr, nc; + + /* Rows of data to read and the number of columns in each. Because we + often read just a triangle and sometimes omit the diagonal, 'n_rp' can + be less than 'nr' and 'rp[i]->y' isn't always 'y'. */ + struct row_sched + { + /* The y-value of the row inside the matrix. */ + int y; + + /* first and last (exclusive) columns to read in this row. */ + int x0, x1; + } + *rp; + size_t n_rp; } - return -1; -} + ms[3]; + + struct variable *rowtype; + struct variable *varname; + struct variable **cvars; + int n_cvars; + struct variable **svars; + size_t *svar_indexes; + size_t n_svars; + struct variable **fvars; + size_t *fvar_indexes; + size_t n_fvars; + int cells; + int n; + + unsigned int pooled_rowtype_mask; + unsigned int factor_rowtype_mask; + + struct content + { + bool open; + enum rowtype rowtype; + bool close; + } + *contents; + size_t n_contents; + }; -/* Compare two variables using p.mxd.var_type and p.mxd.sub_type - fields. */ -static int -compare_variables_by_mxd_var_type (const void *a_, const void *b_) +static void +matrix_format_uninit (struct matrix_format *mf) { - struct variable *const *pa = a_; - struct variable *const *pb = b_; - const struct mxd_var *a = (*pa)->aux; - const struct mxd_var *b = (*pb)->aux; - - if (a->var_type != b->var_type) - return a->var_type > b->var_type ? 1 : -1; - else - return a->sub_type < b->sub_type ? -1 : a->sub_type > b->sub_type; + free (mf->input_vars); + for (int i = 0; i < 3; i++) + free (mf->ms[i].rp); + free (mf->cvars); + free (mf->svars); + free (mf->svar_indexes); + free (mf->fvars); + free (mf->fvar_indexes); + free (mf->contents); } -/* Attaches a struct mxd_var with the specific member values to - V. */ static void -attach_mxd_aux (struct variable *v, int var_type, int sub_type) +set_string (struct ccase *outcase, const struct variable *var, + struct substring src) { - struct mxd_var *mv; - - assert (v->aux == NULL); - mv = xmalloc (sizeof *mv); - mv->var_type = var_type; - mv->sub_type = sub_type; - var_attach_aux (v, mv, var_dtor_free); + struct substring dst = case_ss (outcase, var); + for (size_t i = 0; i < dst.length; i++) + dst.string[i] = i < src.length ? src.string[i] : ' '; } - -/* Matrix tokenizer. */ - -/* Matrix token types. */ -enum matrix_token_type - { - MNUM, /* Number. */ - MSTR /* String. */ - }; - -/* A MATRIX DATA parsing token. */ -struct matrix_token - { - enum matrix_token_type type; - double number; /* MNUM: token value. */ - char *string; /* MSTR: token string; not null-terminated. */ - int length; /* MSTR: tokstr length. */ - }; - -static int mget_token (struct matrix_token *, struct dfm_reader *); - -#if DEBUGGING -#define mget_token(TOKEN, READER) mget_token_dump(TOKEN, READER) static void -mdump_token (const struct matrix_token *token) +parse_msg (struct dfm_reader *reader, const struct substring *token, + char *text, enum msg_severity severity) { - switch (token->type) + int first_column = 0; + if (token) { - case MNUM: - printf (" #%g", token->number); - break; - case MSTR: - printf (" '%.*s'", token->length, token->string); - break; - default: - assert (0); + struct substring line = dfm_get_record (reader); + if (token->string >= line.string && token->string < ss_end (line)) + first_column = ss_pointer_to_position (line, token->string) + 1; } - fflush (stdout); + + int line_number = dfm_get_line_number (reader); + struct msg_location *location = xmalloc (sizeof *location); + int last_column = (first_column && token->length + ? first_column + token->length - 1 + : 0); + *location = (struct msg_location) { + .file_name = intern_new (dfm_get_file_name (reader)), + .start = { .line = line_number, .column = first_column }, + .end = { .line = line_number, .column = last_column }, + }; + struct msg *m = xmalloc (sizeof *m); + *m = (struct msg) { + .category = MSG_C_DATA, + .severity = severity, + .location = location, + .text = text, + }; + msg_emit (m); } -static int -mget_token_dump (struct matrix_token *token, struct dfm_reader *reader) +static void PRINTF_FORMAT (3, 4) +parse_warning (struct dfm_reader *reader, const struct substring *token, + const char *format, ...) { - int result = (mget_token) (token, reader); - mdump_token (token); - return result; + va_list args; + va_start (args, format); + parse_msg (reader, token, xvasprintf (format, args), MSG_S_WARNING); + va_end (args); } -#endif -/* Return the current position in READER. */ -static const char * -context (struct dfm_reader *reader) +static void PRINTF_FORMAT (3, 4) +parse_error (struct dfm_reader *reader, const struct substring *token, + const char *format, ...) { - static char buf[32]; - - if (dfm_eof (reader)) - strcpy (buf, "at end of file"); - else - { - struct fixed_string line; - const char *sp; - - dfm_get_record (reader, &line); - sp = ls_c_str (&line); - while (sp < ls_end (&line) && isspace ((unsigned char) *sp)) - sp++; - if (sp >= ls_end (&line)) - strcpy (buf, "at end of line"); - else - { - char *dp; - size_t copy_cnt = 0; - - dp = stpcpy (buf, "before `"); - while (sp < ls_end (&line) && !isspace ((unsigned char) *sp) - && copy_cnt < 10) - { - *dp++ = *sp++; - copy_cnt++; - } - strcpy (dp, "'"); - } - } - - return buf; + va_list args; + va_start (args, format); + parse_msg (reader, token, xvasprintf (format, args), MSG_S_ERROR); + va_end (args); } -/* Is there at least one token left in the data file? */ -static int -another_token (struct dfm_reader *reader) +/* Advance to beginning of next token. */ +static bool +more_tokens (struct substring *p, struct dfm_reader *r) { for (;;) { - struct fixed_string line; - const char *cp; - - if (dfm_eof (reader)) - return 0; - dfm_get_record (reader, &line); - - cp = ls_c_str (&line); - while (isspace ((unsigned char) *cp) && cp < ls_end (&line)) - cp++; - - if (cp < ls_end (&line)) - { - dfm_forward_columns (reader, cp - ls_c_str (&line)); - return 1; - } + ss_ltrim (p, ss_cstr (CC_SPACES ",")); + if (p->length) + return true; - dfm_forward_record (reader); + dfm_forward_record (r); + if (dfm_eof (r)) + return false; + *p = dfm_get_record (r); } } -/* Parse a MATRIX DATA token from READER into TOKEN. */ -static int -(mget_token) (struct matrix_token *token, struct dfm_reader *reader) +static bool +next_token (struct substring *p, struct dfm_reader *r, struct substring *token) { - struct fixed_string line; - int first_column; - char *cp; - - if (!another_token (reader)) - return 0; + if (!more_tokens (p, r)) + return false; - dfm_get_record (reader, &line); - first_column = dfm_column_start (reader); - - /* Three types of fields: quoted with ', quoted with ", unquoted. */ - cp = ls_c_str (&line); - if (*cp == '\'' || *cp == '"') + /* Collect token. */ + int c = ss_first (*p); + if (c == '\'' || c == '"') { - int quote = *cp; - - token->type = MSTR; - token->string = ++cp; - while (cp < ls_end (&line) && *cp != quote) - cp++; - token->length = cp - token->string; - if (cp < ls_end (&line)) - cp++; - else - msg (SW, _("Scope of string exceeds line.")); + ss_advance (p, 1); + ss_get_until (p, c, token); } else { - int is_num = isdigit ((unsigned char) *cp) || *cp == '.'; - - token->string = cp++; - while (cp < ls_end (&line) - && !isspace ((unsigned char) *cp) && *cp != ',' - && *cp != '-' && *cp != '+') - { - if (isdigit ((unsigned char) *cp)) - is_num = 1; - - if ((tolower ((unsigned char) *cp) == 'd' - || tolower ((unsigned char) *cp) == 'e') - && (cp[1] == '+' || cp[1] == '-')) - cp += 2; - else - cp++; - } - - token->length = cp - token->string; - assert (token->length); - - if (is_num) - { - struct data_in di; - - di.s = token->string; - di.e = token->string + token->length; - di.v = (union value *) &token->number; - di.f1 = first_column; - di.format = make_output_format (FMT_F, token->length, 0); - - if (!data_in (&di)) - return 0; - } - else - token->type = MSTR; + size_t n = 1; + for (;;) + { + c = ss_at (*p, n); + if (c == EOF + || ss_find_byte (ss_cstr (CC_SPACES ","), c) != SIZE_MAX + || ((c == '+' || c == '-') + && ss_find_byte (ss_cstr ("dDeE"), + ss_at (*p, n - 1)) == SIZE_MAX)) + break; + n++; + } + ss_get_bytes (p, n, token); } - - dfm_forward_columns (reader, cp - ls_c_str (&line)); - - return 1; + return true; } -/* Forcibly skip the end of a line for content type CONTENT in - READER. */ -static int -force_eol (struct dfm_reader *reader, const char *content) +static bool +next_number (struct substring *p, struct dfm_reader *r, double *d) { - struct fixed_string line; - const char *cp; - - if (dfm_eof (reader)) - return 0; - dfm_get_record (reader, &line); - - cp = ls_c_str (&line); - while (isspace ((unsigned char) *cp) && cp < ls_end (&line)) - cp++; - - if (cp < ls_end (&line)) + struct substring token; + if (!next_token (p, r, &token)) + return false; + + union value v; + char *error = data_in (token, dfm_reader_get_encoding (r), FMT_F, + settings_get_fmt_settings (), &v, 0, NULL); + if (error) { - msg (SE, _("End of line expected %s while reading %s."), - context (reader), content); - return 0; + parse_error (r, &token, "%s", error); + free (error); } - - dfm_forward_record (reader); - return 1; + *d = v.f; + return true; } - -/* Back end, omitting ROWTYPE_. */ - -struct nr_aux_data - { - struct matrix_data_pgm *mx; /* MATRIX DATA program. */ - double ***data; /* MATRIX DATA data. */ - double *factor_values; /* Factor values. */ - int max_cell_idx; /* Max-numbered cell that we have - read so far, plus one. */ - double *split_values; /* SPLIT FILE variable values. */ - }; -static int nr_read_splits (struct nr_aux_data *, int compare); -static int nr_read_factors (struct nr_aux_data *, int cell); -static bool nr_output_data (struct nr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static bool matrix_data_read_without_rowtype (struct case_source *source, - struct ccase *, - write_case_func *, - write_case_data); - -/* Read from the data file and write it to the active file. - Returns true if successful, false if an I/O error occurred. */ static bool -read_matrices_without_rowtype (struct matrix_data_pgm *mx) +next_rowtype (struct substring *p, struct dfm_reader *r, enum rowtype *rt) { - struct nr_aux_data nr; - bool ok; - - if (mx->cells == -1) - mx->cells = 1; - - nr.mx = mx; - nr.data = NULL; - nr.factor_values = xnmalloc (mx->n_factors * mx->cells, - sizeof *nr.factor_values); - nr.max_cell_idx = 0; - nr.split_values = xnmalloc (dict_get_split_cnt (default_dict), - sizeof *nr.split_values); - - vfm_source = create_case_source (&matrix_data_without_rowtype_source_class, &nr); - - ok = procedure (NULL, NULL); - - free (nr.split_values); - free (nr.factor_values); - - return ok; + struct substring token; + if (!next_token (p, r, &token)) + return false; + + if (rowtype_from_string (token, rt)) + return true; + + parse_error (r, &token, _("Unknown row type \"%.*s\"."), + (int) token.length, token.string); + return false; } -/* Mirror data across the diagonal of matrix CP which contains - CONTENT type data. */ -static void -fill_matrix (struct matrix_data_pgm *mx, int content, double *cp) -{ - int type = content_type[content]; +struct read_matrix_params + { + /* Adjustments to first and last row to read. */ + int dy0, dy1; - if (type == 1 && mx->section != FULL) - { - if (mx->diag == NODIAGONAL) - { - const double fill = content == CORR ? 1.0 : SYSMIS; - int i; + /* Left and right columns to read in first row, inclusive. + For x1, INT_MAX is the rightmost column. */ + int x0, x1; - for (i = 0; i < mx->n_continuous; i++) - cp[i * (1 + mx->n_continuous)] = fill; - } - - { - int c, r; - - if (mx->section == LOWER) - { - int n_lines = mx->n_continuous; - if (mx->section != FULL && mx->diag == NODIAGONAL) - n_lines--; - - for (r = 1; r < n_lines; r++) - for (c = 0; c < r; c++) - cp[r + c * mx->n_continuous] = cp[c + r * mx->n_continuous]; - } - else - { - assert (mx->section == UPPER); - for (r = 1; r < mx->n_continuous; r++) - for (c = 0; c < r; c++) - cp[c + r * mx->n_continuous] = cp[r + c * mx->n_continuous]; - } - } + /* Adjustment to x0 and x1 for each subsequent row we read. Each of these + is 0 to keep it the same or -1 or +1 to adjust it by that much. */ + int dx0, dx1; + }; + +static const struct read_matrix_params * +get_read_matrix_params (const struct matrix_format *mf) +{ + if (mf->triangle == FULL) + { + /* 1 2 3 4 + 2 1 5 6 + 3 5 1 7 + 4 6 7 1 */ + static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 0, 0 }; + return &rmp; } - else if (type == 2) + else if (mf->triangle == LOWER) { - int c; - - for (c = 1; c < mx->n_continuous; c++) - cp[c] = cp[0]; + if (mf->diagonal == DIAGONAL) + { + /* 1 . . . + 2 1 . . + 3 5 1 . + 4 6 7 1 */ + static const struct read_matrix_params rmp = { 0, 0, 0, 0, 0, 1 }; + return &rmp; + } + else + { + /* . . . . + 2 . . . + 3 5 . . + 4 6 7 . */ + static const struct read_matrix_params rmp = { 1, 0, 0, 0, 0, 1 }; + return &rmp; + } + } + else if (mf->triangle == UPPER) + { + if (mf->diagonal == DIAGONAL) + { + /* 1 2 3 4 + . 1 5 6 + . . 1 7 + . . . 1 */ + static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 1, 0 }; + return &rmp; + } + else + { + /* . 2 3 4 + . . 5 6 + . . . 7 + . . . . */ + static const struct read_matrix_params rmp = { 0, -1, 1, INT_MAX, 1, 0 }; + return &rmp; + } } + else + NOT_REACHED (); } -/* Read data lines for content type CONTENT from the data file. - If PER_FACTOR is nonzero, then factor information is read from - the data file. Data is for cell number CELL. */ -static int -nr_read_data_lines (struct nr_aux_data *nr, - int per_factor, int cell, int content, int compare) +static void +schedule_matrices (struct matrix_format *mf) { - struct matrix_data_pgm *mx = nr->mx; - const int type = content_type[content]; /* Content type. */ - int n_lines; /* Number of lines to parse from data file for this type. */ - double *cp; /* Current position in vector or matrix. */ - int i; - - if (type != 1) - n_lines = 1; - else + struct matrix_sched *ms0 = &mf->ms[0]; + ms0->nr = 1; + ms0->nc = 1; + ms0->rp = xmalloc (sizeof *ms0->rp); + ms0->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = 1 }; + ms0->n_rp = 1; + + struct matrix_sched *ms1 = &mf->ms[1]; + ms1->nr = 1; + ms1->nc = mf->n_cvars; + ms1->rp = xmalloc (sizeof *ms1->rp); + ms1->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = mf->n_cvars }; + ms1->n_rp = 1; + + struct matrix_sched *ms2 = &mf->ms[2]; + ms2->nr = mf->n_cvars; + ms2->nc = mf->n_cvars; + ms2->rp = xmalloc (mf->n_cvars * sizeof *ms2->rp); + ms2->n_rp = 0; + + const struct read_matrix_params *rmp = get_read_matrix_params (mf); + int x0 = rmp->x0; + int x1 = rmp->x1 < mf->n_cvars ? rmp->x1 : mf->n_cvars - 1; + int y0 = rmp->dy0; + int y1 = (int) mf->n_cvars + rmp->dy1; + for (int y = y0; y < y1; y++) { - n_lines = mx->n_continuous; - if (mx->section != FULL && mx->diag == NODIAGONAL) - n_lines--; - } + assert (x0 >= 0 && x0 < mf->n_cvars); + assert (x1 >= 0 && x1 < mf->n_cvars); + assert (x1 >= x0); - cp = nr->data[content][cell]; - if (type == 1 && mx->section == LOWER && mx->diag == NODIAGONAL) - cp += mx->n_continuous; - - for (i = 0; i < n_lines; i++) - { - int n_cols; - - if (!nr_read_splits (nr, 1)) - return 0; - if (per_factor && !nr_read_factors (nr, cell)) - return 0; - compare = 1; - - switch (type) - { - case 0: - n_cols = mx->n_continuous; - break; - case 1: - switch (mx->section) - { - case LOWER: - n_cols = i + 1; - break; - case UPPER: - cp += i; - n_cols = mx->n_continuous - i; - if (mx->diag == NODIAGONAL) - { - n_cols--; - cp++; - } - break; - case FULL: - n_cols = mx->n_continuous; - break; - default: - assert (0); - abort (); - } - break; - case 2: - n_cols = 1; - break; - default: - assert (0); - abort (); - } - - { - int j; - - for (j = 0; j < n_cols; j++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return 0; - if (token.type != MNUM) - { - msg (SE, _("expecting value for %s %s"), - dict_get_var (default_dict, j)->name, - context (mx->reader)); - return 0; - } - - *cp++ = token.number; - } - if (mx->fmt != FREE - && !force_eol (mx->reader, content_names[content])) - return 0; - } + ms2->rp[ms2->n_rp++] = (struct row_sched) { + .y = y, .x0 = x0, .x1 = x1 + 1 + }; - if (mx->section == LOWER) - cp += mx->n_continuous - n_cols; + x0 += rmp->dx0; + x1 += rmp->dx1; } - - fill_matrix (mx, content, nr->data[content][cell]); - - return 1; } -/* When ROWTYPE_ does not appear in the data, reads the matrices and - writes them to the output file. - Returns true if successful, false if an I/O error occurred. */ static bool -matrix_data_read_without_rowtype (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) +read_id_columns (const struct matrix_format *mf, + struct substring *p, struct dfm_reader *r, + double *d, enum rowtype *rt) { - struct nr_aux_data *nr = source->aux; - struct matrix_data_pgm *mx = nr->mx; + for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++) + if (!(mf->input_vars[i] == mf->rowtype + ? next_rowtype (p, r, rt) + : next_number (p, r, &d[i]))) + return false; + return true; +} - { - int *cp; +static bool +equal_id_columns (const struct matrix_format *mf, + const double *a, const double *b) +{ + for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++) + if (mf->input_vars[i] != mf->rowtype && a[i] != b[i]) + return false; + return true; +} - nr->data = pool_nalloc (mx->container, PROX + 1, sizeof *nr->data); - +static bool +equal_split_columns (const struct matrix_format *mf, + const double *a, const double *b) +{ + for (size_t i = 0; i < mf->n_svars; i++) { - int i; - - for (i = 0; i <= PROX; i++) - nr->data[i] = NULL; + size_t idx = mf->svar_indexes[i]; + if (a[idx] != b[idx]) + return false; } - - for (cp = mx->contents; *cp != EOC; cp++) - if (*cp != LPAREN && *cp != RPAREN) - { - int per_factor = mx->is_per_factor[*cp]; - int n_entries; - - n_entries = mx->n_continuous; - if (content_type[*cp] == 1) - n_entries *= mx->n_continuous; - - { - int n_vectors = per_factor ? mx->cells : 1; - int i; - - nr->data[*cp] = pool_nalloc (mx->container, - n_vectors, sizeof **nr->data); - - for (i = 0; i < n_vectors; i++) - nr->data[*cp][i] = pool_nalloc (mx->container, - n_entries, sizeof ***nr->data); - } - } - } - - for (;;) - { - int *bp, *ep, *np; - - if (!nr_read_splits (nr, 0)) - return true; - - for (bp = mx->contents; *bp != EOC; bp = np) - { - int per_factor; - - /* Trap the CONTENTS that we should parse in this pass - between bp and ep. Set np to the starting bp for next - iteration. */ - if (*bp == LPAREN) - { - ep = ++bp; - while (*ep != RPAREN) - ep++; - np = &ep[1]; - per_factor = 1; - } - else - { - ep = &bp[1]; - while (*ep != EOC && *ep != LPAREN) - ep++; - np = ep; - per_factor = 0; - } - - { - int i; - - for (i = 0; i < (per_factor ? mx->cells : 1); i++) - { - int *cp; - - for (cp = bp; cp < ep; cp++) - if (!nr_read_data_lines (nr, per_factor, i, *cp, cp != bp)) - return true; - } - } - } + return true; +} - if (!nr_output_data (nr, c, write_case, wc_data)) - return false; +static bool +is_pooled (const struct matrix_format *mf, const double *d) +{ + for (size_t i = 0; i < mf->n_fvars; i++) + if (d[mf->fvar_indexes[i]] != SYSMIS) + return false; + return true; +} - if (dict_get_split_cnt (default_dict) == 0 - || !another_token (mx->reader)) - return true; - } +static void +matrix_sched_init (const struct matrix_format *mf, enum rowtype rt, + gsl_matrix *m) +{ + int n_dims = rowtype_dimensions (rt); + const struct matrix_sched *ms = &mf->ms[n_dims]; + double diagonal = n_dims < 2 || rt != C_CORR ? SYSMIS : 1.0; + for (size_t y = 0; y < ms->nr; y++) + for (size_t x = 0; x < ms->nc; x++) + gsl_matrix_set (m, y, x, y == x ? diagonal : SYSMIS); } -/* Read the split file variables. If COMPARE is 1, compares the - values read to the last values read and returns 1 if they're equal, - 0 otherwise. */ -static int -nr_read_splits (struct nr_aux_data *nr, int compare) +static void +matrix_sched_output (const struct matrix_format *mf, enum rowtype rt, + gsl_matrix *m, const double *d, int split_num, + struct casewriter *w) { - struct matrix_data_pgm *mx = nr->mx; - static int just_read = 0; /* FIXME: WTF? */ - size_t split_cnt; - size_t i; + int n_dims = rowtype_dimensions (rt); + const struct matrix_sched *ms = &mf->ms[n_dims]; - if (compare && just_read) + if (rt == C_N_SCALAR) { - just_read = 0; - return 1; + for (size_t x = 1; x < mf->n_cvars; x++) + gsl_matrix_set (m, 0, x, gsl_matrix_get (m, 0, 0)); + rt = C_N; } - - if (dict_get_split_vars (default_dict) == NULL) - return 1; - if (mx->single_split) + for (int y = 0; y < ms->nr; y++) { - if (!compare) - { - struct mxd_var *mv = dict_get_split_vars (default_dict)[0]->aux; - nr->split_values[0] = ++mv->sub_type; - } - return 1; + struct ccase *c = case_create (casewriter_get_proto (w)); + for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++) + if (mf->input_vars[i] != mf->rowtype) + *case_num_rw (c, mf->input_vars[i]) = d[i]; + if (mf->n_svars && !mf->svar_indexes) + *case_num_rw (c, mf->svars[0]) = split_num; + set_string (c, mf->rowtype, rowtype_name (rt)); + const char *varname = n_dims == 2 ? var_get_name (mf->cvars[y]) : ""; + set_string (c, mf->varname, ss_cstr (varname)); + for (int x = 0; x < mf->n_cvars; x++) + *case_num_rw (c, mf->cvars[x]) = gsl_matrix_get (m, y, x); + casewriter_write (w, c); } +} - if (!compare) - just_read = 1; +static void +matrix_sched_output_n (const struct matrix_format *mf, double n, + gsl_matrix *m, const double *d, int split_num, + struct casewriter *w) +{ + gsl_matrix_set (m, 0, 0, n); + matrix_sched_output (mf, C_N_SCALAR, m, d, split_num, w); +} - split_cnt = dict_get_split_cnt (default_dict); - for (i = 0; i < split_cnt; i++) +static void +check_eol (const struct matrix_format *mf, struct substring *p, + struct dfm_reader *r) +{ + if (!mf->span) { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return 0; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting SPLIT FILE value %s."), - context (mx->reader)); - return 0; - } - - if (!compare) - nr->split_values[i] = token.number; - else if (nr->split_values[i] != token.number) + ss_ltrim (p, ss_cstr (CC_SPACES ",")); + if (p->length) { - msg (SE, _("Expecting value %g for %s."), - nr->split_values[i], - dict_get_split_vars (default_dict)[i]->name); - return 0; + parse_error (r, p, _("Extraneous data expecting end of line.")); + p->length = 0; } } - - return 1; } -/* Read the factors for cell CELL. If COMPARE is 1, compares the - values read to the last values read and returns 1 if they're equal, - 0 otherwise. */ -static int -nr_read_factors (struct nr_aux_data *nr, int cell) +static void +parse_data_with_rowtype (const struct matrix_format *mf, + struct dfm_reader *r, struct casewriter *w) { - struct matrix_data_pgm *mx = nr->mx; - int compare; - - if (mx->n_factors == 0) - return 1; - - assert (nr->max_cell_idx >= cell); - if (cell != nr->max_cell_idx) - compare = 1; - else - { - compare = 0; - nr->max_cell_idx++; - } - - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return 0; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting factor value %s."), - context (mx->reader)); - return 0; - } - - if (!compare) - nr->factor_values[i + mx->n_factors * cell] = token.number; - else if (nr->factor_values[i + mx->n_factors * cell] != token.number) - { - msg (SE, _("Syntax error expecting value %g for %s %s."), - nr->factor_values[i + mx->n_factors * cell], - mx->factors[i]->name, context (mx->reader)); - return 0; - } - } - } + if (dfm_eof (r)) + return; + struct substring p = dfm_get_record (r); - return 1; -} + double *prev = NULL; + gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars); -/* Write the contents of a cell having content type CONTENT and data - CP to the active file. - Returns true if successful, false if an I/O error occurred. */ -static bool -dump_cell_content (struct matrix_data_pgm *mx, int content, double *cp, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - int type = content_type[content]; + double *d = xnmalloc (mf->n_input_vars, sizeof *d); + enum rowtype rt; - { - buf_copy_str_rpad (case_data_rw (c, mx->rowtype_->fv)->s, 8, - content_names[content]); - - if (type != 1) - memset (case_data_rw (c, mx->varname_->fv)->s, ' ', 8); - } + double *d_next = xnmalloc (mf->n_input_vars, sizeof *d_next); - { - int n_lines = (type == 1) ? mx->n_continuous : 1; - int i; - - for (i = 0; i < n_lines; i++) - { - int j; - - for (j = 0; j < mx->n_continuous; j++) - { - int fv = dict_get_var (default_dict, mx->first_continuous + j)->fv; - case_data_rw (c, fv)->f = *cp; - cp++; - } - if (type == 1) - buf_copy_str_rpad (case_data_rw (c, mx->varname_->fv)->s, 8, - dict_get_var (default_dict, - mx->first_continuous + i)->name); - if (!write_case (wc_data)) - return false; - } - } - return true; -} + if (!read_id_columns (mf, &p, r, d, &rt)) + goto exit; + for (;;) + { + /* If this has rowtype N but there was an N subcommand, then the + subcommand takes precedence, so we will suppress outputting this + record. We still need to parse it, though, so we can't skip other + work. */ + bool suppress_output = mf->n >= 0 && (rt == C_N || rt == C_N_SCALAR); + if (suppress_output) + parse_error (r, NULL, _("N record is not allowed with N subcommand. " + "Ignoring N record.")); + + /* If there's an N subcommand, and this is a new split, then output an N + record. */ + if (mf->n >= 0 && (!prev || !equal_split_columns (mf, prev, d))) + { + matrix_sched_output_n (mf, mf->n, m, d, 0, w); -/* Finally dump out everything from nr_data[] to the output file. */ -static bool -nr_output_data (struct nr_aux_data *nr, struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - struct matrix_data_pgm *mx = nr->mx; - - { - struct variable *const *split; - size_t split_cnt; - size_t i; + if (!prev) + prev = xnmalloc (mf->n_input_vars, sizeof *prev); + memcpy (prev, d, mf->n_input_vars * sizeof *prev); + } - split_cnt = dict_get_split_cnt (default_dict); - split = dict_get_split_vars (default_dict); - for (i = 0; i < split_cnt; i++) - case_data_rw (c, split[i]->fv)->f = nr->split_values[i]; - } + /* Usually users don't provide the CONTENTS subcommand with ROWTYPE_, but + if they did then warn if ROWTYPE_ is an unexpected type. */ + if (mf->factor_rowtype_mask || mf->pooled_rowtype_mask) + { + const char *name = rowtype_name (rt).string; + if (is_pooled (mf, d)) + { + if (!((1u << rt) & mf->pooled_rowtype_mask)) + parse_warning (r, NULL, _("Data contains pooled row type %s not " + "included in CONTENTS."), name); + } + else + { + if (!((1u << rt) & mf->factor_rowtype_mask)) + parse_warning (r, NULL, _("Data contains with-factors row type " + "%s not included in CONTENTS."), name); + } + } - if (mx->n_factors) - { - int cell; + /* Initialize the matrix to be filled-in. */ + int n_dims = rowtype_dimensions (rt); + const struct matrix_sched *ms = &mf->ms[n_dims]; + matrix_sched_init (mf, rt, m); - for (cell = 0; cell < mx->cells; cell++) - { - { - size_t factor; - - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor]->fv)->f - = nr->factor_values[factor + cell * mx->n_factors]; - } - - { - int content; - - for (content = 0; content <= PROX; content++) - if (mx->is_per_factor[content]) - { - assert (nr->data[content] != NULL - && nr->data[content][cell] != NULL); + enum rowtype rt_next; + bool eof; - if (!dump_cell_content (mx, content, nr->data[content][cell], - c, write_case, wc_data)) - return false; - } - } - } - } + size_t n_rows; + for (n_rows = 1; ; n_rows++) + { + if (n_rows <= ms->n_rp) + { + const struct row_sched *rs = &ms->rp[n_rows - 1]; + size_t y = rs->y; + for (size_t x = rs->x0; x < rs->x1; x++) + { + double e; + if (!next_number (&p, r, &e)) + goto exit; + gsl_matrix_set (m, y, x, e); + if (n_dims == 2 && mf->triangle != FULL) + gsl_matrix_set (m, x, y, e); + } + check_eol (mf, &p, r); + } + else + { + /* Suppress bad input data. We'll issue an error later. */ + p.length = 0; + } - { - int content; - - { - size_t factor; + eof = (!more_tokens (&p, r) + || !read_id_columns (mf, &p, r, d_next, &rt_next)); + if (eof) + break; - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor]->fv)->f = SYSMIS; - } - - for (content = 0; content <= PROX; content++) - if (!mx->is_per_factor[content] && nr->data[content] != NULL) - { - if (!dump_cell_content (mx, content, nr->data[content][0], - c, write_case, wc_data)) - return false; + if (!equal_id_columns (mf, d, d_next) || rt_next != rt) + break; } - } + if (!suppress_output) + matrix_sched_output (mf, rt, m, d, 0, w); - return true; -} - -/* Back end, with ROWTYPE_. */ + if (n_rows != ms->n_rp) + parse_error (r, NULL, + _("Matrix %s had %zu rows but %zu rows were expected."), + rowtype_name (rt).string, n_rows, ms->n_rp); + if (eof) + break; -/* All the data for one set of factor values. */ -struct factor_data - { - double *factors; - int n_rows[PROX + 1]; - double *data[PROX + 1]; - struct factor_data *next; - }; + double *d_tmp = d; + d = d_next; + d_next = d_tmp; -/* With ROWTYPE_ auxiliary data. */ -struct wr_aux_data - { - struct matrix_data_pgm *mx; /* MATRIX DATA program. */ - int content; /* Type of current row. */ - double *split_values; /* SPLIT FILE variable values. */ - struct factor_data *data; /* All the data. */ - struct factor_data *current; /* Current factor. */ - }; + rt = rt_next; + } -static int wr_read_splits (struct wr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static bool wr_output_data (struct wr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static int wr_read_rowtype (struct wr_aux_data *, - const struct matrix_token *, struct dfm_reader *); -static int wr_read_factors (struct wr_aux_data *); -static int wr_read_indeps (struct wr_aux_data *); -static bool matrix_data_read_with_rowtype (struct case_source *, - struct ccase *, - write_case_func *, - write_case_data); - -/* When ROWTYPE_ appears in the data, reads the matrices and writes - them to the output file. - Returns true if successful, false if an I/O error occurred. */ -static bool -read_matrices_with_rowtype (struct matrix_data_pgm *mx) +exit: + free (prev); + gsl_matrix_free (m); + free (d); + free (d_next); +} + +static void +parse_matrix_without_rowtype (const struct matrix_format *mf, + struct substring *p, struct dfm_reader *r, + gsl_matrix *m, enum rowtype rowtype, bool pooled, + int split_num, struct casewriter *w) { - struct wr_aux_data wr; - bool ok; - - wr.mx = mx; - wr.content = -1; - wr.split_values = NULL; - wr.data = NULL; - wr.current = NULL; - mx->cells = 0; - - vfm_source = create_case_source (&matrix_data_with_rowtype_source_class, - &wr); - ok = procedure (NULL, NULL); - - free (wr.split_values); - return ok; + int n_dims = rowtype_dimensions (rowtype); + const struct matrix_sched *ms = &mf->ms[n_dims]; + + double *d = xnmalloc (mf->n_input_vars, sizeof *d); + matrix_sched_init (mf, rowtype, m); + for (size_t i = 0; i < ms->n_rp; i++) + { + int y = ms->rp[i].y; + int k = 0; + int h = 0; + for (size_t j = 0; j < mf->n_input_vars; j++) + { + const struct variable *iv = mf->input_vars[j]; + if (k < mf->n_cvars && iv == mf->cvars[k]) + { + if (k < ms->rp[i].x1 - ms->rp[i].x0) + { + double e; + if (!next_number (p, r, &e)) + goto exit; + + int x = k + ms->rp[i].x0; + gsl_matrix_set (m, y, x, e); + if (n_dims == 2 && mf->triangle != FULL) + gsl_matrix_set (m, x, y, e); + } + k++; + continue; + } + if (h < mf->n_fvars && iv == mf->fvars[h]) + { + h++; + if (pooled) + { + d[j] = SYSMIS; + continue; + } + } + + double e; + if (!next_number (p, r, &e)) + goto exit; + d[j] = e; + } + check_eol (mf, p, r); + } + + matrix_sched_output (mf, rowtype, m, d, split_num, w); +exit: + free (d); } -/* Read from the data file and write it to the active file. - Returns true if successful, false if an I/O error occurred. */ -static bool -matrix_data_read_with_rowtype (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) +static void +parse_data_without_rowtype (const struct matrix_format *mf, + struct dfm_reader *r, struct casewriter *w) { - struct wr_aux_data *wr = source->aux; - struct matrix_data_pgm *mx = wr->mx; + if (dfm_eof (r)) + return; + struct substring p = dfm_get_record (r); + + gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars); + int split_num = 1; do { - if (!wr_read_splits (wr, c, write_case, wc_data)) - return true; + for (size_t i = 0; i < mf->n_contents; ) + { + size_t j = i; + if (mf->contents[i].open) + while (!mf->contents[j].close) + j++; - if (!wr_read_factors (wr)) - return true; + if (mf->contents[i].open) + { + for (size_t k = 0; k < mf->cells; k++) + for (size_t h = i; h <= j; h++) + parse_matrix_without_rowtype (mf, &p, r, m, + mf->contents[h].rowtype, false, + split_num, w); + } + else + parse_matrix_without_rowtype (mf, &p, r, m, mf->contents[i].rowtype, + true, split_num, w); + i = j + 1; + } - if (!wr_read_indeps (wr)) - return true; + split_num++; } - while (another_token (mx->reader)); + while (more_tokens (&p, r)); - return wr_output_data (wr, c, write_case, wc_data); + gsl_matrix_free (m); } -/* Read the split file variables. If they differ from the previous - set of split variables then output the data. Returns success. */ -static int -wr_read_splits (struct wr_aux_data *wr, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) +/* Parses VARIABLES=varnames for MATRIX DATA and returns a dictionary with the + named variables in it. */ +static struct dictionary * +parse_matrix_data_variables (struct lexer *lexer) { - struct matrix_data_pgm *mx = wr->mx; - int compare; - size_t split_cnt; + if (!lex_force_match_id (lexer, "VARIABLES")) + return NULL; + lex_match (lexer, T_EQUALS); - split_cnt = dict_get_split_cnt (default_dict); - if (split_cnt == 0) - return 1; + struct dictionary *dict = dict_create (get_default_encoding ()); - if (wr->split_values) - compare = 1; - else + size_t n_names = 0; + char **names = NULL; + if (!parse_DATA_LIST_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE)) { - compare = 0; - wr->split_values = xnmalloc (split_cnt, sizeof *wr->split_values); + dict_unref (dict); + return NULL; } - - { - int different = 0; - int i; - for (i = 0; i < split_cnt; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return 0; - if (token.type != MNUM) - { - msg (SE, _("Syntax error %s expecting SPLIT FILE value."), - context (mx->reader)); - return 0; - } - - if (compare && wr->split_values[i] != token.number && !different) - { - if (!wr_output_data (wr, c, write_case, wc_data)) - return 0; - different = 1; - mx->cells = 0; - } - wr->split_values[i] = token.number; - } - } + for (size_t i = 0; i < n_names; i++) + if (!strcasecmp (names[i], "ROWTYPE_")) + dict_create_var_assert (dict, "ROWTYPE_", 8); + else + dict_create_var_assert (dict, names[i], 0); - return 1; -} + for (size_t i = 0; i < n_names; ++i) + free (names[i]); + free (names); -/* Compares doubles A and B, treating SYSMIS as greatest. */ -static int -compare_doubles (const void *a_, const void *b_, void *aux UNUSED) -{ - const double *a = a_; - const double *b = b_; - - if (*a == *b) - return 0; - else if (*a == SYSMIS) - return 1; - else if (*b == SYSMIS) - return -1; - else if (*a > *b) - return 1; - else - return -1; + if (dict_lookup_var (dict, "VARNAME_")) + { + msg (SE, _("VARIABLES may not include VARNAME_.")); + dict_unref (dict); + return NULL; + } + return dict; } -/* Return strcmp()-type comparison of the MX->n_factors factors at _A and - _B. Sort missing values toward the end. */ -static int -compare_factors (const void *a_, const void *b_, void *mx_) +static bool +parse_matrix_data_subvars (struct lexer *lexer, struct dictionary *dict, + bool *taken_vars, + struct variable ***vars, size_t **indexes, + size_t *n_vars) { - struct matrix_data_pgm *mx = mx_; - struct factor_data *const *pa = a_; - struct factor_data *const *pb = b_; - const double *a = (*pa)->factors; - const double *b = (*pb)->factors; - - return lexicographical_compare_3way (a, mx->n_factors, - b, mx->n_factors, - sizeof *a, - compare_doubles, NULL); + if (!parse_variables (lexer, dict, vars, n_vars, 0)) + return false; + + *indexes = xnmalloc (*n_vars, sizeof **indexes); + for (size_t i = 0; i < *n_vars; i++) + { + struct variable *v = (*vars)[i]; + if (!strcasecmp (var_get_name (v), "ROWTYPE_")) + { + msg (SE, _("ROWTYPE_ is not allowed on SPLIT or FACTORS.")); + goto error; + } + (*indexes)[i] = var_get_dict_index (v); + + bool *tv = &taken_vars[var_get_dict_index (v)]; + if (*tv) + { + msg (SE, _("%s may not appear on both SPLIT and FACTORS."), + var_get_name (v)); + goto error; + } + *tv = true; + + var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 4 }); + } + return true; + +error: + free (*vars); + *vars = NULL; + *n_vars = 0; + free (*indexes); + *indexes = NULL; + return false; } -/* Write out the data for the current split file to the active - file. - Returns true if successful, false if an I/O error occurred. */ -static bool -wr_output_data (struct wr_aux_data *wr, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) +int +cmd_matrix_data (struct lexer *lexer, struct dataset *ds) { - struct matrix_data_pgm *mx = wr->mx; - bool ok = true; + struct dictionary *dict = parse_matrix_data_variables (lexer); + if (!dict) + return CMD_FAILURE; - { - struct variable *const *split; - size_t split_cnt; - size_t i; + size_t n_input_vars = dict_get_n_vars (dict); + struct variable **input_vars = xnmalloc (n_input_vars, sizeof *input_vars); + for (size_t i = 0; i < n_input_vars; i++) + input_vars[i] = dict_get_var (dict, i); - split_cnt = dict_get_split_cnt (default_dict); - split = dict_get_split_vars (default_dict); - for (i = 0; i < split_cnt; i++) - case_data_rw (c, split[i]->fv)->f = wr->split_values[i]; - } + int varname_width = 8; + for (size_t i = 0; i < n_input_vars; i++) + { + int w = strlen (var_get_name (input_vars[i])); + varname_width = MAX (w, varname_width); + } - /* Sort the wr->data list. */ - { - struct factor_data **factors; - struct factor_data *iter; - int i; + struct variable *rowtype = dict_lookup_var (dict, "ROWTYPE_"); + bool input_rowtype = rowtype != NULL; + if (!rowtype) + rowtype = dict_create_var_assert (dict, "ROWTYPE_", 8); - factors = xnmalloc (mx->cells, sizeof *factors); + struct matrix_format mf = { + .input_rowtype = input_rowtype, + .input_vars = input_vars, + .n_input_vars = n_input_vars, - for (i = 0, iter = wr->data; iter; iter = iter->next, i++) - factors[i] = iter; + .rowtype = rowtype, + .varname = dict_create_var_assert (dict, "VARNAME_", varname_width), - sort (factors, mx->cells, sizeof *factors, compare_factors, mx); + .triangle = LOWER, + .diagonal = DIAGONAL, + .n = -1, + .cells = -1, + }; - wr->data = factors[0]; - for (i = 0; i < mx->cells - 1; i++) - factors[i]->next = factors[i + 1]; - factors[mx->cells - 1]->next = NULL; + bool *taken_vars = XCALLOC (n_input_vars, bool); + if (input_rowtype) + taken_vars[var_get_dict_index (rowtype)] = true; - free (factors); - } + struct file_handle *fh = NULL; + while (lex_token (lexer) != T_ENDCMD) + { + if (!lex_force_match (lexer, T_SLASH)) + goto error; - /* Write out records for every set of factor values. */ - { - struct factor_data *iter; - - for (iter = wr->data; iter; iter = iter->next) - { + if (lex_match_id (lexer, "N")) { - size_t factor; + lex_match (lexer, T_EQUALS); - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor]->fv)->f - = iter->factors[factor]; + if (!lex_force_int_range (lexer, "N", 0, INT_MAX)) + goto error; + + mf.n = lex_integer (lexer); + lex_get (lexer); } - + else if (lex_match_id (lexer, "FORMAT")) { - int content; + lex_match (lexer, T_EQUALS); - for (content = 0; content <= PROX; content++) + while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) { - if (!iter->n_rows[content]) - continue; - - { - int type = content_type[content]; - int n_lines = (type == 1 - ? (mx->n_continuous - - (mx->section != FULL && mx->diag == NODIAGONAL)) - : 1); - - if (n_lines != iter->n_rows[content]) - { - msg (SE, _("Expected %d lines of data for %s content; " - "actually saw %d lines. No data will be " - "output for this content."), - n_lines, content_names[content], - iter->n_rows[content]); - continue; - } - } - - fill_matrix (mx, content, iter->data[content]); - - ok = dump_cell_content (mx, content, iter->data[content], - c, write_case, wc_data); - if (!ok) - break; + if (lex_match_id (lexer, "LIST")) + mf.span = false; + else if (lex_match_id (lexer, "FREE")) + mf.span = true; + else if (lex_match_id (lexer, "UPPER")) + mf.triangle = UPPER; + else if (lex_match_id (lexer, "LOWER")) + mf.triangle = LOWER; + else if (lex_match_id (lexer, "FULL")) + mf.triangle = FULL; + else if (lex_match_id (lexer, "DIAGONAL")) + mf.diagonal = DIAGONAL; + else if (lex_match_id (lexer, "NODIAGONAL")) + mf.diagonal = NO_DIAGONAL; + else + { + lex_error (lexer, NULL); + goto error; + } } } - } - } - - pool_destroy (mx->container); - mx->container = pool_create (); - - wr->data = wr->current = NULL; - - return ok; -} + else if (lex_match_id (lexer, "FILE")) + { + lex_match (lexer, T_EQUALS); + fh_unref (fh); + fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL); + if (!fh) + goto error; + } + else if (!mf.n_svars && lex_match_id (lexer, "SPLIT")) + { + lex_match (lexer, T_EQUALS); + if (!mf.input_rowtype + && lex_token (lexer) == T_ID + && !dict_lookup_var (dict, lex_tokcstr (lexer))) + { + mf.svars = xmalloc (sizeof *mf.svars); + mf.svars[0] = dict_create_var_assert (dict, lex_tokcstr (lexer), + 0); + var_set_both_formats ( + mf.svars[0], &(struct fmt_spec) { .type = FMT_F, .w = 4 }); + mf.n_svars = 1; + lex_get (lexer); + } + else if (!parse_matrix_data_subvars (lexer, dict, taken_vars, + &mf.svars, &mf.svar_indexes, + &mf.n_svars)) + goto error; + } + else if (!mf.n_fvars && lex_match_id (lexer, "FACTORS")) + { + lex_match (lexer, T_EQUALS); + if (!parse_matrix_data_subvars (lexer, dict, taken_vars, + &mf.fvars, &mf.fvar_indexes, + &mf.n_fvars)) + goto error; + } + else if (lex_match_id (lexer, "CELLS")) + { + if (mf.input_rowtype) + msg (SW, _("CELLS is ignored when VARIABLES includes ROWTYPE_")); -/* Sets ROWTYPE_ based on the given TOKEN read from READER. - Return success. */ -static int -wr_read_rowtype (struct wr_aux_data *wr, - const struct matrix_token *token, - struct dfm_reader *reader) -{ - if (wr->content != -1) - { - msg (SE, _("Multiply specified ROWTYPE_ %s."), context (reader)); - return 0; - } - if (token->type != MSTR) - { - msg (SE, _("Syntax error %s expecting ROWTYPE_ string."), - context (reader)); - return 0; - } - - { - char s[16]; - char *cp; - - memcpy (s, token->string, min (15, token->length)); - s[min (15, token->length)] = 0; + lex_match (lexer, T_EQUALS); - for (cp = s; *cp; cp++) - *cp = toupper ((unsigned char) *cp); + if (!lex_force_int_range (lexer, "CELLS", 0, INT_MAX)) + goto error; - wr->content = string_to_content_type (s, NULL); - } + mf.cells = lex_integer (lexer); + lex_get (lexer); + } + else if (lex_match_id (lexer, "CONTENTS")) + { + lex_match (lexer, T_EQUALS); - if (wr->content == -1) - { - msg (SE, _("Syntax error %s."), context (reader)); - return 0; + size_t allocated_contents = mf.n_contents; + bool in_parens = false; + for (;;) + { + bool open = !in_parens && lex_match (lexer, T_LPAREN); + enum rowtype rt; + if (!rowtype_parse (lexer, &rt)) + { + if (open || in_parens || (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH)) + { + lex_error (lexer, _("Row type keyword expected.")); + goto error; + } + break; + } + + if (open) + in_parens = true; + + if (in_parens) + mf.factor_rowtype_mask |= 1u << rt; + else + mf.pooled_rowtype_mask |= 1u << rt; + + bool close = in_parens && lex_match (lexer, T_RPAREN); + if (close) + in_parens = false; + + if (mf.n_contents >= allocated_contents) + mf.contents = x2nrealloc (mf.contents, &allocated_contents, + sizeof *mf.contents); + mf.contents[mf.n_contents++] = (struct content) { + .open = open, .rowtype = rt, .close = close + }; + } + } + else + { + lex_error (lexer, NULL); + goto error; + } } - - return 1; -} - -/* Read the factors for the current row. Select a set of factors and - point wr_current to it. */ -static int -wr_read_factors (struct wr_aux_data *wr) -{ - struct matrix_data_pgm *mx = wr->mx; - double *factor_values = local_alloc (sizeof *factor_values * mx->n_factors); - - wr->content = -1; - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - goto lossage; - if (token.type == MSTR) - { - if (!wr_read_rowtype (wr, &token, mx->reader)) - goto lossage; - if (!mget_token (&token, mx->reader)) - goto lossage; - } - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting factor value %s."), - context (mx->reader)); - goto lossage; - } - - factor_values[i] = token.number; - } - } - if (wr->content == -1) + if (mf.diagonal == NO_DIAGONAL && mf.triangle == FULL) { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - goto lossage; - if (!wr_read_rowtype (wr, &token, mx->reader)) - goto lossage; + msg (SE, _("FORMAT=FULL and FORMAT=NODIAGONAL are mutually exclusive.")); + goto error; } - - /* Try the most recent factor first as a simple caching - mechanism. */ - if (wr->current) + if (!mf.input_rowtype) { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - if (factor_values[i] != wr->current->factors[i]) - goto cache_miss; - goto winnage; - } + if (mf.cells < 0) + { + if (mf.n_fvars) + { + msg (SE, _("CELLS is required when factor variables are specified " + "and VARIABLES does not include ROWTYPE_.")); + goto error; + } + mf.cells = 1; + } - /* Linear search through the list. */ -cache_miss: - { - struct factor_data *iter; + if (!mf.n_contents) + { + msg (SW, _("CONTENTS was not specified and VARIABLES does not " + "include ROWTYPE_. Assuming CONTENTS=CORR.")); - for (iter = wr->data; iter; iter = iter->next) + mf.n_contents = 1; + mf.contents = xmalloc (sizeof *mf.contents); + *mf.contents = (struct content) { .rowtype = C_CORR }; + } + } + mf.cvars = xmalloc (mf.n_input_vars * sizeof *mf.cvars); + for (size_t i = 0; i < mf.n_input_vars; i++) + if (!taken_vars[i]) { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - if (factor_values[i] != iter->factors[i]) - goto next_item; - - wr->current = iter; - goto winnage; - - next_item: ; + struct variable *v = input_vars[i]; + mf.cvars[mf.n_cvars++] = v; + var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 10, + .d = 4 }); } - } - - /* Not found. Make a new item. */ - { - struct factor_data *new = pool_alloc (mx->container, sizeof *new); - - new->factors = pool_nalloc (mx->container, - mx->n_factors, sizeof *new->factors); - + if (!mf.n_cvars) { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - new->factors[i] = factor_values[i]; + msg (SE, _("At least one continuous variable is required.")); + goto error; } - + if (mf.input_rowtype) { - int i; - - for (i = 0; i <= PROX; i++) - { - new->n_rows[i] = 0; - new->data[i] = NULL; - } + for (size_t i = 0; i < mf.n_cvars; i++) + if (mf.cvars[i] != input_vars[n_input_vars - mf.n_cvars + i]) + { + msg (SE, _("VARIABLES includes ROWTYPE_ but the continuous " + "variables are not the last ones on VARIABLES.")); + goto error; + } } - - new->next = wr->data; - wr->data = wr->current = new; - mx->cells++; - } - -winnage: - local_free (factor_values); - return 1; - -lossage: - local_free (factor_values); - return 0; -} - -/* Read the independent variables into wr->current. */ -static int -wr_read_indeps (struct wr_aux_data *wr) -{ - struct matrix_data_pgm *mx = wr->mx; - struct factor_data *c = wr->current; - const int type = content_type[wr->content]; - const int n_rows = c->n_rows[wr->content]; - double *cp; - int n_cols; - - /* Allocate room for data if necessary. */ - if (c->data[wr->content] == NULL) + unsigned int rowtype_mask = mf.pooled_rowtype_mask | mf.factor_rowtype_mask; + if (rowtype_mask & (1u << C_N) && mf.n >= 0) { - int n_items = mx->n_continuous; - if (type == 1) - n_items *= mx->n_continuous; - - c->data[wr->content] = pool_nalloc (mx->container, - n_items, sizeof **c->data); - } - - cp = &c->data[wr->content][n_rows * mx->n_continuous]; - - /* Figure out how much to read from this line. */ - switch (type) - { - case 0: - case 2: - if (n_rows > 0) - { - msg (SE, _("Duplicate specification for %s."), - content_names[wr->content]); - return 0; - } - if (type == 0) - n_cols = mx->n_continuous; - else - n_cols = 1; - break; - case 1: - if (n_rows >= mx->n_continuous - (mx->section != FULL && mx->diag == NODIAGONAL)) - { - msg (SE, _("Too many rows of matrix data for %s."), - content_names[wr->content]); - return 0; - } - - switch (mx->section) - { - case LOWER: - n_cols = n_rows + 1; - if (mx->diag == NODIAGONAL) - cp += mx->n_continuous; - break; - case UPPER: - cp += n_rows; - n_cols = mx->n_continuous - n_rows; - if (mx->diag == NODIAGONAL) - { - n_cols--; - cp++; - } - break; - case FULL: - n_cols = mx->n_continuous; - break; - default: - assert (0); - abort (); - } - break; - default: - assert (0); - abort (); + msg (SE, _("Cannot specify N on CONTENTS along with the N subcommand.")); + goto error; } - c->n_rows[wr->content]++; - /* Read N_COLS items at CP. */ - { - int j; - - for (j = 0; j < n_cols; j++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return 0; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting value for %s %s."), - dict_get_var (default_dict, mx->first_continuous + j)->name, - context (mx->reader)); - return 0; - } - - *cp++ = token.number; - } - if (mx->fmt != FREE - && !force_eol (mx->reader, content_names[wr->content])) - return 0; - } + struct variable **order = xnmalloc (dict_get_n_vars (dict), sizeof *order); + size_t n_order = 0; + for (size_t i = 0; i < mf.n_svars; i++) + order[n_order++] = mf.svars[i]; + order[n_order++] = mf.rowtype; + for (size_t i = 0; i < mf.n_fvars; i++) + order[n_order++] = mf.fvars[i]; + order[n_order++] = mf.varname; + for (size_t i = 0; i < mf.n_cvars; i++) + order[n_order++] = mf.cvars[i]; + assert (n_order == dict_get_n_vars (dict)); + dict_reorder_vars (dict, order, n_order); + free (order); + + dict_set_split_vars (dict, mf.svars, mf.n_svars); + + schedule_matrices (&mf); + + if (fh == NULL) + fh = fh_inline_file (); + + if (lex_end_of_command (lexer) != CMD_SUCCESS) + goto error; + + struct dfm_reader *reader = dfm_open_reader (fh, lexer, NULL); + if (reader == NULL) + goto error; + + struct casewriter *writer = autopaging_writer_create (dict_get_proto (dict)); + if (mf.input_rowtype) + parse_data_with_rowtype (&mf, reader, writer); + else + parse_data_without_rowtype (&mf, reader, writer); + dfm_close_reader (reader); - return 1; -} - -/* Matrix source. */ + dataset_set_dict (ds, dict); + dataset_set_source (ds, casewriter_make_reader (writer)); -static const struct case_source_class matrix_data_with_rowtype_source_class = - { - "MATRIX DATA", - NULL, - matrix_data_read_with_rowtype, - NULL, - }; + matrix_format_uninit (&mf); + free (taken_vars); + fh_unref (fh); -static const struct case_source_class -matrix_data_without_rowtype_source_class = - { - "MATRIX DATA", - NULL, - matrix_data_read_without_rowtype, - NULL, - }; + return CMD_SUCCESS; + error: + matrix_format_uninit (&mf); + free (taken_vars); + dict_unref (dict); + fh_unref (fh); + return CMD_FAILURE; +}