-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2017 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <float.h>
-
-#include <data/case-source.h>
-#include <data/case.h>
-#include <data/data-in.h>
-#include <data/dictionary.h>
-#include <data/procedure.h>
-#include <data/variable.h>
-#include <language/command.h>
-#include <language/data-io/data-reader.h>
-#include <language/data-io/file-handle.h>
-#include <language/lexer/lexer.h>
-#include <language/lexer/variable-parser.h>
-#include <libpspp/alloc.h>
-#include <libpspp/array.h>
-#include <libpspp/assertion.h>
-#include <libpspp/compiler.h>
-#include <libpspp/message.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/pool.h>
-#include <libpspp/str.h>
-
-#include "size_max.h"
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_vector.h>
+
+#include "data/case.h"
+#include "data/casereader.h"
+#include "data/casewriter.h"
+#include "data/data-in.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/short-names.h"
+#include "data/transformations.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/data-io/data-parser.h"
+#include "language/data-io/data-reader.h"
+#include "language/data-io/file-handle.h"
+#include "language/data-io/inpt-pgm.h"
+#include "language/data-io/placement-parser.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
+#include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
+#include "libpspp/intern.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
+
+#include "gl/c-ctype.h"
+#include "gl/minmax.h"
+#include "gl/xsize.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
-
-/* FIXME: /N subcommand not implemented. It should be pretty simple,
- too. */
-
-/* Different types of variables for MATRIX DATA procedure. Order is
- important: these are used for sort keys. */
-enum
- {
- MXD_SPLIT, /* SPLIT FILE variables. */
- MXD_ROWTYPE, /* ROWTYPE_. */
- MXD_FACTOR, /* Factor variables. */
- MXD_VARNAME, /* VARNAME_. */
- MXD_CONTINUOUS, /* Continuous variables. */
-
- MXD_COUNT
- };
-
-/* Format type enums. */
-enum format_type
- {
- LIST,
- FREE
- };
-
-/* Matrix section enums. */
-enum matrix_section
- {
- LOWER,
- UPPER,
- FULL
- };
-
-/* Diagonal inclusion enums. */
-enum include_diagonal
- {
- DIAGONAL,
- NODIAGONAL
- };
-
-/* CONTENTS types. */
-enum content_type
- {
- N_VECTOR,
- N_SCALAR,
- N_MATRIX,
- MEAN,
- STDDEV,
- COUNT,
- MSE,
- DFE,
- MAT,
- COV,
- CORR,
- PROX,
-
- LPAREN,
- RPAREN,
- EOC
- };
-
-/* 0=vector, 1=matrix, 2=scalar. */
-static const int content_type[PROX + 1] =
+\f
+#define ROWTYPES \
+ /* Matrix row types. */ \
+ RT(CORR, 2) \
+ RT(COV, 2) \
+ RT(MAT, 2) \
+ RT(N_MATRIX, 2) \
+ RT(PROX, 2) \
+ \
+ /* Vector row types. */ \
+ RT(COUNT, 1) \
+ RT(DFE, 1) \
+ RT(MEAN, 1) \
+ RT(MSE, 1) \
+ RT(STDDEV, 1) \
+ RT(N, 1) \
+ \
+ /* Scalar row types. */ \
+ RT(N_SCALAR, 0)
+
+enum rowtype
{
- 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+#define RT(NAME, DIMS) C_##NAME,
+ ROWTYPES
+#undef RT
};
-/* Name of each content type. */
-static const char *const content_names[PROX + 1] =
+enum
{
- "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE",
- "DFE", "MAT", "COV", "CORR", "PROX",
+#define RT(NAME, DIMS) +1
+ N_ROWTYPES = ROWTYPES
+#undef RT
};
+verify (N_ROWTYPES < 32);
-/* A MATRIX DATA input program. */
-struct matrix_data_pgm
- {
- struct pool *container; /* Arena used for all allocations. */
- struct dfm_reader *reader; /* Data file to read. */
-
- /* Format. */
- enum format_type fmt; /* LIST or FREE. */
- enum matrix_section section;/* LOWER or UPPER or FULL. */
- enum include_diagonal diag; /* DIAGONAL or NODIAGONAL. */
-
- int explicit_rowtype; /* ROWTYPE_ specified explicitly in data? */
- struct variable *rowtype_, *varname_; /* ROWTYPE_, VARNAME_ variables. */
-
- struct variable *single_split; /* Single SPLIT FILE variable. */
-
- /* Factor variables. */
- size_t n_factors; /* Number of factor variables. */
- struct variable **factors; /* Factor variables. */
- int is_per_factor[PROX + 1]; /* Is there per-factor data? */
-
- int cells; /* Number of cells, or -1 if none. */
-
- int pop_n; /* Population N specified by user. */
-
- /* CONTENTS subcommand. */
- int contents[EOC * 3 + 1]; /* Contents. */
- int n_contents; /* Number of entries. */
-
- /* Continuous variables. */
- int n_continuous; /* Number of continuous variables. */
- int first_continuous; /* Index into dictionary of
- first continuous variable. */
+/* Returns the number of dimensions in the indexes for row type RT. A matrix
+ has 2 dimensions, a vector has 1, a scalar has 0. */
+static int
+rowtype_dimensions (enum rowtype rt)
+{
+ static const int rowtype_dims[N_ROWTYPES] = {
+#define RT(NAME, DIMS) [C_##NAME] = DIMS,
+ ROWTYPES
+#undef RT
};
+ return rowtype_dims[rt];
+}
-/* Auxiliary data attached to MATRIX DATA variables. */
-struct mxd_var
- {
- int var_type; /* Variable type. */
- int sub_type; /* Subtype. */
+static struct substring
+rowtype_name (enum rowtype rt)
+{
+ static const struct substring rowtype_names[N_ROWTYPES] = {
+#define RT(NAME, DIMS) [C_##NAME] = SS_LITERAL_INITIALIZER (#NAME),
+ ROWTYPES
+#undef RT
};
-static const struct case_source_class matrix_data_with_rowtype_source_class;
-static const struct case_source_class matrix_data_without_rowtype_source_class;
-
-static int compare_variables_by_mxd_var_type (const void *pa,
- const void *pb);
-static bool read_matrices_without_rowtype (struct dataset *ds, struct matrix_data_pgm *);
-static bool read_matrices_with_rowtype (struct dataset *ds, struct matrix_data_pgm *);
-static int string_to_content_type (char *, int *);
-static void attach_mxd_aux (struct variable *, int var_type, int sub_type);
+ return rowtype_names[rt];
+}
-int
-cmd_matrix_data (struct dataset *ds)
+static bool
+rowtype_from_string (struct substring token, enum rowtype *rt)
{
- struct pool *pool;
- struct matrix_data_pgm *mx;
- struct file_handle *fh = fh_inline_file ();
- bool ok;
-
- unsigned seen = 0;
-
- discard_variables (ds);
-
- pool = pool_create ();
- mx = pool_alloc (pool, sizeof *mx);
- mx->container = pool;
- mx->reader = NULL;
- mx->fmt = LIST;
- mx->section = LOWER;
- mx->diag = DIAGONAL;
- mx->explicit_rowtype = 0;
- mx->rowtype_ = NULL;
- mx->varname_ = NULL;
- mx->single_split = NULL;
- mx->n_factors = 0;
- mx->factors = NULL;
- memset (mx->is_per_factor, 0, sizeof mx->is_per_factor);
- mx->cells = -1;
- mx->pop_n = -1;
- mx->n_contents = 0;
- mx->n_continuous = 0;
- mx->first_continuous = 0;
- while (token != '.')
- {
- lex_match ('/');
-
- if (lex_match_id ("VARIABLES"))
- {
- char **v;
- size_t nv;
-
- if (seen & 1)
- {
- msg (SE, _("VARIABLES subcommand multiply specified."));
- goto lossage;
- }
- seen |= 1;
-
- lex_match ('=');
- if (!parse_DATA_LIST_vars (&v, &nv, PV_NO_DUPLICATE))
- goto lossage;
-
- {
- size_t i;
-
- for (i = 0; i < nv; i++)
- if (!strcasecmp (v[i], "VARNAME_"))
- {
- msg (SE, _("VARNAME_ cannot be explicitly specified on "
- "VARIABLES."));
- for (i = 0; i < nv; i++)
- free (v[i]);
- free (v);
- goto lossage;
- }
- }
-
- {
- size_t i;
-
- for (i = 0; i < nv; i++)
- {
- struct variable *new_var;
-
- if (strcasecmp (v[i], "ROWTYPE_"))
- {
- new_var = dict_create_var_assert (dataset_dict (ds), v[i], 0);
- attach_mxd_aux (new_var, MXD_CONTINUOUS, i);
- }
- else
- mx->explicit_rowtype = 1;
- free (v[i]);
- }
- free (v);
- }
-
- mx->rowtype_ = dict_create_var_assert (dataset_dict (ds),
- "ROWTYPE_", 8);
- attach_mxd_aux (mx->rowtype_, MXD_ROWTYPE, 0);
- }
- else if (lex_match_id ("FILE"))
- {
- lex_match ('=');
- fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
- if (fh == NULL)
- goto lossage;
- }
- else if (lex_match_id ("FORMAT"))
- {
- lex_match ('=');
-
- while (token == T_ID)
- {
- if (lex_match_id ("LIST"))
- mx->fmt = LIST;
- else if (lex_match_id ("FREE"))
- mx->fmt = FREE;
- else if (lex_match_id ("LOWER"))
- mx->section = LOWER;
- else if (lex_match_id ("UPPER"))
- mx->section = UPPER;
- else if (lex_match_id ("FULL"))
- mx->section = FULL;
- else if (lex_match_id ("DIAGONAL"))
- mx->diag = DIAGONAL;
- else if (lex_match_id ("NODIAGONAL"))
- mx->diag = NODIAGONAL;
- else
- {
- lex_error (_("in FORMAT subcommand"));
- goto lossage;
- }
- }
- }
- else if (lex_match_id ("SPLIT"))
- {
- lex_match ('=');
-
- if (seen & 2)
- {
- msg (SE, _("SPLIT subcommand multiply specified."));
- goto lossage;
- }
- seen |= 2;
-
- if (token != T_ID)
- {
- lex_error (_("in SPLIT subcommand"));
- goto lossage;
- }
-
- if (dict_lookup_var (dataset_dict (ds), tokid) == NULL
- && (lex_look_ahead () == '.' || lex_look_ahead () == '/'))
- {
- if (!strcasecmp (tokid, "ROWTYPE_")
- || !strcasecmp (tokid, "VARNAME_"))
- {
- msg (SE, _("Split variable may not be named ROWTYPE_ "
- "or VARNAME_."));
- goto lossage;
- }
-
- mx->single_split = dict_create_var_assert (dataset_dict (ds),
- tokid, 0);
- attach_mxd_aux (mx->single_split, MXD_CONTINUOUS, 0);
- lex_get ();
-
- dict_set_split_vars (dataset_dict (ds), &mx->single_split, 1);
- }
- else
- {
- struct variable **split;
- size_t n;
-
- if (!parse_variables (dataset_dict (ds), &split, &n, PV_NO_DUPLICATE))
- goto lossage;
-
- dict_set_split_vars (dataset_dict (ds), split, n);
- }
-
- {
- struct variable *const *split = dict_get_split_vars (dataset_dict (ds));
- size_t split_cnt = dict_get_split_cnt (dataset_dict (ds));
- int i;
-
- for (i = 0; i < split_cnt; i++)
- {
- struct mxd_var *mv = split[i]->aux;
- assert (mv != NULL);
- if (mv->var_type != MXD_CONTINUOUS)
- {
- msg (SE, _("Split variable %s is already another type."),
- tokid);
- goto lossage;
- }
- var_clear_aux (split[i]);
- attach_mxd_aux (split[i], MXD_SPLIT, i);
- }
- }
- }
- else if (lex_match_id ("FACTORS"))
- {
- lex_match ('=');
-
- if (seen & 4)
- {
- msg (SE, _("FACTORS subcommand multiply specified."));
- goto lossage;
- }
- seen |= 4;
-
- if (!parse_variables (dataset_dict (ds), &mx->factors, &mx->n_factors,
- PV_NONE))
- goto lossage;
-
- {
- size_t i;
-
- for (i = 0; i < mx->n_factors; i++)
- {
- struct variable *v = mx->factors[i];
- struct mxd_var *mv = v->aux;
- assert (mv != NULL);
- if (mv->var_type != MXD_CONTINUOUS)
- {
- msg (SE, _("Factor variable %s is already another type."),
- tokid);
- goto lossage;
- }
- var_clear_aux (v);
- attach_mxd_aux (v, MXD_FACTOR, i);
- }
- }
- }
- else if (lex_match_id ("CELLS"))
- {
- lex_match ('=');
-
- if (mx->cells != -1)
- {
- msg (SE, _("CELLS subcommand multiply specified."));
- goto lossage;
- }
-
- if (!lex_is_integer () || lex_integer () < 1)
- {
- lex_error (_("expecting positive integer"));
- goto lossage;
- }
-
- mx->cells = lex_integer ();
- lex_get ();
- }
- else if (lex_match_id ("N"))
- {
- lex_match ('=');
-
- if (mx->pop_n != -1)
- {
- msg (SE, _("N subcommand multiply specified."));
- goto lossage;
- }
-
- if (!lex_is_integer () || lex_integer () < 1)
- {
- lex_error (_("expecting positive integer"));
- goto lossage;
- }
-
- mx->pop_n = lex_integer ();
- lex_get ();
- }
- else if (lex_match_id ("CONTENTS"))
- {
- int inside_parens = 0;
- unsigned collide = 0;
- int item;
-
- if (seen & 8)
- {
- msg (SE, _("CONTENTS subcommand multiply specified."));
- goto lossage;
- }
- seen |= 8;
-
- lex_match ('=');
-
- {
- int i;
-
- for (i = 0; i <= PROX; i++)
- mx->is_per_factor[i] = 0;
- }
-
- for (;;)
- {
- if (lex_match ('('))
- {
- if (inside_parens)
- {
- msg (SE, _("Nested parentheses not allowed."));
- goto lossage;
- }
- inside_parens = 1;
- item = LPAREN;
- }
- else if (lex_match (')'))
- {
- if (!inside_parens)
- {
- msg (SE, _("Mismatched right parenthesis (`(')."));
- goto lossage;
- }
- if (mx->contents[mx->n_contents - 1] == LPAREN)
- {
- msg (SE, _("Empty parentheses not allowed."));
- goto lossage;
- }
- inside_parens = 0;
- item = RPAREN;
- }
- else
- {
- int content_type;
- int collide_index;
-
- if (token != T_ID)
- {
- lex_error (_("in CONTENTS subcommand"));
- goto lossage;
- }
-
- content_type = string_to_content_type (tokid,
- &collide_index);
- if (content_type == -1)
- {
- lex_error (_("in CONTENTS subcommand"));
- goto lossage;
- }
- lex_get ();
-
- if (collide & (1 << collide_index))
- {
- msg (SE, _("Content multiply specified for %s."),
- content_names[content_type]);
- goto lossage;
- }
- collide |= (1 << collide_index);
-
- item = content_type;
- mx->is_per_factor[item] = inside_parens;
- }
- mx->contents[mx->n_contents++] = item;
-
- if (token == '/' || token == '.')
- break;
- }
-
- if (inside_parens)
- {
- msg (SE, _("Missing right parenthesis."));
- goto lossage;
- }
- mx->contents[mx->n_contents] = EOC;
- }
- else
- {
- lex_error (NULL);
- goto lossage;
- }
- }
-
- if (token != '.')
- {
- lex_error (_("expecting end of command"));
- goto lossage;
- }
-
- if (!(seen & 1))
- {
- msg (SE, _("Missing VARIABLES subcommand."));
- goto lossage;
- }
-
- if (!mx->n_contents && !mx->explicit_rowtype)
- {
- msg (SW, _("CONTENTS subcommand not specified: assuming file "
- "contains only CORR matrix."));
-
- mx->contents[0] = CORR;
- mx->contents[1] = EOC;
- mx->n_contents = 0;
- }
+ ss_trim (&token, ss_cstr (CC_SPACES));
+ for (size_t i = 0; i < N_ROWTYPES; i++)
+ if (lex_id_match (rowtype_name (i), token))
+ {
+ *rt = i;
+ return true;
+ }
- if (mx->n_factors && !mx->explicit_rowtype && mx->cells == -1)
+ if (lex_id_match (ss_cstr ("N_VECTOR"), token))
{
- msg (SE, _("Missing CELLS subcommand. CELLS is required "
- "when ROWTYPE_ is not given in the data and "
- "factors are present."));
- goto lossage;
+ *rt = C_N;
+ return true;
}
-
- if (mx->explicit_rowtype && mx->single_split)
+ else if (lex_id_match (ss_cstr ("SD"), token))
{
- msg (SE, _("Split file values must be present in the data when "
- "ROWTYPE_ is present."));
- goto lossage;
+ *rt = C_STDDEV;
+ return true;
}
-
- /* Create VARNAME_. */
- mx->varname_ = dict_create_var_assert (dataset_dict (ds), "VARNAME_", 8);
- attach_mxd_aux (mx->varname_, MXD_VARNAME, 0);
-
- /* Sort the dictionary variables into the desired order for the
- system file output. */
- {
- struct variable **v;
- size_t nv;
- dict_get_vars (dataset_dict (ds), &v, &nv, 0);
- qsort (v, nv, sizeof *v, compare_variables_by_mxd_var_type);
- dict_reorder_vars (dataset_dict (ds), v, nv);
- free (v);
- }
+ return false;
+}
- /* Set formats. */
+static bool
+rowtype_parse (struct lexer *lexer, enum rowtype *rt)
+{
+ bool parsed = (lex_token (lexer) == T_ID
+ && rowtype_from_string (lex_tokss (lexer), rt));
+ if (parsed)
+ lex_get (lexer);
+ return parsed;
+}
+\f
+struct matrix_format
{
- static const struct fmt_spec fmt_tab[MXD_COUNT] =
+ bool span;
+ enum triangle
{
- {FMT_F, 4, 0},
- {FMT_A, 8, 0},
- {FMT_F, 4, 0},
- {FMT_A, 8, 0},
- {FMT_F, 10, 4},
- };
-
- int i;
-
- mx->first_continuous = -1;
- for (i = 0; i < dict_get_var_cnt (dataset_dict (ds)); i++)
+ LOWER,
+ UPPER,
+ FULL
+ }
+ triangle;
+ enum diagonal
{
- struct variable *v = dict_get_var (dataset_dict (ds), i);
- struct mxd_var *mv = v->aux;
- int type = mv->var_type;
-
- assert (type >= 0 && type < MXD_COUNT);
- v->print = v->write = fmt_tab[type];
-
- if (type == MXD_CONTINUOUS)
- mx->n_continuous++;
- if (mx->first_continuous == -1 && type == MXD_CONTINUOUS)
- mx->first_continuous = i;
+ DIAGONAL,
+ NO_DIAGONAL
}
- }
-
- if (mx->n_continuous == 0)
- {
- msg (SE, _("No continuous variables specified."));
- goto lossage;
- }
-
- mx->reader = dfm_open_reader (fh);
- if (mx->reader == NULL)
- goto lossage;
-
- if (mx->explicit_rowtype)
- ok = read_matrices_with_rowtype (ds, mx);
- else
- ok = read_matrices_without_rowtype (ds, mx);
-
- dfm_close_reader (mx->reader);
-
- pool_destroy (mx->container);
-
- return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
+ diagonal;
-lossage:
- discard_variables (ds);
- free (mx->factors);
- pool_destroy (mx->container);
- return CMD_CASCADING_FAILURE;
-}
+ bool input_rowtype;
+ struct variable **input_vars;
+ size_t n_input_vars;
-/* Look up string S as a content-type name and return the
- corresponding enumerated value, or -1 if there is no match. If
- COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use
- as a bit-index) which can be used for determining whether a related
- statistic has already been used. */
-static int
-string_to_content_type (char *s, int *collide)
-{
- static const struct
- {
- int value;
- int collide;
- const char *string;
- }
- *tp,
- tab[] =
- {
- {N_VECTOR, 0, "N_VECTOR"},
- {N_VECTOR, 0, "N"},
- {N_SCALAR, 0, "N_SCALAR"},
- {N_MATRIX, 1, "N_MATRIX"},
- {MEAN, 2, "MEAN"},
- {STDDEV, 3, "STDDEV"},
- {STDDEV, 3, "SD"},
- {COUNT, 4, "COUNT"},
- {MSE, 5, "MSE"},
- {DFE, 6, "DFE"},
- {MAT, 7, "MAT"},
- {COV, 8, "COV"},
- {CORR, 9, "CORR"},
- {PROX, 10, "PROX"},
- {-1, -1, NULL},
- };
-
- for (tp = tab; tp->value != -1; tp++)
- if (!strcasecmp (s, tp->string))
+ /* How to read matrices with each possible number of dimensions (0=scalar,
+ 1=vector, 2=matrix). */
+ struct matrix_sched
{
- if (collide)
- *collide = tp->collide;
-
- return tp->value;
+ /* Number of rows and columns in the matrix: (1,1) for a scalar, (1,n) for
+ a vector, (n,n) for a matrix. */
+ int nr, nc;
+
+ /* Rows of data to read and the number of columns in each. Because we
+ often read just a triangle and sometimes omit the diagonal, 'n_rp' can
+ be less than 'nr' and 'rp[i]->y' isn't always 'y'. */
+ struct row_sched
+ {
+ /* The y-value of the row inside the matrix. */
+ int y;
+
+ /* first and last (exclusive) columns to read in this row. */
+ int x0, x1;
+ }
+ *rp;
+ size_t n_rp;
}
- return -1;
-}
+ ms[3];
+
+ struct variable *rowtype;
+ struct variable *varname;
+ struct variable **cvars;
+ int n_cvars;
+ struct variable **svars;
+ size_t *svar_indexes;
+ size_t n_svars;
+ struct variable **fvars;
+ size_t *fvar_indexes;
+ size_t n_fvars;
+ int cells;
+ int n;
+
+ unsigned int pooled_rowtype_mask;
+ unsigned int factor_rowtype_mask;
+
+ struct content
+ {
+ bool open;
+ enum rowtype rowtype;
+ bool close;
+ }
+ *contents;
+ size_t n_contents;
+ };
-/* Compare two variables using p.mxd.var_type and p.mxd.sub_type
- fields. */
-static int
-compare_variables_by_mxd_var_type (const void *a_, const void *b_)
+static void
+matrix_format_uninit (struct matrix_format *mf)
{
- struct variable *const *pa = a_;
- struct variable *const *pb = b_;
- const struct mxd_var *a = (*pa)->aux;
- const struct mxd_var *b = (*pb)->aux;
-
- if (a->var_type != b->var_type)
- return a->var_type > b->var_type ? 1 : -1;
- else
- return a->sub_type < b->sub_type ? -1 : a->sub_type > b->sub_type;
+ free (mf->input_vars);
+ for (int i = 0; i < 3; i++)
+ free (mf->ms[i].rp);
+ free (mf->cvars);
+ free (mf->svars);
+ free (mf->svar_indexes);
+ free (mf->fvars);
+ free (mf->fvar_indexes);
+ free (mf->contents);
}
-/* Attaches a struct mxd_var with the specific member values to
- V. */
static void
-attach_mxd_aux (struct variable *v, int var_type, int sub_type)
+set_string (struct ccase *outcase, const struct variable *var,
+ struct substring src)
{
- struct mxd_var *mv;
-
- assert (v->aux == NULL);
- mv = xmalloc (sizeof *mv);
- mv->var_type = var_type;
- mv->sub_type = sub_type;
- var_attach_aux (v, mv, var_dtor_free);
+ struct substring dst = case_ss (outcase, var);
+ for (size_t i = 0; i < dst.length; i++)
+ dst.string[i] = i < src.length ? src.string[i] : ' ';
}
-\f
-/* Matrix tokenizer. */
-
-/* Matrix token types. */
-enum matrix_token_type
- {
- MNUM, /* Number. */
- MSTR /* String. */
- };
-
-/* A MATRIX DATA parsing token. */
-struct matrix_token
- {
- enum matrix_token_type type;
- double number; /* MNUM: token value. */
- char *string; /* MSTR: token string; not null-terminated. */
- int length; /* MSTR: tokstr length. */
- };
-
-static int mget_token (struct matrix_token *, struct dfm_reader *);
-
-#if DEBUGGING
-#define mget_token(TOKEN, READER) mget_token_dump(TOKEN, READER)
static void
-mdump_token (const struct matrix_token *token)
+parse_msg (struct dfm_reader *reader, const struct substring *token,
+ char *text, enum msg_severity severity)
{
- switch (token->type)
+ int first_column = 0;
+ if (token)
{
- case MNUM:
- printf (" #%g", token->number);
- break;
- case MSTR:
- printf (" '%.*s'", token->length, token->string);
- break;
- default:
- NOT_REACHED ();
+ struct substring line = dfm_get_record (reader);
+ if (token->string >= line.string && token->string < ss_end (line))
+ first_column = ss_pointer_to_position (line, token->string) + 1;
}
- fflush (stdout);
+
+ int line_number = dfm_get_line_number (reader);
+ struct msg_location *location = xmalloc (sizeof *location);
+ int last_column = (first_column && token->length
+ ? first_column + token->length - 1
+ : 0);
+ *location = (struct msg_location) {
+ .file_name = intern_new (dfm_get_file_name (reader)),
+ .start = { .line = line_number, .column = first_column },
+ .end = { .line = line_number, .column = last_column },
+ };
+ struct msg *m = xmalloc (sizeof *m);
+ *m = (struct msg) {
+ .category = MSG_C_DATA,
+ .severity = severity,
+ .location = location,
+ .text = text,
+ };
+ msg_emit (m);
}
-static int
-mget_token_dump (struct matrix_token *token, struct dfm_reader *reader)
+static void PRINTF_FORMAT (3, 4)
+parse_warning (struct dfm_reader *reader, const struct substring *token,
+ const char *format, ...)
{
- int result = (mget_token) (token, reader);
- mdump_token (token);
- return result;
+ va_list args;
+ va_start (args, format);
+ parse_msg (reader, token, xvasprintf (format, args), MSG_S_WARNING);
+ va_end (args);
}
-#endif
-/* Return the current position in READER. */
-static const char *
-context (struct dfm_reader *reader)
+static void PRINTF_FORMAT (3, 4)
+parse_error (struct dfm_reader *reader, const struct substring *token,
+ const char *format, ...)
{
- static struct string buf = DS_EMPTY_INITIALIZER;
-
- ds_clear (&buf);
- if (dfm_eof (reader))
- ds_assign_cstr (&buf, "at end of file");
- else
- {
- struct substring p;
-
- p = dfm_get_record (reader);
- ss_ltrim (&p, ss_cstr (CC_SPACES));
- if (ss_is_empty (p))
- ds_assign_cstr (&buf, "at end of line");
- else
- ds_put_format (&buf, "before `%.*s'",
- (int) ss_cspan (p, ss_cstr (CC_SPACES)), ss_data (p));
- }
-
- return ds_cstr (&buf);
+ va_list args;
+ va_start (args, format);
+ parse_msg (reader, token, xvasprintf (format, args), MSG_S_ERROR);
+ va_end (args);
}
-/* Is there at least one token left in the data file? */
+/* Advance to beginning of next token. */
static bool
-another_token (struct dfm_reader *reader)
+more_tokens (struct substring *p, struct dfm_reader *r)
{
for (;;)
{
- struct substring p;
- size_t space_cnt;
-
- if (dfm_eof (reader))
- return false;
-
- p = dfm_get_record (reader);
- space_cnt = ss_span (p, ss_cstr (CC_SPACES));
- if (space_cnt < ss_length (p))
- {
- dfm_forward_columns (reader, space_cnt);
- return true;
- }
+ ss_ltrim (p, ss_cstr (CC_SPACES ","));
+ if (p->length)
+ return true;
- dfm_forward_record (reader);
+ dfm_forward_record (r);
+ if (dfm_eof (r))
+ return false;
+ *p = dfm_get_record (r);
}
- NOT_REACHED();
}
-/* Parse a MATRIX DATA token from READER into TOKEN. */
-static int
-(mget_token) (struct matrix_token *token, struct dfm_reader *reader)
+static bool
+next_token (struct substring *p, struct dfm_reader *r, struct substring *token)
{
- struct substring line, p;
- struct substring s;
- int c;
-
- if (!another_token (reader))
- return 0;
+ if (!more_tokens (p, r))
+ return false;
- line = p = dfm_get_record (reader);
-
- /* Three types of fields: quoted with ', quoted with ", unquoted. */
- c = ss_first (p);
+ /* Collect token. */
+ int c = ss_first (*p);
if (c == '\'' || c == '"')
{
- ss_get_char (&p);
- if (!ss_get_until (&p, c, &s))
- msg (SW, _("Scope of string exceeds line."));
+ ss_advance (p, 1);
+ ss_get_until (p, c, token);
}
else
{
- bool is_num = isdigit (c) || c == '.';
- const char *start = ss_data (p);
-
- for (;;)
+ size_t n = 1;
+ for (;;)
{
- c = ss_first (p);
- if (strchr (CC_SPACES ",-+", c) != NULL)
+ c = ss_at (*p, n);
+ if (c == EOF
+ || ss_find_byte (ss_cstr (CC_SPACES ","), c) != SIZE_MAX
+ || ((c == '+' || c == '-')
+ && ss_find_byte (ss_cstr ("dDeE"),
+ ss_at (*p, n - 1)) == SIZE_MAX))
break;
-
- if (isdigit (c))
- is_num = true;
- if (strchr ("deDE", c) && strchr ("+-", ss_at (p, 1)))
- {
- is_num = true;
- ss_advance (&p, 2);
- }
- else
- ss_advance (&p, 1);
+ n++;
}
- s = ss_buffer (start, ss_data (p) - start);
-
- if (is_num)
- {
- struct data_in di;
-
- di.s = ss_data (s);
- di.e = ss_end (s);
- di.v = (union value *) &token->number;
- di.f1 = dfm_get_column (reader, di.s);
- di.format = fmt_for_output (FMT_F, token->length, 0);
-
- data_in (&di);
- }
- else
- token->type = MSTR;
+ ss_get_bytes (p, n, token);
}
- token->string = ss_data (s);
- token->length = ss_length (s);
-
- dfm_reread_record (reader, dfm_get_column (reader, ss_end (s)));
-
- return 1;
+ return true;
}
-/* Forcibly skip the end of a line for content type CONTENT in
- READER. */
-static int
-force_eol (struct dfm_reader *reader, const char *content)
+static bool
+next_number (struct substring *p, struct dfm_reader *r, double *d)
{
- struct substring p;
-
- if (dfm_eof (reader))
- return 0;
-
- p = dfm_get_record (reader);
- if (ss_span (p, ss_cstr (CC_SPACES)) != ss_length (p))
+ struct substring token;
+ if (!next_token (p, r, &token))
+ return false;
+
+ union value v;
+ char *error = data_in (token, dfm_reader_get_encoding (r), FMT_F,
+ settings_get_fmt_settings (), &v, 0, NULL);
+ if (error)
{
- msg (SE, _("End of line expected %s while reading %s."),
- context (reader), content);
- return 0;
+ parse_error (r, &token, "%s", error);
+ free (error);
}
-
- dfm_forward_record (reader);
- return 1;
+ *d = v.f;
+ return true;
}
-\f
-/* Back end, omitting ROWTYPE_. */
-struct nr_aux_data
- {
- const struct dictionary *dict; /* The dictionary */
- struct matrix_data_pgm *mx; /* MATRIX DATA program. */
- double ***data; /* MATRIX DATA data. */
- double *factor_values; /* Factor values. */
- int max_cell_idx; /* Max-numbered cell that we have
- read so far, plus one. */
- double *split_values; /* SPLIT FILE variable values. */
- };
-
-static bool nr_read_splits (struct nr_aux_data *, int compare);
-static bool nr_read_factors (struct nr_aux_data *, int cell);
-static bool nr_output_data (struct nr_aux_data *, struct ccase *,
- write_case_func *, write_case_data);
-static bool matrix_data_read_without_rowtype (struct case_source *source,
- struct ccase *,
- write_case_func *,
- write_case_data);
-
-/* Read from the data file and write it to the active file.
- Returns true if successful, false if an I/O error occurred. */
static bool
-read_matrices_without_rowtype (struct dataset *ds, struct matrix_data_pgm *mx)
+next_rowtype (struct substring *p, struct dfm_reader *r, enum rowtype *rt)
{
- struct nr_aux_data nr;
- bool ok;
-
- if (mx->cells == -1)
- mx->cells = 1;
-
- nr.mx = mx;
- nr.dict = dataset_dict (ds);
- nr.data = NULL;
- nr.factor_values = xnmalloc (mx->n_factors * mx->cells,
- sizeof *nr.factor_values);
- nr.max_cell_idx = 0;
- nr.split_values = xnmalloc (dict_get_split_cnt (dataset_dict (ds)),
- sizeof *nr.split_values);
-
- proc_set_source (ds, create_case_source (
- &matrix_data_without_rowtype_source_class, &nr));
-
- ok = procedure (ds, NULL, NULL);
-
- free (nr.split_values);
- free (nr.factor_values);
-
- return ok;
+ struct substring token;
+ if (!next_token (p, r, &token))
+ return false;
+
+ if (rowtype_from_string (token, rt))
+ return true;
+
+ parse_error (r, &token, _("Unknown row type \"%.*s\"."),
+ (int) token.length, token.string);
+ return false;
}
-/* Mirror data across the diagonal of matrix CP which contains
- CONTENT type data. */
-static void
-fill_matrix (struct matrix_data_pgm *mx, int content, double *cp)
-{
- int type = content_type[content];
+struct read_matrix_params
+ {
+ /* Adjustments to first and last row to read. */
+ int dy0, dy1;
- if (type == 1 && mx->section != FULL)
- {
- if (mx->diag == NODIAGONAL)
- {
- const double fill = content == CORR ? 1.0 : SYSMIS;
- int i;
+ /* Left and right columns to read in first row, inclusive.
+ For x1, INT_MAX is the rightmost column. */
+ int x0, x1;
- for (i = 0; i < mx->n_continuous; i++)
- cp[i * (1 + mx->n_continuous)] = fill;
- }
-
- {
- int c, r;
-
- if (mx->section == LOWER)
- {
- int n_lines = mx->n_continuous;
- if (mx->section != FULL && mx->diag == NODIAGONAL)
- n_lines--;
-
- for (r = 1; r < n_lines; r++)
- for (c = 0; c < r; c++)
- cp[r + c * mx->n_continuous] = cp[c + r * mx->n_continuous];
- }
- else
- {
- assert (mx->section == UPPER);
- for (r = 1; r < mx->n_continuous; r++)
- for (c = 0; c < r; c++)
- cp[c + r * mx->n_continuous] = cp[r + c * mx->n_continuous];
- }
- }
+ /* Adjustment to x0 and x1 for each subsequent row we read. Each of these
+ is 0 to keep it the same or -1 or +1 to adjust it by that much. */
+ int dx0, dx1;
+ };
+
+static const struct read_matrix_params *
+get_read_matrix_params (const struct matrix_format *mf)
+{
+ if (mf->triangle == FULL)
+ {
+ /* 1 2 3 4
+ 2 1 5 6
+ 3 5 1 7
+ 4 6 7 1 */
+ static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 0, 0 };
+ return &rmp;
}
- else if (type == 2)
+ else if (mf->triangle == LOWER)
{
- int c;
-
- for (c = 1; c < mx->n_continuous; c++)
- cp[c] = cp[0];
+ if (mf->diagonal == DIAGONAL)
+ {
+ /* 1 . . .
+ 2 1 . .
+ 3 5 1 .
+ 4 6 7 1 */
+ static const struct read_matrix_params rmp = { 0, 0, 0, 0, 0, 1 };
+ return &rmp;
+ }
+ else
+ {
+ /* . . . .
+ 2 . . .
+ 3 5 . .
+ 4 6 7 . */
+ static const struct read_matrix_params rmp = { 1, 0, 0, 0, 0, 1 };
+ return &rmp;
+ }
}
-}
-
-/* Read data lines for content type CONTENT from the data file.
- If PER_FACTOR is nonzero, then factor information is read from
- the data file. Data is for cell number CELL. */
-static int
-nr_read_data_lines (struct nr_aux_data *nr,
- int per_factor, int cell, int content, int compare)
-{
- struct matrix_data_pgm *mx = nr->mx;
- const int type = content_type[content]; /* Content type. */
- int n_lines; /* Number of lines to parse from data file for this type. */
- double *cp; /* Current position in vector or matrix. */
- int i;
-
- if (type != 1)
- n_lines = 1;
- else
+ else if (mf->triangle == UPPER)
{
- n_lines = mx->n_continuous;
- if (mx->section != FULL && mx->diag == NODIAGONAL)
- n_lines--;
+ if (mf->diagonal == DIAGONAL)
+ {
+ /* 1 2 3 4
+ . 1 5 6
+ . . 1 7
+ . . . 1 */
+ static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 1, 0 };
+ return &rmp;
+ }
+ else
+ {
+ /* . 2 3 4
+ . . 5 6
+ . . . 7
+ . . . . */
+ static const struct read_matrix_params rmp = { 0, -1, 1, INT_MAX, 1, 0 };
+ return &rmp;
+ }
}
+ else
+ NOT_REACHED ();
+}
- cp = nr->data[content][cell];
- if (type == 1 && mx->section == LOWER && mx->diag == NODIAGONAL)
- cp += mx->n_continuous;
-
- for (i = 0; i < n_lines; i++)
+static void
+schedule_matrices (struct matrix_format *mf)
+{
+ struct matrix_sched *ms0 = &mf->ms[0];
+ ms0->nr = 1;
+ ms0->nc = 1;
+ ms0->rp = xmalloc (sizeof *ms0->rp);
+ ms0->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = 1 };
+ ms0->n_rp = 1;
+
+ struct matrix_sched *ms1 = &mf->ms[1];
+ ms1->nr = 1;
+ ms1->nc = mf->n_cvars;
+ ms1->rp = xmalloc (sizeof *ms1->rp);
+ ms1->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = mf->n_cvars };
+ ms1->n_rp = 1;
+
+ struct matrix_sched *ms2 = &mf->ms[2];
+ ms2->nr = mf->n_cvars;
+ ms2->nc = mf->n_cvars;
+ ms2->rp = xmalloc (mf->n_cvars * sizeof *ms2->rp);
+ ms2->n_rp = 0;
+
+ const struct read_matrix_params *rmp = get_read_matrix_params (mf);
+ int x0 = rmp->x0;
+ int x1 = rmp->x1 < mf->n_cvars ? rmp->x1 : mf->n_cvars - 1;
+ int y0 = rmp->dy0;
+ int y1 = (int) mf->n_cvars + rmp->dy1;
+ for (int y = y0; y < y1; y++)
{
- int n_cols;
-
- if (!nr_read_splits (nr, 1))
- return 0;
- if (per_factor && !nr_read_factors (nr, cell))
- return 0;
- compare = 1;
-
- switch (type)
- {
- case 0:
- n_cols = mx->n_continuous;
- break;
- case 1:
- switch (mx->section)
- {
- case LOWER:
- n_cols = i + 1;
- break;
- case UPPER:
- cp += i;
- n_cols = mx->n_continuous - i;
- if (mx->diag == NODIAGONAL)
- {
- n_cols--;
- cp++;
- }
- break;
- case FULL:
- n_cols = mx->n_continuous;
- break;
- default:
- NOT_REACHED ();
- }
- break;
- case 2:
- n_cols = 1;
- break;
- default:
- NOT_REACHED ();
- }
+ assert (x0 >= 0 && x0 < mf->n_cvars);
+ assert (x1 >= 0 && x1 < mf->n_cvars);
+ assert (x1 >= x0);
- {
- int j;
-
- for (j = 0; j < n_cols; j++)
- {
- struct matrix_token token;
- if (!mget_token (&token, mx->reader))
- return 0;
- if (token.type != MNUM)
- {
- msg (SE, _("expecting value for %s %s"),
- dict_get_var (nr->dict, j)->name,
- context (mx->reader));
- return 0;
- }
-
- *cp++ = token.number;
- }
- if (mx->fmt != FREE
- && !force_eol (mx->reader, content_names[content]))
- return 0;
- }
+ ms2->rp[ms2->n_rp++] = (struct row_sched) {
+ .y = y, .x0 = x0, .x1 = x1 + 1
+ };
- if (mx->section == LOWER)
- cp += mx->n_continuous - n_cols;
+ x0 += rmp->dx0;
+ x1 += rmp->dx1;
}
-
- fill_matrix (mx, content, nr->data[content][cell]);
-
- return 1;
}
-/* When ROWTYPE_ does not appear in the data, reads the matrices and
- writes them to the output file.
- Returns true if successful, false if an I/O error occurred. */
static bool
-matrix_data_read_without_rowtype (struct case_source *source,
- struct ccase *c,
- write_case_func *write_case,
- write_case_data wc_data)
+read_id_columns (const struct matrix_format *mf,
+ struct substring *p, struct dfm_reader *r,
+ double *d, enum rowtype *rt)
{
- struct nr_aux_data *nr = source->aux;
- struct matrix_data_pgm *mx = nr->mx;
-
- {
- int *cp;
+ for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+ if (!(mf->input_vars[i] == mf->rowtype
+ ? next_rowtype (p, r, rt)
+ : next_number (p, r, &d[i])))
+ return false;
+ return true;
+}
- nr->data = pool_nalloc (mx->container, PROX + 1, sizeof *nr->data);
-
- {
- int i;
+static bool
+equal_id_columns (const struct matrix_format *mf,
+ const double *a, const double *b)
+{
+ for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+ if (mf->input_vars[i] != mf->rowtype && a[i] != b[i])
+ return false;
+ return true;
+}
- for (i = 0; i <= PROX; i++)
- nr->data[i] = NULL;
- }
-
- for (cp = mx->contents; *cp != EOC; cp++)
- if (*cp != LPAREN && *cp != RPAREN)
- {
- int per_factor = mx->is_per_factor[*cp];
- int n_entries;
-
- n_entries = mx->n_continuous;
- if (content_type[*cp] == 1)
- n_entries *= mx->n_continuous;
-
- {
- int n_vectors = per_factor ? mx->cells : 1;
- int i;
-
- nr->data[*cp] = pool_nalloc (mx->container,
- n_vectors, sizeof **nr->data);
-
- for (i = 0; i < n_vectors; i++)
- nr->data[*cp][i] = pool_nalloc (mx->container,
- n_entries, sizeof ***nr->data);
- }
- }
- }
-
- for (;;)
+static bool
+equal_split_columns (const struct matrix_format *mf,
+ const double *a, const double *b)
+{
+ for (size_t i = 0; i < mf->n_svars; i++)
{
- int *bp, *ep, *np;
-
- if (!nr_read_splits (nr, 0))
- return true;
-
- for (bp = mx->contents; *bp != EOC; bp = np)
- {
- int per_factor;
-
- /* Trap the CONTENTS that we should parse in this pass
- between bp and ep. Set np to the starting bp for next
- iteration. */
- if (*bp == LPAREN)
- {
- ep = ++bp;
- while (*ep != RPAREN)
- ep++;
- np = &ep[1];
- per_factor = 1;
- }
- else
- {
- ep = &bp[1];
- while (*ep != EOC && *ep != LPAREN)
- ep++;
- np = ep;
- per_factor = 0;
- }
-
- {
- int i;
-
- for (i = 0; i < (per_factor ? mx->cells : 1); i++)
- {
- int *cp;
-
- for (cp = bp; cp < ep; cp++)
- if (!nr_read_data_lines (nr, per_factor, i, *cp, cp != bp))
- return true;
- }
- }
- }
-
- if (!nr_output_data (nr, c, write_case, wc_data))
+ size_t idx = mf->svar_indexes[i];
+ if (a[idx] != b[idx])
return false;
-
- if (dict_get_split_cnt (nr->dict) == 0
- || !another_token (mx->reader))
- return true;
}
+ return true;
}
-/* Read the split file variables. If COMPARE is 1, compares the
- values read to the last values read and returns true if they're equal,
- false otherwise. */
static bool
-nr_read_splits (struct nr_aux_data *nr, int compare)
+is_pooled (const struct matrix_format *mf, const double *d)
+{
+ for (size_t i = 0; i < mf->n_fvars; i++)
+ if (d[mf->fvar_indexes[i]] != SYSMIS)
+ return false;
+ return true;
+}
+
+static void
+matrix_sched_init (const struct matrix_format *mf, enum rowtype rt,
+ gsl_matrix *m)
+{
+ int n_dims = rowtype_dimensions (rt);
+ const struct matrix_sched *ms = &mf->ms[n_dims];
+ double diagonal = n_dims < 2 || rt != C_CORR ? SYSMIS : 1.0;
+ for (size_t y = 0; y < ms->nr; y++)
+ for (size_t x = 0; x < ms->nc; x++)
+ gsl_matrix_set (m, y, x, y == x ? diagonal : SYSMIS);
+}
+
+static void
+matrix_sched_output (const struct matrix_format *mf, enum rowtype rt,
+ gsl_matrix *m, const double *d, int split_num,
+ struct casewriter *w)
{
- struct matrix_data_pgm *mx = nr->mx;
- static int just_read = 0; /* FIXME: WTF? */
- size_t split_cnt;
- size_t i;
+ int n_dims = rowtype_dimensions (rt);
+ const struct matrix_sched *ms = &mf->ms[n_dims];
- if (compare && just_read)
+ if (rt == C_N_SCALAR)
{
- just_read = 0;
- return true;
+ for (size_t x = 1; x < mf->n_cvars; x++)
+ gsl_matrix_set (m, 0, x, gsl_matrix_get (m, 0, 0));
+ rt = C_N;
}
-
- if (dict_get_split_vars (nr->dict) == NULL)
- return true;
- if (mx->single_split)
+ for (int y = 0; y < ms->nr; y++)
{
- if (!compare)
- {
- struct mxd_var *mv = dict_get_split_vars (nr->dict)[0]->aux;
- nr->split_values[0] = ++mv->sub_type;
- }
- return true;
+ struct ccase *c = case_create (casewriter_get_proto (w));
+ for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+ if (mf->input_vars[i] != mf->rowtype)
+ *case_num_rw (c, mf->input_vars[i]) = d[i];
+ if (mf->n_svars && !mf->svar_indexes)
+ *case_num_rw (c, mf->svars[0]) = split_num;
+ set_string (c, mf->rowtype, rowtype_name (rt));
+ const char *varname = n_dims == 2 ? var_get_name (mf->cvars[y]) : "";
+ set_string (c, mf->varname, ss_cstr (varname));
+ for (int x = 0; x < mf->n_cvars; x++)
+ *case_num_rw (c, mf->cvars[x]) = gsl_matrix_get (m, y, x);
+ casewriter_write (w, c);
}
+}
- if (!compare)
- just_read = 1;
+static void
+matrix_sched_output_n (const struct matrix_format *mf, double n,
+ gsl_matrix *m, const double *d, int split_num,
+ struct casewriter *w)
+{
+ gsl_matrix_set (m, 0, 0, n);
+ matrix_sched_output (mf, C_N_SCALAR, m, d, split_num, w);
+}
- split_cnt = dict_get_split_cnt (nr->dict);
- for (i = 0; i < split_cnt; i++)
+static void
+check_eol (const struct matrix_format *mf, struct substring *p,
+ struct dfm_reader *r)
+{
+ if (!mf->span)
{
- struct matrix_token token;
- if (!mget_token (&token, mx->reader))
- return false;
- if (token.type != MNUM)
+ ss_ltrim (p, ss_cstr (CC_SPACES ","));
+ if (p->length)
{
- msg (SE, _("Syntax error expecting SPLIT FILE value %s."),
- context (mx->reader));
- return false;
- }
-
- if (!compare)
- nr->split_values[i] = token.number;
- else if (nr->split_values[i] != token.number)
- {
- msg (SE, _("Expecting value %g for %s."),
- nr->split_values[i],
- dict_get_split_vars (nr->dict)[i]->name);
- return false;
+ parse_error (r, p, _("Extraneous data expecting end of line."));
+ p->length = 0;
}
}
-
- return true;
}
-/* Read the factors for cell CELL. If COMPARE is 1, compares the
- values read to the last values read and returns true if they're equal,
- false otherwise. */
-static bool
-nr_read_factors (struct nr_aux_data *nr, int cell)
+static void
+parse_data_with_rowtype (const struct matrix_format *mf,
+ struct dfm_reader *r, struct casewriter *w)
{
- struct matrix_data_pgm *mx = nr->mx;
- bool compare;
-
- if (mx->n_factors == 0)
- return true;
+ if (dfm_eof (r))
+ return;
+ struct substring p = dfm_get_record (r);
- assert (nr->max_cell_idx >= cell);
- if (cell != nr->max_cell_idx)
- compare = true;
- else
- {
- compare = false;
- nr->max_cell_idx++;
- }
-
- {
- size_t i;
-
- for (i = 0; i < mx->n_factors; i++)
- {
- struct matrix_token token;
- if (!mget_token (&token, mx->reader))
- return false;
- if (token.type != MNUM)
- {
- msg (SE, _("Syntax error expecting factor value %s."),
- context (mx->reader));
- return false;
- }
-
- if (!compare)
- nr->factor_values[i + mx->n_factors * cell] = token.number;
- else if (nr->factor_values[i + mx->n_factors * cell] != token.number)
- {
- msg (SE, _("Syntax error expecting value %g for %s %s."),
- nr->factor_values[i + mx->n_factors * cell],
- mx->factors[i]->name, context (mx->reader));
- return false;
- }
- }
- }
+ double *prev = NULL;
+ gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars);
- return true;
-}
+ double *d = xnmalloc (mf->n_input_vars, sizeof *d);
+ enum rowtype rt;
-/* Write the contents of a cell having content type CONTENT and data
- CP to the active file.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-dump_cell_content (const struct dictionary *dict,
- struct matrix_data_pgm *mx, int content, double *cp,
- struct ccase *c,
- write_case_func *write_case, write_case_data wc_data)
-{
- int type = content_type[content];
+ double *d_next = xnmalloc (mf->n_input_vars, sizeof *d_next);
- {
- buf_copy_str_rpad (case_data_rw (c, mx->rowtype_->fv)->s, 8,
- content_names[content]);
-
- if (type != 1)
- memset (case_data_rw (c, mx->varname_->fv)->s, ' ', 8);
- }
+ if (!read_id_columns (mf, &p, r, d, &rt))
+ goto exit;
+ for (;;)
+ {
+ /* If this has rowtype N but there was an N subcommand, then the
+ subcommand takes precedence, so we will suppress outputting this
+ record. We still need to parse it, though, so we can't skip other
+ work. */
+ bool suppress_output = mf->n >= 0 && (rt == C_N || rt == C_N_SCALAR);
+ if (suppress_output)
+ parse_error (r, NULL, _("N record is not allowed with N subcommand. "
+ "Ignoring N record."));
+
+ /* If there's an N subcommand, and this is a new split, then output an N
+ record. */
+ if (mf->n >= 0 && (!prev || !equal_split_columns (mf, prev, d)))
+ {
+ matrix_sched_output_n (mf, mf->n, m, d, 0, w);
- {
- int n_lines = (type == 1) ? mx->n_continuous : 1;
- int i;
-
- for (i = 0; i < n_lines; i++)
- {
- int j;
-
- for (j = 0; j < mx->n_continuous; j++)
- {
- int fv = dict_get_var (dict, mx->first_continuous + j)->fv;
- case_data_rw (c, fv)->f = *cp;
- cp++;
- }
- if (type == 1)
- buf_copy_str_rpad (case_data_rw (c, mx->varname_->fv)->s, 8,
- dict_get_var (dict,
- mx->first_continuous + i)->name);
- if (!write_case (wc_data))
- return false;
- }
- }
- return true;
-}
+ if (!prev)
+ prev = xnmalloc (mf->n_input_vars, sizeof *prev);
+ memcpy (prev, d, mf->n_input_vars * sizeof *prev);
+ }
-/* Finally dump out everything from nr_data[] to the output file. */
-static bool
-nr_output_data (struct nr_aux_data *nr, struct ccase *c,
- write_case_func *write_case, write_case_data wc_data)
-{
- struct matrix_data_pgm *mx = nr->mx;
-
- {
- struct variable *const *split;
- size_t split_cnt;
- size_t i;
+ /* Usually users don't provide the CONTENTS subcommand with ROWTYPE_, but
+ if they did then warn if ROWTYPE_ is an unexpected type. */
+ if (mf->factor_rowtype_mask || mf->pooled_rowtype_mask)
+ {
+ const char *name = rowtype_name (rt).string;
+ if (is_pooled (mf, d))
+ {
+ if (!((1u << rt) & mf->pooled_rowtype_mask))
+ parse_warning (r, NULL, _("Data contains pooled row type %s not "
+ "included in CONTENTS."), name);
+ }
+ else
+ {
+ if (!((1u << rt) & mf->factor_rowtype_mask))
+ parse_warning (r, NULL, _("Data contains with-factors row type "
+ "%s not included in CONTENTS."), name);
+ }
+ }
- split_cnt = dict_get_split_cnt (nr->dict);
- split = dict_get_split_vars (nr->dict);
- for (i = 0; i < split_cnt; i++)
- case_data_rw (c, split[i]->fv)->f = nr->split_values[i];
- }
+ /* Initialize the matrix to be filled-in. */
+ int n_dims = rowtype_dimensions (rt);
+ const struct matrix_sched *ms = &mf->ms[n_dims];
+ matrix_sched_init (mf, rt, m);
- if (mx->n_factors)
- {
- int cell;
+ enum rowtype rt_next;
+ bool eof;
- for (cell = 0; cell < mx->cells; cell++)
- {
- {
- size_t factor;
-
- for (factor = 0; factor < mx->n_factors; factor++)
- case_data_rw (c, mx->factors[factor]->fv)->f
- = nr->factor_values[factor + cell * mx->n_factors];
- }
-
- {
- int content;
-
- for (content = 0; content <= PROX; content++)
- if (mx->is_per_factor[content])
- {
- assert (nr->data[content] != NULL
- && nr->data[content][cell] != NULL);
+ size_t n_rows;
+ for (n_rows = 1; ; n_rows++)
+ {
+ if (n_rows <= ms->n_rp)
+ {
+ const struct row_sched *rs = &ms->rp[n_rows - 1];
+ size_t y = rs->y;
+ for (size_t x = rs->x0; x < rs->x1; x++)
+ {
+ double e;
+ if (!next_number (&p, r, &e))
+ goto exit;
+ gsl_matrix_set (m, y, x, e);
+ if (n_dims == 2 && mf->triangle != FULL)
+ gsl_matrix_set (m, x, y, e);
+ }
+ check_eol (mf, &p, r);
+ }
+ else
+ {
+ /* Suppress bad input data. We'll issue an error later. */
+ p.length = 0;
+ }
- if (!dump_cell_content (nr->dict, mx,
- content, nr->data[content][cell],
- c, write_case, wc_data))
- return false;
- }
- }
- }
- }
+ eof = (!more_tokens (&p, r)
+ || !read_id_columns (mf, &p, r, d_next, &rt_next));
+ if (eof)
+ break;
- {
- int content;
-
- {
- size_t factor;
+ if (!equal_id_columns (mf, d, d_next) || rt_next != rt)
+ break;
+ }
+ if (!suppress_output)
+ matrix_sched_output (mf, rt, m, d, 0, w);
+
+ if (n_rows != ms->n_rp)
+ parse_error (r, NULL,
+ _("Matrix %s had %zu rows but %zu rows were expected."),
+ rowtype_name (rt).string, n_rows, ms->n_rp);
+ if (eof)
+ break;
- for (factor = 0; factor < mx->n_factors; factor++)
- case_data_rw (c, mx->factors[factor]->fv)->f = SYSMIS;
+ double *d_tmp = d;
+ d = d_next;
+ d_next = d_tmp;
+
+ rt = rt_next;
}
-
- for (content = 0; content <= PROX; content++)
- if (!mx->is_per_factor[content] && nr->data[content] != NULL)
- {
- if (!dump_cell_content (nr->dict, mx, content, nr->data[content][0],
- c, write_case, wc_data))
- return false;
- }
- }
- return true;
+exit:
+ free (prev);
+ gsl_matrix_free (m);
+ free (d);
+ free (d_next);
}
-\f
-/* Back end, with ROWTYPE_. */
-/* All the data for one set of factor values. */
-struct factor_data
- {
- double *factors;
- int n_rows[PROX + 1];
- double *data[PROX + 1];
- struct factor_data *next;
- };
+static void
+parse_matrix_without_rowtype (const struct matrix_format *mf,
+ struct substring *p, struct dfm_reader *r,
+ gsl_matrix *m, enum rowtype rowtype, bool pooled,
+ int split_num, struct casewriter *w)
+{
+ int n_dims = rowtype_dimensions (rowtype);
+ const struct matrix_sched *ms = &mf->ms[n_dims];
-/* With ROWTYPE_ auxiliary data. */
-struct wr_aux_data
- {
- const struct dictionary *dict; /* The dictionary */
- struct matrix_data_pgm *mx; /* MATRIX DATA program. */
- int content; /* Type of current row. */
- double *split_values; /* SPLIT FILE variable values. */
- struct factor_data *data; /* All the data. */
- struct factor_data *current; /* Current factor. */
- };
+ double *d = xnmalloc (mf->n_input_vars, sizeof *d);
+ matrix_sched_init (mf, rowtype, m);
+ for (size_t i = 0; i < ms->n_rp; i++)
+ {
+ int y = ms->rp[i].y;
+ int k = 0;
+ int h = 0;
+ for (size_t j = 0; j < mf->n_input_vars; j++)
+ {
+ const struct variable *iv = mf->input_vars[j];
+ if (k < mf->n_cvars && iv == mf->cvars[k])
+ {
+ if (k < ms->rp[i].x1 - ms->rp[i].x0)
+ {
+ double e;
+ if (!next_number (p, r, &e))
+ goto exit;
+
+ int x = k + ms->rp[i].x0;
+ gsl_matrix_set (m, y, x, e);
+ if (n_dims == 2 && mf->triangle != FULL)
+ gsl_matrix_set (m, x, y, e);
+ }
+ k++;
+ continue;
+ }
+ if (h < mf->n_fvars && iv == mf->fvars[h])
+ {
+ h++;
+ if (pooled)
+ {
+ d[j] = SYSMIS;
+ continue;
+ }
+ }
-static bool wr_read_splits (struct wr_aux_data *, struct ccase *,
- write_case_func *, write_case_data);
-static bool wr_output_data (struct wr_aux_data *, struct ccase *,
- write_case_func *, write_case_data);
-static bool wr_read_rowtype (struct wr_aux_data *,
- const struct matrix_token *, struct dfm_reader *);
-static bool wr_read_factors (struct wr_aux_data *);
-static bool wr_read_indeps (struct wr_aux_data *);
-static bool matrix_data_read_with_rowtype (struct case_source *,
- struct ccase *,
- write_case_func *,
- write_case_data);
-
-/* When ROWTYPE_ appears in the data, reads the matrices and writes
- them to the output file.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-read_matrices_with_rowtype (struct dataset *ds, struct matrix_data_pgm *mx)
-{
- struct wr_aux_data wr;
- bool ok;
-
- wr.mx = mx;
- wr.content = -1;
- wr.split_values = NULL;
- wr.data = NULL;
- wr.current = NULL;
- wr.dict = dataset_dict (ds);
- mx->cells = 0;
-
- proc_set_source (ds,
- create_case_source (&matrix_data_with_rowtype_source_class,
- &wr));
- ok = procedure (ds, NULL, NULL);
-
- free (wr.split_values);
- return ok;
+ double e;
+ if (!next_number (p, r, &e))
+ goto exit;
+ d[j] = e;
+ }
+ check_eol (mf, p, r);
+ }
+
+ matrix_sched_output (mf, rowtype, m, d, split_num, w);
+exit:
+ free (d);
}
-/* Read from the data file and write it to the active file.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-matrix_data_read_with_rowtype (struct case_source *source,
- struct ccase *c,
- write_case_func *write_case,
- write_case_data wc_data)
+static void
+parse_data_without_rowtype (const struct matrix_format *mf,
+ struct dfm_reader *r, struct casewriter *w)
{
- struct wr_aux_data *wr = source->aux;
- struct matrix_data_pgm *mx = wr->mx;
+ if (dfm_eof (r))
+ return;
+ struct substring p = dfm_get_record (r);
+ gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars);
+
+ int split_num = 1;
do
{
- if (!wr_read_splits (wr, c, write_case, wc_data))
- return true;
+ for (size_t i = 0; i < mf->n_contents; )
+ {
+ size_t j = i;
+ if (mf->contents[i].open)
+ while (!mf->contents[j].close)
+ j++;
- if (!wr_read_factors (wr))
- return true;
+ if (mf->contents[i].open)
+ {
+ for (size_t k = 0; k < mf->cells; k++)
+ for (size_t h = i; h <= j; h++)
+ parse_matrix_without_rowtype (mf, &p, r, m,
+ mf->contents[h].rowtype, false,
+ split_num, w);
+ }
+ else
+ parse_matrix_without_rowtype (mf, &p, r, m, mf->contents[i].rowtype,
+ true, split_num, w);
+ i = j + 1;
+ }
- if (!wr_read_indeps (wr))
- return true;
+ split_num++;
}
- while (another_token (mx->reader));
+ while (more_tokens (&p, r));
- return wr_output_data (wr, c, write_case, wc_data);
+ gsl_matrix_free (m);
}
-/* Read the split file variables. If they differ from the previous
- set of split variables then output the data. Returns success. */
-static bool
-wr_read_splits (struct wr_aux_data *wr,
- struct ccase *c,
- write_case_func *write_case, write_case_data wc_data)
+/* Parses VARIABLES=varnames for MATRIX DATA and returns a dictionary with the
+ named variables in it. */
+static struct dictionary *
+parse_matrix_data_variables (struct lexer *lexer)
{
- struct matrix_data_pgm *mx = wr->mx;
- bool compare;
- size_t split_cnt;
+ if (!lex_force_match_id (lexer, "VARIABLES"))
+ return NULL;
+ lex_match (lexer, T_EQUALS);
- split_cnt = dict_get_split_cnt (wr->dict);
- if (split_cnt == 0)
- return true;
+ struct dictionary *dict = dict_create (get_default_encoding ());
- if (wr->split_values)
- compare = true;
- else
+ size_t n_names = 0;
+ char **names = NULL;
+ if (!parse_DATA_LIST_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE))
{
- compare = false;
- wr->split_values = xnmalloc (split_cnt, sizeof *wr->split_values);
+ dict_unref (dict);
+ return NULL;
}
-
- {
- bool different = false;
- int i;
- for (i = 0; i < split_cnt; i++)
- {
- struct matrix_token token;
- if (!mget_token (&token, mx->reader))
- return false;
- if (token.type != MNUM)
- {
- msg (SE, _("Syntax error %s expecting SPLIT FILE value."),
- context (mx->reader));
- return false;
- }
-
- if (compare && wr->split_values[i] != token.number && !different)
- {
- if (!wr_output_data (wr, c, write_case, wc_data))
- return 0;
- different = true;
- mx->cells = 0;
- }
- wr->split_values[i] = token.number;
- }
- }
+ for (size_t i = 0; i < n_names; i++)
+ if (!strcasecmp (names[i], "ROWTYPE_"))
+ dict_create_var_assert (dict, "ROWTYPE_", 8);
+ else
+ dict_create_var_assert (dict, names[i], 0);
- return true;
-}
+ for (size_t i = 0; i < n_names; ++i)
+ free (names[i]);
+ free (names);
-/* Compares doubles A and B, treating SYSMIS as greatest. */
-static int
-compare_doubles (const void *a_, const void *b_, const void *aux UNUSED)
-{
- const double *a = a_;
- const double *b = b_;
-
- if (*a == *b)
- return 0;
- else if (*a == SYSMIS)
- return 1;
- else if (*b == SYSMIS)
- return -1;
- else if (*a > *b)
- return 1;
- else
- return -1;
+ if (dict_lookup_var (dict, "VARNAME_"))
+ {
+ msg (SE, _("VARIABLES may not include VARNAME_."));
+ dict_unref (dict);
+ return NULL;
+ }
+ return dict;
}
-/* Return strcmp()-type comparison of the MX->n_factors factors at _A and
- _B. Sort missing values toward the end. */
-static int
-compare_factors (const void *a_, const void *b_, const void *mx_)
+static bool
+parse_matrix_data_subvars (struct lexer *lexer, struct dictionary *dict,
+ bool *taken_vars,
+ struct variable ***vars, size_t **indexes,
+ size_t *n_vars)
{
- const struct matrix_data_pgm *mx = mx_;
- struct factor_data *const *pa = a_;
- struct factor_data *const *pb = b_;
- const double *a = (*pa)->factors;
- const double *b = (*pb)->factors;
-
- return lexicographical_compare_3way (a, mx->n_factors,
- b, mx->n_factors,
- sizeof *a,
- compare_doubles, NULL);
+ if (!parse_variables (lexer, dict, vars, n_vars, 0))
+ return false;
+
+ *indexes = xnmalloc (*n_vars, sizeof **indexes);
+ for (size_t i = 0; i < *n_vars; i++)
+ {
+ struct variable *v = (*vars)[i];
+ if (!strcasecmp (var_get_name (v), "ROWTYPE_"))
+ {
+ msg (SE, _("ROWTYPE_ is not allowed on SPLIT or FACTORS."));
+ goto error;
+ }
+ (*indexes)[i] = var_get_dict_index (v);
+
+ bool *tv = &taken_vars[var_get_dict_index (v)];
+ if (*tv)
+ {
+ msg (SE, _("%s may not appear on both SPLIT and FACTORS."),
+ var_get_name (v));
+ goto error;
+ }
+ *tv = true;
+
+ var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 4 });
+ }
+ return true;
+
+error:
+ free (*vars);
+ *vars = NULL;
+ *n_vars = 0;
+ free (*indexes);
+ *indexes = NULL;
+ return false;
}
-/* Write out the data for the current split file to the active
- file.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-wr_output_data (struct wr_aux_data *wr,
- struct ccase *c,
- write_case_func *write_case, write_case_data wc_data)
+int
+cmd_matrix_data (struct lexer *lexer, struct dataset *ds)
{
- struct matrix_data_pgm *mx = wr->mx;
- bool ok = true;
+ struct dictionary *dict = parse_matrix_data_variables (lexer);
+ if (!dict)
+ return CMD_FAILURE;
- {
- struct variable *const *split;
- size_t split_cnt;
- size_t i;
+ size_t n_input_vars = dict_get_n_vars (dict);
+ struct variable **input_vars = xnmalloc (n_input_vars, sizeof *input_vars);
+ for (size_t i = 0; i < n_input_vars; i++)
+ input_vars[i] = dict_get_var (dict, i);
- split_cnt = dict_get_split_cnt (wr->dict);
- split = dict_get_split_vars (wr->dict);
- for (i = 0; i < split_cnt; i++)
- case_data_rw (c, split[i]->fv)->f = wr->split_values[i];
- }
+ int varname_width = 8;
+ for (size_t i = 0; i < n_input_vars; i++)
+ {
+ int w = strlen (var_get_name (input_vars[i]));
+ varname_width = MAX (w, varname_width);
+ }
- /* Sort the wr->data list. */
- {
- struct factor_data **factors;
- struct factor_data *iter;
- int i;
+ struct variable *rowtype = dict_lookup_var (dict, "ROWTYPE_");
+ bool input_rowtype = rowtype != NULL;
+ if (!rowtype)
+ rowtype = dict_create_var_assert (dict, "ROWTYPE_", 8);
- factors = xnmalloc (mx->cells, sizeof *factors);
+ struct matrix_format mf = {
+ .input_rowtype = input_rowtype,
+ .input_vars = input_vars,
+ .n_input_vars = n_input_vars,
- for (i = 0, iter = wr->data; iter; iter = iter->next, i++)
- factors[i] = iter;
+ .rowtype = rowtype,
+ .varname = dict_create_var_assert (dict, "VARNAME_", varname_width),
- sort (factors, mx->cells, sizeof *factors, compare_factors, mx);
+ .triangle = LOWER,
+ .diagonal = DIAGONAL,
+ .n = -1,
+ .cells = -1,
+ };
- wr->data = factors[0];
- for (i = 0; i < mx->cells - 1; i++)
- factors[i]->next = factors[i + 1];
- factors[mx->cells - 1]->next = NULL;
+ bool *taken_vars = XCALLOC (n_input_vars, bool);
+ if (input_rowtype)
+ taken_vars[var_get_dict_index (rowtype)] = true;
- free (factors);
- }
+ struct file_handle *fh = NULL;
+ while (lex_token (lexer) != T_ENDCMD)
+ {
+ if (!lex_force_match (lexer, T_SLASH))
+ goto error;
- /* Write out records for every set of factor values. */
- {
- struct factor_data *iter;
-
- for (iter = wr->data; iter; iter = iter->next)
- {
+ if (lex_match_id (lexer, "N"))
{
- size_t factor;
+ lex_match (lexer, T_EQUALS);
- for (factor = 0; factor < mx->n_factors; factor++)
- case_data_rw (c, mx->factors[factor]->fv)->f
- = iter->factors[factor];
+ if (!lex_force_int_range (lexer, "N", 0, INT_MAX))
+ goto error;
+
+ mf.n = lex_integer (lexer);
+ lex_get (lexer);
}
-
+ else if (lex_match_id (lexer, "FORMAT"))
{
- int content;
+ lex_match (lexer, T_EQUALS);
- for (content = 0; content <= PROX; content++)
+ while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
{
- if (!iter->n_rows[content])
- continue;
-
- {
- int type = content_type[content];
- int n_lines = (type == 1
- ? (mx->n_continuous
- - (mx->section != FULL && mx->diag == NODIAGONAL))
- : 1);
-
- if (n_lines != iter->n_rows[content])
- {
- msg (SE, _("Expected %d lines of data for %s content; "
- "actually saw %d lines. No data will be "
- "output for this content."),
- n_lines, content_names[content],
- iter->n_rows[content]);
- continue;
- }
- }
-
- fill_matrix (mx, content, iter->data[content]);
-
- ok = dump_cell_content (wr->dict, mx, content,
- iter->data[content],
- c, write_case, wc_data);
- if (!ok)
- break;
+ if (lex_match_id (lexer, "LIST"))
+ mf.span = false;
+ else if (lex_match_id (lexer, "FREE"))
+ mf.span = true;
+ else if (lex_match_id (lexer, "UPPER"))
+ mf.triangle = UPPER;
+ else if (lex_match_id (lexer, "LOWER"))
+ mf.triangle = LOWER;
+ else if (lex_match_id (lexer, "FULL"))
+ mf.triangle = FULL;
+ else if (lex_match_id (lexer, "DIAGONAL"))
+ mf.diagonal = DIAGONAL;
+ else if (lex_match_id (lexer, "NODIAGONAL"))
+ mf.diagonal = NO_DIAGONAL;
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
}
}
- }
- }
-
- pool_destroy (mx->container);
- mx->container = pool_create ();
-
- wr->data = wr->current = NULL;
-
- return ok;
-}
+ else if (lex_match_id (lexer, "FILE"))
+ {
+ lex_match (lexer, T_EQUALS);
+ fh_unref (fh);
+ fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
+ if (!fh)
+ goto error;
+ }
+ else if (!mf.n_svars && lex_match_id (lexer, "SPLIT"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (!mf.input_rowtype
+ && lex_token (lexer) == T_ID
+ && !dict_lookup_var (dict, lex_tokcstr (lexer)))
+ {
+ mf.svars = xmalloc (sizeof *mf.svars);
+ mf.svars[0] = dict_create_var_assert (dict, lex_tokcstr (lexer),
+ 0);
+ var_set_both_formats (
+ mf.svars[0], &(struct fmt_spec) { .type = FMT_F, .w = 4 });
+ mf.n_svars = 1;
+ lex_get (lexer);
+ }
+ else if (!parse_matrix_data_subvars (lexer, dict, taken_vars,
+ &mf.svars, &mf.svar_indexes,
+ &mf.n_svars))
+ goto error;
+ }
+ else if (!mf.n_fvars && lex_match_id (lexer, "FACTORS"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (!parse_matrix_data_subvars (lexer, dict, taken_vars,
+ &mf.fvars, &mf.fvar_indexes,
+ &mf.n_fvars))
+ goto error;
+ }
+ else if (lex_match_id (lexer, "CELLS"))
+ {
+ if (mf.input_rowtype)
+ msg (SW, _("CELLS is ignored when VARIABLES includes ROWTYPE_"));
-/* Sets ROWTYPE_ based on the given TOKEN read from READER.
- Return success. */
-static bool
-wr_read_rowtype (struct wr_aux_data *wr,
- const struct matrix_token *token,
- struct dfm_reader *reader)
-{
- if (wr->content != -1)
- {
- msg (SE, _("Multiply specified ROWTYPE_ %s."), context (reader));
- return false;
- }
- if (token->type != MSTR)
- {
- msg (SE, _("Syntax error %s expecting ROWTYPE_ string."),
- context (reader));
- return false;
- }
-
- {
- char s[16];
- char *cp;
-
- memcpy (s, token->string, min (15, token->length));
- s[min (15, token->length)] = 0;
+ lex_match (lexer, T_EQUALS);
- for (cp = s; *cp; cp++)
- *cp = toupper ((unsigned char) *cp);
+ if (!lex_force_int_range (lexer, "CELLS", 0, INT_MAX))
+ goto error;
- wr->content = string_to_content_type (s, NULL);
- }
+ mf.cells = lex_integer (lexer);
+ lex_get (lexer);
+ }
+ else if (lex_match_id (lexer, "CONTENTS"))
+ {
+ lex_match (lexer, T_EQUALS);
- if (wr->content == -1)
- {
- msg (SE, _("Syntax error %s."), context (reader));
- return 0;
+ size_t allocated_contents = mf.n_contents;
+ bool in_parens = false;
+ for (;;)
+ {
+ bool open = !in_parens && lex_match (lexer, T_LPAREN);
+ enum rowtype rt;
+ if (!rowtype_parse (lexer, &rt))
+ {
+ if (open || in_parens || (lex_token (lexer) != T_ENDCMD
+ && lex_token (lexer) != T_SLASH))
+ {
+ lex_error (lexer, _("Row type keyword expected."));
+ goto error;
+ }
+ break;
+ }
+
+ if (open)
+ in_parens = true;
+
+ if (in_parens)
+ mf.factor_rowtype_mask |= 1u << rt;
+ else
+ mf.pooled_rowtype_mask |= 1u << rt;
+
+ bool close = in_parens && lex_match (lexer, T_RPAREN);
+ if (close)
+ in_parens = false;
+
+ if (mf.n_contents >= allocated_contents)
+ mf.contents = x2nrealloc (mf.contents, &allocated_contents,
+ sizeof *mf.contents);
+ mf.contents[mf.n_contents++] = (struct content) {
+ .open = open, .rowtype = rt, .close = close
+ };
+ }
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
}
-
- return true;
-}
-
-/* Read the factors for the current row. Select a set of factors and
- point wr_current to it. */
-static bool
-wr_read_factors (struct wr_aux_data *wr)
-{
- struct matrix_data_pgm *mx = wr->mx;
- double *factor_values = local_alloc (sizeof *factor_values * mx->n_factors);
-
- wr->content = -1;
- {
- size_t i;
-
- for (i = 0; i < mx->n_factors; i++)
- {
- struct matrix_token token;
- if (!mget_token (&token, mx->reader))
- goto lossage;
- if (token.type == MSTR)
- {
- if (!wr_read_rowtype (wr, &token, mx->reader))
- goto lossage;
- if (!mget_token (&token, mx->reader))
- goto lossage;
- }
- if (token.type != MNUM)
- {
- msg (SE, _("Syntax error expecting factor value %s."),
- context (mx->reader));
- goto lossage;
- }
-
- factor_values[i] = token.number;
- }
- }
- if (wr->content == -1)
+ if (mf.diagonal == NO_DIAGONAL && mf.triangle == FULL)
{
- struct matrix_token token;
- if (!mget_token (&token, mx->reader))
- goto lossage;
- if (!wr_read_rowtype (wr, &token, mx->reader))
- goto lossage;
+ msg (SE, _("FORMAT=FULL and FORMAT=NODIAGONAL are mutually exclusive."));
+ goto error;
}
-
- /* Try the most recent factor first as a simple caching
- mechanism. */
- if (wr->current)
+ if (!mf.input_rowtype)
{
- size_t i;
-
- for (i = 0; i < mx->n_factors; i++)
- if (factor_values[i] != wr->current->factors[i])
- goto cache_miss;
- goto winnage;
- }
+ if (mf.cells < 0)
+ {
+ if (mf.n_fvars)
+ {
+ msg (SE, _("CELLS is required when factor variables are specified "
+ "and VARIABLES does not include ROWTYPE_."));
+ goto error;
+ }
+ mf.cells = 1;
+ }
- /* Linear search through the list. */
-cache_miss:
- {
- struct factor_data *iter;
+ if (!mf.n_contents)
+ {
+ msg (SW, _("CONTENTS was not specified and VARIABLES does not "
+ "include ROWTYPE_. Assuming CONTENTS=CORR."));
- for (iter = wr->data; iter; iter = iter->next)
+ mf.n_contents = 1;
+ mf.contents = xmalloc (sizeof *mf.contents);
+ *mf.contents = (struct content) { .rowtype = C_CORR };
+ }
+ }
+ mf.cvars = xmalloc (mf.n_input_vars * sizeof *mf.cvars);
+ for (size_t i = 0; i < mf.n_input_vars; i++)
+ if (!taken_vars[i])
{
- size_t i;
-
- for (i = 0; i < mx->n_factors; i++)
- if (factor_values[i] != iter->factors[i])
- goto next_item;
-
- wr->current = iter;
- goto winnage;
-
- next_item: ;
+ struct variable *v = input_vars[i];
+ mf.cvars[mf.n_cvars++] = v;
+ var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 10,
+ .d = 4 });
}
- }
-
- /* Not found. Make a new item. */
- {
- struct factor_data *new = pool_alloc (mx->container, sizeof *new);
-
- new->factors = pool_nalloc (mx->container,
- mx->n_factors, sizeof *new->factors);
-
- {
- size_t i;
-
- for (i = 0; i < mx->n_factors; i++)
- new->factors[i] = factor_values[i];
- }
-
+ if (!mf.n_cvars)
{
- int i;
-
- for (i = 0; i <= PROX; i++)
- {
- new->n_rows[i] = 0;
- new->data[i] = NULL;
- }
+ msg (SE, _("At least one continuous variable is required."));
+ goto error;
}
-
- new->next = wr->data;
- wr->data = wr->current = new;
- mx->cells++;
- }
-
-winnage:
- local_free (factor_values);
- return true;
-
-lossage:
- local_free (factor_values);
- return false;
-}
-
-/* Read the independent variables into wr->current. */
-static bool
-wr_read_indeps (struct wr_aux_data *wr)
-{
- struct matrix_data_pgm *mx = wr->mx;
- struct factor_data *c = wr->current;
- const int type = content_type[wr->content];
- const int n_rows = c->n_rows[wr->content];
- double *cp;
- int n_cols;
-
- /* Allocate room for data if necessary. */
- if (c->data[wr->content] == NULL)
+ if (mf.input_rowtype)
{
- int n_items = mx->n_continuous;
- if (type == 1)
- n_items *= mx->n_continuous;
-
- c->data[wr->content] = pool_nalloc (mx->container,
- n_items, sizeof **c->data);
+ for (size_t i = 0; i < mf.n_cvars; i++)
+ if (mf.cvars[i] != input_vars[n_input_vars - mf.n_cvars + i])
+ {
+ msg (SE, _("VARIABLES includes ROWTYPE_ but the continuous "
+ "variables are not the last ones on VARIABLES."));
+ goto error;
+ }
}
-
- cp = &c->data[wr->content][n_rows * mx->n_continuous];
-
- /* Figure out how much to read from this line. */
- switch (type)
+ unsigned int rowtype_mask = mf.pooled_rowtype_mask | mf.factor_rowtype_mask;
+ if (rowtype_mask & (1u << C_N) && mf.n >= 0)
{
- case 0:
- case 2:
- if (n_rows > 0)
- {
- msg (SE, _("Duplicate specification for %s."),
- content_names[wr->content]);
- return false;
- }
- if (type == 0)
- n_cols = mx->n_continuous;
- else
- n_cols = 1;
- break;
- case 1:
- if (n_rows >= mx->n_continuous - (mx->section != FULL && mx->diag == NODIAGONAL))
- {
- msg (SE, _("Too many rows of matrix data for %s."),
- content_names[wr->content]);
- return false;
- }
-
- switch (mx->section)
- {
- case LOWER:
- n_cols = n_rows + 1;
- if (mx->diag == NODIAGONAL)
- cp += mx->n_continuous;
- break;
- case UPPER:
- cp += n_rows;
- n_cols = mx->n_continuous - n_rows;
- if (mx->diag == NODIAGONAL)
- {
- n_cols--;
- cp++;
- }
- break;
- case FULL:
- n_cols = mx->n_continuous;
- break;
- default:
- NOT_REACHED ();
- }
- break;
- default:
- NOT_REACHED ();
+ msg (SE, _("Cannot specify N on CONTENTS along with the N subcommand."));
+ goto error;
}
- c->n_rows[wr->content]++;
- /* Read N_COLS items at CP. */
- {
- int j;
-
- for (j = 0; j < n_cols; j++)
- {
- struct matrix_token token;
- if (!mget_token (&token, mx->reader))
- return false;
- if (token.type != MNUM)
- {
- msg (SE, _("Syntax error expecting value for %s %s."),
- dict_get_var (wr->dict, mx->first_continuous + j)->name,
- context (mx->reader));
- return false;
- }
-
- *cp++ = token.number;
- }
- if (mx->fmt != FREE
- && !force_eol (mx->reader, content_names[wr->content]))
- return false;
- }
+ struct variable **order = xnmalloc (dict_get_n_vars (dict), sizeof *order);
+ size_t n_order = 0;
+ for (size_t i = 0; i < mf.n_svars; i++)
+ order[n_order++] = mf.svars[i];
+ order[n_order++] = mf.rowtype;
+ for (size_t i = 0; i < mf.n_fvars; i++)
+ order[n_order++] = mf.fvars[i];
+ order[n_order++] = mf.varname;
+ for (size_t i = 0; i < mf.n_cvars; i++)
+ order[n_order++] = mf.cvars[i];
+ assert (n_order == dict_get_n_vars (dict));
+ dict_reorder_vars (dict, order, n_order);
+ free (order);
+
+ dict_set_split_vars (dict, mf.svars, mf.n_svars);
+
+ schedule_matrices (&mf);
+
+ if (fh == NULL)
+ fh = fh_inline_file ();
+
+ if (lex_end_of_command (lexer) != CMD_SUCCESS)
+ goto error;
+
+ struct dfm_reader *reader = dfm_open_reader (fh, lexer, NULL);
+ if (reader == NULL)
+ goto error;
+
+ struct casewriter *writer = autopaging_writer_create (dict_get_proto (dict));
+ if (mf.input_rowtype)
+ parse_data_with_rowtype (&mf, reader, writer);
+ else
+ parse_data_without_rowtype (&mf, reader, writer);
+ dfm_close_reader (reader);
- return true;
-}
-\f
-/* Matrix source. */
+ dataset_set_dict (ds, dict);
+ dataset_set_source (ds, casewriter_make_reader (writer));
-static const struct case_source_class matrix_data_with_rowtype_source_class =
- {
- "MATRIX DATA",
- NULL,
- matrix_data_read_with_rowtype,
- NULL,
- };
+ matrix_format_uninit (&mf);
+ free (taken_vars);
+ fh_unref (fh);
-static const struct case_source_class
-matrix_data_without_rowtype_source_class =
- {
- "MATRIX DATA",
- NULL,
- matrix_data_read_without_rowtype,
- NULL,
- };
+ return CMD_SUCCESS;
+ error:
+ matrix_format_uninit (&mf);
+ free (taken_vars);
+ dict_unref (dict);
+ fh_unref (fh);
+ return CMD_FAILURE;
+}