#include <config.h>
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_vector.h>
+
#include "data/case.h"
#include "data/casereader.h"
#include "data/casewriter.h"
+#include "data/data-in.h"
#include "data/dataset.h"
#include "data/dictionary.h"
#include "data/format.h"
+#include "data/short-names.h"
#include "data/transformations.h"
#include "data/variable.h"
#include "language/command.h"
#include "language/data-io/placement-parser.h"
#include "language/lexer/lexer.h"
#include "language/lexer/variable-parser.h"
+#include "libpspp/assertion.h"
#include "libpspp/i18n.h"
#include "libpspp/message.h"
-#include "libpspp/misc.h"
+#include "libpspp/str.h"
+#include "gl/c-ctype.h"
+#include "gl/minmax.h"
#include "gl/xsize.h"
#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
\f
-/* DATA LIST transformation data. */
-struct data_list_trns
+#define ROWTYPES \
+ /* Matrix row types. */ \
+ RT(CORR, 2) \
+ RT(COV, 2) \
+ RT(MAT, 2) \
+ RT(N_MATRIX, 2) \
+ RT(PROX, 2) \
+ \
+ /* Vector row types. */ \
+ RT(COUNT, 1) \
+ RT(DFE, 1) \
+ RT(MEAN, 1) \
+ RT(MSE, 1) \
+ RT(STDDEV, 1) \
+ RT(N, 1) \
+ \
+ /* Scalar row types. */ \
+ RT(N_SCALAR, 0)
+
+enum rowtype
{
- struct data_parser *parser; /* Parser. */
- struct dfm_reader *reader; /* Data file reader. */
- struct variable *end; /* Variable specified on END subcommand. */
+#define RT(NAME, DIMS) C_##NAME,
+ ROWTYPES
+#undef RT
};
-static trns_free_func data_list_trns_free;
-static trns_proc_func data_list_trns_proc;
-
-enum diagonal
+enum
{
- DIAGONAL,
- NO_DIAGONAL
+#define RT(NAME, DIMS) +1
+ N_ROWTYPES = ROWTYPES
+#undef RT
};
+verify (N_ROWTYPES < 32);
-enum triangle
- {
- LOWER,
- UPPER,
- FULL
+/* Returns the number of dimensions in the indexes for row type RT. A matrix
+ has 2 dimensions, a vector has 1, a scalar has 0. */
+static int
+rowtype_dimensions (enum rowtype rt)
+{
+ static const int rowtype_dims[N_ROWTYPES] = {
+#define RT(NAME, DIMS) [C_##NAME] = DIMS,
+ ROWTYPES
+#undef RT
};
+ return rowtype_dims[rt];
+}
-static const int ROWTYPE_WIDTH = 8;
+static struct substring
+rowtype_name (enum rowtype rt)
+{
+ static const struct substring rowtype_names[N_ROWTYPES] = {
+#define RT(NAME, DIMS) [C_##NAME] = SS_LITERAL_INITIALIZER (#NAME),
+ ROWTYPES
+#undef RT
+ };
-struct matrix_format
+ return rowtype_names[rt];
+}
+
+static bool
+rowtype_from_string (struct substring token, enum rowtype *rt)
+{
+ ss_trim (&token, ss_cstr (CC_SPACES));
+ for (size_t i = 0; i < N_ROWTYPES; i++)
+ if (lex_id_match (rowtype_name (i), token))
+ {
+ *rt = i;
+ return true;
+ }
+
+ if (lex_id_match (ss_cstr ("N_VECTOR"), token))
+ {
+ *rt = C_N;
+ return true;
+ }
+ else if (lex_id_match (ss_cstr ("SD"), token))
+ {
+ *rt = C_STDDEV;
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+rowtype_parse (struct lexer *lexer, enum rowtype *rt)
{
- enum triangle triangle;
- enum diagonal diagonal;
- const struct variable *rowtype;
- const struct variable *varname;
- int n_continuous_vars;
- struct variable **split_vars;
- size_t n_split_vars;
- long n;
-};
-
-/*
-valid rowtype_ values:
- CORR,
- COV,
- MAT,
-
-
- MSE,
- DFE,
- MEAN,
- STDDEV (or SD),
- N_VECTOR (or N),
- N_SCALAR,
- N_MATRIX,
- COUNT,
- PROX.
-*/
-
-/* Sets the value of OUTCASE which corresponds to VNAME
- to the value STR. VNAME must be of type string.
- */
+ bool parsed = (lex_token (lexer) == T_ID
+ && rowtype_from_string (lex_tokss (lexer), rt));
+ if (parsed)
+ lex_get (lexer);
+ return parsed;
+}
+\f
+struct matrix_format
+ {
+ bool span;
+ enum triangle
+ {
+ LOWER,
+ UPPER,
+ FULL
+ }
+ triangle;
+ enum diagonal
+ {
+ DIAGONAL,
+ NO_DIAGONAL
+ }
+ diagonal;
+
+ bool input_rowtype;
+ struct variable **input_vars;
+ size_t n_input_vars;
+
+ /* How to read matrices with each possible number of dimensions (0=scalar,
+ 1=vector, 2=matrix). */
+ struct matrix_sched
+ {
+ /* Number of rows and columns in the matrix: (1,1) for a scalar, (1,n) for
+ a vector, (n,n) for a matrix. */
+ int nr, nc;
+
+ /* Rows of data to read and the number of columns in each. Because we
+ often read just a triangle and sometimes omit the diagonal, 'n_rp' can
+ be less than 'nr' and 'rp[i]->y' isn't always 'y'. */
+ struct row_sched
+ {
+ /* The y-value of the row inside the matrix. */
+ int y;
+
+ /* first and last (exclusive) columns to read in this row. */
+ int x0, x1;
+ }
+ *rp;
+ size_t n_rp;
+ }
+ ms[3];
+
+ struct variable *rowtype;
+ struct variable *varname;
+ struct variable **cvars;
+ int n_cvars;
+ struct variable **svars;
+ size_t *svar_indexes;
+ size_t n_svars;
+ struct variable **fvars;
+ size_t *fvar_indexes;
+ size_t n_fvars;
+ int cells;
+ int n;
+
+ unsigned int pooled_rowtype_mask;
+ unsigned int factor_rowtype_mask;
+
+ struct content
+ {
+ bool open;
+ enum rowtype rowtype;
+ bool close;
+ }
+ *contents;
+ size_t n_contents;
+ };
+
static void
-set_varname_column (struct ccase *outcase, const struct variable *vname,
- const char *str)
+matrix_format_uninit (struct matrix_format *mf)
{
- int len = var_get_width (vname);
- uint8_t *s = case_str_rw (outcase, vname);
+ free (mf->input_vars);
+ for (int i = 0; i < 3; i++)
+ free (mf->ms[i].rp);
+ free (mf->cvars);
+ free (mf->svars);
+ free (mf->svar_indexes);
+ free (mf->fvars);
+ free (mf->fvar_indexes);
+ free (mf->contents);
+}
- strncpy (CHAR_CAST (char *, s), str, len);
+static void
+set_string (struct ccase *outcase, const struct variable *var,
+ struct substring src)
+{
+ struct substring dst = case_ss (outcase, var);
+ for (size_t i = 0; i < dst.length; i++)
+ dst.string[i] = i < src.length ? src.string[i] : ' ';
}
static void
-blank_varname_column (struct ccase *outcase, const struct variable *vname)
+parse_msg (struct dfm_reader *reader, const struct substring *token,
+ char *text, enum msg_severity severity)
+{
+ int first_column = 0;
+ if (token)
+ {
+ struct substring line = dfm_get_record (reader);
+ if (token->string >= line.string && token->string < ss_end (line))
+ first_column = ss_pointer_to_position (line, token->string) + 1;
+ }
+
+ int line_number = dfm_get_line_number (reader);
+ struct msg_location *location = xmalloc (sizeof *location);
+ *location = (struct msg_location) {
+ .file_name = xstrdup (dfm_get_file_name (reader)),
+ .first_line = line_number,
+ .last_line = line_number + 1,
+ .first_column = first_column,
+ .last_column = first_column ? first_column + token->length : 0,
+ };
+ struct msg *m = xmalloc (sizeof *m);
+ *m = (struct msg) {
+ .category = MSG_C_DATA,
+ .severity = severity,
+ .location = location,
+ .text = text,
+ };
+ msg_emit (m);
+}
+
+static void PRINTF_FORMAT (3, 4)
+parse_warning (struct dfm_reader *reader, const struct substring *token,
+ const char *format, ...)
{
- int len = var_get_width (vname);
- uint8_t *s = case_str_rw (outcase, vname);
+ va_list args;
+ va_start (args, format);
+ parse_msg (reader, token, xvasprintf (format, args), MSG_S_WARNING);
+ va_end (args);
+}
- memset (s, ' ', len);
+static void PRINTF_FORMAT (3, 4)
+parse_error (struct dfm_reader *reader, const struct substring *token,
+ const char *format, ...)
+{
+ va_list args;
+ va_start (args, format);
+ parse_msg (reader, token, xvasprintf (format, args), MSG_S_ERROR);
+ va_end (args);
}
-static struct casereader *
-preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux)
+/* Advance to beginning of next token. */
+static bool
+more_tokens (struct substring *p, struct dfm_reader *r)
{
- struct matrix_format *mformat = aux;
- const struct caseproto *proto = casereader_get_proto (casereader0);
- struct casewriter *writer = autopaging_writer_create (proto);
- struct ccase *prev_case = NULL;
- double **matrices = NULL;
- size_t n_splits = 0;
-
- const size_t sizeof_matrix =
- sizeof (double) * mformat->n_continuous_vars * mformat->n_continuous_vars;
-
-
- /* Make an initial pass to populate our temporary matrix */
- struct casereader *pass0 = casereader_clone (casereader0);
- struct ccase *c;
- union value *prev_values = XCALLOC (mformat->n_split_vars, union value);
- int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0;
- bool first_case = true;
- for (; (c = casereader_read (pass0)) != NULL; case_unref (c))
+ for (;;)
{
- int s;
- bool match = false;
- if (!first_case)
- {
- match = true;
- for (s = 0; s < mformat->n_split_vars; ++s)
- {
- const struct variable *svar = mformat->split_vars[s];
- const union value *sv = case_data (c, svar);
- if (! value_equal (prev_values + s, sv, var_get_width (svar)))
- {
- match = false;
- break;
- }
- }
- }
- first_case = false;
+ ss_ltrim (p, ss_cstr (CC_SPACES ","));
+ if (p->length)
+ return true;
+
+ dfm_forward_record (r);
+ if (dfm_eof (r))
+ return false;
+ *p = dfm_get_record (r);
+ }
+}
- if (matrices == NULL || ! match)
- {
- row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ?
- 1 : 0;
+static bool
+next_token (struct substring *p, struct dfm_reader *r, struct substring *token)
+{
+ if (!more_tokens (p, r))
+ return false;
- n_splits++;
- matrices = xrealloc (matrices, sizeof (double*) * n_splits);
- matrices[n_splits - 1] = xmalloc (sizeof_matrix);
- }
+ /* Collect token. */
+ int c = ss_first (*p);
+ if (c == '\'' || c == '"')
+ {
+ ss_advance (p, 1);
+ ss_get_until (p, c, token);
+ }
+ else
+ {
+ size_t n = 1;
+ for (;;)
+ {
+ c = ss_at (*p, n);
+ if (c == EOF
+ || ss_find_byte (ss_cstr (CC_SPACES ","), c) != SIZE_MAX
+ || ((c == '+' || c == '-')
+ && ss_find_byte (ss_cstr ("dDeE"),
+ ss_at (*p, n - 1)) == SIZE_MAX))
+ break;
+ n++;
+ }
+ ss_get_bytes (p, n, token);
+ }
+ return true;
+}
- for (s = 0; s < mformat->n_split_vars; ++s)
- {
- const struct variable *svar = mformat->split_vars[s];
- const union value *sv = case_data (c, svar);
- value_clone (prev_values + s, sv, var_get_width (svar));
- }
+static bool
+next_number (struct substring *p, struct dfm_reader *r, double *d)
+{
+ struct substring token;
+ if (!next_token (p, r, &token))
+ return false;
+
+ union value v;
+ char *error = data_in (token, dfm_reader_get_encoding (r), FMT_F,
+ settings_get_fmt_settings (), &v, 0, NULL);
+ if (error)
+ {
+ parse_error (r, &token, "%s", error);
+ free (error);
+ }
+ *d = v.f;
+ return true;
+}
- int c_offset = (mformat->triangle == UPPER) ? row : 0;
- if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL)
- c_offset++;
- const union value *v = case_data (c, mformat->rowtype);
- const char *val = CHAR_CAST (const char *, v->s);
- if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) ||
- 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH))
- {
- if (row >= mformat->n_continuous_vars)
- {
- msg (SE,
- _("There are %d variable declared but the data has at least %d matrix rows."),
- mformat->n_continuous_vars, row + 1);
- case_unref (c);
- casereader_destroy (pass0);
- free (prev_values);
- goto error;
- }
- int col;
- for (col = c_offset; col < mformat->n_continuous_vars; ++col)
- {
- const struct variable *var =
- dict_get_var (dict,
- 1 + col - c_offset +
- var_get_dict_index (mformat->varname));
+static bool
+next_rowtype (struct substring *p, struct dfm_reader *r, enum rowtype *rt)
+{
+ struct substring token;
+ if (!next_token (p, r, &token))
+ return false;
- double e = case_data (c, var)->f;
- if (e == SYSMIS)
- continue;
+ if (rowtype_from_string (token, rt))
+ return true;
- /* Fill in the lower triangle */
- (matrices[n_splits-1])[col + mformat->n_continuous_vars * row] = e;
+ parse_error (r, &token, _("Unknown row type \"%.*s\"."),
+ (int) token.length, token.string);
+ return false;
+}
- if (mformat->triangle != FULL)
- /* Fill in the upper triangle */
- (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e;
- }
- row++;
- }
- }
- casereader_destroy (pass0);
- free (prev_values);
+struct read_matrix_params
+ {
+ /* Adjustments to first and last row to read. */
+ int dy0, dy1;
- if (!matrices)
- goto error;
+ /* Left and right columns to read in first row, inclusive.
+ For x1, INT_MAX is the rightmost column. */
+ int x0, x1;
- /* Now make a second pass to fill in the other triangle from our
- temporary matrix */
- const int idx = var_get_dict_index (mformat->varname);
- row = 0;
+ /* Adjustment to x0 and x1 for each subsequent row we read. Each of these
+ is 0 to keep it the same or -1 or +1 to adjust it by that much. */
+ int dx0, dx1;
+ };
- if (mformat->n >= 0)
+static const struct read_matrix_params *
+get_read_matrix_params (const struct matrix_format *mf)
+{
+ if (mf->triangle == FULL)
{
- int col;
- struct ccase *outcase = case_create (proto);
- union value *v = case_data_rw (outcase, mformat->rowtype);
- memcpy (v->s, "N ", ROWTYPE_WIDTH);
- blank_varname_column (outcase, mformat->varname);
- for (col = 0; col < mformat->n_continuous_vars; ++col)
- {
- union value *dest_val =
- case_data_rw_idx (outcase,
- 1 + col + var_get_dict_index (mformat->varname));
- dest_val->f = mformat->n;
- }
- casewriter_write (writer, outcase);
+ /* 1 2 3 4
+ 2 1 5 6
+ 3 5 1 7
+ 4 6 7 1 */
+ static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 0, 0 };
+ return &rmp;
+ }
+ else if (mf->triangle == LOWER)
+ {
+ if (mf->diagonal == DIAGONAL)
+ {
+ /* 1 . . .
+ 2 1 . .
+ 3 5 1 .
+ 4 6 7 1 */
+ static const struct read_matrix_params rmp = { 0, 0, 0, 0, 0, 1 };
+ return &rmp;
+ }
+ else
+ {
+ /* . . . .
+ 2 . . .
+ 3 5 . .
+ 4 6 7 . */
+ static const struct read_matrix_params rmp = { 1, 0, 0, 0, 0, 1 };
+ return &rmp;
+ }
}
+ else if (mf->triangle == UPPER)
+ {
+ if (mf->diagonal == DIAGONAL)
+ {
+ /* 1 2 3 4
+ . 1 5 6
+ . . 1 7
+ . . . 1 */
+ static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 1, 0 };
+ return &rmp;
+ }
+ else
+ {
+ /* . 2 3 4
+ . . 5 6
+ . . . 7
+ . . . . */
+ static const struct read_matrix_params rmp = { 0, -1, 1, INT_MAX, 1, 0 };
+ return &rmp;
+ }
+ }
+ else
+ NOT_REACHED ();
+}
- n_splits = 0;
- prev_values = xcalloc (mformat->n_split_vars, sizeof *prev_values);
- first_case = true;
- for (; (c = casereader_read (casereader0)) != NULL; prev_case = c)
+static void
+schedule_matrices (struct matrix_format *mf)
+{
+ struct matrix_sched *ms0 = &mf->ms[0];
+ ms0->nr = 1;
+ ms0->nc = 1;
+ ms0->rp = xmalloc (sizeof *ms0->rp);
+ ms0->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = 1 };
+ ms0->n_rp = 1;
+
+ struct matrix_sched *ms1 = &mf->ms[1];
+ ms1->nr = 1;
+ ms1->nc = mf->n_cvars;
+ ms1->rp = xmalloc (sizeof *ms1->rp);
+ ms1->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = mf->n_cvars };
+ ms1->n_rp = 1;
+
+ struct matrix_sched *ms2 = &mf->ms[2];
+ ms2->nr = mf->n_cvars;
+ ms2->nc = mf->n_cvars;
+ ms2->rp = xmalloc (mf->n_cvars * sizeof *ms2->rp);
+ ms2->n_rp = 0;
+
+ const struct read_matrix_params *rmp = get_read_matrix_params (mf);
+ int x0 = rmp->x0;
+ int x1 = rmp->x1 < mf->n_cvars ? rmp->x1 : mf->n_cvars - 1;
+ int y0 = rmp->dy0;
+ int y1 = (int) mf->n_cvars + rmp->dy1;
+ for (int y = y0; y < y1; y++)
{
- int s;
- bool match = false;
- if (!first_case)
- {
- match = true;
- for (s = 0; s < mformat->n_split_vars; ++s)
- {
- const struct variable *svar = mformat->split_vars[s];
- const union value *sv = case_data (c, svar);
- if (! value_equal (prev_values + s, sv, var_get_width (svar)))
- {
- match = false;
- break;
- }
- }
- }
- first_case = false;
- if (! match)
- {
- n_splits++;
- row = 0;
- }
+ assert (x0 >= 0 && x0 < mf->n_cvars);
+ assert (x1 >= 0 && x1 < mf->n_cvars);
+ assert (x1 >= x0);
- for (s = 0; s < mformat->n_split_vars; ++s)
- {
- const struct variable *svar = mformat->split_vars[s];
- const union value *sv = case_data (c, svar);
- value_clone (prev_values + s, sv, var_get_width (svar));
- }
+ ms2->rp[ms2->n_rp++] = (struct row_sched) {
+ .y = y, .x0 = x0, .x1 = x1 + 1
+ };
- case_unref (prev_case);
- const union value *v = case_data (c, mformat->rowtype);
- const char *val = CHAR_CAST (const char *, v->s);
- if (mformat->n >= 0)
- {
- if (0 == strncasecmp (val, "n ", ROWTYPE_WIDTH) ||
- 0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH))
- {
- msg (SW,
- _("The N subcommand was specified, but a N record was also found in the data. The N record will be ignored."));
- continue;
- }
- }
+ x0 += rmp->dx0;
+ x1 += rmp->dx1;
+ }
+}
+
+static bool
+read_id_columns (const struct matrix_format *mf,
+ struct substring *p, struct dfm_reader *r,
+ double *d, enum rowtype *rt)
+{
+ for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+ if (!(mf->input_vars[i] == mf->rowtype
+ ? next_rowtype (p, r, rt)
+ : next_number (p, r, &d[i])))
+ return false;
+ return true;
+}
- struct ccase *outcase = case_create (proto);
- case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto));
+static bool
+equal_id_columns (const struct matrix_format *mf,
+ const double *a, const double *b)
+{
+ for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+ if (mf->input_vars[i] != mf->rowtype && a[i] != b[i])
+ return false;
+ return true;
+}
- if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) ||
- 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH))
- {
- int col;
- const struct variable *var = dict_get_var (dict, idx + 1 + row);
- set_varname_column (outcase, mformat->varname, var_get_name (var));
- value_copy (case_data_rw (outcase, mformat->rowtype), v, ROWTYPE_WIDTH);
+static bool
+equal_split_columns (const struct matrix_format *mf,
+ const double *a, const double *b)
+{
+ for (size_t i = 0; i < mf->n_svars; i++)
+ {
+ size_t idx = mf->svar_indexes[i];
+ if (a[idx] != b[idx])
+ return false;
+ }
+ return true;
+}
- for (col = 0; col < mformat->n_continuous_vars; ++col)
- {
- union value *dest_val =
- case_data_rw_idx (outcase,
- 1 + col + var_get_dict_index (mformat->varname));
- dest_val->f = (matrices[n_splits - 1])[col + mformat->n_continuous_vars * row];
- if (col == row && mformat->diagonal == NO_DIAGONAL)
- dest_val->f = 1.0;
- }
- row++;
- }
- else
- {
- blank_varname_column (outcase, mformat->varname);
- }
+static bool
+is_pooled (const struct matrix_format *mf, const double *d)
+{
+ for (size_t i = 0; i < mf->n_fvars; i++)
+ if (d[mf->fvar_indexes[i]] != SYSMIS)
+ return false;
+ return true;
+}
- /* Special case for SD and N_VECTOR: Rewrite as STDDEV and N respectively */
- if (0 == strncasecmp (val, "sd ", ROWTYPE_WIDTH))
- {
- value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH,
- (uint8_t *) "STDDEV", 6, ' ');
- }
- else if (0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH))
- {
- value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH,
- (uint8_t *) "N", 1, ' ');
- }
+static void
+matrix_sched_init (const struct matrix_format *mf, enum rowtype rt,
+ gsl_matrix *m)
+{
+ int n_dims = rowtype_dimensions (rt);
+ const struct matrix_sched *ms = &mf->ms[n_dims];
+ double diagonal = n_dims < 2 || rt != C_CORR ? SYSMIS : 1.0;
+ for (size_t y = 0; y < ms->nr; y++)
+ for (size_t x = 0; x < ms->nc; x++)
+ gsl_matrix_set (m, y, x, y == x ? diagonal : SYSMIS);
+}
+
+static void
+matrix_sched_output (const struct matrix_format *mf, enum rowtype rt,
+ gsl_matrix *m, const double *d, int split_num,
+ struct casewriter *w)
+{
+ int n_dims = rowtype_dimensions (rt);
+ const struct matrix_sched *ms = &mf->ms[n_dims];
- casewriter_write (writer, outcase);
+ if (rt == C_N_SCALAR)
+ {
+ for (size_t x = 1; x < mf->n_cvars; x++)
+ gsl_matrix_set (m, 0, x, gsl_matrix_get (m, 0, 0));
+ rt = C_N;
}
- /* If NODIAGONAL is specified, then a final case must be written */
- if (mformat->diagonal == NO_DIAGONAL)
+ for (int y = 0; y < ms->nr; y++)
{
- int col;
- struct ccase *outcase = case_create (proto);
+ struct ccase *c = case_create (casewriter_get_proto (w));
+ for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+ if (mf->input_vars[i] != mf->rowtype)
+ *case_num_rw (c, mf->input_vars[i]) = d[i];
+ if (mf->n_svars && !mf->svar_indexes)
+ *case_num_rw (c, mf->svars[0]) = split_num;
+ set_string (c, mf->rowtype, rowtype_name (rt));
+ const char *varname = n_dims == 2 ? var_get_name (mf->cvars[y]) : "";
+ set_string (c, mf->varname, ss_cstr (varname));
+ for (int x = 0; x < mf->n_cvars; x++)
+ *case_num_rw (c, mf->cvars[x]) = gsl_matrix_get (m, y, x);
+ casewriter_write (w, c);
+ }
+}
- if (prev_case)
- case_copy (outcase, 0, prev_case, 0, caseproto_get_n_widths (proto));
+static void
+matrix_sched_output_n (const struct matrix_format *mf, double n,
+ gsl_matrix *m, const double *d, int split_num,
+ struct casewriter *w)
+{
+ gsl_matrix_set (m, 0, 0, n);
+ matrix_sched_output (mf, C_N_SCALAR, m, d, split_num, w);
+}
- const struct variable *var = dict_get_var (dict, idx + 1 + row);
- set_varname_column (outcase, mformat->varname, var_get_name (var));
+static void
+check_eol (const struct matrix_format *mf, struct substring *p,
+ struct dfm_reader *r)
+{
+ if (!mf->span)
+ {
+ ss_ltrim (p, ss_cstr (CC_SPACES ","));
+ if (p->length)
+ {
+ parse_error (r, p, _("Extraneous data expecting end of line."));
+ p->length = 0;
+ }
+ }
+}
- for (col = 0; col < mformat->n_continuous_vars; ++col)
- {
- union value *dest_val =
- case_data_rw_idx (outcase, 1 + col +
- var_get_dict_index (mformat->varname));
- dest_val->f = (matrices[n_splits - 1]) [col + mformat->n_continuous_vars * row];
- if (col == row && mformat->diagonal == NO_DIAGONAL)
- dest_val->f = 1.0;
- }
+static void
+parse_data_with_rowtype (const struct matrix_format *mf,
+ struct dfm_reader *r, struct casewriter *w)
+{
+ if (dfm_eof (r))
+ return;
+ struct substring p = dfm_get_record (r);
- casewriter_write (writer, outcase);
- }
- free (prev_values);
+ double *prev = NULL;
+ gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars);
- if (prev_case)
- case_unref (prev_case);
+ double *d = xnmalloc (mf->n_input_vars, sizeof *d);
+ enum rowtype rt;
- int i;
- for (i = 0 ; i < n_splits; ++i)
- free (matrices[i]);
- free (matrices);
- struct casereader *reader1 = casewriter_make_reader (writer);
- casereader_destroy (casereader0);
- return reader1;
+ double *d_next = xnmalloc (mf->n_input_vars, sizeof *d_next);
+ if (!read_id_columns (mf, &p, r, d, &rt))
+ goto exit;
+ for (;;)
+ {
+ /* If this has rowtype N but there was an N subcommand, then the
+ subcommand takes precedence, so we will suppress outputting this
+ record. We still need to parse it, though, so we can't skip other
+ work. */
+ bool suppress_output = mf->n >= 0 && (rt == C_N || rt == C_N_SCALAR);
+ if (suppress_output)
+ parse_error (r, NULL, _("N record is not allowed with N subcommand. "
+ "Ignoring N record."));
+
+ /* If there's an N subcommand, and this is a new split, then output an N
+ record. */
+ if (mf->n >= 0 && (!prev || !equal_split_columns (mf, prev, d)))
+ {
+ matrix_sched_output_n (mf, mf->n, m, d, 0, w);
-error:
- if (prev_case)
- case_unref (prev_case);
-
- if (matrices)
- for (i = 0 ; i < n_splits; ++i)
- free (matrices[i]);
- free (matrices);
- casereader_destroy (casereader0);
- casewriter_destroy (writer);
- return NULL;
+ if (!prev)
+ prev = xnmalloc (mf->n_input_vars, sizeof *prev);
+ memcpy (prev, d, mf->n_input_vars * sizeof *prev);
+ }
+
+ /* Usually users don't provide the CONTENTS subcommand with ROWTYPE_, but
+ if they did then warn if ROWTYPE_ is an unexpected type. */
+ if (mf->factor_rowtype_mask || mf->pooled_rowtype_mask)
+ {
+ const char *name = rowtype_name (rt).string;
+ if (is_pooled (mf, d))
+ {
+ if (!((1u << rt) & mf->pooled_rowtype_mask))
+ parse_warning (r, NULL, _("Data contains pooled row type %s not "
+ "included in CONTENTS."), name);
+ }
+ else
+ {
+ if (!((1u << rt) & mf->factor_rowtype_mask))
+ parse_warning (r, NULL, _("Data contains with-factors row type "
+ "%s not included in CONTENTS."), name);
+ }
+ }
+
+ /* Initialize the matrix to be filled-in. */
+ int n_dims = rowtype_dimensions (rt);
+ const struct matrix_sched *ms = &mf->ms[n_dims];
+ matrix_sched_init (mf, rt, m);
+
+ enum rowtype rt_next;
+ bool eof;
+
+ size_t n_rows;
+ for (n_rows = 1; ; n_rows++)
+ {
+ if (n_rows <= ms->n_rp)
+ {
+ const struct row_sched *rs = &ms->rp[n_rows - 1];
+ size_t y = rs->y;
+ for (size_t x = rs->x0; x < rs->x1; x++)
+ {
+ double e;
+ if (!next_number (&p, r, &e))
+ goto exit;
+ gsl_matrix_set (m, y, x, e);
+ if (n_dims == 2 && mf->triangle != FULL)
+ gsl_matrix_set (m, x, y, e);
+ }
+ check_eol (mf, &p, r);
+ }
+ else
+ {
+ /* Suppress bad input data. We'll issue an error later. */
+ p.length = 0;
+ }
+
+ eof = (!more_tokens (&p, r)
+ || !read_id_columns (mf, &p, r, d_next, &rt_next));
+ if (eof)
+ break;
+
+ if (!equal_id_columns (mf, d, d_next) || rt_next != rt)
+ break;
+ }
+ if (!suppress_output)
+ matrix_sched_output (mf, rt, m, d, 0, w);
+
+ if (n_rows != ms->n_rp)
+ parse_error (r, NULL,
+ _("Matrix %s had %zu rows but %zu rows were expected."),
+ rowtype_name (rt).string, n_rows, ms->n_rp);
+ if (eof)
+ break;
+
+ double *d_tmp = d;
+ d = d_next;
+ d_next = d_tmp;
+
+ rt = rt_next;
+ }
+
+exit:
+ free (prev);
+ gsl_matrix_free (m);
+ free (d);
+ free (d_next);
}
-int
-cmd_matrix (struct lexer *lexer, struct dataset *ds)
+static void
+parse_matrix_without_rowtype (const struct matrix_format *mf,
+ struct substring *p, struct dfm_reader *r,
+ gsl_matrix *m, enum rowtype rowtype, bool pooled,
+ int split_num, struct casewriter *w)
{
- struct dictionary *dict;
- struct data_parser *parser;
- struct dfm_reader *reader;
- struct file_handle *fh = NULL;
- char *encoding = NULL;
- struct matrix_format mformat;
- int i;
- size_t n_names;
- char **names = NULL;
+ int n_dims = rowtype_dimensions (rowtype);
+ const struct matrix_sched *ms = &mf->ms[n_dims];
- mformat.triangle = LOWER;
- mformat.diagonal = DIAGONAL;
- mformat.n_split_vars = 0;
- mformat.split_vars = NULL;
- mformat.n = -1;
+ double *d = xnmalloc (mf->n_input_vars, sizeof *d);
+ matrix_sched_init (mf, rowtype, m);
+ for (size_t i = 0; i < ms->n_rp; i++)
+ {
+ int y = ms->rp[i].y;
+ int k = 0;
+ int h = 0;
+ for (size_t j = 0; j < mf->n_input_vars; j++)
+ {
+ const struct variable *iv = mf->input_vars[j];
+ if (k < mf->n_cvars && iv == mf->cvars[k])
+ {
+ if (k < ms->rp[i].x1 - ms->rp[i].x0)
+ {
+ double e;
+ if (!next_number (p, r, &e))
+ goto exit;
+
+ int x = k + ms->rp[i].x0;
+ gsl_matrix_set (m, y, x, e);
+ if (n_dims == 2 && mf->triangle != FULL)
+ gsl_matrix_set (m, x, y, e);
+ }
+ k++;
+ continue;
+ }
+ if (h < mf->n_fvars && iv == mf->fvars[h])
+ {
+ h++;
+ if (pooled)
+ {
+ d[j] = SYSMIS;
+ continue;
+ }
+ }
+
+ double e;
+ if (!next_number (p, r, &e))
+ goto exit;
+ d[j] = e;
+ }
+ check_eol (mf, p, r);
+ }
+
+ matrix_sched_output (mf, rowtype, m, d, split_num, w);
+exit:
+ free (d);
+}
- dict = (in_input_program ()
- ? dataset_dict (ds)
- : dict_create (get_default_encoding ()));
- parser = data_parser_create (dict);
- reader = NULL;
+static void
+parse_data_without_rowtype (const struct matrix_format *mf,
+ struct dfm_reader *r, struct casewriter *w)
+{
+ if (dfm_eof (r))
+ return;
+ struct substring p = dfm_get_record (r);
- data_parser_set_type (parser, DP_DELIMITED);
- data_parser_set_warn_missing_fields (parser, false);
- data_parser_set_span (parser, false);
+ gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars);
- mformat.rowtype = dict_create_var (dict, "ROWTYPE_", ROWTYPE_WIDTH);
+ int split_num = 1;
+ do
+ {
+ for (size_t i = 0; i < mf->n_contents; )
+ {
+ size_t j = i;
+ if (mf->contents[i].open)
+ while (!mf->contents[j].close)
+ j++;
+
+ if (mf->contents[i].open)
+ {
+ for (size_t k = 0; k < mf->cells; k++)
+ for (size_t h = i; h <= j; h++)
+ parse_matrix_without_rowtype (mf, &p, r, m,
+ mf->contents[h].rowtype, false,
+ split_num, w);
+ }
+ else
+ parse_matrix_without_rowtype (mf, &p, r, m, mf->contents[i].rowtype,
+ true, split_num, w);
+ i = j + 1;
+ }
- mformat.n_continuous_vars = 0;
- mformat.n_split_vars = 0;
+ split_num++;
+ }
+ while (more_tokens (&p, r));
- if (! lex_force_match_id (lexer, "VARIABLES"))
- goto error;
+ gsl_matrix_free (m);
+}
+/* Parses VARIABLES=varnames for MATRIX DATA and returns a dictionary with the
+ named variables in it. */
+static struct dictionary *
+parse_matrix_data_variables (struct lexer *lexer)
+{
+ if (!lex_force_match_id (lexer, "VARIABLES"))
+ return NULL;
lex_match (lexer, T_EQUALS);
- if (! parse_mixed_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE))
+ struct dictionary *dict = dict_create (get_default_encoding ());
+
+ size_t n_names = 0;
+ char **names = NULL;
+ if (!parse_DATA_LIST_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE))
{
- int i;
- for (i = 0; i < n_names; ++i)
- free (names[i]);
- free (names);
- goto error;
+ dict_unref (dict);
+ return NULL;
}
- int longest_name = 0;
- for (i = 0; i < n_names; ++i)
+ for (size_t i = 0; i < n_names; i++)
+ if (!strcasecmp (names[i], "ROWTYPE_"))
+ dict_create_var_assert (dict, "ROWTYPE_", 8);
+ else
+ dict_create_var_assert (dict, names[i], 0);
+
+ for (size_t i = 0; i < n_names; ++i)
+ free (names[i]);
+ free (names);
+
+ if (dict_lookup_var (dict, "VARNAME_"))
{
- maximize_int (&longest_name, strlen (names[i]));
+ msg (SE, _("VARIABLES may not include VARNAME_."));
+ dict_unref (dict);
+ return NULL;
}
+ return dict;
+}
- mformat.varname = dict_create_var (dict, "VARNAME_",
- 8 * DIV_RND_UP (longest_name, 8));
+static bool
+parse_matrix_data_subvars (struct lexer *lexer, struct dictionary *dict,
+ bool *taken_vars,
+ struct variable ***vars, size_t **indexes,
+ size_t *n_vars)
+{
+ if (!parse_variables (lexer, dict, vars, n_vars, 0))
+ return false;
- for (i = 0; i < n_names; ++i)
+ *indexes = xnmalloc (*n_vars, sizeof **indexes);
+ for (size_t i = 0; i < *n_vars; i++)
{
- if (0 == strcasecmp (names[i], "ROWTYPE_"))
- {
- const struct fmt_spec fmt = fmt_for_input (FMT_A, 8, 0);
- data_parser_add_delimited_field (parser,
- &fmt,
- var_get_case_index (mformat.rowtype),
- "ROWTYPE_");
- }
- else
- {
- const struct fmt_spec fmt = fmt_for_input (FMT_F, 10, 4);
- struct variable *v = dict_create_var (dict, names[i], 0);
- var_set_both_formats (v, &fmt);
- data_parser_add_delimited_field (parser,
- &fmt,
- var_get_case_index (mformat.varname) +
- ++mformat.n_continuous_vars,
- names[i]);
- }
+ struct variable *v = (*vars)[i];
+ if (!strcasecmp (var_get_name (v), "ROWTYPE_"))
+ {
+ msg (SE, _("ROWTYPE_ is not allowed on SPLIT or FACTORS."));
+ goto error;
+ }
+ (*indexes)[i] = var_get_dict_index (v);
+
+ bool *tv = &taken_vars[var_get_dict_index (v)];
+ if (*tv)
+ {
+ msg (SE, _("%s may not appear on both SPLIT and FACTORS."),
+ var_get_name (v));
+ goto error;
+ }
+ *tv = true;
+
+ var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 4 });
}
- for (i = 0; i < n_names; ++i)
- free (names[i]);
- free (names);
+ return true;
+
+error:
+ free (*vars);
+ *vars = NULL;
+ *n_vars = 0;
+ free (*indexes);
+ *indexes = NULL;
+ return false;
+}
+
+int
+cmd_matrix_data (struct lexer *lexer, struct dataset *ds)
+{
+ struct dictionary *dict = parse_matrix_data_variables (lexer);
+ if (!dict)
+ return CMD_FAILURE;
+
+ size_t n_input_vars = dict_get_var_cnt (dict);
+ struct variable **input_vars = xnmalloc (n_input_vars, sizeof *input_vars);
+ for (size_t i = 0; i < n_input_vars; i++)
+ input_vars[i] = dict_get_var (dict, i);
+
+ int varname_width = 8;
+ for (size_t i = 0; i < n_input_vars; i++)
+ {
+ int w = strlen (var_get_name (input_vars[i]));
+ varname_width = MAX (w, varname_width);
+ }
+
+ struct variable *rowtype = dict_lookup_var (dict, "ROWTYPE_");
+ bool input_rowtype = rowtype != NULL;
+ if (!rowtype)
+ rowtype = dict_create_var_assert (dict, "ROWTYPE_", 8);
+
+ struct matrix_format mf = {
+ .input_rowtype = input_rowtype,
+ .input_vars = input_vars,
+ .n_input_vars = n_input_vars,
+ .rowtype = rowtype,
+ .varname = dict_create_var_assert (dict, "VARNAME_", varname_width),
+
+ .triangle = LOWER,
+ .diagonal = DIAGONAL,
+ .n = -1,
+ .cells = -1,
+ };
+
+ bool *taken_vars = xzalloc (n_input_vars);
+ if (input_rowtype)
+ taken_vars[var_get_dict_index (rowtype)] = true;
+
+ struct file_handle *fh = NULL;
while (lex_token (lexer) != T_ENDCMD)
{
- if (! lex_force_match (lexer, T_SLASH))
+ if (!lex_force_match (lexer, T_SLASH))
goto error;
if (lex_match_id (lexer, "N"))
{
lex_match (lexer, T_EQUALS);
- if (! lex_force_int_range (lexer, "N", 0, INT_MAX))
+ if (!lex_force_int_range (lexer, "N", 0, INT_MAX))
goto error;
- mformat.n = lex_integer (lexer);
+ mf.n = lex_integer (lexer);
lex_get (lexer);
}
else if (lex_match_id (lexer, "FORMAT"))
{
lex_match (lexer, T_EQUALS);
- while (lex_token (lexer) != T_SLASH && (lex_token (lexer) != T_ENDCMD))
+ while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
{
if (lex_match_id (lexer, "LIST"))
- {
- data_parser_set_span (parser, false);
- }
+ mf.span = false;
else if (lex_match_id (lexer, "FREE"))
- {
- data_parser_set_span (parser, true);
- }
+ mf.span = true;
else if (lex_match_id (lexer, "UPPER"))
- {
- mformat.triangle = UPPER;
- }
+ mf.triangle = UPPER;
else if (lex_match_id (lexer, "LOWER"))
- {
- mformat.triangle = LOWER;
- }
+ mf.triangle = LOWER;
else if (lex_match_id (lexer, "FULL"))
- {
- mformat.triangle = FULL;
- }
+ mf.triangle = FULL;
else if (lex_match_id (lexer, "DIAGONAL"))
- {
- mformat.diagonal = DIAGONAL;
- }
+ mf.diagonal = DIAGONAL;
else if (lex_match_id (lexer, "NODIAGONAL"))
- {
- mformat.diagonal = NO_DIAGONAL;
- }
+ mf.diagonal = NO_DIAGONAL;
else
{
lex_error (lexer, NULL);
lex_match (lexer, T_EQUALS);
fh_unref (fh);
fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
- if (fh == NULL)
+ if (!fh)
goto error;
}
- else if (lex_match_id (lexer, "SPLIT"))
+ else if (!mf.n_svars && lex_match_id (lexer, "SPLIT"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (!mf.input_rowtype
+ && lex_token (lexer) == T_ID
+ && !dict_lookup_var (dict, lex_tokcstr (lexer)))
+ {
+ mf.svars = xmalloc (sizeof *mf.svars);
+ mf.svars[0] = dict_create_var_assert (dict, lex_tokcstr (lexer),
+ 0);
+ var_set_both_formats (
+ mf.svars[0], &(struct fmt_spec) { .type = FMT_F, .w = 4 });
+ mf.n_svars = 1;
+ lex_get (lexer);
+ }
+ else if (!parse_matrix_data_subvars (lexer, dict, taken_vars,
+ &mf.svars, &mf.svar_indexes,
+ &mf.n_svars))
+ goto error;
+ }
+ else if (!mf.n_fvars && lex_match_id (lexer, "FACTORS"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (!parse_matrix_data_subvars (lexer, dict, taken_vars,
+ &mf.fvars, &mf.fvar_indexes,
+ &mf.n_fvars))
+ goto error;
+ }
+ else if (lex_match_id (lexer, "CELLS"))
{
+ if (mf.input_rowtype)
+ msg (SW, _("CELLS is ignored when VARIABLES includes ROWTYPE_"));
+
lex_match (lexer, T_EQUALS);
- if (! parse_variables (lexer, dict, &mformat.split_vars, &mformat.n_split_vars, 0))
- {
- free (mformat.split_vars);
- goto error;
- }
- int i;
- for (i = 0; i < mformat.n_split_vars; ++i)
- {
- const struct fmt_spec fmt = fmt_for_input (FMT_F, 4, 0);
- var_set_both_formats (mformat.split_vars[i], &fmt);
- }
- dict_reorder_vars (dict, mformat.split_vars, mformat.n_split_vars);
- mformat.n_continuous_vars -= mformat.n_split_vars;
+
+ if (!lex_force_int_range (lexer, "CELLS", 0, INT_MAX))
+ goto error;
+
+ mf.cells = lex_integer (lexer);
+ lex_get (lexer);
}
+ else if (lex_match_id (lexer, "CONTENTS"))
+ {
+ lex_match (lexer, T_EQUALS);
+
+ size_t allocated_contents = mf.n_contents;
+ bool in_parens = false;
+ for (;;)
+ {
+ bool open = !in_parens && lex_match (lexer, T_LPAREN);
+ enum rowtype rt;
+ if (!rowtype_parse (lexer, &rt))
+ {
+ if (open || in_parens || (lex_token (lexer) != T_ENDCMD
+ && lex_token (lexer) != T_SLASH))
+ {
+ lex_error (lexer, _("Row type keyword expected."));
+ goto error;
+ }
+ break;
+ }
+
+ if (open)
+ in_parens = true;
+
+ if (in_parens)
+ mf.factor_rowtype_mask |= 1u << rt;
+ else
+ mf.pooled_rowtype_mask |= 1u << rt;
+
+ bool close = in_parens && lex_match (lexer, T_RPAREN);
+ if (close)
+ in_parens = false;
+
+ if (mf.n_contents >= allocated_contents)
+ mf.contents = x2nrealloc (mf.contents, &allocated_contents,
+ sizeof *mf.contents);
+ mf.contents[mf.n_contents++] = (struct content) {
+ .open = open, .rowtype = rt, .close = close
+ };
+ }
+ }
else
{
lex_error (lexer, NULL);
goto error;
}
}
-
- if (mformat.diagonal == NO_DIAGONAL && mformat.triangle == FULL)
+ if (mf.diagonal == NO_DIAGONAL && mf.triangle == FULL)
{
- msg (SE, _("FORMAT = FULL and FORMAT = NODIAGONAL are mutually exclusive."));
+ msg (SE, _("FORMAT=FULL and FORMAT=NODIAGONAL are mutually exclusive."));
goto error;
}
+ if (!mf.input_rowtype)
+ {
+ if (mf.cells < 0)
+ {
+ if (mf.n_fvars)
+ {
+ msg (SE, _("CELLS is required when factor variables are specified "
+ "and VARIABLES does not include ROWTYPE_."));
+ goto error;
+ }
+ mf.cells = 1;
+ }
- if (fh == NULL)
- fh = fh_inline_file ();
- fh_set_default_handle (fh);
+ if (!mf.n_contents)
+ {
+ msg (SW, _("CONTENTS was not specified and VARIABLES does not "
+ "include ROWTYPE_. Assuming CONTENTS=CORR."));
- if (!data_parser_any_fields (parser))
+ mf.n_contents = 1;
+ mf.contents = xmalloc (sizeof *mf.contents);
+ *mf.contents = (struct content) { .rowtype = C_CORR };
+ }
+ }
+ mf.cvars = xmalloc (mf.n_input_vars * sizeof *mf.cvars);
+ for (size_t i = 0; i < mf.n_input_vars; i++)
+ if (!taken_vars[i])
+ {
+ struct variable *v = input_vars[i];
+ mf.cvars[mf.n_cvars++] = v;
+ var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 10,
+ .d = 4 });
+ }
+ if (!mf.n_cvars)
{
- msg (SE, _("At least one variable must be specified."));
+ msg (SE, _("At least one continuous variable is required."));
goto error;
}
-
- if (lex_end_of_command (lexer) != CMD_SUCCESS)
- goto error;
-
- reader = dfm_open_reader (fh, lexer, encoding);
- if (reader == NULL)
- goto error;
-
- if (in_input_program ())
+ if (mf.input_rowtype)
{
- struct data_list_trns *trns = xmalloc (sizeof *trns);
- trns->parser = parser;
- trns->reader = reader;
- trns->end = NULL;
- add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
+ for (size_t i = 0; i < mf.n_cvars; i++)
+ if (mf.cvars[i] != input_vars[n_input_vars - mf.n_cvars + i])
+ {
+ msg (SE, _("VARIABLES includes ROWTYPE_ but the continuous "
+ "variables are not the last ones on VARIABLES."));
+ goto error;
+ }
}
- else
+ unsigned int rowtype_mask = mf.pooled_rowtype_mask | mf.factor_rowtype_mask;
+ if (rowtype_mask & (1u << C_N) && mf.n >= 0)
{
- data_parser_make_active_file (parser, ds, reader, dict, preprocess,
- &mformat);
+ msg (SE, _("Cannot specify N on CONTENTS along with the N subcommand."));
+ goto error;
}
- fh_unref (fh);
- free (encoding);
- free (mformat.split_vars);
+ struct variable **order = xnmalloc (dict_get_var_cnt (dict), sizeof *order);
+ size_t n_order = 0;
+ for (size_t i = 0; i < mf.n_svars; i++)
+ order[n_order++] = mf.svars[i];
+ order[n_order++] = mf.rowtype;
+ for (size_t i = 0; i < mf.n_fvars; i++)
+ order[n_order++] = mf.fvars[i];
+ order[n_order++] = mf.varname;
+ for (size_t i = 0; i < mf.n_cvars; i++)
+ order[n_order++] = mf.cvars[i];
+ assert (n_order == dict_get_var_cnt (dict));
+ dict_reorder_vars (dict, order, n_order);
+ free (order);
- return CMD_DATA_LIST;
+ dict_set_split_vars (dict, mf.svars, mf.n_svars);
- error:
- data_parser_destroy (parser);
- if (!in_input_program ())
- dict_unref (dict);
- fh_unref (fh);
- free (encoding);
- free (mformat.split_vars);
- return CMD_CASCADING_FAILURE;
-}
+ schedule_matrices (&mf);
-\f
-/* Input procedure. */
+ if (fh == NULL)
+ fh = fh_inline_file ();
-/* Destroys DATA LIST transformation TRNS.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-data_list_trns_free (void *trns_)
-{
- struct data_list_trns *trns = trns_;
- data_parser_destroy (trns->parser);
- dfm_close_reader (trns->reader);
- free (trns);
- return true;
-}
+ if (lex_end_of_command (lexer) != CMD_SUCCESS)
+ goto error;
-/* Handle DATA LIST transformation TRNS, parsing data into *C. */
-static int
-data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
-{
- struct data_list_trns *trns = trns_;
- int retval;
+ struct dfm_reader *reader = dfm_open_reader (fh, lexer, NULL);
+ if (reader == NULL)
+ goto error;
- *c = case_unshare (*c);
- if (data_parser_parse (trns->parser, trns->reader, *c))
- retval = TRNS_CONTINUE;
- else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
- {
- /* An I/O error, or encountering end of file for a second
- time, should be escalated into a more serious error. */
- retval = TRNS_ERROR;
- }
+ struct casewriter *writer = autopaging_writer_create (dict_get_proto (dict));
+ if (mf.input_rowtype)
+ parse_data_with_rowtype (&mf, reader, writer);
else
- retval = TRNS_END_FILE;
+ parse_data_without_rowtype (&mf, reader, writer);
+ dfm_close_reader (reader);
- /* If there was an END subcommand handle it. */
- if (trns->end != NULL)
- {
- double *end = &case_data_rw (*c, trns->end)->f;
- if (retval == TRNS_END_FILE)
- {
- *end = 1.0;
- retval = TRNS_CONTINUE;
- }
- else
- *end = 0.0;
- }
+ dataset_set_dict (ds, dict);
+ dataset_set_source (ds, casewriter_make_reader (writer));
- return retval;
+ matrix_format_uninit (&mf);
+ free (taken_vars);
+ fh_unref (fh);
+
+ return CMD_SUCCESS;
+
+ error:
+ matrix_format_uninit (&mf);
+ free (taken_vars);
+ dict_unref (dict);
+ fh_unref (fh);
+ return CMD_FAILURE;
}
dnl
AT_BANNER([MATRIX DATA])
-AT_SETUP([Matrix data (lower file)])
-
-AT_DATA([matrix-data.pspp], [dnl
-matrix data
- variables = rowtype_
- var01 TO var08
- /format = lower diagonal
- /file = 'matrix.dat'
- .
-
-list.
+dnl Keep this test in sync with Example 1 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - LOWER DIAGONAL with ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=ROWTYPE_ var01 TO var08
+ /FILE='matrix-data.txt'.
+FORMATS var01 TO var08(F5.2).
+LIST.
])
-
-AT_DATA([matrix.dat], [dnl
-mean 24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7
-sd 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5
-n 92 92 92 92 92 92 92 92
-corr 1.00
-corr .18 1.00
-corr -.22 -.17 1.00
-corr .36 .31 -.14 1.00
-corr .27 .16 -.12 .22 1.00
-corr .33 .15 -.17 .24 .21 1.00
-corr .50 .29 -.20 .32 .12 .38 1.00
-corr .17 .29 -.05 .20 .27 .20 .04 1.00
+AT_DATA([matrix-data.txt], [dnl
+MEAN 24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7
+SD 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5
+N 92 92 92 92 92 92 92 92
+'CORR' 1.00
+CORR .18 1.00
+CORR -.22 -.17 1.00
+"CORR" .36 .31 -.14 1.00
+COR .27 .16 -.12 .22 1.00
+CORR .33 .15 -.17 .24 .21 1.00
+CORR .50 .29 -.20 .32 .12 .38 1.00
+CORR .17 .29 -.05 .20 .27 .20 .04 1.00
])
-
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
Table: Data List
ROWTYPE_,VARNAME_,var01,var02,var03,var04,var05,var06,var07,var08
-mean,,24.3000,5.4000,69.7000,20.1000,13.4000,2.7000,27.9000,3.7000
-STDDEV,,5.7000,1.5000,23.5000,5.8000,2.8000,4.5000,5.4000,1.5000
-n,,92.0000,92.0000,92.0000,92.0000,92.0000,92.0000,92.0000,92.0000
-corr,var01,1.0000,.1800,-.2200,.3600,.2700,.3300,.5000,.1700
-corr,var02,.1800,1.0000,-.1700,.3100,.1600,.1500,.2900,.2900
-corr,var03,-.2200,-.1700,1.0000,-.1400,-.1200,-.1700,-.2000,-.0500
-corr,var04,.3600,.3100,-.1400,1.0000,.2200,.2400,.3200,.2000
-corr,var05,.2700,.1600,-.1200,.2200,1.0000,.2100,.1200,.2700
-corr,var06,.3300,.1500,-.1700,.2400,.2100,1.0000,.3800,.2000
-corr,var07,.5000,.2900,-.2000,.3200,.1200,.3800,1.0000,.0400
-corr,var08,.1700,.2900,-.0500,.2000,.2700,.2000,.0400,1.0000
+MEAN,,24.30,5.40,69.70,20.10,13.40,2.70,27.90,3.70
+STDDEV,,5.70,1.50,23.50,5.80,2.80,4.50,5.40,1.50
+N,,92.00,92.00,92.00,92.00,92.00,92.00,92.00,92.00
+CORR,var01,1.00,.18,-.22,.36,.27,.33,.50,.17
+CORR,var02,.18,1.00,-.17,.31,.16,.15,.29,.29
+CORR,var03,-.22,-.17,1.00,-.14,-.12,-.17,-.20,-.05
+CORR,var04,.36,.31,-.14,1.00,.22,.24,.32,.20
+CORR,var05,.27,.16,-.12,.22,1.00,.21,.12,.27
+CORR,var06,.33,.15,-.17,.24,.21,1.00,.38,.20
+CORR,var07,.50,.29,-.20,.32,.12,.38,1.00,.04
+CORR,var08,.17,.29,-.05,.20,.27,.20,.04,1.00
])
AT_CLEANUP
-
-
-AT_SETUP([Matrix data (upper)])
-
-AT_DATA([matrix-data.pspp], [dnl
+AT_SETUP([MATRIX DATA - UPPER DIAGONAL with ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
matrix data
variables = rowtype_ var01 var02 var03 var04
/format = upper diagonal.
begin data
mean 34 35 36 37
sd 22 11 55 66
-n_vector 100 101 102 103
+n_ve 100 101 102 103
corr 1 9 8 7
corr 1 6 5
corr 1 4
list.
])
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
Table: Data List
ROWTYPE_,VARNAME_,var01,var02,var03,var04
-mean,,34.0000,35.0000,36.0000,37.0000
+MEAN,,34.0000,35.0000,36.0000,37.0000
STDDEV,,22.0000,11.0000,55.0000,66.0000
N,,100.0000,101.0000,102.0000,103.0000
-corr,var01,1.0000,9.0000,8.0000,7.0000
-corr,var02,9.0000,1.0000,6.0000,5.0000
-corr,var03,8.0000,6.0000,1.0000,4.0000
-corr,var04,7.0000,5.0000,4.0000,1.0000
+CORR,var01,1.0000,9.0000,8.0000,7.0000
+CORR,var02,9.0000,1.0000,6.0000,5.0000
+CORR,var03,8.0000,6.0000,1.0000,4.0000
+CORR,var04,7.0000,5.0000,4.0000,1.0000
])
-
AT_CLEANUP
-AT_SETUP([Matrix data (full)])
-
+AT_SETUP([MATRIX DATA - FULL with ROWTYPE_])
dnl Just for fun, this one is in a different case.
-AT_DATA([matrix-data.pspp], [dnl
+AT_DATA([matrix-data.sps], [dnl
matrix data
variables = ROWTYPE_ var01 var02 var03 var04
/format = full diagonal.
list.
])
-
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
Table: Data List
ROWTYPE_,VARNAME_,var01,var02,var03,var04
MEAN,,34.0000,35.0000,36.0000,37.0000
CORR,var03,8.0000,6.0000,1.0000,4.0000
CORR,var04,7.0000,5.0000,4.0000,1.0000
])
-
AT_CLEANUP
-AT_SETUP([Matrix data (upper nodiagonal)])
-
-AT_DATA([matrix-data.pspp], [dnl
+AT_SETUP([MATRIX DATA - UPPER NODIAGONAL with ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
matrix data
variables = rowtype_ var01 var02 var03 var04
/format = upper nodiagonal.
list.
])
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
Table: Data List
ROWTYPE_,VARNAME_,var01,var02,var03,var04
-mean,,34.0000,35.0000,36.0000,37.0000
+MEAN,,34.0000,35.0000,36.0000,37.0000
STDDEV,,22.0000,11.0000,55.0000,66.0000
-n,,100.0000,101.0000,102.0000,103.0000
-corr,var01,1.0000,9.0000,8.0000,7.0000
-corr,var02,9.0000,1.0000,6.0000,5.0000
-corr,var03,8.0000,6.0000,1.0000,4.0000
-corr,var04,7.0000,5.0000,4.0000,1.0000
+N,,100.0000,101.0000,102.0000,103.0000
+CORR,var01,1.0000,9.0000,8.0000,7.0000
+CORR,var02,9.0000,1.0000,6.0000,5.0000
+CORR,var03,8.0000,6.0000,1.0000,4.0000
+CORR,var04,7.0000,5.0000,4.0000,1.0000
])
-
AT_CLEANUP
+dnl Keep this test in sync with Example 2 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - UPPER NODIAGONAL with ROWTYPE_ - 2])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=ROWTYPE_ var01 TO var08
+ /FORMAT=UPPER NODIAGONAL.
+BEGIN DATA.
+MEAN 24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7
+SD 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5
+N 92 92 92 92 92 92 92 92
+CORR .17 .50 -.33 .27 .36 -.22 .18
+CORR .29 .29 -.20 .32 .12 .38
+CORR .05 .20 -.15 .16 .21
+CORR .20 .32 -.17 .12
+CORR .27 .12 -.24
+CORR -.20 -.38
+CORR .04
+END DATA.
+FORMATS var01 TO var08(F6.2).
+LIST.
+])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
+Table: Data List
+ROWTYPE_,VARNAME_,var01,var02,var03,var04,var05,var06,var07,var08
+MEAN,,24.30,5.40,69.70,20.10,13.40,2.70,27.90,3.70
+STDDEV,,5.70,1.50,23.50,5.80,2.80,4.50,5.40,1.50
+N,,92.00,92.00,92.00,92.00,92.00,92.00,92.00,92.00
+CORR,var01,1.00,.17,.50,-.33,.27,.36,-.22,.18
+CORR,var02,.17,1.00,.29,.29,-.20,.32,.12,.38
+CORR,var03,.50,.29,1.00,.05,.20,-.15,.16,.21
+CORR,var04,-.33,.29,.05,1.00,.20,.32,-.17,.12
+CORR,var05,.27,-.20,.20,.20,1.00,.27,.12,-.24
+CORR,var06,.36,.32,-.15,.32,.27,1.00,-.20,-.38
+CORR,var07,-.22,.12,.16,-.17,.12,-.20,1.00,.04
+CORR,var08,.18,.38,.21,.12,-.24,-.38,.04,1.00
+])
+AT_CLEANUP
-AT_SETUP([Matrix data (lower nodiagonal)])
-
-AT_DATA([matrix-data.pspp], [dnl
+AT_SETUP([MATRIX DATA - LOWER NODIAGONAL with ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
matrix data
variables = rowtype_ var01 var02 var03 var04
- /format = lower nodiagonal.
+ /format = lower nodiagonal
+ /cells = 2.
begin data
mean 34 35 36 37
list.
])
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
+matrix-data.sps:4: warning: MATRIX DATA: CELLS is ignored when VARIABLES includes ROWTYPE_
+
Table: Data List
ROWTYPE_,VARNAME_,var01,var02,var03,var04
-mean,,34.0000,35.0000,36.0000,37.0000
+MEAN,,34.0000,35.0000,36.0000,37.0000
STDDEV,,22.0000,11.0000,55.0000,66.0000
-n,,100.0000,101.0000,102.0000,103.0000
-corr,var01,1.0000,9.0000,8.0000,7.0000
-corr,var02,9.0000,1.0000,6.0000,5.0000
-corr,var03,8.0000,6.0000,1.0000,4.0000
-corr,var04,7.0000,5.0000,4.0000,1.0000
+N,,100.0000,101.0000,102.0000,103.0000
+CORR,var01,1.0000,9.0000,8.0000,7.0000
+CORR,var02,9.0000,1.0000,6.0000,5.0000
+CORR,var03,8.0000,6.0000,1.0000,4.0000
+CORR,var04,7.0000,5.0000,4.0000,1.0000
])
-
AT_CLEANUP
-
-
-AT_SETUP([Matrix data split data])
-
-AT_DATA([matrix-data.pspp], [dnl
+AT_SETUP([MATRIX DATA - split data])
+AT_DATA([matrix-data.sps], [dnl
matrix data
variables = s1 s2 rowtype_ var01 var02 var03
/split=s1 s2.
list.
])
-
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
Table: Variables
Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format
s1,1,Scale,Input,8,Right,F4.0,F4.0
var02,6,Scale,Input,8,Right,F10.4,F10.4
var03,7,Scale,Input,8,Right,F10.4,F10.4
+Table: Split Values
+Variable,Value
+s1,8
+s2,0
+
Table: Data List
s1,s2,ROWTYPE_,VARNAME_,var01,var02,var03
-8,0,mean,,21.4000,5.0000,72.9000
+8,0,MEAN,,21.4000,5.0000,72.9000
8,0,STDDEV,,6.5000,1.6000,22.8000
-8,0,n,,106.0000,106.0000,106.0000
-8,0,corr,var01,1.0000,.4100,-.1600
-8,0,corr,var02,.4100,1.0000,-.2200
-8,0,corr,var03,-.1600,-.2200,1.0000
-8,1,mean,,11.4000,1.0000,52.9000
+8,0,N,,106.0000,106.0000,106.0000
+8,0,CORR,var01,1.0000,.4100,-.1600
+8,0,CORR,var02,.4100,1.0000,-.2200
+8,0,CORR,var03,-.1600,-.2200,1.0000
+
+Table: Split Values
+Variable,Value
+s1,8
+s2,1
+
+Table: Data List
+s1,s2,ROWTYPE_,VARNAME_,var01,var02,var03
+8,1,MEAN,,11.4000,1.0000,52.9000
8,1,STDDEV,,9.5000,8.6000,12.8000
-8,1,n,,10.0000,11.0000,12.0000
-8,1,corr,var01,1.0000,.5100,.3600
-8,1,corr,var02,.5100,1.0000,-.4100
-8,1,corr,var03,.3600,-.4100,1.0000
+8,1,N,,10.0000,11.0000,12.0000
+8,1,CORR,var01,1.0000,.5100,.3600
+8,1,CORR,var02,.5100,1.0000,-.4100
+8,1,CORR,var03,.3600,-.4100,1.0000
])
AT_CLEANUP
+dnl Keep this test in sync with Example 4 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - split data - 2])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=s1 ROWTYPE_ var01 TO var04
+ /SPLIT=s1
+ /FORMAT=FULL.
+BEGIN DATA.
+0 MEAN 34 35 36 37
+0 SD 22 11 55 66
+0 N 99 98 99 92
+0 CORR 1 .9 .8 .7
+0 CORR .9 1 .6 .5
+0 CORR .8 .6 1 .4
+0 CORR .7 .5 .4 1
+1 MEAN 44 45 34 39
+1 SD 23 15 51 46
+1 N 98 34 87 23
+1 CORR 1 .2 .3 .4
+1 CORR .2 1 .5 .6
+1 CORR .3 .5 1 .7
+1 CORR .4 .6 .7 1
+END DATA.
+FORMATS var01 TO var04(F5.1).
+LIST.
+])
+
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
+Table: Split Values
+Variable,Value
+s1,0
+
+Table: Data List
+s1,ROWTYPE_,VARNAME_,var01,var02,var03,var04
+0,MEAN,,34.0,35.0,36.0,37.0
+0,STDDEV,,22.0,11.0,55.0,66.0
+0,N,,99.0,98.0,99.0,92.0
+0,CORR,var01,1.0,.9,.8,.7
+0,CORR,var02,.9,1.0,.6,.5
+0,CORR,var03,.8,.6,1.0,.4
+0,CORR,var04,.7,.5,.4,1.0
+
+Table: Split Values
+Variable,Value
+s1,1
+Table: Data List
+s1,ROWTYPE_,VARNAME_,var01,var02,var03,var04
+1,MEAN,,44.0,45.0,34.0,39.0
+1,STDDEV,,23.0,15.0,51.0,46.0
+1,N,,98.0,34.0,87.0,23.0
+1,CORR,var01,1.0,.2,.3,.4
+1,CORR,var02,.2,1.0,.5,.6
+1,CORR,var03,.3,.5,1.0,.7
+1,CORR,var04,.4,.6,.7,1.0
+])
+AT_CLEANUP
+dnl Keep this test in sync with Example 5 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - factor variables])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=ROWTYPE_ f1 var01 TO var04
+ /FACTOR=f1.
+BEGIN DATA.
+MEAN 0 34 35 36 37
+SD 0 22 11 55 66
+N 0 99 98 99 92
+MEAN 1 44 45 34 39
+SD 1 23 15 51 46
+N 1 98 34 87 23
+CORR . 1
+CORR . .9 1
+CORR . .8 .6 1
+CORR . .7 .5 .4 1
+END DATA.
+FORMATS var01 TO var04(F5.1).
+LIST.
+])
-AT_SETUP([Matrix data duplicate variable])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
+Table: Data List
+ROWTYPE_,f1,VARNAME_,var01,var02,var03,var04
+MEAN,0,,34.0,35.0,36.0,37.0
+STDDEV,0,,22.0,11.0,55.0,66.0
+N,0,,99.0,98.0,99.0,92.0
+MEAN,1,,44.0,45.0,34.0,39.0
+STDDEV,1,,23.0,15.0,51.0,46.0
+N,1,,98.0,34.0,87.0,23.0
+CORR,.,var01,1.0,.9,.8,.7
+CORR,.,var02,.9,1.0,.6,.5
+CORR,.,var03,.8,.6,1.0,.4
+CORR,.,var04,.7,.5,.4,1.0
+])
+AT_CLEANUP
-dnl Negative test to check for sane behaviour in the face of bad syntax
-AT_DATA([matrix-data.pspp], [dnl
-set decimal = dot .
+AT_SETUP([MATRIX DATA - bad ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
matrix data
- variables = s1 s1 rowtype_ var01 var02 var03
- /split=s1.
+ variables = rowtype_ var01 var02 var03 var04
+ /format = upper diagonal.
begin data
-0 mean 21.4 5.0 72.9
-0 sd 6.5 1.6 22.8
-0 n 106 106 106
-0 corr 1
-0 corr .41 1
-0 corr -.16 -.22 1
-end data .
+cork 1 9 8 7
+corr 1 6 5
+corr 1 4
+corr 1
+end data.
list.
])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [1], [dnl
+"matrix-data.sps:6.1-6.4: error: Unknown row type ""cork""."
+])
+AT_CLEANUP
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [1], [dnl
-matrix-data.pspp:3: error: MATRIX DATA: Variable s1 appears twice in variable list.
+AT_SETUP([MATRIX DATA - unexpected ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
+matrix data
+ variables = rowtype_ f1 var01 var02 var03 var04
+ /content = corr (sd)
+ /factor = f1
+ /format = upper diagonal.
-matrix-data.pspp:6: error: Stopping syntax file processing here to avoid a cascade of dependent command failures.
+begin data
+corr . 1 9 8 7
+corr . 1 6 5
+corr . 1 4
+corr . 1
+sd . 1 2 3 4
+
+corr 0 1 9 8 7
+corr 0 1 6 5
+corr 0 1 4
+corr 0 1
+sd 0 1 2 3 4
+end data.
+
+list.
])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
+matrix-data.sps:12: warning: Data contains pooled row type STDDEV not included in CONTENTS.
+
+matrix-data.sps:14: warning: Data contains with-factors row type CORR not included in CONTENTS.
+
+Table: Data List
+ROWTYPE_,f1,VARNAME_,var01,var02,var03,var04
+CORR,.,var01,1.0000,9.0000,8.0000,7.0000
+CORR,.,var02,9.0000,1.0000,6.0000,5.0000
+CORR,.,var03,8.0000,6.0000,1.0000,4.0000
+CORR,.,var04,7.0000,5.0000,4.0000,1.0000
+STDDEV,.,,1.0000,2.0000,3.0000,4.0000
+CORR,0,var01,1.0000,9.0000,8.0000,7.0000
+CORR,0,var02,9.0000,1.0000,6.0000,5.0000
+CORR,0,var03,8.0000,6.0000,1.0000,4.0000
+CORR,0,var04,7.0000,5.0000,4.0000,1.0000
+STDDEV,0,,1.0000,2.0000,3.0000,4.0000
+])
AT_CLEANUP
+AT_SETUP([MATRIX DATA - bad number])
+AT_DATA([matrix-data.sps], [dnl
+matrix data
+ variables = rowtype_ var01 var02 var03 var04
+ /format = upper diagonal.
+
+begin data
+corr 1 9 8 7
+corr 1 x 5
+corr 1 4
+corr 1
+end data.
+list.
+])
-AT_SETUP([Matrix data - long variable names])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [1], [dnl
+matrix-data.sps:7.15: error: Field contents are not numeric.
-AT_DATA([matrix-data.pspp], [dnl
+Table: Data List
+ROWTYPE_,VARNAME_,var01,var02,var03,var04
+CORR,var01,1.0000,9.0000,8.0000,7.0000
+CORR,var02,9.0000,1.0000,. ,5.0000
+CORR,var03,8.0000,. ,1.0000,4.0000
+CORR,var04,7.0000,5.0000,4.0000,1.0000
+])
+AT_CLEANUP
+
+AT_SETUP([MATRIX DATA - long variable names])
+AT_DATA([matrix-data.sps], [dnl
matrix data
variables = rowtype_ var01 var_two variable_number_three variableFour
/format = upper diagonal.
list.
])
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
Table: Data List
ROWTYPE_,VARNAME_,var01,var_two,variable_number_three,variableFour
-mean,,34.0000,35.0000,36.0000,37.0000
+MEAN,,34.0000,35.0000,36.0000,37.0000
STDDEV,,22.0000,11.0000,55.0000,66.0000
N,,100.0000,101.0000,102.0000,103.0000
-corr,var01,1.0000,9.0000,8.0000,7.0000
-corr,var_two,9.0000,1.0000,6.0000,5.0000
-corr,variable_number_three,8.0000,6.0000,1.0000,4.0000
-corr,variableFour,7.0000,5.0000,4.0000,1.0000
+CORR,var01,1.0000,9.0000,8.0000,7.0000
+CORR,var_two,9.0000,1.0000,6.0000,5.0000
+CORR,variable_number_three,8.0000,6.0000,1.0000,4.0000
+CORR,variableFour,7.0000,5.0000,4.0000,1.0000
])
-
AT_CLEANUP
-
-
-AT_SETUP([Matrix reader - read integrity])
-
+AT_SETUP([MATRIX DATA - read integrity])
dnl Check that matrices presented are read correctly.
dnl The example below is an unlikely one since all
dnl covariance/correlation matrices must be symmetrical
dnl but it serves a purpose for this test.
-AT_DATA([matrix-reader.pspp], [dnl
+AT_DATA([matrix-reader.sps], [dnl
matrix data
variables = rowtype_ var01 to var9
/format = full.
corr 81 82 83 84 85 86 87 88 89
corr 91 92 93 94 95 96 97 98 99
end data.
-
+DEBUG MATRIX READ.
+FORMATS var01 to var09(F3.0).
+list.
factor /matrix = in (corr = *)
/analysis var02 var04 var06
/method = correlation
/print correlation.
])
-AT_CHECK([pspp -O format=csv matrix-reader.pspp], [0], [dnl
+AT_CHECK([pspp --testing-mode -O format=csv matrix-reader.sps], [0], [dnl
+Table: Debug Matrix Reader
+,,,var01,var02,var03,var04,var05,var06,var07,var08,var09
+1,Correlation,var01,11.000,12.000,13.000,14.000,15.000,16.000,17.000,18.000,19.000
+,,var02,21.000,22.000,23.000,24.000,25.000,26.000,27.000,28.000,29.000
+,,var03,31.000,32.000,33.000,34.000,35.000,36.000,37.000,38.000,39.000
+,,var04,41.000,42.000,43.000,44.000,45.000,46.000,47.000,48.000,49.000
+,,var05,51.000,52.000,53.000,54.000,55.000,56.000,57.000,58.000,59.000
+,,var06,61.000,62.000,63.000,64.000,65.000,66.000,67.000,68.000,69.000
+,,var07,71.000,72.000,73.000,74.000,75.000,76.000,77.000,78.000,79.000
+,,var08,81.000,82.000,83.000,84.000,85.000,86.000,87.000,88.000,89.000
+,,var09,91.000,92.000,93.000,94.000,95.000,96.000,97.000,98.000,99.000
+,N,Value,1.000,2.000,3.000,4.000,5.000,6.000,7.000,8.000,9.000
+,Mean,Value,.000,.000,.000,.000,.000,.000,.000,.000,.000
+,Standard Deviation,Value,100.000,200.000,300.000,400.000,500.000,600.000,700.000,800.000,900.000
+
+Table: Data List
+ROWTYPE_,VARNAME_,var01,var02,var03,var04,var05,var06,var07,var08,var09
+N,,1,2,3,4,5,6,7,8,9
+STDDEV,,100,200,300,400,500,600,700,800,900
+CORR,var01,11,12,13,14,15,16,17,18,19
+CORR,var02,21,22,23,24,25,26,27,28,29
+CORR,var03,31,32,33,34,35,36,37,38,39
+CORR,var04,41,42,43,44,45,46,47,48,49
+CORR,var05,51,52,53,54,55,56,57,58,59
+CORR,var06,61,62,63,64,65,66,67,68,69
+CORR,var07,71,72,73,74,75,76,77,78,79
+CORR,var08,81,82,83,84,85,86,87,88,89
+CORR,var09,91,92,93,94,95,96,97,98,99
+
Table: Correlation Matrix
,,var02,var04,var06
Correlation,var02,22.000,24.000,26.000
var04,6.95,2.15
var06,9.22,.01
])
-
AT_CLEANUP
-
-AT_SETUP([Matrix data - too many rows])
-
+AT_SETUP([MATRIX DATA - too many rows])
dnl Test for a crash which occurred when the matrix had more rows declared
dnl than variables to hold them.
-AT_DATA([matrix-data.pspp], [dnl
+AT_DATA([matrix-data.sps], [dnl
matrix data
variables = rowtype_
var01 var02 var03 var04
corr 1.00 .70
corr 1.00
end data .
-
-execute.
+FORMATS var01 TO var04 (F6.2).
+LIST.
])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [1], [dnl
+matrix-data.sps:10.29-10.31: error: Extraneous data expecting end of line.
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [1], [dnl
-matrix-data.pspp:13: error: MATRIX DATA: There are 4 variable declared but the data has at least 5 matrix rows.
+matrix-data.sps:11.24-11.31: error: Extraneous data expecting end of line.
-matrix-data.pspp:20: error: EXECUTE: EXECUTE is allowed only after the active dataset has been defined.
-])
+matrix-data.sps:12.19-12.32: error: Extraneous data expecting end of line.
+matrix-data.sps:18: error: Matrix CORR had 9 rows but 4 rows were expected.
+Table: Data List
+ROWTYPE_,VARNAME_,var01,var02,var03,var04
+MEAN,,21.40,5.00,72.90,17.40
+STDDEV,,6.50,1.60,22.80,5.70
+N,,106.00,106.00,106.00,106.00
+CORR,var01,1.00,.32,.48,.28
+CORR,var02,.32,1.00,.72,.54
+CORR,var03,.48,.72,1.00,.50
+CORR,var04,.28,.54,.50,1.00
+])
AT_CLEANUP
+AT_SETUP([MATRIX DATA - too few rows])
+AT_DATA([matrix-data.sps], [dnl
+matrix data
+ variables = rowtype_ s1 var01 var02 var03 var04
+ /split s1
+ /format = upper diagonal
+ /file='matrix-data.txt'.
+FORMATS var01 TO var04 (F6.2).
+LIST.
+])
+AT_DATA([matrix-data.txt], [dnl
+mean 1 21.4 5.0 72.9 17.4
+sd 1 6.5 1.6 22.8 5.7
+n 1 106 106 106 106
+corr 1 1.00 .32 .48 .28
+corr 2 1.00 .32 .48 .28
+corr 2 2.00 .72 .54
+])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [1], [dnl
+matrix-data.txt:5: error: Matrix CORR had 1 rows but 4 rows were expected.
+
+matrix-data.txt:6: error: Matrix CORR had 2 rows but 4 rows were expected.
+Table: Split Values
+Variable,Value
+s1,1
-AT_SETUP([Matrix data (badly formed)])
+Table: Data List
+s1,ROWTYPE_,VARNAME_,var01,var02,var03,var04
+1,MEAN,,21.40,5.00,72.90,17.40
+1,STDDEV,,6.50,1.60,22.80,5.70
+1,N,,106.00,106.00,106.00,106.00
+1,CORR,var01,1.00,.32,.48,.28
+1,CORR,var02,.32,1.00,. ,. @&t@
+1,CORR,var03,.48,. ,1.00,. @&t@
+1,CORR,var04,.28,. ,. ,1.00
+
+Table: Split Values
+Variable,Value
+s1,2
-AT_DATA([data.pspp], [dnl
-data list list /ROWTYPE_ (a8) VARNAME_(a4) v1 v2 v3 v4xxxxxxxxxxxxxxxxxxxxxzzzzzzzzzzzzzxxxxxxxxx.
+Table: Data List
+s1,ROWTYPE_,VARNAME_,var01,var02,var03,var04
+2,CORR,var01,1.00,.32,.48,.28
+2,CORR,var02,.32,2.00,.72,.54
+2,CORR,var03,.48,.72,1.00,. @&t@
+2,CORR,var04,.28,.54,. ,1.00
+])
+AT_CLEANUP
+
+AT_SETUP([MATRIX DATA - badly formed])
+AT_DATA([data.sps], [dnl
+data list list NOTABLE /ROWTYPE_ (a8) VARNAME_(a4) v1 v2 v3 v4xxxxxxxxxxxxxxxxxxxxxzzzzzzzzzzzzzxxxxxxxxx.
begin data
mean "" 1 2 3 4
sd "" 5 6 7 8
corr v4 4 3 21 1
end data.
-list.
-
-factor matrix=in(corr = *)
- .
+DEBUG MATRIX READ.
])
-AT_CHECK([pspp -O format=csv data.pspp], [1], [ignore])
-
+AT_CHECK([pspp --testing-mode -O format=csv data.sps], [0], [dnl
+data.sps:12: warning: DEBUG MATRIX READ: CORR matrix has 4 columns but 3 rows named variables to be analyzed (and 1 rows named unknown variables).
+
+Table: Debug Matrix Reader
+,,,v1,v2,v3,v4xxxxxxxxxxxxxxxxxxxxxzzzzzzzzzzzzzxxxxxxxxx
+1,Correlation,v1,11.000,22.000,33.000,44.000
+,,v2,55.000,66.000,77.000,88.000
+,,v3,111.000,222.000,333.000,444.000
+,,v4xxxxxxxxxxxxxxxxxxxxxzzzzzzzzzzzzzxxxxxxxxx,. ,. ,. ,. @&t@
+,N,Value,2.000,3.000,4.000,5.000
+,Mean,Value,1.000,2.000,3.000,4.000
+,Standard Deviation,Value,.000,.000,.000,.000
+])
AT_CLEANUP
-
-
-
-AT_SETUP([Matrix data (N subcommand)])
-
-AT_DATA([matrix-data.pspp], [dnl
+AT_SETUP([MATRIX DATA - N subcommand])
+AT_DATA([matrix-data.sps], [dnl
matrix data
variables = rowtype_ var01 var02 var03 var04
/n = 99
list.
])
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl
-"matrix-data.pspp:12: warning: MATRIX DATA: The N subcommand was specified, but a N record was also found in the data. The N record will be ignored."
+AT_CHECK([pspp -O format=csv matrix-data.sps], [1], [dnl
+matrix-data.sps:8: error: N record is not allowed with N subcommand. Ignoring N record.
Table: Data List
ROWTYPE_,VARNAME_,var01,var02,var03,var04
N,,99.0000,99.0000,99.0000,99.0000
-mean,,34.0000,35.0000,36.0000,37.0000
+MEAN,,34.0000,35.0000,36.0000,37.0000
STDDEV,,22.0000,11.0000,55.0000,66.0000
-corr,var01,1.0000,9.0000,8.0000,7.0000
-corr,var02,9.0000,1.0000,6.0000,5.0000
-corr,var03,8.0000,6.0000,1.0000,4.0000
-corr,var04,7.0000,5.0000,4.0000,1.0000
+CORR,var01,1.0000,9.0000,8.0000,7.0000
+CORR,var02,9.0000,1.0000,6.0000,5.0000
+CORR,var03,8.0000,6.0000,1.0000,4.0000
+CORR,var04,7.0000,5.0000,4.0000,1.0000
])
-
AT_CLEANUP
+dnl Keep this test in sync with Example 3 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - N subcommand - 2])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=ROWTYPE_ var01 TO var08
+ /FORMAT=UPPER NODIAGONAL
+ /N 92.
+BEGIN DATA.
+MEAN 24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7
+SD 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5
+CORR .17 .50 -.33 .27 .36 -.22 .18
+CORR .29 .29 -.20 .32 .12 .38
+CORR .05 .20 -.15 .16 .21
+CORR .20 .32 -.17 .12
+CORR .27 .12 -.24
+CORR -.20 -.38
+CORR .04
+END DATA.
+FORMATS var01 TO var08(F6.2).
+LIST.
+])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
+Table: Data List
+ROWTYPE_,VARNAME_,var01,var02,var03,var04,var05,var06,var07,var08
+N,,92.00,92.00,92.00,92.00,92.00,92.00,92.00,92.00
+MEAN,,24.30,5.40,69.70,20.10,13.40,2.70,27.90,3.70
+STDDEV,,5.70,1.50,23.50,5.80,2.80,4.50,5.40,1.50
+CORR,var01,1.00,.17,.50,-.33,.27,.36,-.22,.18
+CORR,var02,.17,1.00,.29,.29,-.20,.32,.12,.38
+CORR,var03,.50,.29,1.00,.05,.20,-.15,.16,.21
+CORR,var04,-.33,.29,.05,1.00,.20,.32,-.17,.12
+CORR,var05,.27,-.20,.20,.20,1.00,.27,.12,-.24
+CORR,var06,.36,.32,-.15,.32,.27,1.00,-.20,-.38
+CORR,var07,-.22,.12,.16,-.17,.12,-.20,1.00,.04
+CORR,var08,.18,.38,.21,.12,-.24,-.38,.04,1.00
+])
+AT_CLEANUP
dnl A "no-crash" test. This was observed to cause problems.
dnl See bug #58596
-AT_SETUP([Matrix data crash])
+AT_SETUP([MATRIX DATA - crash])
-AT_DATA([matrix-data.pspp], [dnl
+AT_DATA([matrix-data.sps], [dnl
begin data
corr 31
begin data
])
-AT_CHECK([pspp -O format=csv matrix-data.pspp], [1], [ignore])
+AT_CHECK([pspp -O format=csv matrix-data.sps], [1], [ignore])
+AT_CLEANUP
+\f
+dnl Keep this test in sync with Example 6 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - LOWER DIAGONAL without ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=var01 TO var08
+ /CONTENTS=MEAN SD N CORR.
+BEGIN DATA.
+24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7
+ 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5
+ 92 92 92 92 92 92 92 92
+1.00
+ .18 1.00
+-.22 -.17 1.00
+ .36 .31 -.14 1.00
+ .27 .16 -.12 .22 1.00
+ .33 .15 -.17 .24 .21 1.00
+ .50 .29 -.20 .32 .12 .38 1.00
+ .17 .29 -.05 .20 .27 .20 .04 1.00
+END DATA.
+FORMATS var01 TO var08(F5.2).
+LIST.
+])
+AT_CHECK([pspp matrix-data.sps -O format=csv], [0], [dnl
+Table: Data List
+ROWTYPE_,VARNAME_,var01,var02,var03,var04,var05,var06,var07,var08
+MEAN,,24.30,5.40,69.70,20.10,13.40,2.70,27.90,3.70
+STDDEV,,5.70,1.50,23.50,5.80,2.80,4.50,5.40,1.50
+N,,92.00,92.00,92.00,92.00,92.00,92.00,92.00,92.00
+CORR,var01,1.00,.18,-.22,.36,.27,.33,.50,.17
+CORR,var02,.18,1.00,-.17,.31,.16,.15,.29,.29
+CORR,var03,-.22,-.17,1.00,-.14,-.12,-.17,-.20,-.05
+CORR,var04,.36,.31,-.14,1.00,.22,.24,.32,.20
+CORR,var05,.27,.16,-.12,.22,1.00,.21,.12,.27
+CORR,var06,.33,.15,-.17,.24,.21,1.00,.38,.20
+CORR,var07,.50,.29,-.20,.32,.12,.38,1.00,.04
+CORR,var08,.17,.29,-.05,.20,.27,.20,.04,1.00
+])
+AT_CLEANUP
+
+AT_SETUP([MATRIX DATA - extraneous data without ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=var01 TO var08
+ /CONTENTS=MEAN SD N CORR.
+BEGIN DATA.
+24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7
+ 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5
+ 92 92 92 92 92 92 92 92
+1.00 .18
+ .18 1.00
+-.22 -.17 1.00
+ .36 .31 -.14 1.00
+ .27 .16 -.12 .22 1.00
+ .33 .15 -.17 .24 .21 1.00
+ .50 .29 -.20 .32 .12 .38 1.00
+ .17 .29 -.05 .20 .27 .20 .04 1.00
+END DATA.
+FORMATS var01 TO var08(F5.2).
+LIST.
+])
+AT_CHECK([pspp matrix-data.sps -O format=csv], [1], [dnl
+matrix-data.sps:8.8-8.10: error: Extraneous data expecting end of line.
+
+Table: Data List
+ROWTYPE_,VARNAME_,var01,var02,var03,var04,var05,var06,var07,var08
+MEAN,,24.30,5.40,69.70,20.10,13.40,2.70,27.90,3.70
+STDDEV,,5.70,1.50,23.50,5.80,2.80,4.50,5.40,1.50
+N,,92.00,92.00,92.00,92.00,92.00,92.00,92.00,92.00
+CORR,var01,1.00,.18,-.22,.36,.27,.33,.50,.17
+CORR,var02,.18,1.00,-.17,.31,.16,.15,.29,.29
+CORR,var03,-.22,-.17,1.00,-.14,-.12,-.17,-.20,-.05
+CORR,var04,.36,.31,-.14,1.00,.22,.24,.32,.20
+CORR,var05,.27,.16,-.12,.22,1.00,.21,.12,.27
+CORR,var06,.33,.15,-.17,.24,.21,1.00,.38,.20
+CORR,var07,.50,.29,-.20,.32,.12,.38,1.00,.04
+CORR,var08,.17,.29,-.05,.20,.27,.20,.04,1.00
+])
+AT_CLEANUP
+
+dnl Keep this test in sync with Example 7 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - Split variables with explicit values without ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=s1 var01 TO var04
+ /SPLIT=s1
+ /FORMAT=FULL
+ /CONTENTS=MEAN SD N CORR.
+BEGIN DATA.
+0 34 35 36 37
+0 22 11 55 66
+0 99 98 99 92
+0 1 .9 .8 .7
+0 .9 1 .6 .5
+0 .8 .6 1 .4
+0 .7 .5 .4 1
+1 44 45 34 39
+1 23 15 51 46
+1 98 34 87 23
+1 1 .2 .3 .4
+1 .2 1 .5 .6
+1 .3 .5 1 .7
+1 .4 .6 .7 1
+END DATA.
+FORMATS var01 TO var04(F5.2).
+LIST.
+])
+AT_CHECK([pspp matrix-data.sps -O format=csv], [0], [dnl
+Table: Split Values
+Variable,Value
+s1,0
+
+Table: Data List
+s1,ROWTYPE_,VARNAME_,var01,var02,var03,var04
+0,MEAN,,34.00,35.00,36.00,37.00
+0,STDDEV,,22.00,11.00,55.00,66.00
+0,N,,99.00,98.00,99.00,92.00
+0,CORR,var01,1.00,.90,.80,.70
+0,CORR,var02,.90,1.00,.60,.50
+0,CORR,var03,.80,.60,1.00,.40
+0,CORR,var04,.70,.50,.40,1.00
+
+Table: Split Values
+Variable,Value
+s1,1
+Table: Data List
+s1,ROWTYPE_,VARNAME_,var01,var02,var03,var04
+1,MEAN,,44.00,45.00,34.00,39.00
+1,STDDEV,,23.00,15.00,51.00,46.00
+1,N,,98.00,34.00,87.00,23.00
+1,CORR,var01,1.00,.20,.30,.40
+1,CORR,var02,.20,1.00,.50,.60
+1,CORR,var03,.30,.50,1.00,.70
+1,CORR,var04,.40,.60,.70,1.00
+])
+AT_CLEANUP
+
+dnl Keep this test in sync with Example 8 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - Split variable with sequential values without ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=var01 TO var04
+ /SPLIT=s1
+ /FORMAT=FULL
+ /CONTENTS=MEAN SD N CORR.
+BEGIN DATA.
+34 35 36 37
+22 11 55 66
+99 98 99 92
+ 1 .9 .8 .7
+.9 1 .6 .5
+.8 .6 1 .4
+.7 .5 .4 1
+44 45 34 39
+23 15 51 46
+98 34 87 23
+ 1 .2 .3 .4
+.2 1 .5 .6
+.3 .5 1 .7
+.4 .6 .7 1
+END DATA.
+FORMATS var01 TO var04(F5.2).
+LIST.
+])
+AT_CHECK([pspp matrix-data.sps -O format=csv], [0], [dnl
+Table: Split Values
+Variable,Value
+s1,1
+
+Table: Data List
+s1,ROWTYPE_,VARNAME_,var01,var02,var03,var04
+1,MEAN,,34.00,35.00,36.00,37.00
+1,STDDEV,,22.00,11.00,55.00,66.00
+1,N,,99.00,98.00,99.00,92.00
+1,CORR,var01,1.00,.90,.80,.70
+1,CORR,var02,.90,1.00,.60,.50
+1,CORR,var03,.80,.60,1.00,.40
+1,CORR,var04,.70,.50,.40,1.00
+
+Table: Split Values
+Variable,Value
+s1,2
+
+Table: Data List
+s1,ROWTYPE_,VARNAME_,var01,var02,var03,var04
+2,MEAN,,44.00,45.00,34.00,39.00
+2,STDDEV,,23.00,15.00,51.00,46.00
+2,N,,98.00,34.00,87.00,23.00
+2,CORR,var01,1.00,.20,.30,.40
+2,CORR,var02,.20,1.00,.50,.60
+2,CORR,var03,.30,.50,1.00,.70
+2,CORR,var04,.40,.60,.70,1.00
+])
+AT_CLEANUP
+
+dnl Keep this test in sync with Example 9 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - Factor variables grouping within-cell records by factor without ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=f1 var01 TO var04
+ /FACTOR=f1
+ /CELLS=2
+ /CONTENTS=(MEAN SD N) CORR.
+BEGIN DATA.
+0 34 35 36 37
+0 22 11 55 66
+0 99 98 99 92
+1 44 45 34 39
+1 23 15 51 46
+1 98 34 87 23
+ 1
+ .9 1
+ .8 .6 1
+ .7 .5 .4 1
+END DATA.
+FORMATS var01 TO var04(F5.1).
+LIST.
+])
+AT_CHECK([pspp matrix-data.sps -O format=csv], [0], [dnl
+Table: Data List
+ROWTYPE_,f1,VARNAME_,var01,var02,var03,var04
+MEAN,0,,34.0,35.0,36.0,37.0
+STDDEV,0,,22.0,11.0,55.0,66.0
+N,0,,99.0,98.0,99.0,92.0
+MEAN,1,,44.0,45.0,34.0,39.0
+STDDEV,1,,23.0,15.0,51.0,46.0
+N,1,,98.0,34.0,87.0,23.0
+CORR,.,var01,1.0,.9,.8,.7
+CORR,.,var02,.9,1.0,.6,.5
+CORR,.,var03,.8,.6,1.0,.4
+CORR,.,var04,.7,.5,.4,1.0
+])
+AT_CLEANUP
+
+dnl Keep this test in sync with Example 10 in doc/matrices.texi.
+AT_SETUP([MATRIX DATA - Factor variables grouping within-cell records by row type without ROWTYPE_])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=f1 var01 TO var04
+ /FACTOR=f1
+ /CELLS=2
+ /CONTENTS=(MEAN) (SD) (N) CORR.
+BEGIN DATA.
+0 34 35 36 37
+1 44 45 34 39
+0 22 11 55 66
+1 23 15 51 46
+0 99 98 99 92
+1 98 34 87 23
+ 1
+ .9 1
+ .8 .6 1
+ .7 .5 .4 1
+END DATA.
+FORMATS var01 TO var04(F5.1).
+LIST.
+])
+AT_CHECK([pspp matrix-data.sps -O format=csv], [0], [dnl
+Table: Data List
+ROWTYPE_,f1,VARNAME_,var01,var02,var03,var04
+MEAN,0,,34.0,35.0,36.0,37.0
+MEAN,1,,44.0,45.0,34.0,39.0
+STDDEV,0,,22.0,11.0,55.0,66.0
+STDDEV,1,,23.0,15.0,51.0,46.0
+N,0,,99.0,98.0,99.0,92.0
+N,1,,98.0,34.0,87.0,23.0
+CORR,.,var01,1.0,.9,.8,.7
+CORR,.,var02,.9,1.0,.6,.5
+CORR,.,var03,.8,.6,1.0,.4
+CORR,.,var04,.7,.5,.4,1.0
+])
+AT_CLEANUP
+
+AT_SETUP([MATRIX DATA - syntax errors])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA VARIABLES=var01 varname_.
+MATRIX DATA VARIABLES=v v v.
+MATRIX DATA VARIABLES=rowtype_ v1 v2 v3/SPLIT=rowtype_.
+MATRIX DATA VARIABLES=rowtype_ v1 v2 v3/FACTORS=rowtype_.
+MATRIX DATA VARIABLES=rowtype_ s1 v1 v2 v3/SPLIT=v1/FACTORS=v1.
+
+MATRIX DATA VARIABLES=v1 v2 v3/FORMAT=FULL NODIAGONAL.
+MATRIX DATA VARIABLES=v1 v2 v3/FACTORS=v1.
+MATRIX DATA VARIABLES=v1 v2 v3.
+BEGIN DATA.
+END DATA.
+MATRIX DATA VARIABLES=v1/FACTORS=v1.
+MATRIX DATA VARIABLES=v1 v2 v3 ROWTYPE_.
+MATRIX DATA VARIABLES=v1 v2 v3/CONTENTS=N/N=5.
+MATRIX DATA VARIABLES=v1/CONTENTS=XYZZY.
+MATRIX DATA VARIABLES=v1/CONTENTS=(.
+MATRIX DATA VARIABLES=v1/CONTENTS=(CORR.
+MATRIX DATA VARIABLES=v1/CONTENTS=).
+MATRIX DATA.
+MATRIX DATA VARIABLES=v*.
+MATRIX DATA VARIABLES=v/N=-1.
+MATRIX DATA VARIABLES=v/FORMAT=XYZZY.
+MATRIX DATA VARIABLES=v/FILE=123.
+MATRIX DATA VARIABLES=v/SPLIT=123.
+MATRIX DATA VARIABLES=v/CELLS=-1.
+MATRIX DATA VARIABLES=v/XYZZY.
+])
+AT_CHECK([pspp matrix-data.sps -O format=csv], [1], [dnl
+matrix-data.sps:1: error: MATRIX DATA: VARIABLES may not include VARNAME_.
+
+matrix-data.sps:2: error: MATRIX DATA: Variable v appears twice in variable list.
+
+matrix-data.sps:3: error: MATRIX DATA: ROWTYPE_ is not allowed on SPLIT or FACTORS.
+
+matrix-data.sps:4: error: MATRIX DATA: ROWTYPE_ is not allowed on SPLIT or FACTORS.
+
+matrix-data.sps:5: error: MATRIX DATA: v1 may not appear on both SPLIT and FACTORS.
+
+matrix-data.sps:7: error: MATRIX DATA: FORMAT=FULL and FORMAT=NODIAGONAL are mutually exclusive.
+
+matrix-data.sps:8: error: MATRIX DATA: CELLS is required when factor variables are specified and VARIABLES does not include ROWTYPE_.
+
+matrix-data.sps:9: warning: MATRIX DATA: CONTENTS was not specified and VARIABLES does not include ROWTYPE_. Assuming CONTENTS=CORR.
+
+matrix-data.sps:12: error: MATRIX DATA: CELLS is required when factor variables are specified and VARIABLES does not include ROWTYPE_.
+
+matrix-data.sps:13: error: MATRIX DATA: VARIABLES includes ROWTYPE_ but the continuous variables are not the last ones on VARIABLES.
+
+matrix-data.sps:14: error: MATRIX DATA: Cannot specify N on CONTENTS along with the N subcommand.
+
+matrix-data.sps:15.35-15.39: error: MATRIX DATA: Syntax error at `XYZZY': Row type keyword expected.
+
+matrix-data.sps:16.36: error: MATRIX DATA: Syntax error at end of command: Row type keyword expected.
+
+matrix-data.sps:17.40: error: MATRIX DATA: Syntax error at end of command: Row type keyword expected.
+
+matrix-data.sps:18.35: error: MATRIX DATA: Syntax error at `)': Row type keyword expected.
+
+matrix-data.sps:19.12: error: MATRIX DATA: Syntax error at end of command: expecting VARIABLES.
+
+matrix-data.sps:20.24: error: MATRIX DATA: Syntax error at `*': expecting `/'.
+
+matrix-data.sps:21.27-21.28: error: MATRIX DATA: Syntax error at `-1': Expected non-negative integer for N.
+
+matrix-data.sps:22.32-22.36: error: MATRIX DATA: Syntax error at `XYZZY'.
+
+matrix-data.sps:23.30-23.32: error: MATRIX DATA: Syntax error at `123': expecting a file name or handle name.
+
+matrix-data.sps:24.31-24.33: error: MATRIX DATA: Syntax error at `123': expecting variable name.
+
+matrix-data.sps:25.31-25.32: error: MATRIX DATA: Syntax error at `-1': Expected non-negative integer for CELLS.
+
+matrix-data.sps:26.25-26.29: error: MATRIX DATA: Syntax error at `XYZZY'.
+])
+AT_CLEANUP
+
+dnl I don't know what lunatic thought this was OK, but we strive to be
+dnl compatible.
+AT_SETUP([MATRIX DATA - plus and minus as delimiters])
+AT_DATA([matrix-data.sps], [dnl
+MATRIX DATA
+ VARIABLES=ROWTYPE_ var01 TO var08.
+BEGIN DATA.
+MEAN+24.3+5.4+69.7+20.1+13.4+2.7+27.9+3.7
+SD +5.7+1.5+23.5+5.8+2.8+4.5+5.4+1.5
+N+92+92+92+92+92+92+92+92
+CORR+1.00
+CORR+.18+1.00
+CORR-.22e+0-.17+1.00
+CORR+.36d-0+.31-.14+1.00
+CORR+.27+.16-.12+.22+1.00
+CORR+.33+.15-.17+.24+.21+1.00
+CORR+.50+.29-.20+.32+.12+.38+1.00
+CORR+.17+.29-.05+.20+.27+.20+.04+1.00
+END DATA.
+FORMATS var01 TO var08(F5.2).
+LIST.
+])
+
+AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl
+Table: Data List
+ROWTYPE_,VARNAME_,var01,var02,var03,var04,var05,var06,var07,var08
+MEAN,,24.30,5.40,69.70,20.10,13.40,2.70,27.90,3.70
+STDDEV,,5.70,1.50,23.50,5.80,2.80,4.50,5.40,1.50
+N,,92.00,92.00,92.00,92.00,92.00,92.00,92.00,92.00
+CORR,var01,1.00,.18,-.22,.36,.27,.33,.50,.17
+CORR,var02,.18,1.00,-.17,.31,.16,.15,.29,.29
+CORR,var03,-.22,-.17,1.00,-.14,-.12,-.17,-.20,-.05
+CORR,var04,.36,.31,-.14,1.00,.22,.24,.32,.20
+CORR,var05,.27,.16,-.12,.22,1.00,.21,.12,.27
+CORR,var06,.33,.15,-.17,.24,.21,1.00,.38,.20
+CORR,var07,.50,.29,-.20,.32,.12,.38,1.00,.04
+CORR,var08,.17,.29,-.05,.20,.27,.20,.04,1.00
+])
AT_CLEANUP