1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_matrix.h>
20 #include <gsl/gsl_vector.h>
22 #include "data/case.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/short-names.h"
30 #include "data/transformations.h"
31 #include "data/variable.h"
32 #include "language/command.h"
33 #include "language/commands/data-parser.h"
34 #include "language/commands/data-reader.h"
35 #include "language/commands/file-handle.h"
36 #include "language/commands/inpt-pgm.h"
37 #include "language/commands/placement-parser.h"
38 #include "language/lexer/lexer.h"
39 #include "language/lexer/variable-parser.h"
40 #include "libpspp/assertion.h"
41 #include "libpspp/i18n.h"
42 #include "libpspp/intern.h"
43 #include "libpspp/message.h"
44 #include "libpspp/str.h"
46 #include "gl/c-ctype.h"
47 #include "gl/minmax.h"
49 #include "gl/xalloc.h"
52 #define _(msgid) gettext (msgid)
55 /* Matrix row types. */ \
62 /* Vector row types. */ \
70 /* Scalar row types. */ \
75 #define RT(NAME, DIMS) C_##NAME,
82 #define RT(NAME, DIMS) +1
86 verify (N_ROWTYPES < 32);
88 /* Returns the number of dimensions in the indexes for row type RT. A matrix
89 has 2 dimensions, a vector has 1, a scalar has 0. */
91 rowtype_dimensions (enum rowtype rt)
/* Lookup table indexed by row type constant.  The RT macro defined below
   turns each (NAME, DIMS) pair it is applied to into the designated
   initializer [C_NAME] = DIMS. */
93 static const int rowtype_dims[N_ROWTYPES] = {
94 #define RT(NAME, DIMS) [C_##NAME] = DIMS,
98 return rowtype_dims[rt];
/* Returns the name of row type RT as a substring.  The name is the
   stringified NAME argument of the RT macro, that is, the enum constant's
   identifier without its C_ prefix. */
101 static struct substring
102 rowtype_name (enum rowtype rt)
/* Lookup table indexed by row type constant, built with the RT macro. */
104 static const struct substring rowtype_names[N_ROWTYPES] = {
105 #define RT(NAME, DIMS) [C_##NAME] = SS_LITERAL_INITIALIZER (#NAME),
110 return rowtype_names[rt];
/* Tries to recognize TOKEN as a row type.  TOKEN is first trimmed of
   surrounding spaces, then compared with lex_id_match against the name of
   every row type in turn.  "N_VECTOR" and "SD" are checked separately
   afterward, presumably as alternate spellings of existing row types.

   NOTE(review): the statements that store into *RT and that return the result
   are not visible in this excerpt. */
114 rowtype_from_string (struct substring token, enum rowtype *rt)
116 ss_trim (&token, ss_cstr (CC_SPACES));
117 for (size_t i = 0; i < N_ROWTYPES; i++)
118 if (lex_id_match (rowtype_name (i), token))
124 if (lex_id_match (ss_cstr ("N_VECTOR"), token))
129 else if (lex_id_match (ss_cstr ("SD"), token))
/* Tries to interpret LEXER's current token as a row type.  PARSED is true only
   when the token is an identifier and rowtype_from_string accepts its text, in
   which case RT was passed to that function to receive the row type.

   NOTE(review): what happens after PARSED is computed (advancing the lexer,
   returning) is not visible in this excerpt. */
139 rowtype_parse (struct lexer *lexer, enum rowtype *rt)
141 bool parsed = (lex_token (lexer) == T_ID
142 && rowtype_from_string (lex_tokss (lexer), rt));
166 struct variable **input_vars;
169 /* How to read matrices with each possible number of dimensions (0=scalar,
170 1=vector, 2=matrix). */
173 /* Number of rows and columns in the matrix: (1,1) for a scalar, (1,n) for
174 a vector, (n,n) for a matrix. */
177 /* Rows of data to read and the number of columns in each. Because we
178 often read just a triangle and sometimes omit the diagonal, 'n_rp' can
179 be less than 'nr' and 'rp[i].y' isn't always 'i'. */
182 /* The y-value of the row inside the matrix. */
185 /* First (inclusive) and last (exclusive) columns to read in this row. */
193 struct variable *rowtype;
194 struct variable *varname;
195 struct variable **cvars;
197 struct variable **svars;
198 size_t *svar_indexes;
200 struct variable **fvars;
201 size_t *fvar_indexes;
206 unsigned int pooled_rowtype_mask;
207 unsigned int factor_rowtype_mask;
212 enum rowtype rowtype;
/* Releases storage owned by MF: the input variable array and the split and
   factor index arrays are freed here.

   NOTE(review): the body of the three-iteration loop is not visible in this
   excerpt; the count matches the three schedules mf->ms[0..2] filled in by
   schedule_matrices, so it presumably frees per-schedule data. */
220 matrix_format_uninit (struct matrix_format *mf)
222 free (mf->input_vars);
223 for (int i = 0; i < 3; i++)
227 free (mf->svar_indexes);
229 free (mf->fvar_indexes);
/* Overwrites the string value of VAR in OUTCASE with SRC.  Every byte of the
   destination is written: positions beyond SRC's length are filled with
   spaces, and bytes of SRC beyond the destination's length are dropped. */
234 set_string (struct ccase *outcase, const struct variable *var,
235 struct substring src)
/* DST refers to the value's storage inside OUTCASE, so writing through it
   modifies the case. */
237 struct substring dst = case_ss (outcase, var);
238 for (size_t i = 0; i < dst.length; i++)
239 dst.string[i] = i < src.length ? src.string[i] : ' ';
/* Builds a message in the data category (MSG_C_DATA) with the given SEVERITY,
   located at TOKEN within the record that READER is currently positioned on.
   The location records READER's file name and line number plus the token's
   first and last columns.

   NOTE(review): TEXT is presumably the already-formatted message text (the
   callers pass the result of xvasprintf); the lines that store it in the
   message and emit the message are not visible in this excerpt. */
243 parse_msg (struct dfm_reader *reader, const struct substring *token,
244 char *text, enum msg_severity severity)
/* FIRST_COLUMN stays 0 unless TOKEN points inside the current record, in which
   case it is the token's position within the line plus one. */
246 int first_column = 0;
249 struct substring line = dfm_get_record (reader);
250 if (token->string >= line.string && token->string < ss_end (line))
251 first_column = ss_pointer_to_position (line, token->string) + 1;
254 int line_number = dfm_get_line_number (reader);
255 struct msg_location *location = xmalloc (sizeof *location);
/* When a first column is known and the token is nonempty, the last column is
   that of the token's final byte. */
256 int last_column = (first_column && token->length
257 ? first_column + token->length - 1
/* Start and end share a line number: the location never spans lines. */
259 *location = (struct msg_location) {
260 .file_name = intern_new (dfm_get_file_name (reader)),
261 .start = { .line = line_number, .column = first_column },
262 .end = { .line = line_number, .column = last_column },
264 struct msg *m = xmalloc (sizeof *m);
266 .category = MSG_C_DATA,
267 .severity = severity,
268 .location = location,
/* Formats a printf-style message from FORMAT and the following arguments and
   passes it to parse_msg with warning severity (MSG_S_WARNING), located at
   TOKEN in READER's current record. */
274 static void PRINTF_FORMAT (3, 4)
275 parse_warning (struct dfm_reader *reader, const struct substring *token,
276 const char *format, ...)
279 va_start (args, format);
280 parse_msg (reader, token, xvasprintf (format, args), MSG_S_WARNING);
/* Formats a printf-style message from FORMAT and the following arguments and
   passes it to parse_msg with error severity (MSG_S_ERROR), located at TOKEN
   in READER's current record. */
284 static void PRINTF_FORMAT (3, 4)
285 parse_error (struct dfm_reader *reader, const struct substring *token,
286 const char *format, ...)
289 va_start (args, format);
290 parse_msg (reader, token, xvasprintf (format, args), MSG_S_ERROR);
294 /* Advance to beginning of next token. */
/* P is the unread remainder of the current record of R.  Leading spaces and
   commas are skipped; the visible code also moves R forward a record and
   reloads *P from the new record.

   NOTE(review): the loop and conditions that decide when to advance, and the
   return statements, are not visible in this excerpt.  Callers use the result
   as "true if a token is available". */
296 more_tokens (struct substring *p, struct dfm_reader *r)
300 ss_ltrim (p, ss_cstr (CC_SPACES ","));
304 dfm_forward_record (r);
307 *p = dfm_get_record (r);
/* Extracts the next token from *P into *TOKEN, using more_tokens to skip
   delimiters and to refill *P from R.

   A token whose first byte is a single or double quote is read with
   ss_get_until up to the same quote character.  Otherwise the token is the
   first N bytes of *P, where the visible stop conditions are: a space or
   comma, or a '+' or '-' whose preceding byte is not one of d, D, e, E (so a
   sign that follows an exponent letter stays inside the token).

   NOTE(review): the loop that computes N, its first stop condition, and the
   return statements are not visible in this excerpt. */
312 next_token (struct substring *p, struct dfm_reader *r, struct substring *token)
314 if (!more_tokens (p, r))
318 int c = ss_first (*p);
319 if (c == '\'' || c == '"')
322 ss_get_until (p, c, token);
331 || ss_find_byte (ss_cstr (CC_SPACES ","), c) != SIZE_MAX
332 || ((c == '+' || c == '-')
333 && ss_find_byte (ss_cstr ("dDeE"),
334 ss_at (*p, n - 1)) == SIZE_MAX))
338 ss_get_bytes (p, n, token);
/* Reads the next token from P/R and converts it to a number with data_in,
   using F format, the reader's encoding, and the current format settings.  If
   data_in yields an error string, it is reported at the token with
   parse_error.

   NOTE(review): the store into *D and the return statements are not visible
   in this excerpt. */
344 next_number (struct substring *p, struct dfm_reader *r, double *d)
346 struct substring token;
347 if (!next_token (p, r, &token))
351 char *error = data_in (token, dfm_reader_get_encoding (r), FMT_F,
352 settings_get_fmt_settings (), &v, 0, NULL);
/* Passing the text through "%s" keeps ERROR from being treated as a format
   string. */
355 parse_error (r, &token, "%s", error);
/* Reads the next token from P/R and interprets it as a row type via
   rowtype_from_string, which receives RT for the result.  A token that is not
   a recognized row type is reported with parse_error, quoting the token text.

   NOTE(review): the return statements are not visible in this excerpt. */
363 next_rowtype (struct substring *p, struct dfm_reader *r, enum rowtype *rt)
365 struct substring token;
366 if (!next_token (p, r, &token))
369 if (rowtype_from_string (token, rt))
/* "%.*s" is used because TOKEN is a length-counted substring. */
372 parse_error (r, &token, _("Unknown row type \"%.*s\"."),
373 (int) token.length, token.string);
/* Parameters describing which rows and columns of a square matrix are read
   from the input.  get_read_matrix_params selects one set according to the
   format's triangle and diagonal settings, and schedule_matrices expands it
   into a per-row schedule. */
377 struct read_matrix_params
379 /* Adjustments to first and last row to read. */
382 /* Left and right columns to read in first row, inclusive.
383 For x1, INT_MAX is the rightmost column. */
386 /* Adjustment to x0 and x1 for each subsequent row we read. Each of these
387 is 0 to keep it the same or -1 or +1 to adjust it by that much. */
/* Chooses the read parameters for MF's matrix layout.  The choice depends on
   mf->triangle (FULL, LOWER, or UPPER) and, for the two triangular layouts, on
   whether mf->diagonal is DIAGONAL.  Each parameter set is a function-local
   static constant.

   NOTE(review): the initializers are positional.  Presumably they follow the
   order in which struct read_matrix_params documents its members (row
   adjustments, first-row columns, per-row column adjustments); confirm
   against the member declarations, which are not visible in this excerpt. */
391 static const struct read_matrix_params *
392 get_read_matrix_params (const struct matrix_format *mf)
394 if (mf->triangle == FULL)
400 static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 0, 0 };
403 else if (mf->triangle == LOWER)
405 if (mf->diagonal == DIAGONAL)
411 static const struct read_matrix_params rmp = { 0, 0, 0, 0, 0, 1 };
420 static const struct read_matrix_params rmp = { 1, 0, 0, 0, 0, 1 };
424 else if (mf->triangle == UPPER)
426 if (mf->diagonal == DIAGONAL)
432 static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 1, 0 };
441 static const struct read_matrix_params rmp = { 0, -1, 1, INT_MAX, 1, 0 };
/* Fills in MF's three read schedules, indexed by number of dimensions:
   mf->ms[0] for scalars, mf->ms[1] for vectors, mf->ms[2] for matrices.  Each
   schedule lists the rows to read and the column range of each row. */
450 schedule_matrices (struct matrix_format *mf)
/* Scalar: a single row holding the single column 0. */
452 struct matrix_sched *ms0 = &mf->ms[0];
455 ms0->rp = xmalloc (sizeof *ms0->rp);
456 ms0->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = 1 };
/* Vector: a single row with one column per continuous variable. */
459 struct matrix_sched *ms1 = &mf->ms[1];
461 ms1->nc = mf->n_cvars;
462 ms1->rp = xmalloc (sizeof *ms1->rp);
463 ms1->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = mf->n_cvars };
/* Matrix: square, n_cvars by n_cvars.  Room is reserved for n_cvars rows,
   although the loop below may schedule fewer. */
466 struct matrix_sched *ms2 = &mf->ms[2];
467 ms2->nr = mf->n_cvars;
468 ms2->nc = mf->n_cvars;
469 ms2->rp = xmalloc (mf->n_cvars * sizeof *ms2->rp);
472 const struct read_matrix_params *rmp = get_read_matrix_params (mf);
/* Clamp the right column to the last valid column; rmp->x1 may be INT_MAX to
   mean "rightmost". */
474 int x1 = rmp->x1 < mf->n_cvars ? rmp->x1 : mf->n_cvars - 1;
476 int y1 = (int) mf->n_cvars + rmp->dy1;
477 for (int y = y0; y < y1; y++)
479 assert (x0 >= 0 && x0 < mf->n_cvars);
480 assert (x1 >= 0 && x1 < mf->n_cvars);
/* x1 is inclusive here but row_sched stores an exclusive end, hence the + 1. */
483 ms2->rp[ms2->n_rp++] = (struct row_sched) {
484 .y = y, .x0 = x0, .x1 = x1 + 1
/* Reads the input columns that precede the first continuous variable
   (mf->cvars[0]) in input order.  The column for mf->rowtype is read as a row
   type into *RT; every other column is read as a number into D at the
   column's input position.

   NOTE(review): the return statements are not visible in this excerpt.
   Callers treat a false result as failure. */
493 read_id_columns (const struct matrix_format *mf,
494 struct substring *p, struct dfm_reader *r,
495 double *d, enum rowtype *rt)
497 for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
498 if (!(mf->input_vars[i] == mf->rowtype
499 ? next_rowtype (p, r, rt)
500 : next_number (p, r, &d[i])))
/* Compares A and B over the input columns that precede the first continuous
   variable, skipping the row type column.  The visible test detects a column
   whose values differ.

   NOTE(review): the return statements are not visible in this excerpt. */
506 equal_id_columns (const struct matrix_format *mf,
507 const double *a, const double *b)
509 for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
510 if (mf->input_vars[i] != mf->rowtype && a[i] != b[i])
/* Compares A and B at the positions of MF's split variables, as given by
   mf->svar_indexes.  The visible test detects a split variable whose values
   differ.

   NOTE(review): the return statements are not visible in this excerpt. */
516 equal_split_columns (const struct matrix_format *mf,
517 const double *a, const double *b)
519 for (size_t i = 0; i < mf->n_svars; i++)
521 size_t idx = mf->svar_indexes[i];
522 if (a[idx] != b[idx])
/* Examines D at the position of each of MF's factor variables, looking for a
   value other than SYSMIS.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably a row is "pooled" when every factor value is SYSMIS. */
529 is_pooled (const struct matrix_format *mf, const double *d)
531 for (size_t i = 0; i < mf->n_fvars; i++)
532 if (d[mf->fvar_indexes[i]] != SYSMIS)
/* Resets the part of M used by row type RT before data is read into it.  The
   nr-by-nc region given by the schedule for RT's dimensionality is set to
   SYSMIS, except that a two-dimensional C_CORR matrix gets 1.0 on its
   diagonal. */
538 matrix_sched_init (const struct matrix_format *mf, enum rowtype rt,
541 int n_dims = rowtype_dimensions (rt);
542 const struct matrix_sched *ms = &mf->ms[n_dims];
/* Only a two-dimensional correlation matrix has a known diagonal. */
543 double diagonal = n_dims < 2 || rt != C_CORR ? SYSMIS : 1.0;
544 for (size_t y = 0; y < ms->nr; y++)
545 for (size_t x = 0; x < ms->nc; x++)
546 gsl_matrix_set (m, y, x, y == x ? diagonal : SYSMIS);
/* Writes the contents of M for row type RT to W, one case per row of the
   schedule for RT's dimensionality.  D supplies the values of the input
   columns that precede the continuous variables, and SPLIT_NUM supplies the
   value of the split variable when MF has split variables but no
   svar_indexes. */
550 matrix_sched_output (const struct matrix_format *mf, enum rowtype rt,
551 gsl_matrix *m, const double *d, int split_num,
552 struct casewriter *w)
554 int n_dims = rowtype_dimensions (rt);
555 const struct matrix_sched *ms = &mf->ms[n_dims];
/* For C_N_SCALAR, copy the single value at (0,0) across row 0 so that every
   continuous variable receives it. */
557 if (rt == C_N_SCALAR)
559 for (size_t x = 1; x < mf->n_cvars; x++)
560 gsl_matrix_set (m, 0, x, gsl_matrix_get (m, 0, 0));
564 for (int y = 0; y < ms->nr; y++)
566 struct ccase *c = case_create (casewriter_get_proto (w));
/* Copy the numeric columns that precede the continuous variables; the row
   type column is a string and is set separately below. */
567 for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
568 if (mf->input_vars[i] != mf->rowtype)
569 *case_num_rw (c, mf->input_vars[i]) = d[i];
570 if (mf->n_svars && !mf->svar_indexes)
571 *case_num_rw (c, mf->svars[0]) = split_num;
572 set_string (c, mf->rowtype, rowtype_name (rt));
/* Rows of a two-dimensional matrix are labeled with the name of the
   continuous variable for that row; other rows get an empty name. */
573 const char *varname = n_dims == 2 ? var_get_name (mf->cvars[y]) : "";
574 set_string (c, mf->varname, ss_cstr (varname));
575 for (int x = 0; x < mf->n_cvars; x++)
576 *case_num_rw (c, mf->cvars[x]) = gsl_matrix_get (m, y, x);
577 casewriter_write (w, c);
/* Writes the count N to W as a row of type C_N_SCALAR.  N is stored at (0,0)
   of the scratch matrix M, and matrix_sched_output does the rest, using D and
   SPLIT_NUM for the identifying columns. */
582 matrix_sched_output_n (const struct matrix_format *mf, double n,
583 gsl_matrix *m, const double *d, int split_num,
584 struct casewriter *w)
586 gsl_matrix_set (m, 0, 0, n);
587 matrix_sched_output (mf, C_N_SCALAR, m, d, split_num, w);
/* Skips spaces and commas at the start of *P and reports leftover data on the
   line with parse_error.

   NOTE(review): the conditions guarding the trim and the error (for example,
   whether the check applies to every input format) are not visible in this
   excerpt. */
591 check_eol (const struct matrix_format *mf, struct substring *p,
592 struct dfm_reader *r)
596 ss_ltrim (p, ss_cstr (CC_SPACES ","));
599 parse_error (r, p, _("Extraneous data expecting end of line."));
/* Reads matrix data from R when each input record carries identifying
   columns that include a row type (see read_id_columns), and writes the
   resulting cases to W.

   Consecutive records are accumulated into the scratch matrix M for as long
   as their identifying columns and row type match those of the first record
   of the run; a completed matrix is written with matrix_sched_output.  D
   holds the identifying values of the matrix being read and D_NEXT those of
   the record read ahead. */
606 parse_data_with_rowtype (const struct matrix_format *mf,
607 struct dfm_reader *r, struct casewriter *w)
611 struct substring p = dfm_get_record (r);
/* M is sized for the largest case, a full n_cvars-by-n_cvars matrix. */
614 gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars);
616 double *d = xnmalloc (mf->n_input_vars, sizeof *d);
619 double *d_next = xnmalloc (mf->n_input_vars, sizeof *d_next);
621 if (!read_id_columns (mf, &p, r, d, &rt))
625 /* If this has rowtype N but there was an N subcommand, then the
626 subcommand takes precedence, so we will suppress outputting this
627 record. We still need to parse it, though, so we can't skip other
629 bool suppress_output = mf->n >= 0 && (rt == C_N || rt == C_N_SCALAR);
631 parse_error (r, NULL, _("N record is not allowed with N subcommand. "
632 "Ignoring N record."));
634 /* If there's an N subcommand, and this is a new split, then output an N
636 if (mf->n >= 0 && (!prev || !equal_split_columns (mf, prev, d)))
638 matrix_sched_output_n (mf, mf->n, m, d, 0, w);
641 prev = xnmalloc (mf->n_input_vars, sizeof *prev);
642 memcpy (prev, d, mf->n_input_vars * sizeof *prev);
645 /* Usually users don't provide the CONTENTS subcommand with ROWTYPE_, but
646 if they did then warn if ROWTYPE_ is an unexpected type. */
647 if (mf->factor_rowtype_mask || mf->pooled_rowtype_mask)
649 const char *name = rowtype_name (rt).string;
/* Each mask has one bit per row type; which mask applies depends on whether
   this record is pooled. */
650 if (is_pooled (mf, d))
652 if (!((1u << rt) & mf->pooled_rowtype_mask))
653 parse_warning (r, NULL, _("Data contains pooled row type %s not "
654 "included in CONTENTS."), name);
658 if (!((1u << rt) & mf->factor_rowtype_mask))
659 parse_warning (r, NULL, _("Data contains with-factors row type "
660 "%s not included in CONTENTS."), name);
664 /* Initialize the matrix to be filled-in. */
665 int n_dims = rowtype_dimensions (rt);
666 const struct matrix_sched *ms = &mf->ms[n_dims];
667 matrix_sched_init (mf, rt, m);
669 enum rowtype rt_next;
/* N_ROWS counts the records of the current matrix, starting from 1. */
673 for (n_rows = 1; ; n_rows++)
675 if (n_rows <= ms->n_rp)
677 const struct row_sched *rs = &ms->rp[n_rows - 1];
679 for (size_t x = rs->x0; x < rs->x1; x++)
682 if (!next_number (&p, r, &e))
684 gsl_matrix_set (m, y, x, e);
/* Triangular input supplies only one of (y,x) and (x,y), so mirror the value
   across the diagonal. */
685 if (n_dims == 2 && mf->triangle != FULL)
686 gsl_matrix_set (m, x, y, e);
688 check_eol (mf, &p, r);
692 /* Suppress bad input data. We'll issue an error later. */
/* Read ahead: the identifying columns of the next record decide whether it
   continues the current matrix. */
696 eof = (!more_tokens (&p, r)
697 || !read_id_columns (mf, &p, r, d_next, &rt_next));
701 if (!equal_id_columns (mf, d, d_next) || rt_next != rt)
704 if (!suppress_output)
705 matrix_sched_output (mf, rt, m, d, 0, w);
/* The number of records read must equal the number of scheduled rows. */
707 if (n_rows != ms->n_rp)
708 parse_error (r, NULL,
709 _("Matrix %s had %zu rows but %zu rows were expected."),
710 rowtype_name (rt).string, n_rows, ms->n_rp);
/* Reads one matrix of type ROWTYPE from P/R into the scratch matrix M,
   following the schedule for ROWTYPE's dimensionality, then writes it to W
   with matrix_sched_output, passing SPLIT_NUM through.

   For each scheduled row, the input variables are visited in input order.  A
   continuous variable takes the next number from the input and stores it in M
   while it lies within the row's column range.  Factor variables and the
   remaining variables also consume numbers.

   NOTE(review): how POOLED is used, where the non-continuous values are
   stored in D, and the counters K, H and Y are maintained by lines not
   visible in this excerpt. */
729 parse_matrix_without_rowtype (const struct matrix_format *mf,
730 struct substring *p, struct dfm_reader *r,
731 gsl_matrix *m, enum rowtype rowtype, bool pooled,
732 int split_num, struct casewriter *w)
734 int n_dims = rowtype_dimensions (rowtype);
735 const struct matrix_sched *ms = &mf->ms[n_dims];
737 double *d = xnmalloc (mf->n_input_vars, sizeof *d);
738 matrix_sched_init (mf, rowtype, m);
739 for (size_t i = 0; i < ms->n_rp; i++)
744 for (size_t j = 0; j < mf->n_input_vars; j++)
746 const struct variable *iv = mf->input_vars[j];
747 if (k < mf->n_cvars && iv == mf->cvars[k])
/* Only the first (x1 - x0) continuous variables have data in this row. */
749 if (k < ms->rp[i].x1 - ms->rp[i].x0)
752 if (!next_number (p, r, &e))
/* The K'th value read in this row belongs in column x0 + K. */
755 int x = k + ms->rp[i].x0;
756 gsl_matrix_set (m, y, x, e);
/* Mirror triangular input across the diagonal. */
757 if (n_dims == 2 && mf->triangle != FULL)
758 gsl_matrix_set (m, x, y, e);
763 if (h < mf->n_fvars && iv == mf->fvars[h])
774 if (!next_number (p, r, &e))
778 check_eol (mf, p, r);
781 matrix_sched_output (mf, rowtype, m, d, split_num, w);
/* Reads matrix data from R when the input has no row type column, and writes
   the resulting cases to W.  The kinds of matrices and their order come from
   mf->contents.

   A content entry marked "open" starts a group that runs through the entry
   marked "close".  The group is read mf->cells times, reading each row type
   in the group in turn with POOLED false.  An entry outside a group is read
   with a single call.  The whole sequence repeats while more_tokens reports
   remaining input.

   NOTE(review): the trailing arguments of both parse_matrix_without_rowtype
   calls, and the updates of I and J, are not visible in this excerpt. */
787 parse_data_without_rowtype (const struct matrix_format *mf,
788 struct dfm_reader *r, struct casewriter *w)
792 struct substring p = dfm_get_record (r);
794 gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars);
799 for (size_t i = 0; i < mf->n_contents; )
/* Find J, the index of the entry that closes the group opened at I. */
802 if (mf->contents[i].open)
803 while (!mf->contents[j].close)
806 if (mf->contents[i].open)
808 for (size_t k = 0; k < mf->cells; k++)
809 for (size_t h = i; h <= j; h++)
810 parse_matrix_without_rowtype (mf, &p, r, m,
811 mf->contents[h].rowtype, false,
815 parse_matrix_without_rowtype (mf, &p, r, m, mf->contents[i].rowtype,
822 while (more_tokens (&p, r));
827 /* Parses VARIABLES=varnames for MATRIX DATA and returns a dictionary with the
828 named variables in it. */
829 static struct dictionary *
830 parse_matrix_data_variables (struct lexer *lexer)
/* The VARIABLES keyword is mandatory; the equals sign after it is optional. */
832 if (!lex_force_match_id (lexer, "VARIABLES"))
834 lex_match (lexer, T_EQUALS);
836 struct dictionary *dict = dict_create (get_default_encoding ());
/* The token range of the variable list is kept for error reporting. */
840 int vars_start = lex_ofs (lexer);
841 if (!parse_DATA_LIST_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE))
846 int vars_end = lex_ofs (lexer) - 1;
/* ROWTYPE_ (matched case-insensitively) is created with width 8; every other
   name is created with width 0 and given scale measurement level. */
848 for (size_t i = 0; i < n_names; i++)
849 if (!strcasecmp (names[i], "ROWTYPE_"))
850 dict_create_var_assert (dict, "ROWTYPE_", 8);
853 struct variable *var = dict_create_var_assert (dict, names[i], 0);
854 var_set_measure (var, MEASURE_SCALE);
857 for (size_t i = 0; i < n_names; ++i)
/* VARNAME_ is not accepted as an input variable. */
861 if (dict_lookup_var (dict, "VARNAME_"))
863 lex_ofs_error (lexer, vars_start, vars_end,
864 _("VARIABLES may not include VARNAME_."));
/* Parses a list of existing variables from DICT for the SPLIT or FACTORS
   subcommand.  The variables are stored in *VARS and their dictionary indexes
   in a newly allocated *INDEXES.  ROWTYPE_ is rejected, and TAKEN_VARS
   (indexed by dictionary index) is consulted so that a variable cannot be
   used on both subcommands.  Each accepted variable is given nominal
   measurement level and F4 print and write formats.

   NOTE(review): some parameters, the test on *TV, and the return statements
   are not visible in this excerpt. */
872 parse_matrix_data_subvars (struct lexer *lexer, struct dictionary *dict,
874 struct variable ***vars, size_t **indexes,
877 int start_ofs = lex_ofs (lexer);
878 if (!parse_variables (lexer, dict, vars, n_vars, 0))
880 int end_ofs = lex_ofs (lexer) - 1;
882 *indexes = xnmalloc (*n_vars, sizeof **indexes);
883 for (size_t i = 0; i < *n_vars; i++)
885 struct variable *v = (*vars)[i];
886 if (!strcasecmp (var_get_name (v), "ROWTYPE_"))
888 lex_ofs_error (lexer, start_ofs, end_ofs,
889 _("ROWTYPE_ is not allowed on SPLIT or FACTORS."));
892 (*indexes)[i] = var_get_dict_index (v);
/* TV points at this variable's "already used" flag. */
894 bool *tv = &taken_vars[var_get_dict_index (v)];
897 lex_ofs_error (lexer, start_ofs, end_ofs,
898 _("%s may not appear on both SPLIT and FACTORS."),
904 var_set_measure (v, MEASURE_NOMINAL);
905 var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 4 });
/* Parses and executes the MATRIX DATA command.

   The VARIABLES subcommand is parsed first into a new dictionary.  The
   remaining subcommands (N, FORMAT, FILE, SPLIT, FACTORS, CELLS, CONTENTS)
   fill in a struct matrix_format.  After validation the dictionary is
   reordered, the read schedules are built, the data is read with the parser
   that matches whether VARIABLES included ROWTYPE_, and the new dictionary
   and cases are installed in DS.

   NOTE(review): the error-exit paths and return values are not visible in
   this excerpt. */
919 cmd_matrix_data (struct lexer *lexer, struct dataset *ds)
921 int input_vars_start = lex_ofs (lexer);
922 struct dictionary *dict = parse_matrix_data_variables (lexer);
925 int input_vars_end = lex_ofs (lexer) - 1;
/* Snapshot the input variables in the order given on VARIABLES, before any
   further variables are added to the dictionary. */
927 size_t n_input_vars = dict_get_n_vars (dict);
928 struct variable **input_vars = xnmalloc (n_input_vars, sizeof *input_vars);
929 for (size_t i = 0; i < n_input_vars; i++)
930 input_vars[i] = dict_get_var (dict, i);
/* VARNAME_ must be wide enough for the longest input variable name, with a
   minimum width of 8. */
932 int varname_width = 8;
933 for (size_t i = 0; i < n_input_vars; i++)
935 int w = strlen (var_get_name (input_vars[i]));
936 varname_width = MAX (w, varname_width);
/* INPUT_ROWTYPE records whether ROWTYPE_ was named on VARIABLES.  The visible
   code also creates ROWTYPE_ with width 8; the guarding condition is not
   visible in this excerpt. */
939 struct variable *rowtype = dict_lookup_var (dict, "ROWTYPE_");
940 bool input_rowtype = rowtype != NULL;
942 rowtype = dict_create_var_assert (dict, "ROWTYPE_", 8);
944 struct matrix_format mf = {
945 .input_rowtype = input_rowtype,
946 .input_vars = input_vars,
947 .n_input_vars = n_input_vars,
950 .varname = dict_create_var_assert (dict, "VARNAME_", varname_width),
953 .diagonal = DIAGONAL,
/* One flag per input variable, indexed by dictionary index, marking variables
   that may not be used on SPLIT or FACTORS. */
958 bool *taken_vars = XCALLOC (n_input_vars, bool);
960 taken_vars[var_get_dict_index (rowtype)] = true;
962 struct file_handle *fh = NULL;
/* Each remaining subcommand is introduced by a slash. */
965 while (lex_token (lexer) != T_ENDCMD)
967 if (!lex_force_match (lexer, T_SLASH))
/* N: an integer in the range 0 to INT_MAX.  Its token range is remembered for
   a later error message. */
970 if (lex_match_id (lexer, "N"))
972 n_start = lex_ofs (lexer) - 1;
973 lex_match (lexer, T_EQUALS);
975 if (!lex_force_int_range (lexer, "N", 0, INT_MAX))
978 mf.n = lex_integer (lexer);
979 n_end = lex_ofs (lexer);
/* FORMAT: any sequence of keywords up to the next slash or end of command. */
982 else if (lex_match_id (lexer, "FORMAT"))
984 int start_ofs = lex_ofs (lexer) - 1;
985 lex_match (lexer, T_EQUALS);
987 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
989 if (lex_match_id (lexer, "LIST"))
991 else if (lex_match_id (lexer, "FREE"))
993 else if (lex_match_id (lexer, "UPPER"))
995 else if (lex_match_id (lexer, "LOWER"))
997 else if (lex_match_id (lexer, "FULL"))
999 else if (lex_match_id (lexer, "DIAGONAL"))
1000 mf.diagonal = DIAGONAL;
1001 else if (lex_match_id (lexer, "NODIAGONAL"))
1002 mf.diagonal = NO_DIAGONAL;
1005 lex_error_expecting (lexer, "LIST", "FREE",
1006 "UPPER", "LOWER", "FULL",
1007 "DIAGONAL", "NODIAGONAL");
1011 int end_ofs = lex_ofs (lexer) - 1;
/* The combination is checked once all FORMAT keywords have been parsed. */
1013 if (mf.diagonal == NO_DIAGONAL && mf.triangle == FULL)
1015 lex_ofs_error (lexer, start_ofs, end_ofs,
1016 _("FORMAT=FULL and FORMAT=NODIAGONAL are "
1017 "mutually exclusive."));
/* FILE: a file handle that may refer to a file or to inline data. */
1021 else if (lex_match_id (lexer, "FILE"))
1023 lex_match (lexer, T_EQUALS);
1025 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
/* SPLIT is matched only while no split variables have been set.  When
   VARIABLES lacks ROWTYPE_ and the name given is not yet in the dictionary, a
   new split variable is created; otherwise the names are parsed as existing
   variables. */
1029 else if (!mf.n_svars && lex_match_id (lexer, "SPLIT"))
1031 lex_match (lexer, T_EQUALS);
1032 if (!mf.input_rowtype
1033 && lex_token (lexer) == T_ID
1034 && !dict_lookup_var (dict, lex_tokcstr (lexer)))
1036 mf.svars = xmalloc (sizeof *mf.svars);
1037 mf.svars[0] = dict_create_var_assert (dict, lex_tokcstr (lexer),
1039 var_set_measure (mf.svars[0], MEASURE_NOMINAL);
1040 var_set_both_formats (
1041 mf.svars[0], &(struct fmt_spec) { .type = FMT_F, .w = 4 });
1045 else if (!parse_matrix_data_subvars (lexer, dict, taken_vars,
1046 &mf.svars, &mf.svar_indexes,
/* FACTORS is matched only while no factor variables have been set. */
1050 else if (!mf.n_fvars && lex_match_id (lexer, "FACTORS"))
1052 lex_match (lexer, T_EQUALS);
1053 if (!parse_matrix_data_subvars (lexer, dict, taken_vars,
1054 &mf.fvars, &mf.fvar_indexes,
/* CELLS: still parsed when ROWTYPE_ is an input variable, but a warning says
   it is ignored. */
1058 else if (lex_match_id (lexer, "CELLS"))
1060 if (mf.input_rowtype)
1061 lex_next_msg (lexer, SW,
1062 -1, -1, _("CELLS is ignored when VARIABLES "
1063 "includes ROWTYPE_"));
1065 lex_match (lexer, T_EQUALS);
1067 if (!lex_force_int_range (lexer, "CELLS", 0, INT_MAX))
1070 mf.cells = lex_integer (lexer);
/* CONTENTS: a list of row types, optionally grouped in parentheses.  Each
   entry records whether it opened or closed a group. */
1073 else if (lex_match_id (lexer, "CONTENTS"))
1075 lex_match (lexer, T_EQUALS);
1077 size_t allocated_contents = mf.n_contents;
1078 bool in_parens = false;
1081 bool open = !in_parens && lex_match (lexer, T_LPAREN);
1083 if (!rowtype_parse (lexer, &rt))
1085 if (open || in_parens || (lex_token (lexer) != T_ENDCMD
1086 && lex_token (lexer) != T_SLASH))
1088 const char *rowtypes[] = {
1089 #define RT(NAME, DIMS) #NAME,
1094 lex_error_expecting_array (
1095 lexer, rowtypes, sizeof rowtypes / sizeof *rowtypes);
/* One bit per row type is recorded in one of the two masks; the condition
   that chooses between them is not visible in this excerpt. */
1105 mf.factor_rowtype_mask |= 1u << rt;
1107 mf.pooled_rowtype_mask |= 1u << rt;
1109 bool close = in_parens && lex_match (lexer, T_RPAREN);
1113 if (mf.n_contents >= allocated_contents)
1114 mf.contents = x2nrealloc (mf.contents, &allocated_contents,
1115 sizeof *mf.contents);
1116 mf.contents[mf.n_contents++] = (struct content) {
1117 .open = open, .rowtype = rt, .close = close
1123 lex_error_expecting (lexer, "N", "FORMAT", "FILE", "SPLIT", "FACTORS",
1124 "CELLS", "CONTENTS");
/* Checks and defaults that apply only when VARIABLES lacks ROWTYPE_. */
1128 if (!mf.input_rowtype)
1134 msg (SE, _("CELLS is required when factor variables are specified "
1135 "and VARIABLES does not include ROWTYPE_."));
1143 msg (SW, _("CONTENTS was not specified and VARIABLES does not "
1144 "include ROWTYPE_. Assuming CONTENTS=CORR."));
/* Default contents: a single C_CORR entry. */
1147 mf.contents = xmalloc (sizeof *mf.contents);
1148 *mf.contents = (struct content) { .rowtype = C_CORR };
/* Collect the continuous variables from the input variables.  The test that
   selects them is not visible in this excerpt. */
1151 mf.cvars = xmalloc (mf.n_input_vars * sizeof *mf.cvars);
1152 for (size_t i = 0; i < mf.n_input_vars; i++)
1155 struct variable *v = input_vars[i];
1156 mf.cvars[mf.n_cvars++] = v;
1157 var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 10,
1162 lex_ofs_error (lexer, input_vars_start, input_vars_end,
1163 _("At least one continuous variable is required."));
/* With ROWTYPE_ in the input, the continuous variables must be the trailing
   input variables. */
1166 if (mf.input_rowtype)
1168 for (size_t i = 0; i < mf.n_cvars; i++)
1169 if (mf.cvars[i] != input_vars[n_input_vars - mf.n_cvars + i])
1171 lex_ofs_error (lexer, input_vars_start, input_vars_end,
1172 _("VARIABLES includes ROWTYPE_ but the continuous "
1173 "variables are not the last ones on VARIABLES."));
/* An N subcommand conflicts with the C_N row type on CONTENTS. */
1177 unsigned int rowtype_mask = mf.pooled_rowtype_mask | mf.factor_rowtype_mask;
1178 if (rowtype_mask & (1u << C_N) && mf.n >= 0)
1180 lex_ofs_error (lexer, n_start, n_end,
1181 _("Cannot specify N on CONTENTS along with the "
/* Reorder the dictionary: split variables, ROWTYPE_, factor variables,
   VARNAME_, then continuous variables.  The assertion checks that every
   variable was placed. */
1186 struct variable **order = xnmalloc (dict_get_n_vars (dict), sizeof *order);
1188 for (size_t i = 0; i < mf.n_svars; i++)
1189 order[n_order++] = mf.svars[i];
1190 order[n_order++] = mf.rowtype;
1191 for (size_t i = 0; i < mf.n_fvars; i++)
1192 order[n_order++] = mf.fvars[i];
1193 order[n_order++] = mf.varname;
1194 for (size_t i = 0; i < mf.n_cvars; i++)
1195 order[n_order++] = mf.cvars[i];
1196 assert (n_order == dict_get_n_vars (dict));
1197 dict_reorder_vars (dict, order, n_order);
1200 dict_set_split_vars (dict, mf.svars, mf.n_svars, SPLIT_LAYERED);
1202 schedule_matrices (&mf);
/* NOTE(review): presumably this inline-file default applies only when no FILE
   subcommand was given; the guarding condition is not visible here. */
1205 fh = fh_inline_file ();
1207 if (lex_end_of_command (lexer) != CMD_SUCCESS)
1210 struct dfm_reader *reader = dfm_open_reader (fh, lexer, NULL);
/* Read all the data into a casewriter, then hand the dictionary and the
   written cases over to the dataset. */
1214 struct casewriter *writer = autopaging_writer_create (dict_get_proto (dict));
1215 if (mf.input_rowtype)
1216 parse_data_with_rowtype (&mf, reader, writer);
1218 parse_data_without_rowtype (&mf, reader, writer);
1219 dfm_close_reader (reader);
1221 dataset_set_dict (ds, dict);
1222 dataset_set_source (ds, casewriter_make_reader (writer));
1224 matrix_format_uninit (&mf);
1231 matrix_format_uninit (&mf);