1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/commands/placement-parser.h"
23 #include "data/format.h"
24 #include "language/lexer/format-parser.h"
25 #include "language/lexer/lexer.h"
26 #include "libpspp/message.h"
27 #include "libpspp/pool.h"
28 #include "libpspp/str.h"
30 #include "gl/c-strcase.h"
31 #include "gl/xalloc.h"
35 #define _(msgid) gettext (msgid)
37 /* Extensions to the format specifiers used only for
41 PRS_TYPE_T = SCHAR_MAX - 3, /* Tab to absolute column. */
42 PRS_TYPE_X, /* Skip columns. */
43 PRS_TYPE_NEW_REC /* Next record. */
46 static bool fixed_parse_columns (struct lexer *, struct pool *, size_t n_vars,
47 enum fmt_use, struct fmt_spec **, size_t *);
48 static bool fixed_parse_fortran (struct lexer *l, struct pool *, enum fmt_use,
49 struct fmt_spec **, size_t *);
51 /* Parses Fortran-like or column-based specifications for placing
52 variable data in fixed positions in columns and rows, that is,
53 formats like those parsed by DATA LIST or PRINT. Returns true
56 The formats parsed are either input or output formats, according
59 If USE is FMT_FOR_INPUT, then T, X, and / "formats" are parsed,
60 in addition to regular formats. If USE is FMT_FOR_OUTPUT, then
61 T and X "formats" are parsed but not /.
63 If successful, formats for N_VARS variables are stored in
64 *FORMATS, and the number of formats required is stored in
65 *N_FORMATS. *N_FORMATS may be greater than N_VARS because
66 of T, X, and / "formats", but success guarantees that exactly
67 N_VARS variables will be placed by the output formats. The
68 caller should call execute_placement_format to process those
69 "formats" in interpreting the output.
71 Uses POOL for allocation. When the caller is finished
72 interpreting *FORMATS, POOL may be destroyed. */
74 parse_var_placements (struct lexer *lexer, struct pool *pool, size_t n_vars,
76 struct fmt_spec **formats, size_t *n_formats)
79 if (lex_is_number (lexer))
80 return fixed_parse_columns (lexer, pool, n_vars, use,
82 else if (lex_match (lexer, T_LPAREN))
84 int start_ofs = lex_ofs (lexer);
85 if (!fixed_parse_fortran (lexer, pool, use, formats, n_formats))
87 int end_ofs = lex_ofs (lexer) - 1;
89 size_t n_assignments = 0;
90 for (size_t i = 0; i < *n_formats; i++)
91 n_assignments += (*formats)[i].type < FMT_NUMBER_OF_FORMATS;
93 if (n_assignments != n_vars)
95 lex_ofs_error (lexer, start_ofs, end_ofs,
96 _("Number of variables specified (%zu) "
97 "differs from number of variable formats (%zu)."),
98 n_vars, n_assignments);
106 lex_error (lexer, _("SPSS-like or Fortran-like format "
107 "specification expected after variable names."));
112 /* Implements parse_var_placements for column-based formats. */
114 fixed_parse_columns (struct lexer *lexer, struct pool *pool, size_t n_vars,
116 struct fmt_spec **formats, size_t *n_formats)
118 int start_ofs = lex_ofs (lexer);
121 if (!parse_column_range (lexer, 1, &fc, &lc, NULL))
124 /* Divide columns evenly. */
125 int w = (lc - fc + 1) / n_vars;
126 if ((lc - fc + 1) % n_vars)
128 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
129 _("The %d columns %d-%d "
130 "can't be evenly divided into %zu fields."),
131 lc - fc + 1, fc, lc, n_vars);
135 /* Format specifier. */
138 if (lex_match (lexer, T_LPAREN))
140 /* Get format type. */
141 if (lex_token (lexer) == T_ID)
143 if (!parse_format_specifier_name (lexer, &type))
145 lex_match (lexer, T_COMMA);
150 /* Get decimal places. */
151 if (lex_is_integer (lexer))
153 d = lex_integer (lexer);
159 if (!lex_force_match (lexer, T_RPAREN))
167 int end_ofs = lex_ofs (lexer) - 1;
169 struct fmt_spec format = { .type = type, .w = w, .d = d };
170 char *error = fmt_check__ (&format, use);
173 lex_ofs_error (lexer, start_ofs, end_ofs, "%s", error);
178 *formats = pool_nalloc (pool, n_vars + 1, sizeof **formats);
179 *n_formats = n_vars + 1;
180 (*formats)[0].type = (enum fmt_type) PRS_TYPE_T;
181 (*formats)[0].w = fc;
182 for (size_t i = 1; i <= n_vars; i++)
183 (*formats)[i] = format;
187 /* Implements parse_var_placements for Fortran-like formats. */
189 fixed_parse_fortran (struct lexer *lexer, struct pool *pool, enum fmt_use use,
190 struct fmt_spec **formats, size_t *n_formats)
192 size_t formats_allocated = 0;
193 size_t formats_used = 0;
196 while (!lex_match (lexer, T_RPAREN))
199 struct fmt_spec *new_formats;
200 size_t n_new_formats;
202 size_t formats_needed;
205 if (lex_is_integer (lexer))
207 count = lex_integer (lexer);
213 /* Parse format specifier. */
214 if (lex_match (lexer, T_LPAREN))
216 /* Call ourselves recursively to handle parentheses. */
217 if (!fixed_parse_fortran (lexer, pool, use,
218 &new_formats, &n_new_formats))
225 if (use == FMT_FOR_INPUT && lex_match (lexer, T_SLASH))
226 f.type = (enum fmt_type) PRS_TYPE_NEW_REC;
229 int ofs = lex_ofs (lexer);
230 char type[FMT_TYPE_LEN_MAX + 1];
231 if (!parse_abstract_format_specifier (lexer, type, &f.w, &f.d))
234 if (!c_strcasecmp (type, "T"))
235 f.type = (enum fmt_type) PRS_TYPE_T;
236 else if (!c_strcasecmp (type, "X"))
238 f.type = (enum fmt_type) PRS_TYPE_X;
244 if (!fmt_from_name (type, &f.type))
246 lex_ofs_error (lexer, ofs, ofs,
247 _("Unknown format type `%s'."), type);
250 char *error = fmt_check__ (&f, use);
253 lex_ofs_error (lexer, ofs, ofs, "%s", error);
261 /* Add COUNT copies of the N_NEW_FORMATS formats in
262 NEW_FORMATS to FORMATS. */
263 if (n_new_formats != 0
264 && size_overflow_p (xtimes (xsum (formats_used,
265 xtimes (count, n_new_formats)),
268 formats_needed = count * n_new_formats;
269 if (formats_used + formats_needed > formats_allocated)
271 formats_allocated = formats_used + formats_needed;
272 *formats = pool_2nrealloc (pool, *formats, &formats_allocated,
275 for (; count > 0; count--)
277 memcpy (&(*formats)[formats_used], new_formats,
278 sizeof **formats * n_new_formats);
279 formats_used += n_new_formats;
282 lex_match (lexer, T_COMMA);
285 *n_formats = formats_used;
289 /* Checks whether FORMAT represents one of the special "formats"
290 for T, X, or /. If so, updates *RECORD or *COLUMN (or both)
291 as appropriate, and returns true. Otherwise, returns false
292 without any side effects. */
294 execute_placement_format (const struct fmt_spec *format,
295 int *record, int *column)
297 switch ((int) format->type)
300 *column += format->w;
307 case PRS_TYPE_NEW_REC:
313 assert (format->type < FMT_NUMBER_OF_FORMATS);
319 parse_column__ (struct lexer *lexer, bool negative, int base, int *column)
321 assert (base == 0 || base == 1);
323 if (!lex_force_int (lexer))
325 long int value = lex_integer (lexer);
330 *column = value - base + 1;
334 lex_next_error (lexer, -1, -1,
335 _("Column positions for fields must be positive."));
337 lex_next_error (lexer, -1, -1,
338 _("Column positions for fields must not be negative."));
344 /* Parses a BASE-based column using LEXER. Returns true and
345 stores a 1-based column number into *COLUMN if successful,
346 otherwise emits an error message and returns false.
348 If BASE is 0, zero-based column numbers are parsed; if BASE is
349 1, 1-based column numbers are parsed. Regardless of BASE, the
350 value stored in *COLUMN is
353 parse_column (struct lexer *lexer, int base, int *column)
355 return parse_column__ (lexer, false, base, column);
358 /* Parse a column or a range of columns, specified as a single
359 integer or two integers delimited by a dash. Stores the range
360 in *FIRST_COLUMN and *LAST_COLUMN. (If only a single integer
361 is given, it is stored in both.) If RANGE_SPECIFIED is
362 non-null, then *RANGE_SPECIFIED is set to true if the syntax
363 contained a dash, false otherwise. Returns true if
364 successful, false if the syntax was invalid or the values
365 specified did not make sense.
367 If BASE is 0, zero-based column numbers are parsed; if BASE is
368 1, 1-based column numbers are parsed. Regardless of BASE, the
369 values stored in *FIRST_COLUMN and *LAST_COLUMN are
372 parse_column_range (struct lexer *lexer, int base,
373 int *first_column, int *last_column,
374 bool *range_specified)
376 int start_ofs = lex_ofs (lexer);
379 if (!parse_column__ (lexer, false, base, first_column))
383 if (lex_is_integer (lexer) && lex_integer (lexer) < 0)
385 if (!parse_column__ (lexer, true, base, last_column))
388 if (*last_column < *first_column)
390 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
391 _("The ending column for a field must be "
392 "greater than the starting column."));
397 *range_specified = true;
401 *last_column = *first_column;
403 *range_specified = false;
409 /* Parses a (possibly empty) sequence of slashes, each of which
410 may be followed by an integer. A slash on its own increases
411 *RECORD by 1 and sets *COLUMN to 1. A slash followed by an
412 integer sets *RECORD to the integer, as long as that increases
413 *RECORD, and sets *COLUMN to 1.
415 Returns true if successful, false on syntax error. */
417 parse_record_placement (struct lexer *lexer, int *record, int *column)
419 while (lex_match (lexer, T_SLASH))
421 if (lex_is_number (lexer))
423 if (!lex_force_int_range (lexer, NULL, *record + 1, INT_MAX))
425 *record = lex_integer (lexer);
432 assert (*record >= 1);