1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/data-io/placement-parser.h"
23 #include "data/format.h"
24 #include "language/lexer/format-parser.h"
25 #include "language/lexer/lexer.h"
26 #include "libpspp/message.h"
27 #include "libpspp/pool.h"
28 #include "libpspp/str.h"
30 #include "gl/c-strcase.h"
31 #include "gl/xalloc.h"
35 #define _(msgid) gettext (msgid)
37 /* Extensions to the format specifiers used only for placement. */
41 PRS_TYPE_T = SCHAR_MAX - 3, /* Tab to absolute column. */
42 PRS_TYPE_X, /* Skip columns. */
43 PRS_TYPE_NEW_REC /* Next record. */
/* Forward declarations of the two file-local helpers that implement
   parse_var_placements: fixed_parse_columns handles column-based
   specifications and fixed_parse_fortran handles Fortran-like ones. */
46 static bool fixed_parse_columns (struct lexer *, struct pool *, size_t n_vars,
47 enum fmt_use, struct fmt_spec **, size_t *);
48 static bool fixed_parse_fortran (struct lexer *l, struct pool *, enum fmt_use,
49 struct fmt_spec **, size_t *);
51 /* Parses Fortran-like or column-based specifications for placing
52 variable data in fixed positions in columns and rows, that is,
53 formats like those parsed by DATA LIST or PRINT. Returns true only if successful.
56 The formats parsed are either input or output formats, according to USE.
59 If USE is FMT_FOR_INPUT, then T, X, and / "formats" are parsed,
60 in addition to regular formats. If USE is FMT_FOR_OUTPUT, then
61 T and X "formats" are parsed but not /.
63 If successful, formats for N_VARS variables are stored in
64 *FORMATS, and the number of formats required is stored in
65 *N_FORMATS. *N_FORMATS may be greater than N_VARS because
66 of T, X, and / "formats", but success guarantees that exactly
67 N_VARS variables will be placed by the output formats. The
68 caller should call execute_placement_format to process those
69 "formats" in interpreting the output.
71 Uses POOL for allocation. When the caller is finished
72 interpreting *FORMATS, POOL may be destroyed. */
74 parse_var_placements (struct lexer *lexer, struct pool *pool, size_t n_vars,
76 struct fmt_spec **formats, size_t *n_formats)
/* A leading number introduces a column-based specification, which is
   handled entirely by fixed_parse_columns. */
79 if (lex_is_number (lexer))
80 return fixed_parse_columns (lexer, pool, n_vars, use,
/* A left parenthesis introduces a Fortran-like specification. */
82 else if (lex_match (lexer, T_LPAREN))
87 if (!fixed_parse_fortran (lexer, pool, use, formats, n_formats))
/* Count the entries that actually assign a variable: only entries whose
   type is below FMT_NUMBER_OF_FORMATS are counted.  Presumably the T, X,
   and / pseudo-formats have type values outside that range. */
91 for (i = 0; i < *n_formats; i++)
92 n_assignments += (*formats)[i].type < FMT_NUMBER_OF_FORMATS;
/* The Fortran-like specification must place exactly N_VARS variables. */
94 if (n_assignments != n_vars)
96 msg (SE, _("Number of variables specified (%zu) "
97 "differs from number of variable formats (%zu)."),
98 n_vars, n_assignments);
/* Error reported when the syntax is not a recognized specification. */
106 lex_error (lexer, _("SPSS-like or Fortran-like format "
107 "specification expected after variable names."));
112 /* Implements parse_var_placements for column-based formats. */
114 fixed_parse_columns (struct lexer *lexer, struct pool *pool, size_t n_vars,
116 struct fmt_spec **formats, size_t *n_formats)
/* Remember the token offset where the specification starts; it is used
   below as the start of the range passed to lex_ofs_error. */
118 int start_ofs = lex_ofs (lexer);
/* Parse the 1-based column range into FC and LC.  NULL is passed for
   RANGE_SPECIFIED because that result is not used here. */
121 if (!parse_column_range (lexer, 1, &fc, &lc, NULL))
124 /* Divide columns evenly among the N_VARS variables; a nonzero
   remainder is reported as an error.
   NOTE(review): N_VARS == 0 would divide by zero here; presumably
   callers always pass at least one variable -- confirm. */
125 int w = (lc - fc + 1) / n_vars;
126 if ((lc - fc + 1) % n_vars)
128 msg (SE, _("The %d columns %d-%d "
129 "can't be evenly divided into %zu fields."),
130 lc - fc + 1, fc, lc, n_vars);
134 /* Format specifier: an optional parenthesized group that may hold a
   format type name and a number of decimal places. */
137 if (lex_match (lexer, T_LPAREN))
139 /* Get format type. */
140 if (lex_token (lexer) == T_ID)
142 if (!parse_format_specifier_name (lexer, &type))
/* The comma after the type name is optional: the result of lex_match is
   ignored. */
144 lex_match (lexer, T_COMMA);
149 /* Get decimal places. */
150 if (lex_is_integer (lexer))
152 d = lex_integer (lexer);
158 if (!lex_force_match (lexer, T_RPAREN))
/* END_OFS is one less than the current token offset, presumably the last
   token consumed by this specification. */
166 int end_ofs = lex_ofs (lexer) - 1;
/* Assemble the format from the parsed type, the per-variable width W,
   and the decimals D, then check it against USE.  The string returned by
   fmt_check__ is what lex_ofs_error reports below; presumably it is
   non-null only when the format is invalid -- confirm. */
168 struct fmt_spec format = { .type = type, .w = w, .d = d };
169 char *error = fmt_check__ (&format, use);
172 lex_ofs_error (lexer, start_ofs, end_ofs, "%s", error);
/* Output N_VARS + 1 entries: entry 0 is a T pseudo-format whose width
   holds the first column FC, followed by N_VARS copies of FORMAT. */
177 *formats = pool_nalloc (pool, n_vars + 1, sizeof **formats);
178 *n_formats = n_vars + 1;
179 (*formats)[0].type = (enum fmt_type) PRS_TYPE_T;
180 (*formats)[0].w = fc;
181 for (size_t i = 1; i <= n_vars; i++)
182 (*formats)[i] = format;
186 /* Implements parse_var_placements for Fortran-like formats. */
188 fixed_parse_fortran (struct lexer *lexer, struct pool *pool, enum fmt_use use,
189 struct fmt_spec **formats, size_t *n_formats)
/* Capacity of, and number of entries used in, the *FORMATS array. */
191 size_t formats_allocated = 0;
192 size_t formats_used = 0;
/* Each iteration handles one item; the loop ends when the closing right
   parenthesis is consumed. */
195 while (!lex_match (lexer, T_RPAREN))
/* NEW_FORMATS and N_NEW_FORMATS describe the formats produced by the
   current item, before repetition by COUNT. */
198 struct fmt_spec *new_formats;
199 size_t n_new_formats;
201 size_t formats_needed;
/* An integer before the item is taken as its repeat count. */
204 if (lex_is_integer (lexer))
206 count = lex_integer (lexer);
212 /* Parse format specifier. */
213 if (lex_match (lexer, T_LPAREN))
215 /* Call ourselves recursively to handle parentheses. */
216 if (!fixed_parse_fortran (lexer, pool, use,
217 &new_formats, &n_new_formats))
/* A slash is accepted as the new-record pseudo-format only when parsing
   input formats. */
224 if (use == FMT_FOR_INPUT && lex_match (lexer, T_SLASH))
225 f.type = (enum fmt_type) PRS_TYPE_NEW_REC;
/* OFS is the offset of the format token, used for error locations. */
228 int ofs = lex_ofs (lexer);
229 char type[FMT_TYPE_LEN_MAX + 1];
230 if (!parse_abstract_format_specifier (lexer, type, &f.w, &f.d))
/* T and X (compared case-insensitively) are placement pseudo-formats
   rather than real format types. */
233 if (!c_strcasecmp (type, "T"))
234 f.type = (enum fmt_type) PRS_TYPE_T;
235 else if (!c_strcasecmp (type, "X"))
237 f.type = (enum fmt_type) PRS_TYPE_X;
/* Any other name is looked up as a format type and then checked
   against USE. */
243 if (!fmt_from_name (type, &f.type))
245 lex_ofs_error (lexer, ofs, ofs,
246 _("Unknown format type `%s'."), type);
249 char *error = fmt_check__ (&f, use);
252 lex_ofs_error (lexer, ofs, ofs, "%s", error);
260 /* Add COUNT copies of the N_NEW_FORMATS formats in
261 NEW_FORMATS to FORMATS. */
/* The check below uses overflow-detecting size arithmetic (xsum, xtimes,
   size_overflow_p) before the plain multiplication that follows. */
262 if (n_new_formats != 0
263 && size_overflow_p (xtimes (xsum (formats_used,
264 xtimes (count, n_new_formats)),
267 formats_needed = count * n_new_formats;
/* Grow the pool-allocated array when the new entries do not fit. */
268 if (formats_used + formats_needed > formats_allocated)
270 formats_allocated = formats_used + formats_needed;
271 *formats = pool_2nrealloc (pool, *formats, &formats_allocated,
/* Append the item's formats COUNT times. */
274 for (; count > 0; count--)
276 memcpy (&(*formats)[formats_used], new_formats,
277 sizeof **formats * n_new_formats);
278 formats_used += n_new_formats;
/* A comma between items is optional: the result of lex_match is
   ignored. */
281 lex_match (lexer, T_COMMA);
284 *n_formats = formats_used;
288 /* Checks whether FORMAT represents one of the special "formats"
289 for T, X, or /. If so, updates *RECORD or *COLUMN (or both)
290 as appropriate, and returns true. Otherwise, returns false
291 without any side effects. */
293 execute_placement_format (const struct fmt_spec *format,
294 int *record, int *column)
/* The type is switched on as an int because the PRS_TYPE_* values are
   stored into the type field by casting to enum fmt_type. */
296 switch ((int) format->type)
/* Relative movement: advance *COLUMN by the width stored in FORMAT. */
299 *column += format->w;
306 case PRS_TYPE_NEW_REC:
/* Sanity check that the type is an ordinary format, that is, below
   FMT_NUMBER_OF_FORMATS. */
312 assert (format->type < FMT_NUMBER_OF_FORMATS);
/* Helper for parse_column and parse_column_range.  Parses one integer
   column number in the given BASE (0 or 1) and stores the equivalent
   1-based column into *COLUMN.

   parse_column_range passes NEGATIVE as true for the second number of a
   range, after seeing a negative integer token; presumably the value is
   then negated before use -- confirm. */
318 parse_column__ (struct lexer *lexer, bool negative, int base, int *column)
320 assert (base == 0 || base == 1);
322 if (!lex_force_int (lexer))
324 long int value = lex_integer (lexer);
/* Convert from BASE-based to 1-based numbering. */
329 *column = value - base + 1;
/* Two diagnostics for an out-of-range column; presumably the first is
   used for 1-based input and the second for 0-based input -- confirm. */
333 lex_next_error (lexer, -1, -1,
334 _("Column positions for fields must be positive."));
336 lex_next_error (lexer, -1, -1,
337 _("Column positions for fields must not be negative."));
343 /* Parses a BASE-based column using LEXER. Returns true and
344 stores a 1-based column number into *COLUMN if successful,
345 otherwise emits an error message and returns false.
347 If BASE is 0, zero-based column numbers are parsed; if BASE is
348 1, 1-based column numbers are parsed. Regardless of BASE, the
349 value stored in *COLUMN is 1-based. */
352 parse_column (struct lexer *lexer, int base, int *column)
/* Delegates to parse_column__ with NEGATIVE set to false. */
354 return parse_column__ (lexer, false, base, column);
357 /* Parse a column or a range of columns, specified as a single
358 integer or two integers delimited by a dash. Stores the range
359 in *FIRST_COLUMN and *LAST_COLUMN. (If only a single integer
360 is given, it is stored in both.) If RANGE_SPECIFIED is
361 non-null, then *RANGE_SPECIFIED is set to true if the syntax
362 contained a dash, false otherwise. Returns true if
363 successful, false if the syntax was invalid or the values
364 specified did not make sense.
366 If BASE is 0, zero-based column numbers are parsed; if BASE is
367 1, 1-based column numbers are parsed. Regardless of BASE, the
368 values stored in *FIRST_COLUMN and *LAST_COLUMN are 1-based. */
371 parse_column_range (struct lexer *lexer, int base,
372 int *first_column, int *last_column,
373 bool *range_specified)
/* Token offset where the range starts, used for the error location
   below. */
375 int start_ofs = lex_ofs (lexer);
378 if (!parse_column__ (lexer, false, base, first_column))
/* A negative integer token here is treated as the end of a range:
   presumably the dash in syntax such as 1-5 is lexed as the sign of the
   second number.  parse_column__ is told so through its NEGATIVE
   argument. */
382 if (lex_is_integer (lexer) && lex_integer (lexer) < 0)
384 if (!parse_column__ (lexer, true, base, last_column))
/* NOTE(review): this check accepts an ending column equal to the
   starting column, although the message says "greater than". */
387 if (*last_column < *first_column)
389 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
390 _("The ending column for a field must be "
391 "greater than the starting column."));
396 *range_specified = true;
/* Single column: the range starts and ends at the same column. */
400 *last_column = *first_column;
402 *range_specified = false;
408 /* Parses a (possibly empty) sequence of slashes, each of which
409 may be followed by an integer. A slash on its own increases
410 *RECORD by 1 and sets *COLUMN to 1. A slash followed by an
411 integer sets *RECORD to the integer, as long as that increases
412 *RECORD, and sets *COLUMN to 1.
414 Returns true if successful, false on syntax error. */
416 parse_record_placement (struct lexer *lexer, int *record, int *column)
/* Each slash consumed starts one iteration. */
418 while (lex_match (lexer, T_SLASH))
420 if (lex_is_number (lexer))
/* An explicit record number must lie between *RECORD + 1 and INT_MAX, so
   it can only move forward. */
422 if (!lex_force_int_range (lexer, NULL, *record + 1, INT_MAX))
424 *record = lex_integer (lexer);
/* Sanity check on the resulting record number. */
431 assert (*record >= 1);