1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/data-io/placement-parser.h"
23 #include "data/format.h"
24 #include "language/lexer/format-parser.h"
25 #include "language/lexer/lexer.h"
26 #include "libpspp/message.h"
27 #include "libpspp/pool.h"
28 #include "libpspp/str.h"
30 #include "gl/xalloc.h"
/* Shorthand for gettext.  The message strings passed to msg () in this
   file are written as _("..."). */
34 #define _(msgid) gettext (msgid)
36 /* Extensions to the format specifiers used only for
   placement.  These are pseudo-format type codes rather than real
   formats; execute_placement_format is the function in this file
   that interprets them.  Code in this file tells real formats apart
   from these codes by testing `type < FMT_NUMBER_OF_FORMATS', so the
   values below are presumably all at or above FMT_NUMBER_OF_FORMATS.

   NOTE(review): in this copy of the file the last line of this
   comment and the construct enclosing the three enumerators
   (presumably `enum { ... }') are not visible -- confirm against the
   full file. */
40 PRS_TYPE_T = SCHAR_MAX - 3, /* Tab to absolute column. */
41 PRS_TYPE_X, /* Skip columns. */
42 PRS_TYPE_NEW_REC /* Next record. */
/* Forward declarations of the two helpers that parse_var_placements
   calls: fixed_parse_columns for column-based syntax and
   fixed_parse_fortran for Fortran-like syntax. */
45 static bool fixed_parse_columns (struct lexer *, struct pool *, size_t var_cnt,
46 enum fmt_use, struct fmt_spec **, size_t *);
47 static bool fixed_parse_fortran (struct lexer *l, struct pool *, enum fmt_use,
48 struct fmt_spec **, size_t *);
50 /* Parses Fortran-like or column-based specifications for placing
51 variable data in fixed positions in columns and rows, that is,
52 formats like those parsed by DATA LIST or PRINT. Returns true
   on success.
55 The formats parsed are either input or output formats, according
   to USE.
58 If USE is FMT_FOR_INPUT, then T, X, and / "formats" are parsed,
59 in addition to regular formats. If USE is FMT_FOR_OUTPUT, then
60 T and X "formats" are parsed but not /.
62 If successful, formats for VAR_CNT variables are stored in
63 *FORMATS, and the number of formats required is stored in
64 *FORMAT_CNT. *FORMAT_CNT may be greater than VAR_CNT because
65 of T, X, and / "formats", but success guarantees that exactly
66 VAR_CNT variables will be placed by the output formats. The
67 caller should call execute_placement_format to process those
68 "formats" in interpreting the output.
70 Uses POOL for allocation. When the caller is finished
71 interpreting *FORMATS, POOL may be destroyed. */
/* NOTE(review): several short lines of this function are not visible
   in this copy of the file: the return type, the `use' parameter that
   the body refers to, the declaration of `i', any initialization of
   `assignment_cnt' before it is accumulated into, and the statements
   that follow each failure test.  Confirm against the full file
   before relying on the comments below for control flow. */
73 parse_var_placements (struct lexer *lexer, struct pool *pool, size_t var_cnt,
75 struct fmt_spec **formats, size_t *format_cnt)
/* A number as the next token selects the column-based syntax; the
   helper's result is returned directly. */
78 if (lex_is_number (lexer))
79 return fixed_parse_columns (lexer, pool, var_cnt, use,
/* A left parenthesis selects the Fortran-like syntax.  lex_match is
   given T_LPAREN here, and fixed_parse_fortran then loops until it
   matches T_RPAREN. */
81 else if (lex_match (lexer, T_LPAREN))
83 size_t assignment_cnt;
86 if (!fixed_parse_fortran (lexer, pool, use, formats, format_cnt))
/* Count the entries whose type is below FMT_NUMBER_OF_FORMATS, that
   is, the ones treated as real formats rather than as PRS_TYPE_*
   placement codes.  Each comparison contributes 0 or 1. */
90 for (i = 0; i < *format_cnt; i++)
91 assignment_cnt += (*formats)[i].type < FMT_NUMBER_OF_FORMATS;
/* The number of real formats must equal the number of variables. */
93 if (assignment_cnt != var_cnt)
95 msg (SE, _("Number of variables specified (%zu) "
96 "differs from number of variable formats (%zu)."),
97 var_cnt, assignment_cnt);
/* Error for input that starts with neither a number nor a left
   parenthesis (presumably the final `else' branch; the branch keyword
   is not visible here). */
105 msg (SE, _("SPSS-like or Fortran-like format "
106 "specification expected after variable names."));
111 /* Implements parse_var_placements for column-based formats. */
/* Reads a 1-based column range with parse_column_range, splits its
   width evenly among VAR_CNT fields, optionally reads a parenthesized
   format type and number of decimal places, and validates the
   resulting format with fmt_check.  On the path shown, *FORMATS
   receives VAR_CNT + 1 entries allocated from POOL: a PRS_TYPE_T entry
   whose width is the first column, followed by VAR_CNT copies of the
   parsed format.

   NOTE(review): not visible in this copy of the file: the return type,
   the `use' parameter, the declarations of `fc', `lc' and `i', the
   statements taken after each failure test, and whatever assigns
   format.type and format.d when no type or no decimals are written.
   Confirm against the full file. */
113 fixed_parse_columns (struct lexer *lexer, struct pool *pool, size_t var_cnt,
115 struct fmt_spec **formats, size_t *format_cnt)
117 struct fmt_spec format;
/* Base 1: columns are written 1-based.  NULL means the caller does
   not ask whether a dash was present. */
121 if ( !parse_column_range (lexer, 1, &fc, &lc, NULL) )
/* NOTE(review): VAR_CNT is a size_t divisor; a VAR_CNT of zero would
   divide by zero in the next two statements.  No guard is visible in
   this copy -- confirm that callers never pass 0. */
124 /* Divide columns evenly. */
125 format.w = (lc - fc + 1) / var_cnt;
/* A nonzero remainder means the range cannot be split into equal
   fields. */
126 if ((lc - fc + 1) % var_cnt)
128 msg (SE, _("The %d columns %d-%d "
129 "can't be evenly divided into %zu fields."),
130 lc - fc + 1, fc, lc, var_cnt);
134 /* Format specifier. */
135 if (lex_match (lexer, T_LPAREN))
137 /* Get format type. */
138 if (lex_token (lexer) == T_ID)
140 if (!parse_format_specifier_name (lexer, &format.type))
/* The comma after the type name is optional: the result of lex_match
   is ignored. */
142 lex_match (lexer, T_COMMA);
147 /* Get decimal places. */
148 if (lex_is_integer (lexer))
150 format.d = lex_integer (lexer);
/* The closing parenthesis is mandatory once one was opened. */
156 if (!lex_force_match (lexer, T_RPAREN))
/* Validate the assembled format for the requested use. */
164 if (!fmt_check (&format, use))
/* One extra slot in front of the VAR_CNT formats holds the tab entry. */
167 *formats = pool_nalloc (pool, var_cnt + 1, sizeof **formats);
168 *format_cnt = var_cnt + 1;
/* Entry 0 is a PRS_TYPE_T entry carrying the first column in its
   width; only its type and w members are assigned here. */
169 (*formats)[0].type = PRS_TYPE_T;
170 (*formats)[0].w = fc;
/* Entries 1 through VAR_CNT all get the same format. */
171 for (i = 1; i <= var_cnt; i++)
172 (*formats)[i] = format;
176 /* Implements parse_var_placements for Fortran-like formats. */
/* Parses items until a right parenthesis is matched.  Each item may
   start with an integer repeat count and is then one of: a
   parenthesized group, handled by a recursive call; a slash, accepted
   only when USE is FMT_FOR_INPUT and recorded as PRS_TYPE_NEW_REC; or
   a format specifier whose name is T, X, or a format name accepted by
   fmt_from_name and fmt_check.  COUNT copies of the item's formats are
   appended to *FORMATS, which is grown from POOL as needed.  A comma
   after an item is optional.  The number of entries appended is stored
   in *FORMAT_CNT.

   NOTE(review): not visible in this copy of the file: the return type,
   the declarations of `f' and `count', any initial value given to
   *FORMATS before the first pool_2nrealloc call, the value of `count'
   when no integer is written, the setup of `new_formats' and
   `new_format_cnt' for an item that is not a parenthesized group, the
   bodies of the T and X branches, and the statements taken after each
   failure test.  Confirm against the full file. */
178 fixed_parse_fortran (struct lexer *lexer, struct pool *pool, enum fmt_use use,
179 struct fmt_spec **formats, size_t *format_cnt)
181 size_t formats_allocated = 0;
182 size_t formats_used = 0;
/* Loop until the closing parenthesis is consumed. */
185 while (!lex_match (lexer, T_RPAREN))
188 struct fmt_spec *new_formats;
189 size_t new_format_cnt;
191 size_t formats_needed;
/* Optional repeat count.  NOTE(review): no range check on the integer
   is visible here. */
194 if (lex_is_integer (lexer))
196 count = lex_integer (lexer);
202 /* Parse format specifier. */
203 if (lex_match (lexer, T_LPAREN))
205 /* Call ourselves recursively to handle parentheses. */
206 if (!fixed_parse_fortran (lexer, pool, use,
207 &new_formats, &new_format_cnt))
/* A slash is matched as the new-record code only for input formats. */
214 if (use == FMT_FOR_INPUT && lex_match (lexer, T_SLASH))
215 f.type = PRS_TYPE_NEW_REC;
218 char type[FMT_TYPE_LEN_MAX + 1];
/* Reads a type name plus width and decimals without first requiring
   the name to be a known format, so that T and X can be recognized
   below. */
220 if (!parse_abstract_format_specifier (lexer, type, &f.w, &f.d))
/* T and X are compared case-insensitively. */
223 if (!strcasecmp (type, "T"))
225 else if (!strcasecmp (type, "X"))
/* Any other name must be a known format type that is valid for USE. */
233 if (!fmt_from_name (type, &f.type))
235 msg (SE, _("Unknown format type `%s'."), type);
238 if (!fmt_check (&f, use))
244 /* Add COUNT copies of the NEW_FORMAT_CNT formats in
245 NEW_FORMATS to FORMATS. */
/* Overflow guard: xsum and xtimes are saturating size_t operations
   and size_overflow_p tests for the saturated value.  The final
   operand of the outer xtimes and the action taken on overflow are on
   lines not visible in this copy. */
246 if (new_format_cnt != 0
247 && size_overflow_p (xtimes (xsum (formats_used,
248 xtimes (count, new_format_cnt)),
251 formats_needed = count * new_format_cnt;
/* Grow the array only when the new entries do not fit. */
252 if (formats_used + formats_needed > formats_allocated)
254 formats_allocated = formats_used + formats_needed;
255 *formats = pool_2nrealloc (pool, *formats, &formats_allocated,
/* Append the NEW_FORMAT_CNT entries once per repetition; `count' is
   consumed by this loop. */
258 for (; count > 0; count--)
260 memcpy (&(*formats)[formats_used], new_formats,
261 sizeof **formats * new_format_cnt);
262 formats_used += new_format_cnt;
/* The comma between items is optional: the result is ignored. */
265 lex_match (lexer, T_COMMA);
268 *format_cnt = formats_used;
272 /* Checks whether FORMAT represents one of the special "formats"
273 for T, X, or /. If so, updates *RECORD or *COLUMN (or both)
274 as appropriate, and returns true. Otherwise, returns false
275 without any side effects. */
/* NOTE(review): in this copy of the file only one `case' label, one
   column update and the final assertion are visible; the other case
   labels, the remaining updates and the return statements are not.
   Confirm against the full file. */
277 execute_placement_format (const struct fmt_spec *format,
278 int *record, int *column)
/* The type is cast to int before switching, presumably because the
   PRS_TYPE_* codes are not members of the format type's own
   enumeration -- confirm. */
280 switch ((int) format->type)
/* Advances the column by the entry's width (the case label this
   statement belongs to is not visible; presumably PRS_TYPE_X, which
   is described in this file as "Skip columns"). */
283 *column += format->w;
290 case PRS_TYPE_NEW_REC:
/* Any type that is not a placement code must be a real format. */
296 assert (format->type < FMT_NUMBER_OF_FORMATS);
301 /* Converts VALUE, a BASE-based column number, to 1-based form.  Returns true and
302 stores a 1-based column number into *COLUMN if successful,
303 otherwise emits an error message and returns false. */
/* This function takes the already-read integer VALUE; it does not
   take a lexer.  BASE must be 0 or 1.

   NOTE(review): the test that guards the two error messages and the
   return statements are not visible in this copy of the file.  As
   shown, *COLUMN is assigned before either message is issued.  The
   callers in this file pass the result of lex_integer, which is
   printed elsewhere in this file with %ld, into the `int' parameter
   VALUE -- confirm that the narrowing is intended. */
305 parse_column (int value, int base, int *column)
307 assert (base == 0 || base == 1);
/* With BASE 1 this stores VALUE unchanged; with BASE 0 it stores
   VALUE + 1. */
308 *column = value - base + 1;
/* Two wordings of the error; presumably the first is used for 1-based
   input and the second for 0-based input (the selecting test is not
   visible). */
312 msg (SE, _("Column positions for fields must be positive."));
314 msg (SE, _("Column positions for fields must not be negative."));
320 /* Parse a column or a range of columns, specified as a single
321 integer or two integers delimited by a dash. Stores the range
322 in *FIRST_COLUMN and *LAST_COLUMN. (If only a single integer
323 is given, it is stored in both.) If RANGE_SPECIFIED is
324 non-null, then *RANGE_SPECIFIED is set to true if the syntax
325 contained a dash, false otherwise. Returns true if
326 successful, false if the syntax was invalid or the values
327 specified did not make sense.
329 If BASE is 0, zero-based column numbers are parsed; if BASE is
330 1, 1-based column numbers are parsed. Regardless of BASE, the
331 values stored in *FIRST_COLUMN and *LAST_COLUMN are
   1-based (both are produced by parse_column).

   NOTE(review): not visible in this copy of the file: the end of the
   original comment, the return type, the statements that advance the
   lexer past each integer, the statements taken after each failure
   test, and any null check around the two stores to *RANGE_SPECIFIED
   below.  fixed_parse_columns passes NULL for RANGE_SPECIFIED, so
   confirm that those stores are guarded in the full file. */
334 parse_column_range (struct lexer *lexer, int base,
335 int *first_column, int *last_column,
336 bool *range_specified)
/* First column: an integer is required. */
339 if (!lex_force_int (lexer)
340 || !parse_column (lex_integer (lexer), base, first_column))
/* Last column: the code looks for a negative integer token and
   negates it, so "-LAST" is evidently read as a single negative
   number rather than as a dash followed by a number. */
345 if (lex_is_integer (lexer) && lex_integer (lexer) < 0)
347 if (!parse_column (-lex_integer (lexer), base, last_column))
/* Only LAST < FIRST is rejected; LAST equal to FIRST passes this
   test. */
351 if (*last_column < *first_column)
353 msg (SE, _("The ending column for a field must be "
354 "greater than the starting column."));
359 *range_specified = true;
/* No second number: the range is the single column already parsed. */
363 *last_column = *first_column;
365 *range_specified = false;
371 /* Parses a (possibly empty) sequence of slashes, each of which
372 may be followed by an integer. A slash on its own increases
373 *RECORD by 1 and sets *COLUMN to 1. A slash followed by an
374 integer sets *RECORD to the integer, as long as that increases
375 *RECORD, and sets *COLUMN to 1.
377 Returns true if successful, false on syntax error. */
/* NOTE(review): not visible in this copy of the file: the return
   type, the statement that advances the lexer past the integer, the
   handling of a slash with no integer, the assignment to *COLUMN, and
   the return statements.  Confirm against the full file. */
379 parse_record_placement (struct lexer *lexer, int *record, int *column)
/* One iteration per slash consumed. */
381 while (lex_match (lexer, T_SLASH))
383 if (lex_is_integer (lexer))
/* An explicit record number must be strictly greater than the current
   record. */
385 if (lex_integer (lexer) <= *record)
387 msg (SE, _("The record number specified, %ld, is at or "
388 "before the previous record, %d. Data "
389 "fields must be listed in order of "
390 "increasing record number."),
391 lex_integer (lexer), *record);
/* The accepted number becomes the current record.  It is stored into
   an `int' although it is printed above with %ld. */
394 *record = lex_integer (lexer);
/* The record number is expected to be at least 1 at this point. */
401 assert (*record >= 1);