1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/data-io/placement-parser.h"
23 #include "data/format.h"
24 #include "language/lexer/format-parser.h"
25 #include "language/lexer/lexer.h"
26 #include "libpspp/message.h"
27 #include "libpspp/pool.h"
28 #include "libpspp/str.h"
30 #include "gl/xalloc.h"
34 #define _(msgid) gettext (msgid)
36 /* Extensions to the format specifiers used only for placement. */
/* Pseudo-types stored in the `type' member of a struct fmt_spec to
   represent T, X and / placement items.  Code in this file tells them
   apart from real formats by testing type < FMT_NUMBER_OF_FORMATS, so
   these values presumably lie above every real format type -- confirm.
   NOTE(review): the enclosing declaration is not visible in this copy. */
40 PRS_TYPE_T = SCHAR_MAX - 3, /* Tab to absolute column. */
41 PRS_TYPE_X, /* Skip columns. */
42 PRS_TYPE_NEW_REC /* Next record. */
/* Forward declarations of the two syntax-specific parsers that
   parse_var_placements dispatches to: one for column ranges and one
   for parenthesized Fortran-like format lists.  Both report success
   as a bool and hand back an array of formats plus its length. */
45 static bool fixed_parse_columns (struct lexer *, struct pool *, size_t var_cnt,
46 enum fmt_use, struct fmt_spec **, size_t *);
47 static bool fixed_parse_fortran (struct lexer *l, struct pool *, enum fmt_use,
48 struct fmt_spec **, size_t *);
50 /* Parses Fortran-like or column-based specifications for placing
51 variable data in fixed positions in columns and rows, that is,
52 formats like those parsed by DATA LIST or PRINT. Returns true
on success.  (The rest of this sentence is missing from this copy
of the file; each error path below reports a message with msg.)
55 If successful, formats for VAR_CNT variables are stored in
56 *FORMATS, and the number of formats required is stored in
57 *FORMAT_CNT. *FORMAT_CNT may be greater than VAR_CNT because
58 of T, X, and / "formats", but success guarantees that exactly
59 VAR_CNT variables will be placed by the output formats. The
60 caller should call execute_placement_format to process those
61 "formats" in interpreting the output.
63 Uses POOL for allocation. When the caller is finished
64 interpreting *FORMATS, POOL may be destroyed. */
66 parse_var_placements (struct lexer *lexer, struct pool *pool, size_t var_cnt,
68 struct fmt_spec **formats, size_t *format_cnt)
/* A leading number selects the column-based syntax; the whole job is
   delegated to fixed_parse_columns and its result is returned. */
71 if (lex_is_number (lexer))
72 return fixed_parse_columns (lexer, pool, var_cnt, use,
/* An opening parenthesis selects the Fortran-like syntax.  The
   parenthesis is consumed here, so fixed_parse_fortran starts just
   inside it. */
74 else if (lex_match (lexer, T_LPAREN))
76 size_t assignment_cnt;
79 if (!fixed_parse_fortran (lexer, pool, use, formats, format_cnt))
/* Count the entries that actually place a variable.  The comparison
   yields 0 or 1, so T, X and / pseudo-formats, whose types are not
   below FMT_NUMBER_OF_FORMATS, add nothing.
   NOTE(review): no initialization of assignment_cnt is visible in
   this copy -- confirm it is set to 0 before this loop. */
83 for (i = 0; i < *format_cnt; i++)
84 assignment_cnt += (*formats)[i].type < FMT_NUMBER_OF_FORMATS;
/* The Fortran-like list must supply exactly one real format per
   variable; otherwise report both counts. */
86 if (assignment_cnt != var_cnt)
88 msg (SE, _("Number of variables specified (%zu) "
89 "differs from number of variable formats (%zu)."),
90 var_cnt, assignment_cnt);
/* Neither a number nor an opening parenthesis follows the variable
   names. */
98 msg (SE, _("SPSS-like or Fortran-like format "
99 "specification expected after variable names."));
104 /* Implements parse_var_placements for column-based formats.

Parses a column range, optionally followed by a parenthesized format
type and/or number of decimal places, and splits the range into
VAR_CNT fields of equal width.  On success *FORMATS points to
VAR_CNT + 1 entries allocated from POOL: a PRS_TYPE_T entry that tabs
to the first column of the range, followed by VAR_CNT copies of the
field format.  *FORMAT_CNT is set to VAR_CNT + 1. */
106 fixed_parse_columns (struct lexer *lexer, struct pool *pool, size_t var_cnt,
108 struct fmt_spec **formats, size_t *format_cnt)
110 struct fmt_spec format;
/* Column numbers are parsed as 1-based (BASE argument 1).  Passing
   NULL for RANGE_SPECIFIED means the caller does not need to know
   whether a dash was present. */
114 if ( !parse_column_range (lexer, 1, &fc, &lc, NULL) )
117 /* Divide columns evenly. */
/* NOTE(review): both the division and the remainder below use
   var_cnt as divisor; no check for var_cnt == 0 is visible in this
   copy -- confirm that callers guarantee at least one variable. */
118 format.w = (lc - fc + 1) / var_cnt;
119 if ((lc - fc + 1) % var_cnt)
121 msg (SE, _("The %d columns %d-%d "
122 "can't be evenly divided into %zu fields."),
123 lc - fc + 1, fc, lc, var_cnt);
127 /* Format specifier. */
128 if (lex_match (lexer, T_LPAREN))
130 /* Get format type. */
131 if (lex_token (lexer) == T_ID)
133 if (!parse_format_specifier_name (lexer, &format.type))
/* A comma between the type and the decimals is optional, so the
   result of lex_match is deliberately ignored. */
135 lex_match (lexer, T_COMMA);
140 /* Get decimal places. */
141 if (lex_is_integer (lexer))
143 format.d = lex_integer (lexer);
149 if (!lex_force_match (lexer, T_RPAREN))
/* NOTE(review): the defaults for format.type and format.d when the
   parenthesized part is absent are not visible in this copy.
   fmt_check presumably validates the assembled format for USE. */
157 if (!fmt_check (&format, use))
/* One extra slot holds the leading "tab to column" entry. */
160 *formats = pool_nalloc (pool, var_cnt + 1, sizeof **formats);
161 *format_cnt = var_cnt + 1;
/* Entry 0 moves to the first column of the range; its width member
   carries the target column. */
162 (*formats)[0].type = PRS_TYPE_T;
163 (*formats)[0].w = fc;
/* Entries 1..VAR_CNT are identical copies of the field format, so the
   fields are laid out back to back. */
164 for (i = 1; i <= var_cnt; i++)
165 (*formats)[i] = format;
169 /* Implements parse_var_placements for Fortran-like formats.

Parses a list of format items up to and including the closing right
parenthesis; the opening parenthesis has already been consumed by the
caller.  Each item may be preceded by an integer repeat count and is
one of: a nested parenthesized list (parsed recursively), a slash
(next record), or an abstract format specifier, where the names T and
X are handled specially and any other name must be a known format
type.  Commas between items are optional.  The parsed entries are
accumulated in *FORMATS, allocated from POOL, and their number is
stored in *FORMAT_CNT. */
171 fixed_parse_fortran (struct lexer *lexer, struct pool *pool, enum fmt_use use,
172 struct fmt_spec **formats, size_t *format_cnt)
/* NOTE(review): the initial value of *formats is not visible in this
   copy; the growth code below passes it to pool_2nrealloc, so confirm
   it is initialized before the loop. */
174 size_t formats_allocated = 0;
175 size_t formats_used = 0;
/* One iteration per item; the loop ends when the closing parenthesis
   is consumed. */
178 while (!lex_match (lexer, T_RPAREN))
181 struct fmt_spec *new_formats;
182 size_t new_format_cnt;
184 size_t formats_needed;
/* Optional repeat count.
   NOTE(review): the default when no count is given and any range
   check on the count are not visible in this copy. */
187 if (lex_is_integer (lexer))
189 count = lex_integer (lexer);
195 /* Parse format specifier. */
196 if (lex_match (lexer, T_LPAREN))
198 /* Call ourselves recursively to handle parentheses. */
199 if (!fixed_parse_fortran (lexer, pool, use,
200 &new_formats, &new_format_cnt))
/* A slash is the "next record" pseudo-format. */
207 if (lex_match (lexer, T_SLASH))
208 f.type = PRS_TYPE_NEW_REC;
211 char type[FMT_TYPE_LEN_MAX + 1];
213 if (!parse_abstract_format_specifier (lexer, type, &f.w, &f.d))
/* T and X are matched case-insensitively before the name is looked
   up as a real format.  The statements run for each match are not
   visible in this copy; presumably they select PRS_TYPE_T and
   PRS_TYPE_X -- confirm. */
216 if (!strcasecmp (type, "T"))
218 else if (!strcasecmp (type, "X"))
226 if (!fmt_from_name (type, &f.type))
228 msg (SE, _("Unknown format type `%s'."), type);
231 if (!fmt_check (&f, use))
237 /* Add COUNT copies of the NEW_FORMAT_CNT formats in
238 NEW_FORMATS to FORMATS. */
/* Overflow guard for the size arithmetic, evaluated before the plain
   multiplication and addition below.  xsum, xtimes and
   size_overflow_p are presumably the gnulib xsize.h helpers.  The
   final operand and the statement run on overflow are not visible in
   this copy. */
239 if (new_format_cnt != 0
240 && size_overflow_p (xtimes (xsum (formats_used,
241 xtimes (count, new_format_cnt)),
244 formats_needed = count * new_format_cnt;
/* Grow the array only when the new entries do not fit.
   formats_allocated is passed by address, so pool_2nrealloc can update
   it; presumably it ends up at least as large as requested -- confirm. */
245 if (formats_used + formats_needed > formats_allocated)
247 formats_allocated = formats_used + formats_needed;
248 *formats = pool_2nrealloc (pool, *formats, &formats_allocated,
/* Append the item (or the whole nested list) COUNT times. */
251 for (; count > 0; count--)
253 memcpy (&(*formats)[formats_used], new_formats,
254 sizeof **formats * new_format_cnt);
255 formats_used += new_format_cnt;
/* Optional comma between items; the result is deliberately ignored. */
258 lex_match (lexer, T_COMMA);
/* Report only the entries actually filled in, not the allocated
   capacity. */
261 *format_cnt = formats_used;
265 /* Checks whether FORMAT represents one of the special "formats"
266 for T, X, or /. If so, updates *RECORD or *COLUMN (or both)
267 as appropriate, and returns true. Otherwise, returns false
268 without any side effects. */
270 execute_placement_format (const struct fmt_spec *format,
271 int *record, int *column)
273 switch (format->type)
/* Relative move: the column advances by the entry's width.
   NOTE(review): the case label for this statement is not visible in
   this copy; presumably it is PRS_TYPE_X ("skip columns") -- confirm. */
276 *column += format->w;
283 case PRS_TYPE_NEW_REC:
/* Anything that is not a placement pseudo-type must be a real format
   type. */
289 assert (format->type < FMT_NUMBER_OF_FORMATS);
294 /* Converts VALUE from a BASE-based column number. Returns true and
295 stores a 1-based column number into *COLUMN if successful,
296 otherwise emits an error message and returns false.
This function takes the already-parsed integer VALUE; it does not
read from a lexer itself. */
298 parse_column (int value, int base, int *column)
/* Only zero-based and one-based numbering are supported. */
300 assert (base == 0 || base == 1);
/* With BASE 1 the value is stored unchanged; with BASE 0 it is
   shifted up by one. */
301 *column = value - base + 1;
/* NOTE(review): the conditions selecting between these two messages
   are not visible in this copy; presumably the first is used for
   1-based input and the second for 0-based input -- confirm. */
305 msg (SE, _("Column positions for fields must be positive."));
307 msg (SE, _("Column positions for fields must not be negative."));
313 /* Parse a column or a range of columns, specified as a single
314 integer or two integers delimited by a dash. Stores the range
315 in *FIRST_COLUMN and *LAST_COLUMN. (If only a single integer
316 is given, it is stored in both.) If RANGE_SPECIFIED is
317 non-null, then *RANGE_SPECIFIED is set to true if the syntax
318 contained a dash, false otherwise. Returns true if
319 successful, false if the syntax was invalid or the values
320 specified did not make sense.
322 If BASE is 0, zero-based column numbers are parsed; if BASE is
323 1, 1-based column numbers are parsed. Regardless of BASE, the
324 values stored in *FIRST_COLUMN and *LAST_COLUMN are 1-based. */
327 parse_column_range (struct lexer *lexer, int base,
328 int *first_column, int *last_column,
329 bool *range_specified)
/* First column: an integer is required, and it is converted to a
   1-based column by parse_column. */
332 if (!lex_force_int (lexer)
333 || !parse_column (lex_integer (lexer), base, first_column))
/* A negative integer here is treated as the dash of a range followed
   by the last column, so the value is negated before conversion. */
338 if (lex_is_integer (lexer) && lex_integer (lexer) < 0)
340 if (!parse_column (-lex_integer (lexer), base, last_column))
/* NOTE(review): the check rejects only an end column below the start
   column, so a range such as 5-5 is accepted, although the message
   says "greater than". */
344 if (*last_column < *first_column)
346 msg (SE, _("The ending column for a field must be "
347 "greater than the starting column."));
/* NOTE(review): RANGE_SPECIFIED may be null (fixed_parse_columns
   passes NULL); the null checks guarding this write and the one
   further down are not visible in this copy -- confirm they exist. */
352 *range_specified = true;
/* No dash: a single column is both the first and the last column. */
356 *last_column = *first_column;
358 *range_specified = false;
364 /* Parses a (possibly empty) sequence of slashes, each of which
365 may be followed by an integer. A slash on its own increases
366 *RECORD by 1 and sets *COLUMN to 1. A slash followed by an
367 integer sets *RECORD to the integer, as long as that increases
368 *RECORD, and sets *COLUMN to 1.
370 Returns true if successful, false on syntax error. */
372 parse_record_placement (struct lexer *lexer, int *record, int *column)
/* One iteration per slash; with no slash at all the loop body never
   runs. */
374 while (lex_match (lexer, T_SLASH))
376 if (lex_is_integer (lexer))
/* An explicit record number must be strictly greater than the
   current one; an equal number is rejected too. */
378 if (lex_integer (lexer) <= *record)
/* NOTE(review): %ld assumes lex_integer returns long -- confirm
   against its declaration. */
380 msg (SE, _("The record number specified, %ld, is at or "
381 "before the previous record, %d. Data "
382 "fields must be listed in order of "
383 "increasing record number."),
384 lex_integer (lexer), *record);
387 *record = lex_integer (lexer);
/* Record numbers are 1-based after any placement has been applied. */
394 assert (*record >= 1);