1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <language/data-io/placement-parser.h>
23 #include <language/lexer/format-parser.h>
24 #include <language/lexer/lexer.h>
25 #include <libpspp/message.h>
26 #include <libpspp/pool.h>
27 #include <libpspp/str.h>
29 #include <data/format.h>
35 #define _(msgid) gettext (msgid)
/* Extensions to the format specifiers used only for
   placement.

   These pseudo-types share the `type' member of struct fmt_spec
   with real format types.  The rest of this file tells the two
   apart by comparing against FMT_NUMBER_OF_FORMATS, so the
   values are taken from just below SCHAR_MAX (presumably well
   above every real format type; confirm against data/format.h). */
enum
  {
    PRS_TYPE_T = SCHAR_MAX - 3, /* Tab to absolute column. */
    PRS_TYPE_X,                 /* Skip columns. */
    PRS_TYPE_NEW_REC            /* Next record. */
  };
/* Helpers for parse_var_placements, one per syntax style. */
static bool fixed_parse_columns (struct lexer *lexer, struct pool *pool,
                                 size_t var_cnt, bool for_input,
                                 struct fmt_spec **formats,
                                 size_t *format_cnt);
static bool fixed_parse_fortran (struct lexer *lexer, struct pool *pool,
                                 bool for_input,
                                 struct fmt_spec **formats,
                                 size_t *format_cnt);
51 /* Parses Fortran-like or column-based specifications for placing
52 variable data in fixed positions in columns and rows, that is,
53 formats like those parsed by DATA LIST or PRINT. Returns true
56 If successful, formats for VAR_CNT variables are stored in
57 *FORMATS, and the number of formats required is stored in
58 *FORMAT_CNT. *FORMAT_CNT may be greater than VAR_CNT because
59 of T, X, and / "formats", but success guarantees that exactly
60 VAR_CNT variables will be placed by the output formats. The
61 caller should call execute_placement_format to process those
62 "formats" in interpreting the output.
64 Uses POOL for allocation. When the caller is finished
65 interpreting *FORMATS, POOL may be destroyed. */
67 parse_var_placements (struct lexer *lexer, struct pool *pool, size_t var_cnt, bool for_input,
68 struct fmt_spec **formats, size_t *format_cnt)
71 if (lex_is_number (lexer))
72 return fixed_parse_columns (lexer, pool, var_cnt, for_input, formats, format_cnt);
73 else if (lex_match (lexer, '('))
75 size_t assignment_cnt;
78 if (!fixed_parse_fortran (lexer, pool, for_input, formats, format_cnt))
82 for (i = 0; i < *format_cnt; i++)
83 assignment_cnt += (*formats)[i].type < FMT_NUMBER_OF_FORMATS;
85 if (assignment_cnt != var_cnt)
87 msg (SE, _("Number of variables specified (%zu) "
88 "differs from number of variable formats (%zu)."),
89 var_cnt, assignment_cnt);
97 msg (SE, _("SPSS-like or Fortran-like format "
98 "specification expected after variable names."));
103 /* Implements parse_var_placements for column-based formats. */
105 fixed_parse_columns (struct lexer *lexer, struct pool *pool, size_t var_cnt, bool for_input,
106 struct fmt_spec **formats, size_t *format_cnt)
108 struct fmt_spec format;
112 if ( !parse_column_range (lexer, 1, &fc, &lc, NULL) )
115 /* Divide columns evenly. */
116 format.w = (lc - fc + 1) / var_cnt;
117 if ((lc - fc + 1) % var_cnt)
119 msg (SE, _("The %d columns %d-%d "
120 "can't be evenly divided into %zu fields."),
121 lc - fc + 1, fc, lc, var_cnt);
125 /* Format specifier. */
126 if (lex_match (lexer, '('))
128 /* Get format type. */
129 if (lex_token (lexer) == T_ID)
131 if (!parse_format_specifier_name (lexer, &format.type))
133 lex_match (lexer, ',');
138 /* Get decimal places. */
139 if (lex_is_integer (lexer))
141 format.d = lex_integer (lexer);
147 if (!lex_force_match (lexer, ')'))
155 if (!fmt_check (&format, for_input))
158 *formats = pool_nalloc (pool, var_cnt + 1, sizeof **formats);
159 *format_cnt = var_cnt + 1;
160 (*formats)[0].type = PRS_TYPE_T;
161 (*formats)[0].w = fc;
162 for (i = 1; i <= var_cnt; i++)
163 (*formats)[i] = format;
167 /* Implements parse_var_placements for Fortran-like formats. */
169 fixed_parse_fortran (struct lexer *lexer, struct pool *pool, bool for_input,
170 struct fmt_spec **formats, size_t *format_cnt)
172 size_t formats_allocated = 0;
173 size_t formats_used = 0;
176 while (!lex_match (lexer, ')'))
179 struct fmt_spec *new_formats;
180 size_t new_format_cnt;
182 size_t formats_needed;
185 if (lex_is_integer (lexer))
187 count = lex_integer (lexer);
193 /* Parse format specifier. */
194 if (lex_match (lexer, '('))
196 /* Call ourselves recursively to handle parentheses. */
197 if (!fixed_parse_fortran (lexer, pool, for_input,
198 &new_formats, &new_format_cnt))
205 if (lex_match (lexer, '/'))
206 f.type = PRS_TYPE_NEW_REC;
209 char type[FMT_TYPE_LEN_MAX + 1];
211 if (!parse_abstract_format_specifier (lexer, type, &f.w, &f.d))
214 if (!strcasecmp (type, "T"))
216 else if (!strcasecmp (type, "X"))
224 if (!fmt_from_name (type, &f.type))
226 msg (SE, _("Unknown format type \"%s\"."), type);
229 if (!fmt_check (&f, for_input))
235 /* Add COUNT copies of the NEW_FORMAT_CNT formats in
236 NEW_FORMATS to FORMATS. */
237 if (new_format_cnt != 0
238 && size_overflow_p (xtimes (xsum (formats_used,
239 xtimes (count, new_format_cnt)),
242 formats_needed = count * new_format_cnt;
243 if (formats_used + formats_needed > formats_allocated)
245 formats_allocated = formats_used + formats_needed;
246 *formats = pool_2nrealloc (pool, *formats, &formats_allocated,
249 for (; count > 0; count--)
251 memcpy (&(*formats)[formats_used], new_formats,
252 sizeof **formats * new_format_cnt);
253 formats_used += new_format_cnt;
256 lex_match (lexer, ',');
259 *format_cnt = formats_used;
263 /* Checks whether FORMAT represents one of the special "formats"
264 for T, X, or /. If so, updates *RECORD or *COLUMN (or both)
265 as appropriate, and returns true. Otherwise, returns false
266 without any side effects. */
268 execute_placement_format (const struct fmt_spec *format,
269 int *record, int *column)
271 switch (format->type)
274 *column += format->w;
281 case PRS_TYPE_NEW_REC:
287 assert (format->type < FMT_NUMBER_OF_FORMATS);
292 /* Parses a BASE-based column using LEXER. Returns true and
293 stores a 1-based column number into *COLUMN if successful,
294 otherwise emits an error message and returns false. */
296 parse_column (struct lexer *lexer, int base, int *column)
298 assert (base == 0 || base == 1);
299 if (!lex_force_int (lexer))
301 *column = lex_integer (lexer) - base + 1;
305 msg (SE, _("Column positions for fields must be positive."));
307 msg (SE, _("Column positions for fields must not be negative."));
314 /* Parse a column or a range of columns, specified as a single
315 integer or two integers delimited by a dash. Stores the range
316 in *FIRST_COLUMN and *LAST_COLUMN. (If only a single integer
317 is given, it is stored in both.) If RANGE_SPECIFIED is
318 non-null, then *RANGE_SPECIFIED is set to true if the syntax
319 contained a dash, false otherwise. Returns true if
320 successful, false if the syntax was invalid or the values
321 specified did not make sense.
323 If BASE is 0, zero-based column numbers are parsed; if BASE is
324 1, 1-based column numbers are parsed. Regardless of BASE, the
325 values stored in *FIRST_COLUMN and *LAST_COLUMN are
328 parse_column_range (struct lexer *lexer, int base,
329 int *first_column, int *last_column,
330 bool *range_specified)
333 if (!parse_column (lexer, base, first_column))
337 lex_negative_to_dash (lexer);
338 if (lex_match (lexer, '-'))
340 if (!parse_column (lexer, base, last_column))
342 if (*last_column < *first_column)
344 msg (SE, _("The ending column for a field must be "
345 "greater than the starting column."));
350 *range_specified = true;
354 *last_column = *first_column;
356 *range_specified = false;
362 /* Parses a (possibly empty) sequence of slashes, each of which
363 may be followed by an integer. A slash on its own increases
364 *RECORD by 1 and sets *COLUMN to 1. A slash followed by an
365 integer sets *RECORD to the integer, as long as that increases
366 *RECORD, and sets *COLUMN to 1.
368 Returns true if successful, false on syntax error. */
370 parse_record_placement (struct lexer *lexer, int *record, int *column)
372 while (lex_match (lexer, '/'))
374 if (lex_is_integer (lexer))
376 if (lex_integer (lexer) <= *record)
378 msg (SE, _("The record number specified, %ld, is at or "
379 "before the previous record, %d. Data "
380 "fields must be listed in order of "
381 "increasing record number."),
382 lex_integer (lexer), *record);
385 *record = lex_integer (lexer);
392 assert (*record >= 1);