1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <language/data-io/placement-parser.h>

#include <assert.h>
#include <limits.h>
#include <string.h>
#include <strings.h>

#include <language/lexer/format-parser.h>
#include <language/lexer/lexer.h>
#include <libpspp/message.h>
#include <libpspp/pool.h>
#include <libpspp/str.h>

#include "xalloc.h"
#include "xsize.h"

#include "gettext.h"
#define _(msgid) gettext (msgid)
/* Extensions to the format specifiers used only for
   placement.

   The parser in this file tells these apart from real formats by
   testing `type < FMT_NUMBER_OF_FORMATS', so every value here
   must stay at or above FMT_NUMBER_OF_FORMATS. */
enum
  {
    PRS_TYPE_T = SCHAR_MAX - 3, /* Tab to absolute column. */
    PRS_TYPE_X,                 /* Skip columns. */
    PRS_TYPE_NEW_REC            /* Next record. */
  };
/* Helpers for parse_var_placements, one per supported syntax. */
static bool fixed_parse_columns (struct lexer *lexer, struct pool *pool,
                                 size_t var_cnt, bool for_input,
                                 struct fmt_spec **formats,
                                 size_t *format_cnt);
static bool fixed_parse_fortran (struct lexer *lexer, struct pool *pool,
                                 bool for_input,
                                 struct fmt_spec **formats,
                                 size_t *format_cnt);
49 /* Parses Fortran-like or column-based specifications for placing
50 variable data in fixed positions in columns and rows, that is,
51 formats like those parsed by DATA LIST or PRINT. Returns true
54 If successful, formats for VAR_CNT variables are stored in
55 *FORMATS, and the number of formats required is stored in
56 *FORMAT_CNT. *FORMAT_CNT may be greater than VAR_CNT because
57 of T, X, and / "formats", but success guarantees that exactly
58 VAR_CNT variables will be placed by the output formats. The
59 caller should call execute_placement_format to process those
60 "formats" in interpreting the output.
62 Uses POOL for allocation. When the caller is finished
63 interpreting *FORMATS, POOL may be destroyed. */
65 parse_var_placements (struct lexer *lexer, struct pool *pool, size_t var_cnt, bool for_input,
66 struct fmt_spec **formats, size_t *format_cnt)
69 if (lex_is_number (lexer))
70 return fixed_parse_columns (lexer, pool, var_cnt, for_input, formats, format_cnt);
71 else if (lex_match (lexer, '('))
73 size_t assignment_cnt;
76 if (!fixed_parse_fortran (lexer, pool, for_input, formats, format_cnt))
80 for (i = 0; i < *format_cnt; i++)
81 assignment_cnt += (*formats)[i].type < FMT_NUMBER_OF_FORMATS;
83 if (assignment_cnt != var_cnt)
85 msg (SE, _("Number of variables specified (%zu) "
86 "differs from number of variable formats (%zu)."),
87 var_cnt, assignment_cnt);
95 msg (SE, _("SPSS-like or Fortran-like format "
96 "specification expected after variable names."));
101 /* Implements parse_var_placements for column-based formats. */
103 fixed_parse_columns (struct lexer *lexer, struct pool *pool, size_t var_cnt, bool for_input,
104 struct fmt_spec **formats, size_t *format_cnt)
106 struct fmt_spec format;
110 if ( !parse_column_range (lexer, 1, &fc, &lc, NULL) )
113 /* Divide columns evenly. */
114 format.w = (lc - fc + 1) / var_cnt;
115 if ((lc - fc + 1) % var_cnt)
117 msg (SE, _("The %d columns %d-%d "
118 "can't be evenly divided into %zu fields."),
119 lc - fc + 1, fc, lc, var_cnt);
123 /* Format specifier. */
124 if (lex_match (lexer, '('))
126 /* Get format type. */
127 if (lex_token (lexer) == T_ID)
129 if (!parse_format_specifier_name (lexer, &format.type))
131 lex_match (lexer, ',');
136 /* Get decimal places. */
137 if (lex_is_integer (lexer))
139 format.d = lex_integer (lexer);
145 if (!lex_force_match (lexer, ')'))
153 if (!fmt_check (&format, for_input))
156 *formats = pool_nalloc (pool, var_cnt + 1, sizeof **formats);
157 *format_cnt = var_cnt + 1;
158 (*formats)[0].type = PRS_TYPE_T;
159 (*formats)[0].w = fc;
160 for (i = 1; i <= var_cnt; i++)
161 (*formats)[i] = format;
165 /* Implements parse_var_placements for Fortran-like formats. */
167 fixed_parse_fortran (struct lexer *lexer, struct pool *pool, bool for_input,
168 struct fmt_spec **formats, size_t *format_cnt)
170 size_t formats_allocated = 0;
171 size_t formats_used = 0;
174 while (!lex_match (lexer, ')'))
177 struct fmt_spec *new_formats;
178 size_t new_format_cnt;
180 size_t formats_needed;
183 if (lex_is_integer (lexer))
185 count = lex_integer (lexer);
191 /* Parse format specifier. */
192 if (lex_match (lexer, '('))
194 /* Call ourselves recursively to handle parentheses. */
195 if (!fixed_parse_fortran (lexer, pool, for_input,
196 &new_formats, &new_format_cnt))
203 if (lex_match (lexer, '/'))
204 f.type = PRS_TYPE_NEW_REC;
207 char type[FMT_TYPE_LEN_MAX + 1];
209 if (!parse_abstract_format_specifier (lexer, type, &f.w, &f.d))
212 if (!strcasecmp (type, "T"))
214 else if (!strcasecmp (type, "X"))
222 if (!fmt_from_name (type, &f.type))
224 msg (SE, _("Unknown format type \"%s\"."), type);
227 if (!fmt_check (&f, for_input))
233 /* Add COUNT copies of the NEW_FORMAT_CNT formats in
234 NEW_FORMATS to FORMATS. */
235 if (new_format_cnt != 0
236 && size_overflow_p (xtimes (xsum (formats_used,
237 xtimes (count, new_format_cnt)),
240 formats_needed = count * new_format_cnt;
241 if (formats_used + formats_needed > formats_allocated)
243 formats_allocated = formats_used + formats_needed;
244 *formats = pool_2nrealloc (pool, *formats, &formats_allocated,
247 for (; count > 0; count--)
249 memcpy (&(*formats)[formats_used], new_formats,
250 sizeof **formats * new_format_cnt);
251 formats_used += new_format_cnt;
254 lex_match (lexer, ',');
257 *format_cnt = formats_used;
261 /* Checks whether FORMAT represents one of the special "formats"
262 for T, X, or /. If so, updates *RECORD or *COLUMN (or both)
263 as appropriate, and returns true. Otherwise, returns false
264 without any side effects. */
266 execute_placement_format (const struct fmt_spec *format,
267 int *record, int *column)
269 switch (format->type)
272 *column += format->w;
279 case PRS_TYPE_NEW_REC:
285 assert (format->type < FMT_NUMBER_OF_FORMATS);
290 /* Parses a BASE-based column using LEXER. Returns true and
291 stores a 1-based column number into *COLUMN if successful,
292 otherwise emits an error message and returns false. */
294 parse_column (struct lexer *lexer, int base, int *column)
296 assert (base == 0 || base == 1);
297 if (!lex_force_int (lexer))
299 *column = lex_integer (lexer) - base + 1;
303 msg (SE, _("Column positions for fields must be positive."));
305 msg (SE, _("Column positions for fields must not be negative."));
312 /* Parse a column or a range of columns, specified as a single
313 integer or two integers delimited by a dash. Stores the range
314 in *FIRST_COLUMN and *LAST_COLUMN. (If only a single integer
315 is given, it is stored in both.) If RANGE_SPECIFIED is
316 non-null, then *RANGE_SPECIFIED is set to true if the syntax
317 contained a dash, false otherwise. Returns true if
318 successful, false if the syntax was invalid or the values
319 specified did not make sense.
321 If BASE is 0, zero-based column numbers are parsed; if BASE is
322 1, 1-based column numbers are parsed. Regardless of BASE, the
323 values stored in *FIRST_COLUMN and *LAST_COLUMN are
326 parse_column_range (struct lexer *lexer, int base,
327 int *first_column, int *last_column,
328 bool *range_specified)
331 if (!parse_column (lexer, base, first_column))
335 lex_negative_to_dash (lexer);
336 if (lex_match (lexer, '-'))
338 if (!parse_column (lexer, base, last_column))
340 if (*last_column < *first_column)
342 msg (SE, _("The ending column for a field must be "
343 "greater than the starting column."));
348 *range_specified = true;
352 *last_column = *first_column;
354 *range_specified = false;
360 /* Parses a (possibly empty) sequence of slashes, each of which
361 may be followed by an integer. A slash on its own increases
362 *RECORD by 1 and sets *COLUMN to 1. A slash followed by an
363 integer sets *RECORD to the integer, as long as that increases
364 *RECORD, and sets *COLUMN to 1.
366 Returns true if successful, false on syntax error. */
368 parse_record_placement (struct lexer *lexer, int *record, int *column)
370 while (lex_match (lexer, '/'))
372 if (lex_is_integer (lexer))
374 if (lex_integer (lexer) <= *record)
376 msg (SE, _("The record number specified, %ld, is at or "
377 "before the previous record, %d. Data "
378 "fields must be listed in order of "
379 "increasing record number."),
380 lex_integer (lexer), *record);
383 *record = lex_integer (lexer);
390 assert (*record >= 1);