1 /* PSPP - computes sample statistics.
2 Copyright (C) 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include <language/data-io/placement-parser.h>
26 #include <language/lexer/format-parser.h>
27 #include <language/lexer/lexer.h>
28 #include <libpspp/message.h>
29 #include <libpspp/pool.h>
30 #include <libpspp/str.h>
36 #define _(msgid) gettext (msgid)
38 /* Extensions to the format specifiers used only for
42 PRS_TYPE_T = SCHAR_MAX - 3, /* Tab to absolute column. */
43 PRS_TYPE_X, /* Skip columns. */
44 PRS_TYPE_NEW_REC /* Next record. */
/* Forward declarations of the two helpers that parse_var_placements
   dispatches to: fixed_parse_columns handles placement that starts
   with a number, fixed_parse_fortran handles a parenthesized format
   list.  Both hand back a format array and its length through their
   last two arguments. */
47 static bool fixed_parse_columns (struct lexer *, struct pool *, size_t var_cnt, bool for_input,
48 struct fmt_spec **, size_t *);
49 static bool fixed_parse_fortran (struct lexer *l, struct pool *, bool for_input,
50 struct fmt_spec **, size_t *);
52 /* Parses Fortran-like or column-based specifications for placing
53 variable data in fixed positions in columns and rows, that is,
54 formats like those parsed by DATA LIST or PRINT. Returns true
57 If successful, formats for VAR_CNT variables are stored in
58 *FORMATS, and the number of formats required is stored in
59 *FORMAT_CNT. *FORMAT_CNT may be greater than VAR_CNT because
60 of T, X, and / "formats", but success guarantees that exactly
61 VAR_CNT variables will be placed by the output formats. The
62 caller should call execute_placement_format to process those
63 "formats" in interpreting the output.
65 Uses POOL for allocation. When the caller is finished
66 interpreting *FORMATS, POOL may be destroyed. */
68 parse_var_placements (struct lexer *lexer, struct pool *pool, size_t var_cnt, bool for_input,
69 struct fmt_spec **formats, size_t *format_cnt)
/* A leading number selects column-based placement; the column
   parser's result is returned unchanged. */
72 if (lex_is_number (lexer))
73 return fixed_parse_columns (lexer, pool, var_cnt, for_input, formats, format_cnt);
/* An opening parenthesis selects a Fortran-like format list. */
74 else if (lex_match (lexer, '('))
/* NOTE(review): assignment_cnt is accumulated with += below, so it
   must be zeroed before the counting loop -- confirm. */
76 size_t assignment_cnt;
79 if (!fixed_parse_fortran (lexer, pool, for_input, formats, format_cnt))
/* Count the entries that actually place a variable.  Only entries
   whose type is below FMT_NUMBER_OF_FORMATS are counted; presumably
   the T, X and / pseudo-formats (defined near SCHAR_MAX) all lie at
   or above that bound. */
83 for (i = 0; i < *format_cnt; i++)
84 assignment_cnt += (*formats)[i].type < FMT_NUMBER_OF_FORMATS;
/* The format list must supply exactly one real format per variable. */
86 if (assignment_cnt != var_cnt)
/* Both counts are size_t; they are cast to int to match the %d
   conversions in the message. */
88 msg (SE, _("Number of variables specified (%d) "
89 "differs from number of variable formats (%d)."),
90 (int) var_cnt, (int) assignment_cnt);
/* Error reported when the input is neither of the two accepted
   forms (presumably the final else branch -- confirm). */
98 msg (SE, _("SPSS-like or Fortran-like format "
99 "specification expected after variable names."));
104 /* Implements parse_var_placements for column-based formats.

   Parses a column or column range, splits its width evenly among
   VAR_CNT fields, then parses an optional parenthesized format type
   and decimal count.  The resulting format is validated with
   fmt_check for input or output according to FOR_INPUT.

   On the path visible here, *FORMATS receives VAR_CNT + 1 entries
   allocated from POOL: one PRS_TYPE_T entry whose width is the first
   column, followed by VAR_CNT copies of the parsed format.
   *FORMAT_CNT is set to VAR_CNT + 1. */
106 fixed_parse_columns (struct lexer *lexer, struct pool *pool, size_t var_cnt, bool for_input,
107 struct fmt_spec **formats, size_t *format_cnt)
109 struct fmt_spec format;
/* Whether a dash was used is not needed here, so NULL is passed for
   the RANGE_SPECIFIED argument. */
113 if ( !parse_column_range (lexer, &fc, &lc, NULL) )
116 /* Divide columns evenly. */
/* NOTE(review): var_cnt == 0 would divide by zero here and in the
   remainder test below; presumably callers always pass at least one
   variable -- confirm. */
117 format.w = (lc - fc + 1) / var_cnt;
118 if ((lc - fc + 1) % var_cnt)
/* NOTE(review): the last argument, var_cnt, is a size_t but is
   printed with %d.  The similar message in parse_var_placements
   casts to int; this call should probably do the same. */
120 msg (SE, _("The %d columns %d-%d "
121 "can't be evenly divided into %d fields."),
122 lc - fc + 1, fc, lc, var_cnt);
126 /* Format specifier. */
127 if (lex_match (lexer, '('))
129 /* Get format type. */
130 if (lex_token (lexer) == T_ID)
132 if (!parse_format_specifier_name (lexer, &format.type))
/* The comma after the type name is optional: the result of the
   match is ignored. */
134 lex_match (lexer, ',');
139 /* Get decimal places. */
140 if (lex_is_integer (lexer))
142 format.d = lex_integer (lexer);
148 if (!lex_force_match (lexer, ')'))
156 if (!fmt_check (&format, for_input))
/* One extra slot is allocated for the leading T pseudo-format, which
   tabs to the first column of the range before the fields. */
159 *formats = pool_nalloc (pool, var_cnt + 1, sizeof **formats);
160 *format_cnt = var_cnt + 1;
161 (*formats)[0].type = PRS_TYPE_T;
162 (*formats)[0].w = fc;
/* Every variable gets an identical copy of the parsed format. */
163 for (i = 1; i <= var_cnt; i++)
164 (*formats)[i] = format;
168 /* Implements parse_var_placements for Fortran-like formats.

   Consumes items until the closing parenthesis.  Each item is an
   optional integer repeat count followed by one of: a nested
   parenthesized list (handled by recursion), a slash, or an abstract
   format specifier.  T and X are treated specially; any other name
   is looked up with fmt_from_name and validated with fmt_check.
   COUNT copies of each item are appended to *FORMATS, which is grown
   from POOL as needed.  The number of entries stored is written to
   *FORMAT_CNT. */
170 fixed_parse_fortran (struct lexer *lexer, struct pool *pool, bool for_input,
171 struct fmt_spec **formats, size_t *format_cnt)
173 size_t formats_allocated = 0;
174 size_t formats_used = 0;
177 while (!lex_match (lexer, ')'))
180 struct fmt_spec *new_formats;
181 size_t new_format_cnt;
183 size_t formats_needed;
/* Optional repeat count in front of the item.
   NOTE(review): confirm that zero or negative counts are handled;
   no range check on the value is visible here. */
186 if (lex_is_integer (lexer))
188 count = lex_integer (lexer);
194 /* Parse format specifier. */
195 if (lex_match (lexer, '('))
197 /* Call ourselves recursively to handle parentheses. */
198 if (!fixed_parse_fortran (lexer, pool, for_input,
199 &new_formats, &new_format_cnt))
/* A slash is the "next record" pseudo-format. */
206 if (lex_match (lexer, '/'))
207 f.type = PRS_TYPE_NEW_REC;
210 char type[FMT_TYPE_LEN_MAX + 1];
212 if (!parse_abstract_format_specifier (lexer, type, &f.w, &f.d))
/* T and X are matched case-insensitively before the general name
   lookup; presumably they map to PRS_TYPE_T and PRS_TYPE_X. */
215 if (!strcasecmp (type, "T"))
217 else if (!strcasecmp (type, "X"))
225 if (!fmt_from_name (type, &f.type))
227 msg (SE, _("Unknown format type \"%s\"."), type);
230 if (!fmt_check (&f, for_input))
236 /* Add COUNT copies of the NEW_FORMAT_CNT formats in
237 NEW_FORMATS to FORMATS. */
/* Guard against size_t overflow in the total element count before
   computing the product below. */
238 if (new_format_cnt != 0
239 && size_overflow_p (xtimes (xsum (formats_used,
240 xtimes (count, new_format_cnt)),
243 formats_needed = count * new_format_cnt;
/* Grow the array only when the new entries do not fit.
   NOTE(review): this reads *formats, so it relies on *formats being
   initialised (presumably to NULL) before the first growth. */
244 if (formats_used + formats_needed > formats_allocated)
246 formats_allocated = formats_used + formats_needed;
247 *formats = pool_2nrealloc (pool, *formats, &formats_allocated,
250 for (; count > 0; count--)
252 memcpy (&(*formats)[formats_used], new_formats,
253 sizeof **formats * new_format_cnt);
254 formats_used += new_format_cnt;
/* Commas between items are optional: the result of the match is
   ignored. */
257 lex_match (lexer, ',');
260 *format_cnt = formats_used;
264 /* Checks whether FORMAT represents one of the special "formats"
265 for T, X, or /. If so, updates *RECORD or *COLUMN (or both)
266 as appropriate, and returns true. Otherwise, returns false
267 without any side effects. */
269 execute_placement_format (const struct fmt_spec *format,
270 int *record, int *column)
/* Dispatch on the format type; the placement pseudo-formats are the
   ones handled specially. */
272 switch (format->type)
/* Relative move: advance the column by the width stored in the
   format (presumably the X, "skip columns", case). */
275 *column += format->w;
282 case PRS_TYPE_NEW_REC:
/* Anything that is not a placement pseudo-format is expected to be a
   real format type (presumably the default case). */
288 assert (format->type < FMT_NUMBER_OF_FORMATS);
293 /* Parse a column or a range of columns, specified as a single
294 integer or two integers delimited by a dash. Stores the range
295 in *FIRST_COLUMN and *LAST_COLUMN. (If only a single integer
296 is given, it is stored in both.) If RANGE_SPECIFIED is
297 non-null, then *RANGE_SPECIFIED is set to true if the syntax
298 contained a dash, false otherwise. Returns true if
299 successful, false if the syntax was invalid or the values
300 specified did not make sense. */
302 parse_column_range (struct lexer *lexer, int *first_column, int *last_column,
303 bool *range_specified)
/* First column: required, and must be at least 1. */
306 if (!lex_force_int (lexer))
/* NOTE(review): lex_integer presumably returns long (it is printed
   with %ld elsewhere in this file), so storing it in an int may
   narrow very large values -- confirm. */
308 *first_column = lex_integer (lexer);
309 if (*first_column < 1)
311 msg (SE, _("Column positions for fields must be positive."));
/* Presumably turns a number lexed as negative (the "-5" in "1-5")
   into a dash followed by a positive number, so that the match below
   can see the dash -- verify against the lexer. */
317 lex_negative_to_dash (lexer);
318 if (lex_match (lexer, '-'))
/* Last column: required after a dash, and must be at least 1. */
320 if (!lex_force_int (lexer))
322 *last_column = lex_integer (lexer);
323 if (*last_column < 1)
325 msg (SE, _("Column positions for fields must be positive."));
/* Only a last column strictly before the first is rejected; equal
   columns are accepted even though the message says "greater". */
328 else if (*last_column < *first_column)
330 msg (SE, _("The ending column for a field must be "
331 "greater than the starting column."));
/* NOTE(review): RANGE_SPECIFIED may be null (fixed_parse_columns
   passes NULL); confirm that this store and the one below are
   guarded by a null check. */
336 *range_specified = true;
/* No dash: the range is the single column just parsed. */
341 *last_column = *first_column;
343 *range_specified = false;
349 /* Parses a (possibly empty) sequence of slashes, each of which
350 may be followed by an integer. A slash on its own increases
351 *RECORD by 1 and sets *COLUMN to 1. A slash followed by an
352 integer sets *RECORD to the integer, as long as that increases
353 *RECORD, and sets *COLUMN to 1.
355 Returns true if successful, false on syntax error. */
357 parse_record_placement (struct lexer *lexer, int *record, int *column)
/* Handle each slash in turn; there may be none. */
359 while (lex_match (lexer, '/'))
361 if (lex_is_integer (lexer))
/* An explicit record number must be strictly greater than the
   current record. */
363 if (lex_integer (lexer) <= *record)
/* %ld is used for the lex_integer value and %d for *RECORD, which is
   an int. */
365 msg (SE, _("The record number specified, %ld, is at or "
366 "before the previous record, %d. Data "
367 "fields must be listed in order of "
368 "increasing record number."),
369 lex_integer (lexer), *record);
/* NOTE(review): the value is stored into an int; if lex_integer
   returns long, as the %ld above suggests, very large record numbers
   would be narrowed -- confirm. */
372 *record = lex_integer (lexer);
/* Sanity check: the record number is expected to be at least 1 once
   the slashes have been processed. */
379 assert (*record >= 1);