1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/commands/data-parser.h"
36 #include "language/commands/data-reader.h"
37 #include "language/commands/file-handle.h"
38 #include "language/commands/inpt-pgm.h"
39 #include "language/commands/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data. */
/* Fields of the state object that cmd_data_list() allocates and registers
   with add_transformation() when DATA LIST appears inside INPUT PROGRAM.
   data_list_trns_proc() reads them for every case and
   data_list_trns_free() releases the parser, reader, and dictionary. */
60 struct data_parser *parser; /* Parser, handed to data_parser_parse(). */
61 struct dictionary *dict; /* Dictionary; a reference taken with dict_ref(). */
62 struct dfm_reader *reader; /* Data file reader. */
63 struct variable *end; /* Variable specified on END subcommand; NULL is treated as "no END". */
/* Forward declarations for the two variable-specification parsers, which
   are defined after cmd_data_list() but called from it.  Both take the
   lexer, the dictionary that receives the variables, a pool for temporary
   storage, and the data parser that receives the field descriptions. */
66 static bool parse_fixed (struct lexer *, struct dictionary *,
67 struct pool *, struct data_parser *);
68 static bool parse_free (struct lexer *, struct dictionary *,
69 struct pool *, struct data_parser *);
/* Transformation class that cmd_data_list() passes to add_transformation();
   its initializer appears later in this file. */
71 static const struct trns_class data_list_trns_class;
/* Parses and executes the DATA LIST command.

   LEXER supplies the command's tokens and DS is the dataset the command
   acts on.  The function works in three stages:

   1. Subcommands before the first slash are parsed: FILE, ENCODING,
      RECORDS, SKIP, END, NOTABLE, TABLE, and the type keywords FIXED,
      FREE, and LIST (FREE and LIST may be followed by a parenthesized
      delimiter list).
   2. The variable specifications are parsed by parse_fixed() when the
      parser type is DP_FIXED and by parse_free() otherwise.
   3. A reader is opened on the file handle.  Inside INPUT PROGRAM a
      transformation of class data_list_trns_class is added to DS;
      otherwise data_parser_make_active_file() is called.

   The failure path visible at the end of the function destroys the parser
   and returns CMD_CASCADING_FAILURE. */
74 cmd_data_list (struct lexer *lexer, struct dataset *ds)
/* Outside INPUT PROGRAM a fresh dictionary is created with the default
   encoding.  NOTE(review): the alternative used inside INPUT PROGRAM is
   not visible in this view. */
76 struct dictionary *dict = (in_input_program ()
78 : dict_create (get_default_encoding ()));
79 struct data_parser *parser = data_parser_create ();
80 struct dfm_reader *reader = NULL;
82 struct variable *end = NULL;
83 struct file_handle *fh = NULL;
85 char *encoding = NULL;
/* Token offsets of the ENCODING subcommand, kept for the warning issued
   with lex_ofs_msg() further down. */
86 int encoding_start = 0, encoding_end = 0;
88 int table = -1; /* Print table if nonzero, -1=undecided. */
90 bool has_type = false;
/* Token offsets of the END subcommand, kept for the error issued with
   lex_ofs_error() when END is combined with a type other than FIXED. */
92 int end_start = 0, end_end = 0;
/* Subcommand loop: runs until the slash that precedes the variable
   specifications. */
93 while (lex_token (lexer) != T_SLASH)
/* FILE: parse a handle that may refer to a file or to inline data.  The
   equals sign is optional. */
95 if (lex_match_id (lexer, "FILE"))
97 lex_match (lexer, T_EQUALS);
99 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
/* ENCODING: requires a string token, of which a copy is kept.  The start
   offset is taken one token back because the keyword has already been
   consumed by lex_match_id(). */
103 else if (lex_match_id (lexer, "ENCODING"))
105 encoding_start = lex_ofs (lexer) - 1;
106 lex_match (lexer, T_EQUALS);
107 if (!lex_force_string (lexer))
111 encoding = ss_xstrdup (lex_tokss (lexer));
113 encoding_end = lex_ofs (lexer);
/* RECORDS: a nonzero record count already stored in the parser means the
   subcommand was given before, which is reported through
   lex_sbc_only_once().  The count must lie in 0...INT_MAX and may
   optionally be enclosed in parentheses. */
116 else if (lex_match_id (lexer, "RECORDS"))
118 if (data_parser_get_records (parser) > 0)
120 lex_sbc_only_once (lexer, "RECORDS");
123 lex_match (lexer, T_EQUALS);
124 lex_match (lexer, T_LPAREN);
125 if (!lex_force_int_range (lexer, "RECORDS", 0, INT_MAX))
127 data_parser_set_records (parser, lex_integer (lexer));
129 lex_match (lexer, T_RPAREN);
/* SKIP: an integer in 0...INT_MAX, stored in the parser with
   data_parser_set_skip(). */
131 else if (lex_match_id (lexer, "SKIP"))
133 lex_match (lexer, T_EQUALS);
134 if (!lex_force_int_range (lexer, "SKIP", 0, INT_MAX))
136 data_parser_set_skip (parser, lex_integer (lexer));
/* END: rejected outside INPUT PROGRAM and, via lex_sbc_only_once(), when
   repeated.  The named variable is looked up in DICT;
   dict_create_var_assert() creates it with width 0 (presumably only when
   the lookup found nothing -- the guarding condition is not visible). */
139 else if (lex_match_id (lexer, "END"))
141 if (!in_input_program ())
143 lex_next_error (lexer, -1, -1,
144 _("The %s subcommand may only be used within %s."),
145 "END", "INPUT PROGRAM");
150 lex_sbc_only_once (lexer, "END");
154 end_start = lex_ofs (lexer) - 1;
155 lex_match (lexer, T_EQUALS);
156 if (!lex_force_id (lexer))
158 end_end = lex_ofs (lexer);
160 end = dict_lookup_var (dict, lex_tokcstr (lexer));
162 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
/* NOTABLE and TABLE: presumably these set the `table' flag declared above;
   the assignments themselves are not visible in this view. */
165 else if (lex_match_id (lexer, "NOTABLE"))
167 else if (lex_match_id (lexer, "TABLE"))
/* Any other identifier must be one of the type keywords.  FIXED selects
   DP_FIXED; FREE and LIST both select DP_DELIMITED and differ only in the
   span setting (true for FREE, false for LIST). */
169 else if (lex_token (lexer) == T_ID)
171 if (lex_match_id (lexer, "FIXED"))
172 data_parser_set_type (parser, DP_FIXED);
173 else if (lex_match_id (lexer, "FREE"))
175 data_parser_set_type (parser, DP_DELIMITED);
176 data_parser_set_span (parser, true);
178 else if (lex_match_id (lexer, "LIST"))
180 data_parser_set_type (parser, DP_DELIMITED);
181 data_parser_set_span (parser, false);
185 lex_error_expecting (lexer, "FILE", "ENCODING", "RECORDS",
186 "SKIP", "END", "NOTABLE", "TABLE",
187 "FIXED", "FREE", "LIST");
/* Diagnostic for a second type keyword (the rest of the message and the
   check on `has_type' are not visible in this view). */
193 lex_next_error (lexer, -1, -1,
194 _("Only one of FIXED, FREE, or LIST may "
/* Delimited types accept an optional parenthesized list of delimiters,
   each either the keyword TAB or a string exactly one byte long. */
200 if (data_parser_get_type (parser) == DP_DELIMITED)
202 if (lex_match (lexer, T_LPAREN))
204 struct string delims = DS_EMPTY_INITIALIZER;
210 if (lex_match_id (lexer, "TAB"))
212 else if (lex_is_string (lexer)
213 && ss_length (lex_tokss (lexer)) == 1)
215 delim = ss_first (lex_tokss (lexer));
220 /* XXX should support multibyte UTF-8 characters */
221 lex_error (lexer, _("Syntax error expecting TAB "
222 "or delimiter string."));
223 ds_destroy (&delims);
/* Accumulate the delimiter byte; a comma between delimiters is optional. */
226 ds_put_byte (&delims, delim);
228 lex_match (lexer, T_COMMA);
230 while (!lex_match (lexer, T_RPAREN));
/* Explicit delimiters: empty lines count as a field, quoting is disabled,
   there are no soft delimiters, and the listed bytes become the hard
   delimiters. */
232 data_parser_set_empty_line_has_field (parser, true);
233 data_parser_set_quotes (parser, ss_empty ());
234 data_parser_set_soft_delimiters (parser, ss_empty ());
235 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
236 ds_destroy (&delims);
/* Default delimiters: empty lines are not fields, both quote characters
   are recognized, and CC_SPACES are the soft delimiters. */
240 data_parser_set_empty_line_has_field (parser, false);
241 data_parser_set_quotes (parser, ss_cstr ("'\""));
242 data_parser_set_soft_delimiters (parser,
243 ss_cstr (CC_SPACES));
/* Comma is a hard delimiter only when the decimal point is '.'; otherwise
   the substring has length 0 and there is no hard delimiter. */
244 const char decimal = settings_get_fmt_settings ()->decimal;
245 data_parser_set_hard_delimiters (parser,
246 ss_buffer (",", (decimal == '.') ? 1 : 0));
252 lex_error_expecting (lexer, "FILE", "ENCODING", "RECORDS",
253 "SKIP", "END", "NOTABLE", "TABLE",
254 "FIXED", "FREE", "LIST");
/* Use the inline-data handle (presumably only when no FILE subcommand
   supplied one); a warning notes that an encoding is ignored for inline
   data. */
261 fh = fh_inline_file ();
264 lex_ofs_msg (lexer, SW, encoding_start, encoding_end,
265 _("Encoding should not be specified for inline data. "
266 "It will be ignored."));
268 fh_set_default_handle (fh);
/* END is accepted only together with the fixed-format type. */
270 enum data_parser_type type = data_parser_get_type (parser);
271 if (type != DP_FIXED && end != NULL)
273 lex_ofs_error (lexer, end_start, end_end,
274 _("The %s subcommand may be used only with %s."),
275 "END", "DATA LIST FIXED");
/* Parse the variable specifications.  The pool holds only temporary
   parsing storage, so it is destroyed as soon as the parser returns. */
279 struct pool *tmp_pool = pool_create ();
280 bool ok = (type == DP_FIXED
281 ? parse_fixed (lexer, dict, tmp_pool, parser)
282 : parse_free (lexer, dict, tmp_pool, parser));
283 pool_destroy (tmp_pool);
286 assert (data_parser_any_fields (parser));
288 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Default for the table flag: on for FIXED and for the non-span delimited
   type (LIST), off for FREE.  Presumably applied only while `table' is
   still -1. */
292 table = type == DP_FIXED || !data_parser_get_span (parser);
294 data_parser_output_description (parser, fh);
296 reader = dfm_open_reader (fh, lexer, encoding);
/* Inside INPUT PROGRAM the parsing is deferred to a transformation that
   holds its own reference to the dictionary; otherwise the parser, reader,
   and dictionary are handed to data_parser_make_active_file(). */
300 if (in_input_program ())
302 struct data_list_trns *trns = xmalloc (sizeof *trns);
303 *trns = (struct data_list_trns) {
305 .dict = dict_ref (dict),
309 add_transformation (ds, &data_list_trns_class, trns);
312 data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL);
/* Cleanup leading to the failure return. */
322 data_parser_destroy (parser);
323 if (!in_input_program ())
327 return CMD_CASCADING_FAILURE;
330 /* Fixed-format parsing. */
/* Summary of parse_fixed(), whose original header comment below is cut off
   in this view and mentions "DLS", which is not one of the parameters:

   Parses the variable specifications for DATA LIST FIXED from LEXER.
   Each variable is created in DICT (or, inside INPUT PROGRAM, matched
   against an existing variable of compatible width) and added to PARSER
   with data_parser_add_fixed_field().  TMP_POOL is passed to the name and
   placement parsers for their temporary storage.  Parsing repeats until
   the end-of-command token. */
332 /* Parses all the variable specifications for DATA LIST FIXED,
333 storing them into DLS. Uses TMP_POOL for temporary storage;
334 the caller may destroy it. Returns true only if
337 parse_fixed (struct lexer *lexer, struct dictionary *dict,
338 struct pool *tmp_pool, struct data_parser *parser)
340 int max_records = data_parser_get_records (parser);
346 /* Parse everything. */
/* The token offsets recorded below (records_start, vars_start, vars_end,
   placements_end) delimit the syntax that the diagnostics point at. */
347 int records_start = lex_ofs (lexer);
348 if (!parse_record_placement (lexer, &record, &column))
351 int vars_start = lex_ofs (lexer);
354 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
355 &names, &n_names, PV_NONE))
357 int vars_end = lex_ofs (lexer) - 1;
358 struct fmt_spec *formats;
360 if (!parse_var_placements (lexer, tmp_pool, n_names, FMT_FOR_INPUT,
361 &formats, &n_formats))
363 int placements_end = lex_ofs (lexer) - 1;
365 /* Create variables and var specs. */
/* Only entries for which execute_placement_format() returns false consume
   a name and become a variable; presumably the other entries merely adjust
   RECORD and COLUMN. */
367 for (struct fmt_spec *f = formats; f < &formats[n_formats]; f++)
368 if (!execute_placement_format (f, &record, &column))
370 /* Create variable. */
371 const char *name = names[name_idx++];
372 int width = fmt_var_width (f);
373 struct variable *v = dict_create_var (dict, name, width);
/* For the created variable, both formats are set to the output format
   derived from the input format F. */
377 struct fmt_spec output = fmt_for_output_from_input (
378 f, settings_get_fmt_settings ());
379 var_set_both_formats (v, &output);
/* NOTE(review): the next three lines are prose from a comment whose opening
   and closing delimiters are not visible in this view. */
384 This can be acceptable if we're in INPUT
385 PROGRAM, but only if the existing variable has
386 the same width as the one we would have
/* Outside INPUT PROGRAM a name clash is always an error. */
388 if (!in_input_program ())
390 lex_ofs_error (lexer, vars_start, vars_end,
391 _("%s is a duplicate variable name."), name);
/* Inside INPUT PROGRAM the existing variable is reused, provided that it
   agrees on zero versus nonzero width and, when the width is nonzero, on
   the exact width. */
395 v = dict_lookup_var_assert (dict, name);
396 if ((width != 0) != (var_get_width (v) != 0))
398 lex_ofs_error (lexer, vars_start, placements_end,
399 _("There is already a variable %s of a "
400 "different type."), name);
403 if (width != 0 && width != var_get_width (v))
405 lex_ofs_error (lexer, vars_start, placements_end,
406 _("There is already a string variable %s of "
407 "a different width."), name);
/* A zero MAX_RECORDS disables this check; otherwise the variable's record
   number must not exceed it. */
412 if (max_records && record > max_records)
414 lex_ofs_error (lexer, records_start, vars_end,
415 _("Cannot place variable %s on record %d when "
416 "RECORDS=%d is specified."),
417 var_get_name (v), record,
418 data_parser_get_records (parser));
/* Register the field with the parser at the current record and column. */
422 data_parser_add_fixed_field (parser, f,
423 var_get_case_index (v),
424 var_get_name (v), record, column);
/* Every parsed name must have been consumed by some format. */
428 assert (name_idx == n_names);
/* Repeat for further groups of specifications until the command ends. */
430 while (lex_token (lexer) != T_ENDCMD);
435 /* Free-format parsing. */
437 /* Parses variable specifications for DATA LIST FREE and adds
   them to PARSER as delimited fields, creating the variables in DICT.
   cmd_data_list() calls this function for every parser type other than
   DP_FIXED, so it serves LIST as well.  Uses TMP_POOL for temporary
   storage; the caller may destroy it.  Returns true only if successful. */
441 parse_free (struct lexer *lexer, struct dictionary *dict,
442 struct pool *tmp_pool, struct data_parser *parser)
/* The token range of the variable names is kept for the duplicate-name
   diagnostic below. */
450 int vars_start = lex_ofs (lexer);
451 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
452 &names, &n_names, PV_NONE))
454 int vars_end = lex_ofs (lexer) - 1;
456 struct fmt_spec input, output;
/* An explicit format in parentheses applies to all the names just parsed. */
457 if (lex_match (lexer, T_LPAREN))
459 char type[FMT_TYPE_LEN_MAX + 1];
461 if (!parse_abstract_format_specifier (lexer, type, &input.w,
464 if (!fmt_from_name (type, &input.type))
466 lex_next_error (lexer, -1, -1,
467 _("Unknown format type `%s'."), type);
471 /* If no width was included, use the minimum width for the type.
472 This isn't quite right, because DATETIME by itself seems to become
473 DATETIME20 (see bug #30690), whereas this will become
474 DATETIME17. The correct behavior is not documented. */
477 input.w = fmt_min_input_width (input.type);
/* Validate the input format; the string returned by fmt_check_input__()
   is what gets reported as the error message. */
481 char *error = fmt_check_input__ (&input);
484 lex_next_error (lexer, -1, -1, "%s", error);
488 if (!lex_force_match (lexer, T_RPAREN))
491 /* As a special case, N format is treated as F format
492 for free-field input. */
493 if (input.type == FMT_N)
496 output = fmt_for_output_from_input (&input,
497 settings_get_fmt_settings ());
/* No explicit format: an optional asterisk is accepted, the input format
   is F8.0, and the output format comes from settings_get_format(). */
501 lex_match (lexer, T_ASTERISK);
502 input = fmt_for_input (FMT_F, 8, 0);
503 output = *settings_get_format ();
/* Create one variable per name, with the width implied by the input
   format, and register it with the parser as a delimited field. */
506 for (size_t i = 0; i < n_names; i++)
508 struct variable *v = dict_create_var (dict, names[i],
509 fmt_var_width (&input));
512 lex_ofs_error (lexer, vars_start, vars_end,
513 _("%s is a duplicate variable name."), names[i]);
516 var_set_both_formats (v, &output);
518 data_parser_add_delimited_field (parser,
519 &input, var_get_case_index (v),
/* Repeat for further groups of specifications until the command ends. */
523 while (lex_token (lexer) != T_ENDCMD);
528 /* Input procedure. */
530 /* Destroys DATA LIST transformation TRNS.
   TRNS_ is the `struct data_list_trns' pointer that cmd_data_list() passed
   to add_transformation(); this function is installed as the `destroy'
   member of data_list_trns_class.
531 Returns true if successful, false if an I/O error occurred. */
533 data_list_trns_free (void *trns_)
535 struct data_list_trns *trns = trns_;
/* Release what the transformation holds: the parser, the open reader, and
   the dictionary reference taken with dict_ref() in cmd_data_list(). */
536 data_parser_destroy (trns->parser);
537 dfm_close_reader (trns->reader);
538 dict_unref (trns->dict);
543 /* Handle DATA LIST transformation TRNS, parsing data into *C.
   TRNS_ is the `struct data_list_trns' pointer registered by
   cmd_data_list().  CASE_NUM is unused.  The result is TRNS_CONTINUE when
   data_parser_parse() succeeds; the handling of failures is described at
   the individual branches below. */
544 static enum trns_result
545 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
547 struct data_list_trns *trns = trns_;
548 enum trns_result retval;
/* Obtain an unshared case before the parser writes into it. */
550 *c = case_unshare (*c);
551 if (data_parser_parse (trns->parser, trns->reader, trns->dict, *c))
552 retval = TRNS_CONTINUE;
553 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
555 /* An I/O error, or encountering end of file for a second
556 time, should be escalated into a more serious error. */
/* NOTE(review): the assignment belonging to the escalated-error branch is
   not visible in this view; the TRNS_END_FILE assignment below presumably
   belongs to a separate final `else' branch -- confirm against the full
   file. */
560 retval = TRNS_END_FILE;
562 /* If there was an END subcommand handle it. */
/* With an END variable, end of file is turned into TRNS_CONTINUE so that
   processing goes on; the stores through `end' that flag the condition
   are not visible in this view. */
563 if (trns->end != NULL)
565 double *end = case_num_rw (*c, trns->end);
566 if (retval == TRNS_END_FILE)
569 retval = TRNS_CONTINUE;
/* Transformation class for DATA LIST inside INPUT PROGRAM, passed to
   add_transformation() by cmd_data_list().  The members visible here bind
   data_list_trns_proc() as the per-case `execute' callback and
   data_list_trns_free() as the `destroy' callback. */
578 static const struct trns_class data_list_trns_class = {
580 .execute = data_list_trns_proc,
581 .destroy = data_list_trns_free,