1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/commands/data-parser.h"
36 #include "language/commands/data-reader.h"
37 #include "language/commands/file-handle.h"
38 #include "language/commands/inpt-pgm.h"
39 #include "language/commands/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
/* Shorthand that passes MSGID to gettext; the user-visible message
   strings in this file are wrapped in it. */
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data. */
/* State carried by a DATA LIST transformation: data_list_trns_proc
   reads these fields for every case, and data_list_trns_free releases
   the parser, the reader and the dictionary reference. */
60 struct data_parser *parser; /* Parser. */
61 struct dictionary *dict; /* Dictionary. */
62 struct dfm_reader *reader; /* Data file reader. */
/* When non-null, data_list_trns_proc obtains this variable's numeric
   value in the current case with case_num_rw. */
63 struct variable *end; /* Variable specified on END subcommand. */
/* Forward declarations of the two variable-specification parsers;
   cmd_data_list calls one or the other depending on the parser type,
   and both are defined further down. */
66 static bool parse_fixed (struct lexer *, struct dictionary *,
67 struct pool *, struct data_parser *);
68 static bool parse_free (struct lexer *, struct dictionary *,
69 struct pool *, struct data_parser *);
/* Transformation class that cmd_data_list passes to add_transformation
   when inside INPUT PROGRAM; its initializer appears further down. */
71 static const struct trns_class data_list_trns_class;
/* Parses and executes the DATA LIST command.

   Subcommands (FILE, ENCODING, RECORDS, SKIP, END, NOTABLE, TABLE and
   the FIXED/FREE/LIST keywords) are consumed up to the first slash, and
   the variable specifications that follow are parsed by parse_fixed or
   parse_free.  Inside INPUT PROGRAM the result is added to DS as a
   transformation; otherwise the parser is made the active file.

   The failure path at the end returns CMD_CASCADING_FAILURE. */
74 cmd_data_list (struct lexer *lexer, struct dataset *ds)
/* Outside INPUT PROGRAM a new dictionary in the default encoding is
   created. */
76 struct dictionary *dict = (in_input_program ()
78 : dict_create (get_default_encoding ()));
79 struct data_parser *parser = data_parser_create ();
80 struct dfm_reader *reader = NULL;
82 struct variable *end = NULL;
83 struct file_handle *fh = NULL;
/* ENCODING_START/ENCODING_END are token offsets of the ENCODING
   subcommand, kept for the inline-data warning below. */
85 char *encoding = NULL;
86 int encoding_start = 0, encoding_end = 0;
88 int table = -1; /* Print table if nonzero, -1=undecided. */
90 bool has_type = false;
/* END_START/END_END are token offsets of the END subcommand, kept for
   the error issued when END is combined with a non-FIXED type. */
92 int end_start = 0, end_end = 0;
/* Subcommands are parsed until the first slash. */
93 while (lex_token (lexer) != T_SLASH)
95 if (lex_match_id (lexer, "FILE"))
97 lex_match (lexer, T_EQUALS);
99 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
103 else if (lex_match_id (lexer, "ENCODING"))
/* The keyword has already been consumed, hence the - 1. */
105 encoding_start = lex_ofs (lexer) - 1;
106 lex_match (lexer, T_EQUALS);
107 if (!lex_force_string (lexer))
111 encoding = ss_xstrdup (lex_tokss (lexer));
113 encoding_end = lex_ofs (lexer);
116 else if (lex_match_id (lexer, "RECORDS"))
/* A positive record count means RECORDS was already given. */
118 if (data_parser_get_records (parser) > 0)
120 lex_sbc_only_once (lexer, "RECORDS");
/* The equals sign and the parentheses around the count are optional. */
123 lex_match (lexer, T_EQUALS);
124 lex_match (lexer, T_LPAREN);
125 if (!lex_force_int_range (lexer, "RECORDS", 0, INT_MAX))
127 data_parser_set_records (parser, lex_integer (lexer));
129 lex_match (lexer, T_RPAREN);
131 else if (lex_match_id (lexer, "SKIP"))
133 lex_match (lexer, T_EQUALS);
134 if (!lex_force_int_range (lexer, "SKIP", 0, INT_MAX))
136 data_parser_set_skip (parser, lex_integer (lexer));
139 else if (lex_match_id (lexer, "END"))
141 if (!in_input_program ())
143 lex_next_error (lexer, -1, -1,
144 _("The %s subcommand may only be used within %s."),
145 "END", "INPUT PROGRAM");
150 lex_sbc_only_once (lexer, "END");
154 end_start = lex_ofs (lexer) - 1;
155 lex_match (lexer, T_EQUALS);
156 if (!lex_force_id (lexer))
158 end_end = lex_ofs (lexer);
/* The END variable is looked up by name; the creation call passes
   width 0.  NOTE(review): the condition between the lookup and the
   creation is not visible here. */
160 end = dict_lookup_var (dict, lex_tokcstr (lexer));
162 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
165 else if (lex_match_id (lexer, "NOTABLE"))
167 else if (lex_match_id (lexer, "TABLE"))
/* Any other identifier must be one of the type keywords.  FREE and
   LIST are both delimited; they differ only in the span setting. */
169 else if (lex_token (lexer) == T_ID)
171 if (lex_match_id (lexer, "FIXED"))
172 data_parser_set_type (parser, DP_FIXED);
173 else if (lex_match_id (lexer, "FREE"))
175 data_parser_set_type (parser, DP_DELIMITED);
176 data_parser_set_span (parser, true);
178 else if (lex_match_id (lexer, "LIST"))
180 data_parser_set_type (parser, DP_DELIMITED);
181 data_parser_set_span (parser, false);
185 lex_error_expecting (lexer, "FILE", "ENCODING", "RECORDS",
186 "SKIP", "END", "NOTABLE", "TABLE",
187 "FIXED", "FREE", "LIST");
193 lex_next_error (lexer, -1, -1,
194 _("Only one of FIXED, FREE, or LIST may "
/* Delimited input: either an explicit parenthesized delimiter list or
   the defaults further below. */
200 if (data_parser_get_type (parser) == DP_DELIMITED)
202 if (lex_match (lexer, T_LPAREN))
204 struct string delims = DS_EMPTY_INITIALIZER;
/* Each delimiter is the keyword TAB or a string of length 1. */
210 if (lex_match_id (lexer, "TAB"))
212 else if (lex_is_string (lexer)
213 && ss_length (lex_tokss (lexer)) == 1)
215 delim = ss_first (lex_tokss (lexer));
220 /* XXX should support multibyte UTF-8 characters */
221 lex_error (lexer, _("Syntax error expecting TAB "
222 "or delimiter string."));
223 ds_destroy (&delims);
226 ds_put_byte (&delims, delim);
/* Commas between delimiters are optional. */
228 lex_match (lexer, T_COMMA);
230 while (!lex_match (lexer, T_RPAREN));
/* With explicit delimiters there are no quote characters and no soft
   delimiters; only the listed bytes act as hard delimiters. */
232 data_parser_set_empty_line_has_field (parser, true);
233 data_parser_set_quotes (parser, ss_empty ());
234 data_parser_set_soft_delimiters (parser, ss_empty ());
235 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
236 ds_destroy (&delims);
/* Defaults: both quote characters, CC_SPACES as soft delimiters, and a
   comma as hard delimiter only when the decimal point is '.'
   (otherwise the buffer length passed is 0). */
240 data_parser_set_empty_line_has_field (parser, false);
241 data_parser_set_quotes (parser, ss_cstr ("'\""));
242 data_parser_set_soft_delimiters (parser,
243 ss_cstr (CC_SPACES));
244 const char decimal = settings_get_fmt_settings ()->decimal;
245 data_parser_set_hard_delimiters (parser,
246 ss_buffer (",", (decimal == '.') ? 1 : 0));
252 lex_error_expecting (lexer, "FILE", "ENCODING", "RECORDS",
253 "SKIP", "END", "NOTABLE", "TABLE",
254 "FIXED", "FREE", "LIST");
/* NOTE(review): the guards for the next two statements are not visible
   here; the warning text indicates it concerns an encoding given
   together with inline data. */
261 fh = fh_inline_file ();
264 lex_ofs_msg (lexer, SW, encoding_start, encoding_end,
265 _("Encoding should not be specified for inline data. "
266 "It will be ignored."));
268 fh_set_default_handle (fh);
/* END is rejected unless the type is DP_FIXED. */
270 enum data_parser_type type = data_parser_get_type (parser);
271 if (type != DP_FIXED && end != NULL)
273 lex_ofs_error (lexer, end_start, end_end,
274 _("The %s subcommand may be used only with %s."),
275 "END", "DATA LIST FIXED");
/* The variable specifications are parsed with a temporary pool that is
   destroyed as soon as parsing returns. */
279 struct pool *tmp_pool = pool_create ();
280 bool ok = (type == DP_FIXED
281 ? parse_fixed (lexer, dict, tmp_pool, parser)
282 : parse_free (lexer, dict, tmp_pool, parser));
283 pool_destroy (tmp_pool);
286 assert (data_parser_any_fields (parser));
288 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* The value assigned to TABLE is nonzero for FIXED and for delimited
   input without span (LIST). */
292 table = type == DP_FIXED || !data_parser_get_span (parser);
294 data_parser_output_description (parser, fh);
296 reader = dfm_open_reader (fh, lexer, encoding);
/* Inside INPUT PROGRAM the parser becomes a transformation holding its
   own reference to the dictionary; otherwise it is turned into the
   active file. */
300 if (in_input_program ())
302 struct data_list_trns *trns = xmalloc (sizeof *trns);
303 *trns = (struct data_list_trns) {
305 .dict = dict_ref (dict),
309 add_transformation (ds, &data_list_trns_class, trns);
312 data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL);
/* Cleanup for the failure path. */
322 data_parser_destroy (parser);
323 if (!in_input_program ())
327 return CMD_CASCADING_FAILURE;
330 /* Fixed-format parsing. */
332 /* Parses all the variable specifications for DATA LIST FIXED,
333 storing them into PARSER. Uses TMP_POOL for temporary storage;
334 the caller may destroy it. Returns true only if successful. */
/* Parses the variable specifications of DATA LIST FIXED from LEXER,
   creating variables in DICT and adding one fixed field to PARSER for
   each variable placement.  TMP_POOL holds the temporary name and
   format arrays. */
337 parse_fixed (struct lexer *lexer, struct dictionary *dict,
338 struct pool *tmp_pool, struct data_parser *parser)
/* A nonzero MAX_RECORDS is the upper limit on record numbers in the
   checks below. */
340 int max_records = data_parser_get_records (parser);
/* START is the token offset reported if no fields were specified. */
344 int start = lex_ofs (lexer);
345 while (lex_token (lexer) != T_ENDCMD)
/* A slash moves to another record; a number following it must be at
   least RECORD + 1. */
347 if (lex_match (lexer, T_SLASH))
349 int records_start = lex_ofs (lexer) - 1;
350 if (lex_is_number (lexer))
352 if (!lex_force_int_range (lexer, NULL, record + 1, INT_MAX))
354 record = lex_integer (lexer);
361 if (max_records && record > max_records)
363 lex_ofs_error (lexer, records_start, lex_ofs (lexer) - 1,
364 _("Cannot advance to record %d when "
365 "RECORDS=%d is specified."),
366 record, data_parser_get_records (parser));
/* Grow the parser's record count to cover the highest record used. */
369 if (record > data_parser_get_records (parser))
370 data_parser_set_records (parser, record);
/* Parse a list of variable names, then the placements that apply to
   them; the token offsets are kept for error locations. */
375 int vars_start = lex_ofs (lexer);
378 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
379 &names, &n_names, PV_NONE))
381 int vars_end = lex_ofs (lexer) - 1;
382 struct fmt_spec *formats;
384 if (!parse_var_placements (lexer, tmp_pool, n_names, FMT_FOR_INPUT,
385 &formats, &n_formats))
387 int placements_end = lex_ofs (lexer) - 1;
389 /* Create variables and var specs. */
/* Entries for which execute_placement_format returns false are the
   ones that consume a variable name. */
391 for (struct fmt_spec *f = formats; f < &formats[n_formats]; f++)
392 if (!execute_placement_format (*f, &record, &column))
394 /* Create variable. */
395 const char *name = names[name_idx++];
396 int width = fmt_var_width (*f);
397 struct variable *v = dict_create_var (dict, name, width);
/* The output format is derived from the input format. */
401 struct fmt_spec output = fmt_for_output_from_input (
402 *f, settings_get_fmt_settings ());
403 var_set_both_formats (v, output);
408 This can be acceptable if we're in INPUT
409 PROGRAM, but only if the existing variable has
410 the same width as the one we would have
412 if (!in_input_program ())
414 lex_ofs_error (lexer, vars_start, vars_end,
415 _("%s is a duplicate variable name."), name);
/* Reusing an existing variable: reject a numeric/string mismatch, then
   a string width mismatch. */
419 v = dict_lookup_var_assert (dict, name);
420 if ((width != 0) != (var_get_width (v) != 0))
422 lex_ofs_error (lexer, vars_start, placements_end,
423 _("There is already a variable %s of a "
424 "different type."), name);
427 if (width != 0 && width != var_get_width (v))
429 lex_ofs_error (lexer, vars_start, placements_end,
430 _("There is already a string variable %s of "
431 "a different width."), name);
436 if (max_records && record > max_records)
438 lex_ofs_error (lexer, vars_start, placements_end,
439 _("Cannot place variable %s on record %d when "
440 "RECORDS=%d is specified."),
441 var_get_name (v), record,
442 data_parser_get_records (parser));
446 data_parser_add_fixed_field (parser, *f,
447 var_get_dict_index (v),
448 var_get_name (v), record, column);
/* Every parsed name must have been consumed by a placement. */
452 assert (name_idx == n_names);
/* At least one field is required. */
455 if (!data_parser_any_fields (parser))
457 lex_ofs_error (lexer, start, lex_ofs (lexer) - 1,
458 _("No fields were specified. "
459 "At least one is required."));
466 /* Free-format parsing. */
468 /* Parses variable specifications for DATA LIST FREE and adds
469 them to PARSER. Uses TMP_POOL for temporary storage; the caller
470 may destroy it. Returns true only if successful. */
472 parse_free (struct lexer *lexer, struct dictionary *dict,
473 struct pool *tmp_pool, struct data_parser *parser)
/* Each pass parses a list of names and then an optional format; the
   token offsets are kept for error locations. */
481 int vars_start = lex_ofs (lexer);
482 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
483 &names, &n_names, PV_NONE))
485 int vars_end = lex_ofs (lexer) - 1;
/* INPUT is the format used to read the fields, OUTPUT the format given
   to the new variables. */
487 struct fmt_spec input, output;
/* A parenthesized format specification applies to the names just
   parsed. */
488 if (lex_match (lexer, T_LPAREN))
490 char type[FMT_TYPE_LEN_MAX + 1];
492 if (!parse_abstract_format_specifier (lexer, type, &input.w,
495 if (!fmt_from_name (type, &input.type))
497 lex_next_error (lexer, -1, -1,
498 _("Unknown format type `%s'."), type);
502 /* If no width was included, use the minimum width for the type.
503 This isn't quite right, because DATETIME by itself seems to become
504 DATETIME20 (see bug #30690), whereas this will become
505 DATETIME17. The correct behavior is not documented. */
508 input.w = fmt_min_input_width (input.type);
/* The error string from the format check, if any, is reported at the
   preceding token. */
512 char *error = fmt_check_input__ (input);
515 lex_next_error (lexer, -1, -1, "%s", error);
519 if (!lex_force_match (lexer, T_RPAREN))
522 /* As a special case, N format is treated as F format
523 for free-field input. */
524 if (input.type == FMT_N)
527 output = fmt_for_output_from_input (input,
528 settings_get_fmt_settings ());
/* Without a parenthesized format (an asterisk is optionally consumed),
   input is F8.0 and output is what settings_get_format returns. */
532 lex_match (lexer, T_ASTERISK);
533 input = fmt_for_input (FMT_F, 8, 0);
534 output = settings_get_format ();
/* Create one variable and one delimited field per name. */
537 for (size_t i = 0; i < n_names; i++)
539 struct variable *v = dict_create_var (dict, names[i],
540 fmt_var_width (input));
543 lex_ofs_error (lexer, vars_start, vars_end,
544 _("%s is a duplicate variable name."), names[i]);
547 var_set_both_formats (v, output);
549 data_parser_add_delimited_field (parser,
550 input, var_get_dict_index (v),
/* Repeat until the end of the command. */
554 while (lex_token (lexer) != T_ENDCMD);
559 /* Input procedure. */
561 /* Destroys DATA LIST transformation TRNS.
562 Returns true if successful, false if an I/O error occurred. */
564 data_list_trns_free (void *trns_)
/* TRNS_ is the struct data_list_trns that cmd_data_list handed to
   add_transformation. */
566 struct data_list_trns *trns = trns_;
/* Release what the transformation holds: the parser, the open reader
   and the dictionary reference. */
567 data_parser_destroy (trns->parser);
568 dfm_close_reader (trns->reader);
569 dict_unref (trns->dict);
574 /* Handle DATA LIST transformation TRNS, parsing data into *C. */
/* TRNS_ is the struct data_list_trns for this transformation; CASE_NUM
   is unused.  Returns a trns_result: TRNS_CONTINUE after a successful
   parse, TRNS_END_FILE for the end-of-input outcome visible below. */
575 static enum trns_result
576 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
578 struct data_list_trns *trns = trns_;
579 enum trns_result retval;
/* The case is unshared before the parser writes into it. */
581 *c = case_unshare (*c);
582 if (data_parser_parse (trns->parser, trns->reader, trns->dict, *c))
583 retval = TRNS_CONTINUE;
584 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
586 /* An I/O error, or encountering end of file for a second
587 time, should be escalated into a more serious error. */
591 retval = TRNS_END_FILE;
593 /* If there was an END subcommand handle it. */
594 if (trns->end != NULL)
/* END points at the END variable's numeric value inside the case.
   With an END variable, an end-of-file result is turned back into
   TRNS_CONTINUE.  NOTE(review): the stores through END are not visible
   here. */
596 double *end = case_num_rw (*c, trns->end);
597 if (retval == TRNS_END_FILE)
600 retval = TRNS_CONTINUE;
/* Transformation class for DATA LIST: data_list_trns_proc is the
   execute callback and data_list_trns_free the destroy callback. */
609 static const struct trns_class data_list_trns_class = {
611 .execute = data_list_trns_proc,
612 .destroy = data_list_trns_free,