1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/data-io/data-reader.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/data-io/inpt-pgm.h"
39 #include "language/data-io/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
/* Shorthand used around the message strings in this file: expands to a
   gettext call on MSGID. */
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data.
   cmd_data_list allocates one of these and registers it with
   add_transformation when DATA LIST is used inside INPUT PROGRAM.
   data_list_trns_proc reads through it for each case, and
   data_list_trns_free releases the parser, the reader, and the
   dictionary reference. */
60 struct data_parser *parser; /* Parser. */
61 struct dictionary *dict; /* Dictionary. */
62 struct dfm_reader *reader; /* Data file reader. */
63 struct variable *end; /* Variable specified on END subcommand. */
/* Forward declarations.  cmd_data_list calls parse_fixed and parse_free
   and registers data_list_trns_class, all of which are defined later in
   this file.  Both parsers take the lexer, the dictionary, a temporary
   pool, and the data parser, and return a bool. */
66 static bool parse_fixed (struct lexer *, struct dictionary *,
67 struct pool *, struct data_parser *);
68 static bool parse_free (struct lexer *, struct dictionary *,
69 struct pool *, struct data_parser *);
71 static const struct trns_class data_list_trns_class;
74 cmd_data_list (struct lexer *lexer, struct dataset *ds)
/* Parses and executes the DATA LIST command.

   Subcommands are read from LEXER until a slash token: FILE, ENCODING,
   RECORDS, SKIP, END, NOTABLE, TABLE, and the type keywords FIXED, FREE
   and LIST.  The variable specifications that follow are parsed by
   parse_fixed when the parser type is DP_FIXED and by parse_free
   otherwise.  A reader is then opened on the file handle.  Inside INPUT
   PROGRAM the parser, dictionary and reader are registered on DS as a
   transformation; otherwise they are handed to
   data_parser_make_active_file.

   The visible failure path destroys the parser and returns
   CMD_CASCADING_FAILURE.
   NOTE(review): the success return and the error label are not visible
   in this excerpt -- confirm against the full file. */
76 struct dictionary *dict;
77 struct data_parser *parser;
78 struct dfm_reader *reader;
79 struct variable *end = NULL;
80 struct file_handle *fh = NULL;
81 char *encoding = NULL;
84 enum data_parser_type type;
86 struct pool *tmp_pool;
/* The dictionary depends on whether we are inside INPUT PROGRAM; the
   alternative shown here is a new dictionary in the default encoding. */
89 dict = (in_input_program ()
91 : dict_create (get_default_encoding ()));
92 parser = data_parser_create ();
95 table = -1; /* Print table if nonzero, -1=undecided. */
/* Subcommand loop: runs until the slash that introduces the variable
   specifications. */
98 while (lex_token (lexer) != T_SLASH)
100 if (lex_match_id (lexer, "FILE"))
102 lex_match (lexer, T_EQUALS);
104 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
108 else if (lex_match_id (lexer, "ENCODING"))
110 lex_match (lexer, T_EQUALS);
111 if (!lex_force_string (lexer))
/* ENCODING keeps its own copy of the string token. */
115 encoding = ss_xstrdup (lex_tokss (lexer));
119 else if (lex_match_id (lexer, "RECORDS"))
/* A positive record count means RECORDS was already given once. */
121 if (data_parser_get_records (parser) > 0)
123 lex_sbc_only_once (lexer, "RECORDS");
126 lex_match (lexer, T_EQUALS);
127 lex_match (lexer, T_LPAREN);
128 if (!lex_force_int_range (lexer, "RECORDS", 0, INT_MAX))
130 data_parser_set_records (parser, lex_integer (lexer));
132 lex_match (lexer, T_RPAREN);
134 else if (lex_match_id (lexer, "SKIP"))
136 lex_match (lexer, T_EQUALS);
137 if (!lex_force_int_range (lexer, "SKIP", 0, INT_MAX))
139 data_parser_set_skip (parser, lex_integer (lexer));
142 else if (lex_match_id (lexer, "END"))
/* END is rejected outside INPUT PROGRAM. */
144 if (!in_input_program ())
146 lex_next_error (lexer, -1, -1,
147 _("The %s subcommand may only be used within %s."),
148 "END", "INPUT PROGRAM");
153 lex_sbc_only_once (lexer, "END");
157 lex_match (lexer, T_EQUALS);
158 if (!lex_force_id (lexer))
/* The END variable is looked up by name in DICT.  The second assignment
   creates it with width 0.
   NOTE(review): presumably creation happens only when the lookup finds
   nothing -- the guard is not visible here. */
160 end = dict_lookup_var (dict, lex_tokcstr (lexer));
162 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
165 else if (lex_match_id (lexer, "NOTABLE"))
167 else if (lex_match_id (lexer, "TABLE"))
169 else if (lex_token (lexer) == T_ID)
/* Type keywords.  FREE and LIST both select the delimited parser and
   differ only in the span setting: true for FREE, false for LIST. */
171 if (lex_match_id (lexer, "FIXED"))
172 data_parser_set_type (parser, DP_FIXED);
173 else if (lex_match_id (lexer, "FREE"))
175 data_parser_set_type (parser, DP_DELIMITED);
176 data_parser_set_span (parser, true);
178 else if (lex_match_id (lexer, "LIST"))
180 data_parser_set_type (parser, DP_DELIMITED);
181 data_parser_set_span (parser, false);
185 lex_error (lexer, NULL);
191 lex_next_error (lexer, -1, -1,
192 _("Only one of FIXED, FREE, or LIST may "
198 if (data_parser_get_type (parser) == DP_DELIMITED)
/* An optional parenthesized list names explicit delimiters.  Each entry
   is the keyword TAB or a string token whose ss_length is 1, and entries
   may be separated by commas. */
200 if (lex_match (lexer, T_LPAREN))
202 struct string delims = DS_EMPTY_INITIALIZER;
204 while (!lex_match (lexer, T_RPAREN))
208 if (lex_match_id (lexer, "TAB"))
210 else if (lex_is_string (lexer)
211 && ss_length (lex_tokss (lexer)) == 1)
213 delim = ss_first (lex_tokss (lexer));
218 /* XXX should support multibyte UTF-8 characters */
219 lex_error (lexer, NULL);
220 ds_destroy (&delims);
223 ds_put_byte (&delims, delim);
225 lex_match (lexer, T_COMMA);
/* Explicit delimiters: an empty line counts as a field, there are no
   quote characters and no soft delimiters, and the collected bytes become
   the hard delimiters. */
228 data_parser_set_empty_line_has_field (parser, true);
229 data_parser_set_quotes (parser, ss_empty ());
230 data_parser_set_soft_delimiters (parser, ss_empty ());
231 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
232 ds_destroy (&delims);
/* Default delimiters: single and double quote characters, CC_SPACES as
   soft delimiters, and a comma as hard delimiter only when the decimal
   character in the format settings is a period (otherwise the hard
   delimiter set is empty). */
236 data_parser_set_empty_line_has_field (parser, false);
237 data_parser_set_quotes (parser, ss_cstr ("'\""));
238 data_parser_set_soft_delimiters (parser,
239 ss_cstr (CC_SPACES));
240 const char decimal = settings_get_fmt_settings ()->decimal;
241 data_parser_set_hard_delimiters (parser,
242 ss_buffer (",", (decimal == '.') ? 1 : 0));
248 lex_error (lexer, NULL);
252 type = data_parser_get_type (parser);
/* An encoding given without a file handle only produces a warning. */
254 if (encoding && NULL == fh)
255 msg (MW, _("Encoding should not be specified for inline data. It will be "
/* NOTE(review): presumably the inline file is substituted only when no
   FILE subcommand supplied a handle -- the guard is not visible here. */
259 fh = fh_inline_file ();
260 fh_set_default_handle (fh);
/* END is accepted only together with the fixed parser type. */
262 if (type != DP_FIXED && end != NULL)
264 msg (SE, _("The %s subcommand may be used only with %s."), "END", "DATA LIST FIXED");
/* The variable parsers get a pool for scratch storage that lives only
   for the duration of the call. */
268 tmp_pool = pool_create ();
269 if (type == DP_FIXED)
270 ok = parse_fixed (lexer, dict, tmp_pool, parser);
272 ok = parse_free (lexer, dict, tmp_pool, parser);
273 pool_destroy (tmp_pool);
277 if (!data_parser_any_fields (parser))
279 msg (SE, _("At least one variable must be specified."));
283 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Table setting computed from the parser: nonzero for fixed input and
   for delimited input without spanning.
   NOTE(review): presumably applied only while table is still -1 and
   followed by a check before the description is output -- confirm. */
287 table = type == DP_FIXED || !data_parser_get_span (parser);
289 data_parser_output_description (parser, fh);
291 reader = dfm_open_reader (fh, lexer, encoding);
295 if (in_input_program ())
/* Inside INPUT PROGRAM the transformation keeps the parser and reader,
   and takes its own reference on the dictionary. */
297 struct data_list_trns *trns = xmalloc (sizeof *trns);
298 trns->parser = parser;
299 trns->dict = dict_ref (dict);
300 trns->reader = reader;
302 add_transformation (ds, &data_list_trns_class, trns);
305 data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL);
/* Failure cleanup: the parser is destroyed before the cascading failure
   is reported. */
315 data_parser_destroy (parser);
316 if (!in_input_program ())
320 return CMD_CASCADING_FAILURE;
323 /* Fixed-format parsing. */
325 /* Parses all the variable specifications for DATA LIST FIXED,
326 storing them into PARSER. Uses TMP_POOL for temporary storage;
327 the caller may destroy it. Returns true only if successful. */
330 parse_fixed (struct lexer *lexer, struct dictionary *dict,
331 struct pool *tmp_pool, struct data_parser *parser)
/* Overview.  Loops until the end-of-command token.  Each iteration parses
   a record placement, a list of variable names, and the matching
   placements and formats.  The format loop then takes the next name,
   computes the variable width from the format, and tries to create the
   variable in DICT.

   A newly created variable gets an output format derived from the input
   format.  A name that already exists is an error outside INPUT PROGRAM.
   Inside INPUT PROGRAM the existing variable is looked up and rejected if
   it differs in numeric versus string kind, or if it is a string of a
   different width.

   When RECORDS was given (nonzero), a field on a record beyond that count
   is an error.  Accepted fields are added to PARSER with
   data_parser_add_fixed_field, and the final assertion checks that every
   parsed name was consumed.

   NOTE(review): the guards that separate the new-variable and
   duplicate-variable paths, and the return statements, are not visible
   in this excerpt. */
333 int max_records = data_parser_get_records (parser);
337 while (lex_token (lexer) != T_ENDCMD)
340 size_t n_names, name_idx;
341 struct fmt_spec *formats, *f;
344 /* Parse everything. */
345 if (!parse_record_placement (lexer, &record, &column)
346 || !parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
347 &names, &n_names, PV_NONE)
348 || !parse_var_placements (lexer, tmp_pool, n_names, FMT_FOR_INPUT,
349 &formats, &n_formats))
352 /* Create variables and var specs. */
354 for (f = formats; f < &formats[n_formats]; f++)
/* Formats for which execute_placement_format returns false are the ones
   that consume a variable name below. */
355 if (!execute_placement_format (f, &record, &column))
361 name = names[name_idx++];
363 /* Create variable. */
364 width = fmt_var_width (f);
365 v = dict_create_var (dict, name, width);
369 struct fmt_spec output = fmt_for_output_from_input (
370 f, settings_get_fmt_settings ());
371 var_set_both_formats (v, &output);
376 This can be acceptable if we're in INPUT
377 PROGRAM, but only if the existing variable has
378 the same width as the one we would have
380 if (!in_input_program ())
382 msg (SE, _("%s is a duplicate variable name."), name);
386 v = dict_lookup_var_assert (dict, name);
387 if ((width != 0) != (var_get_width (v) != 0))
389 msg (SE, _("There is already a variable %s of a "
394 if (width != 0 && width != var_get_width (v))
396 msg (SE, _("There is already a string variable %s of a "
397 "different width."), name);
402 if (max_records && record > max_records)
404 msg (SE, _("Cannot place variable %s on record %d when "
405 "RECORDS=%d is specified."),
406 var_get_name (v), record,
407 data_parser_get_records (parser));
410 data_parser_add_fixed_field (parser, f,
411 var_get_case_index (v),
412 var_get_name (v), record, column);
416 assert (name_idx == n_names);
422 /* Free-format parsing. */
424 /* Parses variable specifications for DATA LIST FREE and adds
425 them to PARSER. Uses TMP_POOL for temporary storage; the caller
426 may destroy it. Returns true only if successful. */
428 parse_free (struct lexer *lexer, struct dictionary *dict,
429 struct pool *tmp_pool, struct data_parser *parser)
/* Overview.  Loops until the end-of-command token.  Each iteration parses
   a list of variable names and then an optional parenthesized format.
   Every name is created in DICT with the width implied by the input
   format, given the output format, and added to PARSER as a delimited
   field.  A name that cannot be created is reported as a duplicate.
   NOTE(review): the return statements and the tail of the final call are
   not visible in this excerpt. */
432 while (lex_token (lexer) != T_ENDCMD)
434 struct fmt_spec input, output;
439 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
440 &name, &n_names, PV_NONE))
/* Explicit format: the type name is parsed as text and then resolved
   with fmt_from_name. */
443 if (lex_match (lexer, T_LPAREN))
445 char type[FMT_TYPE_LEN_MAX + 1];
447 if (!parse_abstract_format_specifier (lexer, type, &input.w,
450 if (!fmt_from_name (type, &input.type))
452 lex_next_error (lexer, -1, -1,
453 _("Unknown format type `%s'."), type);
457 /* If no width was included, use the minimum width for the type.
458 This isn't quite right, because DATETIME by itself seems to become
459 DATETIME20 (see bug #30690), whereas this will become
460 DATETIME17. The correct behavior is not documented. */
463 input.w = fmt_min_input_width (input.type);
/* fmt_check_input__ yields an error string that is reported through the
   lexer.
   NOTE(review): presumably a null result means the format is valid, and
   the string is released after reporting -- neither is visible here. */
467 char *error = fmt_check_input__ (&input);
470 lex_next_error (lexer, -1, -1, "%s", error);
474 if (!lex_force_match (lexer, T_RPAREN))
477 /* As a special case, N format is treated as F format
478 for free-field input. */
479 if (input.type == FMT_N)
482 output = fmt_for_output_from_input (&input,
483 settings_get_fmt_settings ());
/* Default format: an optional asterisk is consumed, input is F8.0, and
   the output format is copied from settings_get_format.
   NOTE(review): presumably this is the branch taken when no parenthesis
   follows the names. */
487 lex_match (lexer, T_ASTERISK);
488 input = fmt_for_input (FMT_F, 8, 0);
489 output = *settings_get_format ();
492 for (i = 0; i < n_names; i++)
496 v = dict_create_var (dict, name[i], fmt_var_width (&input));
499 msg (SE, _("%s is a duplicate variable name."), name[i]);
502 var_set_both_formats (v, &output);
504 data_parser_add_delimited_field (parser,
505 &input, var_get_case_index (v),
513 /* Input procedure. */
515 /* Destroys DATA LIST transformation TRNS.
   The transformation data arrives as the untyped pointer TRNS_.  Its
   parser is destroyed, its reader is closed, and the dictionary reference
   taken with dict_ref when the transformation was set up is dropped.
516 Returns true if successful, false if an I/O error occurred.
   NOTE(review): the return statement and the release of the structure
   itself are not visible in this excerpt -- confirm. */
518 data_list_trns_free (void *trns_)
520 struct data_list_trns *trns = trns_;
521 data_parser_destroy (trns->parser);
522 dfm_close_reader (trns->reader);
523 dict_unref (trns->dict);
528 /* Handle DATA LIST transformation TRNS, parsing data into *C.
   TRNS_ is the untyped pointer to the struct data_list_trns registered
   for this transformation.  CASE_NUM is unused.

   The result is TRNS_CONTINUE when a case was parsed.  When parsing fails
   without an I/O error and without a repeated end of file, the result is
   TRNS_END_FILE, which the END handling below turns back into
   TRNS_CONTINUE if an END variable exists.
   NOTE(review): the result chosen for an I/O error or a second end of
   file, the values stored in the END variable, and the final return are
   not visible in this excerpt. */
529 static enum trns_result
530 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
532 struct data_list_trns *trns = trns_;
533 enum trns_result retval;
/* Replace *C with the result of case_unshare before parsing data into
   it. */
535 *c = case_unshare (*c);
536 if (data_parser_parse (trns->parser, trns->reader, trns->dict, *c))
537 retval = TRNS_CONTINUE;
538 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
540 /* An I/O error, or encountering end of file for a second
541 time, should be escalated into a more serious error. */
545 retval = TRNS_END_FILE;
547 /* If there was an END subcommand handle it. */
548 if (trns->end != NULL)
/* END points at the numeric slot of the END variable inside *C. */
550 double *end = case_num_rw (*c, trns->end);
551 if (retval == TRNS_END_FILE)
554 retval = TRNS_CONTINUE;
/* Transformation class that cmd_data_list passes to add_transformation
   when DATA LIST is used inside INPUT PROGRAM.  It binds the per-case
   callback and the destructor defined above. */
563 static const struct trns_class data_list_trns_class = {
565 .execute = data_list_trns_proc,
566 .destroy = data_list_trns_free,