1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/data-io/data-reader.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/data-io/inpt-pgm.h"
39 #include "language/data-io/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
/* Shorthand for user-visible message strings: _(MSGID) expands to
   gettext (MSGID). */
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data: the state that data_list_trns_proc
   uses for each case and that data_list_trns_free releases. */
60 struct data_parser *parser; /* Parser handed to data_parser_parse for each case. */
61 struct dfm_reader *reader; /* Data file reader passed to the parser along with the case. */
62 struct variable *end; /* Variable specified on END subcommand, or NULL if there is none. */
/* Forward declarations: the two variable-specification parsers that
   cmd_data_list calls, and the transformation class that is defined
   further down in this file. */
65 static bool parse_fixed (struct lexer *, struct dictionary *,
66 struct pool *, struct data_parser *);
67 static bool parse_free (struct lexer *, struct dictionary *,
68 struct pool *, struct data_parser *);
70 static const struct trns_class data_list_trns_class;
/* Parses and executes the DATA LIST command read from LEXER.

   Subcommands that precede the first slash (FILE, ENCODING, RECORDS, SKIP,
   END, NOTABLE, TABLE and the FIXED, FREE and LIST keywords) configure the
   data parser.  The variable specifications are then parsed by
   parse_fixed() when the parser type is DP_FIXED and by parse_free()
   otherwise.

   Within INPUT PROGRAM the parser and reader are stored in a
   struct data_list_trns that is added to DS as a transformation of class
   data_list_trns_class.  NOTE(review): the call to
   data_parser_make_active_file() is presumably the branch taken outside
   INPUT PROGRAM -- confirm.

   The cleanup code at the end destroys the parser and returns
   CMD_CASCADING_FAILURE. */
73 cmd_data_list (struct lexer *lexer, struct dataset *ds)
75 struct dictionary *dict;
76 struct data_parser *parser;
77 struct dfm_reader *reader;
78 struct variable *end = NULL;
79 struct file_handle *fh = NULL;
80 char *encoding = NULL;
83 enum data_parser_type type;
85 struct pool *tmp_pool;
/* The dictionary depends on whether we are inside INPUT PROGRAM; the
   alternative shown here creates a new dictionary that uses the default
   encoding. */
88 dict = (in_input_program ()
90 : dict_create (get_default_encoding ()));
91 parser = data_parser_create (dict);
94 table = -1; /* Print table if nonzero, -1=undecided. */
/* Parse subcommands until the slash is reached. */
97 while (lex_token (lexer) != T_SLASH)
/* FILE: a file handle that may refer to a file or to inline data. */
99 if (lex_match_id (lexer, "FILE"))
101 lex_match (lexer, T_EQUALS);
103 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
/* ENCODING: a string token, copied so that it outlives the lexer token. */
107 else if (lex_match_id (lexer, "ENCODING"))
109 lex_match (lexer, T_EQUALS);
110 if (!lex_force_string (lexer))
114 encoding = ss_xstrdup (lex_tokss (lexer));
/* RECORDS: a nonnegative integer, optionally parenthesized.  A positive
   record count already stored in the parser means RECORDS was given
   before. */
118 else if (lex_match_id (lexer, "RECORDS"))
120 if (data_parser_get_records (parser) > 0)
122 lex_sbc_only_once ("RECORDS");
125 lex_match (lexer, T_EQUALS);
126 lex_match (lexer, T_LPAREN);
127 if (!lex_force_int_range (lexer, "RECORDS", 0, INT_MAX))
129 data_parser_set_records (parser, lex_integer (lexer));
131 lex_match (lexer, T_RPAREN);
/* SKIP: a nonnegative integer stored in the parser. */
133 else if (lex_match_id (lexer, "SKIP"))
135 lex_match (lexer, T_EQUALS);
136 if (!lex_force_int_range (lexer, "SKIP", 0, INT_MAX))
138 data_parser_set_skip (parser, lex_integer (lexer));
/* END: rejected outside INPUT PROGRAM and allowed only once. */
141 else if (lex_match_id (lexer, "END"))
143 if (!in_input_program ())
145 msg (SE, _("The %s subcommand may only be used within %s."), "END", "INPUT PROGRAM");
150 lex_sbc_only_once ("END");
154 lex_match (lexer, T_EQUALS);
155 if (!lex_force_id (lexer))
/* Look the END variable up by name; a variable of width 0 with that name
   is created in DICT.  NOTE(review): the creation presumably happens only
   when the lookup finds nothing -- confirm. */
157 end = dict_lookup_var (dict, lex_tokcstr (lexer));
159 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
162 else if (lex_match_id (lexer, "NOTABLE"))
164 else if (lex_match_id (lexer, "TABLE"))
/* Any other identifier must be one of the type keywords.  FREE and LIST
   both select DP_DELIMITED and differ only in the span setting: true for
   FREE, false for LIST. */
166 else if (lex_token (lexer) == T_ID)
168 if (lex_match_id (lexer, "FIXED"))
169 data_parser_set_type (parser, DP_FIXED);
170 else if (lex_match_id (lexer, "FREE"))
172 data_parser_set_type (parser, DP_DELIMITED);
173 data_parser_set_span (parser, true);
175 else if (lex_match_id (lexer, "LIST"))
177 data_parser_set_type (parser, DP_DELIMITED);
178 data_parser_set_span (parser, false);
182 lex_error (lexer, NULL);
188 msg (SE, _("Only one of FIXED, FREE, or LIST may "
/* Delimited input accepts an optional parenthesized delimiter list. */
194 if (data_parser_get_type (parser) == DP_DELIMITED)
196 if (lex_match (lexer, T_LPAREN))
198 struct string delims = DS_EMPTY_INITIALIZER;
/* Each entry is the TAB keyword or a string exactly one byte long;
   commas between entries are optional.  The bytes are collected in
   DELIMS. */
200 while (!lex_match (lexer, T_RPAREN))
204 if (lex_match_id (lexer, "TAB"))
206 else if (lex_is_string (lexer)
207 && ss_length (lex_tokss (lexer)) == 1)
209 delim = ss_first (lex_tokss (lexer));
214 /* XXX should support multibyte UTF-8 characters */
215 lex_error (lexer, NULL);
216 ds_destroy (&delims);
219 ds_put_byte (&delims, delim);
221 lex_match (lexer, T_COMMA);
/* Explicit delimiters: an empty line counts as a field, there are no
   quote characters and no soft delimiters, and the collected bytes are
   the hard delimiters. */
224 data_parser_set_empty_line_has_field (parser, true);
225 data_parser_set_quotes (parser, ss_empty ());
226 data_parser_set_soft_delimiters (parser, ss_empty ());
227 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
228 ds_destroy (&delims);
/* Default delimiters: an empty line is not a field, single and double
   quotes are quote characters, CC_SPACES are soft delimiters, and comma
   is a hard delimiter only when the decimal point is a period (otherwise
   the hard delimiter set is empty). */
232 data_parser_set_empty_line_has_field (parser, false);
233 data_parser_set_quotes (parser, ss_cstr ("'\""));
234 data_parser_set_soft_delimiters (parser,
235 ss_cstr (CC_SPACES));
236 const char decimal = settings_get_fmt_settings ()->decimal;
237 data_parser_set_hard_delimiters (parser,
238 ss_buffer (",", (decimal == '.') ? 1 : 0));
244 lex_error (lexer, NULL);
248 type = data_parser_get_type (parser);
/* ENCODING without a file handle only draws a warning: the data is then
   inline.  NOTE(review): the inline handle below is presumably installed
   only when no FILE was given -- confirm. */
250 if (encoding && NULL == fh)
251 msg (MW, _("Encoding should not be specified for inline data. It will be "
255 fh = fh_inline_file ();
256 fh_set_default_handle (fh);
/* END is accepted only together with fixed-format input. */
258 if (type != DP_FIXED && end != NULL)
260 msg (SE, _("The %s subcommand may be used only with %s."), "END", "DATA LIST FIXED");
/* The temporary pool lives only for the duration of variable parsing. */
264 tmp_pool = pool_create ();
265 if (type == DP_FIXED)
266 ok = parse_fixed (lexer, dict, tmp_pool, parser);
268 ok = parse_free (lexer, dict, tmp_pool, parser);
269 pool_destroy (tmp_pool);
273 if (!data_parser_any_fields (parser))
275 msg (SE, _("At least one variable must be specified."));
279 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Table default: nonzero for fixed-format input and for delimited input
   that does not span (LIST).  NOTE(review): presumably applied only while
   TABLE is still undecided (-1) -- confirm. */
283 table = type == DP_FIXED || !data_parser_get_span (parser);
285 data_parser_output_description (parser, fh);
287 reader = dfm_open_reader (fh, lexer, encoding);
/* Within INPUT PROGRAM the parser and reader are handed to a newly
   allocated transformation that is added to DS. */
291 if (in_input_program ())
293 struct data_list_trns *trns = xmalloc (sizeof *trns);
294 trns->parser = parser;
295 trns->reader = reader;
297 add_transformation (ds, &data_list_trns_class, trns);
300 data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL);
/* Cleanup: destroy the parser and report a cascading failure. */
308 data_parser_destroy (parser);
309 if (!in_input_program ())
313 return CMD_CASCADING_FAILURE;
316 /* Fixed-format parsing. */
318 /* Parses all the variable specifications for DATA LIST FIXED,
319 creating each variable in DICT (or, within INPUT PROGRAM, reusing an
existing variable of a compatible width) and adding a fixed field for it
to PARSER. Uses TMP_POOL for temporary storage;
320 the caller may destroy it. Returns true only if
323 parse_fixed (struct lexer *lexer, struct dictionary *dict,
324 struct pool *tmp_pool, struct data_parser *parser)
326 int max_records = data_parser_get_records (parser);
330 while (lex_token (lexer) != T_ENDCMD)
333 size_t n_names, name_idx;
334 struct fmt_spec *formats, *f;
337 /* Parse everything. */
338 if (!parse_record_placement (lexer, &record, &column)
339 || !parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
340 &names, &n_names, PV_NONE)
341 || !parse_var_placements (lexer, tmp_pool, n_names, FMT_FOR_INPUT,
342 &formats, &n_formats))
345 /* Create variables and var specs. */
/* Every format is first offered to execute_placement_format() together
   with the current record and column; only a format for which it returns
   false consumes the next name and becomes a variable. */
347 for (f = formats; f < &formats[n_formats]; f++)
348 if (!execute_placement_format (f, &record, &column))
354 name = names[name_idx++];
356 /* Create variable. */
357 width = fmt_var_width (f);
358 v = dict_create_var (dict, name, width);
362 struct fmt_spec output = fmt_for_output_from_input (
363 f, settings_get_fmt_settings ());
364 var_set_both_formats (v, &output);
369 This can be acceptable if we're in INPUT
370 PROGRAM, but only if the existing variable has
371 the same width as the one we would have
373 if (!in_input_program ())
375 msg (SE, _("%s is a duplicate variable name."), name);
379 v = dict_lookup_var_assert (dict, name);
/* The existing variable is rejected when exactly one of the two widths is
   zero. */
380 if ((width != 0) != (var_get_width (v) != 0))
382 msg (SE, _("There is already a variable %s of a "
/* It is also rejected when the new width is nonzero and differs from the
   existing width. */
387 if (width != 0 && width != var_get_width (v))
389 msg (SE, _("There is already a string variable %s of a "
390 "different width."), name);
/* A variable may not be placed beyond the record count given on RECORDS;
   a count of zero disables the check. */
395 if (max_records && record > max_records)
397 msg (SE, _("Cannot place variable %s on record %d when "
398 "RECORDS=%d is specified."),
399 var_get_name (v), record,
400 data_parser_get_records (parser));
/* Register the field with the parser at the current record and column. */
403 data_parser_add_fixed_field (parser, f,
404 var_get_case_index (v),
405 var_get_name (v), record, column);
/* Every parsed name must have been consumed by the loop over formats. */
409 assert (name_idx == n_names);
415 /* Free-format parsing. */
417 /* Parses variable specifications for DATA LIST FREE and adds
418 them to PARSER as delimited fields, creating each variable in DICT.
Each group of names shares one input format. Uses TMP_POOL for temporary storage; the caller
419 may destroy it. Returns true only if successful. */
421 parse_free (struct lexer *lexer, struct dictionary *dict,
422 struct pool *tmp_pool, struct data_parser *parser)
425 while (lex_token (lexer) != T_ENDCMD)
427 struct fmt_spec input, output;
432 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
433 &name, &n_names, PV_NONE))
/* An explicit format in parentheses applies to all of the names that were
   just parsed. */
436 if (lex_match (lexer, T_LPAREN))
438 char type[FMT_TYPE_LEN_MAX + 1];
440 if (!parse_abstract_format_specifier (lexer, type, &input.w,
443 if (!fmt_from_name (type, &input.type))
445 msg (SE, _("Unknown format type `%s'."), type);
449 /* If no width was included, use the minimum width for the type.
450 This isn't quite right, because DATETIME by itself seems to become
451 DATETIME20 (see bug #30690), whereas this will become
452 DATETIME17. The correct behavior is not documented. */
455 input.w = fmt_min_input_width (input.type);
459 if (!fmt_check_input (&input) || !lex_force_match (lexer, T_RPAREN))
462 /* As a special case, N format is treated as F format
463 for free-field input. */
464 if (input.type == FMT_N)
467 output = fmt_for_output_from_input (&input,
468 settings_get_fmt_settings ());
/* No explicit format: an optional asterisk is skipped, the input format
   is F8.0, and the output format is the one that settings_get_format()
   supplies. */
472 lex_match (lexer, T_ASTERISK);
473 input = fmt_for_input (FMT_F, 8, 0);
474 output = *settings_get_format ();
/* Create one variable and one delimited field for each name. */
477 for (i = 0; i < n_names; i++)
481 v = dict_create_var (dict, name[i], fmt_var_width (&input));
484 msg (SE, _("%s is a duplicate variable name."), name[i]);
487 var_set_both_formats (v, &output);
489 data_parser_add_delimited_field (parser,
490 &input, var_get_case_index (v),
498 /* Input procedure. */
500 /* Destroys DATA LIST transformation TRNS_, which is treated as a
pointer to struct data_list_trns: its parser is destroyed and its
reader is closed.
501 Returns true if successful, false if an I/O error occurred. */
503 data_list_trns_free (void *trns_)
505 struct data_list_trns *trns = trns_;
506 data_parser_destroy (trns->parser);
507 dfm_close_reader (trns->reader);
512 /* Handle DATA LIST transformation TRNS_, parsing data into *C.

   The result is TRNS_CONTINUE when the parser succeeds.  When it fails,
   a reader error or a repeated end of file is escalated (see below), and
   TRNS_END_FILE is the remaining result.  NOTE(review): TRNS_END_FILE is
   presumably assigned in the final else branch -- confirm.

   When an END variable was specified, a TRNS_END_FILE result is replaced
   by TRNS_CONTINUE.  CASE_NUM is unused. */
513 static enum trns_result
514 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
516 struct data_list_trns *trns = trns_;
517 enum trns_result retval;
/* NOTE(review): case_unshare presumably yields a case that is safe to
   modify before the parser writes into it -- confirm. */
519 *c = case_unshare (*c);
520 if (data_parser_parse (trns->parser, trns->reader, *c))
521 retval = TRNS_CONTINUE;
522 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
524 /* An I/O error, or encountering end of file for a second
525 time, should be escalated into a more serious error. */
529 retval = TRNS_END_FILE;
531 /* If there was an END subcommand handle it. */
532 if (trns->end != NULL)
/* Pointer to the numeric value of the END variable within *C. */
534 double *end = case_num_rw (*c, trns->end);
/* With END, an end-of-file result is replaced by TRNS_CONTINUE. */
535 if (retval == TRNS_END_FILE)
538 retval = TRNS_CONTINUE;
/* Transformation class for DATA LIST.  cmd_data_list passes it to
   add_transformation(); it names data_list_trns_proc as the callback that
   executes the transformation and data_list_trns_free as the callback
   that destroys it. */
547 static const struct trns_class data_list_trns_class = {
549 .execute = data_list_trns_proc,
550 .destroy = data_list_trns_free,