1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/data-io/data-reader.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/data-io/inpt-pgm.h"
39 #include "language/data-io/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data: the state that cmd_data_list() hands to
   add_transformation() inside INPUT PROGRAM, later used by
   data_list_trns_proc() and released by data_list_trns_free(). */
60 struct data_parser *parser; /* Parser; destroyed by data_list_trns_free(). */
61 struct dictionary *dict; /* Dictionary; a reference taken with dict_ref(). */
62 struct dfm_reader *reader; /* Data file reader; closed by data_list_trns_free(). */
63 struct variable *end; /* Variable specified on END subcommand, or NULL. */
/* Variable-specification parsers.  cmd_data_list() calls parse_fixed() when
   the parser type is DP_FIXED and parse_free() otherwise. */
66 static bool parse_fixed (struct lexer *, struct dictionary *,
67 struct pool *, struct data_parser *);
68 static bool parse_free (struct lexer *, struct dictionary *,
69 struct pool *, struct data_parser *);
/* Transformation class passed to add_transformation(); defined below. */
71 static const struct trns_class data_list_trns_class;
/* Parses the DATA LIST command from LEXER for dataset DS.

   First reads the subcommands that precede the first slash (FILE, ENCODING,
   RECORDS, SKIP, END, NOTABLE, TABLE and the FIXED/FREE/LIST keyword), then
   the variable specifications via parse_fixed() or parse_free().  Inside
   INPUT PROGRAM the parser, reader and dictionary are packaged as a
   transformation; otherwise they are passed to
   data_parser_make_active_file().  The cleanup code at the end returns
   CMD_CASCADING_FAILURE. */
74 cmd_data_list (struct lexer *lexer, struct dataset *ds)
76 struct dictionary *dict;
77 struct data_parser *parser;
78 struct dfm_reader *reader;
79 struct variable *end = NULL;
80 struct file_handle *fh = NULL;
81 char *encoding = NULL;
84 enum data_parser_type type;
86 struct pool *tmp_pool;
/* When not inside INPUT PROGRAM, a new dictionary in the default encoding
   is created for the variables. */
89 dict = (in_input_program ()
91 : dict_create (get_default_encoding ()));
92 parser = data_parser_create ();
95 table = -1; /* Print table if nonzero, -1=undecided. */
/* Subcommands that come before the first slash. */
98 while (lex_token (lexer) != T_SLASH)
100 if (lex_match_id (lexer, "FILE"))
102 lex_match (lexer, T_EQUALS);
104 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
108 else if (lex_match_id (lexer, "ENCODING"))
110 lex_match (lexer, T_EQUALS);
111 if (!lex_force_string (lexer))
115 encoding = ss_xstrdup (lex_tokss (lexer));
119 else if (lex_match_id (lexer, "RECORDS"))
/* A positive record count means RECORDS was already given. */
121 if (data_parser_get_records (parser) > 0)
123 lex_sbc_only_once ("RECORDS");
126 lex_match (lexer, T_EQUALS);
127 lex_match (lexer, T_LPAREN);
128 if (!lex_force_int_range (lexer, "RECORDS", 0, INT_MAX))
130 data_parser_set_records (parser, lex_integer (lexer));
132 lex_match (lexer, T_RPAREN);
134 else if (lex_match_id (lexer, "SKIP"))
136 lex_match (lexer, T_EQUALS);
137 if (!lex_force_int_range (lexer, "SKIP", 0, INT_MAX))
139 data_parser_set_skip (parser, lex_integer (lexer));
142 else if (lex_match_id (lexer, "END"))
/* END is accepted only inside INPUT PROGRAM. */
144 if (!in_input_program ())
146 msg (SE, _("The %s subcommand may only be used within %s."), "END", "INPUT PROGRAM");
151 lex_sbc_only_once ("END");
155 lex_match (lexer, T_EQUALS);
156 if (!lex_force_id (lexer))
158 end = dict_lookup_var (dict, lex_tokcstr (lexer));
160 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
163 else if (lex_match_id (lexer, "NOTABLE"))
165 else if (lex_match_id (lexer, "TABLE"))
167 else if (lex_token (lexer) == T_ID)
/* Layout keyword.  FREE and LIST both select DP_DELIMITED and differ only
   in the span setting (on for FREE, off for LIST). */
169 if (lex_match_id (lexer, "FIXED"))
170 data_parser_set_type (parser, DP_FIXED);
171 else if (lex_match_id (lexer, "FREE"))
173 data_parser_set_type (parser, DP_DELIMITED);
174 data_parser_set_span (parser, true);
176 else if (lex_match_id (lexer, "LIST"))
178 data_parser_set_type (parser, DP_DELIMITED);
179 data_parser_set_span (parser, false);
183 lex_error (lexer, NULL);
189 msg (SE, _("Only one of FIXED, FREE, or LIST may "
195 if (data_parser_get_type (parser) == DP_DELIMITED)
/* Optional parenthesized delimiter list: TAB or one-byte strings,
   optionally separated by commas. */
197 if (lex_match (lexer, T_LPAREN))
199 struct string delims = DS_EMPTY_INITIALIZER;
201 while (!lex_match (lexer, T_RPAREN))
205 if (lex_match_id (lexer, "TAB"))
207 else if (lex_is_string (lexer)
208 && ss_length (lex_tokss (lexer)) == 1)
210 delim = ss_first (lex_tokss (lexer));
215 /* XXX should support multibyte UTF-8 characters */
216 lex_error (lexer, NULL);
217 ds_destroy (&delims);
220 ds_put_byte (&delims, delim);
222 lex_match (lexer, T_COMMA);
/* Explicit delimiters: they all become hard delimiters, and the quote and
   soft-delimiter sets are emptied. */
225 data_parser_set_empty_line_has_field (parser, true);
226 data_parser_set_quotes (parser, ss_empty ());
227 data_parser_set_soft_delimiters (parser, ss_empty ());
228 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
229 ds_destroy (&delims);
/* Default delimiters: single and double quotes, CC_SPACES as soft
   delimiters, and a comma as hard delimiter only when the decimal
   character from the format settings is a period. */
233 data_parser_set_empty_line_has_field (parser, false);
234 data_parser_set_quotes (parser, ss_cstr ("'\""));
235 data_parser_set_soft_delimiters (parser,
236 ss_cstr (CC_SPACES));
237 const char decimal = settings_get_fmt_settings ()->decimal;
238 data_parser_set_hard_delimiters (parser,
239 ss_buffer (",", (decimal == '.') ? 1 : 0));
245 lex_error (lexer, NULL);
249 type = data_parser_get_type (parser);
/* Warn when ENCODING was given but no file handle was parsed. */
251 if (encoding && NULL == fh)
252 msg (MW, _("Encoding should not be specified for inline data. It will be "
256 fh = fh_inline_file ();
257 fh_set_default_handle (fh);
/* END may be combined only with the FIXED layout. */
259 if (type != DP_FIXED && end != NULL)
261 msg (SE, _("The %s subcommand may be used only with %s."), "END", "DATA LIST FIXED");
/* Parse the variable specifications; the temporary pool lives only for the
   duration of that parse. */
265 tmp_pool = pool_create ();
266 if (type == DP_FIXED)
267 ok = parse_fixed (lexer, dict, tmp_pool, parser);
269 ok = parse_free (lexer, dict, tmp_pool, parser);
270 pool_destroy (tmp_pool);
274 if (!data_parser_any_fields (parser))
276 msg (SE, _("At least one variable must be specified."));
280 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Computed table setting: on for FIXED and for LIST (span off), off for
   FREE (span on). */
284 table = type == DP_FIXED || !data_parser_get_span (parser);
286 data_parser_output_description (parser, fh);
288 reader = dfm_open_reader (fh, lexer, encoding);
/* Inside INPUT PROGRAM the state becomes a transformation, which takes its
   own reference to the dictionary. */
292 if (in_input_program ())
294 struct data_list_trns *trns = xmalloc (sizeof *trns);
295 trns->parser = parser;
296 trns->dict = dict_ref (dict);
297 trns->reader = reader;
299 add_transformation (ds, &data_list_trns_class, trns);
302 data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL);
/* Cleanup before reporting failure: the parser is destroyed here. */
312 data_parser_destroy (parser);
313 if (!in_input_program ())
317 return CMD_CASCADING_FAILURE;
320 /* Fixed-format parsing. */
322 /* Parses all the variable specifications for DATA LIST FIXED,
323 storing them into PARSER. Uses TMP_POOL for temporary storage;
324 the caller may destroy it. Returns true only if successful. */
327 parse_fixed (struct lexer *lexer, struct dictionary *dict,
328 struct pool *tmp_pool, struct data_parser *parser)
330 int max_records = data_parser_get_records (parser);
334 while (lex_token (lexer) != T_ENDCMD)
337 size_t n_names, name_idx;
338 struct fmt_spec *formats, *f;
341 /* Parse everything: the record placement, then the variable names,
   then the formats and placements for those names. */
342 if (!parse_record_placement (lexer, &record, &column)
343 || !parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
344 &names, &n_names, PV_NONE)
345 || !parse_var_placements (lexer, tmp_pool, n_names, FMT_FOR_INPUT,
346 &formats, &n_formats))
349 /* Create variables and var specs.  Each format for which
   execute_placement_format() returns false is paired with the next
   parsed name; the assertion after the loop checks that every name
   was consumed. */
351 for (f = formats; f < &formats[n_formats]; f++)
352 if (!execute_placement_format (f, &record, &column))
358 name = names[name_idx++];
360 /* Create variable with the width given by its input format. */
361 width = fmt_var_width (f);
362 v = dict_create_var (dict, name, width);
/* Derive an output format from the input format and the format
   settings, and apply it to the variable. */
366 struct fmt_spec output = fmt_for_output_from_input (
367 f, settings_get_fmt_settings ());
368 var_set_both_formats (v, &output);
373 This can be acceptable if we're in INPUT
374 PROGRAM, but only if the existing variable has
375 the same width as the one we would have
377 if (!in_input_program ())
379 msg (SE, _("%s is a duplicate variable name."), name);
383 v = dict_lookup_var_assert (dict, name);
384 if ((width != 0) != (var_get_width (v) != 0))
386 msg (SE, _("There is already a variable %s of a "
391 if (width != 0 && width != var_get_width (v))
393 msg (SE, _("There is already a string variable %s of a "
394 "different width."), name);
399 if (max_records && record > max_records)
401 msg (SE, _("Cannot place variable %s on record %d when "
402 "RECORDS=%d is specified."),
403 var_get_name (v), record,
404 data_parser_get_records (parser));
407 data_parser_add_fixed_field (parser, f,
408 var_get_case_index (v),
409 var_get_name (v), record, column);
413 assert (name_idx == n_names);
419 /* Free-format parsing. */
421 /* Parses variable specifications for DATA LIST FREE and DATA LIST LIST
422 and adds them to PARSER. Uses TMP_POOL for temporary storage; the caller
423 may destroy it. Returns true only if successful. */
425 parse_free (struct lexer *lexer, struct dictionary *dict,
426 struct pool *tmp_pool, struct data_parser *parser)
429 while (lex_token (lexer) != T_ENDCMD)
431 struct fmt_spec input, output;
/* Each iteration handles one group of variable names, optionally followed
   by a parenthesized format that applies to the whole group. */
436 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
437 &name, &n_names, PV_NONE))
440 if (lex_match (lexer, T_LPAREN))
442 char type[FMT_TYPE_LEN_MAX + 1];
444 if (!parse_abstract_format_specifier (lexer, type, &input.w,
447 if (!fmt_from_name (type, &input.type))
449 msg (SE, _("Unknown format type `%s'."), type);
453 /* If no width was included, use the minimum width for the type.
454 This isn't quite right, because DATETIME by itself seems to become
455 DATETIME20 (see bug #30690), whereas this will become
456 DATETIME17. The correct behavior is not documented. */
459 input.w = fmt_min_input_width (input.type);
463 if (!fmt_check_input (&input) || !lex_force_match (lexer, T_RPAREN))
466 /* As a special case, N format is treated as F format
467 for free-field input. */
468 if (input.type == FMT_N)
471 output = fmt_for_output_from_input (&input,
472 settings_get_fmt_settings ());
/* No parenthesized format: an optional asterisk is accepted, the input
   format is F with width 8 and no decimals, and the output format is the
   one returned by settings_get_format(). */
476 lex_match (lexer, T_ASTERISK);
477 input = fmt_for_input (FMT_F, 8, 0);
478 output = *settings_get_format ();
/* Create one variable per name in the group and add a delimited field for
   each; a name that cannot be created is reported as a duplicate. */
481 for (i = 0; i < n_names; i++)
485 v = dict_create_var (dict, name[i], fmt_var_width (&input));
488 msg (SE, _("%s is a duplicate variable name."), name[i]);
491 var_set_both_formats (v, &output);
493 data_parser_add_delimited_field (parser,
494 &input, var_get_case_index (v),
502 /* Input procedure. */
504 /* Destroys DATA LIST transformation TRNS.
505 Returns true if successful, false if an I/O error occurred. */
507 data_list_trns_free (void *trns_)
509 struct data_list_trns *trns = trns_;
/* Release what the transformation holds: the parser, the reader, and the
   dictionary reference that cmd_data_list() took with dict_ref(). */
510 data_parser_destroy (trns->parser);
511 dfm_close_reader (trns->reader);
512 dict_unref (trns->dict);
517 /* Handle DATA LIST transformation TRNS, parsing data into *C.
   CASE_NUM is unused.  The result is TRNS_CONTINUE when
   data_parser_parse() succeeds.  When an END variable exists, a
   TRNS_END_FILE result is turned back into TRNS_CONTINUE. */
518 static enum trns_result
519 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
521 struct data_list_trns *trns = trns_;
522 enum trns_result retval;
/* Replace *C with the case returned by case_unshare() before the parser
   writes into it. */
524 *c = case_unshare (*c);
525 if (data_parser_parse (trns->parser, trns->reader, trns->dict, *c))
526 retval = TRNS_CONTINUE;
527 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
529 /* An I/O error, or encountering end of file for a second
530 time, should be escalated into a more serious error. */
534 retval = TRNS_END_FILE;
536 /* If there was an END subcommand handle it: get a writable pointer to
   the END variable value in *C, and at end of file continue instead
   of ending the file. */
537 if (trns->end != NULL)
539 double *end = case_num_rw (*c, trns->end);
540 if (retval == TRNS_END_FILE)
543 retval = TRNS_CONTINUE;
/* Transformation class for DATA LIST.  cmd_data_list() passes it to
   add_transformation() together with a struct data_list_trns. */
552 static const struct trns_class data_list_trns_class = {
554 .execute = data_list_trns_proc,
555 .destroy = data_list_trns_free,