1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/data-io/data-reader.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/data-io/inpt-pgm.h"
39 #include "language/data-io/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data.
   An instance is allocated by cmd_data_list () when DATA LIST runs
   inside INPUT PROGRAM and is handed to add_transformation () together
   with data_list_trns_proc and data_list_trns_free. */
60 struct data_parser *parser; /* Parser; destroyed by data_list_trns_free (). */
61 struct dfm_reader *reader; /* Data file reader; closed by data_list_trns_free (). */
62 struct variable *end; /* Variable specified on END subcommand; tested against NULL before use. */
/* Variable specification parsers.  cmd_data_list () calls parse_fixed ()
   when the parser type is DP_FIXED and has a parse_free () call for the
   other case. */
65 static bool parse_fixed (struct lexer *, struct dictionary *,
66 struct pool *, struct data_parser *);
67 static bool parse_free (struct lexer *, struct dictionary *,
68 struct pool *, struct data_parser *);
/* Transformation callbacks passed to add_transformation (). */
70 static trns_free_func data_list_trns_free;
71 static trns_proc_func data_list_trns_proc;
/* Parses and executes the DATA LIST command.

   Subcommands (FILE, ENCODING, RECORDS, SKIP, END, NOTABLE, TABLE and
   the FIXED, FREE or LIST keyword) are consumed until a slash token.
   The variable specifications are then parsed by parse_fixed () or
   parse_free () according to the parser type.  Inside INPUT PROGRAM the
   parser and reader are stored in a struct data_list_trns and
   registered with add_transformation (); the other path calls
   data_parser_make_active_file ().

   The visible returns are CMD_DATA_LIST after that setup and
   CMD_CASCADING_FAILURE after the cleanup code that destroys the
   parser. */
74 cmd_data_list (struct lexer *lexer, struct dataset *ds)
76 struct dictionary *dict;
77 struct data_parser *parser;
78 struct dfm_reader *reader;
79 struct variable *end = NULL;
80 struct file_handle *fh = NULL;
81 char *encoding = NULL;
84 enum data_parser_type type;
86 struct pool *tmp_pool;
/* The false branch of the in_input_program () test creates a new
   dictionary with the default encoding. */
89 dict = (in_input_program ()
91 : dict_create (get_default_encoding ()));
92 parser = data_parser_create (dict);
95 table = -1; /* Print table if nonzero, -1=undecided. */
/* Subcommand loop: runs until the current token is a slash. */
98 while (lex_token (lexer) != T_SLASH)
100 if (lex_match_id (lexer, "FILE"))
/* The equals sign is optional here and on the other subcommands: the
   result of lex_match () is not checked. */
102 lex_match (lexer, T_EQUALS);
104 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
108 else if (lex_match_id (lexer, "ENCODING"))
110 lex_match (lexer, T_EQUALS);
111 if (!lex_force_string (lexer))
115 encoding = ss_xstrdup (lex_tokss (lexer));
119 else if (lex_match_id (lexer, "RECORDS"))
121 lex_match (lexer, T_EQUALS);
/* Parentheses around the RECORDS value are matched but not required. */
122 lex_match (lexer, T_LPAREN);
123 if (!lex_force_int (lexer))
126 int records = lex_integer (lexer);
129 msg (SE, _("The %s value must be non-negative."), "RECORDS");
132 data_parser_set_records (parser, records);
134 lex_match (lexer, T_RPAREN);
136 else if (lex_match_id (lexer, "SKIP"))
138 lex_match (lexer, T_EQUALS);
139 if (!lex_force_int (lexer))
141 int skip = lex_integer (lexer);
144 msg (SE, _("The %s value must be non-negative."), "SKIP");
147 data_parser_set_skip (parser, skip);
150 else if (lex_match_id (lexer, "END"))
/* END is rejected outside INPUT PROGRAM and when given more than once
   (see the two messages below). */
152 if (!in_input_program ())
154 msg (SE, _("The %s subcommand may only be used within %s."), "END", "INPUT PROGRAM");
159 msg (SE, _("The %s subcommand may only be specified once."), "END");
163 lex_match (lexer, T_EQUALS);
164 if (!lex_force_id (lexer))
/* The END variable is looked up in the dictionary first; the create
   call uses width 0.  NOTE(review): the condition that selects between
   lookup and creation is not visible here. */
166 end = dict_lookup_var (dict, lex_tokcstr (lexer));
168 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
171 else if (lex_match_id (lexer, "NOTABLE"))
173 else if (lex_match_id (lexer, "TABLE"))
175 else if (lex_token (lexer) == T_ID)
/* FIXED selects DP_FIXED.  FREE and LIST both select DP_DELIMITED and
   differ in the span flag: true for FREE, false for LIST. */
177 if (lex_match_id (lexer, "FIXED"))
178 data_parser_set_type (parser, DP_FIXED);
179 else if (lex_match_id (lexer, "FREE"))
181 data_parser_set_type (parser, DP_DELIMITED);
182 data_parser_set_span (parser, true);
184 else if (lex_match_id (lexer, "LIST"))
186 data_parser_set_type (parser, DP_DELIMITED);
187 data_parser_set_span (parser, false);
191 lex_error (lexer, NULL);
197 msg (SE, _("Only one of FIXED, FREE, or LIST may "
/* Delimited formats accept an optional parenthesized delimiter list. */
203 if (data_parser_get_type (parser) == DP_DELIMITED)
205 if (lex_match (lexer, T_LPAREN))
207 struct string delims = DS_EMPTY_INITIALIZER;
/* Each delimiter is the keyword TAB or a string token of length 1; a
   comma between delimiters is optional.  Accepted bytes accumulate in
   delims. */
209 while (!lex_match (lexer, T_RPAREN))
213 if (lex_match_id (lexer, "TAB"))
215 else if (lex_is_string (lexer)
216 && ss_length (lex_tokss (lexer)) == 1)
218 delim = ss_first (lex_tokss (lexer));
223 /* XXX should support multibyte UTF-8 characters */
224 lex_error (lexer, NULL);
225 ds_destroy (&delims);
228 ds_put_byte (&delims, delim);
230 lex_match (lexer, T_COMMA);
/* With an explicit list: no quote characters, no soft delimiters, and
   the collected bytes become the hard delimiters. */
233 data_parser_set_empty_line_has_field (parser, true);
234 data_parser_set_quotes (parser, ss_empty ());
235 data_parser_set_soft_delimiters (parser, ss_empty ());
236 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
237 ds_destroy (&delims);
/* Second group of settings.  NOTE(review): presumably the case where no
   explicit list was given; the else separating the two groups is not
   visible here.  Quotes are the single and double quote characters,
   soft delimiters are CC_SPACES, and the comma is a hard delimiter only
   when the decimal character is a period (otherwise the hard delimiter
   buffer has length 0). */
241 data_parser_set_empty_line_has_field (parser, false);
242 data_parser_set_quotes (parser, ss_cstr ("'\""));
243 data_parser_set_soft_delimiters (parser,
244 ss_cstr (CC_SPACES));
245 const char decimal = settings_get_fmt_settings ()->decimal;
246 data_parser_set_hard_delimiters (parser,
247 ss_buffer (",", (decimal == '.') ? 1 : 0));
253 lex_error (lexer, NULL);
257 type = data_parser_get_type (parser);
/* ENCODING without FILE draws a warning (MW). */
259 if (encoding && NULL == fh)
260 msg (MW, _("Encoding should not be specified for inline data. It will be "
/* fh_inline_file () supplies the handle.  NOTE(review): presumably only
   when FILE was not given; the guard is not visible here.  Whatever fh
   holds at this point is then made the default handle. */
264 fh = fh_inline_file ();
265 fh_set_default_handle (fh);
/* END requires the fixed format. */
267 if (type != DP_FIXED && end != NULL)
269 msg (SE, _("The %s subcommand may be used only with %s."), "END", "DATA LIST FIXED");
/* tmp_pool exists only for the duration of variable parsing. */
273 tmp_pool = pool_create ();
274 if (type == DP_FIXED)
275 ok = parse_fixed (lexer, dict, tmp_pool, parser);
277 ok = parse_free (lexer, dict, tmp_pool, parser);
278 pool_destroy (tmp_pool);
282 if (!data_parser_any_fields (parser))
284 msg (SE, _("At least one variable must be specified."));
288 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Table default: on for FIXED and for delimited input with span off
   (LIST above), off when span is on (FREE above).  NOTE(review):
   presumably applied only while table is still -1; the guard is not
   visible here. */
292 table = type == DP_FIXED || !data_parser_get_span (parser);
294 data_parser_output_description (parser, fh);
296 reader = dfm_open_reader (fh, lexer, encoding);
/* Inside INPUT PROGRAM the parser and reader are stored in a
   transformation; data_list_trns_free () later destroys the parser and
   closes the reader. */
300 if (in_input_program ())
302 struct data_list_trns *trns = xmalloc (sizeof *trns);
303 trns->parser = parser;
304 trns->reader = reader;
306 add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
309 data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL);
314 return CMD_DATA_LIST;
/* Failure cleanup. */
317 data_parser_destroy (parser);
318 if (!in_input_program ())
322 return CMD_CASCADING_FAILURE;
325 /* Fixed-format parsing. */
327 /* Parses all the variable specifications for DATA LIST FIXED,
328 storing them into PARSER. Uses TMP_POOL for temporary storage;
329 the caller may destroy it. Returns true only if successful. */
332 parse_fixed (struct lexer *lexer, struct dictionary *dict,
333 struct pool *tmp_pool, struct data_parser *parser)
/* max_records is the RECORDS value held by the parser; a value of zero
   disables the record range check further down (max_records && ...). */
335 int max_records = data_parser_get_records (parser);
/* One iteration per group of variable specifications, until the end of
   the command. */
339 while (lex_token (lexer) != T_ENDCMD)
342 size_t name_cnt, name_idx;
343 struct fmt_spec *formats, *f;
346 /* Parse everything. */
/* Three steps, any of which may fail: the record and column placement,
   the list of variable names (allocated from tmp_pool), and one
   placement or format per name. */
347 if (!parse_record_placement (lexer, &record, &column)
348 || !parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
349 &names, &name_cnt, PV_NONE)
350 || !parse_var_placements (lexer, tmp_pool, name_cnt, FMT_FOR_INPUT,
351 &formats, &format_cnt))
354 /* Create variables and var specs. */
/* Each format is first offered to execute_placement_format (), which
   receives record and column by address.  NOTE(review): presumably only
   formats for which it returns false consume the next name; the braces
   are not visible here.  The assert after the loop checks that every
   parsed name was consumed. */
356 for (f = formats; f < &formats[format_cnt]; f++)
357 if (!execute_placement_format (f, &record, &column))
363 name = names[name_idx++];
365 /* Create variable. */
366 width = fmt_var_width (f);
367 v = dict_create_var (dict, name, width);
371 struct fmt_spec output = fmt_for_output_from_input (
372 f, settings_get_fmt_settings ());
373 var_set_both_formats (v, &output);
/* NOTE(review): the next three lines read as the interior of a comment
   whose opening and closing delimiters are not visible in this view.
   The code after them: outside INPUT PROGRAM the name is reported as a
   duplicate.  Otherwise the existing variable is looked up and an error
   is reported if exactly one of the two widths is zero, or if width is
   nonzero and differs from the existing width.  A nonzero max_records
   bounds the record number.  Finally the field is registered with the
   parser using the case index and name of the variable. */
378 This can be acceptable if we're in INPUT
379 PROGRAM, but only if the existing variable has
380 the same width as the one we would have
382 if (!in_input_program ())
384 msg (SE, _("%s is a duplicate variable name."), name);
388 v = dict_lookup_var_assert (dict, name);
389 if ((width != 0) != (var_get_width (v) != 0))
391 msg (SE, _("There is already a variable %s of a "
396 if (width != 0 && width != var_get_width (v))
398 msg (SE, _("There is already a string variable %s of a "
399 "different width."), name);
404 if (max_records && record > max_records)
406 msg (SE, _("Cannot place variable %s on record %d when "
407 "RECORDS=%d is specified."),
408 var_get_name (v), record,
409 data_parser_get_records (parser));
412 data_parser_add_fixed_field (parser, f,
413 var_get_case_index (v),
414 var_get_name (v), record, column);
418 assert (name_idx == name_cnt);
424 /* Free-format parsing. */
426 /* Parses variable specifications for DATA LIST FREE and adds
427 them to PARSER. Uses TMP_POOL for temporary storage; the caller
428 may destroy it. Returns true only if successful. */
430 parse_free (struct lexer *lexer, struct dictionary *dict,
431 struct pool *tmp_pool, struct data_parser *parser)
/* One iteration per group of variable names, until the end of the
   command. */
434 while (lex_token (lexer) != T_ENDCMD)
436 struct fmt_spec input, output;
/* The name list is allocated from tmp_pool. */
441 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
442 &name, &name_cnt, PV_NONE))
/* A parenthesized format specification may follow the names. */
445 if (lex_match (lexer, T_LPAREN))
447 char type[FMT_TYPE_LEN_MAX + 1];
449 if (!parse_abstract_format_specifier (lexer, type, &input.w,
/* The textual type name is translated into input.type. */
452 if (!fmt_from_name (type, &input.type))
454 msg (SE, _("Unknown format type `%s'."), type);
458 /* If no width was included, use the minimum width for the type.
459 This isn't quite right, because DATETIME by itself seems to become
460 DATETIME20 (see bug #30690), whereas this will become
461 DATETIME17. The correct behavior is not documented. */
464 input.w = fmt_min_input_width (input.type);
/* The format must be valid for input and be followed by the closing
   parenthesis. */
468 if (!fmt_check_input (&input) || !lex_force_match (lexer, T_RPAREN))
471 /* As a special case, N format is treated as F format
472 for free-field input. */
473 if (input.type == FMT_N)
/* The output format is derived from the input format. */
476 output = fmt_for_output_from_input (&input,
477 settings_get_fmt_settings ());
/* NOTE(review): presumably the branch taken when no parenthesized
   format was given; the else is not visible here.  An optional asterisk
   is consumed, the input format comes from fmt_for_input (FMT_F, 8, 0)
   and the output format is the one returned by settings_get_format (). */
481 lex_match (lexer, T_ASTERISK);
482 input = fmt_for_input (FMT_F, 8, 0);
483 output = *settings_get_format ();
/* Every name in the group gets the same input and output formats.
   NOTE(review): the duplicate name message presumably applies when
   dict_create_var () does not yield a variable; that test is not
   visible here. */
486 for (i = 0; i < name_cnt; i++)
490 v = dict_create_var (dict, name[i], fmt_var_width (&input));
493 msg (SE, _("%s is a duplicate variable name."), name[i]);
496 var_set_both_formats (v, &output);
498 data_parser_add_delimited_field (parser,
499 &input, var_get_case_index (v),
507 /* Input procedure. */
509 /* Destroys DATA LIST transformation TRNS.
510 Returns true if successful, false if an I/O error occurred.

   Passed as the free callback in the add_transformation () call made
   by cmd_data_list ().  The visible statements destroy the parser and
   close the reader held by the transformation.
   NOTE(review): the return statement is not visible here; confirm the
   documented return value against it. */
512 data_list_trns_free (void *trns_)
/* The callback receives the transformation as an untyped pointer. */
514 struct data_list_trns *trns = trns_;
515 data_parser_destroy (trns->parser);
516 dfm_close_reader (trns->reader);
521 /* Handle DATA LIST transformation TRNS, parsing data into *C. */
523 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
525 struct data_list_trns *trns = trns_;
528 *c = case_unshare (*c);
529 if (data_parser_parse (trns->parser, trns->reader, *c))
530 retval = TRNS_CONTINUE;
531 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
533 /* An I/O error, or encountering end of file for a second
534 time, should be escalated into a more serious error. */
538 retval = TRNS_END_FILE;
540 /* If there was an END subcommand handle it. */
541 if (trns->end != NULL)
543 double *end = &case_data_rw (*c, trns->end)->f;
544 if (retval == TRNS_END_FILE)
547 retval = TRNS_CONTINUE;