1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/data-io/data-reader.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/data-io/inpt-pgm.h"
39 #include "language/data-io/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
/* Shorthand for gettext(); wraps the user-visible message strings passed to
   msg() in this file. */
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data. */
/* An instance is allocated by cmd_data_list() when DATA LIST runs inside
   INPUT PROGRAM, registered through add_transformation(), and then used by
   data_list_trns_proc() and released by data_list_trns_free(). */
60 struct data_parser *parser; /* Parser; destroyed by data_list_trns_free(). */
61 struct dfm_reader *reader; /* Data file reader; closed by data_list_trns_free(). */
62 struct variable *end; /* Variable specified on END subcommand; data_list_trns_proc() treats NULL as "no END". */
/* Variable-specification parsers.  cmd_data_list() calls parse_fixed() when
   the parser type is DP_FIXED and parse_free() otherwise. */
65 static bool parse_fixed (struct lexer *, struct dictionary *,
66 struct pool *, struct data_parser *);
67 static bool parse_free (struct lexer *, struct dictionary *,
68 struct pool *, struct data_parser *);
/* Callbacks handed to add_transformation() by cmd_data_list(). */
70 static trns_free_func data_list_trns_free;
71 static trns_proc_func data_list_trns_proc;
/* Parses and executes the DATA LIST command.

   Subcommands (FILE, ENCODING, RECORDS, SKIP, END, NOTABLE, TABLE and the
   FIXED/FREE/LIST keywords) are read from LEXER until a slash token; the
   variable specifications that follow are parsed by parse_fixed() or
   parse_free().  Inside INPUT PROGRAM the configured parser and reader are
   registered on DS as a transformation; otherwise they are passed to
   data_parser_make_active_file().

   Returns CMD_DATA_LIST on success.  The cleanup path at the end returns
   CMD_CASCADING_FAILURE. */
74 cmd_data_list (struct lexer *lexer, struct dataset *ds)
76 struct dictionary *dict;
77 struct data_parser *parser;
78 struct dfm_reader *reader;
79 struct variable *end = NULL;
80 struct file_handle *fh = NULL;
81 char *encoding = NULL;
84 enum data_parser_type type;
86 struct pool *tmp_pool;
/* Outside INPUT PROGRAM a fresh dictionary with the default encoding is
   created; the cleanup path only concerns itself with the dictionary in
   that same case. */
89 dict = (in_input_program ()
91 : dict_create (get_default_encoding ()));
92 parser = data_parser_create (dict);
95 table = -1; /* Print table if nonzero, -1=undecided. */
/* Subcommands that precede the first slash. */
98 while (lex_token (lexer) != T_SLASH)
100 if (lex_match_id (lexer, "FILE"))
102 lex_match (lexer, T_EQUALS);
/* The handle may refer to a file or to inline data (FH_REF_FILE |
   FH_REF_INLINE). */
104 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
108 else if (lex_match_id (lexer, "ENCODING"))
110 lex_match (lexer, T_EQUALS);
111 if (!lex_force_string (lexer))
/* Keep a private copy of the encoding name; it is passed to
   dfm_open_reader() further down. */
115 encoding = ss_xstrdup (lex_tokss (lexer));
/* RECORDS takes a nonnegative integer; parentheses around it are matched
   with lex_match() on both sides. */
119 else if (lex_match_id (lexer, "RECORDS"))
121 lex_match (lexer, T_EQUALS);
122 lex_match (lexer, T_LPAREN);
123 if (!lex_force_int_range (lexer, "RECORDS", 0, INT_MAX))
125 data_parser_set_records (parser, lex_integer (lexer));
127 lex_match (lexer, T_RPAREN);
/* SKIP takes a nonnegative integer. */
129 else if (lex_match_id (lexer, "SKIP"))
131 lex_match (lexer, T_EQUALS);
132 if (!lex_force_int_range (lexer, "SKIP", 0, INT_MAX))
134 data_parser_set_skip (parser, lex_integer (lexer));
/* END is rejected outside INPUT PROGRAM and when given twice.  The named
   variable is looked up in DICT and a width-0 variable is created under
   that name (presumably only when the lookup finds nothing -- TODO
   confirm the guard between the two calls). */
137 else if (lex_match_id (lexer, "END"))
139 if (!in_input_program ())
141 msg (SE, _("The %s subcommand may only be used within %s."), "END", "INPUT PROGRAM");
146 msg (SE, _("The %s subcommand may only be specified once."), "END");
150 lex_match (lexer, T_EQUALS);
151 if (!lex_force_id (lexer))
153 end = dict_lookup_var (dict, lex_tokcstr (lexer));
155 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
158 else if (lex_match_id (lexer, "NOTABLE"))
160 else if (lex_match_id (lexer, "TABLE"))
/* Any other identifier must be one of the arrangement keywords:
   FIXED -> DP_FIXED; FREE -> DP_DELIMITED with span enabled;
   LIST -> DP_DELIMITED with span disabled. */
162 else if (lex_token (lexer) == T_ID)
164 if (lex_match_id (lexer, "FIXED"))
165 data_parser_set_type (parser, DP_FIXED);
166 else if (lex_match_id (lexer, "FREE"))
168 data_parser_set_type (parser, DP_DELIMITED);
169 data_parser_set_span (parser, true);
171 else if (lex_match_id (lexer, "LIST"))
173 data_parser_set_type (parser, DP_DELIMITED);
174 data_parser_set_span (parser, false);
178 lex_error (lexer, NULL);
184 msg (SE, _("Only one of FIXED, FREE, or LIST may "
/* Delimited input: an optional parenthesized list supplies explicit
   delimiters, each either the keyword TAB or a one-byte string, with
   commas between items matched by lex_match(). */
190 if (data_parser_get_type (parser) == DP_DELIMITED)
192 if (lex_match (lexer, T_LPAREN))
194 struct string delims = DS_EMPTY_INITIALIZER;
196 while (!lex_match (lexer, T_RPAREN))
200 if (lex_match_id (lexer, "TAB"))
202 else if (lex_is_string (lexer)
203 && ss_length (lex_tokss (lexer)) == 1)
205 delim = ss_first (lex_tokss (lexer));
210 /* XXX should support multibyte UTF-8 characters */
211 lex_error (lexer, NULL);
212 ds_destroy (&delims);
215 ds_put_byte (&delims, delim);
217 lex_match (lexer, T_COMMA);
/* With explicit delimiters: quotes and soft delimiters are set to empty
   and the collected bytes become the hard delimiters. */
220 data_parser_set_empty_line_has_field (parser, true);
221 data_parser_set_quotes (parser, ss_empty ());
222 data_parser_set_soft_delimiters (parser, ss_empty ());
223 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
224 ds_destroy (&delims);
/* Default delimiters: single and double quotes, CC_SPACES as soft
   delimiters, and a comma as hard delimiter only when the decimal
   character in the format settings is '.' (otherwise the hard-delimiter
   set has length 0). */
228 data_parser_set_empty_line_has_field (parser, false);
229 data_parser_set_quotes (parser, ss_cstr ("'\""));
230 data_parser_set_soft_delimiters (parser,
231 ss_cstr (CC_SPACES));
232 const char decimal = settings_get_fmt_settings ()->decimal;
233 data_parser_set_hard_delimiters (parser,
234 ss_buffer (",", (decimal == '.') ? 1 : 0));
240 lex_error (lexer, NULL);
244 type = data_parser_get_type (parser);
/* An encoding without a FILE handle only draws a warning (MW). */
246 if (encoding && NULL == fh)
247 msg (MW, _("Encoding should not be specified for inline data. It will be "
/* NOTE(review): presumably reached only when no FILE was given -- confirm
   the guard.  The inline file handle also becomes the default handle. */
251 fh = fh_inline_file ();
252 fh_set_default_handle (fh);
/* END is accepted only together with the DP_FIXED parser type. */
254 if (type != DP_FIXED && end != NULL)
256 msg (SE, _("The %s subcommand may be used only with %s."), "END", "DATA LIST FIXED");
/* The pool holds temporaries for the variable-specification parsers and is
   destroyed as soon as they return. */
260 tmp_pool = pool_create ();
261 if (type == DP_FIXED)
262 ok = parse_fixed (lexer, dict, tmp_pool, parser);
264 ok = parse_free (lexer, dict, tmp_pool, parser);
265 pool_destroy (tmp_pool);
269 if (!data_parser_any_fields (parser))
271 msg (SE, _("At least one variable must be specified."));
275 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Default for the table flag: on for DP_FIXED and for delimited input with
   span disabled (the LIST keyword). */
279 table = type == DP_FIXED || !data_parser_get_span (parser);
281 data_parser_output_description (parser, fh);
283 reader = dfm_open_reader (fh, lexer, encoding);
/* Inside INPUT PROGRAM, package the parser and reader into a
   data_list_trns and register it; data_list_trns_free() releases them. */
287 if (in_input_program ())
289 struct data_list_trns *trns = xmalloc (sizeof *trns);
290 trns->parser = parser;
291 trns->reader = reader;
293 add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
/* Otherwise hand parser, reader and dictionary to
   data_parser_make_active_file(). */
296 data_parser_make_active_file (parser, ds, reader, dict, NULL, NULL);
301 return CMD_DATA_LIST;
/* Failure cleanup. */
304 data_parser_destroy (parser);
305 if (!in_input_program ())
309 return CMD_CASCADING_FAILURE;
312 /* Fixed-format parsing. */
314 /* Parses all the variable specifications for DATA LIST FIXED,
315 storing them into PARSER. Uses TMP_POOL for temporary storage;
316 the caller may destroy it. Returns true only if
319 parse_fixed (struct lexer *lexer, struct dictionary *dict,
320 struct pool *tmp_pool, struct data_parser *parser)
322 int max_records = data_parser_get_records (parser);
326 while (lex_token (lexer) != T_ENDCMD)
329 size_t name_cnt, name_idx;
330 struct fmt_spec *formats, *f;
333 /* Parse everything. */
334 if (!parse_record_placement (lexer, &record, &column)
335 || !parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
336 &names, &name_cnt, PV_NONE)
337 || !parse_var_placements (lexer, tmp_pool, name_cnt, FMT_FOR_INPUT,
338 &formats, &format_cnt))
341 /* Create variables and var specs. */
/* Each entry of FORMATS is first offered to execute_placement_format(),
   which receives RECORD and COLUMN by address; entries it does not handle
   (false result) consume the next variable name. */
343 for (f = formats; f < &formats[format_cnt]; f++)
344 if (!execute_placement_format (f, &record, &column))
350 name = names[name_idx++];
352 /* Create variable. */
353 width = fmt_var_width (f);
354 v = dict_create_var (dict, name, width);
/* The output format is derived from the input format and applied with
   var_set_both_formats(). */
358 struct fmt_spec output = fmt_for_output_from_input (
359 f, settings_get_fmt_settings ());
360 var_set_both_formats (v, &output);
365 This can be acceptable if we're in INPUT
366 PROGRAM, but only if the existing variable has
367 the same width as the one we would have
369 if (!in_input_program ())
371 msg (SE, _("%s is a duplicate variable name."), name);
/* Compare against the existing variable: first whether exactly one of the
   two widths is zero, then, for nonzero widths, whether the widths
   differ. */
375 v = dict_lookup_var_assert (dict, name);
376 if ((width != 0) != (var_get_width (v) != 0))
378 msg (SE, _("There is already a variable %s of a "
383 if (width != 0 && width != var_get_width (v))
385 msg (SE, _("There is already a string variable %s of a "
386 "different width."), name);
/* A RECORDS value of zero disables this limit check. */
391 if (max_records && record > max_records)
393 msg (SE, _("Cannot place variable %s on record %d when "
394 "RECORDS=%d is specified."),
395 var_get_name (v), record,
396 data_parser_get_records (parser));
/* Register the field with the current record and column position. */
399 data_parser_add_fixed_field (parser, f,
400 var_get_case_index (v),
401 var_get_name (v), record, column);
/* Every parsed name must have been paired with a format. */
405 assert (name_idx == name_cnt);
411 /* Free-format parsing. */
413 /* Parses variable specifications for DATA LIST FREE and adds
414 them to PARSER. Uses TMP_POOL for temporary storage; the caller
415 may destroy it. Returns true only if successful. */
/* cmd_data_list() calls this function for every parser type other than
   DP_FIXED.  Variables are created in DICT. */
417 parse_free (struct lexer *lexer, struct dictionary *dict,
418 struct pool *tmp_pool, struct data_parser *parser)
421 while (lex_token (lexer) != T_ENDCMD)
423 struct fmt_spec input, output;
428 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
429 &name, &name_cnt, PV_NONE))
/* An explicit format may follow the names in parentheses. */
432 if (lex_match (lexer, T_LPAREN))
434 char type[FMT_TYPE_LEN_MAX + 1];
436 if (!parse_abstract_format_specifier (lexer, type, &input.w,
439 if (!fmt_from_name (type, &input.type))
441 msg (SE, _("Unknown format type `%s'."), type);
445 /* If no width was included, use the minimum width for the type.
446 This isn't quite right, because DATETIME by itself seems to become
447 DATETIME20 (see bug #30690), whereas this will become
448 DATETIME17. The correct behavior is not documented. */
451 input.w = fmt_min_input_width (input.type);
/* The format must be valid for input and the closing parenthesis is
   required. */
455 if (!fmt_check_input (&input) || !lex_force_match (lexer, T_RPAREN))
458 /* As a special case, N format is treated as F format
459 for free-field input. */
460 if (input.type == FMT_N)
463 output = fmt_for_output_from_input (&input,
464 settings_get_fmt_settings ());
/* No explicit format: an asterisk is matched with lex_match(), the input
   format is F8.0, and the output format comes from
   settings_get_format(). */
468 lex_match (lexer, T_ASTERISK);
469 input = fmt_for_input (FMT_F, 8, 0);
470 output = *settings_get_format ();
/* Create one variable per parsed name, all sharing the same formats, and
   register each as a delimited field. */
473 for (i = 0; i < name_cnt; i++)
477 v = dict_create_var (dict, name[i], fmt_var_width (&input));
480 msg (SE, _("%s is a duplicate variable name."), name[i]);
483 var_set_both_formats (v, &output);
485 data_parser_add_delimited_field (parser,
486 &input, var_get_case_index (v),
494 /* Input procedure. */
496 /* Destroys DATA LIST transformation TRNS.
497 Returns true if successful, false if an I/O error occurred. */
/* Registered by cmd_data_list() as the free callback; TRNS_ is the
   struct data_list_trns passed to add_transformation() there. */
499 data_list_trns_free (void *trns_)
501 struct data_list_trns *trns = trns_;
/* Release the parser first, then close the reader. */
502 data_parser_destroy (trns->parser);
503 dfm_close_reader (trns->reader);
508 /* Handle DATA LIST transformation TRNS, parsing data into *C. */
/* Registered by cmd_data_list() as the processing callback; TRNS_ is the
   struct data_list_trns created there.  CASE_NUM is unused. */
510 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
512 struct data_list_trns *trns = trns_;
/* *C is replaced by the result of case_unshare() before the parser writes
   into it. */
515 *c = case_unshare (*c);
516 if (data_parser_parse (trns->parser, trns->reader, *c))
517 retval = TRNS_CONTINUE;
518 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
520 /* An I/O error, or encountering end of file for a second
521 time, should be escalated into a more serious error. */
/* Any other parse failure is reported as TRNS_END_FILE. */
525 retval = TRNS_END_FILE;
527 /* If there was an END subcommand handle it. */
528 if (trns->end != NULL)
/* END points at the END variable's numeric value in *C, obtained through
   case_num_rw(). */
530 double *end = case_num_rw (*c, trns->end);
/* With an END variable, TRNS_END_FILE is turned back into TRNS_CONTINUE. */
531 if (retval == TRNS_END_FILE)
534 retval = TRNS_CONTINUE;