1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/data-io/data-reader.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/data-io/inpt-pgm.h"
39 #include "language/data-io/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
/* Shorthand for wrapping user-visible message strings: expands to a
   gettext call on MSGID. */
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data.
   cmd_data_list allocates one of these when DATA LIST is used inside
   INPUT PROGRAM and registers it, through add_transformation, as the
   auxiliary data for data_list_trns_proc and data_list_trns_free. */
60 struct data_parser *parser; /* Parser passed to data_parser_parse for each case. */
61 struct dfm_reader *reader; /* Data file reader that PARSER reads from. */
62 struct variable *end; /* Variable specified on END subcommand; checked against NULL before use. */
/* Variable-specification parsers.  cmd_data_list calls parse_fixed when
   the parser type is DP_FIXED and parse_free otherwise.  Both take the
   lexer, the dictionary that receives new variables, a pool for temporary
   storage, and the data parser that receives the field definitions. */
65 static bool parse_fixed (struct lexer *, struct dictionary *,
66 struct pool *, struct data_parser *);
67 static bool parse_free (struct lexer *, struct dictionary *,
68 struct pool *, struct data_parser *);
/* Transformation callbacks that cmd_data_list hands to
   add_transformation together with a struct data_list_trns. */
70 static trns_free_func data_list_trns_free;
71 static trns_proc_func data_list_trns_proc;
/* Parses the DATA LIST command read from LEXER on behalf of dataset DS.

   Visible flow: subcommands (FILE, ENCODING, RECORDS, SKIP, END, NOTABLE,
   TABLE and the FIXED, FREE and LIST type keywords) are consumed until a
   slash token is reached.  The variable specifications after the slash are
   parsed by parse_fixed or parse_free using a temporary pool.  The data
   file reader is then opened; inside INPUT PROGRAM a data_list_trns is
   allocated and registered as a transformation on DS, and
   data_parser_make_active_file is called with the parser, DS, the reader
   and the dictionary (presumably on the alternative branch -- confirm).
   The final statements destroy the parser and return
   CMD_CASCADING_FAILURE; presumably this is the shared error exit --
   confirm against the jumps that lead to it. */
74 cmd_data_list (struct lexer *lexer, struct dataset *ds)
76 struct dictionary *dict;
77 struct data_parser *parser;
78 struct dfm_reader *reader;
79 struct variable *end = NULL;
80 struct file_handle *fh = NULL;
81 char *encoding = NULL;
84 enum data_parser_type type;
86 struct pool *tmp_pool;
/* Outside INPUT PROGRAM the variables go into a freshly created
   dictionary that uses the default encoding. */
89 dict = (in_input_program ()
91 : dict_create (get_default_encoding ()));
92 parser = data_parser_create (dict);
95 table = -1; /* Print table if nonzero, -1=undecided. */
/* Subcommand loop: everything up to the first slash. */
98 while (lex_token (lexer) != T_SLASH)
100 if (lex_match_id (lexer, "FILE"))
/* The equals sign is optional: the result of lex_match is ignored. */
102 lex_match (lexer, T_EQUALS);
104 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
108 else if (lex_match_id (lexer, "ENCODING"))
110 lex_match (lexer, T_EQUALS);
111 if (!lex_force_string (lexer))
/* Keep a duplicate of the encoding name taken from the current token. */
115 encoding = ss_xstrdup (lex_tokss (lexer));
119 else if (lex_match_id (lexer, "RECORDS"))
/* Both the equals sign and the parentheses around the count are
   optional here, since the lex_match results are not checked. */
121 lex_match (lexer, T_EQUALS);
122 lex_match (lexer, T_LPAREN);
123 if (!lex_force_int (lexer))
125 data_parser_set_records (parser, lex_integer (lexer));
127 lex_match (lexer, T_RPAREN);
129 else if (lex_match_id (lexer, "SKIP"))
131 lex_match (lexer, T_EQUALS);
132 if (!lex_force_int (lexer))
134 data_parser_set_skip (parser, lex_integer (lexer));
137 else if (lex_match_id (lexer, "END"))
/* END is rejected outside INPUT PROGRAM and when given more than once. */
139 if (!in_input_program ())
141 msg (SE, _("The END subcommand may only be used within "
147 msg (SE, _("The END subcommand may only be specified once."));
151 lex_match (lexer, T_EQUALS);
152 if (!lex_force_id (lexer))
/* Look the END variable up by name; the following statement creates it
   with width 0 (presumably only when the lookup found nothing). */
154 end = dict_lookup_var (dict, lex_tokcstr (lexer));
156 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
159 else if (lex_match_id (lexer, "NOTABLE"))
161 else if (lex_match_id (lexer, "TABLE"))
163 else if (lex_token (lexer) == T_ID)
/* Type keywords.  FIXED selects DP_FIXED.  FREE and LIST both select
   DP_DELIMITED and differ only in the span setting (true for FREE,
   false for LIST). */
165 if (lex_match_id (lexer, "FIXED"))
166 data_parser_set_type (parser, DP_FIXED);
167 else if (lex_match_id (lexer, "FREE"))
169 data_parser_set_type (parser, DP_DELIMITED);
170 data_parser_set_span (parser, true);
172 else if (lex_match_id (lexer, "LIST"))
174 data_parser_set_type (parser, DP_DELIMITED);
175 data_parser_set_span (parser, false);
179 lex_error (lexer, NULL);
185 msg (SE, _("Only one of FIXED, FREE, or LIST may "
191 if (data_parser_get_type (parser) == DP_DELIMITED)
/* A parenthesized list after the type keyword names explicit
   delimiters.  Each entry is the TAB keyword or a string of length 1,
   and a comma between entries is optional. */
193 if (lex_match (lexer, T_LPAREN))
195 struct string delims = DS_EMPTY_INITIALIZER;
197 while (!lex_match (lexer, T_RPAREN))
201 if (lex_match_id (lexer, "TAB"))
203 else if (lex_is_string (lexer)
204 && ss_length (lex_tokss (lexer)) == 1)
206 delim = ss_first (lex_tokss (lexer));
211 /* XXX should support multibyte UTF-8 characters */
212 lex_error (lexer, NULL);
213 ds_destroy (&delims);
216 ds_put_byte (&delims, delim);
218 lex_match (lexer, T_COMMA);
/* With explicit delimiters: no quote characters, no soft delimiters,
   and the collected bytes become the hard delimiters. */
221 data_parser_set_empty_line_has_field (parser, true);
222 data_parser_set_quotes (parser, ss_empty ());
223 data_parser_set_soft_delimiters (parser, ss_empty ());
224 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
225 ds_destroy (&delims);
/* Settings used when no delimiter list was given (presumably the
   alternative branch): single and double quote characters, CC_SPACES
   as soft delimiters, and comma as the hard delimiter. */
229 data_parser_set_empty_line_has_field (parser, false);
230 data_parser_set_quotes (parser, ss_cstr ("'\""));
231 data_parser_set_soft_delimiters (parser,
232 ss_cstr (CC_SPACES));
233 data_parser_set_hard_delimiters (parser, ss_cstr (","));
239 lex_error (lexer, NULL);
243 type = data_parser_get_type (parser);
/* An encoding given without a file handle only draws a warning (MW). */
245 if (encoding && NULL == fh)
246 msg (MW, _("Encoding should not be specified for inline data. It will be "
/* Use the inline file and make it the default handle (presumably only
   when FILE was not given -- the guarding condition is not visible). */
250 fh = fh_inline_file ();
251 fh_set_default_handle (fh);
/* END is accepted only together with the fixed-format parser type. */
253 if (type != DP_FIXED && end != NULL)
255 msg (SE, _("The END subcommand may be used only with DATA LIST FIXED."));
/* The pool lives only for the duration of variable-spec parsing. */
259 tmp_pool = pool_create ();
260 if (type == DP_FIXED)
261 ok = parse_fixed (lexer, dict, tmp_pool, parser);
263 ok = parse_free (lexer, dict, tmp_pool, parser);
264 pool_destroy (tmp_pool);
268 if (!data_parser_any_fields (parser))
270 msg (SE, _("At least one variable must be specified."));
274 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Table setting computed from the type: nonzero for FIXED and for a
   delimited parser with span disabled (LIST), zero for FREE.  Presumably
   applied only while TABLE is still undecided (-1) -- confirm. */
278 table = type == DP_FIXED || !data_parser_get_span (parser);
280 data_parser_output_description (parser, fh);
282 reader = dfm_open_reader (fh, lexer, encoding);
286 if (in_input_program ())
/* Inside INPUT PROGRAM: hand the parser and reader to a new
   transformation record and register it on DS. */
288 struct data_list_trns *trns = xmalloc (sizeof *trns);
289 trns->parser = parser;
290 trns->reader = reader;
292 add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
295 data_parser_make_active_file (parser, ds, reader, dict);
/* Cleanup statements: the parser is destroyed and the command reports a
   cascading failure. */
303 data_parser_destroy (parser);
304 if (!in_input_program ())
308 return CMD_CASCADING_FAILURE;
311 /* Fixed-format parsing. */
313 /* Parses all the variable specifications for DATA LIST FIXED,
314 adding them to PARSER. Uses TMP_POOL for temporary storage;
315 the caller may destroy it. Returns true only if successful. */
/* Fixed-format variable specification parser.
   LEXER supplies the tokens, new variables are created in DICT, TMP_POOL
   holds the parsed name and format arrays, and each resulting field is
   registered on PARSER with data_parser_add_fixed_field.

   Behaviour visible in the statements below: a name that already exists
   in DICT is reported as a duplicate outside INPUT PROGRAM; inside INPUT
   PROGRAM the existing variable is looked up and its width is compared
   with the new one, with error messages for a numeric/string mismatch
   and for differing string widths.  A record number greater than a
   nonzero RECORDS value is also reported as an error.  The closing
   assertion checks that every parsed name was consumed by a format. */
318 parse_fixed (struct lexer *lexer, struct dictionary *dict,
319 struct pool *tmp_pool, struct data_parser *parser)
/* Zero is treated as no RECORDS limit by the check further down. */
321 int max_records = data_parser_get_records (parser);
/* Loop until the end of the command; each pass parses a record/column
   placement, a list of variable names, and their placements/formats. */
325 while (lex_token (lexer) != T_ENDCMD)
328 size_t name_cnt, name_idx;
329 struct fmt_spec *formats, *f;
332 /* Parse everything. */
333 if (!parse_record_placement (lexer, &record, &column)
334 || !parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
335 &names, &name_cnt, PV_NONE)
336 || !parse_var_placements (lexer, tmp_pool, name_cnt, FMT_FOR_INPUT,
337 &formats, &format_cnt))
340 /* Create variables and var specs. */
/* Walk the parsed formats.  A false result from
   execute_placement_format leads into the variable-creation code, so it
   presumably means F describes a data field rather than a pure
   placement instruction -- confirm against placement-parser. */
342 for (f = formats; f < &formats[format_cnt]; f++)
343 if (!execute_placement_format (f, &record, &column))
/* Each data field consumes the next parsed name. */
349 name = names[name_idx++];
351 /* Create variable. */
352 width = fmt_var_width (f);
353 v = dict_create_var (dict, name, width);
/* For a newly created variable, the output format derived from the
   input format is applied through var_set_both_formats. */
357 struct fmt_spec output = fmt_for_output_from_input (f);
358 var_set_both_formats (v, &output);
363 This can be acceptable if we're in INPUT
364 PROGRAM, but only if the existing variable has
365 the same width as the one we would have
367 if (!in_input_program ())
369 msg (SE, _("%s is a duplicate variable name."), name);
373 v = dict_lookup_var_assert (dict, name);
374 if ((width != 0) != (var_get_width (v) != 0))
376 msg (SE, _("There is already a variable %s of a "
381 if (width != 0 && width != var_get_width (v))
383 msg (SE, _("There is already a string variable %s of a "
384 "different width."), name);
389 if (max_records && record > max_records)
391 msg (SE, _("Cannot place variable %s on record %d when "
392 "RECORDS=%d is specified."),
393 var_get_name (v), record,
394 data_parser_get_records (parser));
397 data_parser_add_fixed_field (parser, f,
398 var_get_case_index (v),
399 var_get_name (v), record, column);
403 assert (name_idx == name_cnt);
409 /* Free-format parsing. */
411 /* Parses variable specifications for DATA LIST FREE and adds
412 them to PARSER. cmd_data_list calls this for every parser type other
   than DP_FIXED, so it also serves DATA LIST LIST. New variables are
   created in DICT. Uses TMP_POOL for temporary storage; the caller
413 may destroy it. Returns true only if successful. */
415 parse_free (struct lexer *lexer, struct dictionary *dict,
416 struct pool *tmp_pool, struct data_parser *parser)
/* Loop until the end of the command; each pass handles one list of
   names followed by an optional format. */
419 while (lex_token (lexer) != T_ENDCMD)
421 struct fmt_spec input, output;
426 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
427 &name, &name_cnt, PV_NONE))
/* A parenthesized format, when present, becomes the input format for
   every name just parsed. */
430 if (lex_match (lexer, T_LPAREN))
432 char type[FMT_TYPE_LEN_MAX + 1];
434 if (!parse_abstract_format_specifier (lexer, type, &input.w,
/* Translate the format name into a format type; an unrecognized name
   is an error. */
437 if (!fmt_from_name (type, &input.type))
439 msg (SE, _("Unknown format type `%s'."), type);
443 /* If no width was included, use the minimum width for the type.
444 This isn't quite right, because DATETIME by itself seems to become
445 DATETIME20 (see bug #30690), whereas this will become
446 DATETIME17. The correct behavior is not documented. */
449 input.w = fmt_min_input_width (input.type);
/* The format must be valid for input and be followed by a closing
   parenthesis. */
453 if (!fmt_check_input (&input) || !lex_force_match (lexer, T_RPAREN))
456 /* As a special case, N format is treated as F format
457 for free-field input. */
458 if (input.type == FMT_N)
461 output = fmt_for_output_from_input (&input);
/* Statements for the case without a parenthesized format (presumably the
   alternative branch): an optional asterisk is skipped, the input format
   is F with width 8 and 0 decimals, and the output format is the one
   returned by settings_get_format. */
465 lex_match (lexer, T_ASTERISK);
466 input = fmt_for_input (FMT_F, 8, 0);
467 output = *settings_get_format ();
/* Create one variable per parsed name, with the width implied by the
   input format, and register a delimited field for it on PARSER. */
470 for (i = 0; i < name_cnt; i++)
474 v = dict_create_var (dict, name[i], fmt_var_width (&input));
/* Reported when the name is already taken (presumably when
   dict_create_var returned a null pointer -- the test is not visible). */
477 msg (SE, _("%s is a duplicate variable name."), name[i]);
480 var_set_both_formats (v, &output);
482 data_parser_add_delimited_field (parser,
483 &input, var_get_case_index (v),
491 /* Input procedure. */
493 /* Destroys DATA LIST transformation TRNS_, the struct data_list_trns
   that cmd_data_list registered with add_transformation.
494 Returns true if successful, false if an I/O error occurred. */
496 data_list_trns_free (void *trns_)
/* Recover the typed record from the generic callback argument. */
498 struct data_list_trns *trns = trns_;
/* Release the two resources the record refers to: the data parser and
   the data file reader. */
499 data_parser_destroy (trns->parser);
500 dfm_close_reader (trns->reader);
505 /* Handle DATA LIST transformation TRNS_, parsing data into *C.
   TRNS_ is the struct data_list_trns registered by cmd_data_list.
   CASE_NUM is unused.  The result is built up in retval, which takes the
   values TRNS_CONTINUE and TRNS_END_FILE in the statements shown here. */
507 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
509 struct data_list_trns *trns = trns_;
/* Replace *C with the case returned by case_unshare before writing into
   it (presumably this yields a copy that is safe to modify -- confirm
   against data/case.h). */
512 *c = case_unshare (*c);
/* A successful parse fills *C from the reader and continues. */
513 if (data_parser_parse (trns->parser, trns->reader, *c))
514 retval = TRNS_CONTINUE;
515 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
517 /* An I/O error, or encountering end of file for a second
518 time, should be escalated into a more serious error. */
/* Result for an ordinary end of file (presumably the final else branch). */
522 retval = TRNS_END_FILE;
524 /* If there was an END subcommand handle it. */
525 if (trns->end != NULL)
/* Pointer to the numeric value of the END variable inside *C. */
527 double *end = &case_data_rw (*c, trns->end)->f;
/* With an END variable, end of file is turned back into TRNS_CONTINUE;
   the values stored through END are set on lines not shown here. */
528 if (retval == TRNS_END_FILE)
531 retval = TRNS_CONTINUE;