1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/data-io/data-reader.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/data-io/inpt-pgm.h"
39 #include "language/data-io/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/message.h"
46 #include "libpspp/misc.h"
47 #include "libpspp/pool.h"
48 #include "libpspp/str.h"
51 #include "gl/xalloc.h"
54 #define _(msgid) gettext (msgid)
56 /* DATA LIST transformation data.  cmd_data_list allocates one of
   these when the command runs inside INPUT PROGRAM and passes it to
   add_transformation together with data_list_trns_proc and
   data_list_trns_free. */
59 struct data_parser *parser; /* Parser; destroyed by data_list_trns_free. */
60 struct dfm_reader *reader; /* Data file reader; closed by data_list_trns_free. */
61 struct variable *end; /* Variable specified on END subcommand; tested against NULL by data_list_trns_proc before use. */
/* Parsers for the variable specifications that follow the
   subcommands.  cmd_data_list calls parse_fixed when the parser type
   is DP_FIXED; parse_free is presumably the alternative branch (the
   else keyword is not visible in this listing). */
64 static bool parse_fixed (struct lexer *, struct dictionary *,
65 struct pool *, struct data_parser *);
66 static bool parse_free (struct lexer *, struct dictionary *,
67 struct pool *, struct data_parser *);
/* Callbacks that cmd_data_list passes to add_transformation. */
69 static trns_free_func data_list_trns_free;
70 static trns_proc_func data_list_trns_proc;
/* Parses the DATA LIST command read from LEXER and applies it to DS.
   The subcommands before the first slash are parsed in a loop, then
   the variable specifications are handed to parse_fixed or
   parse_free.  On the path shown, the result is either a
   transformation registered with add_transformation (inside INPUT
   PROGRAM) or a call to data_parser_make_active_file.  The only
   return value visible here is CMD_CASCADING_FAILURE on the cleanup
   path at the end.
   NOTE(review): this listing lacks a number of original lines
   (braces, some declarations, and the statements that follow failed
   checks), so confirm control flow against the complete file. */
73 cmd_data_list (struct lexer *lexer, struct dataset *ds)
75 struct dictionary *dict;
76 struct data_parser *parser;
77 struct dfm_reader *reader;
78 struct variable *end = NULL;
79 struct file_handle *fh = NULL;
80 struct string encoding = DS_EMPTY_INITIALIZER;
83 enum data_parser_type type;
85 struct pool *tmp_pool;
/* Inside INPUT PROGRAM the dictionary of DS is used directly;
   otherwise a new dictionary is created for the parsed variables. */
88 dict = in_input_program () ? dataset_dict (ds) : dict_create ();
89 parser = data_parser_create (dict);
92 table = -1; /* Print table if nonzero, -1=undecided. */
/* Subcommand loop: runs until the current token is a slash. */
95 while (lex_token (lexer) != T_SLASH)
97 if (lex_match_id (lexer, "FILE"))
/* The equals sign is optional: the result of lex_match is ignored. */
99 lex_match (lexer, T_EQUALS);
/* The handle is parsed with the FH_REF_FILE | FH_REF_INLINE mask. */
101 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
105 else if (lex_match_id (lexer, "ENCODING"))
107 lex_match (lexer, T_EQUALS);
108 if (!lex_force_string (lexer))
/* Keep a copy of the string token as the requested encoding. */
111 ds_init_substring (&encoding, lex_tokss (lexer));
115 else if (lex_match_id (lexer, "RECORDS"))
/* Both the equals sign and the parentheses around the record count
   are matched optionally. */
117 lex_match (lexer, T_EQUALS);
118 lex_match (lexer, T_LPAREN);
119 if (!lex_force_int (lexer))
121 data_parser_set_records (parser, lex_integer (lexer));
123 lex_match (lexer, T_RPAREN);
125 else if (lex_match_id (lexer, "SKIP"))
127 lex_match (lexer, T_EQUALS);
128 if (!lex_force_int (lexer))
130 data_parser_set_skip (parser, lex_integer (lexer));
133 else if (lex_match_id (lexer, "END"))
/* END is rejected outside INPUT PROGRAM and when given twice; the
   condition guarding the second message is not visible here. */
135 if (!in_input_program ())
137 msg (SE, _("The END subcommand may only be used within "
143 msg (SE, _("The END subcommand may only be specified once."));
147 lex_match (lexer, T_EQUALS);
148 if (!lex_force_id (lexer))
/* Look the END variable up by name; the creation call below passes
   width 0 and presumably runs only when the lookup finds nothing
   (TODO confirm, the guarding test is not visible). */
150 end = dict_lookup_var (dict, lex_tokcstr (lexer));
152 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
155 else if (lex_match_id (lexer, "NOTABLE"))
157 else if (lex_match_id (lexer, "TABLE"))
/* Any other identifier has to be one of FIXED, FREE or LIST. */
159 else if (lex_token (lexer) == T_ID)
161 if (lex_match_id (lexer, "FIXED"))
162 data_parser_set_type (parser, DP_FIXED);
163 else if (lex_match_id (lexer, "FREE"))
/* FREE and LIST both select the delimited parser; they differ only
   in the span setting (true for FREE, false for LIST). */
165 data_parser_set_type (parser, DP_DELIMITED);
166 data_parser_set_span (parser, true);
168 else if (lex_match_id (lexer, "LIST"))
170 data_parser_set_type (parser, DP_DELIMITED);
171 data_parser_set_span (parser, false);
175 lex_error (lexer, NULL);
181 msg (SE, _("Only one of FIXED, FREE, or LIST may "
/* Delimiter setup for the delimited parser types. */
187 if (data_parser_get_type (parser) == DP_DELIMITED)
/* An explicit delimiter list in parentheses: each entry is the TAB
   keyword or a string token of length one, optionally followed by a
   comma.  The accepted bytes are collected in DELIMS. */
189 if (lex_match (lexer, T_LPAREN))
191 struct string delims = DS_EMPTY_INITIALIZER;
193 while (!lex_match (lexer, T_RPAREN))
197 if (lex_match_id (lexer, "TAB"))
199 else if (lex_is_string (lexer)
200 && ss_length (lex_tokss (lexer)) == 1)
202 delim = ss_first (lex_tokss (lexer));
207 /* XXX should support multibyte UTF-8 characters */
208 lex_error (lexer, NULL);
209 ds_destroy (&delims);
212 ds_put_byte (&delims, delim);
214 lex_match (lexer, T_COMMA);
/* With explicit delimiters: no quote characters, no soft delimiters,
   the collected bytes become the hard delimiters, and the
   empty_line_has_field setting is turned on. */
217 data_parser_set_empty_line_has_field (parser, true);
218 data_parser_set_quotes (parser, ss_empty ());
219 data_parser_set_soft_delimiters (parser, ss_empty ());
220 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
221 ds_destroy (&delims);
/* Settings used on the other branch (presumably when no delimiter
   list was given): both quote characters, CC_SPACES as soft
   delimiters, and a comma as the hard delimiter. */
225 data_parser_set_empty_line_has_field (parser, false);
226 data_parser_set_quotes (parser, ss_cstr ("'\""));
227 data_parser_set_soft_delimiters (parser,
228 ss_cstr (CC_SPACES));
229 data_parser_set_hard_delimiters (parser, ss_cstr (","));
235 lex_error (lexer, NULL);
239 type = data_parser_get_type (parser);
/* A nonempty ENCODING either triggers the warning below or is stored
   in the dictionary; the test that selects between the two is not
   visible in this listing. */
241 if (! ds_is_empty (&encoding))
244 msg (MW, _("Encoding should not be specified for inline data. It will be ignored."));
246 dict_set_encoding (dict, ds_cstr (&encoding));
/* fh_inline_file supplies the handle, presumably when no FILE
   subcommand was given - TODO confirm against the full file. */
250 fh = fh_inline_file ();
251 fh_set_default_handle (fh);
253 if (type != DP_FIXED && end != NULL)
255 msg (SE, _("The END subcommand may be used only with DATA LIST FIXED."));
/* The variable specifications are parsed with a pool that lives only
   for the duration of the parse call. */
259 tmp_pool = pool_create ();
260 if (type == DP_FIXED)
261 ok = parse_fixed (lexer, dict, tmp_pool, parser);
263 ok = parse_free (lexer, dict, tmp_pool, parser);
264 pool_destroy (tmp_pool);
268 if (!data_parser_any_fields (parser))
270 msg (SE, _("At least one variable must be specified."));
274 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Value given to TABLE here: nonzero for DP_FIXED or when the span
   setting is off.  Presumably applied only while TABLE is still
   undecided (-1); the guarding test is not visible. */
278 table = type == DP_FIXED || !data_parser_get_span (parser);
280 data_parser_output_description (parser, fh);
282 reader = dfm_open_reader (fh, lexer);
/* Inside INPUT PROGRAM the parser and reader are handed to a new
   transformation; the make_active_file call below is presumably the
   alternative branch. */
286 if (in_input_program ())
288 struct data_list_trns *trns = xmalloc (sizeof *trns);
289 trns->parser = parser;
290 trns->reader = reader;
292 add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
295 data_parser_make_active_file (parser, ds, reader, dict);
298 ds_destroy (&encoding);
/* Cleanup ending in the failure return: the parser is destroyed and
   the encoding string released before CMD_CASCADING_FAILURE is
   returned.  The statement guarded by the in_input_program test is
   not visible in this listing. */
303 data_parser_destroy (parser);
304 if (!in_input_program ())
307 ds_destroy (&encoding);
308 return CMD_CASCADING_FAILURE;
311 /* Fixed-format parsing. */
313 /* Parses all the variable specifications for DATA LIST FIXED,
314 adding them to PARSER. Uses TMP_POOL for temporary storage;
315 the caller may destroy it. Returns true only if successful. */
318 parse_fixed (struct lexer *lexer, struct dictionary *dict,
319 struct pool *tmp_pool, struct data_parser *parser)
/* LEXER supplies the tokens, new variables are created in DICT,
   TMP_POOL is passed to the helpers that produce the name and format
   arrays, and each resulting field is added to PARSER.
   NOTE(review): several original lines (declarations, braces, the
   statements after failed checks) are absent from this listing. */
/* A record count of zero disables the record range check further
   down, because that check tests max_records for nonzero first. */
321 int max_records = data_parser_get_records (parser);
/* One iteration per group of variable specifications, until the end
   of the command. */
325 while (lex_token (lexer) != T_ENDCMD)
328 size_t name_cnt, name_idx;
329 struct fmt_spec *formats, *f;
332 /* Parse everything. */
333 if (!parse_record_placement (lexer, &record, &column)
334 || !parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
335 &names, &name_cnt, PV_NONE)
336 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
337 &formats, &format_cnt))
340 /* Create variables and var specs. */
/* Walk the parsed formats.  A format for which
   execute_placement_format returns false consumes the next name from
   NAMES and describes one variable. */
342 for (f = formats; f < &formats[format_cnt]; f++)
343 if (!execute_placement_format (f, &record, &column))
349 name = names[name_idx++];
351 /* Create variable. */
352 width = fmt_var_width (f);
353 v = dict_create_var (dict, name, width);
/* The output format is derived from the input format and installed
   through var_set_both_formats; presumably this happens only when
   the variable was created (the guarding test is not visible). */
357 struct fmt_spec output = fmt_for_output_from_input (f);
358 var_set_both_formats (v, &output);
363 This can be acceptable if we're in INPUT
364 PROGRAM, but only if the existing variable has
365 the same width as the one we would have
367 if (!in_input_program ())
369 msg (SE, _("%s is a duplicate variable name."), name);
373 v = dict_lookup_var_assert (dict, name);
/* Width zero versus nonzero has to agree between the requested
   variable and the existing one. */
374 if ((width != 0) != (var_get_width (v) != 0))
376 msg (SE, _("There is already a variable %s of a "
/* For a nonzero width the existing variable must have exactly the
   same width. */
381 if (width != 0 && width != var_get_width (v))
383 msg (SE, _("There is already a string variable %s of a "
384 "different width."), name);
/* Reject a record number beyond the RECORDS setting. */
389 if (max_records && record > max_records)
391 msg (SE, _("Cannot place variable %s on record %d when "
392 "RECORDS=%d is specified."),
393 var_get_name (v), record,
394 data_parser_get_records (parser));
397 data_parser_add_fixed_field (parser, f,
398 var_get_case_index (v),
399 var_get_name (v), record, column);
/* Every parsed name must have been consumed by some format. */
403 assert (name_idx == name_cnt);
409 /* Free-format parsing. */
411 /* Parses variable specifications for DATA LIST FREE and adds
412 them to PARSER. Uses TMP_POOL for temporary storage; the caller
413 may destroy it. Returns true only if successful.
   LEXER supplies the tokens and new variables are created in DICT.
   NOTE(review): several original lines (declarations, braces, the
   statements after failed checks) are absent from this listing. */
415 parse_free (struct lexer *lexer, struct dictionary *dict,
416 struct pool *tmp_pool, struct data_parser *parser)
/* One iteration per list of names with its optional format, until
   the end of the command. */
419 while (lex_token (lexer) != T_ENDCMD)
421 struct fmt_spec input, output;
426 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
427 &name, &name_cnt, PV_NONE))
/* An explicit format in parentheses must parse, pass the input
   format check, and be followed by a right parenthesis. */
430 if (lex_match (lexer, T_LPAREN))
432 if (!parse_format_specifier (lexer, &input)
433 || !fmt_check_input (&input)
434 || !lex_force_match (lexer, T_RPAREN))
437 /* As a special case, N format is treated as F format
438 for free-field input. */
439 if (input.type == FMT_N)
442 output = fmt_for_output_from_input (&input);
/* Without an explicit format an optional asterisk is accepted, the
   input format is F with the arguments 8 and 0, and the output format
   is the one returned by settings_get_format. */
446 lex_match (lexer, T_ASTERISK);
447 input = fmt_for_input (FMT_F, 8, 0);
448 output = *settings_get_format ();
/* Create one variable and one delimited field per parsed name. */
451 for (i = 0; i < name_cnt; i++)
455 v = dict_create_var (dict, name[i], fmt_var_width (&input));
458 msg (SE, _("%s is a duplicate variable name."), name[i]);
461 var_set_both_formats (v, &output);
463 data_parser_add_delimited_field (parser,
464 &input, var_get_case_index (v),
472 /* Input procedure. */
474 /* Destroys DATA LIST transformation TRNS.
475 Returns true if successful, false if an I/O error occurred.
   TRNS_ is treated as a pointer to struct data_list_trns; its parser
   is destroyed and its reader is closed.
   NOTE(review): none of the statements visible here inspects a
   result from dfm_close_reader, so confirm whether a false return is
   actually reachable. */
477 data_list_trns_free (void *trns_)
479 struct data_list_trns *trns = trns_;
480 data_parser_destroy (trns->parser);
481 dfm_close_reader (trns->reader);
486 /* Handle DATA LIST transformation TRNS, parsing data into *C. */
488 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
490 struct data_list_trns *trns = trns_;
493 *c = case_unshare (*c);
494 if (data_parser_parse (trns->parser, trns->reader, *c))
495 retval = TRNS_CONTINUE;
496 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
498 /* An I/O error, or encountering end of file for a second
499 time, should be escalated into a more serious error. */
503 retval = TRNS_END_FILE;
505 /* If there was an END subcommand handle it. */
506 if (trns->end != NULL)
508 double *end = &case_data_rw (*c, trns->end)->f;
509 if (retval == TRNS_END_FILE)
512 retval = TRNS_CONTINUE;