1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/data-in.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/settings.h"
32 #include "data/transformations.h"
33 #include "data/variable.h"
34 #include "language/command.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/data-io/data-reader.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/data-io/inpt-pgm.h"
39 #include "language/data-io/placement-parser.h"
40 #include "language/lexer/format-parser.h"
41 #include "language/lexer/lexer.h"
42 #include "language/lexer/variable-parser.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/compiler.h"
45 #include "libpspp/i18n.h"
46 #include "libpspp/message.h"
47 #include "libpspp/misc.h"
48 #include "libpspp/pool.h"
49 #include "libpspp/str.h"
52 #include "gl/xalloc.h"
/* Shorthand wrapped around message strings in this file: expands to a
   gettext call on MSGID. */
55 #define _(msgid) gettext (msgid)
57 /* DATA LIST transformation data.  cmd_data_list allocates one of these
   when DATA LIST is used inside an input program and hands it to
   add_transformation together with data_list_trns_proc and
   data_list_trns_free. */
60 struct data_parser *parser; /* Parser; used by data_parser_parse for each case, destroyed in data_list_trns_free. */
61 struct dfm_reader *reader; /* Data file reader; closed in data_list_trns_free. */
62 struct variable *end; /* Variable specified on END subcommand; data_list_trns_proc touches it only when non-null. */
/* Forward declarations of the two variable-specification parsers defined
   later in this file.  Both take the lexer, the dictionary, a scratch pool
   and the data parser, in that order. */
65 static bool parse_fixed (struct lexer *, struct dictionary *,
66 struct pool *, struct data_parser *);
67 static bool parse_free (struct lexer *, struct dictionary *,
68 struct pool *, struct data_parser *);
/* Transformation callbacks that cmd_data_list passes to
   add_transformation. */
70 static trns_free_func data_list_trns_free;
71 static trns_proc_func data_list_trns_proc;
/* Parses the DATA LIST command from LEXER for dataset DS.

   The subcommands that precede the first slash (FILE, ENCODING, RECORDS,
   SKIP, END, NOTABLE, TABLE and the FIXED/FREE/LIST keywords) configure a
   data parser.  The variable specifications that follow are handled by
   parse_fixed or parse_free.  Inside an input program the parser and the
   data file reader are then registered as a transformation; otherwise
   they are passed to data_parser_make_active_file.

   The cleanup code at the end returns CMD_CASCADING_FAILURE. */
74 cmd_data_list (struct lexer *lexer, struct dataset *ds)
76 struct dictionary *dict;
77 struct data_parser *parser;
78 struct dfm_reader *reader;
79 struct variable *end = NULL;
80 struct file_handle *fh = NULL;
81 struct string encoding = DS_EMPTY_INITIALIZER;
84 enum data_parser_type type;
86 struct pool *tmp_pool;
/* Outside an input program, start from a fresh dictionary in the default
   encoding. */
89 dict = (in_input_program ()
91 : dict_create (get_default_encoding ()));
92 parser = data_parser_create (dict);
95 table = -1; /* Print table if nonzero, -1=undecided. */
/* Subcommand loop: runs until the slash that introduces the variable
   specifications. */
98 while (lex_token (lexer) != T_SLASH)
100 if (lex_match_id (lexer, "FILE"))
102 lex_match (lexer, T_EQUALS);
/* FILE accepts either a file handle or the inline file. */
104 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
108 else if (lex_match_id (lexer, "ENCODING"))
110 lex_match (lexer, T_EQUALS);
111 if (!lex_force_string (lexer))
114 ds_init_substring (&encoding, lex_tokss (lexer));
/* RECORDS takes an integer, optionally wrapped in parentheses. */
118 else if (lex_match_id (lexer, "RECORDS"))
120 lex_match (lexer, T_EQUALS);
121 lex_match (lexer, T_LPAREN);
122 if (!lex_force_int (lexer))
124 data_parser_set_records (parser, lex_integer (lexer));
126 lex_match (lexer, T_RPAREN);
128 else if (lex_match_id (lexer, "SKIP"))
130 lex_match (lexer, T_EQUALS);
131 if (!lex_force_int (lexer))
133 data_parser_set_skip (parser, lex_integer (lexer));
/* END is rejected outside an input program and when given more than
   once. */
136 else if (lex_match_id (lexer, "END"))
138 if (!in_input_program ())
140 msg (SE, _("The END subcommand may only be used within "
146 msg (SE, _("The END subcommand may only be specified once."));
150 lex_match (lexer, T_EQUALS);
151 if (!lex_force_id (lexer))
/* The END variable is looked up by name; a width-0 variable is created
   through dict_create_var_assert (presumably only when the lookup finds
   nothing -- TODO confirm). */
153 end = dict_lookup_var (dict, lex_tokcstr (lexer));
155 end = dict_create_var_assert (dict, lex_tokcstr (lexer), 0);
158 else if (lex_match_id (lexer, "NOTABLE"))
160 else if (lex_match_id (lexer, "TABLE"))
/* Any other identifier must be one of the type keywords.  FREE and LIST
   both select DP_DELIMITED and differ only in the span setting. */
162 else if (lex_token (lexer) == T_ID)
164 if (lex_match_id (lexer, "FIXED"))
165 data_parser_set_type (parser, DP_FIXED);
166 else if (lex_match_id (lexer, "FREE"))
168 data_parser_set_type (parser, DP_DELIMITED);
169 data_parser_set_span (parser, true);
171 else if (lex_match_id (lexer, "LIST"))
173 data_parser_set_type (parser, DP_DELIMITED);
174 data_parser_set_span (parser, false);
178 lex_error (lexer, NULL);
184 msg (SE, _("Only one of FIXED, FREE, or LIST may "
/* Delimited input may be followed by a parenthesized delimiter list. */
190 if (data_parser_get_type (parser) == DP_DELIMITED)
192 if (lex_match (lexer, T_LPAREN))
194 struct string delims = DS_EMPTY_INITIALIZER;
196 while (!lex_match (lexer, T_RPAREN))
/* Each delimiter is either the TAB keyword or a string exactly one byte
   long. */
200 if (lex_match_id (lexer, "TAB"))
202 else if (lex_is_string (lexer)
203 && ss_length (lex_tokss (lexer)) == 1)
205 delim = ss_first (lex_tokss (lexer));
210 /* XXX should support multibyte UTF-8 characters */
211 lex_error (lexer, NULL);
212 ds_destroy (&delims);
215 ds_put_byte (&delims, delim);
217 lex_match (lexer, T_COMMA);
/* Explicit delimiter list: no quote characters, no soft delimiters, and
   the collected bytes become the hard delimiters. */
220 data_parser_set_empty_line_has_field (parser, true);
221 data_parser_set_quotes (parser, ss_empty ());
222 data_parser_set_soft_delimiters (parser, ss_empty ());
223 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
224 ds_destroy (&delims);
/* Alternative settings (apparently the branch taken when no delimiter
   list is given -- TODO confirm): single and double quotes, CC_SPACES as
   soft delimiters, comma as the hard delimiter. */
228 data_parser_set_empty_line_has_field (parser, false);
229 data_parser_set_quotes (parser, ss_cstr ("'\""));
230 data_parser_set_soft_delimiters (parser,
231 ss_cstr (CC_SPACES));
232 data_parser_set_hard_delimiters (parser, ss_cstr (","));
238 lex_error (lexer, NULL);
242 type = data_parser_get_type (parser);
/* An encoding given without a file handle only produces an MW message. */
244 if (! ds_is_empty (&encoding) && NULL == fh)
245 msg (MW, _("Encoding should not be specified for inline data. It will be "
/* The inline file is used as the handle and is also made the default
   handle. */
249 fh = fh_inline_file ();
250 fh_set_default_handle (fh);
252 if (type != DP_FIXED && end != NULL)
254 msg (SE, _("The END subcommand may be used only with DATA LIST FIXED."));
/* The variable specifications are parsed with a scratch pool that is
   destroyed as soon as parsing returns. */
258 tmp_pool = pool_create ();
259 if (type == DP_FIXED)
260 ok = parse_fixed (lexer, dict, tmp_pool, parser);
262 ok = parse_free (lexer, dict, tmp_pool, parser);
263 pool_destroy (tmp_pool);
267 if (!data_parser_any_fields (parser))
269 msg (SE, _("At least one variable must be specified."));
273 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Computed table setting: on for FIXED, and for delimited input that
   does not span records. */
277 table = type == DP_FIXED || !data_parser_get_span (parser);
279 data_parser_output_description (parser, fh);
281 reader = dfm_open_reader (fh, lexer);
285 if (in_input_program ())
/* Inside an input program the parser and reader are packaged into a
   transformation with data_list_trns_proc and data_list_trns_free as its
   callbacks. */
287 struct data_list_trns *trns = xmalloc (sizeof *trns);
288 trns->parser = parser;
289 trns->reader = reader;
291 add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
/* Hands the parser, reader and dictionary to
   data_parser_make_active_file (apparently the path taken outside an
   input program -- TODO confirm). */
294 data_parser_make_active_file (parser, ds, reader, dict);
297 ds_destroy (&encoding);
/* Cleanup that ends in CMD_CASCADING_FAILURE: the parser and the
   encoding string are destroyed first. */
302 data_parser_destroy (parser);
303 if (!in_input_program ())
306 ds_destroy (&encoding);
307 return CMD_CASCADING_FAILURE;
310 /* Fixed-format parsing. */
312 /* Parses all the variable specifications for DATA LIST FIXED,
313 storing them into PARSER. Uses TMP_POOL for temporary storage;
314 the caller may destroy it. Returns true only if successful. */
317 parse_fixed (struct lexer *lexer, struct dictionary *dict,
318 struct pool *tmp_pool, struct data_parser *parser)
320 int max_records = data_parser_get_records (parser);
324 while (lex_token (lexer) != T_ENDCMD)
327 size_t name_cnt, name_idx;
328 struct fmt_spec *formats, *f;
331 /* Parse everything: the record and column placement, then the
   variable names, then the placement formats for those names.  The
   names and formats are requested from TMP_POOL. */
332 if (!parse_record_placement (lexer, &record, &column)
333 || !parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
334 &names, &name_cnt, PV_NONE)
335 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
336 &formats, &format_cnt))
339 /* Create variables and var specs.  Every parsed format is offered to
   execute_placement_format, which may update RECORD and COLUMN; a
   format it does not handle consumes the next name. */
341 for (f = formats; f < &formats[format_cnt]; f++)
342 if (!execute_placement_format (f, &record, &column))
348 name = names[name_idx++];
350 /* Create variable with the width implied by the input format. */
351 width = fmt_var_width (f);
352 v = dict_create_var (dict, name, width);
356 struct fmt_spec output = fmt_for_output_from_input (f);
357 var_set_both_formats (v, &output);
362 This can be acceptable if we're in INPUT
363 PROGRAM, but only if the existing variable has
364 the same width as the one we would have
366 if (!in_input_program ())
368 msg (SE, _("%s is a duplicate variable name."), name);
372 v = dict_lookup_var_assert (dict, name);
373 if ((width != 0) != (var_get_width (v) != 0))
375 msg (SE, _("There is already a variable %s of a "
380 if (width != 0 && width != var_get_width (v))
382 msg (SE, _("There is already a string variable %s of a "
383 "different width."), name);
/* A record number beyond a nonzero RECORDS limit is reported as an
   error. */
388 if (max_records && record > max_records)
390 msg (SE, _("Cannot place variable %s on record %d when "
391 "RECORDS=%d is specified."),
392 var_get_name (v), record,
393 data_parser_get_records (parser));
/* Register the field with the parser at the current record and
   column. */
396 data_parser_add_fixed_field (parser, f,
397 var_get_case_index (v),
398 var_get_name (v), record, column);
/* Every parsed name must have been consumed by some format. */
402 assert (name_idx == name_cnt);
408 /* Free-format parsing. */
410 /* Parses variable specifications for DATA LIST FREE and adds
   them to PARSER as delimited fields.  Names are read with
   parse_DATA_LIST_vars_pool using TMP_POOL for temporary storage; the
   caller may destroy it.  Each group of names shares one input format
   and one output format.  Returns true only if successful. */
414 parse_free (struct lexer *lexer, struct dictionary *dict,
415 struct pool *tmp_pool, struct data_parser *parser)
418 while (lex_token (lexer) != T_ENDCMD)
420 struct fmt_spec input, output;
425 if (!parse_DATA_LIST_vars_pool (lexer, dict, tmp_pool,
426 &name, &name_cnt, PV_NONE))
/* A parenthesized format must parse, pass fmt_check_input and be
   followed by a closing parenthesis. */
429 if (lex_match (lexer, T_LPAREN))
431 if (!parse_format_specifier (lexer, &input)
432 || !fmt_check_input (&input)
433 || !lex_force_match (lexer, T_RPAREN))
436 /* As a special case, N format is treated as F format
437 for free-field input. */
438 if (input.type == FMT_N)
441 output = fmt_for_output_from_input (&input);
/* Apparently the branch taken when no format is given (TODO confirm): an
   optional asterisk is accepted, the input format is F with width 8 and
   no decimals, and the output format comes from settings_get_format. */
445 lex_match (lexer, T_ASTERISK);
446 input = fmt_for_input (FMT_F, 8, 0);
447 output = *settings_get_format ();
/* One variable and one delimited field per parsed name. */
450 for (i = 0; i < name_cnt; i++)
454 v = dict_create_var (dict, name[i], fmt_var_width (&input));
457 msg (SE, _("%s is a duplicate variable name."), name[i]);
460 var_set_both_formats (v, &output);
462 data_parser_add_delimited_field (parser,
463 &input, var_get_case_index (v),
471 /* Input procedure. */
473 /* Destroys DATA LIST transformation TRNS_: its data parser is
   destroyed and its data file reader is closed.
   Returns true if successful, false if an I/O error occurred. */
476 data_list_trns_free (void *trns_)
/* TRNS_ is the struct data_list_trns that cmd_data_list passed to
   add_transformation. */
478 struct data_list_trns *trns = trns_;
479 data_parser_destroy (trns->parser);
480 dfm_close_reader (trns->reader);
485 /* Handle DATA LIST transformation TRNS_, parsing data into *C.
   CASE_NUM is unused.

   The result is TRNS_CONTINUE when the parser filled in a case and
   TRNS_END_FILE when it did not, except that a reader error or a
   repeated end of file is escalated.  When an END variable was
   specified, TRNS_END_FILE is turned back into TRNS_CONTINUE. */
487 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
489 struct data_list_trns *trns = trns_;
/* Obtain an unshared case before parsed values are written into it. */
492 *c = case_unshare (*c);
493 if (data_parser_parse (trns->parser, trns->reader, *c))
494 retval = TRNS_CONTINUE;
495 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
497 /* An I/O error, or encountering end of file for a second
498 time, should be escalated into a more serious error. */
502 retval = TRNS_END_FILE;
504 /* If there was an END subcommand handle it. */
505 if (trns->end != NULL)
/* The END variable is accessed as a number, through the f member of its
   case data. */
507 double *end = &case_data_rw (*c, trns->end)->f;
/* With an END variable, end of file is reported as TRNS_CONTINUE. */
508 if (retval == TRNS_END_FILE)
511 retval = TRNS_CONTINUE;