1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include <data/case.h>
26 #include <data/data-in.h>
27 #include <data/casereader.h>
28 #include <data/dictionary.h>
29 #include <data/format.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/data-io/data-parser.h>
36 #include <language/data-io/data-reader.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/data-io/inpt-pgm.h>
39 #include <language/data-io/placement-parser.h>
40 #include <language/lexer/format-parser.h>
41 #include <language/lexer/lexer.h>
42 #include <language/lexer/variable-parser.h>
43 #include <libpspp/assertion.h>
44 #include <libpspp/compiler.h>
45 #include <libpspp/message.h>
46 #include <libpspp/misc.h>
47 #include <libpspp/pool.h>
48 #include <libpspp/str.h>
/* Shorthand used around user-visible message strings in this file:
   _(msgid) expands to gettext (msgid). */
54 #define _(msgid) gettext (msgid)
56 /* DATA LIST transformation data.
   cmd_data_list allocates one of these when DATA LIST is used inside
   INPUT PROGRAM and passes it to add_transformation; it then arrives
   as the opaque argument of data_list_trns_proc and
   data_list_trns_free. */
59 struct data_parser *parser; /* Parser; destroyed by data_list_trns_free. */
60 struct dfm_reader *reader; /* Data file reader; closed by data_list_trns_free. */
61 struct variable *end; /* Variable specified on END subcommand;
   data_list_trns_proc tests it against NULL before using it. */
/* Variable-specification parsers defined later in this file.
   cmd_data_list calls parse_fixed when the parser type is DP_FIXED
   and parse_free otherwise. */
64 static bool parse_fixed (struct lexer *, struct dictionary *,
65 struct pool *, struct data_parser *);
66 static bool parse_free (struct lexer *, struct dictionary *,
67 struct pool *, struct data_parser *);
/* Transformation callbacks that cmd_data_list hands to
   add_transformation when DATA LIST appears inside INPUT PROGRAM. */
69 static trns_free_func data_list_trns_free;
70 static trns_proc_func data_list_trns_proc;
/* Parses and executes the DATA LIST command.

   LEXER supplies the command tokens and DS is the dataset the command
   applies to.  The visible statements proceed in these steps:
     - read the subcommands that precede the first '/' token (FILE,
       RECORDS, SKIP, END, NOTABLE, TABLE and the FIXED, FREE or LIST
       type keywords) and configure a data_parser to match;
     - parse the variable specifications with parse_fixed or
       parse_free;
     - open a reader on the file handle, then either register a
       transformation (inside INPUT PROGRAM) or call
       data_parser_make_active_file.
   The failure path destroys the parser and returns
   CMD_CASCADING_FAILURE.

   NOTE(review): this copy of the function is missing lines (for
   example the declarations of end, table and ok, the braces, several
   conditions and the success return), so the comments below describe
   only the statements that are present. */
73 cmd_data_list (struct lexer *lexer, struct dataset *ds)
75 struct dictionary *dict;
76 struct data_parser *parser;
77 struct dfm_reader *reader;
79 struct file_handle *fh;
82 enum data_parser_type type;
84 struct pool *tmp_pool;
/* Inside INPUT PROGRAM the dataset's own dictionary is used;
   otherwise a new dictionary is created for this command. */
87 dict = in_input_program () ? dataset_dict (ds) : dict_create ();
88 parser = data_parser_create ();
93 table = -1; /* Print table if nonzero, -1=undecided. */
/* Subcommand loop: runs until the current token is '/'. */
96 while (lex_token (lexer) != '/')
/* FILE: the '=' is matched if present, then fh_parse is asked for a
   handle of kind FH_REF_FILE or FH_REF_INLINE. */
98 if (lex_match_id (lexer, "FILE"))
100 lex_match (lexer, '=');
102 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
/* RECORDS: the results of matching '=', '(' and ')' are not checked,
   so only the integer itself is mandatory here. */
106 else if (lex_match_id (lexer, "RECORDS"))
108 lex_match (lexer, '=');
109 lex_match (lexer, '(');
110 if (!lex_force_int (lexer))
112 data_parser_set_records (parser, lex_integer (lexer));
114 lex_match (lexer, ')');
/* SKIP: an integer that is passed to data_parser_set_skip. */
116 else if (lex_match_id (lexer, "SKIP"))
118 lex_match (lexer, '=');
119 if (!lex_force_int (lexer))
121 data_parser_set_skip (parser, lex_integer (lexer));
/* END: rejected outside INPUT PROGRAM and when given twice (see the
   two messages below). */
124 else if (lex_match_id (lexer, "END"))
126 if (!in_input_program ())
128 msg (SE, _("The END subcommand may only be used within "
134 msg (SE, _("The END subcommand may only be specified once."));
138 lex_match (lexer, '=');
139 if (!lex_force_id (lexer))
/* The named variable is looked up in DICT; the following statement
   creates it with width 0.  NOTE(review): presumably creation happens
   only when the lookup found nothing; the guarding condition is not
   visible in this copy. */
141 end = dict_lookup_var (dict, lex_tokid (lexer));
143 end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
146 else if (lex_match_id (lexer, "NOTABLE"))
148 else if (lex_match_id (lexer, "TABLE"))
/* Any other identifier must be one of the type keywords.  FIXED
   selects DP_FIXED; FREE and LIST both select DP_DELIMITED and differ
   only in the span setting (true for FREE, false for LIST). */
150 else if (lex_token (lexer) == T_ID)
152 if (lex_match_id (lexer, "FIXED"))
153 data_parser_set_type (parser, DP_FIXED);
154 else if (lex_match_id (lexer, "FREE"))
156 data_parser_set_type (parser, DP_DELIMITED);
157 data_parser_set_span (parser, true);
159 else if (lex_match_id (lexer, "LIST"))
161 data_parser_set_type (parser, DP_DELIMITED);
162 data_parser_set_span (parser, false);
166 lex_error (lexer, NULL);
172 msg (SE, _("Only one of FIXED, FREE, or LIST may "
/* For delimited input, a parenthesized list may follow the keyword.
   Each entry is TAB or a one-character string; entries are collected
   in DELIMS and a ',' between entries is matched if present. */
178 if (data_parser_get_type (parser) == DP_DELIMITED)
180 if (lex_match (lexer, '('))
182 struct string delims = DS_EMPTY_INITIALIZER;
184 while (!lex_match (lexer, ')'))
188 if (lex_match_id (lexer, "TAB"))
190 else if (lex_token (lexer) == T_STRING
191 && ds_length (lex_tokstr (lexer)) == 1)
193 delim = ds_first (lex_tokstr (lexer));
198 lex_error (lexer, NULL);
199 ds_destroy (&delims);
202 ds_put_char (&delims, delim);
204 lex_match (lexer, ',');
/* Settings applied with an explicit delimiter list: the collected
   characters become the hard delimiters, with no quote characters
   and no soft delimiters. */
207 data_parser_set_empty_line_has_field (parser, true);
208 data_parser_set_quotes (parser, ss_empty ());
209 data_parser_set_soft_delimiters (parser, ss_empty ());
210 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
211 ds_destroy (&delims);
/* Second group of settings: single and double quote characters,
   CC_SPACES as soft delimiters and "," as the hard delimiter.
   NOTE(review): presumably the branch taken when no delimiter list
   was given; the else is not visible in this copy. */
215 data_parser_set_empty_line_has_field (parser, false);
216 data_parser_set_quotes (parser, ss_cstr ("'\""));
217 data_parser_set_soft_delimiters (parser,
218 ss_cstr (CC_SPACES));
219 data_parser_set_hard_delimiters (parser, ss_cstr (","));
225 lex_error (lexer, NULL);
229 type = data_parser_get_type (parser);
/* NOTE(review): presumably the inline file is substituted only when
   no FILE subcommand supplied a handle; the condition is not visible
   in this copy. */
232 fh = fh_inline_file ();
233 fh_set_default_handle (fh);
235 if (type != DP_FIXED && end != NULL)
237 msg (SE, _("The END subcommand may be used only with DATA LIST FIXED."));
/* Variable specifications are parsed with a temporary pool that is
   destroyed as soon as the parser function returns. */
241 tmp_pool = pool_create ();
242 if (type == DP_FIXED)
243 ok = parse_fixed (lexer, dict, tmp_pool, parser);
245 ok = parse_free (lexer, dict, tmp_pool, parser);
246 pool_destroy (tmp_pool);
250 if (!data_parser_any_fields (parser))
252 msg (SE, _("At least one variable must be specified."));
256 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Value given to TABLE here: nonzero for fixed format and for
   delimited format without span.  NOTE(review): presumably applied
   only while table is still -1 (undecided) -- confirm. */
260 table = type == DP_FIXED || !data_parser_get_span (parser);
262 data_parser_output_description (parser, fh);
264 reader = dfm_open_reader (fh, lexer);
/* Inside INPUT PROGRAM the parser and reader are stored in a
   data_list_trns and registered as a transformation; the other
   visible path passes them to data_parser_make_active_file. */
268 if (in_input_program ())
270 struct data_list_trns *trns = xmalloc (sizeof *trns);
271 trns->parser = parser;
272 trns->reader = reader;
274 add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
277 data_parser_make_active_file (parser, ds, reader, dict);
/* Failure exit. */
284 data_parser_destroy (parser);
287 return CMD_CASCADING_FAILURE;
290 /* Fixed-format parsing. */
292 /* Parses all the variable specifications for DATA LIST FIXED,
293 storing them into PARSER. Uses TMP_POOL for temporary storage;
294 the caller may destroy it. Returns true only if successful. */
297 parse_fixed (struct lexer *lexer, struct dictionary *dict,
298 struct pool *tmp_pool, struct data_parser *parser)
/* RECORDS value configured on PARSER.  Zero disables the record-number
   range check further down, which tests max_records before comparing. */
300 int max_records = data_parser_get_records (parser);
/* One iteration per group of variable specifications, until the
   current token is '.'. */
304 while (lex_token (lexer) != '.')
307 size_t name_cnt, name_idx;
308 struct fmt_spec *formats, *f;
311 /* Parse everything: the record placement, the variable names
   (NAME_CNT of them, stored using TMP_POOL) and the placements and
   formats for those names (FORMAT_CNT entries in FORMATS). */
312 if (!parse_record_placement (lexer, &record, &column)
313 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
314 &names, &name_cnt, PV_NONE)
315 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
316 &formats, &format_cnt))
319 /* Create variables and var specs.
   Each entry of FORMATS is first offered to execute_placement_format
   together with the current record and column; the statements below
   run for entries on which it returns false.  NOTE(review):
   presumably those are the entries that describe a variable rather
   than a pure placement -- confirm in placement-parser.

   Per-variable steps visible below:
     - take the next name and create a variable whose width comes
       from the input format; its formats are set from
       fmt_for_output_from_input;
     - a duplicate name is an error outside INPUT PROGRAM; inside it
       the existing variable is looked up and must agree in zero
       versus nonzero width, and, when the width is nonzero (the
       message calls this a string variable), in the exact width;
     - the record number is checked against RECORDS;
     - the field is added to PARSER with its format, case index,
       name, record and column.
   The final assertion checks that every parsed name was consumed. */
321 for (f = formats; f < &formats[format_cnt]; f++)
322 if (!execute_placement_format (f, &record, &column))
328 name = names[name_idx++];
330 /* Create variable. */
331 width = fmt_var_width (f);
332 v = dict_create_var (dict, name, width);
336 struct fmt_spec output = fmt_for_output_from_input (f);
337 var_set_both_formats (v, &output);
342 This can be acceptable if we're in INPUT
343 PROGRAM, but only if the existing variable has
344 the same width as the one we would have
346 if (!in_input_program ())
348 msg (SE, _("%s is a duplicate variable name."), name);
352 v = dict_lookup_var_assert (dict, name);
353 if ((width != 0) != (var_get_width (v) != 0))
355 msg (SE, _("There is already a variable %s of a "
360 if (width != 0 && width != var_get_width (v))
362 msg (SE, _("There is already a string variable %s of a "
363 "different width."), name);
368 if (max_records && record > max_records)
370 msg (SE, _("Cannot place variable %s on record %d when "
371 "RECORDS=%d is specified."),
372 var_get_name (v), record,
373 data_parser_get_records (parser));
376 data_parser_add_fixed_field (parser, f,
377 var_get_case_index (v),
378 var_get_name (v), record, column);
382 assert (name_idx == name_cnt);
388 /* Free-format parsing. */
390 /* Parses variable specifications for delimited input and adds them
   to PARSER as delimited fields.  cmd_data_list calls this for every
   parser type other than DP_FIXED, i.e. for both DATA LIST FREE and
   DATA LIST LIST.  New variables are created in DICT.  Uses TMP_POOL
   for temporary storage; the caller may destroy it.  Returns true
   only if successful. */
394 parse_free (struct lexer *lexer, struct dictionary *dict,
395 struct pool *tmp_pool, struct data_parser *parser)
/* One iteration per group of names sharing a format, until the
   current token is '.'. */
398 while (lex_token (lexer) != '.')
400 struct fmt_spec input, output;
405 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
406 &name, &name_cnt, PV_NONE))
/* An explicit format may follow in parentheses.  It must parse, pass
   fmt_check_input, and be closed by ')'. */
409 if (lex_match (lexer, '('))
411 if (!parse_format_specifier (lexer, &input)
412 || !fmt_check_input (&input)
413 || !lex_force_match (lexer, ')'))
416 /* As a special case, N format is treated as F format
417 for free-field input. */
418 if (input.type == FMT_N)
421 output = fmt_for_output_from_input (&input);
/* Defaults: a '*' is matched if present, the input format is F8.0 and
   the output format is the one returned by settings_get_format.
   NOTE(review): presumably the branch taken when no parenthesized
   format was given; the else is not visible in this copy. */
425 lex_match (lexer, '*');
426 input = fmt_for_input (FMT_F, 8, 0);
427 output = *settings_get_format ();
/* Create one variable per name, with the width implied by the input
   format; the message below reports a duplicate name.  Each variable
   gets OUTPUT as its formats and is added to PARSER as a delimited
   field. */
430 for (i = 0; i < name_cnt; i++)
434 v = dict_create_var (dict, name[i], fmt_var_width (&input));
437 msg (SE, _("%s is a duplicate variable name."), name[i]);
440 var_set_both_formats (v, &output);
442 data_parser_add_delimited_field (parser,
443 &input, var_get_case_index (v),
451 /* Input procedure. */
453 /* Destroys DATA LIST transformation TRNS_, the struct data_list_trns
   that cmd_data_list passed to add_transformation.  The visible
   statements release the two resources it refers to: the parser and
   the data file reader.
   Returns true if successful, false if an I/O error occurred.
   NOTE(review): the return statement is not present in this copy, so
   that contract is carried over from the original comment. */
456 data_list_trns_free (void *trns_)
458 struct data_list_trns *trns = trns_;
459 data_parser_destroy (trns->parser);
460 dfm_close_reader (trns->reader);
465 /* Handle DATA LIST transformation TRNS_, parsing data into C.
   TRNS_ is the struct data_list_trns registered by cmd_data_list;
   the next case is read from its reader with its parser.  CASE_NUM
   is unused.  The result is one of the TRNS_* codes stored in
   retval. */
467 data_list_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
469 struct data_list_trns *trns = trns_;
/* A successful parse continues with the next transformation. */
472 if (data_parser_parse (trns->parser, trns->reader, c))
473 retval = TRNS_CONTINUE;
474 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
476 /* An I/O error, or encountering end of file for a second
477 time, should be escalated into a more serious error. */
/* NOTE(review): presumably the plain end-of-file case, reached when
   neither condition above holds; the else is not visible here. */
481 retval = TRNS_END_FILE;
483 /* If there was an END subcommand handle it. */
484 if (trns->end != NULL)
/* END addresses the numeric value of the END variable inside case C,
   obtained writable through case_data_rw. */
486 double *end = &case_data_rw (c, trns->end)->f;
/* With an END variable, end of file is turned back into
   TRNS_CONTINUE.  NOTE(review): the statements that store a value
   through END are not visible in this copy. */
487 if (retval == TRNS_END_FILE)
490 retval = TRNS_CONTINUE;