1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include <data/case.h>
26 #include <data/data-in.h>
27 #include <data/casereader.h>
28 #include <data/dictionary.h>
29 #include <data/format.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/data-io/data-parser.h>
36 #include <language/data-io/data-reader.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/data-io/inpt-pgm.h>
39 #include <language/data-io/placement-parser.h>
40 #include <language/lexer/format-parser.h>
41 #include <language/lexer/lexer.h>
42 #include <language/lexer/variable-parser.h>
43 #include <libpspp/assertion.h>
44 #include <libpspp/compiler.h>
45 #include <libpspp/message.h>
46 #include <libpspp/misc.h>
47 #include <libpspp/pool.h>
48 #include <libpspp/str.h>
54 #define _(msgid) gettext (msgid)
56 /* DATA LIST transformation data. */
/* State shared by data_list_trns_proc and data_list_trns_free.
   NOTE(review): the struct tag and braces are not visible in this
   listing; data_list_trns is the type name used where this state is
   allocated in cmd_data_list -- confirm against the full file. */
59 struct data_parser *parser; /* Parser handed to data_parser_parse for each case. */
60 struct dfm_reader *reader; /* Data file reader that the parser reads from. */
61 struct variable *end; /* Variable specified on END subcommand; compared against NULL before use. */
/* Variable-specification parsers.  cmd_data_list calls parse_fixed
   when the data parser type is DP_FIXED and parse_free otherwise;
   both receive the lexer, the target dictionary, a temporary pool,
   and the data parser being configured. */
64 static bool parse_fixed (struct lexer *, struct dictionary *,
65 struct pool *, struct data_parser *);
66 static bool parse_free (struct lexer *, struct dictionary *,
67 struct pool *, struct data_parser *);
/* Callbacks passed to add_transformation when DATA LIST is used
   inside INPUT PROGRAM. */
69 static trns_free_func data_list_trns_free;
70 static trns_proc_func data_list_trns_proc;
/* Parses and executes the DATA LIST command.

   Subcommands (FILE, ENCODING, RECORDS, SKIP, END, NOTABLE, TABLE and
   the type keywords FIXED, FREE, LIST) are processed until a slash
   token is reached.  The variable specifications are then parsed by
   parse_fixed or parse_free, depending on the data parser type.

   Inside INPUT PROGRAM the parser and reader are attached to DS as a
   transformation (data_list_trns_proc / data_list_trns_free);
   otherwise they are passed to data_parser_make_active_file together
   with the dictionary.

   NOTE(review): this listing omits many short lines (braces, else
   branches, goto and return statements, small declarations such as
   those of table and ok).  Error paths and the success return are
   therefore not visible here; CMD_CASCADING_FAILURE is the only
   return value shown. */
73 cmd_data_list (struct lexer *lexer, struct dataset *ds)
75 struct dictionary *dict;
76 struct data_parser *parser;
77 struct dfm_reader *reader;
78 struct variable *end = NULL;
79 struct file_handle *fh = NULL;
80 struct string encoding = DS_EMPTY_INITIALIZER;
83 enum data_parser_type type;
85 struct pool *tmp_pool;
/* Inside INPUT PROGRAM the variables go into the dictionary of DS;
   otherwise a new dictionary is created for them. */
88 dict = in_input_program () ? dataset_dict (ds) : dict_create ();
89 parser = data_parser_create ();
92 table = -1; /* Print table if nonzero, -1=undecided. */
/* Process subcommands until a slash token is reached. */
95 while (lex_token (lexer) != '/')
97 if (lex_match_id (lexer, "FILE"))
/* The equals sign is optional: the result of lex_match is not
   checked. */
99 lex_match (lexer, '=');
101 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
105 else if (lex_match_id (lexer, "ENCODING"))
107 lex_match (lexer, '=');
108 if (!lex_force_string (lexer))
/* NOTE(review): encoding is initialized again here without a
   visible ds_destroy; if ENCODING may be given more than once the
   earlier contents might leak -- confirm against the full file. */
111 ds_init_string (&encoding, lex_tokstr (lexer));
115 else if (lex_match_id (lexer, "RECORDS"))
/* The equals sign and the parentheses around the record count are
   all optional: none of the lex_match results is checked. */
117 lex_match (lexer, '=');
118 lex_match (lexer, '(');
119 if (!lex_force_int (lexer))
121 data_parser_set_records (parser, lex_integer (lexer));
123 lex_match (lexer, ')');
125 else if (lex_match_id (lexer, "SKIP"))
127 lex_match (lexer, '=');
128 if (!lex_force_int (lexer))
130 data_parser_set_skip (parser, lex_integer (lexer));
133 else if (lex_match_id (lexer, "END"))
/* END is rejected outside INPUT PROGRAM and when given twice (see
   the two messages below). */
135 if (!in_input_program ())
137 msg (SE, _("The END subcommand may only be used within "
143 msg (SE, _("The END subcommand may only be specified once."));
147 lex_match (lexer, '=');
148 if (!lex_force_id (lexer))
/* Look up the END variable by name, or create it with width 0.
   NOTE(review): the condition guarding the creation is not visible
   in this listing; presumably it runs only when the lookup fails. */
150 end = dict_lookup_var (dict, lex_tokid (lexer));
152 end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
155 else if (lex_match_id (lexer, "NOTABLE"))
157 else if (lex_match_id (lexer, "TABLE"))
159 else if (lex_token (lexer) == T_ID)
/* Type keywords.  FREE and LIST both select DP_DELIMITED and differ
   only in the span setting: true for FREE, false for LIST. */
161 if (lex_match_id (lexer, "FIXED"))
162 data_parser_set_type (parser, DP_FIXED);
163 else if (lex_match_id (lexer, "FREE"))
165 data_parser_set_type (parser, DP_DELIMITED);
166 data_parser_set_span (parser, true);
168 else if (lex_match_id (lexer, "LIST"))
170 data_parser_set_type (parser, DP_DELIMITED);
171 data_parser_set_span (parser, false);
175 lex_error (lexer, NULL);
181 msg (SE, _("Only one of FIXED, FREE, or LIST may "
187 if (data_parser_get_type (parser) == DP_DELIMITED)
/* Optional parenthesized delimiter list for delimited data.  Each
   entry is the keyword TAB or a string of length one; entries may
   be separated by commas. */
189 if (lex_match (lexer, '('))
191 struct string delims = DS_EMPTY_INITIALIZER;
193 while (!lex_match (lexer, ')'))
197 if (lex_match_id (lexer, "TAB"))
199 else if (lex_token (lexer) == T_STRING
200 && ds_length (lex_tokstr (lexer)) == 1)
202 delim = ds_first (lex_tokstr (lexer));
207 lex_error (lexer, NULL);
208 ds_destroy (&delims);
211 ds_put_char (&delims, delim);
213 lex_match (lexer, ',');
/* Explicit delimiters: only the listed characters act as (hard)
   delimiters, no quote characters and no soft delimiters are set,
   and an empty line counts as containing a field. */
216 data_parser_set_empty_line_has_field (parser, true);
217 data_parser_set_quotes (parser, ss_empty ());
218 data_parser_set_soft_delimiters (parser, ss_empty ());
219 data_parser_set_hard_delimiters (parser, ds_ss (&delims));
220 ds_destroy (&delims);
/* Default configuration: single and double quote characters, the
   CC_SPACES characters as soft delimiters, and comma as the hard
   delimiter.
   NOTE(review): the else that separates this from the explicit
   configuration above is not visible in this listing. */
224 data_parser_set_empty_line_has_field (parser, false);
225 data_parser_set_quotes (parser, ss_cstr ("'\""));
226 data_parser_set_soft_delimiters (parser,
227 ss_cstr (CC_SPACES));
228 data_parser_set_hard_delimiters (parser, ss_cstr (","));
234 lex_error (lexer, NULL);
238 type = data_parser_get_type (parser);
/* Apply a non-empty ENCODING.  The warning below concerns inline
   data; the other visible action stores the encoding in the
   dictionary.
   NOTE(review): the condition choosing between the two is not
   visible in this listing. */
240 if (! ds_is_empty (&encoding))
243 msg (MW, _("Encoding should not be specified for inline data. It will be ignored."));
245 dict_set_encoding (dict, ds_cstr (&encoding));
/* NOTE(review): presumably the inline file is substituted only when
   no FILE subcommand supplied a handle; the guard is not visible
   in this listing. */
249 fh = fh_inline_file ();
250 fh_set_default_handle (fh);
252 if (type != DP_FIXED && end != NULL)
254 msg (SE, _("The END subcommand may be used only with DATA LIST FIXED."));
/* The temporary pool lives only for the duration of the variable
   specification parse. */
258 tmp_pool = pool_create ();
259 if (type == DP_FIXED)
260 ok = parse_fixed (lexer, dict, tmp_pool, parser);
262 ok = parse_free (lexer, dict, tmp_pool, parser);
263 pool_destroy (tmp_pool);
267 if (!data_parser_any_fields (parser))
269 msg (SE, _("At least one variable must be specified."));
273 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Computed table setting: nonzero for FIXED and for delimited data
   without spanning (which is how LIST is configured above).
   NOTE(review): presumably applied only while table is still -1;
   the guard is not visible in this listing. */
277 table = type == DP_FIXED || !data_parser_get_span (parser);
279 data_parser_output_description (parser, fh);
281 reader = dfm_open_reader (fh, lexer);
285 if (in_input_program ())
/* Hand the parser and reader to a transformation on DS.
   NOTE(review): the assignment of the END variable to the
   transformation state is not visible in this listing. */
287 struct data_list_trns *trns = xmalloc (sizeof *trns);
288 trns->parser = parser;
289 trns->reader = reader;
291 add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
294 data_parser_make_active_file (parser, ds, reader, dict);
/* Failure cleanup: the parser is destroyed and
   CMD_CASCADING_FAILURE is returned.
   NOTE(review): the statement controlled by the in_input_program
   test below is not visible in this listing. */
301 data_parser_destroy (parser);
302 if (!in_input_program ())
305 return CMD_CASCADING_FAILURE;
308 /* Fixed-format parsing. */
310 /* Parses all the variable specifications for DATA LIST FIXED,
311 storing them into PARSER. Uses TMP_POOL for temporary storage;
312 the caller may destroy it. Returns true only if successful. */
/* Parses DATA LIST FIXED variable specifications until a period
   token is reached.

   Each iteration parses a record placement, a list of variable
   names, and the placements and formats for those names.  TMP_POOL is
   passed to the name and placement parsers.  For every resulting
   format a variable is created in DICT and registered with PARSER
   through data_parser_add_fixed_field, using the current record and
   column.

   A name that already exists in DICT is an error outside INPUT
   PROGRAM.  Inside INPUT PROGRAM the existing variable is reused,
   provided that it agrees with the new specification on whether the
   width is zero and, when the width is nonzero (a string variable,
   per the message text), on the exact width.

   A variable placed on a record number greater than the RECORDS
   setting is rejected; the check is skipped when that setting is
   zero.

   NOTE(review): this listing omits short lines (braces, several
   declarations, return statements), so the return values are not
   visible here. */
315 parse_fixed (struct lexer *lexer, struct dictionary *dict,
316 struct pool *tmp_pool, struct data_parser *parser)
318 int max_records = data_parser_get_records (parser);
322 while (lex_token (lexer) != '.')
325 size_t name_cnt, name_idx;
326 struct fmt_spec *formats, *f;
329 /* Parse everything. */
330 if (!parse_record_placement (lexer, &record, &column)
331 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
332 &names, &name_cnt, PV_NONE)
333 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
334 &formats, &format_cnt))
337 /* Create variables and var specs. */
/* execute_placement_format receives every format together with the
   current record and column.
   NOTE(review): presumably it consumes pure positioning entries and
   only the remaining formats name a variable; the lines between
   this test and the variable creation are not visible here. */
339 for (f = formats; f < &formats[format_cnt]; f++)
340 if (!execute_placement_format (f, &record, &column))
346 name = names[name_idx++];
348 /* Create variable. */
349 width = fmt_var_width (f);
350 v = dict_create_var (dict, name, width);
/* Newly created variable: its formats are derived from the input
   format via fmt_for_output_from_input. */
354 struct fmt_spec output = fmt_for_output_from_input (f);
355 var_set_both_formats (v, &output);
360 This can be acceptable if we're in INPUT
361 PROGRAM, but only if the existing variable has
362 the same width as the one we would have
364 if (!in_input_program ())
366 msg (SE, _("%s is a duplicate variable name."), name);
370 v = dict_lookup_var_assert (dict, name);
371 if ((width != 0) != (var_get_width (v) != 0))
373 msg (SE, _("There is already a variable %s of a "
378 if (width != 0 && width != var_get_width (v))
380 msg (SE, _("There is already a string variable %s of a "
381 "different width."), name);
386 if (max_records && record > max_records)
388 msg (SE, _("Cannot place variable %s on record %d when "
389 "RECORDS=%d is specified."),
390 var_get_name (v), record,
391 data_parser_get_records (parser));
394 data_parser_add_fixed_field (parser, f,
395 var_get_case_index (v),
396 var_get_name (v), record, column);
400 assert (name_idx == name_cnt);
406 /* Free-format parsing. */
408 /* Parses variable specifications for DATA LIST FREE and adds
409 them to PARSER. Uses TMP_POOL for temporary storage; the caller
410 may destroy it. Returns true only if successful. */
/* Parses delimited-data variable specifications until a period
   token is reached.

   Each iteration parses a list of variable names (TMP_POOL is passed
   to the name parser), followed by either a parenthesized input
   format or an optional asterisk.  One variable per name is created
   in DICT with the width implied by the input format, and a delimited
   field is registered with PARSER for it.  A name that cannot be
   created is reported as a duplicate variable name.

   NOTE(review): this listing omits short lines (braces, else
   branches, several declarations, return statements), so the return
   values are not visible here. */
412 parse_free (struct lexer *lexer, struct dictionary *dict,
413 struct pool *tmp_pool, struct data_parser *parser)
416 while (lex_token (lexer) != '.')
418 struct fmt_spec input, output;
423 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
424 &name, &name_cnt, PV_NONE))
/* Explicit format: it must parse, pass fmt_check_input, and be
   followed by a closing parenthesis. */
427 if (lex_match (lexer, '('))
429 if (!parse_format_specifier (lexer, &input)
430 || !fmt_check_input (&input)
431 || !lex_force_match (lexer, ')'))
434 /* As a special case, N format is treated as F format
435 for free-field input. */
436 if (input.type == FMT_N)
439 output = fmt_for_output_from_input (&input);
/* No explicit format: an asterisk is accepted but not required, the
   input format is F with width 8 and 0 decimals, and the output
   format is taken from settings_get_format. */
443 lex_match (lexer, '*');
444 input = fmt_for_input (FMT_F, 8, 0);
445 output = *settings_get_format ();
/* Create one variable and one delimited field per parsed name. */
448 for (i = 0; i < name_cnt; i++)
452 v = dict_create_var (dict, name[i], fmt_var_width (&input));
455 msg (SE, _("%s is a duplicate variable name."), name[i]);
458 var_set_both_formats (v, &output);
460 data_parser_add_delimited_field (parser,
461 &input, var_get_case_index (v),
469 /* Input procedure. */
471 /* Destroys DATA LIST transformation TRNS.
472 Returns true if successful, false if an I/O error occurred. */
/* TRNS_ is the untyped transformation state registered through
   add_transformation; it is used here as a struct data_list_trns.
   NOTE(review): the return statement and any release of the state
   itself are not visible in this listing, so the return-value claim
   above cannot be checked from here. */
474 data_list_trns_free (void *trns_)
476 struct data_list_trns *trns = trns_;
/* Destroy the parser, then close the data file reader. */
477 data_parser_destroy (trns->parser);
478 dfm_close_reader (trns->reader);
483 /* Handle DATA LIST transformation TRNS, parsing data into *C. */
/* TRNS_ is the untyped transformation state registered through
   add_transformation; CASE_NUM is unused.

   The result is TRNS_CONTINUE when data_parser_parse succeeds.  When
   it fails, a reader error or a dfm_eof value greater than 1 takes
   the escalation branch, and any other failure yields TRNS_END_FILE.
   With an END variable, a TRNS_END_FILE result is turned back into
   TRNS_CONTINUE.

   NOTE(review): this listing omits short lines, including the
   declaration of retval, the value assigned in the escalation branch,
   the stores through the END pointer, and the return statement. */
485 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
487 struct data_list_trns *trns = trns_;
/* Replace *C with the result of case_unshare before writing to it;
   presumably this yields a case that is safe to modify -- confirm
   against the case API. */
490 *c = case_unshare (*c);
491 if (data_parser_parse (trns->parser, trns->reader, *c))
492 retval = TRNS_CONTINUE;
493 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
495 /* An I/O error, or encountering end of file for a second
496 time, should be escalated into a more serious error. */
500 retval = TRNS_END_FILE;
502 /* If there was an END subcommand handle it. */
503 if (trns->end != NULL)
/* Address of the numeric value of the END variable inside the
   case. */
505 double *end = &case_data_rw (*c, trns->end)->f;
/* End of file is reported through the END variable rather than by
   stopping: the result becomes TRNS_CONTINUE. */
506 if (retval == TRNS_END_FILE)
509 retval = TRNS_CONTINUE;