1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include <data/case.h>
26 #include <data/data-in.h>
27 #include <data/casereader.h>
28 #include <data/casereader-provider.h>
29 #include <data/dictionary.h>
30 #include <data/format.h>
31 #include <data/procedure.h>
32 #include <data/settings.h>
33 #include <data/transformations.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/data-io/data-reader.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/data-io/inpt-pgm.h>
39 #include <language/data-io/placement-parser.h>
40 #include <language/lexer/format-parser.h>
41 #include <language/lexer/lexer.h>
42 #include <language/lexer/variable-parser.h>
43 #include <libpspp/assertion.h>
44 #include <libpspp/compiler.h>
45 #include <libpspp/ll.h>
46 #include <libpspp/message.h>
47 #include <libpspp/misc.h>
48 #include <libpspp/pool.h>
49 #include <libpspp/str.h>
50 #include <output/table.h>
56 #define _(msgid) gettext (msgid)
58 /* Utility function. */
60 /* Describes how to parse one variable. */
63 struct ll ll; /* List element. */
66 struct fmt_spec input; /* Input format of this field. */
67 int fv; /* First value in case. */
68 char name[VAR_NAME_LEN + 1]; /* Var name for error messages and tables. */
70 /* Fixed format only. */
71 int record; /* Record number (1-based). */
72 int first_column; /* Column numbers in record. */
75 static struct dls_var_spec *
76 ll_to_dls_var_spec (struct ll *ll)
78 return ll_data (ll, struct dls_var_spec, ll);
81 /* Constants for DATA LIST type. */
89 /* DATA LIST private data structure. */
92 struct pool *pool; /* Used for all DATA LIST storage. */
93 struct ll_list specs; /* List of dls_var_specs. */
94 struct dfm_reader *reader; /* Data file reader. */
95 enum dls_type type; /* Type of DATA LIST construct. */
96 struct variable *end; /* Variable specified on END subcommand. */
97 int record_cnt; /* Number of records. */
98 struct string delims; /* Field delimiters. */
99 int skip_records; /* Records to skip before first case. */
100 size_t value_cnt; /* Number of `union value's in case. */
103 static const struct casereader_class data_list_casereader_class;
105 static bool parse_fixed (struct lexer *, struct dictionary *dict,
106 struct pool *tmp_pool, struct data_list_pgm *);
107 static bool parse_free (struct lexer *, struct dictionary *dict,
108 struct pool *tmp_pool, struct data_list_pgm *);
109 static void dump_fixed_table (const struct ll_list *,
110 const struct file_handle *, int record_cnt);
111 static void dump_free_table (const struct data_list_pgm *,
112 const struct file_handle *);
114 static trns_free_func data_list_trns_free;
115 static trns_proc_func data_list_trns_proc;
118 cmd_data_list (struct lexer *lexer, struct dataset *ds)
120 struct dictionary *dict;
121 struct data_list_pgm *dls;
122 int table = -1; /* Print table if nonzero, -1=undecided. */
123 struct file_handle *fh = NULL;
124 struct pool *tmp_pool;
127 dict = in_input_program () ? dataset_dict (ds) : dict_create ();
129 dls = pool_create_container (struct data_list_pgm, pool);
130 ll_init (&dls->specs);
135 dls->skip_records = 0;
136 ds_init_empty (&dls->delims);
137 ds_register_pool (&dls->delims, dls->pool);
139 tmp_pool = pool_create_subpool (dls->pool);
141 while (lex_token (lexer) != '/')
143 if (lex_match_id (lexer, "FILE"))
145 lex_match (lexer, '=');
147 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
151 else if (lex_match_id (lexer, "RECORDS"))
153 lex_match (lexer, '=');
154 lex_match (lexer, '(');
155 if (!lex_force_int (lexer))
157 dls->record_cnt = lex_integer (lexer);
159 lex_match (lexer, ')');
161 else if (lex_match_id (lexer, "SKIP"))
163 lex_match (lexer, '=');
164 if (!lex_force_int (lexer))
166 dls->skip_records = lex_integer (lexer);
169 else if (lex_match_id (lexer, "END"))
173 msg (SE, _("The END subcommand may only be specified once."));
177 lex_match (lexer, '=');
178 if (!lex_force_id (lexer))
180 dls->end = dict_lookup_var (dict, lex_tokid (lexer));
182 dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
185 else if (lex_token (lexer) == T_ID)
187 if (lex_match_id (lexer, "NOTABLE"))
189 else if (lex_match_id (lexer, "TABLE"))
194 if (lex_match_id (lexer, "FIXED"))
196 else if (lex_match_id (lexer, "FREE"))
198 else if (lex_match_id (lexer, "LIST"))
202 lex_error (lexer, NULL);
208 msg (SE, _("Only one of FIXED, FREE, or LIST may "
214 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
215 && lex_match (lexer, '('))
217 while (!lex_match (lexer, ')'))
221 if (lex_match_id (lexer, "TAB"))
223 else if (lex_token (lexer) == T_STRING && ds_length (lex_tokstr (lexer)) == 1)
225 delim = ds_first (lex_tokstr (lexer));
230 lex_error (lexer, NULL);
234 ds_put_char (&dls->delims, delim);
236 lex_match (lexer, ',');
243 lex_error (lexer, NULL);
249 fh = fh_inline_file ();
250 fh_set_default_handle (fh);
253 dls->type = DLS_FIXED;
255 if (dls->type != DLS_FIXED && dls->end != NULL)
257 msg (SE, _("The END keyword may be used only with DATA LIST FIXED."));
262 table = dls->type != DLS_FREE;
264 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (lexer, dict, tmp_pool, dls);
268 if (lex_end_of_command (lexer) != CMD_SUCCESS)
273 if (dls->type == DLS_FIXED)
274 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
276 dump_free_table (dls, fh);
279 dls->reader = dfm_open_reader (fh, lexer);
280 if (dls->reader == NULL)
283 dls->value_cnt = dict_get_next_value_idx (dict);
285 if (in_input_program ())
286 add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
289 struct casereader *reader;
290 reader = casereader_create_sequential (NULL,
291 dict_get_next_value_idx (dict),
292 -1, &data_list_casereader_class,
294 proc_set_active_file (ds, reader, dict);
297 pool_destroy (tmp_pool);
304 data_list_trns_free (dls);
305 return CMD_CASCADING_FAILURE;
308 /* Fixed-format parsing. */
310 /* Parses all the variable specifications for DATA LIST FIXED,
311 storing them into DLS. Uses TMP_POOL for data that is not
312 needed once parsing is complete. Returns true only if
315 parse_fixed (struct lexer *lexer, struct dictionary *dict,
316 struct pool *tmp_pool, struct data_list_pgm *dls)
318 int last_nonempty_record;
322 while (lex_token (lexer) != '.')
325 size_t name_cnt, name_idx;
326 struct fmt_spec *formats, *f;
329 /* Parse everything. */
330 if (!parse_record_placement (lexer, &record, &column)
331 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
332 &names, &name_cnt, PV_NONE)
333 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
334 &formats, &format_cnt))
337 /* Create variables and var specs. */
339 for (f = formats; f < &formats[format_cnt]; f++)
340 if (!execute_placement_format (f, &record, &column))
345 struct dls_var_spec *spec;
347 name = names[name_idx++];
349 /* Create variable. */
350 width = fmt_var_width (f);
351 v = dict_create_var (dict, name, width);
355 struct fmt_spec output = fmt_for_output_from_input (f);
356 var_set_both_formats (v, &output);
361 This can be acceptable if we're in INPUT
362 PROGRAM, but only if the existing variable has
363 the same width as the one we would have
365 if (!in_input_program ())
367 msg (SE, _("%s is a duplicate variable name."), name);
371 v = dict_lookup_var_assert (dict, name);
372 if ((width != 0) != (var_get_width (v) != 0))
374 msg (SE, _("There is already a variable %s of a "
379 if (width != 0 && width != var_get_width (v))
381 msg (SE, _("There is already a string variable %s of a "
382 "different width."), name);
387 /* Create specifier for parsing the variable. */
388 spec = pool_alloc (dls->pool, sizeof *spec);
390 spec->fv = var_get_case_index (v);
391 spec->record = record;
392 spec->first_column = column;
393 strcpy (spec->name, var_get_name (v));
394 ll_push_tail (&dls->specs, &spec->ll);
398 assert (name_idx == name_cnt);
400 if (ll_is_empty (&dls->specs))
402 msg (SE, _("At least one variable must be specified."));
406 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
407 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
409 msg (SE, _("Variables are specified on records that "
410 "should not exist according to RECORDS subcommand."));
413 else if (!dls->record_cnt)
414 dls->record_cnt = last_nonempty_record;
419 /* Displays a table giving information on fixed-format variable
420 parsing on DATA LIST. */
422 dump_fixed_table (const struct ll_list *specs,
423 const struct file_handle *fh, int record_cnt)
427 struct dls_var_spec *spec;
430 spec_cnt = ll_count (specs);
431 t = tab_create (4, spec_cnt + 1, 0);
432 tab_columns (t, TAB_COL_DOWN, 1);
433 tab_headers (t, 0, 0, 1, 0);
434 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
435 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
436 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
437 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
438 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
439 tab_hline (t, TAL_2, 0, 3, 1);
440 tab_dim (t, tab_natural_dimensions);
443 ll_for_each (spec, struct dls_var_spec, ll, specs)
445 char fmt_string[FMT_STRING_LEN_MAX + 1];
446 tab_text (t, 0, row, TAB_LEFT, spec->name);
447 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
448 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
449 spec->first_column, spec->first_column + spec->input.w - 1);
450 tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
451 fmt_to_string (&spec->input, fmt_string));
455 tab_title (t, ngettext ("Reading %d record from %s.",
456 "Reading %d records from %s.", record_cnt),
457 record_cnt, fh_get_name (fh));
461 /* Free-format parsing. */
463 /* Parses variable specifications for DATA LIST FREE and adds
464 them to DLS. Uses TMP_POOL for data that is not needed once
465 parsing is complete. Returns true only if successful. */
467 parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool,
468 struct data_list_pgm *dls)
471 while (lex_token (lexer) != '.')
473 struct fmt_spec input, output;
478 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
479 &name, &name_cnt, PV_NONE))
482 if (lex_match (lexer, '('))
484 if (!parse_format_specifier (lexer, &input)
485 || !fmt_check_input (&input)
486 || !lex_force_match (lexer, ')'))
489 /* As a special case, N format is treated as F format
490 for free-field input. */
491 if (input.type == FMT_N)
494 output = fmt_for_output_from_input (&input);
498 lex_match (lexer, '*');
499 input = fmt_for_input (FMT_F, 8, 0);
500 output = *get_format ();
503 for (i = 0; i < name_cnt; i++)
505 struct dls_var_spec *spec;
508 v = dict_create_var (dict, name[i], fmt_var_width (&input));
511 msg (SE, _("%s is a duplicate variable name."), name[i]);
514 var_set_both_formats (v, &output);
516 spec = pool_alloc (dls->pool, sizeof *spec);
518 spec->fv = var_get_case_index (v);
519 strcpy (spec->name, var_get_name (v));
520 ll_push_tail (&dls->specs, &spec->ll);
527 /* Displays a table giving information on free-format variable parsing
530 dump_free_table (const struct data_list_pgm *dls,
531 const struct file_handle *fh)
534 struct dls_var_spec *spec;
538 spec_cnt = ll_count (&dls->specs);
540 t = tab_create (2, spec_cnt + 1, 0);
541 tab_columns (t, TAB_COL_DOWN, 1);
542 tab_headers (t, 0, 0, 1, 0);
543 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
544 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
545 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
546 tab_hline (t, TAL_2, 0, 1, 1);
547 tab_dim (t, tab_natural_dimensions);
549 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
551 char str[FMT_STRING_LEN_MAX + 1];
552 tab_text (t, 0, row, TAB_LEFT, spec->name);
553 tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
554 fmt_to_string (&spec->input, str));
558 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
563 /* Input procedure. */
565 /* Extracts a field from the current position in the current
566 record. Fields can be unquoted or quoted with single- or
567 double-quote characters.
569 *FIELD is set to the field content. The caller must not
570 or destroy this constant string.
572 After parsing the field, sets the current position in the
573 record to just past the field and any trailing delimiter.
574 Returns 0 on failure or a 1-based column number indicating the
575 beginning of the field on success. */
577 cut_field (const struct data_list_pgm *dls, struct substring *field)
579 struct substring line, p;
581 if (dfm_eof (dls->reader))
583 if (ds_is_empty (&dls->delims))
584 dfm_expand_tabs (dls->reader);
585 line = p = dfm_get_record (dls->reader);
587 if (ds_is_empty (&dls->delims))
589 bool missing_quote = false;
591 /* Skip leading whitespace. */
592 ss_ltrim (&p, ss_cstr (CC_SPACES));
596 /* Handle actual data, whether quoted or unquoted. */
597 if (ss_match_char (&p, '\''))
598 missing_quote = !ss_get_until (&p, '\'', field);
599 else if (ss_match_char (&p, '"'))
600 missing_quote = !ss_get_until (&p, '"', field);
602 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
604 msg (SW, _("Quoted string extends beyond end of line."));
606 /* Skip trailing whitespace and a single comma if present. */
607 ss_ltrim (&p, ss_cstr (CC_SPACES));
608 ss_match_char (&p, ',');
610 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
614 if (!ss_is_empty (p))
615 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
616 else if (dfm_columns_past_end (dls->reader) == 0)
618 /* A blank line or a line that ends in a delimiter has a
619 trailing blank field. */
625 /* Advance past the field.
627 Also advance past a trailing delimiter, regardless of
628 whether one actually existed. If we "skip" a delimiter
629 that was not actually there, then we will return
630 end-of-line on our next call, which is what we want. */
631 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
636 static bool read_from_data_list_fixed (const struct data_list_pgm *,
638 static bool read_from_data_list_free (const struct data_list_pgm *,
640 static bool read_from_data_list_list (const struct data_list_pgm *,
643 /* Reads a case from DLS into C.
644 Returns true if successful, false at end of file or on I/O error. */
646 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
650 dfm_push (dls->reader);
654 retval = read_from_data_list_fixed (dls, c);
657 retval = read_from_data_list_free (dls, c);
660 retval = read_from_data_list_list (dls, c);
665 dfm_pop (dls->reader);
670 /* Reads a case from the data file into C, parsing it according
671 to fixed-format syntax rules in DLS.
672 Returns true if successful, false at end of file or on I/O error. */
674 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
676 enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (dls->reader);
677 struct dls_var_spec *spec;
680 if (dfm_eof (dls->reader))
683 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
684 for (row = 1; row <= dls->record_cnt; row++)
686 struct substring line;
688 if (dfm_eof (dls->reader))
690 msg (SW, _("Partial case of %d of %d records discarded."),
691 row - 1, dls->record_cnt);
694 dfm_expand_tabs (dls->reader);
695 line = dfm_get_record (dls->reader);
697 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
699 if (row < spec->record)
702 data_in (ss_substr (line, spec->first_column - 1,
704 encoding, spec->input.type, spec->input.d,
705 spec->first_column, case_data_rw_idx (c, spec->fv),
706 fmt_var_width (&spec->input));
709 dfm_forward_record (dls->reader);
715 /* Reads a case from the data file into C, parsing it according
716 to free-format syntax rules in DLS.
717 Returns true if successful, false at end of file or on I/O error. */
719 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
721 enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (dls->reader);
722 struct dls_var_spec *spec;
724 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
726 struct substring field;
728 /* Cut out a field and read in a new record if necessary. */
729 while (!cut_field (dls, &field))
731 if (!dfm_eof (dls->reader))
732 dfm_forward_record (dls->reader);
733 if (dfm_eof (dls->reader))
735 if (&spec->ll != ll_head (&dls->specs))
736 msg (SW, _("Partial case discarded. The first variable "
737 "missing was %s."), spec->name);
742 data_in (field, encoding, spec->input.type, 0,
743 dfm_get_column (dls->reader, ss_data (field)),
744 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
749 /* Reads a case from the data file and parses it according to
750 list-format syntax rules.
751 Returns true if successful, false at end of file or on I/O error. */
753 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
755 enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (dls->reader);
756 struct dls_var_spec *spec;
758 if (dfm_eof (dls->reader))
761 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
763 struct substring field;
765 if (!cut_field (dls, &field))
767 if (get_undefined ())
768 msg (SW, _("Missing value(s) for all variables from %s onward. "
769 "These will be filled with the system-missing value "
770 "or blanks, as appropriate."),
772 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
774 int width = fmt_var_width (&spec->input);
776 case_data_rw_idx (c, spec->fv)->f = SYSMIS;
778 memset (case_data_rw_idx (c, spec->fv)->s, ' ', width);
783 data_in (field, encoding, spec->input.type, 0,
784 dfm_get_column (dls->reader, ss_data (field)),
785 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
788 dfm_forward_record (dls->reader);
792 /* Destroys DATA LIST transformation DLS.
793 Returns true if successful, false if an I/O error occurred. */
795 data_list_trns_free (void *dls_)
797 struct data_list_pgm *dls = dls_;
798 dfm_close_reader (dls->reader);
799 pool_destroy (dls->pool);
803 /* Handle DATA LIST transformation DLS, parsing data into C. */
805 data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
807 struct data_list_pgm *dls = dls_;
810 if (read_from_data_list (dls, c))
811 retval = TRNS_CONTINUE;
812 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
814 /* An I/O error, or encountering end of file for a second
815 time, should be escalated into a more serious error. */
819 retval = TRNS_END_FILE;
821 /* If there was an END subcommand handle it. */
822 if (dls->end != NULL)
824 double *end = &case_data_rw (c, dls->end)->f;
825 if (retval == TRNS_END_FILE)
828 retval = TRNS_CONTINUE;
837 /* Reads one case into OUTPUT_CASE.
838 Returns true if successful, false at end of file or if an
839 I/O error occurred. */
841 data_list_casereader_read (struct casereader *reader UNUSED, void *dls_,
844 struct data_list_pgm *dls = dls_;
847 /* Skip the requested number of records before reading the
849 while (dls->skip_records > 0)
851 if (dfm_eof (dls->reader))
853 dfm_forward_record (dls->reader);
857 case_create (c, dls->value_cnt);
858 ok = read_from_data_list (dls, c);
864 /* Destroys the casereader. */
866 data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_)
868 struct data_list_pgm *dls = dls_;
869 if (dfm_reader_error (dls->reader))
870 casereader_force_error (reader);
871 data_list_trns_free (dls);
874 static const struct casereader_class data_list_casereader_class =
876 data_list_casereader_read,
877 data_list_casereader_destroy,