1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include <data/case.h>
25 #include <data/data-in.h>
26 #include <data/casereader.h>
27 #include <data/casereader-provider.h>
28 #include <data/dictionary.h>
29 #include <data/format.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/data-io/data-reader.h>
36 #include <language/data-io/file-handle.h>
37 #include <language/data-io/inpt-pgm.h>
38 #include <language/data-io/placement-parser.h>
39 #include <language/lexer/format-parser.h>
40 #include <language/lexer/lexer.h>
41 #include <language/lexer/variable-parser.h>
42 #include <libpspp/alloc.h>
43 #include <libpspp/assertion.h>
44 #include <libpspp/compiler.h>
45 #include <libpspp/ll.h>
46 #include <libpspp/message.h>
47 #include <libpspp/misc.h>
48 #include <libpspp/pool.h>
49 #include <libpspp/str.h>
50 #include <output/table.h>
56 #define _(msgid) gettext (msgid)
58 /* Utility function. */
60 /* Describes how to parse one variable. */
63 struct ll ll; /* List element. */
66 struct fmt_spec input; /* Input format of this field. */
67 int fv; /* First value in case. */
68 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
70 /* Fixed format only. */
71 int record; /* Record number (1-based). */
72 int first_column; /* Column numbers in record. */
75 static struct dls_var_spec *
76 ll_to_dls_var_spec (struct ll *ll)
78 return ll_data (ll, struct dls_var_spec, ll);
81 /* Constants for DATA LIST type. */
89 /* DATA LIST private data structure. */
92 struct pool *pool; /* Used for all DATA LIST storage. */
93 struct ll_list specs; /* List of dls_var_specs. */
94 struct dfm_reader *reader; /* Data file reader. */
95 enum dls_type type; /* Type of DATA LIST construct. */
96 struct variable *end; /* Variable specified on END subcommand. */
97 int record_cnt; /* Number of records. */
98 struct string delims; /* Field delimiters. */
99 int skip_records; /* Records to skip before first case. */
100 size_t value_cnt; /* Number of `union value's in case. */
103 static const struct casereader_class data_list_casereader_class;
105 static bool parse_fixed (struct lexer *, struct dictionary *dict,
106 struct pool *tmp_pool, struct data_list_pgm *);
107 static bool parse_free (struct lexer *, struct dictionary *dict,
108 struct pool *tmp_pool, struct data_list_pgm *);
109 static void dump_fixed_table (const struct ll_list *,
110 const struct file_handle *, int record_cnt);
111 static void dump_free_table (const struct data_list_pgm *,
112 const struct file_handle *);
114 static trns_free_func data_list_trns_free;
115 static trns_proc_func data_list_trns_proc;
118 cmd_data_list (struct lexer *lexer, struct dataset *ds)
120 struct dictionary *dict;
121 struct data_list_pgm *dls;
122 int table = -1; /* Print table if nonzero, -1=undecided. */
123 struct file_handle *fh = fh_inline_file ();
124 struct pool *tmp_pool;
127 dict = in_input_program () ? dataset_dict (ds) : dict_create ();
129 dls = pool_create_container (struct data_list_pgm, pool);
130 ll_init (&dls->specs);
135 dls->skip_records = 0;
136 ds_init_empty (&dls->delims);
137 ds_register_pool (&dls->delims, dls->pool);
139 tmp_pool = pool_create_subpool (dls->pool);
141 while (lex_token (lexer) != '/')
143 if (lex_match_id (lexer, "FILE"))
145 lex_match (lexer, '=');
146 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
150 else if (lex_match_id (lexer, "RECORDS"))
152 lex_match (lexer, '=');
153 lex_match (lexer, '(');
154 if (!lex_force_int (lexer))
156 dls->record_cnt = lex_integer (lexer);
158 lex_match (lexer, ')');
160 else if (lex_match_id (lexer, "SKIP"))
162 lex_match (lexer, '=');
163 if (!lex_force_int (lexer))
165 dls->skip_records = lex_integer (lexer);
168 else if (lex_match_id (lexer, "END"))
172 msg (SE, _("The END subcommand may only be specified once."));
176 lex_match (lexer, '=');
177 if (!lex_force_id (lexer))
179 dls->end = dict_lookup_var (dict, lex_tokid (lexer));
181 dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
184 else if (lex_token (lexer) == T_ID)
186 if (lex_match_id (lexer, "NOTABLE"))
188 else if (lex_match_id (lexer, "TABLE"))
193 if (lex_match_id (lexer, "FIXED"))
195 else if (lex_match_id (lexer, "FREE"))
197 else if (lex_match_id (lexer, "LIST"))
201 lex_error (lexer, NULL);
207 msg (SE, _("Only one of FIXED, FREE, or LIST may "
213 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
214 && lex_match (lexer, '('))
216 while (!lex_match (lexer, ')'))
220 if (lex_match_id (lexer, "TAB"))
222 else if (lex_token (lexer) == T_STRING && ds_length (lex_tokstr (lexer)) == 1)
224 delim = ds_first (lex_tokstr (lexer));
229 lex_error (lexer, NULL);
233 ds_put_char (&dls->delims, delim);
235 lex_match (lexer, ',');
242 lex_error (lexer, NULL);
247 fh_set_default_handle (fh);
250 dls->type = DLS_FIXED;
253 table = dls->type != DLS_FREE;
255 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (lexer, dict, tmp_pool, dls);
259 if (lex_end_of_command (lexer) != CMD_SUCCESS)
264 if (dls->type == DLS_FIXED)
265 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
267 dump_free_table (dls, fh);
270 dls->reader = dfm_open_reader (fh, lexer);
271 if (dls->reader == NULL)
274 dls->value_cnt = dict_get_next_value_idx (dict);
276 if (in_input_program ())
277 add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
280 struct casereader *reader;
281 reader = casereader_create_sequential (NULL,
282 dict_get_next_value_idx (dict),
283 -1, &data_list_casereader_class,
285 proc_set_active_file (ds, reader, dict);
288 pool_destroy (tmp_pool);
293 data_list_trns_free (dls);
294 return CMD_CASCADING_FAILURE;
297 /* Fixed-format parsing. */
299 /* Parses all the variable specifications for DATA LIST FIXED,
300 storing them into DLS. Uses TMP_POOL for data that is not
301 needed once parsing is complete. Returns true only if
304 parse_fixed (struct lexer *lexer, struct dictionary *dict,
305 struct pool *tmp_pool, struct data_list_pgm *dls)
307 int last_nonempty_record;
311 while (lex_token (lexer) != '.')
314 size_t name_cnt, name_idx;
315 struct fmt_spec *formats, *f;
318 /* Parse everything. */
319 if (!parse_record_placement (lexer, &record, &column)
320 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
321 &names, &name_cnt, PV_NONE)
322 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
323 &formats, &format_cnt))
326 /* Create variables and var specs. */
328 for (f = formats; f < &formats[format_cnt]; f++)
329 if (!execute_placement_format (f, &record, &column))
334 struct dls_var_spec *spec;
336 name = names[name_idx++];
338 /* Create variable. */
339 width = fmt_var_width (f);
340 v = dict_create_var (dict, name, width);
344 struct fmt_spec output = fmt_for_output_from_input (f);
345 var_set_both_formats (v, &output);
350 This can be acceptable if we're in INPUT
351 PROGRAM, but only if the existing variable has
352 the same width as the one we would have
354 if (!in_input_program ())
356 msg (SE, _("%s is a duplicate variable name."), name);
360 v = dict_lookup_var_assert (dict, name);
361 if ((width != 0) != (var_get_width (v) != 0))
363 msg (SE, _("There is already a variable %s of a "
368 if (width != 0 && width != var_get_width (v))
370 msg (SE, _("There is already a string variable %s of a "
371 "different width."), name);
376 /* Create specifier for parsing the variable. */
377 spec = pool_alloc (dls->pool, sizeof *spec);
379 spec->fv = var_get_case_index (v);
380 spec->record = record;
381 spec->first_column = column;
382 strcpy (spec->name, var_get_name (v));
383 ll_push_tail (&dls->specs, &spec->ll);
387 assert (name_idx == name_cnt);
389 if (ll_is_empty (&dls->specs))
391 msg (SE, _("At least one variable must be specified."));
395 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
396 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
398 msg (SE, _("Variables are specified on records that "
399 "should not exist according to RECORDS subcommand."));
402 else if (!dls->record_cnt)
403 dls->record_cnt = last_nonempty_record;
408 /* Displays a table giving information on fixed-format variable
409 parsing on DATA LIST. */
411 dump_fixed_table (const struct ll_list *specs,
412 const struct file_handle *fh, int record_cnt)
416 struct dls_var_spec *spec;
419 spec_cnt = ll_count (specs);
420 t = tab_create (4, spec_cnt + 1, 0);
421 tab_columns (t, TAB_COL_DOWN, 1);
422 tab_headers (t, 0, 0, 1, 0);
423 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
424 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
425 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
426 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
427 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
428 tab_hline (t, TAL_2, 0, 3, 1);
429 tab_dim (t, tab_natural_dimensions);
432 ll_for_each (spec, struct dls_var_spec, ll, specs)
434 char fmt_string[FMT_STRING_LEN_MAX + 1];
435 tab_text (t, 0, row, TAB_LEFT, spec->name);
436 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
437 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
438 spec->first_column, spec->first_column + spec->input.w - 1);
439 tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
440 fmt_to_string (&spec->input, fmt_string));
444 tab_title (t, ngettext ("Reading %d record from %s.",
445 "Reading %d records from %s.", record_cnt),
446 record_cnt, fh_get_name (fh));
450 /* Free-format parsing. */
452 /* Parses variable specifications for DATA LIST FREE and adds
453 them to DLS. Uses TMP_POOL for data that is not needed once
454 parsing is complete. Returns true only if successful. */
456 parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool,
457 struct data_list_pgm *dls)
460 while (lex_token (lexer) != '.')
462 struct fmt_spec input, output;
467 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
468 &name, &name_cnt, PV_NONE))
471 if (lex_match (lexer, '('))
473 if (!parse_format_specifier (lexer, &input)
474 || !fmt_check_input (&input)
475 || !lex_force_match (lexer, ')'))
478 /* As a special case, N format is treated as F format
479 for free-field input. */
480 if (input.type == FMT_N)
483 output = fmt_for_output_from_input (&input);
487 lex_match (lexer, '*');
488 input = fmt_for_input (FMT_F, 8, 0);
489 output = *get_format ();
492 for (i = 0; i < name_cnt; i++)
494 struct dls_var_spec *spec;
497 v = dict_create_var (dict, name[i], fmt_var_width (&input));
500 msg (SE, _("%s is a duplicate variable name."), name[i]);
503 var_set_both_formats (v, &output);
505 spec = pool_alloc (dls->pool, sizeof *spec);
507 spec->fv = var_get_case_index (v);
508 strcpy (spec->name, var_get_name (v));
509 ll_push_tail (&dls->specs, &spec->ll);
516 /* Displays a table giving information on free-format variable parsing
519 dump_free_table (const struct data_list_pgm *dls,
520 const struct file_handle *fh)
523 struct dls_var_spec *spec;
527 spec_cnt = ll_count (&dls->specs);
529 t = tab_create (2, spec_cnt + 1, 0);
530 tab_columns (t, TAB_COL_DOWN, 1);
531 tab_headers (t, 0, 0, 1, 0);
532 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
533 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
534 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
535 tab_hline (t, TAL_2, 0, 1, 1);
536 tab_dim (t, tab_natural_dimensions);
538 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
540 char str[FMT_STRING_LEN_MAX + 1];
541 tab_text (t, 0, row, TAB_LEFT, spec->name);
542 tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
543 fmt_to_string (&spec->input, str));
547 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
552 /* Input procedure. */
554 /* Extracts a field from the current position in the current
555 record. Fields can be unquoted or quoted with single- or
556 double-quote characters.
558 *FIELD is set to the field content. The caller must not
559 or destroy this constant string.
561 After parsing the field, sets the current position in the
562 record to just past the field and any trailing delimiter.
563 Returns 0 on failure or a 1-based column number indicating the
564 beginning of the field on success. */
566 cut_field (const struct data_list_pgm *dls, struct substring *field)
568 struct substring line, p;
570 if (dfm_eof (dls->reader))
572 if (ds_is_empty (&dls->delims))
573 dfm_expand_tabs (dls->reader);
574 line = p = dfm_get_record (dls->reader);
576 if (ds_is_empty (&dls->delims))
578 bool missing_quote = false;
580 /* Skip leading whitespace. */
581 ss_ltrim (&p, ss_cstr (CC_SPACES));
585 /* Handle actual data, whether quoted or unquoted. */
586 if (ss_match_char (&p, '\''))
587 missing_quote = !ss_get_until (&p, '\'', field);
588 else if (ss_match_char (&p, '"'))
589 missing_quote = !ss_get_until (&p, '"', field);
591 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
593 msg (SW, _("Quoted string extends beyond end of line."));
595 /* Skip trailing whitespace and a single comma if present. */
596 ss_ltrim (&p, ss_cstr (CC_SPACES));
597 ss_match_char (&p, ',');
599 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
603 if (!ss_is_empty (p))
604 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
605 else if (dfm_columns_past_end (dls->reader) == 0)
607 /* A blank line or a line that ends in a delimiter has a
608 trailing blank field. */
614 /* Advance past the field.
616 Also advance past a trailing delimiter, regardless of
617 whether one actually existed. If we "skip" a delimiter
618 that was not actually there, then we will return
619 end-of-line on our next call, which is what we want. */
620 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
625 static bool read_from_data_list_fixed (const struct data_list_pgm *,
627 static bool read_from_data_list_free (const struct data_list_pgm *,
629 static bool read_from_data_list_list (const struct data_list_pgm *,
632 /* Reads a case from DLS into C.
633 Returns true if successful, false at end of file or on I/O error. */
635 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
639 dfm_push (dls->reader);
643 retval = read_from_data_list_fixed (dls, c);
646 retval = read_from_data_list_free (dls, c);
649 retval = read_from_data_list_list (dls, c);
654 dfm_pop (dls->reader);
659 /* Reads a case from the data file into C, parsing it according
660 to fixed-format syntax rules in DLS.
661 Returns true if successful, false at end of file or on I/O error. */
663 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
665 struct dls_var_spec *spec;
668 if (dfm_eof (dls->reader))
671 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
672 for (row = 1; row <= dls->record_cnt; row++)
674 struct substring line;
676 if (dfm_eof (dls->reader))
678 msg (SW, _("Partial case of %d of %d records discarded."),
679 row - 1, dls->record_cnt);
682 dfm_expand_tabs (dls->reader);
683 line = dfm_get_record (dls->reader);
685 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
687 if (row < spec->record)
690 data_in (ss_substr (line, spec->first_column - 1, spec->input.w),
691 spec->input.type, spec->input.d, spec->first_column,
692 case_data_rw_idx (c, spec->fv),
693 fmt_var_width (&spec->input));
696 dfm_forward_record (dls->reader);
702 /* Reads a case from the data file into C, parsing it according
703 to free-format syntax rules in DLS.
704 Returns true if successful, false at end of file or on I/O error. */
706 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
708 struct dls_var_spec *spec;
710 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
712 struct substring field;
714 /* Cut out a field and read in a new record if necessary. */
715 while (!cut_field (dls, &field))
717 if (!dfm_eof (dls->reader))
718 dfm_forward_record (dls->reader);
719 if (dfm_eof (dls->reader))
721 if (&spec->ll != ll_head (&dls->specs))
722 msg (SW, _("Partial case discarded. The first variable "
723 "missing was %s."), spec->name);
728 data_in (field, spec->input.type, 0,
729 dfm_get_column (dls->reader, ss_data (field)),
730 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
735 /* Reads a case from the data file and parses it according to
736 list-format syntax rules.
737 Returns true if successful, false at end of file or on I/O error. */
739 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
741 struct dls_var_spec *spec;
743 if (dfm_eof (dls->reader))
746 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
748 struct substring field;
750 if (!cut_field (dls, &field))
752 if (get_undefined ())
753 msg (SW, _("Missing value(s) for all variables from %s onward. "
754 "These will be filled with the system-missing value "
755 "or blanks, as appropriate."),
757 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
759 int width = fmt_var_width (&spec->input);
761 case_data_rw_idx (c, spec->fv)->f = SYSMIS;
763 memset (case_data_rw_idx (c, spec->fv)->s, ' ', width);
768 data_in (field, spec->input.type, 0,
769 dfm_get_column (dls->reader, ss_data (field)),
770 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
773 dfm_forward_record (dls->reader);
777 /* Destroys DATA LIST transformation DLS.
778 Returns true if successful, false if an I/O error occurred. */
780 data_list_trns_free (void *dls_)
782 struct data_list_pgm *dls = dls_;
783 dfm_close_reader (dls->reader);
784 pool_destroy (dls->pool);
788 /* Handle DATA LIST transformation DLS, parsing data into C. */
790 data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
792 struct data_list_pgm *dls = dls_;
795 if (read_from_data_list (dls, c))
796 retval = TRNS_CONTINUE;
797 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
799 /* An I/O error, or encountering end of file for a second
800 time, should be escalated into a more serious error. */
804 retval = TRNS_END_FILE;
806 /* If there was an END subcommand handle it. */
807 if (dls->end != NULL)
809 double *end = &case_data_rw (c, dls->end)->f;
810 if (retval == TRNS_DROP_CASE)
813 retval = TRNS_END_FILE;
822 /* Reads one case into OUTPUT_CASE.
823 Returns true if successful, false at end of file or if an
824 I/O error occurred. */
826 data_list_casereader_read (struct casereader *reader UNUSED, void *dls_,
829 struct data_list_pgm *dls = dls_;
832 /* Skip the requested number of records before reading the
834 while (dls->skip_records > 0)
836 if (dfm_eof (dls->reader))
838 dfm_forward_record (dls->reader);
842 case_create (c, dls->value_cnt);
843 ok = read_from_data_list (dls, c);
849 /* Destroys the casereader. */
851 data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_)
853 struct data_list_pgm *dls = dls_;
854 if (dfm_reader_error (dls->reader))
855 casereader_force_error (reader);
856 data_list_trns_free (dls);
859 static const struct casereader_class data_list_casereader_class =
861 data_list_casereader_read,
862 data_list_casereader_destroy,