1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include <data/case.h>
25 #include <data/data-in.h>
26 #include <data/casereader.h>
27 #include <data/casereader-provider.h>
28 #include <data/dictionary.h>
29 #include <data/format.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/data-io/data-reader.h>
36 #include <language/data-io/file-handle.h>
37 #include <language/data-io/inpt-pgm.h>
38 #include <language/data-io/placement-parser.h>
39 #include <language/lexer/format-parser.h>
40 #include <language/lexer/lexer.h>
41 #include <language/lexer/variable-parser.h>
42 #include <libpspp/alloc.h>
43 #include <libpspp/assertion.h>
44 #include <libpspp/compiler.h>
45 #include <libpspp/ll.h>
46 #include <libpspp/message.h>
47 #include <libpspp/misc.h>
48 #include <libpspp/pool.h>
49 #include <libpspp/str.h>
50 #include <output/table.h>
56 #define _(msgid) gettext (msgid)
58 /* Utility function. */
60 /* Describes how to parse one variable. */
63 struct ll ll; /* List element. */
66 struct fmt_spec input; /* Input format of this field. */
67 int fv; /* First value in case. */
68 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
70 /* Fixed format only. */
71 int record; /* Record number (1-based). */
72 int first_column; /* Column numbers in record. */
75 static struct dls_var_spec *
76 ll_to_dls_var_spec (struct ll *ll)
78 return ll_data (ll, struct dls_var_spec, ll);
81 /* Constants for DATA LIST type. */
89 /* DATA LIST private data structure. */
92 struct pool *pool; /* Used for all DATA LIST storage. */
93 struct ll_list specs; /* List of dls_var_specs. */
94 struct dfm_reader *reader; /* Data file reader. */
95 enum dls_type type; /* Type of DATA LIST construct. */
96 struct variable *end; /* Variable specified on END subcommand. */
97 int record_cnt; /* Number of records. */
98 struct string delims; /* Field delimiters. */
99 int skip_records; /* Records to skip before first case. */
100 size_t value_cnt; /* Number of `union value's in case. */
103 static const struct casereader_class data_list_casereader_class;
105 static bool parse_fixed (struct lexer *, struct dictionary *dict,
106 struct pool *tmp_pool, struct data_list_pgm *);
107 static bool parse_free (struct lexer *, struct dictionary *dict,
108 struct pool *tmp_pool, struct data_list_pgm *);
109 static void dump_fixed_table (const struct ll_list *,
110 const struct file_handle *, int record_cnt);
111 static void dump_free_table (const struct data_list_pgm *,
112 const struct file_handle *);
114 static trns_free_func data_list_trns_free;
115 static trns_proc_func data_list_trns_proc;
118 cmd_data_list (struct lexer *lexer, struct dataset *ds)
120 struct dictionary *dict;
121 struct data_list_pgm *dls;
122 int table = -1; /* Print table if nonzero, -1=undecided. */
123 struct file_handle *fh = fh_inline_file ();
124 struct pool *tmp_pool;
127 dict = in_input_program () ? dataset_dict (ds) : dict_create ();
129 dls = pool_create_container (struct data_list_pgm, pool);
130 ll_init (&dls->specs);
135 dls->skip_records = 0;
136 ds_init_empty (&dls->delims);
137 ds_register_pool (&dls->delims, dls->pool);
139 tmp_pool = pool_create_subpool (dls->pool);
141 while (lex_token (lexer) != '/')
143 if (lex_match_id (lexer, "FILE"))
145 lex_match (lexer, '=');
146 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
150 else if (lex_match_id (lexer, "RECORDS"))
152 lex_match (lexer, '=');
153 lex_match (lexer, '(');
154 if (!lex_force_int (lexer))
156 dls->record_cnt = lex_integer (lexer);
158 lex_match (lexer, ')');
160 else if (lex_match_id (lexer, "SKIP"))
162 lex_match (lexer, '=');
163 if (!lex_force_int (lexer))
165 dls->skip_records = lex_integer (lexer);
168 else if (lex_match_id (lexer, "END"))
172 msg (SE, _("The END subcommand may only be specified once."));
176 lex_match (lexer, '=');
177 if (!lex_force_id (lexer))
179 dls->end = dict_lookup_var (dict, lex_tokid (lexer));
181 dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
184 else if (lex_token (lexer) == T_ID)
186 if (lex_match_id (lexer, "NOTABLE"))
188 else if (lex_match_id (lexer, "TABLE"))
193 if (lex_match_id (lexer, "FIXED"))
195 else if (lex_match_id (lexer, "FREE"))
197 else if (lex_match_id (lexer, "LIST"))
201 lex_error (lexer, NULL);
207 msg (SE, _("Only one of FIXED, FREE, or LIST may "
213 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
214 && lex_match (lexer, '('))
216 while (!lex_match (lexer, ')'))
220 if (lex_match_id (lexer, "TAB"))
222 else if (lex_token (lexer) == T_STRING && ds_length (lex_tokstr (lexer)) == 1)
224 delim = ds_first (lex_tokstr (lexer));
229 lex_error (lexer, NULL);
233 ds_put_char (&dls->delims, delim);
235 lex_match (lexer, ',');
242 lex_error (lexer, NULL);
247 fh_set_default_handle (fh);
250 dls->type = DLS_FIXED;
253 table = dls->type != DLS_FREE;
255 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (lexer, dict, tmp_pool, dls);
259 if (lex_end_of_command (lexer) != CMD_SUCCESS)
264 if (dls->type == DLS_FIXED)
265 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
267 dump_free_table (dls, fh);
270 dls->reader = dfm_open_reader (fh, lexer);
271 if (dls->reader == NULL)
274 dls->value_cnt = dict_get_next_value_idx (dict);
276 if (in_input_program ())
277 add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
280 struct casereader *reader;
281 reader = casereader_create_sequential (NULL,
282 dict_get_next_value_idx (dict),
283 -1, &data_list_casereader_class,
285 proc_set_active_file (ds, reader, dict);
288 pool_destroy (tmp_pool);
293 data_list_trns_free (dls);
294 return CMD_CASCADING_FAILURE;
297 /* Fixed-format parsing. */
299 /* Parses all the variable specifications for DATA LIST FIXED,
300 storing them into DLS. Uses TMP_POOL for data that is not
301 needed once parsing is complete. Returns true only if
304 parse_fixed (struct lexer *lexer, struct dictionary *dict,
305 struct pool *tmp_pool, struct data_list_pgm *dls)
307 int last_nonempty_record;
311 while (lex_token (lexer) != '.')
314 size_t name_cnt, name_idx;
315 struct fmt_spec *formats, *f;
318 /* Parse everything. */
319 if (!parse_record_placement (lexer, &record, &column)
320 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
321 &names, &name_cnt, PV_NONE)
322 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
323 &formats, &format_cnt))
326 /* Create variables and var specs. */
328 for (f = formats; f < &formats[format_cnt]; f++)
329 if (!execute_placement_format (f, &record, &column))
334 struct dls_var_spec *spec;
336 name = names[name_idx++];
338 /* Create variable. */
339 width = fmt_var_width (f);
340 v = dict_create_var (dict, name, width);
344 struct fmt_spec output = fmt_for_output_from_input (f);
345 var_set_both_formats (v, &output);
350 This can be acceptable if we're in INPUT
351 PROGRAM, but only if the existing variable has
352 the same width as the one we would have
354 if (!in_input_program ())
356 msg (SE, _("%s is a duplicate variable name."), name);
360 v = dict_lookup_var_assert (dict, name);
361 if ((width != 0) != (var_get_width (v) != 0))
363 msg (SE, _("There is already a variable %s of a "
368 if (width != 0 && width != var_get_width (v))
370 msg (SE, _("There is already a string variable %s of a "
371 "different width."), name);
376 /* Create specifier for parsing the variable. */
377 spec = pool_alloc (dls->pool, sizeof *spec);
379 spec->fv = var_get_case_index (v);
380 spec->record = record;
381 spec->first_column = column;
382 strcpy (spec->name, var_get_name (v));
383 ll_push_tail (&dls->specs, &spec->ll);
387 assert (name_idx == name_cnt);
389 if (ll_is_empty (&dls->specs))
391 msg (SE, _("At least one variable must be specified."));
395 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
396 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
398 msg (SE, _("Variables are specified on records that "
399 "should not exist according to RECORDS subcommand."));
402 else if (!dls->record_cnt)
403 dls->record_cnt = last_nonempty_record;
408 /* Displays a table giving information on fixed-format variable
409 parsing on DATA LIST. */
411 dump_fixed_table (const struct ll_list *specs,
412 const struct file_handle *fh, int record_cnt)
416 struct dls_var_spec *spec;
419 spec_cnt = ll_count (specs);
420 t = tab_create (4, spec_cnt + 1, 0);
421 tab_columns (t, TAB_COL_DOWN, 1);
422 tab_headers (t, 0, 0, 1, 0);
423 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
424 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
425 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
426 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
427 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
428 tab_hline (t, TAL_2, 0, 3, 1);
429 tab_dim (t, tab_natural_dimensions);
432 ll_for_each (spec, struct dls_var_spec, ll, specs)
434 char fmt_string[FMT_STRING_LEN_MAX + 1];
435 tab_text (t, 0, row, TAB_LEFT, spec->name);
436 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
437 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
438 spec->first_column, spec->first_column + spec->input.w - 1);
439 tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
440 fmt_to_string (&spec->input, fmt_string));
444 tab_title (t, ngettext ("Reading %d record from %s.",
445 "Reading %d records from %s.", record_cnt),
446 record_cnt, fh_get_name (fh));
450 /* Free-format parsing. */
452 /* Parses variable specifications for DATA LIST FREE and adds
453 them to DLS. Uses TMP_POOL for data that is not needed once
454 parsing is complete. Returns true only if successful. */
456 parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool,
457 struct data_list_pgm *dls)
460 while (lex_token (lexer) != '.')
462 struct fmt_spec input, output;
467 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
468 &name, &name_cnt, PV_NONE))
471 if (lex_match (lexer, '('))
473 if (!parse_format_specifier (lexer, &input)
474 || !fmt_check_input (&input)
475 || !lex_force_match (lexer, ')'))
478 /* As a special case, N format is treated as F format
479 for free-field input. */
480 if (input.type == FMT_N)
483 output = fmt_for_output_from_input (&input);
487 lex_match (lexer, '*');
488 input = fmt_for_input (FMT_F, 8, 0);
489 output = *get_format ();
492 for (i = 0; i < name_cnt; i++)
494 struct dls_var_spec *spec;
497 v = dict_create_var (dict, name[i], fmt_var_width (&input));
500 msg (SE, _("%s is a duplicate variable name."), name[i]);
503 var_set_both_formats (v, &output);
505 spec = pool_alloc (dls->pool, sizeof *spec);
507 spec->fv = var_get_case_index (v);
508 strcpy (spec->name, var_get_name (v));
509 ll_push_tail (&dls->specs, &spec->ll);
516 /* Displays a table giving information on free-format variable parsing
519 dump_free_table (const struct data_list_pgm *dls,
520 const struct file_handle *fh)
523 struct dls_var_spec *spec;
527 spec_cnt = ll_count (&dls->specs);
529 t = tab_create (2, spec_cnt + 1, 0);
530 tab_columns (t, TAB_COL_DOWN, 1);
531 tab_headers (t, 0, 0, 1, 0);
532 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
533 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
534 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
535 tab_hline (t, TAL_2, 0, 1, 1);
536 tab_dim (t, tab_natural_dimensions);
538 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
540 char str[FMT_STRING_LEN_MAX + 1];
541 tab_text (t, 0, row, TAB_LEFT, spec->name);
542 tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
543 fmt_to_string (&spec->input, str));
547 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
552 /* Input procedure. */
554 /* Extracts a field from the current position in the current
555 record. Fields can be unquoted or quoted with single- or
556 double-quote characters.
558 *FIELD is set to the field content. The caller must not
559 or destroy this constant string.
561 After parsing the field, sets the current position in the
562 record to just past the field and any trailing delimiter.
563 Returns 0 on failure or a 1-based column number indicating the
564 beginning of the field on success. */
566 cut_field (const struct data_list_pgm *dls, struct substring *field)
568 struct substring line, p;
570 if (dfm_eof (dls->reader))
572 if (ds_is_empty (&dls->delims))
573 dfm_expand_tabs (dls->reader);
574 line = p = dfm_get_record (dls->reader);
576 if (ds_is_empty (&dls->delims))
578 bool missing_quote = false;
580 /* Skip leading whitespace. */
581 ss_ltrim (&p, ss_cstr (CC_SPACES));
585 /* Handle actual data, whether quoted or unquoted. */
586 if (ss_match_char (&p, '\''))
587 missing_quote = !ss_get_until (&p, '\'', field);
588 else if (ss_match_char (&p, '"'))
589 missing_quote = !ss_get_until (&p, '"', field);
591 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
593 msg (SW, _("Quoted string extends beyond end of line."));
595 /* Skip trailing whitespace and a single comma if present. */
596 ss_ltrim (&p, ss_cstr (CC_SPACES));
597 ss_match_char (&p, ',');
599 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
603 if (!ss_is_empty (p))
604 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
605 else if (dfm_columns_past_end (dls->reader) == 0)
607 /* A blank line or a line that ends in a delimiter has a
608 trailing blank field. */
614 /* Advance past the field.
616 Also advance past a trailing delimiter, regardless of
617 whether one actually existed. If we "skip" a delimiter
618 that was not actually there, then we will return
619 end-of-line on our next call, which is what we want. */
620 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
625 static bool read_from_data_list_fixed (const struct data_list_pgm *,
627 static bool read_from_data_list_free (const struct data_list_pgm *,
629 static bool read_from_data_list_list (const struct data_list_pgm *,
632 /* Reads a case from DLS into C.
633 Returns true if successful, false at end of file or on I/O error. */
635 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
639 dfm_push (dls->reader);
643 retval = read_from_data_list_fixed (dls, c);
646 retval = read_from_data_list_free (dls, c);
649 retval = read_from_data_list_list (dls, c);
654 dfm_pop (dls->reader);
659 /* Reads a case from the data file into C, parsing it according
660 to fixed-format syntax rules in DLS.
661 Returns true if successful, false at end of file or on I/O error. */
663 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
665 struct dls_var_spec *spec;
668 if (dfm_eof (dls->reader))
671 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
672 for (row = 1; row <= dls->record_cnt; row++)
674 struct substring line;
676 if (dfm_eof (dls->reader))
678 msg (SW, _("Partial case of %d of %d records discarded."),
679 row - 1, dls->record_cnt);
682 dfm_expand_tabs (dls->reader);
683 line = dfm_get_record (dls->reader);
685 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
686 data_in (ss_substr (line, spec->first_column - 1, spec->input.w),
687 spec->input.type, spec->input.d, spec->first_column,
688 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
690 dfm_forward_record (dls->reader);
696 /* Reads a case from the data file into C, parsing it according
697 to free-format syntax rules in DLS.
698 Returns true if successful, false at end of file or on I/O error. */
700 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
702 struct dls_var_spec *spec;
704 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
706 struct substring field;
708 /* Cut out a field and read in a new record if necessary. */
709 while (!cut_field (dls, &field))
711 if (!dfm_eof (dls->reader))
712 dfm_forward_record (dls->reader);
713 if (dfm_eof (dls->reader))
715 if (&spec->ll != ll_head (&dls->specs))
716 msg (SW, _("Partial case discarded. The first variable "
717 "missing was %s."), spec->name);
722 data_in (field, spec->input.type, 0,
723 dfm_get_column (dls->reader, ss_data (field)),
724 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
729 /* Reads a case from the data file and parses it according to
730 list-format syntax rules.
731 Returns true if successful, false at end of file or on I/O error. */
733 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
735 struct dls_var_spec *spec;
737 if (dfm_eof (dls->reader))
740 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
742 struct substring field;
744 if (!cut_field (dls, &field))
746 if (get_undefined ())
747 msg (SW, _("Missing value(s) for all variables from %s onward. "
748 "These will be filled with the system-missing value "
749 "or blanks, as appropriate."),
751 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
753 int width = fmt_var_width (&spec->input);
755 case_data_rw_idx (c, spec->fv)->f = SYSMIS;
757 memset (case_data_rw_idx (c, spec->fv)->s, ' ', width);
762 data_in (field, spec->input.type, 0,
763 dfm_get_column (dls->reader, ss_data (field)),
764 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
767 dfm_forward_record (dls->reader);
771 /* Destroys DATA LIST transformation DLS.
772 Returns true if successful, false if an I/O error occurred. */
774 data_list_trns_free (void *dls_)
776 struct data_list_pgm *dls = dls_;
777 dfm_close_reader (dls->reader);
778 pool_destroy (dls->pool);
782 /* Handle DATA LIST transformation DLS, parsing data into C. */
784 data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
786 struct data_list_pgm *dls = dls_;
789 if (read_from_data_list (dls, c))
790 retval = TRNS_CONTINUE;
791 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
793 /* An I/O error, or encountering end of file for a second
794 time, should be escalated into a more serious error. */
798 retval = TRNS_END_FILE;
800 /* If there was an END subcommand handle it. */
801 if (dls->end != NULL)
803 double *end = &case_data_rw (c, dls->end)->f;
804 if (retval == TRNS_DROP_CASE)
807 retval = TRNS_END_FILE;
816 /* Reads one case into OUTPUT_CASE.
817 Returns true if successful, false at end of file or if an
818 I/O error occurred. */
820 data_list_casereader_read (struct casereader *reader UNUSED, void *dls_,
823 struct data_list_pgm *dls = dls_;
826 /* Skip the requested number of records before reading the
828 while (dls->skip_records > 0)
830 if (dfm_eof (dls->reader))
832 dfm_forward_record (dls->reader);
836 case_create (c, dls->value_cnt);
837 ok = read_from_data_list (dls, c);
843 /* Destroys the casereader. */
845 data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_)
847 struct data_list_pgm *dls = dls_;
848 if (dfm_reader_error (dls->reader))
849 casereader_force_error (reader);
850 data_list_trns_free (dls);
853 static const struct casereader_class data_list_casereader_class =
855 data_list_casereader_read,
856 data_list_casereader_destroy,