1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include <data/case.h>
25 #include <data/data-in.h>
26 #include <data/casereader.h>
27 #include <data/casereader-provider.h>
28 #include <data/dictionary.h>
29 #include <data/format.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/data-io/data-reader.h>
36 #include <language/data-io/file-handle.h>
37 #include <language/data-io/inpt-pgm.h>
38 #include <language/data-io/placement-parser.h>
39 #include <language/lexer/format-parser.h>
40 #include <language/lexer/lexer.h>
41 #include <language/lexer/variable-parser.h>
42 #include <libpspp/alloc.h>
43 #include <libpspp/assertion.h>
44 #include <libpspp/compiler.h>
45 #include <libpspp/ll.h>
46 #include <libpspp/message.h>
47 #include <libpspp/misc.h>
48 #include <libpspp/pool.h>
49 #include <libpspp/str.h>
50 #include <output/table.h>
56 #define _(msgid) gettext (msgid)
58 /* Utility function. */
60 /* Describes how to parse one variable. */
63 struct ll ll; /* List element. */
66 struct fmt_spec input; /* Input format of this field. */
67 int fv; /* First value in case. */
68 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
70 /* Fixed format only. */
71 int record; /* Record number (1-based). */
72 int first_column; /* Column numbers in record. */
75 static struct dls_var_spec *
76 ll_to_dls_var_spec (struct ll *ll)
78 return ll_data (ll, struct dls_var_spec, ll);
81 /* Constants for DATA LIST type. */
89 /* DATA LIST private data structure. */
92 struct pool *pool; /* Used for all DATA LIST storage. */
93 struct ll_list specs; /* List of dls_var_specs. */
94 struct dfm_reader *reader; /* Data file reader. */
95 enum dls_type type; /* Type of DATA LIST construct. */
96 struct variable *end; /* Variable specified on END subcommand. */
97 int record_cnt; /* Number of records. */
98 struct string delims; /* Field delimiters. */
99 int skip_records; /* Records to skip before first case. */
100 size_t value_cnt; /* Number of `union value's in case. */
103 static const struct casereader_class data_list_casereader_class;
105 static bool parse_fixed (struct lexer *, struct dictionary *dict,
106 struct pool *tmp_pool, struct data_list_pgm *);
107 static bool parse_free (struct lexer *, struct dictionary *dict,
108 struct pool *tmp_pool, struct data_list_pgm *);
109 static void dump_fixed_table (const struct ll_list *,
110 const struct file_handle *, int record_cnt);
111 static void dump_free_table (const struct data_list_pgm *,
112 const struct file_handle *);
114 static trns_free_func data_list_trns_free;
115 static trns_proc_func data_list_trns_proc;
118 cmd_data_list (struct lexer *lexer, struct dataset *ds)
120 struct dictionary *dict;
121 struct data_list_pgm *dls;
122 int table = -1; /* Print table if nonzero, -1=undecided. */
123 struct file_handle *fh = fh_inline_file ();
124 struct pool *tmp_pool;
127 dict = in_input_program () ? dataset_dict (ds) : dict_create ();
129 dls = pool_create_container (struct data_list_pgm, pool);
130 ll_init (&dls->specs);
135 dls->skip_records = 0;
136 ds_init_empty (&dls->delims);
137 ds_register_pool (&dls->delims, dls->pool);
139 tmp_pool = pool_create_subpool (dls->pool);
141 while (lex_token (lexer) != '/')
143 if (lex_match_id (lexer, "FILE"))
145 lex_match (lexer, '=');
146 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
150 else if (lex_match_id (lexer, "RECORDS"))
152 lex_match (lexer, '=');
153 lex_match (lexer, '(');
154 if (!lex_force_int (lexer))
156 dls->record_cnt = lex_integer (lexer);
158 lex_match (lexer, ')');
160 else if (lex_match_id (lexer, "SKIP"))
162 lex_match (lexer, '=');
163 if (!lex_force_int (lexer))
165 dls->skip_records = lex_integer (lexer);
168 else if (lex_match_id (lexer, "END"))
172 msg (SE, _("The END subcommand may only be specified once."));
176 lex_match (lexer, '=');
177 if (!lex_force_id (lexer))
179 dls->end = dict_lookup_var (dict, lex_tokid (lexer));
181 dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
184 else if (lex_token (lexer) == T_ID)
186 if (lex_match_id (lexer, "NOTABLE"))
188 else if (lex_match_id (lexer, "TABLE"))
193 if (lex_match_id (lexer, "FIXED"))
195 else if (lex_match_id (lexer, "FREE"))
197 else if (lex_match_id (lexer, "LIST"))
201 lex_error (lexer, NULL);
207 msg (SE, _("Only one of FIXED, FREE, or LIST may "
213 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
214 && lex_match (lexer, '('))
216 while (!lex_match (lexer, ')'))
220 if (lex_match_id (lexer, "TAB"))
222 else if (lex_token (lexer) == T_STRING && ds_length (lex_tokstr (lexer)) == 1)
224 delim = ds_first (lex_tokstr (lexer));
229 lex_error (lexer, NULL);
233 ds_put_char (&dls->delims, delim);
235 lex_match (lexer, ',');
242 lex_error (lexer, NULL);
247 fh_set_default_handle (fh);
250 dls->type = DLS_FIXED;
252 if (dls->type != DLS_FIXED && dls->end != NULL)
254 msg (SE, _("The END keyword may be used only with DATA LIST FIXED."));
259 table = dls->type != DLS_FREE;
261 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (lexer, dict, tmp_pool, dls);
265 if (lex_end_of_command (lexer) != CMD_SUCCESS)
270 if (dls->type == DLS_FIXED)
271 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
273 dump_free_table (dls, fh);
276 dls->reader = dfm_open_reader (fh, lexer);
277 if (dls->reader == NULL)
280 dls->value_cnt = dict_get_next_value_idx (dict);
282 if (in_input_program ())
283 add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
286 struct casereader *reader;
287 reader = casereader_create_sequential (NULL,
288 dict_get_next_value_idx (dict),
289 -1, &data_list_casereader_class,
291 proc_set_active_file (ds, reader, dict);
294 pool_destroy (tmp_pool);
299 data_list_trns_free (dls);
300 return CMD_CASCADING_FAILURE;
303 /* Fixed-format parsing. */
305 /* Parses all the variable specifications for DATA LIST FIXED,
306 storing them into DLS. Uses TMP_POOL for data that is not
307 needed once parsing is complete. Returns true only if
310 parse_fixed (struct lexer *lexer, struct dictionary *dict,
311 struct pool *tmp_pool, struct data_list_pgm *dls)
313 int last_nonempty_record;
317 while (lex_token (lexer) != '.')
320 size_t name_cnt, name_idx;
321 struct fmt_spec *formats, *f;
324 /* Parse everything. */
325 if (!parse_record_placement (lexer, &record, &column)
326 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
327 &names, &name_cnt, PV_NONE)
328 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
329 &formats, &format_cnt))
332 /* Create variables and var specs. */
334 for (f = formats; f < &formats[format_cnt]; f++)
335 if (!execute_placement_format (f, &record, &column))
340 struct dls_var_spec *spec;
342 name = names[name_idx++];
344 /* Create variable. */
345 width = fmt_var_width (f);
346 v = dict_create_var (dict, name, width);
350 struct fmt_spec output = fmt_for_output_from_input (f);
351 var_set_both_formats (v, &output);
356 This can be acceptable if we're in INPUT
357 PROGRAM, but only if the existing variable has
358 the same width as the one we would have
360 if (!in_input_program ())
362 msg (SE, _("%s is a duplicate variable name."), name);
366 v = dict_lookup_var_assert (dict, name);
367 if ((width != 0) != (var_get_width (v) != 0))
369 msg (SE, _("There is already a variable %s of a "
374 if (width != 0 && width != var_get_width (v))
376 msg (SE, _("There is already a string variable %s of a "
377 "different width."), name);
382 /* Create specifier for parsing the variable. */
383 spec = pool_alloc (dls->pool, sizeof *spec);
385 spec->fv = var_get_case_index (v);
386 spec->record = record;
387 spec->first_column = column;
388 strcpy (spec->name, var_get_name (v));
389 ll_push_tail (&dls->specs, &spec->ll);
393 assert (name_idx == name_cnt);
395 if (ll_is_empty (&dls->specs))
397 msg (SE, _("At least one variable must be specified."));
401 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
402 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
404 msg (SE, _("Variables are specified on records that "
405 "should not exist according to RECORDS subcommand."));
408 else if (!dls->record_cnt)
409 dls->record_cnt = last_nonempty_record;
414 /* Displays a table giving information on fixed-format variable
415 parsing on DATA LIST. */
417 dump_fixed_table (const struct ll_list *specs,
418 const struct file_handle *fh, int record_cnt)
422 struct dls_var_spec *spec;
425 spec_cnt = ll_count (specs);
426 t = tab_create (4, spec_cnt + 1, 0);
427 tab_columns (t, TAB_COL_DOWN, 1);
428 tab_headers (t, 0, 0, 1, 0);
429 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
430 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
431 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
432 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
433 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
434 tab_hline (t, TAL_2, 0, 3, 1);
435 tab_dim (t, tab_natural_dimensions);
438 ll_for_each (spec, struct dls_var_spec, ll, specs)
440 char fmt_string[FMT_STRING_LEN_MAX + 1];
441 tab_text (t, 0, row, TAB_LEFT, spec->name);
442 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
443 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
444 spec->first_column, spec->first_column + spec->input.w - 1);
445 tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
446 fmt_to_string (&spec->input, fmt_string));
450 tab_title (t, ngettext ("Reading %d record from %s.",
451 "Reading %d records from %s.", record_cnt),
452 record_cnt, fh_get_name (fh));
456 /* Free-format parsing. */
458 /* Parses variable specifications for DATA LIST FREE and adds
459 them to DLS. Uses TMP_POOL for data that is not needed once
460 parsing is complete. Returns true only if successful. */
462 parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool,
463 struct data_list_pgm *dls)
466 while (lex_token (lexer) != '.')
468 struct fmt_spec input, output;
473 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
474 &name, &name_cnt, PV_NONE))
477 if (lex_match (lexer, '('))
479 if (!parse_format_specifier (lexer, &input)
480 || !fmt_check_input (&input)
481 || !lex_force_match (lexer, ')'))
484 /* As a special case, N format is treated as F format
485 for free-field input. */
486 if (input.type == FMT_N)
489 output = fmt_for_output_from_input (&input);
493 lex_match (lexer, '*');
494 input = fmt_for_input (FMT_F, 8, 0);
495 output = *get_format ();
498 for (i = 0; i < name_cnt; i++)
500 struct dls_var_spec *spec;
503 v = dict_create_var (dict, name[i], fmt_var_width (&input));
506 msg (SE, _("%s is a duplicate variable name."), name[i]);
509 var_set_both_formats (v, &output);
511 spec = pool_alloc (dls->pool, sizeof *spec);
513 spec->fv = var_get_case_index (v);
514 strcpy (spec->name, var_get_name (v));
515 ll_push_tail (&dls->specs, &spec->ll);
522 /* Displays a table giving information on free-format variable parsing
525 dump_free_table (const struct data_list_pgm *dls,
526 const struct file_handle *fh)
529 struct dls_var_spec *spec;
533 spec_cnt = ll_count (&dls->specs);
535 t = tab_create (2, spec_cnt + 1, 0);
536 tab_columns (t, TAB_COL_DOWN, 1);
537 tab_headers (t, 0, 0, 1, 0);
538 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
539 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
540 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
541 tab_hline (t, TAL_2, 0, 1, 1);
542 tab_dim (t, tab_natural_dimensions);
544 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
546 char str[FMT_STRING_LEN_MAX + 1];
547 tab_text (t, 0, row, TAB_LEFT, spec->name);
548 tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
549 fmt_to_string (&spec->input, str));
553 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
558 /* Input procedure. */
560 /* Extracts a field from the current position in the current
561 record. Fields can be unquoted or quoted with single- or
562 double-quote characters.
564 *FIELD is set to the field content. The caller must not
565 or destroy this constant string.
567 After parsing the field, sets the current position in the
568 record to just past the field and any trailing delimiter.
569 Returns 0 on failure or a 1-based column number indicating the
570 beginning of the field on success. */
572 cut_field (const struct data_list_pgm *dls, struct substring *field)
574 struct substring line, p;
576 if (dfm_eof (dls->reader))
578 if (ds_is_empty (&dls->delims))
579 dfm_expand_tabs (dls->reader);
580 line = p = dfm_get_record (dls->reader);
582 if (ds_is_empty (&dls->delims))
584 bool missing_quote = false;
586 /* Skip leading whitespace. */
587 ss_ltrim (&p, ss_cstr (CC_SPACES));
591 /* Handle actual data, whether quoted or unquoted. */
592 if (ss_match_char (&p, '\''))
593 missing_quote = !ss_get_until (&p, '\'', field);
594 else if (ss_match_char (&p, '"'))
595 missing_quote = !ss_get_until (&p, '"', field);
597 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
599 msg (SW, _("Quoted string extends beyond end of line."));
601 /* Skip trailing whitespace and a single comma if present. */
602 ss_ltrim (&p, ss_cstr (CC_SPACES));
603 ss_match_char (&p, ',');
605 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
609 if (!ss_is_empty (p))
610 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
611 else if (dfm_columns_past_end (dls->reader) == 0)
613 /* A blank line or a line that ends in a delimiter has a
614 trailing blank field. */
620 /* Advance past the field.
622 Also advance past a trailing delimiter, regardless of
623 whether one actually existed. If we "skip" a delimiter
624 that was not actually there, then we will return
625 end-of-line on our next call, which is what we want. */
626 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
631 static bool read_from_data_list_fixed (const struct data_list_pgm *,
633 static bool read_from_data_list_free (const struct data_list_pgm *,
635 static bool read_from_data_list_list (const struct data_list_pgm *,
638 /* Reads a case from DLS into C.
639 Returns true if successful, false at end of file or on I/O error. */
641 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
645 dfm_push (dls->reader);
649 retval = read_from_data_list_fixed (dls, c);
652 retval = read_from_data_list_free (dls, c);
655 retval = read_from_data_list_list (dls, c);
660 dfm_pop (dls->reader);
665 /* Reads a case from the data file into C, parsing it according
666 to fixed-format syntax rules in DLS.
667 Returns true if successful, false at end of file or on I/O error. */
669 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
671 struct dls_var_spec *spec;
674 if (dfm_eof (dls->reader))
677 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
678 for (row = 1; row <= dls->record_cnt; row++)
680 struct substring line;
682 if (dfm_eof (dls->reader))
684 msg (SW, _("Partial case of %d of %d records discarded."),
685 row - 1, dls->record_cnt);
688 dfm_expand_tabs (dls->reader);
689 line = dfm_get_record (dls->reader);
691 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
693 if (row < spec->record)
696 data_in (ss_substr (line, spec->first_column - 1, spec->input.w),
697 spec->input.type, spec->input.d, spec->first_column,
698 case_data_rw_idx (c, spec->fv),
699 fmt_var_width (&spec->input));
702 dfm_forward_record (dls->reader);
708 /* Reads a case from the data file into C, parsing it according
709 to free-format syntax rules in DLS.
710 Returns true if successful, false at end of file or on I/O error. */
712 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
714 struct dls_var_spec *spec;
716 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
718 struct substring field;
720 /* Cut out a field and read in a new record if necessary. */
721 while (!cut_field (dls, &field))
723 if (!dfm_eof (dls->reader))
724 dfm_forward_record (dls->reader);
725 if (dfm_eof (dls->reader))
727 if (&spec->ll != ll_head (&dls->specs))
728 msg (SW, _("Partial case discarded. The first variable "
729 "missing was %s."), spec->name);
734 data_in (field, spec->input.type, 0,
735 dfm_get_column (dls->reader, ss_data (field)),
736 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
741 /* Reads a case from the data file and parses it according to
742 list-format syntax rules.
743 Returns true if successful, false at end of file or on I/O error. */
745 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
747 struct dls_var_spec *spec;
749 if (dfm_eof (dls->reader))
752 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
754 struct substring field;
756 if (!cut_field (dls, &field))
758 if (get_undefined ())
759 msg (SW, _("Missing value(s) for all variables from %s onward. "
760 "These will be filled with the system-missing value "
761 "or blanks, as appropriate."),
763 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
765 int width = fmt_var_width (&spec->input);
767 case_data_rw_idx (c, spec->fv)->f = SYSMIS;
769 memset (case_data_rw_idx (c, spec->fv)->s, ' ', width);
774 data_in (field, spec->input.type, 0,
775 dfm_get_column (dls->reader, ss_data (field)),
776 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
779 dfm_forward_record (dls->reader);
783 /* Destroys DATA LIST transformation DLS.
784 Returns true if successful, false if an I/O error occurred. */
786 data_list_trns_free (void *dls_)
788 struct data_list_pgm *dls = dls_;
789 dfm_close_reader (dls->reader);
790 pool_destroy (dls->pool);
794 /* Handle DATA LIST transformation DLS, parsing data into C. */
796 data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
798 struct data_list_pgm *dls = dls_;
801 if (read_from_data_list (dls, c))
802 retval = TRNS_CONTINUE;
803 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
805 /* An I/O error, or encountering end of file for a second
806 time, should be escalated into a more serious error. */
810 retval = TRNS_END_FILE;
812 /* If there was an END subcommand handle it. */
813 if (dls->end != NULL)
815 double *end = &case_data_rw (c, dls->end)->f;
816 if (retval == TRNS_END_FILE)
819 retval = TRNS_CONTINUE;
828 /* Reads one case into OUTPUT_CASE.
829 Returns true if successful, false at end of file or if an
830 I/O error occurred. */
832 data_list_casereader_read (struct casereader *reader UNUSED, void *dls_,
835 struct data_list_pgm *dls = dls_;
838 /* Skip the requested number of records before reading the
840 while (dls->skip_records > 0)
842 if (dfm_eof (dls->reader))
844 dfm_forward_record (dls->reader);
848 case_create (c, dls->value_cnt);
849 ok = read_from_data_list (dls, c);
855 /* Destroys the casereader. */
857 data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_)
859 struct data_list_pgm *dls = dls_;
860 if (dfm_reader_error (dls->reader))
861 casereader_force_error (reader);
862 data_list_trns_free (dls);
865 static const struct casereader_class data_list_casereader_class =
867 data_list_casereader_read,
868 data_list_casereader_destroy,