1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include <data/case.h>
27 #include <data/data-in.h>
28 #include <data/casereader.h>
29 #include <data/casereader-provider.h>
30 #include <data/dictionary.h>
31 #include <data/format.h>
32 #include <data/procedure.h>
33 #include <data/settings.h>
34 #include <data/transformations.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/data-reader.h>
38 #include <language/data-io/file-handle.h>
39 #include <language/data-io/inpt-pgm.h>
40 #include <language/data-io/placement-parser.h>
41 #include <language/lexer/format-parser.h>
42 #include <language/lexer/lexer.h>
43 #include <language/lexer/variable-parser.h>
44 #include <libpspp/alloc.h>
45 #include <libpspp/assertion.h>
46 #include <libpspp/compiler.h>
47 #include <libpspp/ll.h>
48 #include <libpspp/message.h>
49 #include <libpspp/misc.h>
50 #include <libpspp/pool.h>
51 #include <libpspp/str.h>
52 #include <output/table.h>
58 #define _(msgid) gettext (msgid)
60 /* Utility function. */
62 /* Describes how to parse one variable. */
65 struct ll ll; /* List element. */
68 struct fmt_spec input; /* Input format of this field. */
69 int fv; /* First value in case. */
70 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
72 /* Fixed format only. */
73 int record; /* Record number (1-based). */
74 int first_column; /* Column numbers in record. */
77 static struct dls_var_spec *
78 ll_to_dls_var_spec (struct ll *ll)
80 return ll_data (ll, struct dls_var_spec, ll);
83 /* Constants for DATA LIST type. */
91 /* DATA LIST private data structure. */
94 struct pool *pool; /* Used for all DATA LIST storage. */
95 struct ll_list specs; /* List of dls_var_specs. */
96 struct dfm_reader *reader; /* Data file reader. */
97 enum dls_type type; /* Type of DATA LIST construct. */
98 struct variable *end; /* Variable specified on END subcommand. */
99 int record_cnt; /* Number of records. */
100 struct string delims; /* Field delimiters. */
101 int skip_records; /* Records to skip before first case. */
102 size_t value_cnt; /* Number of `union value's in case. */
105 static const struct casereader_class data_list_casereader_class;
107 static bool parse_fixed (struct lexer *, struct dictionary *dict,
108 struct pool *tmp_pool, struct data_list_pgm *);
109 static bool parse_free (struct lexer *, struct dictionary *dict,
110 struct pool *tmp_pool, struct data_list_pgm *);
111 static void dump_fixed_table (const struct ll_list *,
112 const struct file_handle *, int record_cnt);
113 static void dump_free_table (const struct data_list_pgm *,
114 const struct file_handle *);
116 static trns_free_func data_list_trns_free;
117 static trns_proc_func data_list_trns_proc;
120 cmd_data_list (struct lexer *lexer, struct dataset *ds)
122 struct dictionary *dict;
123 struct data_list_pgm *dls;
124 int table = -1; /* Print table if nonzero, -1=undecided. */
125 struct file_handle *fh = fh_inline_file ();
126 struct pool *tmp_pool;
129 dict = in_input_program () ? dataset_dict (ds) : dict_create ();
131 dls = pool_create_container (struct data_list_pgm, pool);
132 ll_init (&dls->specs);
137 dls->skip_records = 0;
138 ds_init_empty (&dls->delims);
139 ds_register_pool (&dls->delims, dls->pool);
141 tmp_pool = pool_create_subpool (dls->pool);
143 while (lex_token (lexer) != '/')
145 if (lex_match_id (lexer, "FILE"))
147 lex_match (lexer, '=');
148 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
152 else if (lex_match_id (lexer, "RECORDS"))
154 lex_match (lexer, '=');
155 lex_match (lexer, '(');
156 if (!lex_force_int (lexer))
158 dls->record_cnt = lex_integer (lexer);
160 lex_match (lexer, ')');
162 else if (lex_match_id (lexer, "SKIP"))
164 lex_match (lexer, '=');
165 if (!lex_force_int (lexer))
167 dls->skip_records = lex_integer (lexer);
170 else if (lex_match_id (lexer, "END"))
174 msg (SE, _("The END subcommand may only be specified once."));
178 lex_match (lexer, '=');
179 if (!lex_force_id (lexer))
181 dls->end = dict_lookup_var (dict, lex_tokid (lexer));
183 dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
186 else if (lex_token (lexer) == T_ID)
188 if (lex_match_id (lexer, "NOTABLE"))
190 else if (lex_match_id (lexer, "TABLE"))
195 if (lex_match_id (lexer, "FIXED"))
197 else if (lex_match_id (lexer, "FREE"))
199 else if (lex_match_id (lexer, "LIST"))
203 lex_error (lexer, NULL);
209 msg (SE, _("Only one of FIXED, FREE, or LIST may "
215 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
216 && lex_match (lexer, '('))
218 while (!lex_match (lexer, ')'))
222 if (lex_match_id (lexer, "TAB"))
224 else if (lex_token (lexer) == T_STRING && ds_length (lex_tokstr (lexer)) == 1)
226 delim = ds_first (lex_tokstr (lexer));
231 lex_error (lexer, NULL);
235 ds_put_char (&dls->delims, delim);
237 lex_match (lexer, ',');
244 lex_error (lexer, NULL);
249 fh_set_default_handle (fh);
252 dls->type = DLS_FIXED;
255 table = dls->type != DLS_FREE;
257 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (lexer, dict, tmp_pool, dls);
261 if (lex_end_of_command (lexer) != CMD_SUCCESS)
266 if (dls->type == DLS_FIXED)
267 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
269 dump_free_table (dls, fh);
272 dls->reader = dfm_open_reader (fh, lexer);
273 if (dls->reader == NULL)
276 dls->value_cnt = dict_get_next_value_idx (dict);
278 if (in_input_program ())
279 add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
282 struct casereader *reader;
283 reader = casereader_create_sequential (NULL,
284 dict_get_next_value_idx (dict),
285 -1, &data_list_casereader_class,
287 proc_set_active_file (ds, reader, dict);
290 pool_destroy (tmp_pool);
295 data_list_trns_free (dls);
296 return CMD_CASCADING_FAILURE;
299 /* Fixed-format parsing. */
301 /* Parses all the variable specifications for DATA LIST FIXED,
302 storing them into DLS. Uses TMP_POOL for data that is not
303 needed once parsing is complete. Returns true only if
306 parse_fixed (struct lexer *lexer, struct dictionary *dict,
307 struct pool *tmp_pool, struct data_list_pgm *dls)
309 int last_nonempty_record;
313 while (lex_token (lexer) != '.')
316 size_t name_cnt, name_idx;
317 struct fmt_spec *formats, *f;
320 /* Parse everything. */
321 if (!parse_record_placement (lexer, &record, &column)
322 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
323 &names, &name_cnt, PV_NONE)
324 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
325 &formats, &format_cnt))
328 /* Create variables and var specs. */
330 for (f = formats; f < &formats[format_cnt]; f++)
331 if (!execute_placement_format (f, &record, &column))
336 struct dls_var_spec *spec;
338 name = names[name_idx++];
340 /* Create variable. */
341 width = fmt_var_width (f);
342 v = dict_create_var (dict, name, width);
346 struct fmt_spec output = fmt_for_output_from_input (f);
347 var_set_both_formats (v, &output);
352 This can be acceptable if we're in INPUT
353 PROGRAM, but only if the existing variable has
354 the same width as the one we would have
356 if (!in_input_program ())
358 msg (SE, _("%s is a duplicate variable name."), name);
362 v = dict_lookup_var_assert (dict, name);
363 if ((width != 0) != (var_get_width (v) != 0))
365 msg (SE, _("There is already a variable %s of a "
370 if (width != 0 && width != var_get_width (v))
372 msg (SE, _("There is already a string variable %s of a "
373 "different width."), name);
378 /* Create specifier for parsing the variable. */
379 spec = pool_alloc (dls->pool, sizeof *spec);
381 spec->fv = var_get_case_index (v);
382 spec->record = record;
383 spec->first_column = column;
384 strcpy (spec->name, var_get_name (v));
385 ll_push_tail (&dls->specs, &spec->ll);
389 assert (name_idx == name_cnt);
391 if (ll_is_empty (&dls->specs))
393 msg (SE, _("At least one variable must be specified."));
397 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
398 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
400 msg (SE, _("Variables are specified on records that "
401 "should not exist according to RECORDS subcommand."));
404 else if (!dls->record_cnt)
405 dls->record_cnt = last_nonempty_record;
410 /* Displays a table giving information on fixed-format variable
411 parsing on DATA LIST. */
413 dump_fixed_table (const struct ll_list *specs,
414 const struct file_handle *fh, int record_cnt)
418 struct dls_var_spec *spec;
421 spec_cnt = ll_count (specs);
422 t = tab_create (4, spec_cnt + 1, 0);
423 tab_columns (t, TAB_COL_DOWN, 1);
424 tab_headers (t, 0, 0, 1, 0);
425 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
426 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
427 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
428 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
429 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
430 tab_hline (t, TAL_2, 0, 3, 1);
431 tab_dim (t, tab_natural_dimensions);
434 ll_for_each (spec, struct dls_var_spec, ll, specs)
436 char fmt_string[FMT_STRING_LEN_MAX + 1];
437 tab_text (t, 0, row, TAB_LEFT, spec->name);
438 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
439 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
440 spec->first_column, spec->first_column + spec->input.w - 1);
441 tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
442 fmt_to_string (&spec->input, fmt_string));
446 tab_title (t, ngettext ("Reading %d record from %s.",
447 "Reading %d records from %s.", record_cnt),
448 record_cnt, fh_get_name (fh));
452 /* Free-format parsing. */
454 /* Parses variable specifications for DATA LIST FREE and adds
455 them to DLS. Uses TMP_POOL for data that is not needed once
456 parsing is complete. Returns true only if successful. */
458 parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool,
459 struct data_list_pgm *dls)
462 while (lex_token (lexer) != '.')
464 struct fmt_spec input, output;
469 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
470 &name, &name_cnt, PV_NONE))
473 if (lex_match (lexer, '('))
475 if (!parse_format_specifier (lexer, &input)
476 || !fmt_check_input (&input)
477 || !lex_force_match (lexer, ')'))
480 /* As a special case, N format is treated as F format
481 for free-field input. */
482 if (input.type == FMT_N)
485 output = fmt_for_output_from_input (&input);
489 lex_match (lexer, '*');
490 input = fmt_for_input (FMT_F, 8, 0);
491 output = *get_format ();
494 for (i = 0; i < name_cnt; i++)
496 struct dls_var_spec *spec;
499 v = dict_create_var (dict, name[i], fmt_var_width (&input));
502 msg (SE, _("%s is a duplicate variable name."), name[i]);
505 var_set_both_formats (v, &output);
507 spec = pool_alloc (dls->pool, sizeof *spec);
509 spec->fv = var_get_case_index (v);
510 strcpy (spec->name, var_get_name (v));
511 ll_push_tail (&dls->specs, &spec->ll);
518 /* Displays a table giving information on free-format variable parsing
521 dump_free_table (const struct data_list_pgm *dls,
522 const struct file_handle *fh)
525 struct dls_var_spec *spec;
529 spec_cnt = ll_count (&dls->specs);
531 t = tab_create (2, spec_cnt + 1, 0);
532 tab_columns (t, TAB_COL_DOWN, 1);
533 tab_headers (t, 0, 0, 1, 0);
534 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
535 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
536 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
537 tab_hline (t, TAL_2, 0, 1, 1);
538 tab_dim (t, tab_natural_dimensions);
540 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
542 char str[FMT_STRING_LEN_MAX + 1];
543 tab_text (t, 0, row, TAB_LEFT, spec->name);
544 tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
545 fmt_to_string (&spec->input, str));
549 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
554 /* Input procedure. */
556 /* Extracts a field from the current position in the current
557 record. Fields can be unquoted or quoted with single- or
558 double-quote characters.
560 *FIELD is set to the field content. The caller must not
561 or destroy this constant string.
563 After parsing the field, sets the current position in the
564 record to just past the field and any trailing delimiter.
565 Returns 0 on failure or a 1-based column number indicating the
566 beginning of the field on success. */
568 cut_field (const struct data_list_pgm *dls, struct substring *field)
570 struct substring line, p;
572 if (dfm_eof (dls->reader))
574 if (ds_is_empty (&dls->delims))
575 dfm_expand_tabs (dls->reader);
576 line = p = dfm_get_record (dls->reader);
578 if (ds_is_empty (&dls->delims))
580 bool missing_quote = false;
582 /* Skip leading whitespace. */
583 ss_ltrim (&p, ss_cstr (CC_SPACES));
587 /* Handle actual data, whether quoted or unquoted. */
588 if (ss_match_char (&p, '\''))
589 missing_quote = !ss_get_until (&p, '\'', field);
590 else if (ss_match_char (&p, '"'))
591 missing_quote = !ss_get_until (&p, '"', field);
593 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
595 msg (SW, _("Quoted string extends beyond end of line."));
597 /* Skip trailing whitespace and a single comma if present. */
598 ss_ltrim (&p, ss_cstr (CC_SPACES));
599 ss_match_char (&p, ',');
601 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
605 if (!ss_is_empty (p))
606 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
607 else if (dfm_columns_past_end (dls->reader) == 0)
609 /* A blank line or a line that ends in a delimiter has a
610 trailing blank field. */
616 /* Advance past the field.
618 Also advance past a trailing delimiter, regardless of
619 whether one actually existed. If we "skip" a delimiter
620 that was not actually there, then we will return
621 end-of-line on our next call, which is what we want. */
622 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
627 static bool read_from_data_list_fixed (const struct data_list_pgm *,
629 static bool read_from_data_list_free (const struct data_list_pgm *,
631 static bool read_from_data_list_list (const struct data_list_pgm *,
634 /* Reads a case from DLS into C.
635 Returns true if successful, false at end of file or on I/O error. */
637 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
641 dfm_push (dls->reader);
645 retval = read_from_data_list_fixed (dls, c);
648 retval = read_from_data_list_free (dls, c);
651 retval = read_from_data_list_list (dls, c);
656 dfm_pop (dls->reader);
661 /* Reads a case from the data file into C, parsing it according
662 to fixed-format syntax rules in DLS.
663 Returns true if successful, false at end of file or on I/O error. */
665 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
667 struct dls_var_spec *spec;
670 if (dfm_eof (dls->reader))
673 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
674 for (row = 1; row <= dls->record_cnt; row++)
676 struct substring line;
678 if (dfm_eof (dls->reader))
680 msg (SW, _("Partial case of %d of %d records discarded."),
681 row - 1, dls->record_cnt);
684 dfm_expand_tabs (dls->reader);
685 line = dfm_get_record (dls->reader);
687 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
688 data_in (ss_substr (line, spec->first_column - 1, spec->input.w),
689 spec->input.type, spec->input.d, spec->first_column,
690 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
692 dfm_forward_record (dls->reader);
698 /* Reads a case from the data file into C, parsing it according
699 to free-format syntax rules in DLS.
700 Returns true if successful, false at end of file or on I/O error. */
702 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
704 struct dls_var_spec *spec;
706 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
708 struct substring field;
710 /* Cut out a field and read in a new record if necessary. */
711 while (!cut_field (dls, &field))
713 if (!dfm_eof (dls->reader))
714 dfm_forward_record (dls->reader);
715 if (dfm_eof (dls->reader))
717 if (&spec->ll != ll_head (&dls->specs))
718 msg (SW, _("Partial case discarded. The first variable "
719 "missing was %s."), spec->name);
724 data_in (field, spec->input.type, 0,
725 dfm_get_column (dls->reader, ss_data (field)),
726 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
731 /* Reads a case from the data file and parses it according to
732 list-format syntax rules.
733 Returns true if successful, false at end of file or on I/O error. */
735 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
737 struct dls_var_spec *spec;
739 if (dfm_eof (dls->reader))
742 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
744 struct substring field;
746 if (!cut_field (dls, &field))
748 if (get_undefined ())
749 msg (SW, _("Missing value(s) for all variables from %s onward. "
750 "These will be filled with the system-missing value "
751 "or blanks, as appropriate."),
753 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
755 int width = fmt_var_width (&spec->input);
757 case_data_rw_idx (c, spec->fv)->f = SYSMIS;
759 memset (case_data_rw_idx (c, spec->fv)->s, ' ', width);
764 data_in (field, spec->input.type, 0,
765 dfm_get_column (dls->reader, ss_data (field)),
766 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
769 dfm_forward_record (dls->reader);
773 /* Destroys DATA LIST transformation DLS.
774 Returns true if successful, false if an I/O error occurred. */
776 data_list_trns_free (void *dls_)
778 struct data_list_pgm *dls = dls_;
779 dfm_close_reader (dls->reader);
780 pool_destroy (dls->pool);
784 /* Handle DATA LIST transformation DLS, parsing data into C. */
786 data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
788 struct data_list_pgm *dls = dls_;
791 if (read_from_data_list (dls, c))
792 retval = TRNS_CONTINUE;
793 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
795 /* An I/O error, or encountering end of file for a second
796 time, should be escalated into a more serious error. */
800 retval = TRNS_END_FILE;
802 /* If there was an END subcommand handle it. */
803 if (dls->end != NULL)
805 double *end = &case_data_rw (c, dls->end)->f;
806 if (retval == TRNS_DROP_CASE)
809 retval = TRNS_END_FILE;
818 /* Reads one case into OUTPUT_CASE.
819 Returns true if successful, false at end of file or if an
820 I/O error occurred. */
822 data_list_casereader_read (struct casereader *reader UNUSED, void *dls_,
825 struct data_list_pgm *dls = dls_;
828 /* Skip the requested number of records before reading the
830 while (dls->skip_records > 0)
832 if (dfm_eof (dls->reader))
834 dfm_forward_record (dls->reader);
838 case_create (c, dls->value_cnt);
839 ok = read_from_data_list (dls, c);
845 /* Destroys the casereader. */
847 data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_)
849 struct data_list_pgm *dls = dls_;
850 if (dfm_reader_error (dls->reader))
851 casereader_force_error (reader);
852 data_list_trns_free (dls);
855 static const struct casereader_class data_list_casereader_class =
857 data_list_casereader_read,
858 data_list_casereader_destroy,