1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/case-source.h>
30 #include <data/data-in.h>
31 #include <data/dictionary.h>
32 #include <data/format.h>
33 #include <data/procedure.h>
34 #include <data/settings.h>
35 #include <data/transformations.h>
36 #include <data/variable.h>
37 #include <language/command.h>
38 #include <language/data-io/data-reader.h>
39 #include <language/data-io/file-handle.h>
40 #include <language/data-io/inpt-pgm.h>
41 #include <language/data-io/placement-parser.h>
42 #include <language/lexer/format-parser.h>
43 #include <language/lexer/lexer.h>
44 #include <language/lexer/variable-parser.h>
45 #include <libpspp/alloc.h>
46 #include <libpspp/assertion.h>
47 #include <libpspp/compiler.h>
48 #include <libpspp/ll.h>
49 #include <libpspp/message.h>
50 #include <libpspp/misc.h>
51 #include <libpspp/pool.h>
52 #include <libpspp/str.h>
53 #include <output/table.h>
59 #define _(msgid) gettext (msgid)
61 /* Utility function. */
63 /* Describes how to parse one variable. */
66 struct ll ll; /* List element. */
69 struct fmt_spec input; /* Input format of this field. */
70 int fv; /* First value in case. */
71 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
73 /* Fixed format only. */
74 int record; /* Record number (1-based). */
75 int first_column; /* Column numbers in record. */
78 static struct dls_var_spec *
79 ll_to_dls_var_spec (struct ll *ll)
81 return ll_data (ll, struct dls_var_spec, ll);
84 /* Constants for DATA LIST type. */
92 /* DATA LIST private data structure. */
95 struct pool *pool; /* Used for all DATA LIST storage. */
96 struct ll_list specs; /* List of dls_var_specs. */
97 struct dfm_reader *reader; /* Data file reader. */
98 enum dls_type type; /* Type of DATA LIST construct. */
99 struct variable *end; /* Variable specified on END subcommand. */
100 int record_cnt; /* Number of records. */
101 struct string delims; /* Field delimiters. */
102 int skip_records; /* Records to skip before first case. */
105 static const struct case_source_class data_list_source_class;
107 static bool parse_fixed (struct lexer *, struct dictionary *dict,
108 struct pool *tmp_pool, struct data_list_pgm *);
109 static bool parse_free (struct lexer *, struct dictionary *dict,
110 struct pool *tmp_pool, struct data_list_pgm *);
111 static void dump_fixed_table (const struct ll_list *,
112 const struct file_handle *, int record_cnt);
113 static void dump_free_table (const struct data_list_pgm *,
114 const struct file_handle *);
116 static trns_free_func data_list_trns_free;
117 static trns_proc_func data_list_trns_proc;
120 cmd_data_list (struct lexer *lexer, struct dataset *ds)
122 struct dictionary *dict = dataset_dict (ds);
123 struct data_list_pgm *dls;
124 int table = -1; /* Print table if nonzero, -1=undecided. */
125 struct file_handle *fh = fh_inline_file ();
126 struct pool *tmp_pool;
129 if (!in_input_program ())
130 discard_variables (ds);
132 dls = pool_create_container (struct data_list_pgm, pool);
133 ll_init (&dls->specs);
138 dls->skip_records = 0;
139 ds_init_empty (&dls->delims);
140 ds_register_pool (&dls->delims, dls->pool);
142 tmp_pool = pool_create_subpool (dls->pool);
144 while (lex_token (lexer) != '/')
146 if (lex_match_id (lexer, "FILE"))
148 lex_match (lexer, '=');
149 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
153 else if (lex_match_id (lexer, "RECORDS"))
155 lex_match (lexer, '=');
156 lex_match (lexer, '(');
157 if (!lex_force_int (lexer))
159 dls->record_cnt = lex_integer (lexer);
161 lex_match (lexer, ')');
163 else if (lex_match_id (lexer, "SKIP"))
165 lex_match (lexer, '=');
166 if (!lex_force_int (lexer))
168 dls->skip_records = lex_integer (lexer);
171 else if (lex_match_id (lexer, "END"))
175 msg (SE, _("The END subcommand may only be specified once."));
179 lex_match (lexer, '=');
180 if (!lex_force_id (lexer))
182 dls->end = dict_lookup_var (dataset_dict (ds), lex_tokid (lexer));
184 dls->end = dict_create_var_assert (dataset_dict (ds), lex_tokid (lexer), 0);
187 else if (lex_token (lexer) == T_ID)
189 if (lex_match_id (lexer, "NOTABLE"))
191 else if (lex_match_id (lexer, "TABLE"))
196 if (lex_match_id (lexer, "FIXED"))
198 else if (lex_match_id (lexer, "FREE"))
200 else if (lex_match_id (lexer, "LIST"))
204 lex_error (lexer, NULL);
210 msg (SE, _("Only one of FIXED, FREE, or LIST may "
216 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
217 && lex_match (lexer, '('))
219 while (!lex_match (lexer, ')'))
223 if (lex_match_id (lexer, "TAB"))
225 else if (lex_token (lexer) == T_STRING && ds_length (lex_tokstr (lexer)) == 1)
227 delim = ds_first (lex_tokstr (lexer));
232 lex_error (lexer, NULL);
236 ds_put_char (&dls->delims, delim);
238 lex_match (lexer, ',');
245 lex_error (lexer, NULL);
250 fh_set_default_handle (fh);
253 dls->type = DLS_FIXED;
256 table = dls->type != DLS_FREE;
258 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (lexer, dict, tmp_pool, dls);
262 if (lex_end_of_command (lexer) != CMD_SUCCESS)
267 if (dls->type == DLS_FIXED)
268 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
270 dump_free_table (dls, fh);
273 dls->reader = dfm_open_reader (fh, lexer);
274 if (dls->reader == NULL)
277 if (in_input_program ())
278 add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
280 proc_set_source (ds, create_case_source (&data_list_source_class, dls));
282 pool_destroy (tmp_pool);
287 data_list_trns_free (dls);
288 return CMD_CASCADING_FAILURE;
291 /* Fixed-format parsing. */
293 /* Parses all the variable specifications for DATA LIST FIXED,
294 storing them into DLS. Uses TMP_POOL for data that is not
295 needed once parsing is complete. Returns true only if
298 parse_fixed (struct lexer *lexer, struct dictionary *dict,
299 struct pool *tmp_pool, struct data_list_pgm *dls)
301 int last_nonempty_record;
305 while (lex_token (lexer) != '.')
308 size_t name_cnt, name_idx;
309 struct fmt_spec *formats, *f;
312 /* Parse everything. */
313 if (!parse_record_placement (lexer, &record, &column)
314 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
315 &names, &name_cnt, PV_NONE)
316 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
317 &formats, &format_cnt))
320 /* Create variables and var specs. */
322 for (f = formats; f < &formats[format_cnt]; f++)
323 if (!execute_placement_format (f, &record, &column))
328 struct dls_var_spec *spec;
330 name = names[name_idx++];
332 /* Create variable. */
333 width = fmt_var_width (f);
334 v = dict_create_var (dict, name, width);
338 struct fmt_spec output = fmt_for_output_from_input (f);
345 This can be acceptable if we're in INPUT
346 PROGRAM, but only if the existing variable has
347 the same width as the one we would have
349 if (!in_input_program ())
351 msg (SE, _("%s is a duplicate variable name."), name);
355 v = dict_lookup_var_assert (dict, name);
356 if ((width != 0) != (v->width != 0))
358 msg (SE, _("There is already a variable %s of a "
363 if (width != 0 && width != v->width)
365 msg (SE, _("There is already a string variable %s of a "
366 "different width."), name);
371 /* Create specifier for parsing the variable. */
372 spec = pool_alloc (dls->pool, sizeof *spec);
375 spec->record = record;
376 spec->first_column = column;
377 strcpy (spec->name, v->name);
378 ll_push_tail (&dls->specs, &spec->ll);
382 assert (name_idx == name_cnt);
384 if (ll_is_empty (&dls->specs))
386 msg (SE, _("At least one variable must be specified."));
390 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
391 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
393 msg (SE, _("Variables are specified on records that "
394 "should not exist according to RECORDS subcommand."));
397 else if (!dls->record_cnt)
398 dls->record_cnt = last_nonempty_record;
403 /* Displays a table giving information on fixed-format variable
404 parsing on DATA LIST. */
406 dump_fixed_table (const struct ll_list *specs,
407 const struct file_handle *fh, int record_cnt)
411 struct dls_var_spec *spec;
414 spec_cnt = ll_count (specs);
415 t = tab_create (4, spec_cnt + 1, 0);
416 tab_columns (t, TAB_COL_DOWN, 1);
417 tab_headers (t, 0, 0, 1, 0);
418 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
419 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
420 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
421 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
422 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
423 tab_hline (t, TAL_2, 0, 3, 1);
424 tab_dim (t, tab_natural_dimensions);
427 ll_for_each (spec, struct dls_var_spec, ll, specs)
429 char fmt_string[FMT_STRING_LEN_MAX + 1];
430 tab_text (t, 0, row, TAB_LEFT, spec->name);
431 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
432 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
433 spec->first_column, spec->first_column + spec->input.w - 1);
434 tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
435 fmt_to_string (&spec->input, fmt_string));
439 tab_title (t, ngettext ("Reading %d record from %s.",
440 "Reading %d records from %s.", record_cnt),
441 record_cnt, fh_get_name (fh));
445 /* Free-format parsing. */
447 /* Parses variable specifications for DATA LIST FREE and adds
448 them to DLS. Uses TMP_POOL for data that is not needed once
449 parsing is complete. Returns true only if successful. */
451 parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool,
452 struct data_list_pgm *dls)
455 while (lex_token (lexer) != '.')
457 struct fmt_spec input, output;
462 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
463 &name, &name_cnt, PV_NONE))
466 if (lex_match (lexer, '('))
468 if (!parse_format_specifier (lexer, &input)
469 || !fmt_check_input (&input)
470 || !lex_force_match (lexer, ')'))
472 output = fmt_for_output_from_input (&input);
476 lex_match (lexer, '*');
477 input = fmt_for_input (FMT_F, 8, 0);
478 output = *get_format ();
481 for (i = 0; i < name_cnt; i++)
483 struct dls_var_spec *spec;
486 v = dict_create_var (dict, name[i], fmt_var_width (&input));
489 msg (SE, _("%s is a duplicate variable name."), name[i]);
492 v->print = v->write = output;
494 spec = pool_alloc (dls->pool, sizeof *spec);
497 strcpy (spec->name, v->name);
498 ll_push_tail (&dls->specs, &spec->ll);
505 /* Displays a table giving information on free-format variable parsing
508 dump_free_table (const struct data_list_pgm *dls,
509 const struct file_handle *fh)
512 struct dls_var_spec *spec;
516 spec_cnt = ll_count (&dls->specs);
518 t = tab_create (2, spec_cnt + 1, 0);
519 tab_columns (t, TAB_COL_DOWN, 1);
520 tab_headers (t, 0, 0, 1, 0);
521 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
522 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
523 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
524 tab_hline (t, TAL_2, 0, 1, 1);
525 tab_dim (t, tab_natural_dimensions);
527 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
529 char str[FMT_STRING_LEN_MAX + 1];
530 tab_text (t, 0, row, TAB_LEFT, spec->name);
531 tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
532 fmt_to_string (&spec->input, str));
536 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
541 /* Input procedure. */
543 /* Extracts a field from the current position in the current
544 record. Fields can be unquoted or quoted with single- or
545 double-quote characters.
547 *FIELD is set to the field content. The caller must not
548 or destroy this constant string.
550 After parsing the field, sets the current position in the
551 record to just past the field and any trailing delimiter.
552 Returns 0 on failure or a 1-based column number indicating the
553 beginning of the field on success. */
555 cut_field (const struct data_list_pgm *dls, struct substring *field)
557 struct substring line, p;
559 if (dfm_eof (dls->reader))
561 if (ds_is_empty (&dls->delims))
562 dfm_expand_tabs (dls->reader);
563 line = p = dfm_get_record (dls->reader);
565 if (ds_is_empty (&dls->delims))
567 bool missing_quote = false;
569 /* Skip leading whitespace. */
570 ss_ltrim (&p, ss_cstr (CC_SPACES));
574 /* Handle actual data, whether quoted or unquoted. */
575 if (ss_match_char (&p, '\''))
576 missing_quote = !ss_get_until (&p, '\'', field);
577 else if (ss_match_char (&p, '"'))
578 missing_quote = !ss_get_until (&p, '"', field);
580 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
582 msg (SW, _("Quoted string extends beyond end of line."));
584 /* Skip trailing whitespace and a single comma if present. */
585 ss_ltrim (&p, ss_cstr (CC_SPACES));
586 ss_match_char (&p, ',');
588 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
592 if (!ss_is_empty (p))
593 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
594 else if (dfm_columns_past_end (dls->reader) == 0)
596 /* A blank line or a line that ends in a delimiter has a
597 trailing blank field. */
603 /* Advance past the field.
605 Also advance past a trailing delimiter, regardless of
606 whether one actually existed. If we "skip" a delimiter
607 that was not actually there, then we will return
608 end-of-line on our next call, which is what we want. */
609 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
614 static bool read_from_data_list_fixed (const struct data_list_pgm *,
616 static bool read_from_data_list_free (const struct data_list_pgm *,
618 static bool read_from_data_list_list (const struct data_list_pgm *,
621 /* Reads a case from DLS into C.
622 Returns true if successful, false at end of file or on I/O error. */
624 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
628 dfm_push (dls->reader);
632 retval = read_from_data_list_fixed (dls, c);
635 retval = read_from_data_list_free (dls, c);
638 retval = read_from_data_list_list (dls, c);
643 dfm_pop (dls->reader);
648 /* Reads a case from the data file into C, parsing it according
649 to fixed-format syntax rules in DLS.
650 Returns true if successful, false at end of file or on I/O error. */
652 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
654 struct dls_var_spec *spec;
657 if (dfm_eof (dls->reader))
660 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
661 for (row = 1; row <= dls->record_cnt; row++)
663 struct substring line;
665 if (dfm_eof (dls->reader))
667 msg (SW, _("Partial case of %d of %d records discarded."),
668 row - 1, dls->record_cnt);
671 dfm_expand_tabs (dls->reader);
672 line = dfm_get_record (dls->reader);
674 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
678 data_in_finite_line (&di, ss_data (line), ss_length (line),
680 spec->first_column + spec->input.w - 1);
681 di.v = case_data_rw (c, spec->fv);
682 di.flags = DI_IMPLIED_DECIMALS;
683 di.f1 = spec->first_column;
684 di.format = spec->input;
689 dfm_forward_record (dls->reader);
695 /* Reads a case from the data file into C, parsing it according
696 to free-format syntax rules in DLS.
697 Returns true if successful, false at end of file or on I/O error. */
699 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
701 struct dls_var_spec *spec;
703 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
705 struct substring field;
708 /* Cut out a field and read in a new record if necessary. */
709 while (!cut_field (dls, &field))
711 if (!dfm_eof (dls->reader))
712 dfm_forward_record (dls->reader);
713 if (dfm_eof (dls->reader))
715 if (&spec->ll != ll_head (&dls->specs))
716 msg (SW, _("Partial case discarded. The first variable "
717 "missing was %s."), spec->name);
722 di.s = ss_data (field);
723 di.e = ss_end (field);
724 di.v = case_data_rw (c, spec->fv);
726 di.f1 = dfm_get_column (dls->reader, ss_data (field));
727 di.format = spec->input;
733 /* Reads a case from the data file and parses it according to
734 list-format syntax rules.
735 Returns true if successful, false at end of file or on I/O error. */
737 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
739 struct dls_var_spec *spec;
741 if (dfm_eof (dls->reader))
744 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
746 struct substring field;
749 if (!cut_field (dls, &field))
751 if (get_undefined ())
752 msg (SW, _("Missing value(s) for all variables from %s onward. "
753 "These will be filled with the system-missing value "
754 "or blanks, as appropriate."),
756 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
758 int width = fmt_var_width (&spec->input);
760 case_data_rw (c, spec->fv)->f = SYSMIS;
762 memset (case_data_rw (c, spec->fv)->s, ' ', width);
767 di.s = ss_data (field);
768 di.e = ss_end (field);
769 di.v = case_data_rw (c, spec->fv);
771 di.f1 = dfm_get_column (dls->reader, ss_data (field));
772 di.format = spec->input;
776 dfm_forward_record (dls->reader);
780 /* Destroys DATA LIST transformation DLS.
781 Returns true if successful, false if an I/O error occurred. */
783 data_list_trns_free (void *dls_)
785 struct data_list_pgm *dls = dls_;
786 dfm_close_reader (dls->reader);
787 pool_destroy (dls->pool);
791 /* Handle DATA LIST transformation DLS, parsing data into C. */
793 data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
795 struct data_list_pgm *dls = dls_;
798 if (read_from_data_list (dls, c))
799 retval = TRNS_CONTINUE;
800 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
802 /* An I/O error, or encountering end of file for a second
803 time, should be escalated into a more serious error. */
807 retval = TRNS_END_FILE;
809 /* If there was an END subcommand handle it. */
810 if (dls->end != NULL)
812 double *end = &case_data_rw (c, dls->end->fv)->f;
813 if (retval == TRNS_DROP_CASE)
816 retval = TRNS_END_FILE;
825 /* Reads all the records from the data file and passes them to
827 Returns true if successful, false if an I/O error occurred. */
829 data_list_source_read (struct case_source *source,
831 write_case_func *write_case, write_case_data wc_data)
833 struct data_list_pgm *dls = source->aux;
835 /* Skip the requested number of records before reading the
837 while (dls->skip_records > 0)
839 if (dfm_eof (dls->reader))
841 dfm_forward_record (dls->reader);
849 if (!read_from_data_list (dls, c))
850 return !dfm_reader_error (dls->reader);
852 dfm_push (dls->reader);
853 ok = write_case (wc_data);
854 dfm_pop (dls->reader);
860 /* Destroys the source's internal data. */
862 data_list_source_destroy (struct case_source *source)
864 data_list_trns_free (source->aux);
867 static const struct case_source_class data_list_source_class =
871 data_list_source_read,
872 data_list_source_destroy,