1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/case-source.h>
30 #include <data/data-in.h>
31 #include <data/dictionary.h>
32 #include <data/format.h>
33 #include <data/procedure.h>
34 #include <data/settings.h>
35 #include <data/transformations.h>
36 #include <data/variable.h>
37 #include <language/command.h>
38 #include <language/data-io/data-reader.h>
39 #include <language/data-io/file-handle.h>
40 #include <language/data-io/inpt-pgm.h>
41 #include <language/data-io/placement-parser.h>
42 #include <language/lexer/format-parser.h>
43 #include <language/lexer/lexer.h>
44 #include <language/lexer/variable-parser.h>
45 #include <libpspp/alloc.h>
46 #include <libpspp/assertion.h>
47 #include <libpspp/compiler.h>
48 #include <libpspp/ll.h>
49 #include <libpspp/message.h>
50 #include <libpspp/misc.h>
51 #include <libpspp/pool.h>
52 #include <libpspp/str.h>
53 #include <output/table.h>
59 #define _(msgid) gettext (msgid)
61 /* Utility function. */
63 /* Describes how to parse one variable. */
66 struct ll ll; /* List element. */
69 struct fmt_spec input; /* Input format of this field. */
70 int fv; /* First value in case. */
71 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
73 /* Fixed format only. */
74 int record; /* Record number (1-based). */
75 int first_column; /* Column numbers in record. */
78 static struct dls_var_spec *
79 ll_to_dls_var_spec (struct ll *ll)
81 return ll_data (ll, struct dls_var_spec, ll);
84 /* Constants for DATA LIST type. */
92 /* DATA LIST private data structure. */
95 struct pool *pool; /* Used for all DATA LIST storage. */
96 struct ll_list specs; /* List of dls_var_specs. */
97 struct dfm_reader *reader; /* Data file reader. */
98 enum dls_type type; /* Type of DATA LIST construct. */
99 struct variable *end; /* Variable specified on END subcommand. */
100 int record_cnt; /* Number of records. */
101 struct string delims; /* Field delimiters. */
104 static const struct case_source_class data_list_source_class;
106 static bool parse_fixed (struct dictionary *dict,
107 struct pool *tmp_pool, struct data_list_pgm *);
108 static bool parse_free (struct dictionary *dict,
109 struct pool *tmp_pool, struct data_list_pgm *);
110 static void dump_fixed_table (const struct ll_list *,
111 const struct file_handle *, int record_cnt);
112 static void dump_free_table (const struct data_list_pgm *,
113 const struct file_handle *);
115 static trns_free_func data_list_trns_free;
116 static trns_proc_func data_list_trns_proc;
119 cmd_data_list (struct dataset *ds)
121 struct dictionary *dict = dataset_dict (ds);
122 struct data_list_pgm *dls;
123 int table = -1; /* Print table if nonzero, -1=undecided. */
124 struct file_handle *fh = fh_inline_file ();
125 struct pool *tmp_pool;
128 if (!in_input_program ())
129 discard_variables (ds);
131 dls = pool_create_container (struct data_list_pgm, pool);
132 ll_init (&dls->specs);
137 ds_init_empty (&dls->delims);
138 ds_register_pool (&dls->delims, dls->pool);
140 tmp_pool = pool_create_subpool (dls->pool);
144 if (lex_match_id ("FILE"))
147 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
151 else if (lex_match_id ("RECORDS"))
155 if (!lex_force_int ())
157 dls->record_cnt = lex_integer ();
161 else if (lex_match_id ("END"))
165 msg (SE, _("The END subcommand may only be specified once."));
170 if (!lex_force_id ())
172 dls->end = dict_lookup_var (dataset_dict (ds), tokid);
174 dls->end = dict_create_var_assert (dataset_dict (ds), tokid, 0);
177 else if (token == T_ID)
179 if (lex_match_id ("NOTABLE"))
181 else if (lex_match_id ("TABLE"))
186 if (lex_match_id ("FIXED"))
188 else if (lex_match_id ("FREE"))
190 else if (lex_match_id ("LIST"))
200 msg (SE, _("Only one of FIXED, FREE, or LIST may "
206 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
209 while (!lex_match (')'))
213 if (lex_match_id ("TAB"))
215 else if (token == T_STRING && ds_length (&tokstr) == 1)
217 delim = ds_first (&tokstr);
226 ds_put_char (&dls->delims, delim);
240 fh_set_default_handle (fh);
243 dls->type = DLS_FIXED;
246 table = dls->type != DLS_FREE;
248 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (dict, tmp_pool, dls);
252 if (lex_end_of_command () != CMD_SUCCESS)
257 if (dls->type == DLS_FIXED)
258 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
260 dump_free_table (dls, fh);
263 dls->reader = dfm_open_reader (fh);
264 if (dls->reader == NULL)
267 if (in_input_program ())
268 add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
270 proc_set_source (ds, create_case_source (&data_list_source_class, dls));
272 pool_destroy (tmp_pool);
277 data_list_trns_free (dls);
278 return CMD_CASCADING_FAILURE;
281 /* Fixed-format parsing. */
283 /* Parses all the variable specifications for DATA LIST FIXED,
284 storing them into DLS. Uses TMP_POOL for data that is not
285 needed once parsing is complete. Returns true only if
288 parse_fixed (struct dictionary *dict,
289 struct pool *tmp_pool, struct data_list_pgm *dls)
291 int last_nonempty_record;
298 size_t name_cnt, name_idx;
299 struct fmt_spec *formats, *f;
302 /* Parse everything. */
303 if (!parse_record_placement (&record, &column)
304 || !parse_DATA_LIST_vars_pool (tmp_pool, &names, &name_cnt, PV_NONE)
305 || !parse_var_placements (tmp_pool, name_cnt, true,
306 &formats, &format_cnt))
309 /* Create variables and var specs. */
311 for (f = formats; f < &formats[format_cnt]; f++)
312 if (!execute_placement_format (f, &record, &column))
317 struct dls_var_spec *spec;
319 name = names[name_idx++];
321 /* Create variable. */
322 width = fmt_var_width (f);
323 v = dict_create_var (dict, name, width);
327 struct fmt_spec output = fmt_for_output_from_input (f);
334 This can be acceptable if we're in INPUT
335 PROGRAM, but only if the existing variable has
336 the same width as the one we would have
338 if (!in_input_program ())
340 msg (SE, _("%s is a duplicate variable name."), name);
344 v = dict_lookup_var_assert (dict, name);
345 if ((width != 0) != (v->width != 0))
347 msg (SE, _("There is already a variable %s of a "
352 if (width != 0 && width != v->width)
354 msg (SE, _("There is already a string variable %s of a "
355 "different width."), name);
360 /* Create specifier for parsing the variable. */
361 spec = pool_alloc (dls->pool, sizeof *spec);
364 spec->record = record;
365 spec->first_column = column;
366 strcpy (spec->name, v->name);
367 ll_push_tail (&dls->specs, &spec->ll);
371 assert (name_idx == name_cnt);
373 if (ll_is_empty (&dls->specs))
375 msg (SE, _("At least one variable must be specified."));
379 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
380 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
382 msg (SE, _("Variables are specified on records that "
383 "should not exist according to RECORDS subcommand."));
386 else if (!dls->record_cnt)
387 dls->record_cnt = last_nonempty_record;
392 /* Displays a table giving information on fixed-format variable
393 parsing on DATA LIST. */
395 dump_fixed_table (const struct ll_list *specs,
396 const struct file_handle *fh, int record_cnt)
400 struct dls_var_spec *spec;
403 spec_cnt = ll_count (specs);
404 t = tab_create (4, spec_cnt + 1, 0);
405 tab_columns (t, TAB_COL_DOWN, 1);
406 tab_headers (t, 0, 0, 1, 0);
407 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
408 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
409 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
410 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
411 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
412 tab_hline (t, TAL_2, 0, 3, 1);
413 tab_dim (t, tab_natural_dimensions);
416 ll_for_each (spec, struct dls_var_spec, ll, specs)
418 char fmt_string[FMT_STRING_LEN_MAX + 1];
419 tab_text (t, 0, row, TAB_LEFT, spec->name);
420 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
421 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
422 spec->first_column, spec->first_column + spec->input.w - 1);
423 tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
424 fmt_to_string (&spec->input, fmt_string));
428 tab_title (t, ngettext ("Reading %d record from %s.",
429 "Reading %d records from %s.", record_cnt),
430 record_cnt, fh_get_name (fh));
434 /* Free-format parsing. */
436 /* Parses variable specifications for DATA LIST FREE and adds
437 them to DLS. Uses TMP_POOL for data that is not needed once
438 parsing is complete. Returns true only if successful. */
440 parse_free (struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *dls)
445 struct fmt_spec input, output;
450 if (!parse_DATA_LIST_vars_pool (tmp_pool, &name, &name_cnt, PV_NONE))
455 if (!parse_format_specifier (&input)
456 || !fmt_check_input (&input)
457 || !lex_force_match (')'))
459 output = fmt_for_output_from_input (&input);
464 input = fmt_for_input (FMT_F, 8, 0);
465 output = *get_format ();
468 for (i = 0; i < name_cnt; i++)
470 struct dls_var_spec *spec;
473 v = dict_create_var (dict, name[i], fmt_var_width (&input));
476 msg (SE, _("%s is a duplicate variable name."), name[i]);
479 v->print = v->write = output;
481 spec = pool_alloc (dls->pool, sizeof *spec);
484 strcpy (spec->name, v->name);
485 ll_push_tail (&dls->specs, &spec->ll);
492 /* Displays a table giving information on free-format variable parsing
495 dump_free_table (const struct data_list_pgm *dls,
496 const struct file_handle *fh)
499 struct dls_var_spec *spec;
503 spec_cnt = ll_count (&dls->specs);
505 t = tab_create (2, spec_cnt + 1, 0);
506 tab_columns (t, TAB_COL_DOWN, 1);
507 tab_headers (t, 0, 0, 1, 0);
508 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
509 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
510 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
511 tab_hline (t, TAL_2, 0, 1, 1);
512 tab_dim (t, tab_natural_dimensions);
514 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
516 char str[FMT_STRING_LEN_MAX + 1];
517 tab_text (t, 0, row, TAB_LEFT, spec->name);
518 tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
519 fmt_to_string (&spec->input, str));
523 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
528 /* Input procedure. */
530 /* Extracts a field from the current position in the current
531 record. Fields can be unquoted or quoted with single- or
532 double-quote characters.
534 *FIELD is set to the field content. The caller must not
535 or destroy this constant string.
537 After parsing the field, sets the current position in the
538 record to just past the field and any trailing delimiter.
539 Returns 0 on failure or a 1-based column number indicating the
540 beginning of the field on success. */
542 cut_field (const struct data_list_pgm *dls, struct substring *field)
544 struct substring line, p;
546 if (dfm_eof (dls->reader))
548 if (ds_is_empty (&dls->delims))
549 dfm_expand_tabs (dls->reader);
550 line = p = dfm_get_record (dls->reader);
552 if (ds_is_empty (&dls->delims))
554 bool missing_quote = false;
556 /* Skip leading whitespace. */
557 ss_ltrim (&p, ss_cstr (CC_SPACES));
561 /* Handle actual data, whether quoted or unquoted. */
562 if (ss_match_char (&p, '\''))
563 missing_quote = !ss_get_until (&p, '\'', field);
564 else if (ss_match_char (&p, '"'))
565 missing_quote = !ss_get_until (&p, '"', field);
567 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
569 msg (SW, _("Quoted string extends beyond end of line."));
571 /* Skip trailing whitespace and a single comma if present. */
572 ss_ltrim (&p, ss_cstr (CC_SPACES));
573 ss_match_char (&p, ',');
575 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
579 if (!ss_is_empty (p))
580 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
581 else if (dfm_columns_past_end (dls->reader) == 0)
583 /* A blank line or a line that ends in a delimiter has a
584 trailing blank field. */
590 /* Advance past the field.
592 Also advance past a trailing delimiter, regardless of
593 whether one actually existed. If we "skip" a delimiter
594 that was not actually there, then we will return
595 end-of-line on our next call, which is what we want. */
596 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
601 static bool read_from_data_list_fixed (const struct data_list_pgm *,
603 static bool read_from_data_list_free (const struct data_list_pgm *,
605 static bool read_from_data_list_list (const struct data_list_pgm *,
608 /* Reads a case from DLS into C.
609 Returns true if successful, false at end of file or on I/O error. */
611 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
615 dfm_push (dls->reader);
619 retval = read_from_data_list_fixed (dls, c);
622 retval = read_from_data_list_free (dls, c);
625 retval = read_from_data_list_list (dls, c);
630 dfm_pop (dls->reader);
635 /* Reads a case from the data file into C, parsing it according
636 to fixed-format syntax rules in DLS.
637 Returns true if successful, false at end of file or on I/O error. */
639 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
641 struct dls_var_spec *spec;
644 if (dfm_eof (dls->reader))
647 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
648 for (row = 1; row <= dls->record_cnt; row++)
650 struct substring line;
652 if (dfm_eof (dls->reader))
654 msg (SW, _("Partial case of %d of %d records discarded."),
655 row - 1, dls->record_cnt);
658 dfm_expand_tabs (dls->reader);
659 line = dfm_get_record (dls->reader);
661 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
665 data_in_finite_line (&di, ss_data (line), ss_length (line),
667 spec->first_column + spec->input.w - 1);
668 di.v = case_data_rw (c, spec->fv);
669 di.flags = DI_IMPLIED_DECIMALS;
670 di.f1 = spec->first_column;
671 di.format = spec->input;
676 dfm_forward_record (dls->reader);
682 /* Reads a case from the data file into C, parsing it according
683 to free-format syntax rules in DLS.
684 Returns true if successful, false at end of file or on I/O error. */
686 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
688 struct dls_var_spec *spec;
690 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
692 struct substring field;
695 /* Cut out a field and read in a new record if necessary. */
696 while (!cut_field (dls, &field))
698 if (!dfm_eof (dls->reader))
699 dfm_forward_record (dls->reader);
700 if (dfm_eof (dls->reader))
702 if (&spec->ll != ll_head (&dls->specs))
703 msg (SW, _("Partial case discarded. The first variable "
704 "missing was %s."), spec->name);
709 di.s = ss_data (field);
710 di.e = ss_end (field);
711 di.v = case_data_rw (c, spec->fv);
713 di.f1 = dfm_get_column (dls->reader, ss_data (field));
714 di.format = spec->input;
720 /* Reads a case from the data file and parses it according to
721 list-format syntax rules.
722 Returns true if successful, false at end of file or on I/O error. */
724 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
726 struct dls_var_spec *spec;
728 if (dfm_eof (dls->reader))
731 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
733 struct substring field;
736 if (!cut_field (dls, &field))
738 if (get_undefined ())
739 msg (SW, _("Missing value(s) for all variables from %s onward. "
740 "These will be filled with the system-missing value "
741 "or blanks, as appropriate."),
743 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
745 int width = fmt_var_width (&spec->input);
747 case_data_rw (c, spec->fv)->f = SYSMIS;
749 memset (case_data_rw (c, spec->fv)->s, ' ', width);
754 di.s = ss_data (field);
755 di.e = ss_end (field);
756 di.v = case_data_rw (c, spec->fv);
758 di.f1 = dfm_get_column (dls->reader, ss_data (field));
759 di.format = spec->input;
763 dfm_forward_record (dls->reader);
767 /* Destroys DATA LIST transformation DLS.
768 Returns true if successful, false if an I/O error occurred. */
770 data_list_trns_free (void *dls_)
772 struct data_list_pgm *dls = dls_;
773 dfm_close_reader (dls->reader);
774 pool_destroy (dls->pool);
778 /* Handle DATA LIST transformation DLS, parsing data into C. */
780 data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
782 struct data_list_pgm *dls = dls_;
785 if (read_from_data_list (dls, c))
786 retval = TRNS_CONTINUE;
787 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
789 /* An I/O error, or encountering end of file for a second
790 time, should be escalated into a more serious error. */
794 retval = TRNS_END_FILE;
796 /* If there was an END subcommand handle it. */
797 if (dls->end != NULL)
799 double *end = &case_data_rw (c, dls->end->fv)->f;
800 if (retval == TRNS_DROP_CASE)
803 retval = TRNS_END_FILE;
812 /* Reads all the records from the data file and passes them to
814 Returns true if successful, false if an I/O error occurred. */
816 data_list_source_read (struct case_source *source,
818 write_case_func *write_case, write_case_data wc_data)
820 struct data_list_pgm *dls = source->aux;
826 if (!read_from_data_list (dls, c))
827 return !dfm_reader_error (dls->reader);
829 dfm_push (dls->reader);
830 ok = write_case (wc_data);
831 dfm_pop (dls->reader);
837 /* Destroys the source's internal data. */
839 data_list_source_destroy (struct case_source *source)
841 data_list_trns_free (source->aux);
844 static const struct case_source_class data_list_source_class =
848 data_list_source_read,
849 data_list_source_destroy,