1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include <data/case-source.h>
27 #include <data/case.h>
28 #include <data/case-source.h>
29 #include <data/data-in.h>
30 #include <data/dictionary.h>
31 #include <data/format.h>
32 #include <data/procedure.h>
33 #include <data/settings.h>
34 #include <data/transformations.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/data-reader.h>
38 #include <language/data-io/file-handle.h>
39 #include <language/data-io/inpt-pgm.h>
40 #include <language/data-io/placement-parser.h>
41 #include <language/lexer/format-parser.h>
42 #include <language/lexer/lexer.h>
43 #include <language/lexer/variable-parser.h>
44 #include <libpspp/alloc.h>
45 #include <libpspp/assertion.h>
46 #include <libpspp/compiler.h>
47 #include <libpspp/ll.h>
48 #include <libpspp/message.h>
49 #include <libpspp/misc.h>
50 #include <libpspp/pool.h>
51 #include <libpspp/str.h>
52 #include <output/table.h>
58 #define _(msgid) gettext (msgid)
60 /* Utility function. */
62 /* Describes how to parse one variable. */
65 struct ll ll; /* List element. */
68 struct fmt_spec input; /* Input format of this field. */
69 int fv; /* First value in case. */
70 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
72 /* Fixed format only. */
73 int record; /* Record number (1-based). */
74 int first_column; /* Column numbers in record. */
77 static struct dls_var_spec *
78 ll_to_dls_var_spec (struct ll *ll)
80 return ll_data (ll, struct dls_var_spec, ll);
83 /* Constants for DATA LIST type. */
91 /* DATA LIST private data structure. */
94 struct pool *pool; /* Used for all DATA LIST storage. */
95 struct ll_list specs; /* List of dls_var_specs. */
96 struct dfm_reader *reader; /* Data file reader. */
97 enum dls_type type; /* Type of DATA LIST construct. */
98 struct variable *end; /* Variable specified on END subcommand. */
99 int record_cnt; /* Number of records. */
100 struct string delims; /* Field delimiters. */
101 int skip_records; /* Records to skip before first case. */
104 static const struct case_source_class data_list_source_class;
106 static bool parse_fixed (struct lexer *, struct dictionary *dict,
107 struct pool *tmp_pool, struct data_list_pgm *);
108 static bool parse_free (struct lexer *, struct dictionary *dict,
109 struct pool *tmp_pool, struct data_list_pgm *);
110 static void dump_fixed_table (const struct ll_list *,
111 const struct file_handle *, int record_cnt);
112 static void dump_free_table (const struct data_list_pgm *,
113 const struct file_handle *);
115 static trns_free_func data_list_trns_free;
116 static trns_proc_func data_list_trns_proc;
119 cmd_data_list (struct lexer *lexer, struct dataset *ds)
121 struct dictionary *dict = dataset_dict (ds);
122 struct data_list_pgm *dls;
123 int table = -1; /* Print table if nonzero, -1=undecided. */
124 struct file_handle *fh = fh_inline_file ();
125 struct pool *tmp_pool;
128 if (!in_input_program ())
129 discard_variables (ds);
131 dls = pool_create_container (struct data_list_pgm, pool);
132 ll_init (&dls->specs);
137 dls->skip_records = 0;
138 ds_init_empty (&dls->delims);
139 ds_register_pool (&dls->delims, dls->pool);
141 tmp_pool = pool_create_subpool (dls->pool);
143 while (lex_token (lexer) != '/')
145 if (lex_match_id (lexer, "FILE"))
147 lex_match (lexer, '=');
148 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE);
152 else if (lex_match_id (lexer, "RECORDS"))
154 lex_match (lexer, '=');
155 lex_match (lexer, '(');
156 if (!lex_force_int (lexer))
158 dls->record_cnt = lex_integer (lexer);
160 lex_match (lexer, ')');
162 else if (lex_match_id (lexer, "SKIP"))
164 lex_match (lexer, '=');
165 if (!lex_force_int (lexer))
167 dls->skip_records = lex_integer (lexer);
170 else if (lex_match_id (lexer, "END"))
174 msg (SE, _("The END subcommand may only be specified once."));
178 lex_match (lexer, '=');
179 if (!lex_force_id (lexer))
181 dls->end = dict_lookup_var (dataset_dict (ds), lex_tokid (lexer));
183 dls->end = dict_create_var_assert (dataset_dict (ds), lex_tokid (lexer), 0);
186 else if (lex_token (lexer) == T_ID)
188 if (lex_match_id (lexer, "NOTABLE"))
190 else if (lex_match_id (lexer, "TABLE"))
195 if (lex_match_id (lexer, "FIXED"))
197 else if (lex_match_id (lexer, "FREE"))
199 else if (lex_match_id (lexer, "LIST"))
203 lex_error (lexer, NULL);
209 msg (SE, _("Only one of FIXED, FREE, or LIST may "
215 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
216 && lex_match (lexer, '('))
218 while (!lex_match (lexer, ')'))
222 if (lex_match_id (lexer, "TAB"))
224 else if (lex_token (lexer) == T_STRING && ds_length (lex_tokstr (lexer)) == 1)
226 delim = ds_first (lex_tokstr (lexer));
231 lex_error (lexer, NULL);
235 ds_put_char (&dls->delims, delim);
237 lex_match (lexer, ',');
244 lex_error (lexer, NULL);
249 fh_set_default_handle (fh);
252 dls->type = DLS_FIXED;
255 table = dls->type != DLS_FREE;
257 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (lexer, dict, tmp_pool, dls);
261 if (lex_end_of_command (lexer) != CMD_SUCCESS)
266 if (dls->type == DLS_FIXED)
267 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
269 dump_free_table (dls, fh);
272 dls->reader = dfm_open_reader (fh, lexer);
273 if (dls->reader == NULL)
276 if (in_input_program ())
277 add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
279 proc_set_source (ds, create_case_source (&data_list_source_class, dls));
281 pool_destroy (tmp_pool);
286 data_list_trns_free (dls);
287 return CMD_CASCADING_FAILURE;
290 /* Fixed-format parsing. */
292 /* Parses all the variable specifications for DATA LIST FIXED,
293 storing them into DLS. Uses TMP_POOL for data that is not
294 needed once parsing is complete. Returns true only if
297 parse_fixed (struct lexer *lexer, struct dictionary *dict,
298 struct pool *tmp_pool, struct data_list_pgm *dls)
300 int last_nonempty_record;
304 while (lex_token (lexer) != '.')
307 size_t name_cnt, name_idx;
308 struct fmt_spec *formats, *f;
311 /* Parse everything. */
312 if (!parse_record_placement (lexer, &record, &column)
313 || !parse_DATA_LIST_vars_pool (lexer, tmp_pool,
314 &names, &name_cnt, PV_NONE)
315 || !parse_var_placements (lexer, tmp_pool, name_cnt, true,
316 &formats, &format_cnt))
319 /* Create variables and var specs. */
321 for (f = formats; f < &formats[format_cnt]; f++)
322 if (!execute_placement_format (f, &record, &column))
327 struct dls_var_spec *spec;
329 name = names[name_idx++];
331 /* Create variable. */
332 width = fmt_var_width (f);
333 v = dict_create_var (dict, name, width);
337 struct fmt_spec output = fmt_for_output_from_input (f);
338 var_set_both_formats (v, &output);
343 This can be acceptable if we're in INPUT
344 PROGRAM, but only if the existing variable has
345 the same width as the one we would have
347 if (!in_input_program ())
349 msg (SE, _("%s is a duplicate variable name."), name);
353 v = dict_lookup_var_assert (dict, name);
354 if ((width != 0) != (var_get_width (v) != 0))
356 msg (SE, _("There is already a variable %s of a "
361 if (width != 0 && width != var_get_width (v))
363 msg (SE, _("There is already a string variable %s of a "
364 "different width."), name);
369 /* Create specifier for parsing the variable. */
370 spec = pool_alloc (dls->pool, sizeof *spec);
372 spec->fv = var_get_case_index (v);
373 spec->record = record;
374 spec->first_column = column;
375 strcpy (spec->name, var_get_name (v));
376 ll_push_tail (&dls->specs, &spec->ll);
380 assert (name_idx == name_cnt);
382 if (ll_is_empty (&dls->specs))
384 msg (SE, _("At least one variable must be specified."));
388 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
389 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
391 msg (SE, _("Variables are specified on records that "
392 "should not exist according to RECORDS subcommand."));
395 else if (!dls->record_cnt)
396 dls->record_cnt = last_nonempty_record;
401 /* Displays a table giving information on fixed-format variable
402 parsing on DATA LIST. */
404 dump_fixed_table (const struct ll_list *specs,
405 const struct file_handle *fh, int record_cnt)
409 struct dls_var_spec *spec;
412 spec_cnt = ll_count (specs);
413 t = tab_create (4, spec_cnt + 1, 0);
414 tab_columns (t, TAB_COL_DOWN, 1);
415 tab_headers (t, 0, 0, 1, 0);
416 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
417 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
418 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
419 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
420 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
421 tab_hline (t, TAL_2, 0, 3, 1);
422 tab_dim (t, tab_natural_dimensions);
425 ll_for_each (spec, struct dls_var_spec, ll, specs)
427 char fmt_string[FMT_STRING_LEN_MAX + 1];
428 tab_text (t, 0, row, TAB_LEFT, spec->name);
429 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
430 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
431 spec->first_column, spec->first_column + spec->input.w - 1);
432 tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
433 fmt_to_string (&spec->input, fmt_string));
437 tab_title (t, ngettext ("Reading %d record from %s.",
438 "Reading %d records from %s.", record_cnt),
439 record_cnt, fh_get_name (fh));
443 /* Free-format parsing. */
445 /* Parses variable specifications for DATA LIST FREE and adds
446 them to DLS. Uses TMP_POOL for data that is not needed once
447 parsing is complete. Returns true only if successful. */
449 parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool,
450 struct data_list_pgm *dls)
453 while (lex_token (lexer) != '.')
455 struct fmt_spec input, output;
460 if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool,
461 &name, &name_cnt, PV_NONE))
464 if (lex_match (lexer, '('))
466 if (!parse_format_specifier (lexer, &input)
467 || !fmt_check_input (&input)
468 || !lex_force_match (lexer, ')'))
471 /* As a special case, N format is treated as F format
472 for free-field input. */
473 if (input.type == FMT_N)
476 output = fmt_for_output_from_input (&input);
480 lex_match (lexer, '*');
481 input = fmt_for_input (FMT_F, 8, 0);
482 output = *get_format ();
485 for (i = 0; i < name_cnt; i++)
487 struct dls_var_spec *spec;
490 v = dict_create_var (dict, name[i], fmt_var_width (&input));
493 msg (SE, _("%s is a duplicate variable name."), name[i]);
496 var_set_both_formats (v, &output);
498 spec = pool_alloc (dls->pool, sizeof *spec);
500 spec->fv = var_get_case_index (v);
501 strcpy (spec->name, var_get_name (v));
502 ll_push_tail (&dls->specs, &spec->ll);
509 /* Displays a table giving information on free-format variable parsing
512 dump_free_table (const struct data_list_pgm *dls,
513 const struct file_handle *fh)
516 struct dls_var_spec *spec;
520 spec_cnt = ll_count (&dls->specs);
522 t = tab_create (2, spec_cnt + 1, 0);
523 tab_columns (t, TAB_COL_DOWN, 1);
524 tab_headers (t, 0, 0, 1, 0);
525 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
526 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
527 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
528 tab_hline (t, TAL_2, 0, 1, 1);
529 tab_dim (t, tab_natural_dimensions);
531 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
533 char str[FMT_STRING_LEN_MAX + 1];
534 tab_text (t, 0, row, TAB_LEFT, spec->name);
535 tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
536 fmt_to_string (&spec->input, str));
540 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
545 /* Input procedure. */
547 /* Extracts a field from the current position in the current
548 record. Fields can be unquoted or quoted with single- or
549 double-quote characters.
551 *FIELD is set to the field content. The caller must not
552 or destroy this constant string.
554 After parsing the field, sets the current position in the
555 record to just past the field and any trailing delimiter.
556 Returns 0 on failure or a 1-based column number indicating the
557 beginning of the field on success. */
559 cut_field (const struct data_list_pgm *dls, struct substring *field)
561 struct substring line, p;
563 if (dfm_eof (dls->reader))
565 if (ds_is_empty (&dls->delims))
566 dfm_expand_tabs (dls->reader);
567 line = p = dfm_get_record (dls->reader);
569 if (ds_is_empty (&dls->delims))
571 bool missing_quote = false;
573 /* Skip leading whitespace. */
574 ss_ltrim (&p, ss_cstr (CC_SPACES));
578 /* Handle actual data, whether quoted or unquoted. */
579 if (ss_match_char (&p, '\''))
580 missing_quote = !ss_get_until (&p, '\'', field);
581 else if (ss_match_char (&p, '"'))
582 missing_quote = !ss_get_until (&p, '"', field);
584 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
586 msg (SW, _("Quoted string extends beyond end of line."));
588 /* Skip trailing whitespace and a single comma if present. */
589 ss_ltrim (&p, ss_cstr (CC_SPACES));
590 ss_match_char (&p, ',');
592 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
596 if (!ss_is_empty (p))
597 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
598 else if (dfm_columns_past_end (dls->reader) == 0)
600 /* A blank line or a line that ends in a delimiter has a
601 trailing blank field. */
607 /* Advance past the field.
609 Also advance past a trailing delimiter, regardless of
610 whether one actually existed. If we "skip" a delimiter
611 that was not actually there, then we will return
612 end-of-line on our next call, which is what we want. */
613 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
618 static bool read_from_data_list_fixed (const struct data_list_pgm *,
620 static bool read_from_data_list_free (const struct data_list_pgm *,
622 static bool read_from_data_list_list (const struct data_list_pgm *,
625 /* Reads a case from DLS into C.
626 Returns true if successful, false at end of file or on I/O error. */
628 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
632 dfm_push (dls->reader);
636 retval = read_from_data_list_fixed (dls, c);
639 retval = read_from_data_list_free (dls, c);
642 retval = read_from_data_list_list (dls, c);
647 dfm_pop (dls->reader);
652 /* Reads a case from the data file into C, parsing it according
653 to fixed-format syntax rules in DLS.
654 Returns true if successful, false at end of file or on I/O error. */
656 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
658 struct dls_var_spec *spec;
661 if (dfm_eof (dls->reader))
664 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
665 for (row = 1; row <= dls->record_cnt; row++)
667 struct substring line;
669 if (dfm_eof (dls->reader))
671 msg (SW, _("Partial case of %d of %d records discarded."),
672 row - 1, dls->record_cnt);
675 dfm_expand_tabs (dls->reader);
676 line = dfm_get_record (dls->reader);
678 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
679 data_in (ss_substr (line, spec->first_column - 1, spec->input.w),
680 spec->input.type, spec->input.d, spec->first_column,
681 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
683 dfm_forward_record (dls->reader);
689 /* Reads a case from the data file into C, parsing it according
690 to free-format syntax rules in DLS.
691 Returns true if successful, false at end of file or on I/O error. */
693 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
695 struct dls_var_spec *spec;
697 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
699 struct substring field;
701 /* Cut out a field and read in a new record if necessary. */
702 while (!cut_field (dls, &field))
704 if (!dfm_eof (dls->reader))
705 dfm_forward_record (dls->reader);
706 if (dfm_eof (dls->reader))
708 if (&spec->ll != ll_head (&dls->specs))
709 msg (SW, _("Partial case discarded. The first variable "
710 "missing was %s."), spec->name);
715 data_in (field, spec->input.type, 0,
716 dfm_get_column (dls->reader, ss_data (field)),
717 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
722 /* Reads a case from the data file and parses it according to
723 list-format syntax rules.
724 Returns true if successful, false at end of file or on I/O error. */
726 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
728 struct dls_var_spec *spec;
730 if (dfm_eof (dls->reader))
733 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
735 struct substring field;
737 if (!cut_field (dls, &field))
739 if (get_undefined ())
740 msg (SW, _("Missing value(s) for all variables from %s onward. "
741 "These will be filled with the system-missing value "
742 "or blanks, as appropriate."),
744 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
746 int width = fmt_var_width (&spec->input);
748 case_data_rw_idx (c, spec->fv)->f = SYSMIS;
750 memset (case_data_rw_idx (c, spec->fv)->s, ' ', width);
755 data_in (field, spec->input.type, 0,
756 dfm_get_column (dls->reader, ss_data (field)),
757 case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input));
760 dfm_forward_record (dls->reader);
764 /* Destroys DATA LIST transformation DLS.
765 Returns true if successful, false if an I/O error occurred. */
767 data_list_trns_free (void *dls_)
769 struct data_list_pgm *dls = dls_;
770 dfm_close_reader (dls->reader);
771 pool_destroy (dls->pool);
775 /* Handle DATA LIST transformation DLS, parsing data into C. */
777 data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
779 struct data_list_pgm *dls = dls_;
782 if (read_from_data_list (dls, c))
783 retval = TRNS_CONTINUE;
784 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
786 /* An I/O error, or encountering end of file for a second
787 time, should be escalated into a more serious error. */
791 retval = TRNS_END_FILE;
793 /* If there was an END subcommand handle it. */
794 if (dls->end != NULL)
796 double *end = &case_data_rw (c, dls->end)->f;
797 if (retval == TRNS_DROP_CASE)
800 retval = TRNS_END_FILE;
809 /* Reads one case into OUTPUT_CASE.
810 Returns true if successful, false at end of file or if an
811 I/O error occurred. */
813 data_list_source_read (struct case_source *source, struct ccase *c)
815 struct data_list_pgm *dls = source->aux;
817 /* Skip the requested number of records before reading the
819 while (dls->skip_records > 0)
821 if (dfm_eof (dls->reader))
823 dfm_forward_record (dls->reader);
827 return read_from_data_list (dls, c);
830 /* Destroys the source.
831 Returns true if successful read, false if an I/O occurred
832 during destruction or previously. */
834 data_list_source_destroy (struct case_source *source)
836 struct data_list_pgm *dls = source->aux;
837 bool ok = !dfm_reader_error (dls->reader);
838 data_list_trns_free (dls);
842 static const struct case_source_class data_list_source_class =
846 data_list_source_read,
847 data_list_source_destroy,