1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/case-source.h>
30 #include <data/data-in.h>
31 #include <data/dictionary.h>
32 #include <data/format.h>
33 #include <data/procedure.h>
34 #include <data/settings.h>
35 #include <data/transformations.h>
36 #include <data/variable.h>
37 #include <language/command.h>
38 #include <language/data-io/data-reader.h>
39 #include <language/data-io/file-handle.h>
40 #include <language/data-io/inpt-pgm.h>
41 #include <language/data-io/placement-parser.h>
42 #include <language/lexer/format-parser.h>
43 #include <language/lexer/lexer.h>
44 #include <language/lexer/variable-parser.h>
45 #include <libpspp/alloc.h>
46 #include <libpspp/assertion.h>
47 #include <libpspp/compiler.h>
48 #include <libpspp/ll.h>
49 #include <libpspp/message.h>
50 #include <libpspp/misc.h>
51 #include <libpspp/pool.h>
52 #include <libpspp/str.h>
53 #include <output/table.h>
59 #define _(msgid) gettext (msgid)
61 /* Utility function. */
63 /* Describes how to parse one variable. */
66 struct ll ll; /* List element. */
69 struct fmt_spec input; /* Input format of this field. */
70 int fv; /* First value in case. */
71 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
73 /* Fixed format only. */
74 int record; /* Record number (1-based). */
75 int first_column; /* Column numbers in record. */
78 static struct dls_var_spec *
79 ll_to_dls_var_spec (struct ll *ll)
81 return ll_data (ll, struct dls_var_spec, ll);
84 /* Constants for DATA LIST type. */
92 /* DATA LIST private data structure. */
95 struct pool *pool; /* Used for all DATA LIST storage. */
96 struct ll_list specs; /* List of dls_var_specs. */
97 struct dfm_reader *reader; /* Data file reader. */
98 enum dls_type type; /* Type of DATA LIST construct. */
99 struct variable *end; /* Variable specified on END subcommand. */
100 int record_cnt; /* Number of records. */
101 struct string delims; /* Field delimiters. */
104 static const struct case_source_class data_list_source_class;
106 static bool parse_fixed (struct pool *tmp_pool, struct data_list_pgm *);
107 static bool parse_free (struct pool *tmp_pool, struct data_list_pgm *);
108 static void dump_fixed_table (const struct ll_list *,
109 const struct file_handle *, int record_cnt);
110 static void dump_free_table (const struct data_list_pgm *,
111 const struct file_handle *);
113 static trns_free_func data_list_trns_free;
114 static trns_proc_func data_list_trns_proc;
119 struct data_list_pgm *dls;
120 int table = -1; /* Print table if nonzero, -1=undecided. */
121 struct file_handle *fh = fh_inline_file ();
122 struct pool *tmp_pool;
125 if (!in_input_program ())
126 discard_variables (current_dataset);
128 dls = pool_create_container (struct data_list_pgm, pool);
129 ll_init (&dls->specs);
134 ds_init_empty (&dls->delims);
135 ds_register_pool (&dls->delims, dls->pool);
137 tmp_pool = pool_create_subpool (dls->pool);
141 if (lex_match_id ("FILE"))
144 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
148 else if (lex_match_id ("RECORDS"))
152 if (!lex_force_int ())
154 dls->record_cnt = lex_integer ();
158 else if (lex_match_id ("END"))
162 msg (SE, _("The END subcommand may only be specified once."));
167 if (!lex_force_id ())
169 dls->end = dict_lookup_var (dataset_dict (current_dataset), tokid);
171 dls->end = dict_create_var_assert (dataset_dict (current_dataset), tokid, 0);
174 else if (token == T_ID)
176 if (lex_match_id ("NOTABLE"))
178 else if (lex_match_id ("TABLE"))
183 if (lex_match_id ("FIXED"))
185 else if (lex_match_id ("FREE"))
187 else if (lex_match_id ("LIST"))
197 msg (SE, _("Only one of FIXED, FREE, or LIST may "
203 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
206 while (!lex_match (')'))
210 if (lex_match_id ("TAB"))
212 else if (token == T_STRING && ds_length (&tokstr) == 1)
214 delim = ds_first (&tokstr);
223 ds_put_char (&dls->delims, delim);
237 fh_set_default_handle (fh);
240 dls->type = DLS_FIXED;
243 table = dls->type != DLS_FREE;
245 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (tmp_pool, dls);
249 if (lex_end_of_command () != CMD_SUCCESS)
254 if (dls->type == DLS_FIXED)
255 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
257 dump_free_table (dls, fh);
260 dls->reader = dfm_open_reader (fh);
261 if (dls->reader == NULL)
264 if (in_input_program ())
265 add_transformation (current_dataset, data_list_trns_proc, data_list_trns_free, dls);
267 proc_set_source (current_dataset,
268 create_case_source (&data_list_source_class, dls));
270 pool_destroy (tmp_pool);
275 data_list_trns_free (dls);
276 return CMD_CASCADING_FAILURE;
279 /* Fixed-format parsing. */
281 /* Parses all the variable specifications for DATA LIST FIXED,
282 storing them into DLS. Uses TMP_POOL for data that is not
283 needed once parsing is complete. Returns true only if
286 parse_fixed (struct pool *tmp_pool, struct data_list_pgm *dls)
288 int last_nonempty_record;
295 size_t name_cnt, name_idx;
296 struct fmt_spec *formats, *f;
299 /* Parse everything. */
300 if (!parse_record_placement (&record, &column)
301 || !parse_DATA_LIST_vars_pool (tmp_pool, &names, &name_cnt, PV_NONE)
302 || !parse_var_placements (tmp_pool, name_cnt, &formats, &format_cnt))
305 /* Create variables and var specs. */
307 for (f = formats; f < &formats[format_cnt]; f++)
308 if (!execute_placement_format (f, &record, &column))
313 struct dls_var_spec *spec;
315 name = names[name_idx++];
317 /* Create variable. */
318 width = get_format_var_width (f);
319 v = dict_create_var (dataset_dict (current_dataset), name, width);
323 struct fmt_spec output;
324 convert_fmt_ItoO (f, &output);
331 This can be acceptable if we're in INPUT
332 PROGRAM, but only if the existing variable has
333 the same width as the one we would have
335 if (!in_input_program ())
337 msg (SE, _("%s is a duplicate variable name."), name);
341 v = dict_lookup_var_assert (dataset_dict (current_dataset), name);
342 if ((width != 0) != (v->width != 0))
344 msg (SE, _("There is already a variable %s of a "
349 if (width != 0 && width != v->width)
351 msg (SE, _("There is already a string variable %s of a "
352 "different width."), name);
357 /* Create specifier for parsing the variable. */
358 spec = pool_alloc (dls->pool, sizeof *spec);
361 spec->record = record;
362 spec->first_column = column;
363 strcpy (spec->name, v->name);
364 ll_push_tail (&dls->specs, &spec->ll);
368 assert (name_idx == name_cnt);
370 if (ll_is_empty (&dls->specs))
372 msg (SE, _("At least one variable must be specified."));
376 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
377 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
379 msg (SE, _("Variables are specified on records that "
380 "should not exist according to RECORDS subcommand."));
383 else if (!dls->record_cnt)
384 dls->record_cnt = last_nonempty_record;
389 /* Displays a table giving information on fixed-format variable
390 parsing on DATA LIST. */
392 dump_fixed_table (const struct ll_list *specs,
393 const struct file_handle *fh, int record_cnt)
397 struct dls_var_spec *spec;
400 spec_cnt = ll_count (specs);
401 t = tab_create (4, spec_cnt + 1, 0);
402 tab_columns (t, TAB_COL_DOWN, 1);
403 tab_headers (t, 0, 0, 1, 0);
404 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
405 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
406 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
407 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
408 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
409 tab_hline (t, TAL_2, 0, 3, 1);
410 tab_dim (t, tab_natural_dimensions);
413 ll_for_each (spec, struct dls_var_spec, ll, specs)
415 tab_text (t, 0, row, TAB_LEFT, spec->name);
416 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
417 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
418 spec->first_column, spec->first_column + spec->input.w - 1);
419 tab_text (t, 3, row, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
423 tab_title (t, ngettext ("Reading %d record from %s.",
424 "Reading %d records from %s.", record_cnt),
425 record_cnt, fh_get_name (fh));
429 /* Free-format parsing. */
431 /* Parses variable specifications for DATA LIST FREE and adds
432 them to DLS. Uses TMP_POOL for data that is not needed once
433 parsing is complete. Returns true only if successful. */
435 parse_free (struct pool *tmp_pool, struct data_list_pgm *dls)
440 struct fmt_spec input, output;
445 if (!parse_DATA_LIST_vars_pool (tmp_pool, &name, &name_cnt, PV_NONE))
450 if (!parse_format_specifier (&input)
451 || !check_input_specifier (&input, 1)
452 || !lex_force_match (')'))
454 convert_fmt_ItoO (&input, &output);
459 input = make_input_format (FMT_F, 8, 0);
460 output = *get_format ();
463 for (i = 0; i < name_cnt; i++)
465 struct dls_var_spec *spec;
468 v = dict_create_var (dataset_dict (current_dataset), name[i],
469 get_format_var_width (&input));
472 msg (SE, _("%s is a duplicate variable name."), name[i]);
475 v->print = v->write = output;
477 spec = pool_alloc (dls->pool, sizeof *spec);
480 strcpy (spec->name, v->name);
481 ll_push_tail (&dls->specs, &spec->ll);
488 /* Displays a table giving information on free-format variable parsing
491 dump_free_table (const struct data_list_pgm *dls,
492 const struct file_handle *fh)
495 struct dls_var_spec *spec;
499 spec_cnt = ll_count (&dls->specs);
501 t = tab_create (2, spec_cnt + 1, 0);
502 tab_columns (t, TAB_COL_DOWN, 1);
503 tab_headers (t, 0, 0, 1, 0);
504 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
505 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
506 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
507 tab_hline (t, TAL_2, 0, 1, 1);
508 tab_dim (t, tab_natural_dimensions);
511 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
513 tab_text (t, 0, row, TAB_LEFT, spec->name);
514 tab_text (t, 1, row, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
518 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
523 /* Input procedure. */
525 /* Extracts a field from the current position in the current
526 record. Fields can be unquoted or quoted with single- or
527 double-quote characters.
529 *FIELD is set to the field content. The caller must not
530 or destroy this constant string.
532 After parsing the field, sets the current position in the
533 record to just past the field and any trailing delimiter.
534 Returns 0 on failure or a 1-based column number indicating the
535 beginning of the field on success. */
537 cut_field (const struct data_list_pgm *dls, struct substring *field)
539 struct substring line, p;
541 if (dfm_eof (dls->reader))
543 if (ds_is_empty (&dls->delims))
544 dfm_expand_tabs (dls->reader);
545 line = p = dfm_get_record (dls->reader);
547 if (ds_is_empty (&dls->delims))
549 bool missing_quote = false;
551 /* Skip leading whitespace. */
552 ss_ltrim (&p, ss_cstr (CC_SPACES));
556 /* Handle actual data, whether quoted or unquoted. */
557 if (ss_match_char (&p, '\''))
558 missing_quote = !ss_get_until (&p, '\'', field);
559 else if (ss_match_char (&p, '"'))
560 missing_quote = !ss_get_until (&p, '"', field);
562 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
564 msg (SW, _("Quoted string extends beyond end of line."));
566 /* Skip trailing whitespace and a single comma if present. */
567 ss_ltrim (&p, ss_cstr (CC_SPACES));
568 ss_match_char (&p, ',');
570 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
574 if (!ss_is_empty (p))
575 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
576 else if (dfm_columns_past_end (dls->reader) == 0)
578 /* A blank line or a line that ends in a delimiter has a
579 trailing blank field. */
585 /* Advance past the field.
587 Also advance past a trailing delimiter, regardless of
588 whether one actually existed. If we "skip" a delimiter
589 that was not actually there, then we will return
590 end-of-line on our next call, which is what we want. */
591 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
596 static bool read_from_data_list_fixed (const struct data_list_pgm *,
598 static bool read_from_data_list_free (const struct data_list_pgm *,
600 static bool read_from_data_list_list (const struct data_list_pgm *,
603 /* Reads a case from DLS into C.
604 Returns true if successful, false at end of file or on I/O error. */
606 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
610 dfm_push (dls->reader);
614 retval = read_from_data_list_fixed (dls, c);
617 retval = read_from_data_list_free (dls, c);
620 retval = read_from_data_list_list (dls, c);
625 dfm_pop (dls->reader);
630 /* Reads a case from the data file into C, parsing it according
631 to fixed-format syntax rules in DLS.
632 Returns true if successful, false at end of file or on I/O error. */
634 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
636 struct dls_var_spec *spec;
639 if (dfm_eof (dls->reader))
642 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
643 for (row = 1; row <= dls->record_cnt; row++)
645 struct substring line;
647 if (dfm_eof (dls->reader))
649 msg (SW, _("Partial case of %d of %d records discarded."),
650 row - 1, dls->record_cnt);
653 dfm_expand_tabs (dls->reader);
654 line = dfm_get_record (dls->reader);
656 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
660 data_in_finite_line (&di, ss_data (line), ss_length (line),
662 spec->first_column + spec->input.w - 1);
663 di.v = case_data_rw (c, spec->fv);
664 di.flags = DI_IMPLIED_DECIMALS;
665 di.f1 = spec->first_column;
666 di.format = spec->input;
671 dfm_forward_record (dls->reader);
677 /* Reads a case from the data file into C, parsing it according
678 to free-format syntax rules in DLS.
679 Returns true if successful, false at end of file or on I/O error. */
681 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
683 struct dls_var_spec *spec;
685 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
687 struct substring field;
690 /* Cut out a field and read in a new record if necessary. */
691 while (!cut_field (dls, &field))
693 if (!dfm_eof (dls->reader))
694 dfm_forward_record (dls->reader);
695 if (dfm_eof (dls->reader))
697 if (&spec->ll != ll_head (&dls->specs))
698 msg (SW, _("Partial case discarded. The first variable "
699 "missing was %s."), spec->name);
704 di.s = ss_data (field);
705 di.e = ss_end (field);
706 di.v = case_data_rw (c, spec->fv);
708 di.f1 = dfm_get_column (dls->reader, ss_data (field));
709 di.format = spec->input;
715 /* Reads a case from the data file and parses it according to
716 list-format syntax rules.
717 Returns true if successful, false at end of file or on I/O error. */
719 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
721 struct dls_var_spec *spec;
723 if (dfm_eof (dls->reader))
726 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
728 struct substring field;
731 if (!cut_field (dls, &field))
733 if (get_undefined ())
734 msg (SW, _("Missing value(s) for all variables from %s onward. "
735 "These will be filled with the system-missing value "
736 "or blanks, as appropriate."),
738 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
740 int width = get_format_var_width (&spec->input);
742 case_data_rw (c, spec->fv)->f = SYSMIS;
744 memset (case_data_rw (c, spec->fv)->s, ' ', width);
749 di.s = ss_data (field);
750 di.e = ss_end (field);
751 di.v = case_data_rw (c, spec->fv);
753 di.f1 = dfm_get_column (dls->reader, ss_data (field));
754 di.format = spec->input;
758 dfm_forward_record (dls->reader);
762 /* Destroys DATA LIST transformation DLS.
763 Returns true if successful, false if an I/O error occurred. */
765 data_list_trns_free (void *dls_)
767 struct data_list_pgm *dls = dls_;
768 dfm_close_reader (dls->reader);
769 pool_destroy (dls->pool);
773 /* Handle DATA LIST transformation DLS, parsing data into C. */
775 data_list_trns_proc (void *dls_, struct ccase *c, casenum_t case_num UNUSED)
777 struct data_list_pgm *dls = dls_;
780 if (read_from_data_list (dls, c))
781 retval = TRNS_CONTINUE;
782 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
784 /* An I/O error, or encountering end of file for a second
785 time, should be escalated into a more serious error. */
789 retval = TRNS_END_FILE;
791 /* If there was an END subcommand handle it. */
792 if (dls->end != NULL)
794 double *end = &case_data_rw (c, dls->end->fv)->f;
795 if (retval == TRNS_DROP_CASE)
798 retval = TRNS_END_FILE;
807 /* Reads all the records from the data file and passes them to
809 Returns true if successful, false if an I/O error occurred. */
811 data_list_source_read (struct case_source *source,
813 write_case_func *write_case, write_case_data wc_data)
815 struct data_list_pgm *dls = source->aux;
821 if (!read_from_data_list (dls, c))
822 return !dfm_reader_error (dls->reader);
824 dfm_push (dls->reader);
825 ok = write_case (wc_data);
826 dfm_pop (dls->reader);
832 /* Destroys the source's internal data. */
834 data_list_source_destroy (struct case_source *source)
836 data_list_trns_free (source->aux);
839 static const struct case_source_class data_list_source_class =
843 data_list_source_read,
844 data_list_source_destroy,