1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include "data-list.h"
29 #include <data/case.h>
30 #include <data/data-in.h>
31 #include <data/dictionary.h>
32 #include <data/format.h>
33 #include <data/settings.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/data-io/data-list.h>
37 #include <language/data-io/data-reader.h>
38 #include <language/data-io/file-handle.h>
39 #include <language/data-io/inpt-pgm.h>
40 #include <language/lexer/lexer.h>
41 #include <libpspp/alloc.h>
42 #include <libpspp/compiler.h>
43 #include <libpspp/message.h>
44 #include <libpspp/message.h>
45 #include <libpspp/misc.h>
46 #include <libpspp/str.h>
47 #include <output/table.h>
48 #include <procedure.h>
51 #define _(msgid) gettext (msgid)
53 /* Utility function. */
55 /* FIXME: Either REPEATING DATA must be the last transformation, or we
56 must multiplex the transformations that follow (i.e., perform them
57 for every case that we produce from a repetition instance).
58 Currently we do neither. We should do one or the other. */
60 /* Describes how to parse one variable. */
63 struct dls_var_spec *next; /* Next specification in list. */
65 /* Both free and fixed formats. */
66 struct fmt_spec input; /* Input format of this field. */
67 struct variable *v; /* Associated variable. Used only in
68 parsing. Not safe later. */
69 int fv; /* First value in case. */
71 /* Fixed format only. */
72 int rec; /* Record number (1-based). */
73 int fc, lc; /* Column numbers in record. */
75 /* Free format only. */
76 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
79 /* Constants for DATA LIST type. */
80 /* Must match table in cmd_data_list(). */
88 /* DATA LIST private data structure. */
91 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
92 struct dfm_reader *reader; /* Data file reader. */
94 int type; /* A DLS_* constant. */
95 struct variable *end; /* Variable specified on END subcommand. */
96 int rec_cnt; /* Number of records. */
97 size_t case_size; /* Case size in bytes. */
98 char *delims; /* Delimiters if any; not null-terminated. */
99 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
102 static const struct case_source_class data_list_source_class;
104 static void rpd_msg (enum msg_class, const char *format, ...);
105 static int parse_fixed (struct data_list_pgm *);
106 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
107 static void dump_fixed_table (const struct dls_var_spec *,
108 const struct file_handle *, int rec_cnt);
109 static void dump_free_table (const struct data_list_pgm *,
110 const struct file_handle *);
111 static void destroy_dls_var_spec (struct dls_var_spec *);
112 static trns_free_func data_list_trns_free;
113 static trns_proc_func data_list_trns_proc;
118 struct data_list_pgm *dls;
119 int table = -1; /* Print table if nonzero, -1=undecided. */
120 struct file_handle *fh = fh_inline_file ();
122 if (!in_input_program ())
123 discard_variables ();
125 dls = xmalloc (sizeof *dls);
132 dls->first = dls->last = NULL;
136 if (lex_match_id ("FILE"))
139 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
143 else if (lex_match_id ("RECORDS"))
147 if (!lex_force_int ())
149 dls->rec_cnt = lex_integer ();
153 else if (lex_match_id ("END"))
157 msg (SE, _("The END subcommand may only be specified once."));
162 if (!lex_force_id ())
164 dls->end = dict_lookup_var (default_dict, tokid);
166 dls->end = dict_create_var_assert (default_dict, tokid, 0);
169 else if (token == T_ID)
171 if (lex_match_id ("NOTABLE"))
173 else if (lex_match_id ("TABLE"))
178 if (lex_match_id ("FIXED"))
180 else if (lex_match_id ("FREE"))
182 else if (lex_match_id ("LIST"))
192 msg (SE, _("Only one of FIXED, FREE, or LIST may "
198 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
201 while (!lex_match (')'))
205 if (lex_match_id ("TAB"))
207 else if (token == T_STRING && tokstr.length == 1)
209 delim = tokstr.string[0];
218 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
219 dls->delims[dls->delim_cnt++] = delim;
233 dls->case_size = dict_get_case_size (default_dict);
234 fh_set_default_handle (fh);
237 dls->type = DLS_FIXED;
241 if (dls->type == DLS_FREE)
247 if (dls->type == DLS_FIXED)
249 if (!parse_fixed (dls))
252 dump_fixed_table (dls->first, fh, dls->rec_cnt);
256 if (!parse_free (&dls->first, &dls->last))
259 dump_free_table (dls, fh);
262 dls->reader = dfm_open_reader (fh);
263 if (dls->reader == NULL)
266 if (vfm_source != NULL)
267 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
269 vfm_source = create_case_source (&data_list_source_class, dls);
274 data_list_trns_free (dls);
275 return CMD_CASCADING_FAILURE;
278 /* Adds SPEC to the linked list with head at FIRST and tail at
281 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
282 struct dls_var_spec *spec)
289 (*last)->next = spec;
293 /* Fixed-format parsing. */
295 /* Used for chaining together fortran-like format specifiers. */
298 struct fmt_list *next;
301 struct fmt_list *down;
304 /* State of parsing DATA LIST. */
305 struct fixed_parsing_state
307 char **name; /* Variable names. */
308 size_t name_cnt; /* Number of names. */
310 int recno; /* Index of current record. */
311 int sc; /* 1-based column number of starting column for
312 next field to output. */
315 static int fixed_parse_compatible (struct fixed_parsing_state *,
316 struct dls_var_spec **,
317 struct dls_var_spec **);
318 static int fixed_parse_fortran (struct fixed_parsing_state *,
319 struct dls_var_spec **,
320 struct dls_var_spec **);
322 /* Parses all the variable specifications for DATA LIST FIXED,
323 storing them into DLS. Returns nonzero if successful. */
325 parse_fixed (struct data_list_pgm *dls)
327 struct fixed_parsing_state fx;
335 while (lex_match ('/'))
338 if (lex_is_integer ())
340 if (lex_integer () < fx.recno)
342 msg (SE, _("The record number specified, %ld, is "
343 "before the previous record, %d. Data "
344 "fields must be listed in order of "
345 "increasing record number."),
346 lex_integer (), fx.recno - 1);
350 fx.recno = lex_integer ();
356 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
359 if (lex_is_number ())
361 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
364 else if (token == '(')
366 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
371 msg (SE, _("SPSS-like or FORTRAN-like format "
372 "specification expected after variable names."));
376 for (i = 0; i < fx.name_cnt; i++)
380 if (dls->first == NULL)
382 msg (SE, _("At least one variable must be specified."));
385 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
387 msg (SE, _("Variables are specified on records that "
388 "should not exist according to RECORDS subcommand."));
391 else if (!dls->rec_cnt)
392 dls->rec_cnt = dls->last->rec;
393 return lex_end_of_command () == CMD_SUCCESS;
396 for (i = 0; i < fx.name_cnt; i++)
402 /* Parses a variable specification in the form 1-10 (A) based on
403 FX and adds specifications to the linked list with head at
404 FIRST and tail at LAST. */
406 fixed_parse_compatible (struct fixed_parsing_state *fx,
407 struct dls_var_spec **first, struct dls_var_spec **last)
409 struct fmt_spec input;
415 if (!lex_force_int ())
420 msg (SE, _("Column positions for fields must be positive."));
426 lex_negative_to_dash ();
429 if (!lex_force_int ())
434 msg (SE, _("Column positions for fields must be positive."));
439 msg (SE, _("The ending column for a field must be "
440 "greater than the starting column."));
449 /* Divide columns evenly. */
450 input.w = (lc - fc + 1) / fx->name_cnt;
451 if ((lc - fc + 1) % fx->name_cnt)
453 msg (SE, _("The %d columns %d-%d "
454 "can't be evenly divided into %d fields."),
455 lc - fc + 1, fc, lc, fx->name_cnt);
459 /* Format specifier. */
462 struct fmt_desc *fdp;
468 input.type = parse_format_specifier_name (&cp, 0);
469 if (input.type == -1)
473 msg (SE, _("A format specifier on this line "
474 "has extra characters on the end."));
484 if (lex_is_integer ())
486 if (lex_integer () < 1)
488 msg (SE, _("The value for number of decimal places "
489 "must be at least 1."));
493 input.d = lex_integer ();
499 fdp = &formats[input.type];
500 if (fdp->n_args < 2 && input.d)
502 msg (SE, _("Input format %s doesn't accept decimal places."),
510 if (!lex_force_match (')'))
518 if (!check_input_specifier (&input, 1))
521 /* Start column for next specification. */
524 /* Width of variables to create. */
525 if (input.type == FMT_A || input.type == FMT_AHEX)
530 /* Create variables and var specs. */
531 for (i = 0; i < fx->name_cnt; i++)
533 struct dls_var_spec *spec;
536 v = dict_create_var (default_dict, fx->name[i], width);
539 convert_fmt_ItoO (&input, &v->print);
544 v = dict_lookup_var_assert (default_dict, fx->name[i]);
545 if (vfm_source == NULL)
547 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
550 if ((width != 0) != (v->width != 0))
552 msg (SE, _("There is already a variable %s of a "
557 if (width != 0 && width != v->width)
559 msg (SE, _("There is already a string variable %s of a "
560 "different width."), fx->name[i]);
565 spec = xmalloc (sizeof *spec);
569 spec->rec = fx->recno;
570 spec->fc = fc + input.w * i;
571 spec->lc = spec->fc + input.w - 1;
572 append_var_spec (first, last, spec);
577 /* Destroy format list F and, if RECURSE is nonzero, all its
580 destroy_fmt_list (struct fmt_list *f, int recurse)
582 struct fmt_list *next;
587 if (recurse && f->f.type == FMT_DESCEND)
588 destroy_fmt_list (f->down, 1);
593 /* Takes a hierarchically structured fmt_list F as constructed by
594 fixed_parse_fortran(), and flattens it, adding the variable
595 specifications to the linked list with head FIRST and tail
596 LAST. NAME_IDX is used to take values from the list of names
597 in FX; it should initially point to a value of 0. */
599 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
600 struct dls_var_spec **first, struct dls_var_spec **last,
605 for (; f; f = f->next)
606 if (f->f.type == FMT_X)
608 else if (f->f.type == FMT_T)
610 else if (f->f.type == FMT_NEWREC)
612 fx->recno += f->count;
616 for (i = 0; i < f->count; i++)
617 if (f->f.type == FMT_DESCEND)
619 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
624 struct dls_var_spec *spec;
628 if (formats[f->f.type].cat & FCAT_STRING)
632 if (*name_idx >= fx->name_cnt)
634 msg (SE, _("The number of format "
635 "specifications exceeds the given number of "
640 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
643 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
647 spec = xmalloc (sizeof *spec);
651 spec->rec = fx->recno;
653 spec->lc = fx->sc + f->f.w - 1;
654 append_var_spec (first, last, spec);
656 convert_fmt_ItoO (&spec->input, &v->print);
664 /* Recursively parses a FORTRAN-like format specification into
665 the linked list with head FIRST and tail TAIL. LEVEL is the
666 level of recursion, starting from 0. Returns the parsed
667 specification if successful, or a null pointer on failure. */
668 static struct fmt_list *
669 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
670 struct dls_var_spec **first,
671 struct dls_var_spec **last)
673 struct fmt_list *head = NULL;
674 struct fmt_list *tail = NULL;
676 lex_force_match ('(');
680 struct fmt_list *new = xmalloc (sizeof *new);
683 /* Append new to list. */
691 if (lex_is_integer ())
693 new->count = lex_integer ();
699 /* Parse format specifier. */
702 new->f.type = FMT_DESCEND;
703 new->down = fixed_parse_fortran_internal (fx, first, last);
704 if (new->down == NULL)
707 else if (lex_match ('/'))
708 new->f.type = FMT_NEWREC;
709 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
710 || !check_input_specifier (&new->f, 1))
715 lex_force_match (')');
720 destroy_fmt_list (head, 0);
725 /* Parses a FORTRAN-like format specification into the linked
726 list with head FIRST and tail LAST. Returns nonzero if
729 fixed_parse_fortran (struct fixed_parsing_state *fx,
730 struct dls_var_spec **first, struct dls_var_spec **last)
732 struct fmt_list *list;
735 list = fixed_parse_fortran_internal (fx, first, last);
740 dump_fmt_list (fx, list, first, last, &name_idx);
741 destroy_fmt_list (list, 1);
742 if (name_idx < fx->name_cnt)
744 msg (SE, _("There aren't enough format specifications "
745 "to match the number of variable names given."));
752 /* Displays a table giving information on fixed-format variable
753 parsing on DATA LIST. */
754 /* FIXME: The `Columns' column should be divided into three columns,
755 one for the starting column, one for the dash, one for the ending
756 column; then right-justify the starting column and left-justify the
759 dump_fixed_table (const struct dls_var_spec *specs,
760 const struct file_handle *fh, int rec_cnt)
762 const struct dls_var_spec *spec;
766 for (i = 0, spec = specs; spec; spec = spec->next)
768 t = tab_create (4, i + 1, 0);
769 tab_columns (t, TAB_COL_DOWN, 1);
770 tab_headers (t, 0, 0, 1, 0);
771 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
772 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
773 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
774 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
775 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
776 tab_hline (t, TAL_2, 0, 3, 1);
777 tab_dim (t, tab_natural_dimensions);
779 for (i = 1, spec = specs; spec; spec = spec->next, i++)
781 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
782 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
783 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
785 tab_text (t, 3, i, TAB_LEFT | TAB_FIX,
786 fmt_to_string (&spec->input));
789 tab_title (t, ngettext ("Reading %d record from %s.",
790 "Reading %d records from %s.", rec_cnt),
791 rec_cnt, fh_get_name (fh));
795 /* Free-format parsing. */
797 /* Parses variable specifications for DATA LIST FREE and adds
798 them to the linked list with head FIRST and tail LAST.
799 Returns nonzero only if successful. */
801 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
806 struct fmt_spec input, output;
812 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
817 if (!parse_format_specifier (&input, 0)
818 || !check_input_specifier (&input, 1)
819 || !lex_force_match (')'))
821 for (i = 0; i < name_cnt; i++)
826 convert_fmt_ItoO (&input, &output);
831 input = make_input_format (FMT_F, 8, 0);
832 output = *get_format ();
835 if (input.type == FMT_A || input.type == FMT_AHEX)
839 for (i = 0; i < name_cnt; i++)
841 struct dls_var_spec *spec;
844 v = dict_create_var (default_dict, name[i], width);
848 msg (SE, _("%s is a duplicate variable name."), name[i]);
851 v->print = v->write = output;
853 spec = xmalloc (sizeof *spec);
857 str_copy_trunc (spec->name, sizeof spec->name, v->name);
858 append_var_spec (first, last, spec);
860 for (i = 0; i < name_cnt; i++)
865 return lex_end_of_command () == CMD_SUCCESS;
868 /* Displays a table giving information on free-format variable parsing
871 dump_free_table (const struct data_list_pgm *dls,
872 const struct file_handle *fh)
878 struct dls_var_spec *spec;
879 for (i = 0, spec = dls->first; spec; spec = spec->next)
883 t = tab_create (2, i + 1, 0);
884 tab_columns (t, TAB_COL_DOWN, 1);
885 tab_headers (t, 0, 0, 1, 0);
886 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
887 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
888 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
889 tab_hline (t, TAL_2, 0, 1, 1);
890 tab_dim (t, tab_natural_dimensions);
893 struct dls_var_spec *spec;
895 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
897 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
898 tab_text (t, 1, i, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
902 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
907 /* Input procedure. */
909 /* Extracts a field from the current position in the current
910 record. Fields can be unquoted or quoted with single- or
911 double-quote characters. *FIELD is set to the field content.
912 After parsing the field, sets the current position in the
913 record to just past the field and any trailing delimiter.
914 END_BLANK is used internally; it should be initialized by the
915 caller to 0 and left alone afterward. Returns 0 on failure or
916 a 1-based column number indicating the beginning of the field
919 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
922 struct fixed_string line;
926 if (dfm_eof (dls->reader))
928 if (dls->delim_cnt == 0)
929 dfm_expand_tabs (dls->reader);
930 dfm_get_record (dls->reader, &line);
932 cp = ls_c_str (&line);
933 if (dls->delim_cnt == 0)
935 /* Skip leading whitespace. */
936 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
938 if (cp >= ls_end (&line))
941 /* Handle actual data, whether quoted or unquoted. */
942 if (*cp == '\'' || *cp == '"')
946 field->string = ++cp;
947 while (cp < ls_end (&line) && *cp != quote)
949 field->length = cp - field->string;
950 if (cp < ls_end (&line))
953 msg (SW, _("Quoted string missing terminating `%c'."), quote);
958 while (cp < ls_end (&line)
959 && !isspace ((unsigned char) *cp) && *cp != ',')
961 field->length = cp - field->string;
964 /* Skip trailing whitespace and a single comma if present. */
965 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
967 if (cp < ls_end (&line) && *cp == ',')
972 if (cp >= ls_end (&line))
974 int column = dfm_column_start (dls->reader);
975 /* A blank line or a line that ends in \t has a
976 trailing blank field. */
977 if (column == 1 || (column > 1 && cp[-1] == '\t'))
982 field->string = ls_end (&line);
984 dfm_forward_record (dls->reader);
999 while (cp < ls_end (&line)
1000 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1002 field->length = cp - field->string;
1003 if (cp < ls_end (&line))
1008 dfm_forward_columns (dls->reader, field->string - line.string);
1009 column_start = dfm_column_start (dls->reader);
1011 dfm_forward_columns (dls->reader, cp - field->string);
1013 return column_start;
1016 static bool read_from_data_list_fixed (const struct data_list_pgm *,
1018 static bool read_from_data_list_free (const struct data_list_pgm *,
1020 static bool read_from_data_list_list (const struct data_list_pgm *,
1023 /* Reads a case from DLS into C.
1024 Returns true if successful, false at end of file or on I/O error. */
1026 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
1030 dfm_push (dls->reader);
1034 retval = read_from_data_list_fixed (dls, c);
1037 retval = read_from_data_list_free (dls, c);
1040 retval = read_from_data_list_list (dls, c);
1045 dfm_pop (dls->reader);
1050 /* Reads a case from the data file into C, parsing it according
1051 to fixed-format syntax rules in DLS.
1052 Returns true if successful, false at end of file or on I/O error. */
1054 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
1056 struct dls_var_spec *var_spec = dls->first;
1059 if (dfm_eof (dls->reader))
1061 for (i = 1; i <= dls->rec_cnt; i++)
1063 struct fixed_string line;
1065 if (dfm_eof (dls->reader))
1067 /* Note that this can't occur on the first record. */
1068 msg (SW, _("Partial case of %d of %d records discarded."),
1069 i - 1, dls->rec_cnt);
1072 dfm_expand_tabs (dls->reader);
1073 dfm_get_record (dls->reader, &line);
1075 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1079 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1080 var_spec->fc, var_spec->lc);
1081 di.v = case_data_rw (c, var_spec->fv);
1082 di.flags = DI_IMPLIED_DECIMALS;
1083 di.f1 = var_spec->fc;
1084 di.format = var_spec->input;
1089 dfm_forward_record (dls->reader);
1095 /* Reads a case from the data file into C, parsing it according
1096 to free-format syntax rules in DLS.
1097 Returns true if successful, false at end of file or on I/O error. */
1099 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
1101 struct dls_var_spec *var_spec;
1104 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1106 struct fixed_string field;
1109 /* Cut out a field and read in a new record if necessary. */
1112 column = cut_field (dls, &field, &end_blank);
1116 if (!dfm_eof (dls->reader))
1117 dfm_forward_record (dls->reader);
1118 if (dfm_eof (dls->reader))
1120 if (var_spec != dls->first)
1121 msg (SW, _("Partial case discarded. The first variable "
1122 "missing was %s."), var_spec->name);
1130 di.s = ls_c_str (&field);
1131 di.e = ls_end (&field);
1132 di.v = case_data_rw (c, var_spec->fv);
1135 di.format = var_spec->input;
1142 /* Reads a case from the data file and parses it according to
1143 list-format syntax rules.
1144 Returns true if successful, false at end of file or on I/O error. */
1146 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
1148 struct dls_var_spec *var_spec;
1151 if (dfm_eof (dls->reader))
1154 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1156 struct fixed_string field;
1159 /* Cut out a field and check for end-of-line. */
1160 column = cut_field (dls, &field, &end_blank);
1163 if (get_undefined ())
1164 msg (SW, _("Missing value(s) for all variables from %s onward. "
1165 "These will be filled with the system-missing value "
1166 "or blanks, as appropriate."),
1168 for (; var_spec; var_spec = var_spec->next)
1170 int width = get_format_var_width (&var_spec->input);
1172 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1174 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1182 di.s = ls_c_str (&field);
1183 di.e = ls_end (&field);
1184 di.v = case_data_rw (c, var_spec->fv);
1187 di.format = var_spec->input;
1192 dfm_forward_record (dls->reader);
1196 /* Destroys SPEC. */
1198 destroy_dls_var_spec (struct dls_var_spec *spec)
1200 struct dls_var_spec *next;
1202 while (spec != NULL)
1210 /* Destroys DATA LIST transformation DLS.
1211 Returns true if successful, false if an I/O error occurred. */
1213 data_list_trns_free (void *dls_)
1215 struct data_list_pgm *dls = dls_;
1217 destroy_dls_var_spec (dls->first);
1218 dfm_close_reader (dls->reader);
1223 /* Handle DATA LIST transformation DLS, parsing data into C. */
1225 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1227 struct data_list_pgm *dls = dls_;
1230 if (read_from_data_list (dls, c))
1231 retval = TRNS_CONTINUE;
1232 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
1234 /* An I/O error, or encountering end of file for a second
1235 time, should be escalated into a more serious error. */
1236 retval = TRNS_ERROR;
1239 retval = TRNS_DROP_CASE;
1241 /* If there was an END subcommand handle it. */
1242 if (dls->end != NULL)
1244 double *end = &case_data_rw (c, dls->end->fv)->f;
1245 if (retval == TRNS_DROP_CASE)
1248 retval = TRNS_CONTINUE;
1257 /* Reads all the records from the data file and passes them to
1259 Returns true if successful, false if an I/O error occurred. */
1261 data_list_source_read (struct case_source *source,
1263 write_case_func *write_case, write_case_data wc_data)
1265 struct data_list_pgm *dls = source->aux;
1271 if (!read_from_data_list (dls, c))
1272 return !dfm_reader_error (dls->reader);
1274 dfm_push (dls->reader);
1275 ok = write_case (wc_data);
1276 dfm_pop (dls->reader);
1282 /* Destroys the source's internal data. */
1284 data_list_source_destroy (struct case_source *source)
1286 data_list_trns_free (source->aux);
1289 static const struct case_source_class data_list_source_class =
1293 data_list_source_read,
1294 data_list_source_destroy,
1297 /* REPEATING DATA. */
1299 /* Represents a number or a variable. */
1300 struct rpd_num_or_var
1302 int num; /* Value, or 0. */
1303 struct variable *var; /* Variable, if number==0. */
1306 /* REPEATING DATA private data structure. */
1307 struct repeating_data_trns
1309 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1310 struct dfm_reader *reader; /* Input file, never NULL. */
1312 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1313 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1314 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1315 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1316 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1317 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1319 /* ID subcommand. */
1320 int id_beg, id_end; /* Beginning & end columns. */
1321 struct variable *id_var; /* DATA LIST variable. */
1322 struct fmt_spec id_spec; /* Input format spec. */
1323 union value *id_value; /* ID value. */
1325 write_case_func *write_case;
1326 write_case_data wc_data;
1329 static trns_free_func repeating_data_trns_free;
1330 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1331 static int parse_repeating_data (struct dls_var_spec **,
1332 struct dls_var_spec **);
1333 static void find_variable_input_spec (struct variable *v,
1334 struct fmt_spec *spec);
1336 int cmd_repeating_data (void);
1338 /* Parses the REPEATING DATA command. */
1340 cmd_repeating_data (void)
1342 struct repeating_data_trns *rpd;
1343 int table = 1; /* Print table? */
1344 bool saw_starts = false; /* Saw STARTS subcommand? */
1345 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1346 bool saw_length = false; /* Saw LENGTH subcommand? */
1347 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1348 bool saw_id = false; /* Saw ID subcommand? */
1349 struct file_handle *const fh = fh_get_default_handle ();
1351 assert (in_input_program ());
1353 rpd = xmalloc (sizeof *rpd);
1354 rpd->reader = dfm_open_reader (fh);
1355 rpd->first = rpd->last = NULL;
1356 rpd->starts_beg.num = 0;
1357 rpd->starts_beg.var = NULL;
1358 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1359 = rpd->cont_end = rpd->starts_beg;
1360 rpd->id_beg = rpd->id_end = 0;
1362 rpd->id_value = NULL;
1368 if (lex_match_id ("FILE"))
1370 struct file_handle *file;
1372 file = fh_parse (FH_REF_FILE | FH_REF_INLINE);
1377 msg (SE, _("REPEATING DATA must use the same file as its "
1378 "corresponding DATA LIST or FILE TYPE."));
1382 else if (lex_match_id ("STARTS"))
1387 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1392 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1395 lex_negative_to_dash ();
1396 if (lex_match ('-'))
1398 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1401 /* Otherwise, rpd->starts_end is uninitialized. We
1402 will initialize it later from the record length
1403 of the file. We can't do so now because the
1404 file handle may not be specified yet. */
1407 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1408 && rpd->starts_beg.num > rpd->starts_end.num)
1410 msg (SE, _("STARTS beginning column (%d) exceeds "
1411 "STARTS ending column (%d)."),
1412 rpd->starts_beg.num, rpd->starts_end.num);
1416 else if (lex_match_id ("OCCURS"))
1421 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1426 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1429 else if (lex_match_id ("LENGTH"))
1434 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1439 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1442 else if (lex_match_id ("CONTINUED"))
1447 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1450 saw_continued = true;
1452 if (!lex_match ('/'))
1454 if (!parse_num_or_var (&rpd->cont_beg,
1455 "CONTINUED beginning column"))
1458 lex_negative_to_dash ();
1460 && !parse_num_or_var (&rpd->cont_end,
1461 "CONTINUED ending column"))
1464 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1465 && rpd->cont_beg.num > rpd->cont_end.num)
1467 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1468 "CONTINUED ending column (%d)."),
1469 rpd->cont_beg.num, rpd->cont_end.num);
1474 rpd->cont_beg.num = 1;
1476 else if (lex_match_id ("ID"))
1481 msg (SE, _("%s subcommand given multiple times."),"ID");
1486 if (!lex_force_int ())
1488 if (lex_integer () < 1)
1490 msg (SE, _("ID beginning column (%ld) must be positive."),
1494 rpd->id_beg = lex_integer ();
1497 lex_negative_to_dash ();
1499 if (lex_match ('-'))
1501 if (!lex_force_int ())
1503 if (lex_integer () < 1)
1505 msg (SE, _("ID ending column (%ld) must be positive."),
1509 if (lex_integer () < rpd->id_end)
1511 msg (SE, _("ID ending column (%ld) cannot be less than "
1512 "ID beginning column (%d)."),
1513 lex_integer (), rpd->id_beg);
1517 rpd->id_end = lex_integer ();
1520 else rpd->id_end = rpd->id_beg;
1522 if (!lex_force_match ('='))
1524 rpd->id_var = parse_variable ();
1525 if (rpd->id_var == NULL)
1528 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1529 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1531 else if (lex_match_id ("TABLE"))
1533 else if (lex_match_id ("NOTABLE"))
1535 else if (lex_match_id ("DATA"))
1543 if (!lex_force_match ('/'))
1547 /* Comes here when DATA specification encountered. */
1548 if (!saw_starts || !saw_occurs)
1551 msg (SE, _("Missing required specification STARTS."));
1553 msg (SE, _("Missing required specification OCCURS."));
1557 /* Enforce ID restriction. */
1558 if (saw_id && !saw_continued)
1560 msg (SE, _("ID specified without CONTINUED."));
1564 /* Calculate and check starts_end, cont_end if necessary. */
1565 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1567 rpd->starts_end.num = fh_get_record_width (fh);
1568 if (rpd->starts_beg.num != 0
1569 && rpd->starts_beg.num > rpd->starts_end.num)
1571 msg (SE, _("STARTS beginning column (%d) exceeds "
1572 "default STARTS ending column taken from file's "
1573 "record width (%d)."),
1574 rpd->starts_beg.num, rpd->starts_end.num);
1578 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1580 rpd->cont_end.num = fh_get_record_width (fh);
1581 if (rpd->cont_beg.num != 0
1582 && rpd->cont_beg.num > rpd->cont_end.num)
1584 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1585 "default CONTINUED ending column taken from file's "
1586 "record width (%d)."),
1587 rpd->cont_beg.num, rpd->cont_end.num);
1593 if (!parse_repeating_data (&rpd->first, &rpd->last))
1596 /* Calculate length if necessary. */
1599 struct dls_var_spec *iter;
1601 for (iter = rpd->first; iter; iter = iter->next)
1602 if (iter->lc > rpd->length.num)
1603 rpd->length.num = iter->lc;
1604 assert (rpd->length.num != 0);
1608 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1610 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1612 return lex_end_of_command ();
1615 repeating_data_trns_free (rpd);
1616 return CMD_CASCADING_FAILURE;
1619 /* Finds the input format specification for variable V and puts
1620 it in SPEC. Because of the way that DATA LIST is structured,
1621 this is nontrivial. */
1623 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1627 for (i = 0; i < n_trns; i++)
1629 struct transformation *trns = &t_trns[i];
1631 if (trns->proc == data_list_trns_proc)
1633 struct data_list_pgm *pgm = trns->private;
1634 struct dls_var_spec *iter;
1636 for (iter = pgm->first; iter; iter = iter->next)
1639 *spec = iter->input;
1648 /* Parses a number or a variable name from the syntax file and puts
1649 the results in VALUE. Ensures that the number is at least 1; else
1650 emits an error based on MESSAGE. Returns nonzero only if
1653 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1658 value->var = parse_variable ();
1659 if (value->var == NULL)
1661 if (value->var->type == ALPHA)
1663 msg (SE, _("String variable not allowed here."));
1667 else if (lex_is_integer ())
1669 value->num = lex_integer ();
1673 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1679 msg (SE, _("Variable or integer expected for %s."), message);
1685 /* Parses data specifications for repeating data groups, adding
1686 them to the linked list with head FIRST and tail LAST.
1687 Returns nonzero only if successful. */
1689 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1691 struct fixed_parsing_state fx;
1697 while (token != '.')
1699 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1702 if (lex_is_number ())
1704 if (!fixed_parse_compatible (&fx, first, last))
1707 else if (token == '(')
1709 if (!fixed_parse_fortran (&fx, first, last))
1714 msg (SE, _("SPSS-like or FORTRAN-like format "
1715 "specification expected after variable names."));
1719 for (i = 0; i < fx.name_cnt; i++)
1727 for (i = 0; i < fx.name_cnt; i++)
1733 /* Obtains the real value for rpd_num_or_var N in case C and returns
1734 it. The valid range is nonnegative numbers, but numbers outside
1735 this range can be returned and should be handled by the caller as
1738 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1742 double v = case_num (c, n->var->fv);
1743 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1749 /* Parameter record passed to rpd_parse_record(). */
1750 struct rpd_parse_info
1752 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1753 const char *line; /* Line being parsed. */
1754 size_t len; /* Line length. */
1755 int beg, end; /* First and last column of first occurrence. */
1756 int ofs; /* Column offset between repeated occurrences. */
1757 struct ccase *c; /* Case to fill in. */
1758 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1759 int max_occurs; /* Max number of occurrences to parse. */
1762 /* Parses one record of repeated data and outputs corresponding
1763 cases. Returns number of occurrences parsed up to the
1764 maximum specified in INFO. */
1766 rpd_parse_record (const struct rpd_parse_info *info)
1768 struct repeating_data_trns *t = info->trns;
1769 int cur = info->beg;
1772 /* Handle record ID values. */
1775 union value id_temp[MAX_ELEMS_PER_VALUE];
1777 /* Parse record ID into V. */
1781 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1782 di.v = info->verify_id ? id_temp : t->id_value;
1785 di.format = t->id_spec;
1792 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1794 char expected_str [MAX_FORMATTED_LEN + 1];
1795 char actual_str [MAX_FORMATTED_LEN + 1];
1797 data_out (expected_str, &t->id_var->print, t->id_value);
1798 expected_str[t->id_var->print.w] = '\0';
1800 data_out (actual_str, &t->id_var->print, id_temp);
1801 actual_str[t->id_var->print.w] = '\0';
1804 _("Encountered mismatched record ID \"%s\" "
1805 "expecting \"%s\"."),
1806 actual_str, expected_str);
1812 /* Iterate over the set of expected occurrences and record each of
1813 them as a separate case. FIXME: We need to execute any
1814 transformations that follow the current one. */
1818 for (occurrences = 0; occurrences < info->max_occurs; )
1820 if (cur + info->ofs > info->end + 1)
1825 struct dls_var_spec *var_spec = t->first;
1827 for (; var_spec; var_spec = var_spec->next)
1829 int fc = var_spec->fc - 1 + cur;
1830 int lc = var_spec->lc - 1 + cur;
1832 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1837 _("Variable %s starting in column %d extends "
1838 "beyond physical record length of %d."),
1839 var_spec->v->name, fc, info->len);
1845 data_in_finite_line (&di, info->line, info->len, fc, lc);
1846 di.v = case_data_rw (info->c, var_spec->fv);
1849 di.format = var_spec->input;
1859 if (!t->write_case (t->wc_data))
1867 /* Reads one set of repetitions of the elements in the REPEATING
1868 DATA structure. Returns TRNS_CONTINUE on success,
1869 TRNS_DROP_CASE on end of file or on failure, or TRNS_NEXT_CASE. */
1871 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1873 struct repeating_data_trns *t = trns_;
1875 struct fixed_string line; /* Current record. */
1877 int starts_beg; /* Starting column. */
1878 int starts_end; /* Ending column. */
1879 int occurs; /* Number of repetitions. */
1880 int length; /* Length of each occurrence. */
1881 int cont_beg; /* Starting column for continuation lines. */
1882 int cont_end; /* Ending column for continuation lines. */
1884 int occurs_left; /* Number of occurrences remaining. */
1886 int code; /* Return value from rpd_parse_record(). */
1888 int skip_first_record = 0;
1890 dfm_push (t->reader);
1892 /* Read the current record. */
1893 dfm_reread_record (t->reader, 1);
1894 dfm_expand_tabs (t->reader);
1895 if (dfm_eof (t->reader))
1896 return TRNS_DROP_CASE;
1897 dfm_get_record (t->reader, &line);
1898 dfm_forward_record (t->reader);
1900 /* Calculate occurs, length. */
1901 occurs_left = occurs = realize_value (&t->occurs, c);
1904 rpd_msg (SE, _("Invalid value %d for OCCURS."), occurs);
1905 return TRNS_NEXT_CASE;
1907 starts_beg = realize_value (&t->starts_beg, c);
1908 if (starts_beg <= 0)
1910 rpd_msg (SE, _("Beginning column for STARTS (%d) must be at least 1."),
1912 return TRNS_NEXT_CASE;
1914 starts_end = realize_value (&t->starts_end, c);
1915 if (starts_end < starts_beg)
1917 rpd_msg (SE, _("Ending column for STARTS (%d) is less than "
1918 "beginning column (%d)."),
1919 starts_end, starts_beg);
1920 skip_first_record = 1;
1922 length = realize_value (&t->length, c);
1925 rpd_msg (SE, _("Invalid value %d for LENGTH."), length);
1927 occurs = occurs_left = 1;
1929 cont_beg = realize_value (&t->cont_beg, c);
1932 rpd_msg (SE, _("Beginning column for CONTINUED (%d) must be "
1935 return TRNS_DROP_CASE;
1937 cont_end = realize_value (&t->cont_end, c);
1938 if (cont_end < cont_beg)
1940 rpd_msg (SE, _("Ending column for CONTINUED (%d) is less than "
1941 "beginning column (%d)."),
1942 cont_end, cont_beg);
1943 return TRNS_DROP_CASE;
1946 /* Parse the first record. */
1947 if (!skip_first_record)
1949 struct rpd_parse_info info;
1951 info.line = ls_c_str (&line);
1952 info.len = ls_length (&line);
1953 info.beg = starts_beg;
1954 info.end = starts_end;
1958 info.max_occurs = occurs_left;
1959 code = rpd_parse_record (&info);
1961 return TRNS_DROP_CASE;
1962 occurs_left -= code;
1964 else if (cont_beg == 0)
1965 return TRNS_NEXT_CASE;
1967 /* Make sure, if some occurrences are left, that we have
1968 continuation records. */
1969 if (occurs_left > 0 && cont_beg == 0)
1972 _("Number of repetitions specified on OCCURS (%d) "
1973 "exceed number of repetitions available in "
1974 "space on STARTS (%d), and CONTINUED not specified."),
1975 occurs, (starts_end - starts_beg + 1) / length);
1976 return TRNS_DROP_CASE;
1979 /* Go on to additional records. */
1980 while (occurs_left != 0)
1982 struct rpd_parse_info info;
1984 assert (occurs_left >= 0);
1986 /* Read in another record. */
1987 if (dfm_eof (t->reader))
1990 _("Unexpected end of file with %d repetitions "
1991 "remaining out of %d."),
1992 occurs_left, occurs);
1993 return TRNS_DROP_CASE;
1995 dfm_expand_tabs (t->reader);
1996 dfm_get_record (t->reader, &line);
1997 dfm_forward_record (t->reader);
1999 /* Parse this record. */
2001 info.line = ls_c_str (&line);
2002 info.len = ls_length (&line);
2003 info.beg = cont_beg;
2004 info.end = cont_end;
2008 info.max_occurs = occurs_left;
2009 code = rpd_parse_record (&info);;
2011 return TRNS_DROP_CASE;
2012 occurs_left -= code;
2015 dfm_pop (t->reader);
2017 /* FIXME: This is a kluge until we've implemented multiplexing of
2019 return TRNS_NEXT_CASE;
2022 /* Frees a REPEATING DATA transformation.
2023 Returns true if successful, false if an I/O error occurred. */
2025 repeating_data_trns_free (void *rpd_)
2027 struct repeating_data_trns *rpd = rpd_;
2029 destroy_dls_var_spec (rpd->first);
2030 dfm_close_reader (rpd->reader);
2031 free (rpd->id_value);
2036 /* Lets repeating_data_trns_proc() know how to write the cases
2037 that it composes. Not elegant. */
2039 repeating_data_set_write_case (struct transformation *trns_,
2040 write_case_func *write_case,
2041 write_case_data wc_data)
2043 struct repeating_data_trns *t = trns_->private;
2045 assert (trns_->proc == repeating_data_trns_proc);
2046 t->write_case = write_case;
2047 t->wc_data = wc_data;
2050 /* Reports a message in CLASS with the given FORMAT as text,
2051 prefixing the message with "REPEATING DATA: " to make the
2054 rpd_msg (enum msg_class class, const char *format, ...)
2060 ds_create (&text, "REPEATING DATA: ");
2061 va_start (args, format);
2062 ds_vprintf (&text, format, args);
2065 m.category = msg_class_to_category (class);
2066 m.severity = msg_class_to_severity (class);
2067 m.where.file_name = NULL;
2068 m.where.line_number = 0;
2069 m.text = ds_c_str (&text);