1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next; /* Next specification in list. */
55 /* Both free and fixed formats. */
56 struct fmt_spec input; /* Input format of this field. */
57 struct variable *v; /* Associated variable. Used only in
58 parsing. Not safe later. */
59 int fv; /* First value in case. */
61 /* Fixed format only. */
62 int rec; /* Record number (1-based). */
63 int fc, lc; /* Column numbers in record. */
65 /* Free format only. */
66 char name[9]; /* Name of variable. */
69 /* Constants for DATA LIST type. */
70 /* Must match table in cmd_data_list(). */
78 /* DATA LIST private data structure. */
82 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
83 struct file_handle *handle; /* Input file, never NULL. */
84 /* Do not reorder preceding fields. */
86 int type; /* A DLS_* constant. */
87 struct variable *end; /* Variable specified on END subcommand. */
88 int eof; /* End of file encountered. */
89 int nrec; /* Number of records. */
92 static int parse_fixed (struct data_list_pgm *);
93 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
94 static void dump_fixed_table (const struct dls_var_spec *specs,
95 const struct file_handle *handle, int nrec);
96 static void dump_free_table (const struct data_list_pgm *);
97 static void destroy_dls_var_spec (struct dls_var_spec *);
98 static void destroy_dls (struct trns_header *);
99 static int read_one_case (struct trns_header *, struct ccase *);
101 /* Message title for REPEATING DATA. */
102 #define RPD_ERR "REPEATING DATA: "
107 /* DATA LIST program under construction. */
108 struct data_list_pgm *dls;
110 /* 0=print no table, 1=print table. (TABLE subcommand.) */
113 lex_match_id ("DATA");
114 lex_match_id ("LIST");
116 if (!case_source_is_complex (vfm_source))
117 discard_variables ();
119 dls = xmalloc (sizeof *dls);
120 dls->handle = default_handle;
125 dls->first = dls->last = NULL;
129 if (lex_match_id ("FILE"))
132 dls->handle = fh_parse_file_handle ();
135 if (case_source_is_class (vfm_source, &file_type_source_class)
136 && dls->handle != default_handle)
138 msg (SE, _("DATA LIST may not use a different file from "
139 "that specified on its surrounding FILE TYPE."));
143 else if (lex_match_id ("RECORDS"))
147 if (!lex_force_int ())
149 dls->nrec = lex_integer ();
153 else if (lex_match_id ("END"))
157 msg (SE, _("The END subcommand may only be specified once."));
162 if (!lex_force_id ())
164 dls->end = dict_lookup_var (default_dict, tokid);
166 dls->end = dict_create_var_assert (default_dict, tokid, 0);
169 else if (token == T_ID)
171 /* Must match DLS_* constants. */
172 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
177 for (p = id; *p; p++)
178 if (lex_id_match (*p, tokid))
193 msg (SE, _("Only one of FIXED, FREE, or LIST may "
210 default_handle = dls->handle;
213 dls->type = DLS_FIXED;
217 if (dls->type == DLS_FREE)
223 if (dls->type == DLS_FIXED)
225 if (!parse_fixed (dls))
228 dump_fixed_table (dls->first, dls->handle, dls->nrec);
232 if (!parse_free (&dls->first, &dls->last))
235 dump_free_table (dls);
238 if (vfm_source != NULL)
240 struct data_list_pgm *new_pgm;
242 dls->h.proc = read_one_case;
243 dls->h.free = destroy_dls;
245 new_pgm = xmalloc (sizeof *new_pgm);
246 memcpy (new_pgm, &dls, sizeof *new_pgm);
247 add_transformation (&new_pgm->h);
250 vfm_source = create_case_source (&data_list_source_class, dls);
255 destroy_dls_var_spec (dls->first);
261 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
262 struct dls_var_spec *spec)
269 (*last)->next = spec;
273 /* Fixed-format parsing. */
275 /* Used for chaining together fortran-like format specifiers. */
278 struct fmt_list *next;
281 struct fmt_list *down;
284 /* Used as "local" variables among the fixed-format parsing funcs. If
285 it were guaranteed that PSPP were going to be compiled by gcc,
286 I'd make all these functions a single set of nested functions. */
287 struct fixed_parsing_state
289 char **name; /* Variable names. */
290 int name_cnt; /* Number of names. */
292 int recno; /* Index of current record. */
293 int sc; /* 1-based column number of starting column for
294 next field to output. */
297 static int fixed_parse_compatible (struct fixed_parsing_state *,
298 struct dls_var_spec **,
299 struct dls_var_spec **);
300 static int fixed_parse_fortran (struct fixed_parsing_state *,
301 struct dls_var_spec **,
302 struct dls_var_spec **);
305 parse_fixed (struct data_list_pgm *dls)
307 struct fixed_parsing_state fx;
315 while (lex_match ('/'))
318 if (lex_integer_p ())
320 if (lex_integer () < fx.recno)
322 msg (SE, _("The record number specified, %ld, is "
323 "before the previous record, %d. Data "
324 "fields must be listed in order of "
325 "increasing record number."),
326 lex_integer (), fx.recno - 1);
330 fx.recno = lex_integer ();
336 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
341 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
344 else if (token == '(')
346 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
351 msg (SE, _("SPSS-like or FORTRAN-like format "
352 "specification expected after variable names."));
356 for (i = 0; i < fx.name_cnt; i++)
360 if (dls->first == NULL)
362 msg (SE, _("At least one variable must be specified."));
365 if (dls->nrec && dls->last->rec > dls->nrec)
367 msg (SE, _("Variables are specified on records that "
368 "should not exist according to RECORDS subcommand."));
372 dls->nrec = dls->last->rec;
375 lex_error (_("expecting end of command"));
381 for (i = 0; i < fx.name_cnt; i++)
388 fixed_parse_compatible (struct fixed_parsing_state *fx,
389 struct dls_var_spec **first, struct dls_var_spec **last)
391 struct fmt_spec input;
397 if (!lex_force_int ())
402 msg (SE, _("Column positions for fields must be positive."));
408 lex_negative_to_dash ();
411 if (!lex_force_int ())
416 msg (SE, _("Column positions for fields must be positive."));
421 msg (SE, _("The ending column for a field must be "
422 "greater than the starting column."));
431 /* Divide columns evenly. */
432 input.w = (lc - fc + 1) / fx->name_cnt;
433 if ((lc - fc + 1) % fx->name_cnt)
435 msg (SE, _("The %d columns %d-%d "
436 "can't be evenly divided into %d fields."),
437 lc - fc + 1, fc, lc, fx->name_cnt);
441 /* Format specifier. */
444 struct fmt_desc *fdp;
450 input.type = parse_format_specifier_name (&cp, 0);
451 if (input.type == -1)
455 msg (SE, _("A format specifier on this line "
456 "has extra characters on the end."));
466 if (lex_integer_p ())
468 if (lex_integer () < 1)
470 msg (SE, _("The value for number of decimal places "
471 "must be at least 1."));
475 input.d = lex_integer ();
481 fdp = &formats[input.type];
482 if (fdp->n_args < 2 && input.d)
484 msg (SE, _("Input format %s doesn't accept decimal places."),
492 if (!lex_force_match (')'))
500 if (!check_input_specifier (&input))
503 /* Start column for next specification. */
506 /* Width of variables to create. */
507 if (input.type == FMT_A || input.type == FMT_AHEX)
512 /* Create variables and var specs. */
513 for (i = 0; i < fx->name_cnt; i++)
515 struct dls_var_spec *spec;
518 v = dict_create_var (default_dict, fx->name[i], width);
521 convert_fmt_ItoO (&input, &v->print);
523 if (!case_source_is_complex (vfm_source))
528 v = dict_lookup_var_assert (default_dict, fx->name[i]);
529 if (vfm_source == NULL)
531 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
534 if ((width != 0) != (v->width != 0))
536 msg (SE, _("There is already a variable %s of a "
541 if (width != 0 && width != v->width)
543 msg (SE, _("There is already a string variable %s of a "
544 "different width."), fx->name[i]);
549 spec = xmalloc (sizeof *spec);
553 spec->rec = fx->recno;
554 spec->fc = fc + input.w * i;
555 spec->lc = spec->fc + input.w - 1;
556 append_var_spec (first, last, spec);
561 /* Destroy a format list and, optionally, all its sublists. */
563 destroy_fmt_list (struct fmt_list *f, int recurse)
565 struct fmt_list *next;
570 if (recurse && f->f.type == FMT_DESCEND)
571 destroy_fmt_list (f->down, 1);
576 /* Takes a hierarchically structured fmt_list F as constructed by
577 fixed_parse_fortran(), and flattens it into a linear list of
578 dls_var_spec's. NAME_IDX is used to take values from the list
579 of names in FX; it should initially point to a value of 0. */
581 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
582 struct dls_var_spec **first, struct dls_var_spec **last,
587 for (; f; f = f->next)
588 if (f->f.type == FMT_X)
590 else if (f->f.type == FMT_T)
592 else if (f->f.type == FMT_NEWREC)
594 fx->recno += f->count;
598 for (i = 0; i < f->count; i++)
599 if (f->f.type == FMT_DESCEND)
601 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
606 struct dls_var_spec *spec;
610 if (formats[f->f.type].cat & FCAT_STRING)
614 if (*name_idx >= fx->name_cnt)
616 msg (SE, _("The number of format "
617 "specifications exceeds the given number of "
622 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
625 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
629 if (!case_source_is_complex (vfm_source))
632 spec = xmalloc (sizeof *spec);
636 spec->rec = fx->recno;
638 spec->lc = fx->sc + f->f.w - 1;
639 append_var_spec (first, last, spec);
641 convert_fmt_ItoO (&spec->input, &v->print);
649 /* Recursively parses a FORTRAN-like format specification. LEVEL
650 is the level of recursion, starting from 0. Returns the
651 parsed specification if successful, or a null pointer on
653 static struct fmt_list *
654 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
655 struct dls_var_spec **first,
656 struct dls_var_spec **last)
658 struct fmt_list *head = NULL;
659 struct fmt_list *tail = NULL;
661 lex_force_match ('(');
665 struct fmt_list *new = xmalloc (sizeof *new);
668 /* Append new to list. */
676 if (lex_integer_p ())
678 new->count = lex_integer ();
684 /* Parse format specifier. */
687 new->f.type = FMT_DESCEND;
688 new->down = fixed_parse_fortran_internal (fx, first, last);
689 if (new->down == NULL)
692 else if (lex_match ('/'))
693 new->f.type = FMT_NEWREC;
694 else if (!parse_format_specifier (&new->f, 1)
695 || !check_input_specifier (&new->f))
700 lex_force_match (')');
705 destroy_fmt_list (head, 0);
710 /* Parses a FORTRAN-like format specification. Returns nonzero
713 fixed_parse_fortran (struct fixed_parsing_state *fx,
714 struct dls_var_spec **first, struct dls_var_spec **last)
716 struct fmt_list *list;
719 list = fixed_parse_fortran_internal (fx, first, last);
724 dump_fmt_list (fx, list, first, last, &name_idx);
725 destroy_fmt_list (list, 1);
726 if (name_idx < fx->name_cnt)
728 msg (SE, _("There aren't enough format specifications "
729 "to match the number of variable names given."));
736 /* Displays a table giving information on fixed-format variable
737 parsing on DATA LIST. */
738 /* FIXME: The `Columns' column should be divided into three columns,
739 one for the starting column, one for the dash, one for the ending
740 column; then right-justify the starting column and left-justify the
743 dump_fixed_table (const struct dls_var_spec *specs,
744 const struct file_handle *handle, int nrec)
746 const struct dls_var_spec *spec;
749 const char *filename;
752 for (i = 0, spec = specs; spec; spec = spec->next)
754 t = tab_create (4, i + 1, 0);
755 tab_columns (t, TAB_COL_DOWN, 1);
756 tab_headers (t, 0, 0, 1, 0);
757 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
758 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
759 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
760 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
761 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
762 tab_hline (t, TAL_2, 0, 3, 1);
763 tab_dim (t, tab_natural_dimensions);
765 for (i = 1, spec = specs; spec; spec = spec->next, i++)
767 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
768 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
769 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
771 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
772 fmt_to_string (&spec->input));
775 filename = fh_handle_name (handle);
776 if (filename == NULL)
778 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
779 sprintf (buf, (handle != inline_file
780 ? ngettext ("Reading %d record from file %s.",
781 "Reading %d records from file %s.", nrec)
782 : ngettext ("Reading %d record from the command file.",
783 "Reading %d records from the command file.",
787 tab_title (t, 0, buf);
789 fh_handle_name (NULL);
793 /* Free-format parsing. */
796 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
801 struct fmt_spec input, output;
807 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
811 if (!parse_format_specifier (&input, 0)
812 || !check_input_specifier (&input)
813 || !lex_force_match (')'))
815 for (i = 0; i < name_cnt; i++)
820 convert_fmt_ItoO (&input, &output);
831 if (input.type == FMT_A || input.type == FMT_AHEX)
835 for (i = 0; i < name_cnt; i++)
837 struct dls_var_spec *spec;
840 v = dict_create_var (default_dict, name[i], width);
843 msg (SE, _("%s is a duplicate variable name."), name[i]);
846 v->print = v->write = output;
848 if (!case_source_is_complex (vfm_source))
851 spec = xmalloc (sizeof *spec);
855 strcpy (spec->name, name[i]);
856 append_var_spec (first, last, spec);
858 for (i = 0; i < name_cnt; i++)
864 lex_error (_("expecting end of command"));
868 /* Displays a table giving information on free-format variable parsing
871 dump_free_table (const struct data_list_pgm *dls)
877 struct dls_var_spec *spec;
878 for (i = 0, spec = dls->first; spec; spec = spec->next)
882 t = tab_create (2, i + 1, 0);
883 tab_columns (t, TAB_COL_DOWN, 1);
884 tab_headers (t, 0, 0, 1, 0);
885 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
886 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
887 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
888 tab_hline (t, TAL_2, 0, 1, 1);
889 tab_dim (t, tab_natural_dimensions);
892 struct dls_var_spec *spec;
894 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
896 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
897 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
902 const char *filename;
904 filename = fh_handle_name (dls->handle);
905 if (filename == NULL)
908 (dls->handle != inline_file
909 ? _("Reading free-form data from file %s.")
910 : _("Reading free-form data from the command file.")),
915 fh_handle_name (NULL);
918 /* Input procedure. */
920 /* Extracts a field from the current position in the current record.
921 Fields can be unquoted or quoted with single- or double-quote
922 characters. *RET_LEN is set to the field length, *RET_CP is set to
923 the field itself. After parsing the field, sets the current
924 position in the record to just past the field. Returns 0 on
925 failure or a 1-based column number indicating the beginning of the
928 cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len)
933 cp = dfm_get_record (dls->handle, &len);
939 /* Skip leading whitespace and commas. */
940 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
945 /* Three types of fields: quoted with ', quoted with ", unquoted. */
946 if (*cp == '\'' || *cp == '"')
951 while (cp < ep && *cp != quote)
953 *ret_len = cp - *ret_cp;
957 msg (SW, _("Scope of string exceeds line."));
962 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
964 *ret_len = cp - *ret_cp;
968 int beginning_column;
970 dfm_set_record (dls->handle, *ret_cp);
971 beginning_column = dfm_get_cur_col (dls->handle) + 1;
973 dfm_set_record (dls->handle, cp);
975 return beginning_column;
979 typedef int data_list_read_func (const struct data_list_pgm *);
980 static data_list_read_func read_from_data_list_fixed;
981 static data_list_read_func read_from_data_list_free;
982 static data_list_read_func read_from_data_list_list;
984 /* Returns the proper function to read the kind of DATA LIST
985 data specified by DLS. */
986 static data_list_read_func *
987 get_data_list_read_func (const struct data_list_pgm *dls)
992 return read_from_data_list_fixed;
996 return read_from_data_list_free;
1000 return read_from_data_list_list;
1008 /* Reads a case from the data file and parses it according to
1009 fixed-format syntax rules. Returns -1 on success, -2 at end
1012 read_from_data_list_fixed (const struct data_list_pgm *dls)
1014 struct dls_var_spec *var_spec = dls->first;
1017 if (!dfm_get_record (dls->handle, NULL))
1019 for (i = 1; i <= dls->nrec; i++)
1022 char *line = dfm_get_record (dls->handle, &len);
1026 /* Note that this can't occur on the first record. */
1027 msg (SW, _("Partial case of %d of %d records discarded."),
1032 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1036 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1037 di.v = &temp_case->data[var_spec->fv];
1039 di.f1 = var_spec->fc;
1040 di.format = var_spec->input;
1045 dfm_fwd_record (dls->handle);
1051 /* Reads a case from the data file and parses it according to
1052 free-format syntax rules. Returns -1 on success, -2 at end of
1055 read_from_data_list_free (const struct data_list_pgm *dls)
1057 struct dls_var_spec *var_spec;
1061 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1065 /* Cut out a field and read in a new record if necessary. */
1068 column = cut_field (dls, &field, &len);
1072 if (dfm_get_record (dls->handle, NULL))
1073 dfm_fwd_record (dls->handle);
1074 if (!dfm_get_record (dls->handle, NULL))
1076 if (var_spec != dls->first)
1077 msg (SW, _("Partial case discarded. The first variable "
1078 "missing was %s."), var_spec->name);
1088 di.v = &temp_case->data[var_spec->fv];
1091 di.format = var_spec->input;
1098 /* Reads a case from the data file and parses it according to
1099 list-format syntax rules. Returns -1 on success, -2 at end of
1102 read_from_data_list_list (const struct data_list_pgm *dls)
1104 struct dls_var_spec *var_spec;
1108 if (!dfm_get_record (dls->handle, NULL))
1111 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1113 /* Cut out a field and check for end-of-line. */
1114 int column = cut_field (dls, &field, &len);
1119 msg (SW, _("Missing value(s) for all variables from %s onward. "
1120 "These will be filled with the system-missing value "
1121 "or blanks, as appropriate."),
1123 for (; var_spec; var_spec = var_spec->next)
1125 int width = get_format_var_width (&var_spec->input);
1127 temp_case->data[var_spec->fv].f = SYSMIS;
1129 memset (temp_case->data[var_spec->fv].s, ' ', width);
1139 di.v = &temp_case->data[var_spec->fv];
1142 di.format = var_spec->input;
1147 dfm_fwd_record (dls->handle);
1151 /* Destroys SPEC. */
1153 destroy_dls_var_spec (struct dls_var_spec *spec)
1155 struct dls_var_spec *next;
1157 while (spec != NULL)
1165 /* Destroys DATA LIST transformation PGM. */
1167 destroy_dls (struct trns_header *pgm)
1169 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1170 destroy_dls_var_spec (dls->first);
1171 fh_close_handle (dls->handle);
1175 /* Note that since this is exclusively an input program, C is
1176 guaranteed to be temp_case. */
1178 read_one_case (struct trns_header *t, struct ccase *c UNUSED)
1180 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1181 data_list_read_func *read_func;
1184 dfm_push (dls->handle);
1186 read_func = get_data_list_read_func (dls);
1187 retval = read_func (dls);
1189 /* Handle end of file. */
1192 /* If we already encountered end of file then this is an
1196 msg (SE, _("Attempt to read past end of file."));
1198 dfm_pop (dls->handle);
1202 /* Otherwise simply note it. */
1208 /* If there was an END subcommand handle it. */
1209 if (dls->end != NULL)
1213 temp_case->data[dls->end->fv].f = 1.0;
1217 temp_case->data[dls->end->fv].f = 0.0;
1220 dfm_pop (dls->handle);
1225 /* Reads all the records from the data file and passes them to
1228 data_list_source_read (struct case_source *source,
1229 write_case_func *write_case, write_case_data wc_data)
1231 struct data_list_pgm *dls = source->aux;
1232 data_list_read_func *read_func = get_data_list_read_func (dls);
1234 dfm_push (dls->handle);
1235 while (read_func (dls) != -2)
1236 if (!write_case (wc_data))
1238 dfm_pop (dls->handle);
1240 fh_close_handle (dls->handle);
1243 /* Destroys the source's internal data. */
1245 data_list_source_destroy (struct case_source *source)
1247 destroy_dls (source->aux);
1250 const struct case_source_class data_list_source_class =
1253 data_list_source_read,
1254 data_list_source_destroy,
1257 /* REPEATING DATA. */
1259 /* Represents a number or a variable. */
1260 struct rpd_num_or_var
1262 int num; /* Value, or 0. */
1263 struct variable *var; /* Variable, if number==0. */
1266 /* REPEATING DATA private data structure. */
1267 struct repeating_data_trns
1269 struct trns_header h;
1270 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1271 struct file_handle *handle; /* Input file, never NULL. */
1273 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1274 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1275 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1276 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1277 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1278 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1280 /* ID subcommand. */
1281 int id_beg, id_end; /* Beginning & end columns. */
1282 struct variable *id_var; /* DATA LIST variable. */
1283 struct fmt_spec id_spec; /* Input format spec. */
1284 union value *id_value; /* ID value. */
1286 write_case_func *write_case;
1287 write_case_data wc_data;
1290 int repeating_data_trns_proc (struct trns_header *, struct ccase *);
1291 void repeating_data_trns_free (struct trns_header *);
1292 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1293 static int parse_repeating_data (struct dls_var_spec **,
1294 struct dls_var_spec **);
1295 static void find_variable_input_spec (struct variable *v,
1296 struct fmt_spec *spec);
1298 /* Parses the REPEATING DATA command. */
1300 cmd_repeating_data (void)
1302 struct repeating_data_trns *rpd;
1304 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1307 /* Bits are set when a particular subcommand has been seen. */
1310 lex_match_id ("REPEATING");
1311 lex_match_id ("DATA");
1313 assert (case_source_is_complex (vfm_source));
1315 rpd = xmalloc (sizeof *rpd);
1316 rpd->handle = default_handle;
1317 rpd->first = rpd->last = NULL;
1318 rpd->starts_beg.num = 0;
1319 rpd->starts_beg.var = NULL;
1320 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1321 = rpd->cont_end = rpd->starts_beg;
1322 rpd->id_beg = rpd->id_end = 0;
1324 rpd->id_value = NULL;
1330 if (lex_match_id ("FILE"))
1333 rpd->handle = fh_parse_file_handle ();
1336 if (rpd->handle != default_handle)
1338 msg (SE, _("REPEATING DATA must use the same file as its "
1339 "corresponding DATA LIST or FILE TYPE."));
1343 else if (lex_match_id ("STARTS"))
1348 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1353 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1356 lex_negative_to_dash ();
1357 if (lex_match ('-'))
1359 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1362 /* Otherwise, rpd->starts_end is left uninitialized.
1363 This is okay. We will initialize it later from the
1364 record length of the file. We can't do this now
1365 because we can't be sure that the user has specified
1366 the file handle yet. */
1369 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1370 && rpd->starts_beg.num > rpd->starts_end.num)
1372 msg (SE, _("STARTS beginning column (%d) exceeds "
1373 "STARTS ending column (%d)."),
1374 rpd->starts_beg.num, rpd->starts_end.num);
1378 else if (lex_match_id ("OCCURS"))
1383 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1388 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1391 else if (lex_match_id ("LENGTH"))
1396 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1401 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1404 else if (lex_match_id ("CONTINUED"))
1409 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1414 if (!lex_match ('/'))
1416 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1419 lex_negative_to_dash ();
1421 && !parse_num_or_var (&rpd->cont_end,
1422 "CONTINUED ending column"))
1425 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1426 && rpd->cont_beg.num > rpd->cont_end.num)
1428 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1429 "CONTINUED ending column (%d)."),
1430 rpd->cont_beg.num, rpd->cont_end.num);
1435 rpd->cont_beg.num = 1;
1437 else if (lex_match_id ("ID"))
1442 msg (SE, _("%s subcommand given multiple times."),"ID");
1447 if (!lex_force_int ())
1449 if (lex_integer () < 1)
1451 msg (SE, _("ID beginning column (%ld) must be positive."),
1455 rpd->id_beg = lex_integer ();
1458 lex_negative_to_dash ();
1460 if (lex_match ('-'))
1462 if (!lex_force_int ())
1464 if (lex_integer () < 1)
1466 msg (SE, _("ID ending column (%ld) must be positive."),
1470 if (lex_integer () < rpd->id_end)
1472 msg (SE, _("ID ending column (%ld) cannot be less than "
1473 "ID beginning column (%d)."),
1474 lex_integer (), rpd->id_beg);
1478 rpd->id_end = lex_integer ();
1481 else rpd->id_end = rpd->id_beg;
1483 if (!lex_force_match ('='))
1485 rpd->id_var = parse_variable ();
1486 if (rpd->id_var == NULL)
1489 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1490 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1492 else if (lex_match_id ("TABLE"))
1494 else if (lex_match_id ("NOTABLE"))
1496 else if (lex_match_id ("DATA"))
1504 if (!lex_force_match ('/'))
1508 /* Comes here when DATA specification encountered. */
1509 if ((seen & (1 | 2)) != (1 | 2))
1511 if ((seen & 1) == 0)
1512 msg (SE, _("Missing required specification STARTS."));
1513 if ((seen & 2) == 0)
1514 msg (SE, _("Missing required specification OCCURS."));
1518 /* Enforce ID restriction. */
1519 if ((seen & 16) && !(seen & 8))
1521 msg (SE, _("ID specified without CONTINUED."));
1525 /* Calculate starts_end, cont_end if necessary. */
1526 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1527 rpd->starts_end.num = fh_record_width (rpd->handle);
1528 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1529 rpd->cont_end.num = fh_record_width (rpd->handle);
1531 /* Calculate length if possible. */
1532 if ((seen & 4) == 0)
1534 struct dls_var_spec *iter;
1536 for (iter = rpd->first; iter; iter = iter->next)
1538 if (iter->lc > rpd->length.num)
1539 rpd->length.num = iter->lc;
1541 assert (rpd->length.num != 0);
1545 if (!parse_repeating_data (&rpd->first, &rpd->last))
1549 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1552 struct repeating_data_trns *new_trns;
1554 rpd->h.proc = repeating_data_trns_proc;
1555 rpd->h.free = repeating_data_trns_free;
1557 new_trns = xmalloc (sizeof *new_trns);
1558 memcpy (new_trns, &rpd, sizeof *new_trns);
1559 add_transformation ((struct trns_header *) new_trns);
1562 return lex_end_of_command ();
1565 destroy_dls_var_spec (rpd->first);
1566 free (rpd->id_value);
1570 /* Because of the way that DATA LIST is structured, it's not trivial
1571 to determine what input format is associated with a given variable.
1572 This function finds the input format specification for variable V
1573 and puts it in SPEC. */
1575 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1579 for (i = 0; i < n_trns; i++)
1581 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1583 if (pgm->h.proc == read_one_case)
1585 struct dls_var_spec *iter;
1587 for (iter = pgm->first; iter; iter = iter->next)
1590 *spec = iter->input;
1599 /* Parses a number or a variable name from the syntax file and puts
1600 the results in VALUE. Ensures that the number is at least 1; else
1601 emits an error based on MESSAGE. Returns nonzero only if
1604 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1609 value->var = parse_variable ();
1610 if (value->var == NULL)
1612 if (value->var->type == ALPHA)
1614 msg (SE, _("String variable not allowed here."));
1618 else if (lex_integer_p ())
1620 value->num = lex_integer ();
1624 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1630 msg (SE, _("Variable or integer expected for %s."), message);
1636 /* Parses data specifications for repeating data groups. Taken from
1637 parse_fixed(). Returns nonzero only if successful. */
1639 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1641 struct fixed_parsing_state fx;
1647 while (token != '.')
1649 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1654 if (!fixed_parse_compatible (&fx, first, last))
1657 else if (token == '(')
1659 if (!fixed_parse_fortran (&fx, first, last))
1664 msg (SE, _("SPSS-like or FORTRAN-like format "
1665 "specification expected after variable names."));
1669 for (i = 0; i < fx.name_cnt; i++)
1675 lex_error (_("expecting end of command"));
1682 for (i = 0; i < fx.name_cnt; i++)
1688 /* Obtains the real value for rpd_num_or_var N in case C and returns
1689 it. The valid range is nonnegative numbers, but numbers outside
1690 this range can be returned and should be handled by the caller as
1693 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1698 assert (n->num == 0);
1701 double v = c->data[n->var->fv].f;
1703 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1712 /* Parameter record passed to rpd_parse_record(). */
1713 struct rpd_parse_info
1715 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1716 const char *line; /* Line being parsed. */
1717 size_t len; /* Line length. */
1718 int beg, end; /* First and last column of first occurrence. */
1719 int ofs; /* Column offset between repeated occurrences. */
1720 struct ccase *c; /* Case to fill in. */
1721 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1722 int max_occurs; /* Max number of occurrences to parse. */
1725 /* Parses one record of repeated data and outputs corresponding
1726 cases. Returns number of occurrences parsed up to the
1727 maximum specified in INFO. */
1729 rpd_parse_record (const struct rpd_parse_info *info)
1731 struct repeating_data_trns *t = info->trns;
1732 int cur = info->beg;
1735 /* Handle record ID values. */
1738 union value id_temp[MAX_ELEMS_PER_VALUE];
1740 /* Parse record ID into V. */
1744 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1745 di.v = info->verify_id ? id_temp : t->id_value;
1748 di.format = t->id_spec;
1755 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1757 char expected_str [MAX_FORMATTED_LEN + 1];
1758 char actual_str [MAX_FORMATTED_LEN + 1];
1760 data_out (expected_str, &t->id_var->print, t->id_value);
1761 expected_str[t->id_var->print.w] = '\0';
1763 data_out (actual_str, &t->id_var->print, id_temp);
1764 actual_str[t->id_var->print.w] = '\0';
1767 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1768 actual_str, expected_str);
1774 /* Iterate over the set of expected occurrences and record each of
1775 them as a separate case. FIXME: We need to execute any
1776 transformations that follow the current one. */
1780 for (occurrences = 0; occurrences < info->max_occurs; )
1782 if (cur + info->ofs > info->end + 1)
1787 struct dls_var_spec *var_spec = t->first;
1789 for (; var_spec; var_spec = var_spec->next)
1791 int fc = var_spec->fc - 1 + cur;
1792 int lc = var_spec->lc - 1 + cur;
1794 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1799 _("Variable %s starting in column %d extends "
1800 "beyond physical record length of %d."),
1801 var_spec->v->name, fc, info->len);
1807 data_in_finite_line (&di, info->line, info->len, fc, lc);
1808 di.v = &info->c->data[var_spec->fv];
1811 di.format = var_spec->input;
1821 if (!t->write_case (t->wc_data))
1829 /* Analogous to read_one_case; reads one set of repetitions of the
1830 elements in the REPEATING DATA structure. Returns -1 on success,
1831 -2 on end of file or on failure. */
1833 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c)
1835 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1837 char *line; /* Current record. */
1838 int len; /* Length of current record. */
1840 int starts_beg; /* Starting column. */
1841 int starts_end; /* Ending column. */
1842 int occurs; /* Number of repetitions. */
1843 int length; /* Length of each occurrence. */
1844 int cont_beg; /* Starting column for continuation lines. */
1845 int cont_end; /* Ending column for continuation lines. */
1847 int occurs_left; /* Number of occurrences remaining. */
1849 int code; /* Return value from rpd_parse_record(). */
1851 int skip_first_record = 0;
1853 dfm_push (t->handle);
1855 /* Read the current record. */
1856 dfm_bkwd_record (t->handle, 1);
1857 line = dfm_get_record (t->handle, &len);
1860 dfm_fwd_record (t->handle);
1862 /* Calculate occurs, length. */
1863 occurs_left = occurs = realize_value (&t->occurs, c);
1866 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1869 starts_beg = realize_value (&t->starts_beg, c);
1870 if (starts_beg <= 0)
1872 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1877 starts_end = realize_value (&t->starts_end, c);
1878 if (starts_end < starts_beg)
1880 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1881 "beginning column (%d)."),
1882 starts_end, starts_beg);
1883 skip_first_record = 1;
1885 length = realize_value (&t->length, c);
1888 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1890 occurs = occurs_left = 1;
1892 cont_beg = realize_value (&t->cont_beg, c);
1895 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1900 cont_end = realize_value (&t->cont_end, c);
1901 if (cont_end < cont_beg)
1903 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1904 "beginning column (%d)."),
1905 cont_end, cont_beg);
1909 /* Parse the first record. */
1910 if (!skip_first_record)
1912 struct rpd_parse_info info;
1916 info.beg = starts_beg;
1917 info.end = starts_end;
1920 info.max_occurs = occurs_left;
1921 code = rpd_parse_record (&info);;
1925 else if (cont_beg == 0)
1928 /* Make sure, if some occurrences are left, that we have
1929 continuation records. */
1930 occurs_left -= code;
1931 if (occurs_left != 0 && cont_beg == 0)
1934 _("Number of repetitions specified on OCCURS (%d) "
1935 "exceed number of repetitions available in "
1936 "space on STARTS (%d), and CONTINUED not specified."),
1941 /* Go on to additional records. */
1942 while (occurs_left != 0)
1944 struct rpd_parse_info info;
1946 assert (occurs_left >= 0);
1948 /* Read in another record. */
1949 line = dfm_get_record (t->handle, &len);
1953 _("Unexpected end of file with %d repetitions "
1954 "remaining out of %d."),
1955 occurs_left, occurs);
1958 dfm_fwd_record (t->handle);
1960 /* Parse this record. */
1964 info.beg = cont_beg;
1965 info.end = cont_end;
1968 info.max_occurs = occurs_left;
1969 code = rpd_parse_record (&info);;
1972 occurs_left -= code;
1975 dfm_pop (t->handle);
1977 /* FIXME: This is a kluge until we've implemented multiplexing of
1983 repeating_data_trns_free (struct trns_header *rpd_)
1985 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
1987 destroy_dls_var_spec (rpd->first);
1988 fh_close_handle (rpd->handle);
1989 free (rpd->id_value);
1992 /* This is a kluge. It is only here until I have more time
1993 tocome up with something better. It lets
1994 repeating_data_trns_proc() know how to write the cases that it
1997 repeating_data_set_write_case (struct trns_header *trns,
1998 write_case_func *write_case,
1999 write_case_data wc_data)
2001 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2003 assert (trns->proc == repeating_data_trns_proc);
2004 t->write_case = write_case;
2005 t->wc_data = wc_data;