1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next; /* Next specification in list. */
55 /* Both free and fixed formats. */
56 struct fmt_spec input; /* Input format of this field. */
57 struct variable *v; /* Associated variable. Used only in
58 parsing. Not safe later. */
59 int fv; /* First value in case. */
61 /* Fixed format only. */
62 int rec; /* Record number (1-based). */
63 int fc, lc; /* Column numbers in record. */
65 /* Free format only. */
66 char name[9]; /* Name of variable. */
69 /* Constants for DATA LIST type. */
70 /* Must match table in cmd_data_list(). */
78 /* DATA LIST private data structure. */
82 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
83 struct file_handle *handle; /* Input file, never NULL. */
84 /* Do not reorder preceding fields. */
86 int type; /* A DLS_* constant. */
87 struct variable *end; /* Variable specified on END subcommand. */
88 int eof; /* End of file encountered. */
89 int nrec; /* Number of records. */
92 static int parse_fixed (struct data_list_pgm *);
93 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
94 static void dump_fixed_table (const struct dls_var_spec *specs,
95 const struct file_handle *handle, int nrec);
96 static void dump_free_table (const struct data_list_pgm *);
97 static void destroy_dls_var_spec (struct dls_var_spec *);
98 static trns_free_func destroy_dls;
99 static trns_proc_func read_one_case;
101 /* Message title for REPEATING DATA. */
102 #define RPD_ERR "REPEATING DATA: "
107 /* DATA LIST program under construction. */
108 struct data_list_pgm *dls;
110 /* 0=print no table, 1=print table. (TABLE subcommand.) */
113 lex_match_id ("DATA");
114 lex_match_id ("LIST");
116 if (!case_source_is_complex (vfm_source))
117 discard_variables ();
119 dls = xmalloc (sizeof *dls);
120 dls->handle = default_handle;
125 dls->first = dls->last = NULL;
129 if (lex_match_id ("FILE"))
132 dls->handle = fh_parse_file_handle ();
135 if (case_source_is_class (vfm_source, &file_type_source_class)
136 && dls->handle != default_handle)
138 msg (SE, _("DATA LIST may not use a different file from "
139 "that specified on its surrounding FILE TYPE."));
143 else if (lex_match_id ("RECORDS"))
147 if (!lex_force_int ())
149 dls->nrec = lex_integer ();
153 else if (lex_match_id ("END"))
157 msg (SE, _("The END subcommand may only be specified once."));
162 if (!lex_force_id ())
164 dls->end = dict_lookup_var (default_dict, tokid);
166 dls->end = dict_create_var_assert (default_dict, tokid, 0);
169 else if (token == T_ID)
171 /* Must match DLS_* constants. */
172 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
177 for (p = id; *p; p++)
178 if (lex_id_match (*p, tokid))
193 msg (SE, _("Only one of FIXED, FREE, or LIST may "
210 default_handle = dls->handle;
213 dls->type = DLS_FIXED;
217 if (dls->type == DLS_FREE)
223 if (dls->type == DLS_FIXED)
225 if (!parse_fixed (dls))
228 dump_fixed_table (dls->first, dls->handle, dls->nrec);
232 if (!parse_free (&dls->first, &dls->last))
235 dump_free_table (dls);
238 if (vfm_source != NULL)
240 struct data_list_pgm *new_pgm;
242 dls->h.proc = read_one_case;
243 dls->h.free = destroy_dls;
245 new_pgm = xmalloc (sizeof *new_pgm);
246 memcpy (new_pgm, &dls, sizeof *new_pgm);
247 add_transformation (&new_pgm->h);
250 vfm_source = create_case_source (&data_list_source_class, dls);
255 destroy_dls_var_spec (dls->first);
261 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
262 struct dls_var_spec *spec)
269 (*last)->next = spec;
273 /* Fixed-format parsing. */
275 /* Used for chaining together fortran-like format specifiers. */
278 struct fmt_list *next;
281 struct fmt_list *down;
284 /* Used as "local" variables among the fixed-format parsing funcs. If
285 it were guaranteed that PSPP were going to be compiled by gcc,
286 I'd make all these functions a single set of nested functions. */
287 struct fixed_parsing_state
289 char **name; /* Variable names. */
290 int name_cnt; /* Number of names. */
292 int recno; /* Index of current record. */
293 int sc; /* 1-based column number of starting column for
294 next field to output. */
297 static int fixed_parse_compatible (struct fixed_parsing_state *,
298 struct dls_var_spec **,
299 struct dls_var_spec **);
300 static int fixed_parse_fortran (struct fixed_parsing_state *,
301 struct dls_var_spec **,
302 struct dls_var_spec **);
305 parse_fixed (struct data_list_pgm *dls)
307 struct fixed_parsing_state fx;
315 while (lex_match ('/'))
318 if (lex_integer_p ())
320 if (lex_integer () < fx.recno)
322 msg (SE, _("The record number specified, %ld, is "
323 "before the previous record, %d. Data "
324 "fields must be listed in order of "
325 "increasing record number."),
326 lex_integer (), fx.recno - 1);
330 fx.recno = lex_integer ();
336 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
341 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
344 else if (token == '(')
346 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
351 msg (SE, _("SPSS-like or FORTRAN-like format "
352 "specification expected after variable names."));
356 for (i = 0; i < fx.name_cnt; i++)
360 if (dls->first == NULL)
362 msg (SE, _("At least one variable must be specified."));
365 if (dls->nrec && dls->last->rec > dls->nrec)
367 msg (SE, _("Variables are specified on records that "
368 "should not exist according to RECORDS subcommand."));
372 dls->nrec = dls->last->rec;
375 lex_error (_("expecting end of command"));
381 for (i = 0; i < fx.name_cnt; i++)
388 fixed_parse_compatible (struct fixed_parsing_state *fx,
389 struct dls_var_spec **first, struct dls_var_spec **last)
391 struct fmt_spec input;
397 if (!lex_force_int ())
402 msg (SE, _("Column positions for fields must be positive."));
408 lex_negative_to_dash ();
411 if (!lex_force_int ())
416 msg (SE, _("Column positions for fields must be positive."));
421 msg (SE, _("The ending column for a field must be "
422 "greater than the starting column."));
431 /* Divide columns evenly. */
432 input.w = (lc - fc + 1) / fx->name_cnt;
433 if ((lc - fc + 1) % fx->name_cnt)
435 msg (SE, _("The %d columns %d-%d "
436 "can't be evenly divided into %d fields."),
437 lc - fc + 1, fc, lc, fx->name_cnt);
441 /* Format specifier. */
444 struct fmt_desc *fdp;
450 input.type = parse_format_specifier_name (&cp, 0);
451 if (input.type == -1)
455 msg (SE, _("A format specifier on this line "
456 "has extra characters on the end."));
466 if (lex_integer_p ())
468 if (lex_integer () < 1)
470 msg (SE, _("The value for number of decimal places "
471 "must be at least 1."));
475 input.d = lex_integer ();
481 fdp = &formats[input.type];
482 if (fdp->n_args < 2 && input.d)
484 msg (SE, _("Input format %s doesn't accept decimal places."),
492 if (!lex_force_match (')'))
500 if (!check_input_specifier (&input))
503 /* Start column for next specification. */
506 /* Width of variables to create. */
507 if (input.type == FMT_A || input.type == FMT_AHEX)
512 /* Create variables and var specs. */
513 for (i = 0; i < fx->name_cnt; i++)
515 struct dls_var_spec *spec;
518 v = dict_create_var (default_dict, fx->name[i], width);
521 convert_fmt_ItoO (&input, &v->print);
523 if (!case_source_is_complex (vfm_source))
528 v = dict_lookup_var_assert (default_dict, fx->name[i]);
529 if (vfm_source == NULL)
531 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
534 if ((width != 0) != (v->width != 0))
536 msg (SE, _("There is already a variable %s of a "
541 if (width != 0 && width != v->width)
543 msg (SE, _("There is already a string variable %s of a "
544 "different width."), fx->name[i]);
549 spec = xmalloc (sizeof *spec);
553 spec->rec = fx->recno;
554 spec->fc = fc + input.w * i;
555 spec->lc = spec->fc + input.w - 1;
556 append_var_spec (first, last, spec);
561 /* Destroy a format list and, optionally, all its sublists. */
563 destroy_fmt_list (struct fmt_list *f, int recurse)
565 struct fmt_list *next;
570 if (recurse && f->f.type == FMT_DESCEND)
571 destroy_fmt_list (f->down, 1);
576 /* Takes a hierarchically structured fmt_list F as constructed by
577 fixed_parse_fortran(), and flattens it into a linear list of
578 dls_var_spec's. NAME_IDX is used to take values from the list
579 of names in FX; it should initially point to a value of 0. */
581 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
582 struct dls_var_spec **first, struct dls_var_spec **last,
587 for (; f; f = f->next)
588 if (f->f.type == FMT_X)
590 else if (f->f.type == FMT_T)
592 else if (f->f.type == FMT_NEWREC)
594 fx->recno += f->count;
598 for (i = 0; i < f->count; i++)
599 if (f->f.type == FMT_DESCEND)
601 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
606 struct dls_var_spec *spec;
610 if (formats[f->f.type].cat & FCAT_STRING)
614 if (*name_idx >= fx->name_cnt)
616 msg (SE, _("The number of format "
617 "specifications exceeds the given number of "
622 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
625 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
629 if (!case_source_is_complex (vfm_source))
632 spec = xmalloc (sizeof *spec);
636 spec->rec = fx->recno;
638 spec->lc = fx->sc + f->f.w - 1;
639 append_var_spec (first, last, spec);
641 convert_fmt_ItoO (&spec->input, &v->print);
649 /* Recursively parses a FORTRAN-like format specification. LEVEL
650 is the level of recursion, starting from 0. Returns the
651 parsed specification if successful, or a null pointer on
653 static struct fmt_list *
654 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
655 struct dls_var_spec **first,
656 struct dls_var_spec **last)
658 struct fmt_list *head = NULL;
659 struct fmt_list *tail = NULL;
661 lex_force_match ('(');
665 struct fmt_list *new = xmalloc (sizeof *new);
668 /* Append new to list. */
676 if (lex_integer_p ())
678 new->count = lex_integer ();
684 /* Parse format specifier. */
687 new->f.type = FMT_DESCEND;
688 new->down = fixed_parse_fortran_internal (fx, first, last);
689 if (new->down == NULL)
692 else if (lex_match ('/'))
693 new->f.type = FMT_NEWREC;
694 else if (!parse_format_specifier (&new->f, 1)
695 || !check_input_specifier (&new->f))
700 lex_force_match (')');
705 destroy_fmt_list (head, 0);
710 /* Parses a FORTRAN-like format specification. Returns nonzero
713 fixed_parse_fortran (struct fixed_parsing_state *fx,
714 struct dls_var_spec **first, struct dls_var_spec **last)
716 struct fmt_list *list;
719 list = fixed_parse_fortran_internal (fx, first, last);
724 dump_fmt_list (fx, list, first, last, &name_idx);
725 destroy_fmt_list (list, 1);
726 if (name_idx < fx->name_cnt)
728 msg (SE, _("There aren't enough format specifications "
729 "to match the number of variable names given."));
736 /* Displays a table giving information on fixed-format variable
737 parsing on DATA LIST. */
738 /* FIXME: The `Columns' column should be divided into three columns,
739 one for the starting column, one for the dash, one for the ending
740 column; then right-justify the starting column and left-justify the
743 dump_fixed_table (const struct dls_var_spec *specs,
744 const struct file_handle *handle, int nrec)
746 const struct dls_var_spec *spec;
749 const char *filename;
752 for (i = 0, spec = specs; spec; spec = spec->next)
754 t = tab_create (4, i + 1, 0);
755 tab_columns (t, TAB_COL_DOWN, 1);
756 tab_headers (t, 0, 0, 1, 0);
757 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
758 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
759 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
760 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
761 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
762 tab_hline (t, TAL_2, 0, 3, 1);
763 tab_dim (t, tab_natural_dimensions);
765 for (i = 1, spec = specs; spec; spec = spec->next, i++)
767 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
768 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
769 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
771 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
772 fmt_to_string (&spec->input));
775 filename = fh_handle_name (handle);
776 if (filename == NULL)
778 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
779 sprintf (buf, (handle != inline_file
780 ? ngettext ("Reading %d record from file %s.",
781 "Reading %d records from file %s.", nrec)
782 : ngettext ("Reading %d record from the command file.",
783 "Reading %d records from the command file.",
787 tab_title (t, 0, buf);
789 fh_handle_name (NULL);
793 /* Free-format parsing. */
796 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
801 struct fmt_spec input, output;
807 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
811 if (!parse_format_specifier (&input, 0)
812 || !check_input_specifier (&input)
813 || !lex_force_match (')'))
815 for (i = 0; i < name_cnt; i++)
820 convert_fmt_ItoO (&input, &output);
831 if (input.type == FMT_A || input.type == FMT_AHEX)
835 for (i = 0; i < name_cnt; i++)
837 struct dls_var_spec *spec;
840 v = dict_create_var (default_dict, name[i], width);
843 msg (SE, _("%s is a duplicate variable name."), name[i]);
846 v->print = v->write = output;
848 if (!case_source_is_complex (vfm_source))
851 spec = xmalloc (sizeof *spec);
855 strcpy (spec->name, name[i]);
856 append_var_spec (first, last, spec);
858 for (i = 0; i < name_cnt; i++)
864 lex_error (_("expecting end of command"));
868 /* Displays a table giving information on free-format variable parsing
871 dump_free_table (const struct data_list_pgm *dls)
877 struct dls_var_spec *spec;
878 for (i = 0, spec = dls->first; spec; spec = spec->next)
882 t = tab_create (2, i + 1, 0);
883 tab_columns (t, TAB_COL_DOWN, 1);
884 tab_headers (t, 0, 0, 1, 0);
885 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
886 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
887 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
888 tab_hline (t, TAL_2, 0, 1, 1);
889 tab_dim (t, tab_natural_dimensions);
892 struct dls_var_spec *spec;
894 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
896 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
897 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
902 const char *filename;
904 filename = fh_handle_name (dls->handle);
905 if (filename == NULL)
908 (dls->handle != inline_file
909 ? _("Reading free-form data from file %s.")
910 : _("Reading free-form data from the command file.")),
915 fh_handle_name (NULL);
918 /* Input procedure. */
920 /* Extracts a field from the current position in the current record.
921 Fields can be unquoted or quoted with single- or double-quote
922 characters. *RET_LEN is set to the field length, *RET_CP is set to
923 the field itself. After parsing the field, sets the current
924 position in the record to just past the field. Returns 0 on
925 failure or a 1-based column number indicating the beginning of the
928 cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len)
933 cp = dfm_get_record (dls->handle, &len);
939 /* Skip leading whitespace and commas. */
940 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
945 /* Three types of fields: quoted with ', quoted with ", unquoted. */
946 if (*cp == '\'' || *cp == '"')
951 while (cp < ep && *cp != quote)
953 *ret_len = cp - *ret_cp;
957 msg (SW, _("Scope of string exceeds line."));
962 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
964 *ret_len = cp - *ret_cp;
968 int beginning_column;
970 dfm_set_record (dls->handle, *ret_cp);
971 beginning_column = dfm_get_cur_col (dls->handle) + 1;
973 dfm_set_record (dls->handle, cp);
975 return beginning_column;
979 typedef int data_list_read_func (const struct data_list_pgm *);
980 static data_list_read_func read_from_data_list_fixed;
981 static data_list_read_func read_from_data_list_free;
982 static data_list_read_func read_from_data_list_list;
984 /* Returns the proper function to read the kind of DATA LIST
985 data specified by DLS. */
986 static data_list_read_func *
987 get_data_list_read_func (const struct data_list_pgm *dls)
992 return read_from_data_list_fixed;
996 return read_from_data_list_free;
1000 return read_from_data_list_list;
1008 /* Reads a case from the data file and parses it according to
1009 fixed-format syntax rules. Returns -1 on success, -2 at end
1012 read_from_data_list_fixed (const struct data_list_pgm *dls)
1014 struct dls_var_spec *var_spec = dls->first;
1017 if (!dfm_get_record (dls->handle, NULL))
1019 for (i = 1; i <= dls->nrec; i++)
1022 char *line = dfm_get_record (dls->handle, &len);
1026 /* Note that this can't occur on the first record. */
1027 msg (SW, _("Partial case of %d of %d records discarded."),
1032 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1036 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1037 di.v = &temp_case->data[var_spec->fv];
1039 di.f1 = var_spec->fc;
1040 di.format = var_spec->input;
1045 dfm_fwd_record (dls->handle);
1051 /* Reads a case from the data file and parses it according to
1052 free-format syntax rules. Returns -1 on success, -2 at end of
1055 read_from_data_list_free (const struct data_list_pgm *dls)
1057 struct dls_var_spec *var_spec;
1061 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1065 /* Cut out a field and read in a new record if necessary. */
1068 column = cut_field (dls, &field, &len);
1072 if (dfm_get_record (dls->handle, NULL))
1073 dfm_fwd_record (dls->handle);
1074 if (!dfm_get_record (dls->handle, NULL))
1076 if (var_spec != dls->first)
1077 msg (SW, _("Partial case discarded. The first variable "
1078 "missing was %s."), var_spec->name);
1088 di.v = &temp_case->data[var_spec->fv];
1091 di.format = var_spec->input;
1098 /* Reads a case from the data file and parses it according to
1099 list-format syntax rules. Returns -1 on success, -2 at end of
1102 read_from_data_list_list (const struct data_list_pgm *dls)
1104 struct dls_var_spec *var_spec;
1108 if (!dfm_get_record (dls->handle, NULL))
1111 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1113 /* Cut out a field and check for end-of-line. */
1114 int column = cut_field (dls, &field, &len);
1119 msg (SW, _("Missing value(s) for all variables from %s onward. "
1120 "These will be filled with the system-missing value "
1121 "or blanks, as appropriate."),
1123 for (; var_spec; var_spec = var_spec->next)
1125 int width = get_format_var_width (&var_spec->input);
1127 temp_case->data[var_spec->fv].f = SYSMIS;
1129 memset (temp_case->data[var_spec->fv].s, ' ', width);
1139 di.v = &temp_case->data[var_spec->fv];
1142 di.format = var_spec->input;
1147 dfm_fwd_record (dls->handle);
1151 /* Destroys SPEC. */
1153 destroy_dls_var_spec (struct dls_var_spec *spec)
1155 struct dls_var_spec *next;
1157 while (spec != NULL)
1165 /* Destroys DATA LIST transformation PGM. */
1167 destroy_dls (struct trns_header *pgm)
1169 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1170 destroy_dls_var_spec (dls->first);
1171 fh_close_handle (dls->handle);
1175 /* Note that since this is exclusively an input program, C is
1176 guaranteed to be temp_case. */
1178 read_one_case (struct trns_header *t, struct ccase *c UNUSED,
1179 int case_num UNUSED)
1181 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1182 data_list_read_func *read_func;
1185 dfm_push (dls->handle);
1187 read_func = get_data_list_read_func (dls);
1188 retval = read_func (dls);
1190 /* Handle end of file. */
1193 /* If we already encountered end of file then this is an
1197 msg (SE, _("Attempt to read past end of file."));
1199 dfm_pop (dls->handle);
1203 /* Otherwise simply note it. */
1209 /* If there was an END subcommand handle it. */
1210 if (dls->end != NULL)
1214 temp_case->data[dls->end->fv].f = 1.0;
1218 temp_case->data[dls->end->fv].f = 0.0;
1221 dfm_pop (dls->handle);
1226 /* Reads all the records from the data file and passes them to
1229 data_list_source_read (struct case_source *source,
1230 write_case_func *write_case, write_case_data wc_data)
1232 struct data_list_pgm *dls = source->aux;
1233 data_list_read_func *read_func = get_data_list_read_func (dls);
1235 dfm_push (dls->handle);
1236 while (read_func (dls) != -2)
1237 if (!write_case (wc_data))
1239 dfm_pop (dls->handle);
1241 fh_close_handle (dls->handle);
1244 /* Destroys the source's internal data. */
1246 data_list_source_destroy (struct case_source *source)
1248 destroy_dls (source->aux);
1251 const struct case_source_class data_list_source_class =
1255 data_list_source_read,
1256 data_list_source_destroy,
1259 /* REPEATING DATA. */
1261 /* Represents a number or a variable. */
1262 struct rpd_num_or_var
1264 int num; /* Value, or 0. */
1265 struct variable *var; /* Variable, if number==0. */
1268 /* REPEATING DATA private data structure. */
1269 struct repeating_data_trns
1271 struct trns_header h;
1272 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1273 struct file_handle *handle; /* Input file, never NULL. */
1275 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1276 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1277 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1278 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1279 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1280 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1282 /* ID subcommand. */
1283 int id_beg, id_end; /* Beginning & end columns. */
1284 struct variable *id_var; /* DATA LIST variable. */
1285 struct fmt_spec id_spec; /* Input format spec. */
1286 union value *id_value; /* ID value. */
1288 write_case_func *write_case;
1289 write_case_data wc_data;
1292 static trns_free_func repeating_data_trns_free;
1293 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1294 static int parse_repeating_data (struct dls_var_spec **,
1295 struct dls_var_spec **);
1296 static void find_variable_input_spec (struct variable *v,
1297 struct fmt_spec *spec);
1299 /* Parses the REPEATING DATA command. */
1301 cmd_repeating_data (void)
1303 struct repeating_data_trns *rpd;
1305 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1308 /* Bits are set when a particular subcommand has been seen. */
1311 lex_match_id ("REPEATING");
1312 lex_match_id ("DATA");
1314 assert (case_source_is_complex (vfm_source));
1316 rpd = xmalloc (sizeof *rpd);
1317 rpd->handle = default_handle;
1318 rpd->first = rpd->last = NULL;
1319 rpd->starts_beg.num = 0;
1320 rpd->starts_beg.var = NULL;
1321 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1322 = rpd->cont_end = rpd->starts_beg;
1323 rpd->id_beg = rpd->id_end = 0;
1325 rpd->id_value = NULL;
1331 if (lex_match_id ("FILE"))
1334 rpd->handle = fh_parse_file_handle ();
1337 if (rpd->handle != default_handle)
1339 msg (SE, _("REPEATING DATA must use the same file as its "
1340 "corresponding DATA LIST or FILE TYPE."));
1344 else if (lex_match_id ("STARTS"))
1349 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1354 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1357 lex_negative_to_dash ();
1358 if (lex_match ('-'))
1360 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1363 /* Otherwise, rpd->starts_end is left uninitialized.
1364 This is okay. We will initialize it later from the
1365 record length of the file. We can't do this now
1366 because we can't be sure that the user has specified
1367 the file handle yet. */
1370 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1371 && rpd->starts_beg.num > rpd->starts_end.num)
1373 msg (SE, _("STARTS beginning column (%d) exceeds "
1374 "STARTS ending column (%d)."),
1375 rpd->starts_beg.num, rpd->starts_end.num);
1379 else if (lex_match_id ("OCCURS"))
1384 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1389 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1392 else if (lex_match_id ("LENGTH"))
1397 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1402 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1405 else if (lex_match_id ("CONTINUED"))
1410 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1415 if (!lex_match ('/'))
1417 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1420 lex_negative_to_dash ();
1422 && !parse_num_or_var (&rpd->cont_end,
1423 "CONTINUED ending column"))
1426 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1427 && rpd->cont_beg.num > rpd->cont_end.num)
1429 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1430 "CONTINUED ending column (%d)."),
1431 rpd->cont_beg.num, rpd->cont_end.num);
1436 rpd->cont_beg.num = 1;
1438 else if (lex_match_id ("ID"))
1443 msg (SE, _("%s subcommand given multiple times."),"ID");
1448 if (!lex_force_int ())
1450 if (lex_integer () < 1)
1452 msg (SE, _("ID beginning column (%ld) must be positive."),
1456 rpd->id_beg = lex_integer ();
1459 lex_negative_to_dash ();
1461 if (lex_match ('-'))
1463 if (!lex_force_int ())
1465 if (lex_integer () < 1)
1467 msg (SE, _("ID ending column (%ld) must be positive."),
1471 if (lex_integer () < rpd->id_end)
1473 msg (SE, _("ID ending column (%ld) cannot be less than "
1474 "ID beginning column (%d)."),
1475 lex_integer (), rpd->id_beg);
1479 rpd->id_end = lex_integer ();
1482 else rpd->id_end = rpd->id_beg;
1484 if (!lex_force_match ('='))
1486 rpd->id_var = parse_variable ();
1487 if (rpd->id_var == NULL)
1490 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1491 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1493 else if (lex_match_id ("TABLE"))
1495 else if (lex_match_id ("NOTABLE"))
1497 else if (lex_match_id ("DATA"))
1505 if (!lex_force_match ('/'))
1509 /* Comes here when DATA specification encountered. */
1510 if ((seen & (1 | 2)) != (1 | 2))
1512 if ((seen & 1) == 0)
1513 msg (SE, _("Missing required specification STARTS."));
1514 if ((seen & 2) == 0)
1515 msg (SE, _("Missing required specification OCCURS."));
1519 /* Enforce ID restriction. */
1520 if ((seen & 16) && !(seen & 8))
1522 msg (SE, _("ID specified without CONTINUED."));
1526 /* Calculate starts_end, cont_end if necessary. */
1527 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1528 rpd->starts_end.num = fh_record_width (rpd->handle);
1529 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1530 rpd->cont_end.num = fh_record_width (rpd->handle);
1532 /* Calculate length if possible. */
1533 if ((seen & 4) == 0)
1535 struct dls_var_spec *iter;
1537 for (iter = rpd->first; iter; iter = iter->next)
1539 if (iter->lc > rpd->length.num)
1540 rpd->length.num = iter->lc;
1542 assert (rpd->length.num != 0);
1546 if (!parse_repeating_data (&rpd->first, &rpd->last))
1550 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1553 struct repeating_data_trns *new_trns;
1555 rpd->h.proc = repeating_data_trns_proc;
1556 rpd->h.free = repeating_data_trns_free;
1558 new_trns = xmalloc (sizeof *new_trns);
1559 memcpy (new_trns, &rpd, sizeof *new_trns);
1560 add_transformation ((struct trns_header *) new_trns);
1563 return lex_end_of_command ();
1566 destroy_dls_var_spec (rpd->first);
1567 free (rpd->id_value);
1571 /* Because of the way that DATA LIST is structured, it's not trivial
1572 to determine what input format is associated with a given variable.
1573 This function finds the input format specification for variable V
1574 and puts it in SPEC. */
1576 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1580 for (i = 0; i < n_trns; i++)
1582 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1584 if (pgm->h.proc == read_one_case)
1586 struct dls_var_spec *iter;
1588 for (iter = pgm->first; iter; iter = iter->next)
1591 *spec = iter->input;
1600 /* Parses a number or a variable name from the syntax file and puts
1601 the results in VALUE. Ensures that the number is at least 1; else
1602 emits an error based on MESSAGE. Returns nonzero only if
1605 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1610 value->var = parse_variable ();
1611 if (value->var == NULL)
1613 if (value->var->type == ALPHA)
1615 msg (SE, _("String variable not allowed here."));
1619 else if (lex_integer_p ())
1621 value->num = lex_integer ();
1625 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1631 msg (SE, _("Variable or integer expected for %s."), message);
1637 /* Parses data specifications for repeating data groups. Taken from
1638 parse_fixed(). Returns nonzero only if successful. */
1640 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1642 struct fixed_parsing_state fx;
1648 while (token != '.')
1650 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1655 if (!fixed_parse_compatible (&fx, first, last))
1658 else if (token == '(')
1660 if (!fixed_parse_fortran (&fx, first, last))
1665 msg (SE, _("SPSS-like or FORTRAN-like format "
1666 "specification expected after variable names."));
1670 for (i = 0; i < fx.name_cnt; i++)
1676 lex_error (_("expecting end of command"));
1683 for (i = 0; i < fx.name_cnt; i++)
1689 /* Obtains the real value for rpd_num_or_var N in case C and returns
1690 it. The valid range is nonnegative numbers, but numbers outside
1691 this range can be returned and should be handled by the caller as
1694 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1699 assert (n->num == 0);
1702 double v = c->data[n->var->fv].f;
1704 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1713 /* Parameter record passed to rpd_parse_record(). */
1714 struct rpd_parse_info
1716 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1717 const char *line; /* Line being parsed. */
1718 size_t len; /* Line length. */
1719 int beg, end; /* First and last column of first occurrence. */
1720 int ofs; /* Column offset between repeated occurrences. */
1721 struct ccase *c; /* Case to fill in. */
1722 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1723 int max_occurs; /* Max number of occurrences to parse. */
1726 /* Parses one record of repeated data and outputs corresponding
1727 cases. Returns number of occurrences parsed up to the
1728 maximum specified in INFO. */
1730 rpd_parse_record (const struct rpd_parse_info *info)
1732 struct repeating_data_trns *t = info->trns;
1733 int cur = info->beg;
1736 /* Handle record ID values. */
1739 union value id_temp[MAX_ELEMS_PER_VALUE];
1741 /* Parse record ID into V. */
1745 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1746 di.v = info->verify_id ? id_temp : t->id_value;
1749 di.format = t->id_spec;
1756 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1758 char expected_str [MAX_FORMATTED_LEN + 1];
1759 char actual_str [MAX_FORMATTED_LEN + 1];
1761 data_out (expected_str, &t->id_var->print, t->id_value);
1762 expected_str[t->id_var->print.w] = '\0';
1764 data_out (actual_str, &t->id_var->print, id_temp);
1765 actual_str[t->id_var->print.w] = '\0';
1768 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1769 actual_str, expected_str);
1775 /* Iterate over the set of expected occurrences and record each of
1776 them as a separate case. FIXME: We need to execute any
1777 transformations that follow the current one. */
1781 for (occurrences = 0; occurrences < info->max_occurs; )
1783 if (cur + info->ofs > info->end + 1)
1788 struct dls_var_spec *var_spec = t->first;
1790 for (; var_spec; var_spec = var_spec->next)
1792 int fc = var_spec->fc - 1 + cur;
1793 int lc = var_spec->lc - 1 + cur;
1795 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1800 _("Variable %s starting in column %d extends "
1801 "beyond physical record length of %d."),
1802 var_spec->v->name, fc, info->len);
1808 data_in_finite_line (&di, info->line, info->len, fc, lc);
1809 di.v = &info->c->data[var_spec->fv];
1812 di.format = var_spec->input;
1822 if (!t->write_case (t->wc_data))
1830 /* Analogous to read_one_case; reads one set of repetitions of the
1831 elements in the REPEATING DATA structure. Returns -1 on success,
1832 -2 on end of file or on failure. */
1834 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1835 int case_num UNUSED)
1837 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1839 char *line; /* Current record. */
1840 int len; /* Length of current record. */
1842 int starts_beg; /* Starting column. */
1843 int starts_end; /* Ending column. */
1844 int occurs; /* Number of repetitions. */
1845 int length; /* Length of each occurrence. */
1846 int cont_beg; /* Starting column for continuation lines. */
1847 int cont_end; /* Ending column for continuation lines. */
1849 int occurs_left; /* Number of occurrences remaining. */
1851 int code; /* Return value from rpd_parse_record(). */
1853 int skip_first_record = 0;
1855 dfm_push (t->handle);
1857 /* Read the current record. */
1858 dfm_bkwd_record (t->handle, 1);
1859 line = dfm_get_record (t->handle, &len);
1862 dfm_fwd_record (t->handle);
1864 /* Calculate occurs, length. */
1865 occurs_left = occurs = realize_value (&t->occurs, c);
1868 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1871 starts_beg = realize_value (&t->starts_beg, c);
1872 if (starts_beg <= 0)
1874 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1879 starts_end = realize_value (&t->starts_end, c);
1880 if (starts_end < starts_beg)
1882 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1883 "beginning column (%d)."),
1884 starts_end, starts_beg);
1885 skip_first_record = 1;
1887 length = realize_value (&t->length, c);
1890 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1892 occurs = occurs_left = 1;
1894 cont_beg = realize_value (&t->cont_beg, c);
1897 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1902 cont_end = realize_value (&t->cont_end, c);
1903 if (cont_end < cont_beg)
1905 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1906 "beginning column (%d)."),
1907 cont_end, cont_beg);
1911 /* Parse the first record. */
1912 if (!skip_first_record)
1914 struct rpd_parse_info info;
1918 info.beg = starts_beg;
1919 info.end = starts_end;
1922 info.max_occurs = occurs_left;
1923 code = rpd_parse_record (&info);;
1927 else if (cont_beg == 0)
1930 /* Make sure, if some occurrences are left, that we have
1931 continuation records. */
1932 occurs_left -= code;
1933 if (occurs_left != 0 && cont_beg == 0)
1936 _("Number of repetitions specified on OCCURS (%d) "
1937 "exceed number of repetitions available in "
1938 "space on STARTS (%d), and CONTINUED not specified."),
1943 /* Go on to additional records. */
1944 while (occurs_left != 0)
1946 struct rpd_parse_info info;
1948 assert (occurs_left >= 0);
1950 /* Read in another record. */
1951 line = dfm_get_record (t->handle, &len);
1955 _("Unexpected end of file with %d repetitions "
1956 "remaining out of %d."),
1957 occurs_left, occurs);
1960 dfm_fwd_record (t->handle);
1962 /* Parse this record. */
1966 info.beg = cont_beg;
1967 info.end = cont_end;
1970 info.max_occurs = occurs_left;
1971 code = rpd_parse_record (&info);;
1974 occurs_left -= code;
1977 dfm_pop (t->handle);
1979 /* FIXME: This is a kluge until we've implemented multiplexing of
1985 repeating_data_trns_free (struct trns_header *rpd_)
1987 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
1989 destroy_dls_var_spec (rpd->first);
1990 fh_close_handle (rpd->handle);
1991 free (rpd->id_value);
1994 /* This is a kluge. It is only here until I have more time
1995 tocome up with something better. It lets
1996 repeating_data_trns_proc() know how to write the cases that it
1999 repeating_data_set_write_case (struct trns_header *trns,
2000 write_case_func *write_case,
2001 write_case_data wc_data)
2003 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2005 assert (trns->proc == repeating_data_trns_proc);
2006 t->write_case = write_case;
2007 t->wc_data = wc_data;