1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
29 #include "debug-print.h"
32 #include "file-handle.h"
42 /* Utility function. */
44 /* FIXME: Either REPEATING DATA must be the last transformation, or we
45 must multiplex the transformations that follow (i.e., perform them
46 for every case that we produce from a repetition instance).
47 Currently we do neither. We should do one or the other. */
49 /* Describes how to parse one variable. */
52 struct dls_var_spec *next;
53 struct variable *v; /* Associated variable. Used only in
54 parsing. Not safe later. */
55 char name[9]; /* Free-format: Name of variable. */
56 int rec; /* Fixed-format: Record number (1-based). */
57 int fc, lc; /* Fixed-format: Column numbers in record. */
58 struct fmt_spec input; /* Input format of this field. */
59 int fv; /* First value in case. */
60 int width; /* 0=numeric, >0=width of alpha field. */
63 /* Constants for DATA LIST type. */
64 /* Must match table in cmd_data_list(). */
72 /* DATA LIST private data structure. */
76 struct dls_var_spec *spec; /* Variable parsing specifications. */
77 struct file_handle *handle; /* Input file, never NULL. */
78 /* Do not reorder preceding fields. */
80 int type; /* A DLS_* constant. */
81 struct variable *end; /* Variable specified on END subcommand. */
82 int eof; /* End of file encountered. */
83 int nrec; /* Number of records. */
86 /* Holds information on parsing the data file. */
87 static struct data_list_pgm dls;
89 /* Pointer to a pointer to where the first dls_var_spec should go. */
90 static struct dls_var_spec **first;
92 /* Last dls_var_spec in the chain. Used for building the linked-list. */
93 static struct dls_var_spec *next;
95 static int parse_fixed (void);
96 static int parse_free (void);
97 static void dump_fixed_table (void);
98 static void dump_free_table (void);
99 static void destroy_dls (struct trns_header *);
100 static int read_one_case (struct trns_header *, struct ccase *);
102 /* Message title for REPEATING DATA. */
103 #define RPD_ERR "REPEATING DATA: "
108 /* 0=print no table, 1=print table. (TABLE subcommand.) */
111 lex_match_id ("DATA");
112 lex_match_id ("LIST");
114 if (vfm_source != &input_program_source
115 && vfm_source != &file_type_source)
116 discard_variables ();
118 dls.handle = default_handle;
129 if (lex_match_id ("FILE"))
132 dls.handle = fh_parse_file_handle ();
135 if (vfm_source == &file_type_source && dls.handle != default_handle)
137 msg (SE, _("DATA LIST may not use a different file from "
138 "that specified on its surrounding FILE TYPE."));
142 else if (lex_match_id ("RECORDS"))
146 if (!lex_force_int ())
148 dls.nrec = lex_integer ();
152 else if (lex_match_id ("END"))
156 msg (SE, _("The END subcommand may only be specified once."));
161 if (!lex_force_id ())
163 dls.end = dict_lookup_var (default_dict, tokid);
166 dls.end = dict_create_var (default_dict, tokid, 0);
167 assert (dls.end != NULL);
171 else if (token == T_ID)
173 /* Must match DLS_* constants. */
174 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
179 for (p = id; *p; p++)
180 if (lex_id_match (*p, tokid))
195 msg (SE, _("Only one of FIXED, FREE, or LIST may "
212 default_handle = dls.handle;
215 dls.type = DLS_FIXED;
219 if (dls.type == DLS_FREE)
225 if (dls.type == DLS_FIXED)
240 if (vfm_source != NULL)
242 struct data_list_pgm *new_pgm;
244 dls.h.proc = read_one_case;
245 dls.h.free = destroy_dls;
247 new_pgm = xmalloc (sizeof *new_pgm);
248 memcpy (new_pgm, &dls, sizeof *new_pgm);
249 add_transformation ((struct trns_header *) new_pgm);
252 vfm_source = &data_list_source;
258 append_var_spec (struct dls_var_spec *spec)
261 *first = next = xmalloc (sizeof *spec);
263 next = next->next = xmalloc (sizeof *spec);
265 memcpy (next, spec, sizeof *spec);
269 /* Fixed-format parsing. */
271 /* Used for chaining together fortran-like format specifiers. */
274 struct fmt_list *next;
277 struct fmt_list *down;
280 /* Used as "local" variables among the fixed-format parsing funcs. If
281 it were guaranteed that PSPP were going to be compiled by gcc,
282 I'd make all these functions a single set of nested functions. */
285 char **name; /* Variable names. */
286 int nname; /* Number of names. */
287 int cname; /* dump_fmt_list: index of next name to use. */
289 int recno; /* Index of current record. */
290 int sc; /* 1-based column number of starting column for
291 next field to output. */
293 struct dls_var_spec spec; /* Next specification to output. */
294 int fc, lc; /* First, last column in set of fields specified
297 int level; /* Nesting level in fixed_parse_fortran(). */
301 static int fixed_parse_compatible (void);
302 static struct fmt_list *fixed_parse_fortran (void);
314 while (lex_match ('/'))
317 if (lex_integer_p ())
319 if (lex_integer () < fx.recno)
321 msg (SE, _("The record number specified, %ld, is "
322 "before the previous record, %d. Data "
323 "fields must be listed in order of "
324 "increasing record number."),
325 lex_integer (), fx.recno - 1);
329 fx.recno = lex_integer ();
334 fx.spec.rec = fx.recno;
336 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
341 if (!fixed_parse_compatible ())
344 else if (token == '(')
348 if (!fixed_parse_fortran ())
353 msg (SE, _("SPSS-like or FORTRAN-like format "
354 "specification expected after variable names."));
358 for (i = 0; i < fx.nname; i++)
362 if (dls.nrec && next->rec > dls.nrec)
364 msg (SE, _("Variables are specified on records that "
365 "should not exist according to RECORDS subcommand."));
369 dls.nrec = next->rec;
372 lex_error (_("expecting end of command"));
378 for (i = 0; i < fx.nname; i++)
385 fixed_parse_compatible (void)
390 if (!lex_force_int ())
393 fx.fc = lex_integer ();
396 msg (SE, _("Column positions for fields must be positive."));
401 lex_negative_to_dash ();
404 if (!lex_force_int ())
406 fx.lc = lex_integer ();
409 msg (SE, _("Column positions for fields must be positive."));
412 else if (fx.lc < fx.fc)
414 msg (SE, _("The ending column for a field must be "
415 "greater than the starting column."));
424 fx.spec.input.w = fx.lc - fx.fc + 1;
427 struct fmt_desc *fdp;
433 fx.spec.input.type = parse_format_specifier_name (&cp, 0);
434 if (fx.spec.input.type == -1)
438 msg (SE, _("A format specifier on this line "
439 "has extra characters on the end."));
447 fx.spec.input.type = FMT_F;
449 if (lex_integer_p ())
451 if (lex_integer () < 1)
453 msg (SE, _("The value for number of decimal places "
454 "must be at least 1."));
458 fx.spec.input.d = lex_integer ();
464 fdp = &formats[fx.spec.input.type];
465 if (fdp->n_args < 2 && fx.spec.input.d)
467 msg (SE, _("Input format %s doesn't accept decimal places."),
472 if (fx.spec.input.d > 16)
473 fx.spec.input.d = 16;
475 if (!lex_force_match (')'))
480 fx.spec.input.type = FMT_F;
486 if ((fx.lc - fx.fc + 1) % fx.nname)
488 msg (SE, _("The %d columns %d-%d "
489 "can't be evenly divided into %d fields."),
490 fx.lc - fx.fc + 1, fx.fc, fx.lc, fx.nname);
494 dividend = (fx.lc - fx.fc + 1) / fx.nname;
495 fx.spec.input.w = dividend;
496 if (!check_input_specifier (&fx.spec.input))
499 for (i = 0; i < fx.nname; i++)
505 if (fx.spec.input.type == FMT_A || fx.spec.input.type == FMT_AHEX)
516 v = dict_create_var (default_dict, fx.name[i], width);
519 convert_fmt_ItoO (&fx.spec.input, &v->print);
524 v = dict_lookup_var (default_dict, fx.name[i]);
528 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
533 msg (SE, _("There is already a variable %s of a "
538 if (type == ALPHA && dividend != v->width)
540 msg (SE, _("There is already a string variable %s of a "
541 "different width."), fx.name[i]);
547 fx.spec.fc = fx.fc + dividend * i;
548 fx.spec.lc = fx.spec.fc + dividend - 1;
550 fx.spec.width = v->width;
551 append_var_spec (&fx.spec);
556 /* Destroy a format list and, optionally, all its sublists. */
558 destroy_fmt_list (struct fmt_list *f, int recurse)
560 struct fmt_list *next;
565 if (recurse && f->f.type == FMT_DESCEND)
566 destroy_fmt_list (f->down, 1);
571 /* Takes a hierarchically structured fmt_list F as constructed by
572 fixed_parse_fortran(), and flattens it into a linear list of
575 dump_fmt_list (struct fmt_list *f)
579 for (; f; f = f->next)
580 if (f->f.type == FMT_X)
582 else if (f->f.type == FMT_T)
584 else if (f->f.type == FMT_NEWREC)
586 fx.recno += f->count;
590 for (i = 0; i < f->count; i++)
591 if (f->f.type == FMT_DESCEND)
593 if (!dump_fmt_list (f->down))
602 if (formats[f->f.type].cat & FCAT_STRING)
612 if (fx.cname >= fx.nname)
614 msg (SE, _("The number of format "
615 "specifications exceeds the number of "
616 "variable names given."));
620 fx.spec.v = v = dict_create_var (default_dict,
625 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
629 fx.spec.input = f->f;
630 convert_fmt_ItoO (&fx.spec.input, &v->print);
633 fx.spec.rec = fx.recno;
635 fx.spec.lc = fx.sc + f->f.w - 1;
637 fx.spec.width = v->width;
638 append_var_spec (&fx.spec);
645 /* Calls itself recursively to parse nested levels of parentheses.
646 Returns to its original caller: NULL, to indicate error; non-NULL,
647 but nothing useful, to indicate success (it returns a free()'d
649 static struct fmt_list *
650 fixed_parse_fortran (void)
652 struct fmt_list *head;
653 struct fmt_list *fl = NULL;
655 lex_get (); /* Skip opening parenthesis. */
659 fl = fl->next = xmalloc (sizeof *fl);
661 head = fl = xmalloc (sizeof *fl);
663 if (lex_integer_p ())
665 fl->count = lex_integer ();
673 fl->f.type = FMT_DESCEND;
675 fl->down = fixed_parse_fortran ();
680 else if (lex_match ('/'))
681 fl->f.type = FMT_NEWREC;
682 else if (!parse_format_specifier (&fl->f, 1)
683 || !check_input_specifier (&fl->f))
695 dump_fmt_list (head);
696 if (fx.cname < fx.nname)
698 msg (SE, _("There aren't enough format specifications "
699 "to match the number of variable names given."));
702 destroy_fmt_list (head, 1);
707 destroy_fmt_list (head, 0);
712 /* Displays a table giving information on fixed-format variable
713 parsing on DATA LIST. */
714 /* FIXME: The `Columns' column should be divided into three columns,
715 one for the starting column, one for the dash, one for the ending
716 column; then right-justify the starting column and left-justify the
719 dump_fixed_table (void)
721 struct dls_var_spec *spec;
724 const char *filename;
727 for (i = 0, spec = *first; spec; spec = spec->next)
729 t = tab_create (4, i + 1, 0);
730 tab_columns (t, TAB_COL_DOWN, 1);
731 tab_headers (t, 0, 0, 1, 0);
732 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
733 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
734 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
735 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
736 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
737 tab_hline (t, TAL_2, 0, 3, 1);
738 tab_dim (t, tab_natural_dimensions);
740 for (i = 1, spec = *first; spec; spec = spec->next, i++)
742 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
743 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
744 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
746 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
747 fmt_to_string (&spec->input));
750 if (*first == dls.spec)
752 filename = fh_handle_name (dls.handle);
753 if (filename == NULL)
755 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
756 sprintf (buf, (dls.handle != inline_file
758 ngettext("Reading %d record from file %s.",
759 "Reading %d records from file %s.",dls.nrec)
761 ngettext("Reading %d record from the command file.",
762 "Reading %d records from the command file.",
768 buf = local_alloc (strlen (_("Occurrence data specifications.")) + 1);
769 strcpy (buf, _("Occurrence data specifications."));
772 tab_title (t, 0, buf);
774 fh_handle_name (NULL);
778 /* Free-format parsing. */
783 struct dls_var_spec spec;
784 struct fmt_spec in, out;
794 if (!parse_DATA_LIST_vars (&name, &nname, PV_NONE))
798 if (!parse_format_specifier (&in, 0) || !check_input_specifier (&in))
800 if (!lex_force_match (')'))
802 convert_fmt_ItoO (&in, &out);
814 if (in.type == FMT_A || in.type == FMT_AHEX)
818 for (i = 0; i < nname; i++)
822 spec.v = v = dict_create_var (default_dict, name[i], width);
825 msg (SE, _("%s is a duplicate variable name."), name[i]);
829 v->print = v->write = out;
831 strcpy (spec.name, name[i]);
834 append_var_spec (&spec);
836 for (i = 0; i < nname; i++)
842 lex_error (_("expecting end of command"));
846 for (i = 0; i < nname; i++)
852 /* Displays a table giving information on free-format variable parsing
855 dump_free_table (void)
861 struct dls_var_spec *spec;
862 for (i = 0, spec = dls.spec; spec; spec = spec->next)
866 t = tab_create (2, i + 1, 0);
867 tab_columns (t, TAB_COL_DOWN, 1);
868 tab_headers (t, 0, 0, 1, 0);
869 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
870 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
871 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
872 tab_hline (t, TAL_2, 0, 1, 1);
873 tab_dim (t, tab_natural_dimensions);
876 struct dls_var_spec *spec;
878 for (i = 1, spec = dls.spec; spec; spec = spec->next, i++)
880 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
881 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
886 const char *filename;
888 filename = fh_handle_name (dls.handle);
889 if (filename == NULL)
892 (dls.handle != inline_file
893 ? _("Reading free-form data from file %s.")
894 : _("Reading free-form data from the command file.")),
899 fh_handle_name (NULL);
902 /* Input procedure. */
904 /* Pointer to relevant parsing data. Static just to avoid passing it
906 static struct data_list_pgm *dlsp;
908 /* Extracts a field from the current position in the current record.
909 Fields can be unquoted or quoted with single- or double-quote
910 characters. *RET_LEN is set to the field length, *RET_CP is set to
911 the field itself. After parsing the field, sets the current
912 position in the record to just past the field. Returns 0 on
913 failure or a 1-based column number indicating the beginning of the
916 cut_field (char **ret_cp, int *ret_len)
921 cp = dfm_get_record (dlsp->handle, &len);
927 /* Skip leading whitespace and commas. */
928 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
933 /* Three types of fields: quoted with ', quoted with ", unquoted. */
934 if (*cp == '\'' || *cp == '"')
939 while (cp < ep && *cp != quote)
941 *ret_len = cp - *ret_cp;
945 msg (SW, _("Scope of string exceeds line."));
950 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
952 *ret_len = cp - *ret_cp;
956 int beginning_column;
958 dfm_set_record (dlsp->handle, *ret_cp);
959 beginning_column = dfm_get_cur_col (dlsp->handle) + 1;
961 dfm_set_record (dlsp->handle, cp);
963 return beginning_column;
967 static int read_from_data_list_fixed (void);
968 static int read_from_data_list_free (void);
969 static int read_from_data_list_list (void);
970 static int do_reading (int flag);
972 /* FLAG==0: reads any number of cases into temp_case and calls
973 write_case() for each one, returns garbage. FLAG!=0: reads one
974 case into temp_case and returns -2 on eof, -1 otherwise.
975 Uses dlsp as the relevant parsing description. */
977 do_reading (int flag)
983 dfm_push (dlsp->handle);
988 func = read_from_data_list_fixed;
991 func = read_from_data_list_free;
994 func = read_from_data_list_list;
1006 msg (SE, _("Attempt to read past end of file."));
1015 if (dlsp->end != NULL)
1019 printf ("end of file, setting %s to 1\n", dlsp->end->name);
1020 temp_case->data[dlsp->end->fv].f = 1.0;
1025 printf ("not end of file, setting %s to 0\n", dlsp->end->name);
1026 temp_case->data[dlsp->end->fv].f = 0.0;
1032 while (func () != -2)
1035 debug_printf ((_("abort in write_case()\n")));
1038 fh_close_handle (dlsp->handle);
1040 dfm_pop (dlsp->handle);
1045 /* Reads a case from the data file and parses it according to
1046 fixed-format syntax rules. */
1048 read_from_data_list_fixed (void)
1050 struct dls_var_spec *var_spec = dlsp->spec;
1053 if (!dfm_get_record (dlsp->handle, NULL))
1055 for (i = 1; i <= dlsp->nrec; i++)
1058 char *line = dfm_get_record (dlsp->handle, &len);
1062 /* Note that this can't occur on the first record. */
1063 msg (SW, _("Partial case of %d of %d records discarded."),
1068 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1072 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1073 di.v = &temp_case->data[var_spec->fv];
1075 di.f1 = var_spec->fc;
1076 di.format = var_spec->input;
1081 dfm_fwd_record (dlsp->handle);
1087 /* Reads a case from the data file and parses it according to
1088 free-format syntax rules. */
1090 read_from_data_list_free (void)
1092 struct dls_var_spec *var_spec;
1096 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1100 /* Cut out a field and read in a new record if necessary. */
1103 column = cut_field (&field, &len);
1107 if (dfm_get_record (dlsp->handle, NULL))
1108 dfm_fwd_record (dlsp->handle);
1109 if (!dfm_get_record (dlsp->handle, NULL))
1111 if (var_spec != dlsp->spec)
1112 msg (SW, _("Partial case discarded. The first variable "
1113 "missing was %s."), var_spec->name);
1123 di.v = &temp_case->data[var_spec->fv];
1126 di.format = var_spec->input;
1133 /* Reads a case from the data file and parses it according to
1134 list-format syntax rules. */
1136 read_from_data_list_list (void)
1138 struct dls_var_spec *var_spec;
1142 if (!dfm_get_record (dlsp->handle, NULL))
1145 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1147 /* Cut out a field and check for end-of-line. */
1148 int column = cut_field (&field, &len);
1153 msg (SW, _("Missing value(s) for all variables from %s onward. "
1154 "These will be filled with the system-missing value "
1155 "or blanks, as appropriate."),
1157 for (; var_spec; var_spec = var_spec->next)
1158 if (var_spec->width == 0)
1159 temp_case->data[var_spec->fv].f = SYSMIS;
1161 memset (temp_case->data[var_spec->fv].s, ' ', var_spec->width);
1170 di.v = &temp_case->data[var_spec->fv];
1173 di.format = var_spec->input;
1178 dfm_fwd_record (dlsp->handle);
1182 /* Destroys DATA LIST transformation or input program PGM. */
1184 destroy_dls (struct trns_header *pgm)
1186 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1187 struct dls_var_spec *iter, *next;
1196 fh_close_handle (dls->handle);
1199 /* Note that since this is exclusively an input program, C is
1200 guaranteed to be temp_case. */
1202 read_one_case (struct trns_header *t, struct ccase *c unused)
1204 dlsp = (struct data_list_pgm *) t;
1205 return do_reading (1);
1208 /* Reads all the records from the data file and passes them to
1211 data_list_source_read (void)
1217 /* Destroys the source's internal data. */
1219 data_list_source_destroy_source (void)
1221 destroy_dls ((struct trns_header *) & dls);
1224 struct case_stream data_list_source =
1227 data_list_source_read,
1230 data_list_source_destroy_source,
1235 /* REPEATING DATA. */
1237 /* Represents a number or a variable. */
1238 struct rpd_num_or_var
1240 int num; /* Value, or 0. */
1241 struct variable *var; /* Variable, if number==0. */
1244 /* REPEATING DATA private data structure. */
1245 struct repeating_data_trns
1247 struct trns_header h;
1248 struct dls_var_spec *spec; /* Variable parsing specifications. */
1249 struct file_handle *handle; /* Input file, never NULL. */
1250 /* Do not reorder preceding fields. */
1252 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1253 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1254 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1255 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1256 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1257 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1258 int id_beg, id_end; /* ID subcommand, beginning & end columns. */
1259 struct variable *id_var; /* ID subcommand, DATA LIST variable. */
1260 struct fmt_spec id_spec; /* ID subcommand, input format spec. */
1263 /* Information about the transformation being parsed. */
1264 static struct repeating_data_trns rpd;
1266 static int read_one_set_of_repetitions (struct trns_header *, struct ccase *);
1267 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1268 static int parse_repeating_data (void);
1269 static void find_variable_input_spec (struct variable *v,
1270 struct fmt_spec *spec);
1272 /* Parses the REPEATING DATA command. */
1274 cmd_repeating_data (void)
1276 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1279 /* Bits are set when a particular subcommand has been seen. */
1282 lex_match_id ("REPEATING");
1283 lex_match_id ("DATA");
1285 assert (vfm_source == &input_program_source
1286 || vfm_source == &file_type_source);
1288 rpd.handle = default_handle;
1289 rpd.starts_beg.num = 0;
1290 rpd.starts_beg.var = NULL;
1291 rpd.starts_end = rpd.occurs = rpd.length = rpd.cont_beg
1292 = rpd.cont_end = rpd.starts_beg;
1293 rpd.id_beg = rpd.id_end = 0;
1303 if (lex_match_id ("FILE"))
1306 rpd.handle = fh_parse_file_handle ();
1309 if (rpd.handle != default_handle)
1311 msg (SE, _("REPEATING DATA must use the same file as its "
1312 "corresponding DATA LIST or FILE TYPE."));
1316 else if (lex_match_id ("STARTS"))
1321 msg (SE, _("STARTS subcommand given multiple times."));
1326 if (!parse_num_or_var (&rpd.starts_beg, "STARTS beginning column"))
1329 lex_negative_to_dash ();
1330 if (lex_match ('-'))
1332 if (!parse_num_or_var (&rpd.starts_end, "STARTS ending column"))
1335 /* Otherwise, rpd.starts_end is left uninitialized.
1336 This is okay. We will initialize it later from the
1337 record length of the file. We can't do this now
1338 because we can't be sure that the user has specified
1339 the file handle yet. */
1342 if (rpd.starts_beg.num != 0 && rpd.starts_end.num != 0
1343 && rpd.starts_beg.num > rpd.starts_end.num)
1345 msg (SE, _("STARTS beginning column (%d) exceeds "
1346 "STARTS ending column (%d)."),
1347 rpd.starts_beg.num, rpd.starts_end.num);
1351 else if (lex_match_id ("OCCURS"))
1356 msg (SE, _("OCCURS subcommand given multiple times."));
1361 if (!parse_num_or_var (&rpd.occurs, "OCCURS"))
1364 else if (lex_match_id ("LENGTH"))
1369 msg (SE, _("LENGTH subcommand given multiple times."));
1374 if (!parse_num_or_var (&rpd.length, "LENGTH"))
1377 else if (lex_match_id ("CONTINUED"))
1382 msg (SE, _("CONTINUED subcommand given multiple times."));
1387 if (!lex_match ('/'))
1389 if (!parse_num_or_var (&rpd.cont_beg, "CONTINUED beginning column"))
1392 lex_negative_to_dash ();
1394 && !parse_num_or_var (&rpd.cont_end,
1395 "CONTINUED ending column"))
1398 if (rpd.cont_beg.num != 0 && rpd.cont_end.num != 0
1399 && rpd.cont_beg.num > rpd.cont_end.num)
1401 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1402 "CONTINUED ending column (%d)."),
1403 rpd.cont_beg.num, rpd.cont_end.num);
1408 rpd.cont_beg.num = 1;
1410 else if (lex_match_id ("ID"))
1415 msg (SE, _("ID subcommand given multiple times."));
1420 if (!lex_force_int ())
1422 if (lex_integer () < 1)
1424 msg (SE, _("ID beginning column (%ld) must be positive."),
1428 rpd.id_beg = lex_integer ();
1431 lex_negative_to_dash ();
1433 if (lex_match ('-'))
1435 if (!lex_force_int ())
1437 if (lex_integer () < 1)
1439 msg (SE, _("ID ending column (%ld) must be positive."),
1443 if (lex_integer () < rpd.id_end)
1445 msg (SE, _("ID ending column (%ld) cannot be less than "
1446 "ID beginning column (%d)."),
1447 lex_integer (), rpd.id_beg);
1451 rpd.id_end = lex_integer ();
1454 else rpd.id_end = rpd.id_beg;
1456 if (!lex_force_match ('='))
1458 rpd.id_var = parse_variable ();
1459 if (rpd.id_var == NULL)
1462 find_variable_input_spec (rpd.id_var, &rpd.id_spec);
1464 else if (lex_match_id ("TABLE"))
1466 else if (lex_match_id ("NOTABLE"))
1468 else if (lex_match_id ("DATA"))
1476 if (!lex_force_match ('/'))
1480 /* Comes here when DATA specification encountered. */
1481 if ((seen & (1 | 2)) != (1 | 2))
1483 if ((seen & 1) == 0)
1484 msg (SE, _("Missing required specification STARTS."));
1485 if ((seen & 2) == 0)
1486 msg (SE, _("Missing required specification OCCURS."));
1490 /* Enforce ID restriction. */
1491 if ((seen & 16) && !(seen & 8))
1493 msg (SE, _("ID specified without CONTINUED."));
1497 /* Calculate starts_end, cont_end if necessary. */
1498 if (rpd.starts_end.num == 0 && rpd.starts_end.var == NULL)
1499 rpd.starts_end.num = fh_record_width (rpd.handle);
1500 if (rpd.cont_end.num == 0 && rpd.starts_end.var == NULL)
1501 rpd.cont_end.num = fh_record_width (rpd.handle);
1503 /* Calculate length if possible. */
1504 if ((seen & 4) == 0)
1506 struct dls_var_spec *iter;
1508 for (iter = rpd.spec; iter; iter = iter->next)
1510 if (iter->lc > rpd.length.num)
1511 rpd.length.num = iter->lc;
1513 assert (rpd.length.num != 0);
1517 if (!parse_repeating_data ())
1521 dump_fixed_table ();
1524 struct repeating_data_trns *new_trns;
1526 rpd.h.proc = read_one_set_of_repetitions;
1527 rpd.h.free = destroy_dls;
1529 new_trns = xmalloc (sizeof *new_trns);
1530 memcpy (new_trns, &rpd, sizeof *new_trns);
1531 add_transformation ((struct trns_header *) new_trns);
1534 return lex_end_of_command ();
1537 /* Because of the way that DATA LIST is structured, it's not trivial
1538 to determine what input format is associated with a given variable.
1539 This function finds the input format specification for variable V
1540 and puts it in SPEC. */
1542 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1546 for (i = 0; i < n_trns; i++)
1548 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1550 if (pgm->h.proc == read_one_case)
1552 struct dls_var_spec *iter;
1554 for (iter = pgm->spec; iter; iter = iter->next)
1557 *spec = iter->input;
1566 /* Parses a number or a variable name from the syntax file and puts
1567 the results in VALUE. Ensures that the number is at least 1; else
1568 emits an error based on MESSAGE. Returns nonzero only if
1571 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1576 value->var = parse_variable ();
1577 if (value->var == NULL)
1579 if (value->var->type == ALPHA)
1581 msg (SE, _("String variable not allowed here."));
1585 else if (lex_integer_p ())
1587 value->num = lex_integer ();
1591 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1597 msg (SE, _("Variable or integer expected for %s."), message);
1603 /* Parses data specifications for repeating data groups. Taken from
1604 parse_fixed(). Returns nonzero only if successful. */
1606 parse_repeating_data (void)
1613 while (token != '.')
1615 fx.spec.rec = fx.recno;
1617 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
1622 if (!fixed_parse_compatible ())
1625 else if (token == '(')
1629 if (!fixed_parse_fortran ())
1634 msg (SE, _("SPSS-like or FORTRAN-like format "
1635 "specification expected after variable names."));
1639 for (i = 0; i < fx.nname; i++)
1645 lex_error (_("expecting end of command"));
1652 for (i = 0; i < fx.nname; i++)
1658 /* Obtains the real value for rpd_num_or_var N in case C and returns
1659 it. The valid range is nonnegative numbers, but numbers outside
1660 this range can be returned and should be handled by the caller as
1663 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1668 assert (n->num == 0);
1671 double v = c->data[n->var->fv].f;
1673 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1682 /* Parses one record of repeated data and outputs corresponding cases.
1683 Repeating data is present in line LINE having length LEN.
1684 Repeating data begins in column BEG and continues through column
1685 END inclusive (1-based columns); occurrences are offset OFS columns
1686 from each other. C is the case that will be filled in; T is the
1687 REPEATING DATA transformation. The record ID will be verified if
1688 COMPARE_ID is nonzero; if it is zero, then the record ID is
1689 initialized to the ID present in the case (assuming that ID
1690 location was specified by the user). Returns number of occurrences
1691 parsed up to the specified maximum of MAX_OCCURS. */
1693 rpd_parse_record (int beg, int end, int ofs, struct ccase *c,
1694 struct repeating_data_trns *t,
1695 char *line, int len, int compare_id, int max_occurs)
1700 /* Handle record ID values. */
1703 static union value comparator;
1709 data_in_finite_line (&di, line, len, t->id_beg, t->id_end);
1713 di.format = t->id_spec;
1719 if (compare_id == 0)
1721 else if ((t->id_var->type == NUMERIC && comparator.f != v.f)
1722 || (t->id_var->type == ALPHA
1723 && strncmp (comparator.s, v.s, t->id_var->width)))
1728 if (!data_out (comp_str, &t->id_var->print, &comparator))
1730 if (!data_out (v_str, &t->id_var->print, &v))
1733 comp_str[t->id_var->print.w] = v_str[t->id_var->print.w] = 0;
1736 _("Mismatched case ID (%s). Expected value was %s."),
1743 /* Iterate over the set of expected occurrences and record each of
1744 them as a separate case. FIXME: We need to execute any
1745 transformations that follow the current one. */
1749 for (occurrences = 0; occurrences < max_occurs; )
1751 if (cur + ofs > end + 1)
1756 struct dls_var_spec *var_spec = t->spec;
1758 for (; var_spec; var_spec = var_spec->next)
1760 int fc = var_spec->fc - 1 + cur;
1761 int lc = var_spec->lc - 1 + cur;
1763 if (fc > len && !warned && var_spec->input.type != FMT_A)
1768 _("Variable %s startging in column %d extends "
1769 "beyond physical record length of %d."),
1770 var_spec->v->name, fc, len);
1776 data_in_finite_line (&di, line, len, fc, lc);
1777 di.v = &c->data[var_spec->fv];
1780 di.format = var_spec->input;
1798 /* Analogous to read_one_case; reads one set of repetitions of the
1799 elements in the REPEATING DATA structure. Returns -1 on success,
1800 -2 on end of file or on failure. */
1802 read_one_set_of_repetitions (struct trns_header *trns, struct ccase *c)
1804 dfm_push (dlsp->handle);
1807 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1809 char *line; /* Current record. */
1810 int len; /* Length of current record. */
1812 int starts_beg; /* Starting column. */
1813 int starts_end; /* Ending column. */
1814 int occurs; /* Number of repetitions. */
1815 int length; /* Length of each occurrence. */
1816 int cont_beg; /* Starting column for continuation lines. */
1817 int cont_end; /* Ending column for continuation lines. */
1819 int occurs_left; /* Number of occurrences remaining. */
1821 int code; /* Return value from rpd_parse_record(). */
1823 int skip_first_record = 0;
1825 /* Read the current record. */
1826 dfm_bkwd_record (dlsp->handle, 1);
1827 line = dfm_get_record (dlsp->handle, &len);
1830 dfm_fwd_record (dlsp->handle);
1832 /* Calculate occurs, length. */
1833 occurs_left = occurs = realize_value (&t->occurs, c);
1836 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1839 starts_beg = realize_value (&t->starts_beg, c);
1840 if (starts_beg <= 0)
1842 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1847 starts_end = realize_value (&t->starts_end, c);
1848 if (starts_end < starts_beg)
1850 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1851 "beginning column (%d)."),
1852 starts_end, starts_beg);
1853 skip_first_record = 1;
1855 length = realize_value (&t->length, c);
1858 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1860 occurs = occurs_left = 1;
1862 cont_beg = realize_value (&t->cont_beg, c);
1865 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1870 cont_end = realize_value (&t->cont_end, c);
1871 if (cont_end < cont_beg)
1873 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1874 "beginning column (%d)."),
1875 cont_end, cont_beg);
1879 /* Parse the first record. */
1880 if (!skip_first_record)
1882 code = rpd_parse_record (starts_beg, starts_end, length, c, t, line,
1883 len, 0, occurs_left);
1887 else if (cont_beg == 0)
1890 /* Make sure, if some occurrences are left, that we have
1891 continuation records. */
1892 occurs_left -= code;
1893 if (occurs_left != 0 && cont_beg == 0)
1896 _("Number of repetitions specified on OCCURS (%d) "
1897 "exceed number of repetitions available in "
1898 "space on STARTS (%d), and CONTINUED not specified."),
1903 /* Go on to additional records. */
1904 while (occurs_left != 0)
1906 assert (occurs_left >= 0);
1908 /* Read in another record. */
1909 line = dfm_get_record (dlsp->handle, &len);
1913 _("Unexpected end of file with %d repetitions "
1914 "remaining out of %d."),
1915 occurs_left, occurs);
1918 dfm_fwd_record (dlsp->handle);
1920 /* Parse this record. */
1921 code = rpd_parse_record (cont_beg, cont_end, length, c, t, line,
1922 len, 1, occurs_left);
1925 occurs_left -= code;
1929 dfm_pop (dlsp->handle);
1931 /* FIXME: This is a kluge until we've implemented multiplexing of