1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
46 #include "debug-print.h"
49 #include "file-handle.h"
59 /* Utility function. */
61 /* FIXME: Either REPEATING DATA must be the last transformation, or we
62 must multiplex the transformations that follow (i.e., perform them
63 for every case that we produce from a repetition instance).
64 Currently we do neither. We should do one or the other. */
66 /* Describes how to parse one variable. */
69 struct dls_var_spec *next;
70 struct variable *v; /* Associated variable. Used only in
71 parsing. Not safe later. */
72 char name[9]; /* Free-format: Name of variable. */
73 int rec; /* Fixed-format: Record number (1-based). */
74 int fc, lc; /* Fixed-format: Column numbers in record. */
75 struct fmt_spec input; /* Input format of this field. */
76 int fv; /* First value in case. */
77 int width; /* 0=numeric, >0=width of alpha field. */
80 /* Constants for DATA LIST type. */
81 /* Must match table in cmd_data_list(). */
89 /* DATA LIST private data structure. */
93 struct dls_var_spec *spec; /* Variable parsing specifications. */
94 struct file_handle *handle; /* Input file, never NULL. */
95 /* Do not reorder preceding fields. */
97 int type; /* A DLS_* constant. */
98 struct variable *end; /* Variable specified on END subcommand. */
99 int eof; /* End of file encountered. */
100 int nrec; /* Number of records. */
103 /* Holds information on parsing the data file. */
104 static struct data_list_pgm dls;
106 /* Pointer to a pointer to where the first dls_var_spec should go. */
107 static struct dls_var_spec **first;
109 /* Last dls_var_spec in the chain. Used for building the linked-list. */
110 static struct dls_var_spec *next;
112 static int parse_fixed (void);
113 static int parse_free (void);
114 static void dump_fixed_table (void);
115 static void dump_free_table (void);
116 static void destroy_dls (struct trns_header *);
117 static int read_one_case (struct trns_header *, struct ccase *);
119 /* Message title for REPEATING DATA. */
120 #define RPD_ERR "REPEATING DATA: "
125 /* 0=print no table, 1=print table. (TABLE subcommand.) */
128 lex_match_id ("DATA");
129 lex_match_id ("LIST");
131 if (vfm_source != &input_program_source
132 && vfm_source != &file_type_source)
133 discard_variables ();
135 dls.handle = default_handle;
146 if (lex_match_id ("FILE"))
149 dls.handle = fh_parse_file_handle ();
152 if (vfm_source == &file_type_source && dls.handle != default_handle)
154 msg (SE, _("DATA LIST may not use a different file from "
155 "that specified on its surrounding FILE TYPE."));
159 else if (lex_match_id ("RECORDS"))
163 if (!lex_force_int ())
165 dls.nrec = lex_integer ();
169 else if (lex_match_id ("END"))
173 msg (SE, _("The END subcommand may only be specified once."));
178 if (!lex_force_id ())
180 dls.end = dict_lookup_var (default_dict, tokid);
183 dls.end = dict_create_var (default_dict, tokid, 0);
184 assert (dls.end != NULL);
188 else if (token == T_ID)
190 /* Must match DLS_* constants. */
191 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
196 for (p = id; *p; p++)
197 if (lex_id_match (*p, tokid))
212 msg (SE, _("Only one of FIXED, FREE, or LIST may "
229 default_handle = dls.handle;
232 dls.type = DLS_FIXED;
236 if (dls.type == DLS_FREE)
242 if (dls.type == DLS_FIXED)
257 if (vfm_source != NULL)
259 struct data_list_pgm *new_pgm;
261 dls.h.proc = read_one_case;
262 dls.h.free = destroy_dls;
264 new_pgm = xmalloc (sizeof *new_pgm);
265 memcpy (new_pgm, &dls, sizeof *new_pgm);
266 add_transformation ((struct trns_header *) new_pgm);
269 vfm_source = &data_list_source;
275 append_var_spec (struct dls_var_spec *spec)
278 *first = next = xmalloc (sizeof *spec);
280 next = next->next = xmalloc (sizeof *spec);
282 memcpy (next, spec, sizeof *spec);
286 /* Fixed-format parsing. */
288 /* Used for chaining together fortran-like format specifiers. */
291 struct fmt_list *next;
294 struct fmt_list *down;
297 /* Used as "local" variables among the fixed-format parsing funcs. If
298 it were guaranteed that PSPP were going to be compiled by gcc,
299 I'd make all these functions a single set of nested functions. */
302 char **name; /* Variable names. */
303 int nname; /* Number of names. */
304 int cname; /* dump_fmt_list: index of next name to use. */
306 int recno; /* Index of current record. */
307 int sc; /* 1-based column number of starting column for
308 next field to output. */
310 struct dls_var_spec spec; /* Next specification to output. */
311 int fc, lc; /* First, last column in set of fields specified
314 int level; /* Nesting level in fixed_parse_fortran(). */
318 static int fixed_parse_compatible (void);
319 static struct fmt_list *fixed_parse_fortran (void);
331 while (lex_match ('/'))
334 if (lex_integer_p ())
336 if (lex_integer () < fx.recno)
338 msg (SE, _("The record number specified, %ld, is "
339 "before the previous record, %d. Data "
340 "fields must be listed in order of "
341 "increasing record number."),
342 lex_integer (), fx.recno - 1);
346 fx.recno = lex_integer ();
351 fx.spec.rec = fx.recno;
353 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
358 if (!fixed_parse_compatible ())
361 else if (token == '(')
365 if (!fixed_parse_fortran ())
370 msg (SE, _("SPSS-like or FORTRAN-like format "
371 "specification expected after variable names."));
375 for (i = 0; i < fx.nname; i++)
379 if (dls.nrec && next->rec > dls.nrec)
381 msg (SE, _("Variables are specified on records that "
382 "should not exist according to RECORDS subcommand."));
386 dls.nrec = next->rec;
389 lex_error (_("expecting end of command"));
395 for (i = 0; i < fx.nname; i++)
402 fixed_parse_compatible (void)
407 if (!lex_force_int ())
410 fx.fc = lex_integer ();
413 msg (SE, _("Column positions for fields must be positive."));
418 lex_negative_to_dash ();
421 if (!lex_force_int ())
423 fx.lc = lex_integer ();
426 msg (SE, _("Column positions for fields must be positive."));
429 else if (fx.lc < fx.fc)
431 msg (SE, _("The ending column for a field must be "
432 "greater than the starting column."));
441 fx.spec.input.w = fx.lc - fx.fc + 1;
444 struct fmt_desc *fdp;
450 fx.spec.input.type = parse_format_specifier_name (&cp, 0);
451 if (fx.spec.input.type == -1)
455 msg (SE, _("A format specifier on this line "
456 "has extra characters on the end."));
464 fx.spec.input.type = FMT_F;
466 if (lex_integer_p ())
468 if (lex_integer () < 1)
470 msg (SE, _("The value for number of decimal places "
471 "must be at least 1."));
475 fx.spec.input.d = lex_integer ();
481 fdp = &formats[fx.spec.input.type];
482 if (fdp->n_args < 2 && fx.spec.input.d)
484 msg (SE, _("Input format %s doesn't accept decimal places."),
489 if (fx.spec.input.d > 16)
490 fx.spec.input.d = 16;
492 if (!lex_force_match (')'))
497 fx.spec.input.type = FMT_F;
503 if ((fx.lc - fx.fc + 1) % fx.nname)
505 msg (SE, _("The %d columns %d-%d "
506 "can't be evenly divided into %d fields."),
507 fx.lc - fx.fc + 1, fx.fc, fx.lc, fx.nname);
511 dividend = (fx.lc - fx.fc + 1) / fx.nname;
512 fx.spec.input.w = dividend;
513 if (!check_input_specifier (&fx.spec.input))
516 for (i = 0; i < fx.nname; i++)
522 if (fx.spec.input.type == FMT_A || fx.spec.input.type == FMT_AHEX)
533 v = dict_create_var (default_dict, fx.name[i], width);
536 convert_fmt_ItoO (&fx.spec.input, &v->print);
541 v = dict_lookup_var (default_dict, fx.name[i]);
545 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
550 msg (SE, _("There is already a variable %s of a "
555 if (type == ALPHA && dividend != v->width)
557 msg (SE, _("There is already a string variable %s of a "
558 "different width."), fx.name[i]);
564 fx.spec.fc = fx.fc + dividend * i;
565 fx.spec.lc = fx.spec.fc + dividend - 1;
567 fx.spec.width = v->width;
568 append_var_spec (&fx.spec);
573 /* Destroy a format list and, optionally, all its sublists. */
575 destroy_fmt_list (struct fmt_list *f, int recurse)
577 struct fmt_list *next;
582 if (recurse && f->f.type == FMT_DESCEND)
583 destroy_fmt_list (f->down, 1);
588 /* Takes a hierarchically structured fmt_list F as constructed by
589 fixed_parse_fortran(), and flattens it into a linear list of
592 dump_fmt_list (struct fmt_list *f)
596 for (; f; f = f->next)
597 if (f->f.type == FMT_X)
599 else if (f->f.type == FMT_T)
601 else if (f->f.type == FMT_NEWREC)
603 fx.recno += f->count;
607 for (i = 0; i < f->count; i++)
608 if (f->f.type == FMT_DESCEND)
610 if (!dump_fmt_list (f->down))
619 if (formats[f->f.type].cat & FCAT_STRING)
629 if (fx.cname >= fx.nname)
631 msg (SE, _("The number of format "
632 "specifications exceeds the number of "
633 "variable names given."));
637 fx.spec.v = v = dict_create_var (default_dict,
642 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
646 fx.spec.input = f->f;
647 convert_fmt_ItoO (&fx.spec.input, &v->print);
650 fx.spec.rec = fx.recno;
652 fx.spec.lc = fx.sc + f->f.w - 1;
654 fx.spec.width = v->width;
655 append_var_spec (&fx.spec);
662 /* Calls itself recursively to parse nested levels of parentheses.
663 Returns to its original caller: NULL, to indicate error; non-NULL,
664 but nothing useful, to indicate success (it returns a free()'d
666 static struct fmt_list *
667 fixed_parse_fortran (void)
669 struct fmt_list *head;
670 struct fmt_list *fl = NULL;
672 lex_get (); /* Skip opening parenthesis. */
676 fl = fl->next = xmalloc (sizeof *fl);
678 head = fl = xmalloc (sizeof *fl);
680 if (lex_integer_p ())
682 fl->count = lex_integer ();
690 fl->f.type = FMT_DESCEND;
692 fl->down = fixed_parse_fortran ();
697 else if (lex_match ('/'))
698 fl->f.type = FMT_NEWREC;
699 else if (!parse_format_specifier (&fl->f, 1)
700 || !check_input_specifier (&fl->f))
712 dump_fmt_list (head);
713 if (fx.cname < fx.nname)
715 msg (SE, _("There aren't enough format specifications "
716 "to match the number of variable names given."));
719 destroy_fmt_list (head, 1);
724 destroy_fmt_list (head, 0);
729 /* Displays a table giving information on fixed-format variable
730 parsing on DATA LIST. */
731 /* FIXME: The `Columns' column should be divided into three columns,
732 one for the starting column, one for the dash, one for the ending
733 column; then right-justify the starting column and left-justify the
736 dump_fixed_table (void)
738 struct dls_var_spec *spec;
741 const char *filename;
744 for (i = 0, spec = *first; spec; spec = spec->next)
746 t = tab_create (4, i + 1, 0);
747 tab_columns (t, TAB_COL_DOWN, 1);
748 tab_headers (t, 0, 0, 1, 0);
749 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
750 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
751 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
752 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
753 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
754 tab_hline (t, TAL_2, 0, 3, 1);
755 tab_dim (t, tab_natural_dimensions);
757 for (i = 1, spec = *first; spec; spec = spec->next, i++)
759 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
760 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
761 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
763 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
764 fmt_to_string (&spec->input));
767 if (*first == dls.spec)
769 filename = fh_handle_name (dls.handle);
770 if (filename == NULL)
772 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
773 sprintf (buf, (dls.handle != inline_file
775 ngettext("Reading %d record from file %s.",
776 "Reading %d records from file %s.",dls.nrec)
778 ngettext("Reading %d record from the command file.",
779 "Reading %d records from the command file.",
785 buf = local_alloc (strlen (_("Occurrence data specifications.")) + 1);
786 strcpy (buf, _("Occurrence data specifications."));
789 tab_title (t, 0, buf);
791 fh_handle_name (NULL);
795 /* Free-format parsing. */
800 struct dls_var_spec spec;
801 struct fmt_spec in, out;
811 if (!parse_DATA_LIST_vars (&name, &nname, PV_NONE))
815 if (!parse_format_specifier (&in, 0) || !check_input_specifier (&in))
817 if (!lex_force_match (')'))
819 convert_fmt_ItoO (&in, &out);
831 if (in.type == FMT_A || in.type == FMT_AHEX)
835 for (i = 0; i < nname; i++)
839 spec.v = v = dict_create_var (default_dict, name[i], width);
842 msg (SE, _("%s is a duplicate variable name."), name[i]);
846 v->print = v->write = out;
848 strcpy (spec.name, name[i]);
851 append_var_spec (&spec);
853 for (i = 0; i < nname; i++)
859 lex_error (_("expecting end of command"));
863 for (i = 0; i < nname; i++)
869 /* Displays a table giving information on free-format variable parsing
872 dump_free_table (void)
878 struct dls_var_spec *spec;
879 for (i = 0, spec = dls.spec; spec; spec = spec->next)
883 t = tab_create (2, i + 1, 0);
884 tab_columns (t, TAB_COL_DOWN, 1);
885 tab_headers (t, 0, 0, 1, 0);
886 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
887 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
888 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
889 tab_hline (t, TAL_2, 0, 1, 1);
890 tab_dim (t, tab_natural_dimensions);
893 struct dls_var_spec *spec;
895 for (i = 1, spec = dls.spec; spec; spec = spec->next, i++)
897 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
898 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
903 const char *filename;
905 filename = fh_handle_name (dls.handle);
906 if (filename == NULL)
909 (dls.handle != inline_file
910 ? _("Reading free-form data from file %s.")
911 : _("Reading free-form data from the command file.")),
916 fh_handle_name (NULL);
919 /* Input procedure. */
921 /* Pointer to relevant parsing data. Static just to avoid passing it
923 static struct data_list_pgm *dlsp;
925 /* Extracts a field from the current position in the current record.
926 Fields can be unquoted or quoted with single- or double-quote
927 characters. *RET_LEN is set to the field length, *RET_CP is set to
928 the field itself. After parsing the field, sets the current
929 position in the record to just past the field. Returns 0 on
930 failure or a 1-based column number indicating the beginning of the
933 cut_field (char **ret_cp, int *ret_len)
938 cp = dfm_get_record (dlsp->handle, &len);
944 /* Skip leading whitespace and commas. */
945 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
950 /* Three types of fields: quoted with ', quoted with ", unquoted. */
951 if (*cp == '\'' || *cp == '"')
956 while (cp < ep && *cp != quote)
958 *ret_len = cp - *ret_cp;
962 msg (SW, _("Scope of string exceeds line."));
967 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
969 *ret_len = cp - *ret_cp;
973 int beginning_column;
975 dfm_set_record (dlsp->handle, *ret_cp);
976 beginning_column = dfm_get_cur_col (dlsp->handle) + 1;
978 dfm_set_record (dlsp->handle, cp);
980 return beginning_column;
984 static int read_from_data_list_fixed (void);
985 static int read_from_data_list_free (void);
986 static int read_from_data_list_list (void);
987 static int do_reading (int flag);
989 /* FLAG==0: reads any number of cases into temp_case and calls
990 write_case() for each one, returns garbage. FLAG!=0: reads one
991 case into temp_case and returns -2 on eof, -1 otherwise.
992 Uses dlsp as the relevant parsing description. */
994 do_reading (int flag)
1000 dfm_push (dlsp->handle);
1005 func = read_from_data_list_fixed;
1008 func = read_from_data_list_free;
1011 func = read_from_data_list_list;
1023 msg (SE, _("Attempt to read past end of file."));
1032 if (dlsp->end != NULL)
1036 printf ("end of file, setting %s to 1\n", dlsp->end->name);
1037 temp_case->data[dlsp->end->fv].f = 1.0;
1042 printf ("not end of file, setting %s to 0\n", dlsp->end->name);
1043 temp_case->data[dlsp->end->fv].f = 0.0;
1049 while (func () != -2)
1052 debug_printf ((_("abort in write_case()\n")));
1055 fh_close_handle (dlsp->handle);
1057 dfm_pop (dlsp->handle);
1062 /* Reads a case from the data file and parses it according to
1063 fixed-format syntax rules. */
1065 read_from_data_list_fixed (void)
1067 struct dls_var_spec *var_spec = dlsp->spec;
1070 if (!dfm_get_record (dlsp->handle, NULL))
1072 for (i = 1; i <= dlsp->nrec; i++)
1075 char *line = dfm_get_record (dlsp->handle, &len);
1079 /* Note that this can't occur on the first record. */
1080 msg (SW, _("Partial case of %d of %d records discarded."),
1085 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1089 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1090 di.v = &temp_case->data[var_spec->fv];
1092 di.f1 = var_spec->fc;
1093 di.format = var_spec->input;
1098 dfm_fwd_record (dlsp->handle);
1104 /* Reads a case from the data file and parses it according to
1105 free-format syntax rules. */
1107 read_from_data_list_free (void)
1109 struct dls_var_spec *var_spec;
1113 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1117 /* Cut out a field and read in a new record if necessary. */
1120 column = cut_field (&field, &len);
1124 if (dfm_get_record (dlsp->handle, NULL))
1125 dfm_fwd_record (dlsp->handle);
1126 if (!dfm_get_record (dlsp->handle, NULL))
1128 if (var_spec != dlsp->spec)
1129 msg (SW, _("Partial case discarded. The first variable "
1130 "missing was %s."), var_spec->name);
1140 di.v = &temp_case->data[var_spec->fv];
1143 di.format = var_spec->input;
1150 /* Reads a case from the data file and parses it according to
1151 list-format syntax rules. */
1153 read_from_data_list_list (void)
1155 struct dls_var_spec *var_spec;
1159 if (!dfm_get_record (dlsp->handle, NULL))
1162 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1164 /* Cut out a field and check for end-of-line. */
1165 int column = cut_field (&field, &len);
1170 msg (SW, _("Missing value(s) for all variables from %s onward. "
1171 "These will be filled with the system-missing value "
1172 "or blanks, as appropriate."),
1174 for (; var_spec; var_spec = var_spec->next)
1175 if (var_spec->width == 0)
1176 temp_case->data[var_spec->fv].f = SYSMIS;
1178 memset (temp_case->data[var_spec->fv].s, ' ', var_spec->width);
1187 di.v = &temp_case->data[var_spec->fv];
1190 di.format = var_spec->input;
1195 dfm_fwd_record (dlsp->handle);
1199 /* Destroys DATA LIST transformation or input program PGM. */
1201 destroy_dls (struct trns_header *pgm)
1203 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1204 struct dls_var_spec *iter, *next;
1213 fh_close_handle (dls->handle);
1216 /* Note that since this is exclusively an input program, C is
1217 guaranteed to be temp_case. */
1219 read_one_case (struct trns_header *t, struct ccase *c unused)
1221 dlsp = (struct data_list_pgm *) t;
1222 return do_reading (1);
1225 /* Reads all the records from the data file and passes them to
1228 data_list_source_read (void)
1234 /* Destroys the source's internal data. */
1236 data_list_source_destroy_source (void)
1238 destroy_dls ((struct trns_header *) & dls);
1241 struct case_stream data_list_source =
1244 data_list_source_read,
1247 data_list_source_destroy_source,
1252 /* REPEATING DATA. */
1254 /* Represents a number or a variable. */
1255 struct rpd_num_or_var
1257 int num; /* Value, or 0. */
1258 struct variable *var; /* Variable, if number==0. */
1261 /* REPEATING DATA private data structure. */
1262 struct repeating_data_trns
1264 struct trns_header h;
1265 struct dls_var_spec *spec; /* Variable parsing specifications. */
1266 struct file_handle *handle; /* Input file, never NULL. */
1267 /* Do not reorder preceding fields. */
1269 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1270 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1271 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1272 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1273 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1274 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1275 int id_beg, id_end; /* ID subcommand, beginning & end columns. */
1276 struct variable *id_var; /* ID subcommand, DATA LIST variable. */
1277 struct fmt_spec id_spec; /* ID subcommand, input format spec. */
1280 /* Information about the transformation being parsed. */
1281 static struct repeating_data_trns rpd;
1283 static int read_one_set_of_repetitions (struct trns_header *, struct ccase *);
1284 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1285 static int parse_repeating_data (void);
1286 static void find_variable_input_spec (struct variable *v,
1287 struct fmt_spec *spec);
1289 /* Parses the REPEATING DATA command. */
1291 cmd_repeating_data (void)
1293 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1296 /* Bits are set when a particular subcommand has been seen. */
1299 lex_match_id ("REPEATING");
1300 lex_match_id ("DATA");
1302 assert (vfm_source == &input_program_source
1303 || vfm_source == &file_type_source);
1305 rpd.handle = default_handle;
1306 rpd.starts_beg.num = 0;
1307 rpd.starts_beg.var = NULL;
1308 rpd.starts_end = rpd.occurs = rpd.length = rpd.cont_beg
1309 = rpd.cont_end = rpd.starts_beg;
1310 rpd.id_beg = rpd.id_end = 0;
1320 if (lex_match_id ("FILE"))
1323 rpd.handle = fh_parse_file_handle ();
1326 if (rpd.handle != default_handle)
1328 msg (SE, _("REPEATING DATA must use the same file as its "
1329 "corresponding DATA LIST or FILE TYPE."));
1333 else if (lex_match_id ("STARTS"))
1338 msg (SE, _("STARTS subcommand given multiple times."));
1343 if (!parse_num_or_var (&rpd.starts_beg, "STARTS beginning column"))
1346 lex_negative_to_dash ();
1347 if (lex_match ('-'))
1349 if (!parse_num_or_var (&rpd.starts_end, "STARTS ending column"))
1352 /* Otherwise, rpd.starts_end is left uninitialized.
1353 This is okay. We will initialize it later from the
1354 record length of the file. We can't do this now
1355 because we can't be sure that the user has specified
1356 the file handle yet. */
1359 if (rpd.starts_beg.num != 0 && rpd.starts_end.num != 0
1360 && rpd.starts_beg.num > rpd.starts_end.num)
1362 msg (SE, _("STARTS beginning column (%d) exceeds "
1363 "STARTS ending column (%d)."),
1364 rpd.starts_beg.num, rpd.starts_end.num);
1368 else if (lex_match_id ("OCCURS"))
1373 msg (SE, _("OCCURS subcommand given multiple times."));
1378 if (!parse_num_or_var (&rpd.occurs, "OCCURS"))
1381 else if (lex_match_id ("LENGTH"))
1386 msg (SE, _("LENGTH subcommand given multiple times."));
1391 if (!parse_num_or_var (&rpd.length, "LENGTH"))
1394 else if (lex_match_id ("CONTINUED"))
1399 msg (SE, _("CONTINUED subcommand given multiple times."));
1404 if (!lex_match ('/'))
1406 if (!parse_num_or_var (&rpd.cont_beg, "CONTINUED beginning column"))
1409 lex_negative_to_dash ();
1411 && !parse_num_or_var (&rpd.cont_end,
1412 "CONTINUED ending column"))
1415 if (rpd.cont_beg.num != 0 && rpd.cont_end.num != 0
1416 && rpd.cont_beg.num > rpd.cont_end.num)
1418 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1419 "CONTINUED ending column (%d)."),
1420 rpd.cont_beg.num, rpd.cont_end.num);
1425 rpd.cont_beg.num = 1;
1427 else if (lex_match_id ("ID"))
1432 msg (SE, _("ID subcommand given multiple times."));
1437 if (!lex_force_int ())
1439 if (lex_integer () < 1)
1441 msg (SE, _("ID beginning column (%ld) must be positive."),
1445 rpd.id_beg = lex_integer ();
1448 lex_negative_to_dash ();
1450 if (lex_match ('-'))
1452 if (!lex_force_int ())
1454 if (lex_integer () < 1)
1456 msg (SE, _("ID ending column (%ld) must be positive."),
1460 if (lex_integer () < rpd.id_end)
1462 msg (SE, _("ID ending column (%ld) cannot be less than "
1463 "ID beginning column (%d)."),
1464 lex_integer (), rpd.id_beg);
1468 rpd.id_end = lex_integer ();
1471 else rpd.id_end = rpd.id_beg;
1473 if (!lex_force_match ('='))
1475 rpd.id_var = parse_variable ();
1476 if (rpd.id_var == NULL)
1479 find_variable_input_spec (rpd.id_var, &rpd.id_spec);
1481 else if (lex_match_id ("TABLE"))
1483 else if (lex_match_id ("NOTABLE"))
1485 else if (lex_match_id ("DATA"))
1493 if (!lex_force_match ('/'))
1497 /* Comes here when DATA specification encountered. */
1498 if ((seen & (1 | 2)) != (1 | 2))
1500 if ((seen & 1) == 0)
1501 msg (SE, _("Missing required specification STARTS."));
1502 if ((seen & 2) == 0)
1503 msg (SE, _("Missing required specification OCCURS."));
1507 /* Enforce ID restriction. */
1508 if ((seen & 16) && !(seen & 8))
1510 msg (SE, _("ID specified without CONTINUED."));
1514 /* Calculate starts_end, cont_end if necessary. */
1515 if (rpd.starts_end.num == 0 && rpd.starts_end.var == NULL)
1516 rpd.starts_end.num = fh_record_width (rpd.handle);
1517 if (rpd.cont_end.num == 0 && rpd.starts_end.var == NULL)
1518 rpd.cont_end.num = fh_record_width (rpd.handle);
1520 /* Calculate length if possible. */
1521 if ((seen & 4) == 0)
1523 struct dls_var_spec *iter;
1525 for (iter = rpd.spec; iter; iter = iter->next)
1527 if (iter->lc > rpd.length.num)
1528 rpd.length.num = iter->lc;
1530 assert (rpd.length.num != 0);
1534 if (!parse_repeating_data ())
1538 dump_fixed_table ();
1541 struct repeating_data_trns *new_trns;
1543 rpd.h.proc = read_one_set_of_repetitions;
1544 rpd.h.free = destroy_dls;
1546 new_trns = xmalloc (sizeof *new_trns);
1547 memcpy (new_trns, &rpd, sizeof *new_trns);
1548 add_transformation ((struct trns_header *) new_trns);
1551 return lex_end_of_command ();
1554 /* Because of the way that DATA LIST is structured, it's not trivial
1555 to determine what input format is associated with a given variable.
1556 This function finds the input format specification for variable V
1557 and puts it in SPEC. */
1559 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1563 for (i = 0; i < n_trns; i++)
1565 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1567 if (pgm->h.proc == read_one_case)
1569 struct dls_var_spec *iter;
1571 for (iter = pgm->spec; iter; iter = iter->next)
1574 *spec = iter->input;
1583 /* Parses a number or a variable name from the syntax file and puts
1584 the results in VALUE. Ensures that the number is at least 1; else
1585 emits an error based on MESSAGE. Returns nonzero only if
1588 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1593 value->var = parse_variable ();
1594 if (value->var == NULL)
1596 if (value->var->type == ALPHA)
1598 msg (SE, _("String variable not allowed here."));
1602 else if (lex_integer_p ())
1604 value->num = lex_integer ();
1608 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1614 msg (SE, _("Variable or integer expected for %s."), message);
1620 /* Parses data specifications for repeating data groups. Taken from
1621 parse_fixed(). Returns nonzero only if successful. */
1623 parse_repeating_data (void)
1630 while (token != '.')
1632 fx.spec.rec = fx.recno;
1634 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
1639 if (!fixed_parse_compatible ())
1642 else if (token == '(')
1646 if (!fixed_parse_fortran ())
1651 msg (SE, _("SPSS-like or FORTRAN-like format "
1652 "specification expected after variable names."));
1656 for (i = 0; i < fx.nname; i++)
1662 lex_error (_("expecting end of command"));
1669 for (i = 0; i < fx.nname; i++)
1675 /* Obtains the real value for rpd_num_or_var N in case C and returns
1676 it. The valid range is nonnegative numbers, but numbers outside
1677 this range can be returned and should be handled by the caller as
1680 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1685 assert (n->num == 0);
1688 double v = c->data[n->var->fv].f;
1690 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1699 /* Parses one record of repeated data and outputs corresponding cases.
1700 Repeating data is present in line LINE having length LEN.
1701 Repeating data begins in column BEG and continues through column
1702 END inclusive (1-based columns); occurrences are offset OFS columns
1703 from each other. C is the case that will be filled in; T is the
1704 REPEATING DATA transformation. The record ID will be verified if
1705 COMPARE_ID is nonzero; if it is zero, then the record ID is
1706 initialized to the ID present in the case (assuming that ID
1707 location was specified by the user). Returns number of occurrences
1708 parsed up to the specified maximum of MAX_OCCURS. */
1710 rpd_parse_record (int beg, int end, int ofs, struct ccase *c,
1711 struct repeating_data_trns *t,
1712 char *line, int len, int compare_id, int max_occurs)
1717 /* Handle record ID values. */
1720 static union value comparator;
1726 data_in_finite_line (&di, line, len, t->id_beg, t->id_end);
1730 di.format = t->id_spec;
1736 if (compare_id == 0)
1738 else if ((t->id_var->type == NUMERIC && comparator.f != v.f)
1739 || (t->id_var->type == ALPHA
1740 && strncmp (comparator.s, v.s, t->id_var->width)))
1745 if (!data_out (comp_str, &t->id_var->print, &comparator))
1747 if (!data_out (v_str, &t->id_var->print, &v))
1750 comp_str[t->id_var->print.w] = v_str[t->id_var->print.w] = 0;
1753 _("Mismatched case ID (%s). Expected value was %s."),
1760 /* Iterate over the set of expected occurrences and record each of
1761 them as a separate case. FIXME: We need to execute any
1762 transformations that follow the current one. */
1766 for (occurrences = 0; occurrences < max_occurs; )
1768 if (cur + ofs > end + 1)
1773 struct dls_var_spec *var_spec = t->spec;
1775 for (; var_spec; var_spec = var_spec->next)
1777 int fc = var_spec->fc - 1 + cur;
1778 int lc = var_spec->lc - 1 + cur;
1780 if (fc > len && !warned && var_spec->input.type != FMT_A)
1785 _("Variable %s startging in column %d extends "
1786 "beyond physical record length of %d."),
1787 var_spec->v->name, fc, len);
1793 data_in_finite_line (&di, line, len, fc, lc);
1794 di.v = &c->data[var_spec->fv];
1797 di.format = var_spec->input;
1815 /* Analogous to read_one_case; reads one set of repetitions of the
1816 elements in the REPEATING DATA structure. Returns -1 on success,
1817 -2 on end of file or on failure. */
1819 read_one_set_of_repetitions (struct trns_header *trns, struct ccase *c)
1821 dfm_push (dlsp->handle);
1824 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1826 char *line; /* Current record. */
1827 int len; /* Length of current record. */
1829 int starts_beg; /* Starting column. */
1830 int starts_end; /* Ending column. */
1831 int occurs; /* Number of repetitions. */
1832 int length; /* Length of each occurrence. */
1833 int cont_beg; /* Starting column for continuation lines. */
1834 int cont_end; /* Ending column for continuation lines. */
1836 int occurs_left; /* Number of occurrences remaining. */
1838 int code; /* Return value from rpd_parse_record(). */
1840 int skip_first_record = 0;
1842 /* Read the current record. */
1843 dfm_bkwd_record (dlsp->handle, 1);
1844 line = dfm_get_record (dlsp->handle, &len);
1847 dfm_fwd_record (dlsp->handle);
1849 /* Calculate occurs, length. */
1850 occurs_left = occurs = realize_value (&t->occurs, c);
1853 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1856 starts_beg = realize_value (&t->starts_beg, c);
1857 if (starts_beg <= 0)
1859 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1864 starts_end = realize_value (&t->starts_end, c);
1865 if (starts_end < starts_beg)
1867 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1868 "beginning column (%d)."),
1869 starts_end, starts_beg);
1870 skip_first_record = 1;
1872 length = realize_value (&t->length, c);
1875 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1877 occurs = occurs_left = 1;
1879 cont_beg = realize_value (&t->cont_beg, c);
1882 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1887 cont_end = realize_value (&t->cont_end, c);
1888 if (cont_end < cont_beg)
1890 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1891 "beginning column (%d)."),
1892 cont_end, cont_beg);
1896 /* Parse the first record. */
1897 if (!skip_first_record)
1899 code = rpd_parse_record (starts_beg, starts_end, length, c, t, line,
1900 len, 0, occurs_left);
1904 else if (cont_beg == 0)
1907 /* Make sure, if some occurrences are left, that we have
1908 continuation records. */
1909 occurs_left -= code;
1910 if (occurs_left != 0 && cont_beg == 0)
1913 _("Number of repetitions specified on OCCURS (%d) "
1914 "exceed number of repetitions available in "
1915 "space on STARTS (%d), and CONTINUED not specified."),
1920 /* Go on to additional records. */
1921 while (occurs_left != 0)
1923 assert (occurs_left >= 0);
1925 /* Read in another record. */
1926 line = dfm_get_record (dlsp->handle, &len);
1930 _("Unexpected end of file with %d repetitions "
1931 "remaining out of %d."),
1932 occurs_left, occurs);
1935 dfm_fwd_record (dlsp->handle);
1937 /* Parse this record. */
1938 code = rpd_parse_record (cont_beg, cont_end, length, c, t, line,
1939 len, 1, occurs_left);
1942 occurs_left -= code;
1946 dfm_pop (dlsp->handle);
1948 /* FIXME: This is a kluge until we've implemented multiplexing of