1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next;
54 struct variable *v; /* Associated variable. Used only in
55 parsing. Not safe later. */
56 char name[9]; /* Free-format: Name of variable. */
57 int rec; /* Fixed-format: Record number (1-based). */
58 int fc, lc; /* Fixed-format: Column numbers in record. */
59 struct fmt_spec input; /* Input format of this field. */
60 int fv; /* First value in case. */
61 int width; /* 0=numeric, >0=width of alpha field. */
64 /* Constants for DATA LIST type. */
65 /* Must match table in cmd_data_list(). */
73 /* DATA LIST private data structure. */
77 struct dls_var_spec *spec; /* Variable parsing specifications. */
78 struct file_handle *handle; /* Input file, never NULL. */
79 /* Do not reorder preceding fields. */
81 int type; /* A DLS_* constant. */
82 struct variable *end; /* Variable specified on END subcommand. */
83 int eof; /* End of file encountered. */
84 int nrec; /* Number of records. */
87 /* Holds information on parsing the data file. */
88 static struct data_list_pgm dls;
90 /* Pointer to a pointer to where the first dls_var_spec should go. */
91 static struct dls_var_spec **first;
93 /* Last dls_var_spec in the chain. Used for building the linked-list. */
94 static struct dls_var_spec *next;
96 static int parse_fixed (void);
97 static int parse_free (void);
98 static void dump_fixed_table (void);
99 static void dump_free_table (void);
100 static void destroy_dls (struct trns_header *);
101 static int read_one_case (struct trns_header *, struct ccase *);
103 /* Message title for REPEATING DATA. */
104 #define RPD_ERR "REPEATING DATA: "
109 /* 0=print no table, 1=print table. (TABLE subcommand.) */
112 lex_match_id ("DATA");
113 lex_match_id ("LIST");
115 if (vfm_source != &input_program_source
116 && vfm_source != &file_type_source)
117 discard_variables ();
119 dls.handle = default_handle;
130 if (lex_match_id ("FILE"))
133 dls.handle = fh_parse_file_handle ();
136 if (vfm_source == &file_type_source && dls.handle != default_handle)
138 msg (SE, _("DATA LIST may not use a different file from "
139 "that specified on its surrounding FILE TYPE."));
143 else if (lex_match_id ("RECORDS"))
147 if (!lex_force_int ())
149 dls.nrec = lex_integer ();
153 else if (lex_match_id ("END"))
157 msg (SE, _("The END subcommand may only be specified once."));
162 if (!lex_force_id ())
164 dls.end = dict_lookup_var (default_dict, tokid);
167 dls.end = dict_create_var (default_dict, tokid, 0);
168 assert (dls.end != NULL);
172 else if (token == T_ID)
174 /* Must match DLS_* constants. */
175 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
180 for (p = id; *p; p++)
181 if (lex_id_match (*p, tokid))
196 msg (SE, _("Only one of FIXED, FREE, or LIST may "
213 default_handle = dls.handle;
216 dls.type = DLS_FIXED;
220 if (dls.type == DLS_FREE)
226 if (dls.type == DLS_FIXED)
241 if (vfm_source != NULL)
243 struct data_list_pgm *new_pgm;
245 dls.h.proc = read_one_case;
246 dls.h.free = destroy_dls;
248 new_pgm = xmalloc (sizeof *new_pgm);
249 memcpy (new_pgm, &dls, sizeof *new_pgm);
250 add_transformation ((struct trns_header *) new_pgm);
253 vfm_source = &data_list_source;
259 append_var_spec (struct dls_var_spec *spec)
262 *first = next = xmalloc (sizeof *spec);
264 next = next->next = xmalloc (sizeof *spec);
266 memcpy (next, spec, sizeof *spec);
270 /* Fixed-format parsing. */
272 /* Used for chaining together fortran-like format specifiers. */
275 struct fmt_list *next;
278 struct fmt_list *down;
281 /* Used as "local" variables among the fixed-format parsing funcs. If
282 it were guaranteed that PSPP were going to be compiled by gcc,
283 I'd make all these functions a single set of nested functions. */
286 char **name; /* Variable names. */
287 int nname; /* Number of names. */
288 int cname; /* dump_fmt_list: index of next name to use. */
290 int recno; /* Index of current record. */
291 int sc; /* 1-based column number of starting column for
292 next field to output. */
294 struct dls_var_spec spec; /* Next specification to output. */
295 int fc, lc; /* First, last column in set of fields specified
298 int level; /* Nesting level in fixed_parse_fortran(). */
302 static int fixed_parse_compatible (void);
303 static struct fmt_list *fixed_parse_fortran (void);
315 while (lex_match ('/'))
318 if (lex_integer_p ())
320 if (lex_integer () < fx.recno)
322 msg (SE, _("The record number specified, %ld, is "
323 "before the previous record, %d. Data "
324 "fields must be listed in order of "
325 "increasing record number."),
326 lex_integer (), fx.recno - 1);
330 fx.recno = lex_integer ();
335 fx.spec.rec = fx.recno;
337 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
342 if (!fixed_parse_compatible ())
345 else if (token == '(')
349 if (!fixed_parse_fortran ())
354 msg (SE, _("SPSS-like or FORTRAN-like format "
355 "specification expected after variable names."));
359 for (i = 0; i < fx.nname; i++)
363 if (dls.nrec && next->rec > dls.nrec)
365 msg (SE, _("Variables are specified on records that "
366 "should not exist according to RECORDS subcommand."));
370 dls.nrec = next->rec;
373 lex_error (_("expecting end of command"));
379 for (i = 0; i < fx.nname; i++)
386 fixed_parse_compatible (void)
391 if (!lex_force_int ())
394 fx.fc = lex_integer ();
397 msg (SE, _("Column positions for fields must be positive."));
402 lex_negative_to_dash ();
405 if (!lex_force_int ())
407 fx.lc = lex_integer ();
410 msg (SE, _("Column positions for fields must be positive."));
413 else if (fx.lc < fx.fc)
415 msg (SE, _("The ending column for a field must be "
416 "greater than the starting column."));
425 fx.spec.input.w = fx.lc - fx.fc + 1;
428 struct fmt_desc *fdp;
434 fx.spec.input.type = parse_format_specifier_name (&cp, 0);
435 if (fx.spec.input.type == -1)
439 msg (SE, _("A format specifier on this line "
440 "has extra characters on the end."));
448 fx.spec.input.type = FMT_F;
450 if (lex_integer_p ())
452 if (lex_integer () < 1)
454 msg (SE, _("The value for number of decimal places "
455 "must be at least 1."));
459 fx.spec.input.d = lex_integer ();
465 fdp = &formats[fx.spec.input.type];
466 if (fdp->n_args < 2 && fx.spec.input.d)
468 msg (SE, _("Input format %s doesn't accept decimal places."),
473 if (fx.spec.input.d > 16)
474 fx.spec.input.d = 16;
476 if (!lex_force_match (')'))
481 fx.spec.input.type = FMT_F;
487 if ((fx.lc - fx.fc + 1) % fx.nname)
489 msg (SE, _("The %d columns %d-%d "
490 "can't be evenly divided into %d fields."),
491 fx.lc - fx.fc + 1, fx.fc, fx.lc, fx.nname);
495 dividend = (fx.lc - fx.fc + 1) / fx.nname;
496 fx.spec.input.w = dividend;
497 if (!check_input_specifier (&fx.spec.input))
500 for (i = 0; i < fx.nname; i++)
506 if (fx.spec.input.type == FMT_A || fx.spec.input.type == FMT_AHEX)
517 v = dict_create_var (default_dict, fx.name[i], width);
520 convert_fmt_ItoO (&fx.spec.input, &v->print);
525 v = dict_lookup_var (default_dict, fx.name[i]);
529 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
534 msg (SE, _("There is already a variable %s of a "
539 if (type == ALPHA && dividend != v->width)
541 msg (SE, _("There is already a string variable %s of a "
542 "different width."), fx.name[i]);
548 fx.spec.fc = fx.fc + dividend * i;
549 fx.spec.lc = fx.spec.fc + dividend - 1;
551 fx.spec.width = v->width;
552 append_var_spec (&fx.spec);
557 /* Destroy a format list and, optionally, all its sublists. */
559 destroy_fmt_list (struct fmt_list *f, int recurse)
561 struct fmt_list *next;
566 if (recurse && f->f.type == FMT_DESCEND)
567 destroy_fmt_list (f->down, 1);
572 /* Takes a hierarchically structured fmt_list F as constructed by
573 fixed_parse_fortran(), and flattens it into a linear list of
576 dump_fmt_list (struct fmt_list *f)
580 for (; f; f = f->next)
581 if (f->f.type == FMT_X)
583 else if (f->f.type == FMT_T)
585 else if (f->f.type == FMT_NEWREC)
587 fx.recno += f->count;
591 for (i = 0; i < f->count; i++)
592 if (f->f.type == FMT_DESCEND)
594 if (!dump_fmt_list (f->down))
603 if (formats[f->f.type].cat & FCAT_STRING)
613 if (fx.cname >= fx.nname)
615 msg (SE, _("The number of format "
616 "specifications exceeds the number of "
617 "variable names given."));
621 fx.spec.v = v = dict_create_var (default_dict,
626 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
630 fx.spec.input = f->f;
631 convert_fmt_ItoO (&fx.spec.input, &v->print);
634 fx.spec.rec = fx.recno;
636 fx.spec.lc = fx.sc + f->f.w - 1;
638 fx.spec.width = v->width;
639 append_var_spec (&fx.spec);
646 /* Calls itself recursively to parse nested levels of parentheses.
647 Returns to its original caller: NULL, to indicate error; non-NULL,
648 but nothing useful, to indicate success (it returns a free()'d
650 static struct fmt_list *
651 fixed_parse_fortran (void)
653 struct fmt_list *head;
654 struct fmt_list *fl = NULL;
656 lex_get (); /* Skip opening parenthesis. */
660 fl = fl->next = xmalloc (sizeof *fl);
662 head = fl = xmalloc (sizeof *fl);
664 if (lex_integer_p ())
666 fl->count = lex_integer ();
674 fl->f.type = FMT_DESCEND;
676 fl->down = fixed_parse_fortran ();
681 else if (lex_match ('/'))
682 fl->f.type = FMT_NEWREC;
683 else if (!parse_format_specifier (&fl->f, 1)
684 || !check_input_specifier (&fl->f))
696 dump_fmt_list (head);
697 if (fx.cname < fx.nname)
699 msg (SE, _("There aren't enough format specifications "
700 "to match the number of variable names given."));
703 destroy_fmt_list (head, 1);
708 destroy_fmt_list (head, 0);
713 /* Displays a table giving information on fixed-format variable
714 parsing on DATA LIST. */
715 /* FIXME: The `Columns' column should be divided into three columns,
716 one for the starting column, one for the dash, one for the ending
717 column; then right-justify the starting column and left-justify the
720 dump_fixed_table (void)
722 struct dls_var_spec *spec;
725 const char *filename;
728 for (i = 0, spec = *first; spec; spec = spec->next)
730 t = tab_create (4, i + 1, 0);
731 tab_columns (t, TAB_COL_DOWN, 1);
732 tab_headers (t, 0, 0, 1, 0);
733 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
734 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
735 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
736 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
737 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
738 tab_hline (t, TAL_2, 0, 3, 1);
739 tab_dim (t, tab_natural_dimensions);
741 for (i = 1, spec = *first; spec; spec = spec->next, i++)
743 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
744 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
745 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
747 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
748 fmt_to_string (&spec->input));
751 if (*first == dls.spec)
753 filename = fh_handle_name (dls.handle);
754 if (filename == NULL)
756 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
757 sprintf (buf, (dls.handle != inline_file
759 ngettext("Reading %d record from file %s.",
760 "Reading %d records from file %s.",dls.nrec)
762 ngettext("Reading %d record from the command file.",
763 "Reading %d records from the command file.",
769 buf = local_alloc (strlen (_("Occurrence data specifications.")) + 1);
770 strcpy (buf, _("Occurrence data specifications."));
773 tab_title (t, 0, buf);
775 fh_handle_name (NULL);
779 /* Free-format parsing. */
784 struct dls_var_spec spec;
785 struct fmt_spec in, out;
795 if (!parse_DATA_LIST_vars (&name, &nname, PV_NONE))
799 if (!parse_format_specifier (&in, 0) || !check_input_specifier (&in))
801 if (!lex_force_match (')'))
803 convert_fmt_ItoO (&in, &out);
815 if (in.type == FMT_A || in.type == FMT_AHEX)
819 for (i = 0; i < nname; i++)
823 spec.v = v = dict_create_var (default_dict, name[i], width);
826 msg (SE, _("%s is a duplicate variable name."), name[i]);
830 v->print = v->write = out;
832 strcpy (spec.name, name[i]);
835 append_var_spec (&spec);
837 for (i = 0; i < nname; i++)
843 lex_error (_("expecting end of command"));
847 for (i = 0; i < nname; i++)
853 /* Displays a table giving information on free-format variable parsing
856 dump_free_table (void)
862 struct dls_var_spec *spec;
863 for (i = 0, spec = dls.spec; spec; spec = spec->next)
867 t = tab_create (2, i + 1, 0);
868 tab_columns (t, TAB_COL_DOWN, 1);
869 tab_headers (t, 0, 0, 1, 0);
870 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
871 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
872 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
873 tab_hline (t, TAL_2, 0, 1, 1);
874 tab_dim (t, tab_natural_dimensions);
877 struct dls_var_spec *spec;
879 for (i = 1, spec = dls.spec; spec; spec = spec->next, i++)
881 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
882 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
887 const char *filename;
889 filename = fh_handle_name (dls.handle);
890 if (filename == NULL)
893 (dls.handle != inline_file
894 ? _("Reading free-form data from file %s.")
895 : _("Reading free-form data from the command file.")),
900 fh_handle_name (NULL);
903 /* Input procedure. */
905 /* Pointer to relevant parsing data. Static just to avoid passing it
907 static struct data_list_pgm *dlsp;
909 /* Extracts a field from the current position in the current record.
910 Fields can be unquoted or quoted with single- or double-quote
911 characters. *RET_LEN is set to the field length, *RET_CP is set to
912 the field itself. After parsing the field, sets the current
913 position in the record to just past the field. Returns 0 on
914 failure or a 1-based column number indicating the beginning of the
917 cut_field (char **ret_cp, int *ret_len)
922 cp = dfm_get_record (dlsp->handle, &len);
928 /* Skip leading whitespace and commas. */
929 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
934 /* Three types of fields: quoted with ', quoted with ", unquoted. */
935 if (*cp == '\'' || *cp == '"')
940 while (cp < ep && *cp != quote)
942 *ret_len = cp - *ret_cp;
946 msg (SW, _("Scope of string exceeds line."));
951 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
953 *ret_len = cp - *ret_cp;
957 int beginning_column;
959 dfm_set_record (dlsp->handle, *ret_cp);
960 beginning_column = dfm_get_cur_col (dlsp->handle) + 1;
962 dfm_set_record (dlsp->handle, cp);
964 return beginning_column;
968 static int read_from_data_list_fixed (void);
969 static int read_from_data_list_free (void);
970 static int read_from_data_list_list (void);
972 /* FLAG==0: reads any number of cases into temp_case and calls
973 write_case() for each one, returns garbage. FLAG!=0: reads one
974 case into temp_case and returns -2 on eof, -1 otherwise.
975 Uses dlsp as the relevant parsing description. */
977 do_reading (int flag, write_case_func *write_case, write_case_data wc_data)
983 dfm_push (dlsp->handle);
988 func = read_from_data_list_fixed;
991 func = read_from_data_list_free;
994 func = read_from_data_list_list;
1006 msg (SE, _("Attempt to read past end of file."));
1015 if (dlsp->end != NULL)
1019 printf ("end of file, setting %s to 1\n", dlsp->end->name);
1020 temp_case->data[dlsp->end->fv].f = 1.0;
1025 printf ("not end of file, setting %s to 0\n", dlsp->end->name);
1026 temp_case->data[dlsp->end->fv].f = 0.0;
1032 while (func () != -2)
1033 if (!write_case (wc_data))
1035 debug_printf ((_("abort in write_case()\n")));
1038 fh_close_handle (dlsp->handle);
1040 dfm_pop (dlsp->handle);
1045 /* Reads a case from the data file and parses it according to
1046 fixed-format syntax rules. */
1048 read_from_data_list_fixed (void)
1050 struct dls_var_spec *var_spec = dlsp->spec;
1053 if (!dfm_get_record (dlsp->handle, NULL))
1055 for (i = 1; i <= dlsp->nrec; i++)
1058 char *line = dfm_get_record (dlsp->handle, &len);
1062 /* Note that this can't occur on the first record. */
1063 msg (SW, _("Partial case of %d of %d records discarded."),
1068 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1072 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1073 di.v = &temp_case->data[var_spec->fv];
1075 di.f1 = var_spec->fc;
1076 di.format = var_spec->input;
1081 dfm_fwd_record (dlsp->handle);
1087 /* Reads a case from the data file and parses it according to
1088 free-format syntax rules. */
1090 read_from_data_list_free (void)
1092 struct dls_var_spec *var_spec;
1096 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1100 /* Cut out a field and read in a new record if necessary. */
1103 column = cut_field (&field, &len);
1107 if (dfm_get_record (dlsp->handle, NULL))
1108 dfm_fwd_record (dlsp->handle);
1109 if (!dfm_get_record (dlsp->handle, NULL))
1111 if (var_spec != dlsp->spec)
1112 msg (SW, _("Partial case discarded. The first variable "
1113 "missing was %s."), var_spec->name);
1123 di.v = &temp_case->data[var_spec->fv];
1126 di.format = var_spec->input;
1133 /* Reads a case from the data file and parses it according to
1134 list-format syntax rules. */
1136 read_from_data_list_list (void)
1138 struct dls_var_spec *var_spec;
1142 if (!dfm_get_record (dlsp->handle, NULL))
1145 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1147 /* Cut out a field and check for end-of-line. */
1148 int column = cut_field (&field, &len);
1153 msg (SW, _("Missing value(s) for all variables from %s onward. "
1154 "These will be filled with the system-missing value "
1155 "or blanks, as appropriate."),
1157 for (; var_spec; var_spec = var_spec->next)
1158 if (var_spec->width == 0)
1159 temp_case->data[var_spec->fv].f = SYSMIS;
1161 memset (temp_case->data[var_spec->fv].s, ' ', var_spec->width);
1170 di.v = &temp_case->data[var_spec->fv];
1173 di.format = var_spec->input;
1178 dfm_fwd_record (dlsp->handle);
1182 /* Destroys DATA LIST transformation or input program PGM. */
1184 destroy_dls (struct trns_header *pgm)
1186 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1187 struct dls_var_spec *iter, *next;
1196 fh_close_handle (dls->handle);
1199 /* Note that since this is exclusively an input program, C is
1200 guaranteed to be temp_case. */
1202 read_one_case (struct trns_header *t, struct ccase *c UNUSED)
1204 dlsp = (struct data_list_pgm *) t;
1205 return do_reading (1, NULL, NULL);
1208 /* Reads all the records from the data file and passes them to
1211 data_list_source_read (write_case_func *write_case, write_case_data wc_data)
1214 do_reading (0, write_case, wc_data);
1217 /* Destroys the source's internal data. */
1219 data_list_source_destroy_source (void)
1221 destroy_dls ((struct trns_header *) & dls);
1224 struct case_stream data_list_source =
1227 data_list_source_read,
1230 data_list_source_destroy_source,
1235 /* REPEATING DATA. */
1237 /* Represents a number or a variable. */
1238 struct rpd_num_or_var
1240 int num; /* Value, or 0. */
1241 struct variable *var; /* Variable, if number==0. */
1244 /* REPEATING DATA private data structure. */
1245 struct repeating_data_trns
1247 struct trns_header h;
1248 struct dls_var_spec *spec; /* Variable parsing specifications. */
1249 struct file_handle *handle; /* Input file, never NULL. */
1250 /* Do not reorder preceding fields. */
1252 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1253 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1254 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1255 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1256 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1257 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1258 int id_beg, id_end; /* ID subcommand, beginning & end columns. */
1259 struct variable *id_var; /* ID subcommand, DATA LIST variable. */
1260 struct fmt_spec id_spec; /* ID subcommand, input format spec. */
1261 write_case_func *write_case;
1262 write_case_data wc_data;
1265 /* Information about the transformation being parsed. */
1266 static struct repeating_data_trns rpd;
1268 int repeating_data_trns_proc (struct trns_header *, struct ccase *);
1269 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1270 static int parse_repeating_data (void);
1271 static void find_variable_input_spec (struct variable *v,
1272 struct fmt_spec *spec);
1274 /* Parses the REPEATING DATA command. */
1276 cmd_repeating_data (void)
1278 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1281 /* Bits are set when a particular subcommand has been seen. */
1284 lex_match_id ("REPEATING");
1285 lex_match_id ("DATA");
1287 assert (vfm_source == &input_program_source
1288 || vfm_source == &file_type_source);
1290 rpd.handle = default_handle;
1291 rpd.starts_beg.num = 0;
1292 rpd.starts_beg.var = NULL;
1293 rpd.starts_end = rpd.occurs = rpd.length = rpd.cont_beg
1294 = rpd.cont_end = rpd.starts_beg;
1295 rpd.id_beg = rpd.id_end = 0;
1305 if (lex_match_id ("FILE"))
1308 rpd.handle = fh_parse_file_handle ();
1311 if (rpd.handle != default_handle)
1313 msg (SE, _("REPEATING DATA must use the same file as its "
1314 "corresponding DATA LIST or FILE TYPE."));
1318 else if (lex_match_id ("STARTS"))
1323 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1328 if (!parse_num_or_var (&rpd.starts_beg, "STARTS beginning column"))
1331 lex_negative_to_dash ();
1332 if (lex_match ('-'))
1334 if (!parse_num_or_var (&rpd.starts_end, "STARTS ending column"))
1337 /* Otherwise, rpd.starts_end is left uninitialized.
1338 This is okay. We will initialize it later from the
1339 record length of the file. We can't do this now
1340 because we can't be sure that the user has specified
1341 the file handle yet. */
1344 if (rpd.starts_beg.num != 0 && rpd.starts_end.num != 0
1345 && rpd.starts_beg.num > rpd.starts_end.num)
1347 msg (SE, _("STARTS beginning column (%d) exceeds "
1348 "STARTS ending column (%d)."),
1349 rpd.starts_beg.num, rpd.starts_end.num);
1353 else if (lex_match_id ("OCCURS"))
1358 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1363 if (!parse_num_or_var (&rpd.occurs, "OCCURS"))
1366 else if (lex_match_id ("LENGTH"))
1371 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1376 if (!parse_num_or_var (&rpd.length, "LENGTH"))
1379 else if (lex_match_id ("CONTINUED"))
1384 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1389 if (!lex_match ('/'))
1391 if (!parse_num_or_var (&rpd.cont_beg, "CONTINUED beginning column"))
1394 lex_negative_to_dash ();
1396 && !parse_num_or_var (&rpd.cont_end,
1397 "CONTINUED ending column"))
1400 if (rpd.cont_beg.num != 0 && rpd.cont_end.num != 0
1401 && rpd.cont_beg.num > rpd.cont_end.num)
1403 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1404 "CONTINUED ending column (%d)."),
1405 rpd.cont_beg.num, rpd.cont_end.num);
1410 rpd.cont_beg.num = 1;
1412 else if (lex_match_id ("ID"))
1417 msg (SE, _("%s subcommand given multiple times."),"ID");
1422 if (!lex_force_int ())
1424 if (lex_integer () < 1)
1426 msg (SE, _("ID beginning column (%ld) must be positive."),
1430 rpd.id_beg = lex_integer ();
1433 lex_negative_to_dash ();
1435 if (lex_match ('-'))
1437 if (!lex_force_int ())
1439 if (lex_integer () < 1)
1441 msg (SE, _("ID ending column (%ld) must be positive."),
1445 if (lex_integer () < rpd.id_end)
1447 msg (SE, _("ID ending column (%ld) cannot be less than "
1448 "ID beginning column (%d)."),
1449 lex_integer (), rpd.id_beg);
1453 rpd.id_end = lex_integer ();
1456 else rpd.id_end = rpd.id_beg;
1458 if (!lex_force_match ('='))
1460 rpd.id_var = parse_variable ();
1461 if (rpd.id_var == NULL)
1464 find_variable_input_spec (rpd.id_var, &rpd.id_spec);
1466 else if (lex_match_id ("TABLE"))
1468 else if (lex_match_id ("NOTABLE"))
1470 else if (lex_match_id ("DATA"))
1478 if (!lex_force_match ('/'))
1482 /* Comes here when DATA specification encountered. */
1483 if ((seen & (1 | 2)) != (1 | 2))
1485 if ((seen & 1) == 0)
1486 msg (SE, _("Missing required specification STARTS."));
1487 if ((seen & 2) == 0)
1488 msg (SE, _("Missing required specification OCCURS."));
1492 /* Enforce ID restriction. */
1493 if ((seen & 16) && !(seen & 8))
1495 msg (SE, _("ID specified without CONTINUED."));
1499 /* Calculate starts_end, cont_end if necessary. */
1500 if (rpd.starts_end.num == 0 && rpd.starts_end.var == NULL)
1501 rpd.starts_end.num = fh_record_width (rpd.handle);
1502 if (rpd.cont_end.num == 0 && rpd.starts_end.var == NULL)
1503 rpd.cont_end.num = fh_record_width (rpd.handle);
1505 /* Calculate length if possible. */
1506 if ((seen & 4) == 0)
1508 struct dls_var_spec *iter;
1510 for (iter = rpd.spec; iter; iter = iter->next)
1512 if (iter->lc > rpd.length.num)
1513 rpd.length.num = iter->lc;
1515 assert (rpd.length.num != 0);
1519 if (!parse_repeating_data ())
1523 dump_fixed_table ();
1526 struct repeating_data_trns *new_trns;
1528 rpd.h.proc = repeating_data_trns_proc;
1529 rpd.h.free = destroy_dls;
1531 new_trns = xmalloc (sizeof *new_trns);
1532 memcpy (new_trns, &rpd, sizeof *new_trns);
1533 add_transformation ((struct trns_header *) new_trns);
1536 return lex_end_of_command ();
1539 /* Because of the way that DATA LIST is structured, it's not trivial
1540 to determine what input format is associated with a given variable.
1541 This function finds the input format specification for variable V
1542 and puts it in SPEC. */
1544 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1548 for (i = 0; i < n_trns; i++)
1550 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1552 if (pgm->h.proc == read_one_case)
1554 struct dls_var_spec *iter;
1556 for (iter = pgm->spec; iter; iter = iter->next)
1559 *spec = iter->input;
1568 /* Parses a number or a variable name from the syntax file and puts
1569 the results in VALUE. Ensures that the number is at least 1; else
1570 emits an error based on MESSAGE. Returns nonzero only if
1573 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1578 value->var = parse_variable ();
1579 if (value->var == NULL)
1581 if (value->var->type == ALPHA)
1583 msg (SE, _("String variable not allowed here."));
1587 else if (lex_integer_p ())
1589 value->num = lex_integer ();
1593 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1599 msg (SE, _("Variable or integer expected for %s."), message);
1605 /* Parses data specifications for repeating data groups. Taken from
1606 parse_fixed(). Returns nonzero only if successful. */
1608 parse_repeating_data (void)
1615 while (token != '.')
1617 fx.spec.rec = fx.recno;
1619 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
1624 if (!fixed_parse_compatible ())
1627 else if (token == '(')
1631 if (!fixed_parse_fortran ())
1636 msg (SE, _("SPSS-like or FORTRAN-like format "
1637 "specification expected after variable names."));
1641 for (i = 0; i < fx.nname; i++)
1647 lex_error (_("expecting end of command"));
1654 for (i = 0; i < fx.nname; i++)
1660 /* Obtains the real value for rpd_num_or_var N in case C and returns
1661 it. The valid range is nonnegative numbers, but numbers outside
1662 this range can be returned and should be handled by the caller as
1665 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1670 assert (n->num == 0);
1673 double v = c->data[n->var->fv].f;
1675 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1684 /* Parses one record of repeated data and outputs corresponding cases.
1685 Repeating data is present in line LINE having length LEN.
1686 Repeating data begins in column BEG and continues through column
1687 END inclusive (1-based columns); occurrences are offset OFS columns
1688 from each other. C is the case that will be filled in; T is the
1689 REPEATING DATA transformation. The record ID will be verified if
1690 COMPARE_ID is nonzero; if it is zero, then the record ID is
1691 initialized to the ID present in the case (assuming that ID
1692 location was specified by the user). Returns number of occurrences
1693 parsed up to the specified maximum of MAX_OCCURS. */
1695 rpd_parse_record (int beg, int end, int ofs, struct ccase *c,
1696 struct repeating_data_trns *t,
1697 char *line, int len, int compare_id, int max_occurs)
1702 /* Handle record ID values. */
1705 static union value comparator;
1711 data_in_finite_line (&di, line, len, t->id_beg, t->id_end);
1715 di.format = t->id_spec;
1721 if (compare_id == 0)
1723 else if ((t->id_var->type == NUMERIC && comparator.f != v.f)
1724 || (t->id_var->type == ALPHA
1725 && strncmp (comparator.s, v.s, t->id_var->width)))
1730 if (!data_out (comp_str, &t->id_var->print, &comparator))
1732 if (!data_out (v_str, &t->id_var->print, &v))
1735 comp_str[t->id_var->print.w] = v_str[t->id_var->print.w] = 0;
1738 _("Mismatched case ID (%s). Expected value was %s."),
1745 /* Iterate over the set of expected occurrences and record each of
1746 them as a separate case. FIXME: We need to execute any
1747 transformations that follow the current one. */
1751 for (occurrences = 0; occurrences < max_occurs; )
1753 if (cur + ofs > end + 1)
1758 struct dls_var_spec *var_spec = t->spec;
1760 for (; var_spec; var_spec = var_spec->next)
1762 int fc = var_spec->fc - 1 + cur;
1763 int lc = var_spec->lc - 1 + cur;
1765 if (fc > len && !warned && var_spec->input.type != FMT_A)
1770 _("Variable %s starting in column %d extends "
1771 "beyond physical record length of %d."),
1772 var_spec->v->name, fc, len);
1778 data_in_finite_line (&di, line, len, fc, lc);
1779 di.v = &c->data[var_spec->fv];
1782 di.format = var_spec->input;
1792 if (!t->write_case (t->wc_data))
1800 /* Analogous to read_one_case; reads one set of repetitions of the
1801 elements in the REPEATING DATA structure. Returns -1 on success,
1802 -2 on end of file or on failure. */
1804 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c)
1806 dfm_push (dlsp->handle);
1809 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1811 char *line; /* Current record. */
1812 int len; /* Length of current record. */
1814 int starts_beg; /* Starting column. */
1815 int starts_end; /* Ending column. */
1816 int occurs; /* Number of repetitions. */
1817 int length; /* Length of each occurrence. */
1818 int cont_beg; /* Starting column for continuation lines. */
1819 int cont_end; /* Ending column for continuation lines. */
1821 int occurs_left; /* Number of occurrences remaining. */
1823 int code; /* Return value from rpd_parse_record(). */
1825 int skip_first_record = 0;
1827 /* Read the current record. */
1828 dfm_bkwd_record (dlsp->handle, 1);
1829 line = dfm_get_record (dlsp->handle, &len);
1832 dfm_fwd_record (dlsp->handle);
1834 /* Calculate occurs, length. */
1835 occurs_left = occurs = realize_value (&t->occurs, c);
1838 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1841 starts_beg = realize_value (&t->starts_beg, c);
1842 if (starts_beg <= 0)
1844 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1849 starts_end = realize_value (&t->starts_end, c);
1850 if (starts_end < starts_beg)
1852 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1853 "beginning column (%d)."),
1854 starts_end, starts_beg);
1855 skip_first_record = 1;
1857 length = realize_value (&t->length, c);
1860 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1862 occurs = occurs_left = 1;
1864 cont_beg = realize_value (&t->cont_beg, c);
1867 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1872 cont_end = realize_value (&t->cont_end, c);
1873 if (cont_end < cont_beg)
1875 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1876 "beginning column (%d)."),
1877 cont_end, cont_beg);
1881 /* Parse the first record. */
1882 if (!skip_first_record)
1884 code = rpd_parse_record (starts_beg, starts_end, length, c, t, line,
1885 len, 0, occurs_left);
1889 else if (cont_beg == 0)
1892 /* Make sure, if some occurrences are left, that we have
1893 continuation records. */
1894 occurs_left -= code;
1895 if (occurs_left != 0 && cont_beg == 0)
1898 _("Number of repetitions specified on OCCURS (%d) "
1899 "exceed number of repetitions available in "
1900 "space on STARTS (%d), and CONTINUED not specified."),
1905 /* Go on to additional records. */
1906 while (occurs_left != 0)
1908 assert (occurs_left >= 0);
1910 /* Read in another record. */
1911 line = dfm_get_record (dlsp->handle, &len);
1915 _("Unexpected end of file with %d repetitions "
1916 "remaining out of %d."),
1917 occurs_left, occurs);
1920 dfm_fwd_record (dlsp->handle);
1922 /* Parse this record. */
1923 code = rpd_parse_record (cont_beg, cont_end, length, c, t, line,
1924 len, 1, occurs_left);
1927 occurs_left -= code;
1931 dfm_pop (dlsp->handle);
1933 /* FIXME: This is a kluge until we've implemented multiplexing of
1938 /* This is a kluge. It is only here until I have more time
1939 tocome up with something better. It lets
1940 repeating_data_trns_proc() know how to write the cases that it
1943 repeating_data_set_write_case (struct trns_header *trns,
1944 write_case_func *write_case,
1945 write_case_data wc_data)
1947 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1949 assert (trns->proc == repeating_data_trns_proc);
1950 t->write_case = write_case;
1951 t->wc_data = wc_data;