1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
46 #include "debug-print.h"
49 #include "file-handle.h"
59 /* Utility function. */
61 /* FIXME: Either REPEATING DATA must be the last transformation, or we
62 must multiplex the transformations that follow (i.e., perform them
63 for every case that we produce from a repetition instance).
64 Currently we do neither. We should do one or the other. */
66 /* Describes how to parse one variable. */
69 struct dls_var_spec *next;
70 struct variable *v; /* Associated variable. Used only in
71 parsing. Not safe later. */
72 char name[9]; /* Free-format: Name of variable. */
73 int rec; /* Fixed-format: Record number (1-based). */
74 int fc, lc; /* Fixed-format: Column numbers in record. */
75 struct fmt_spec input; /* Input format of this field. */
76 int fv; /* First value in case. */
77 int type; /* 0=numeric, >0=width of alpha field. */
80 /* Constants for DATA LIST type. */
81 /* Must match table in cmd_data_list(). */
89 /* DATA LIST private data structure. */
93 struct dls_var_spec *spec; /* Variable parsing specifications. */
94 struct file_handle *handle; /* Input file, never NULL. */
95 /* Do not reorder preceding fields. */
97 int type; /* A DLS_* constant. */
98 struct variable *end; /* Variable specified on END subcommand. */
99 int eof; /* End of file encountered. */
100 int nrec; /* Number of records. */
103 /* Holds information on parsing the data file. */
104 static struct data_list_pgm dls;
106 /* Pointer to a pointer to where the first dls_var_spec should go. */
107 static struct dls_var_spec **first;
109 /* Last dls_var_spec in the chain. Used for building the linked-list. */
110 static struct dls_var_spec *next;
112 static int parse_fixed (void);
113 static int parse_free (void);
114 static void dump_fixed_table (void);
115 static void dump_free_table (void);
116 static void destroy_dls (struct trns_header *);
117 static int read_one_case (struct trns_header *, struct ccase *);
119 /* Message title for REPEATING DATA. */
120 #define RPD_ERR "REPEATING DATA: "
125 /* 0=print no table, 1=print table. (TABLE subcommand.) */
128 lex_match_id ("DATA");
129 lex_match_id ("LIST");
131 if (vfm_source != &input_program_source
132 && vfm_source != &file_type_source)
133 discard_variables ();
135 dls.handle = default_handle;
146 if (lex_match_id ("FILE"))
149 dls.handle = fh_parse_file_handle ();
152 if (vfm_source == &file_type_source && dls.handle != default_handle)
154 msg (SE, _("DATA LIST may not use a different file from "
155 "that specified on its surrounding FILE TYPE."));
159 else if (lex_match_id ("RECORDS"))
163 if (!lex_force_int ())
165 dls.nrec = lex_integer ();
169 else if (lex_match_id ("END"))
173 msg (SE, _("The END subcommand may only be specified once."));
178 if (!lex_force_id ())
180 dls.end = find_variable (tokid);
182 dls.end = force_create_variable (&default_dict, tokid, NUMERIC, 0);
185 else if (token == T_ID)
187 /* Must match DLS_* constants. */
188 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
193 for (p = id; *p; p++)
194 if (lex_id_match (*p, tokid))
209 msg (SE, _("Only one of FIXED, FREE, or LIST may "
226 default_handle = dls.handle;
229 dls.type = DLS_FIXED;
233 if (dls.type == DLS_FREE)
239 if (dls.type == DLS_FIXED)
254 if (vfm_source != NULL)
256 struct data_list_pgm *new_pgm;
258 dls.h.proc = read_one_case;
259 dls.h.free = destroy_dls;
261 new_pgm = xmalloc (sizeof *new_pgm);
262 memcpy (new_pgm, &dls, sizeof *new_pgm);
263 add_transformation ((struct trns_header *) new_pgm);
266 vfm_source = &data_list_source;
272 append_var_spec (struct dls_var_spec *spec)
275 *first = next = xmalloc (sizeof *spec);
277 next = next->next = xmalloc (sizeof *spec);
280 spec->type = ROUND_UP (spec->type, 8);
283 memcpy (next, spec, sizeof *spec);
287 /* Fixed-format parsing. */
289 /* Used for chaining together fortran-like format specifiers. */
292 struct fmt_list *next;
295 struct fmt_list *down;
298 /* Used as "local" variables among the fixed-format parsing funcs. If
299 it were guaranteed that PSPP were going to be compiled by gcc,
300 I'd make all these functions a single set of nested functions. */
303 char **name; /* Variable names. */
304 int nname; /* Number of names. */
305 int cname; /* dump_fmt_list: index of next name to use. */
307 int recno; /* Index of current record. */
308 int sc; /* 1-based column number of starting column for
309 next field to output. */
311 struct dls_var_spec spec; /* Next specification to output. */
312 int fc, lc; /* First, last column in set of fields specified
315 int level; /* Nesting level in fixed_parse_fortran(). */
319 static int fixed_parse_compatible (void);
320 static struct fmt_list *fixed_parse_fortran (void);
332 while (lex_match ('/'))
335 if (lex_integer_p ())
337 if (lex_integer () < fx.recno)
339 msg (SE, _("The record number specified, %ld, is "
340 "before the previous record, %d. Data "
341 "fields must be listed in order of "
342 "increasing record number."),
343 lex_integer (), fx.recno - 1);
347 fx.recno = lex_integer ();
352 fx.spec.rec = fx.recno;
354 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
359 if (!fixed_parse_compatible ())
362 else if (token == '(')
366 if (!fixed_parse_fortran ())
371 msg (SE, _("SPSS-like or FORTRAN-like format "
372 "specification expected after variable names."));
376 for (i = 0; i < fx.nname; i++)
380 if (dls.nrec && next->rec > dls.nrec)
382 msg (SE, _("Variables are specified on records that "
383 "should not exist according to RECORDS subcommand."));
387 dls.nrec = next->rec;
390 lex_error (_("expecting end of command"));
396 for (i = 0; i < fx.nname; i++)
403 fixed_parse_compatible (void)
408 if (!lex_force_int ())
411 fx.fc = lex_integer ();
414 msg (SE, _("Column positions for fields must be positive."));
419 lex_negative_to_dash ();
422 if (!lex_force_int ())
424 fx.lc = lex_integer ();
427 msg (SE, _("Column positions for fields must be positive."));
430 else if (fx.lc < fx.fc)
432 msg (SE, _("The ending column for a field must be "
433 "greater than the starting column."));
442 fx.spec.input.w = fx.lc - fx.fc + 1;
445 struct fmt_desc *fdp;
451 fx.spec.input.type = parse_format_specifier_name (&cp, 0);
452 if (fx.spec.input.type == -1)
456 msg (SE, _("A format specifier on this line "
457 "has extra characters on the end."));
465 fx.spec.input.type = FMT_F;
467 if (lex_integer_p ())
469 if (lex_integer () < 1)
471 msg (SE, _("The value for number of decimal places "
472 "must be at least 1."));
476 fx.spec.input.d = lex_integer ();
482 fdp = &formats[fx.spec.input.type];
483 if (fdp->n_args < 2 && fx.spec.input.d)
485 msg (SE, _("Input format %s doesn't accept decimal places."),
490 if (fx.spec.input.d > 16)
491 fx.spec.input.d = 16;
493 if (!lex_force_match (')'))
498 fx.spec.input.type = FMT_F;
504 if ((fx.lc - fx.fc + 1) % fx.nname)
506 msg (SE, _("The %d columns %d-%d "
507 "can't be evenly divided into %d fields."),
508 fx.lc - fx.fc + 1, fx.fc, fx.lc, fx.nname);
512 dividend = (fx.lc - fx.fc + 1) / fx.nname;
513 fx.spec.input.w = dividend;
514 if (!check_input_specifier (&fx.spec.input))
517 for (i = 0; i < fx.nname; i++)
522 if (fx.spec.input.type == FMT_A || fx.spec.input.type == FMT_AHEX)
527 v = create_variable (&default_dict, fx.name[i], type, dividend);
530 convert_fmt_ItoO (&fx.spec.input, &v->print);
535 v = find_variable (fx.name[i]);
539 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
544 msg (SE, _("There is already a variable %s of a "
549 if (type == ALPHA && dividend != v->width)
551 msg (SE, _("There is already a string variable %s of a "
552 "different width."), fx.name[i]);
558 fx.spec.fc = fx.fc + dividend * i;
559 fx.spec.lc = fx.spec.fc + dividend - 1;
561 fx.spec.type = v->type == NUMERIC ? 0 : v->width;
562 append_var_spec (&fx.spec);
567 /* Destroy a format list and, optionally, all its sublists. */
569 destroy_fmt_list (struct fmt_list *f, int recurse)
571 struct fmt_list *next;
576 if (recurse && f->f.type == FMT_DESCEND)
577 destroy_fmt_list (f->down, 1);
582 /* Takes a hierarchically structured fmt_list F as constructed by
583 fixed_parse_fortran(), and flattens it into a linear list of
586 dump_fmt_list (struct fmt_list *f)
590 for (; f; f = f->next)
591 if (f->f.type == FMT_X)
593 else if (f->f.type == FMT_T)
595 else if (f->f.type == FMT_NEWREC)
597 fx.recno += f->count;
601 for (i = 0; i < f->count; i++)
602 if (f->f.type == FMT_DESCEND)
604 if (!dump_fmt_list (f->down))
612 type = (formats[f->f.type].cat & FCAT_STRING) ? ALPHA : NUMERIC;
613 if (fx.cname >= fx.nname)
615 msg (SE, _("The number of format "
616 "specifications exceeds the number of "
617 "variable names given."));
621 fx.spec.v = v = create_variable (&default_dict,
626 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
630 fx.spec.input = f->f;
631 convert_fmt_ItoO (&fx.spec.input, &v->print);
634 fx.spec.rec = fx.recno;
636 fx.spec.lc = fx.sc + f->f.w - 1;
638 fx.spec.type = v->type == NUMERIC ? 0 : v->width;
639 append_var_spec (&fx.spec);
646 /* Calls itself recursively to parse nested levels of parentheses.
647 Returns to its original caller: NULL, to indicate error; non-NULL,
648 but nothing useful, to indicate success (it returns a free()'d
650 static struct fmt_list *
651 fixed_parse_fortran (void)
653 struct fmt_list *head;
654 struct fmt_list *fl = NULL;
656 lex_get (); /* Skip opening parenthesis. */
660 fl = fl->next = xmalloc (sizeof *fl);
662 head = fl = xmalloc (sizeof *fl);
664 if (lex_integer_p ())
666 fl->count = lex_integer ();
674 fl->f.type = FMT_DESCEND;
676 fl->down = fixed_parse_fortran ();
681 else if (lex_match ('/'))
682 fl->f.type = FMT_NEWREC;
683 else if (!parse_format_specifier (&fl->f, 1)
684 || !check_input_specifier (&fl->f))
696 dump_fmt_list (head);
697 if (fx.cname < fx.nname)
699 msg (SE, _("There aren't enough format specifications "
700 "to match the number of variable names given."));
703 destroy_fmt_list (head, 1);
708 destroy_fmt_list (head, 0);
713 /* Displays a table giving information on fixed-format variable
714 parsing on DATA LIST. */
715 /* FIXME: The `Columns' column should be divided into three columns,
716 one for the starting column, one for the dash, one for the ending
717 column; then right-justify the starting column and left-justify the
720 dump_fixed_table (void)
722 struct dls_var_spec *spec;
725 const char *filename;
728 for (i = 0, spec = *first; spec; spec = spec->next)
730 t = tab_create (4, i + 1, 0);
731 tab_columns (t, TAB_COL_DOWN, 1);
732 tab_headers (t, 0, 0, 1, 0);
733 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
734 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
735 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
736 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
737 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
738 tab_hline (t, TAL_2, 0, 3, 1);
739 tab_dim (t, tab_natural_dimensions);
741 for (i = 1, spec = *first; spec; spec = spec->next, i++)
743 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
744 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
745 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
747 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
748 fmt_to_string (&spec->input));
751 if (*first == dls.spec)
753 filename = fh_handle_name (dls.handle);
754 if (filename == NULL)
756 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
757 sprintf (buf, (dls.handle != inline_file
758 ? _("Reading %d record%s from file %s.")
759 : _("Reading %d record%s from the command file.")),
760 dls.nrec, dls.nrec != 1 ? "s" : "", filename);
764 buf = local_alloc (strlen (_("Occurrence data specifications.")) + 1);
765 strcpy (buf, _("Occurrence data specifications."));
768 tab_title (t, 0, buf);
770 fh_handle_name (NULL);
774 /* Free-format parsing. */
779 struct dls_var_spec spec;
780 struct fmt_spec in, out;
787 memset (&spec, 0, sizeof spec);
792 if (!parse_DATA_LIST_vars (&name, &nname, PV_NONE))
796 if (!parse_format_specifier (&in, 0) || !check_input_specifier (&in))
798 if (!lex_force_match (')'))
800 convert_fmt_ItoO (&in, &out);
812 if (in.type == FMT_A || in.type == FMT_AHEX)
816 for (i = 0; i < nname; i++)
820 spec.v = v = create_variable (&default_dict, name[i], type, in.w);
823 msg (SE, _("%s is a duplicate variable name."), name[i]);
827 v->print = v->write = out;
829 strcpy (spec.name, name[i]);
831 spec.type = type == NUMERIC ? 0 : v->width;
832 append_var_spec (&spec);
834 for (i = 0; i < nname; i++)
840 lex_error (_("expecting end of command"));
844 for (i = 0; i < nname; i++)
850 /* Displays a table giving information on free-format variable parsing
853 dump_free_table (void)
859 struct dls_var_spec *spec;
860 for (i = 0, spec = dls.spec; spec; spec = spec->next)
864 t = tab_create (2, i + 1, 0);
865 tab_columns (t, TAB_COL_DOWN, 1);
866 tab_headers (t, 0, 0, 1, 0);
867 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
868 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
869 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
870 tab_hline (t, TAL_2, 0, 1, 1);
871 tab_dim (t, tab_natural_dimensions);
874 struct dls_var_spec *spec;
876 for (i = 1, spec = dls.spec; spec; spec = spec->next, i++)
878 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
879 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
884 const char *filename;
886 filename = fh_handle_name (dls.handle);
887 if (filename == NULL)
890 (dls.handle != inline_file
891 ? _("Reading free-form data from file %s.")
892 : _("Reading free-form data from the command file.")),
897 fh_handle_name (NULL);
900 /* Input procedure. */
902 /* Pointer to relevant parsing data. Static just to avoid passing it
904 static struct data_list_pgm *dlsp;
906 /* Extracts a field from the current position in the current record.
907 Fields can be unquoted or quoted with single- or double-quote
908 characters. *RET_LEN is set to the field length, *RET_CP is set to
909 the field itself. After parsing the field, sets the current
910 position in the record to just past the field. Returns 0 on
911 failure or a 1-based column number indicating the beginning of the
914 cut_field (char **ret_cp, int *ret_len)
919 cp = dfm_get_record (dlsp->handle, &len);
925 /* Skip leading whitespace and commas. */
926 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
931 /* Three types of fields: quoted with ', quoted with ", unquoted. */
932 if (*cp == '\'' || *cp == '"')
937 while (cp < ep && *cp != quote)
939 *ret_len = cp - *ret_cp;
943 msg (SW, _("Scope of string exceeds line."));
948 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
950 *ret_len = cp - *ret_cp;
954 int beginning_column;
956 dfm_set_record (dlsp->handle, *ret_cp);
957 beginning_column = dfm_get_cur_col (dlsp->handle) + 1;
959 dfm_set_record (dlsp->handle, cp);
961 return beginning_column;
965 static int read_from_data_list_fixed (void);
966 static int read_from_data_list_free (void);
967 static int read_from_data_list_list (void);
968 static int do_reading (int flag);
970 /* FLAG==0: reads any number of cases into temp_case and calls
971 write_case() for each one, returns garbage. FLAG!=0: reads one
972 case into temp_case and returns -2 on eof, -1 otherwise.
973 Uses dlsp as the relevant parsing description. */
975 do_reading (int flag)
981 dfm_push (dlsp->handle);
986 func = read_from_data_list_fixed;
989 func = read_from_data_list_free;
992 func = read_from_data_list_list;
1004 msg (SE, _("Attempt to read past end of file."));
1013 if (dlsp->end != NULL)
1017 printf ("end of file, setting %s to 1\n", dlsp->end->name);
1018 temp_case->data[dlsp->end->fv].f = 1.0;
1023 printf ("not end of file, setting %s to 0\n", dlsp->end->name);
1024 temp_case->data[dlsp->end->fv].f = 0.0;
1030 while (func () != -2)
1033 debug_printf ((_("abort in write_case()\n")));
1036 fh_close_handle (dlsp->handle);
1038 code = 0; /* prevent error at `return code;' */
1041 dfm_pop (dlsp->handle);
1046 /* Reads a case from the data file and parses it according to
1047 fixed-format syntax rules. */
1049 read_from_data_list_fixed (void)
1051 struct dls_var_spec *var_spec = dlsp->spec;
1054 if (!dfm_get_record (dlsp->handle, NULL))
1056 for (i = 1; i <= dlsp->nrec; i++)
1059 char *line = dfm_get_record (dlsp->handle, &len);
1063 /* Note that this can't occur on the first record. */
1064 msg (SW, _("Partial case of %d of %d records discarded."),
1069 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1073 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1074 di.v = &temp_case->data[var_spec->fv];
1076 di.f1 = var_spec->fc;
1077 di.format = var_spec->input;
1082 dfm_fwd_record (dlsp->handle);
1088 /* Reads a case from the data file and parses it according to
1089 free-format syntax rules. */
1091 read_from_data_list_free (void)
1093 struct dls_var_spec *var_spec;
1097 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1101 /* Cut out a field and read in a new record if necessary. */
1104 column = cut_field (&field, &len);
1108 if (dfm_get_record (dlsp->handle, NULL))
1109 dfm_fwd_record (dlsp->handle);
1110 if (!dfm_get_record (dlsp->handle, NULL))
1112 if (var_spec != dlsp->spec)
1113 msg (SW, _("Partial case discarded. The first variable "
1114 "missing was %s."), var_spec->name);
1124 di.v = &temp_case->data[var_spec->fv];
1127 di.format = var_spec->input;
1134 /* Reads a case from the data file and parses it according to
1135 list-format syntax rules. */
1137 read_from_data_list_list (void)
1139 struct dls_var_spec *var_spec;
1143 if (!dfm_get_record (dlsp->handle, NULL))
1146 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1148 /* Cut out a field and check for end-of-line. */
1149 int column = cut_field (&field, &len);
1154 msg (SW, _("Missing value(s) for all variables from %s onward. "
1155 "These will be filled with the system-missing value "
1156 "or blanks, as appropriate."),
1158 for (; var_spec; var_spec = var_spec->next)
1159 if (!var_spec->type)
1160 temp_case->data[var_spec->fv].f = SYSMIS;
1162 memset (temp_case->data[var_spec->fv].s, ' ', var_spec->type);
1171 di.v = &temp_case->data[var_spec->fv];
1174 di.format = var_spec->input;
1179 dfm_fwd_record (dlsp->handle);
1183 /* Destroys DATA LIST transformation or input program PGM. */
1185 destroy_dls (struct trns_header *pgm)
1187 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1188 struct dls_var_spec *iter, *next;
1197 fh_close_handle (dls->handle);
1200 /* Note that since this is exclusively an input program, C is
1201 guaranteed to be temp_case. */
1203 read_one_case (struct trns_header *t, struct ccase *c unused)
1205 dlsp = (struct data_list_pgm *) t;
1206 return do_reading (1);
1209 /* Reads all the records from the data file and passes them to
1212 data_list_source_read (void)
1218 /* Destroys the source's internal data. */
1220 data_list_source_destroy_source (void)
1222 destroy_dls ((struct trns_header *) & dls);
1225 struct case_stream data_list_source =
1228 data_list_source_read,
1231 data_list_source_destroy_source,
1236 /* REPEATING DATA. */
1238 /* Represents a number or a variable. */
1239 struct rpd_num_or_var
1241 int num; /* Value, or 0. */
1242 struct variable *var; /* Variable, if number==0. */
1245 /* REPEATING DATA private data structure. */
1246 struct repeating_data_trns
1248 struct trns_header h;
1249 struct dls_var_spec *spec; /* Variable parsing specifications. */
1250 struct file_handle *handle; /* Input file, never NULL. */
1251 /* Do not reorder preceding fields. */
1253 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1254 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1255 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1256 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1257 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1258 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1259 int id_beg, id_end; /* ID subcommand, beginning & end columns. */
1260 struct variable *id_var; /* ID subcommand, DATA LIST variable. */
1261 struct fmt_spec id_spec; /* ID subcommand, input format spec. */
1264 /* Information about the transformation being parsed. */
1265 static struct repeating_data_trns rpd;
1267 static int read_one_set_of_repetitions (struct trns_header *, struct ccase *);
1268 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1269 static int parse_repeating_data (void);
1270 static void find_variable_input_spec (struct variable *v,
1271 struct fmt_spec *spec);
1273 /* Parses the REPEATING DATA command. */
1275 cmd_repeating_data (void)
1277 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1280 /* Bits are set when a particular subcommand has been seen. */
1283 lex_match_id ("REPEATING");
1284 lex_match_id ("DATA");
1286 assert (vfm_source == &input_program_source
1287 || vfm_source == &file_type_source);
1289 rpd.handle = default_handle;
1290 rpd.starts_beg.num = 0;
1291 rpd.starts_beg.var = NULL;
1292 rpd.starts_end = rpd.occurs = rpd.length = rpd.cont_beg
1293 = rpd.cont_end = rpd.starts_beg;
1294 rpd.id_beg = rpd.id_end = 0;
1304 if (lex_match_id ("FILE"))
1307 rpd.handle = fh_parse_file_handle ();
1310 if (rpd.handle != default_handle)
1312 msg (SE, _("REPEATING DATA must use the same file as its "
1313 "corresponding DATA LIST or FILE TYPE."));
1317 else if (lex_match_id ("STARTS"))
1322 msg (SE, _("STARTS subcommand given multiple times."));
1327 if (!parse_num_or_var (&rpd.starts_beg, "STARTS beginning column"))
1330 lex_negative_to_dash ();
1331 if (lex_match ('-'))
1333 if (!parse_num_or_var (&rpd.starts_end, "STARTS ending column"))
1336 /* Otherwise, rpd.starts_end is left uninitialized.
1337 This is okay. We will initialize it later from the
1338 record length of the file. We can't do this now
1339 because we can't be sure that the user has specified
1340 the file handle yet. */
1343 if (rpd.starts_beg.num != 0 && rpd.starts_end.num != 0
1344 && rpd.starts_beg.num > rpd.starts_end.num)
1346 msg (SE, _("STARTS beginning column (%d) exceeds "
1347 "STARTS ending column (%d)."),
1348 rpd.starts_beg.num, rpd.starts_end.num);
1352 else if (lex_match_id ("OCCURS"))
1357 msg (SE, _("OCCURS subcommand given multiple times."));
1362 if (!parse_num_or_var (&rpd.occurs, "OCCURS"))
1365 else if (lex_match_id ("LENGTH"))
1370 msg (SE, _("LENGTH subcommand given multiple times."));
1375 if (!parse_num_or_var (&rpd.length, "LENGTH"))
1378 else if (lex_match_id ("CONTINUED"))
1383 msg (SE, _("CONTINUED subcommand given multiple times."));
1388 if (!lex_match ('/'))
1390 if (!parse_num_or_var (&rpd.cont_beg, "CONTINUED beginning column"))
1393 lex_negative_to_dash ();
1395 && !parse_num_or_var (&rpd.cont_end,
1396 "CONTINUED ending column"))
1399 if (rpd.cont_beg.num != 0 && rpd.cont_end.num != 0
1400 && rpd.cont_beg.num > rpd.cont_end.num)
1402 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1403 "CONTINUED ending column (%d)."),
1404 rpd.cont_beg.num, rpd.cont_end.num);
1409 rpd.cont_beg.num = 1;
1411 else if (lex_match_id ("ID"))
1416 msg (SE, _("ID subcommand given multiple times."));
1421 if (!lex_force_int ())
1423 if (lex_integer () < 1)
1425 msg (SE, _("ID beginning column (%ld) must be positive."),
1429 rpd.id_beg = lex_integer ();
1432 lex_negative_to_dash ();
1434 if (lex_match ('-'))
1436 if (!lex_force_int ())
1438 if (lex_integer () < 1)
1440 msg (SE, _("ID ending column (%ld) must be positive."),
1444 if (lex_integer () < rpd.id_end)
1446 msg (SE, _("ID ending column (%ld) cannot be less than "
1447 "ID beginning column (%d)."),
1448 lex_integer (), rpd.id_beg);
1452 rpd.id_end = lex_integer ();
1455 else rpd.id_end = rpd.id_beg;
1457 if (!lex_force_match ('='))
1459 rpd.id_var = parse_variable ();
1460 if (rpd.id_var == NULL)
1463 find_variable_input_spec (rpd.id_var, &rpd.id_spec);
1465 else if (lex_match_id ("TABLE"))
1467 else if (lex_match_id ("NOTABLE"))
1469 else if (lex_match_id ("DATA"))
1477 if (!lex_force_match ('/'))
1481 /* Comes here when DATA specification encountered. */
1482 if ((seen & (1 | 2)) != (1 | 2))
1484 if ((seen & 1) == 0)
1485 msg (SE, _("Missing required specification STARTS."));
1486 if ((seen & 2) == 0)
1487 msg (SE, _("Missing required specification OCCURS."));
1491 /* Enforce ID restriction. */
1492 if ((seen & 16) && !(seen & 8))
1494 msg (SE, _("ID specified without CONTINUED."));
1498 /* Calculate starts_end, cont_end if necessary. */
1499 if (rpd.starts_end.num == 0 && rpd.starts_end.var == NULL)
1500 rpd.starts_end.num = fh_record_width (rpd.handle);
1501 if (rpd.cont_end.num == 0 && rpd.starts_end.var == NULL)
1502 rpd.cont_end.num = fh_record_width (rpd.handle);
1504 /* Calculate length if possible. */
1505 if ((seen & 4) == 0)
1507 struct dls_var_spec *iter;
1509 for (iter = rpd.spec; iter; iter = iter->next)
1511 if (iter->lc > rpd.length.num)
1512 rpd.length.num = iter->lc;
1514 assert (rpd.length.num != 0);
1518 if (!parse_repeating_data ())
1522 dump_fixed_table ();
1525 struct repeating_data_trns *new_trns;
1527 rpd.h.proc = read_one_set_of_repetitions;
1528 rpd.h.free = destroy_dls;
1530 new_trns = xmalloc (sizeof *new_trns);
1531 memcpy (new_trns, &rpd, sizeof *new_trns);
1532 add_transformation ((struct trns_header *) new_trns);
1535 return lex_end_of_command ();
1538 /* Because of the way that DATA LIST is structured, it's not trivial
1539 to determine what input format is associated with a given variable.
1540 This function finds the input format specification for variable V
1541 and puts it in SPEC. */
1543 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1547 for (i = 0; i < n_trns; i++)
1549 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1551 if (pgm->h.proc == read_one_case)
1553 struct dls_var_spec *iter;
1555 for (iter = pgm->spec; iter; iter = iter->next)
1558 *spec = iter->input;
1567 /* Parses a number or a variable name from the syntax file and puts
1568 the results in VALUE. Ensures that the number is at least 1; else
1569 emits an error based on MESSAGE. Returns nonzero only if
1572 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1577 value->var = parse_variable ();
1578 if (value->var == NULL)
1580 if (value->var->type == ALPHA)
1582 msg (SE, _("String variable not allowed here."));
1586 else if (lex_integer_p ())
1588 value->num = lex_integer ();
1592 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1598 msg (SE, _("Variable or integer expected for %s."), message);
1604 /* Parses data specifications for repeating data groups. Taken from
1605 parse_fixed(). Returns nonzero only if successful. */
1607 parse_repeating_data (void)
1614 while (token != '.')
1616 fx.spec.rec = fx.recno;
1618 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
1623 if (!fixed_parse_compatible ())
1626 else if (token == '(')
1630 if (!fixed_parse_fortran ())
1635 msg (SE, _("SPSS-like or FORTRAN-like format "
1636 "specification expected after variable names."));
1640 for (i = 0; i < fx.nname; i++)
1646 lex_error (_("expecting end of command"));
1653 for (i = 0; i < fx.nname; i++)
1659 /* Obtains the real value for rpd_num_or_var N in case C and returns
1660 it. The valid range is nonnegative numbers, but numbers outside
1661 this range can be returned and should be handled by the caller as
1664 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1669 assert (n->num == 0);
1672 double v = c->data[n->var->fv].f;
1674 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1683 /* Parses one record of repeated data and outputs corresponding cases.
1684 Repeating data is present in line LINE having length LEN.
1685 Repeating data begins in column BEG and continues through column
1686 END inclusive (1-based columns); occurrences are offset OFS columns
1687 from each other. C is the case that will be filled in; T is the
1688 REPEATING DATA transformation. The record ID will be verified if
1689 COMPARE_ID is nonzero; if it is zero, then the record ID is
1690 initialized to the ID present in the case (assuming that ID
1691 location was specified by the user). Returns number of occurrences
1692 parsed up to the specified maximum of MAX_OCCURS. */
1694 rpd_parse_record (int beg, int end, int ofs, struct ccase *c,
1695 struct repeating_data_trns *t,
1696 char *line, int len, int compare_id, int max_occurs)
1701 /* Handle record ID values. */
1704 static union value comparator;
1710 data_in_finite_line (&di, line, len, t->id_beg, t->id_end);
1714 di.format = t->id_spec;
1720 if (compare_id == 0)
1722 else if ((t->id_var->type == NUMERIC && comparator.f != v.f)
1723 || (t->id_var->type == ALPHA
1724 && strncmp (comparator.s, v.s, t->id_var->width)))
1729 if (!data_out (comp_str, &t->id_var->print, &comparator))
1731 if (!data_out (v_str, &t->id_var->print, &v))
1734 comp_str[t->id_var->print.w] = v_str[t->id_var->print.w] = 0;
1737 _("Mismatched case ID (%s). Expected value was %s."),
1744 /* Iterate over the set of expected occurrences and record each of
1745 them as a separate case. FIXME: We need to execute any
1746 transformations that follow the current one. */
1750 for (occurrences = 0; occurrences < max_occurs; )
1752 if (cur + ofs > end + 1)
1757 struct dls_var_spec *var_spec = t->spec;
1759 for (; var_spec; var_spec = var_spec->next)
1761 int fc = var_spec->fc - 1 + cur;
1762 int lc = var_spec->lc - 1 + cur;
1764 if (fc > len && !warned && var_spec->input.type != FMT_A)
1769 _("Variable %s startging in column %d extends "
1770 "beyond physical record length of %d."),
1771 var_spec->v->name, fc, len);
1777 data_in_finite_line (&di, line, len, fc, lc);
1778 di.v = &c->data[var_spec->fv];
1781 di.format = var_spec->input;
1799 /* Analogous to read_one_case; reads one set of repetitions of the
1800 elements in the REPEATING DATA structure. Returns -1 on success,
1801 -2 on end of file or on failure. */
1803 read_one_set_of_repetitions (struct trns_header *trns, struct ccase *c)
1805 dfm_push (dlsp->handle);
1808 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1810 char *line; /* Current record. */
1811 int len; /* Length of current record. */
1813 int starts_beg; /* Starting column. */
1814 int starts_end; /* Ending column. */
1815 int occurs; /* Number of repetitions. */
1816 int length; /* Length of each occurrence. */
1817 int cont_beg; /* Starting column for continuation lines. */
1818 int cont_end; /* Ending column for continuation lines. */
1820 int occurs_left; /* Number of occurrences remaining. */
1822 int code; /* Return value from rpd_parse_record(). */
1824 int skip_first_record = 0;
1826 /* Read the current record. */
1827 dfm_bkwd_record (dlsp->handle, 1);
1828 line = dfm_get_record (dlsp->handle, &len);
1831 dfm_fwd_record (dlsp->handle);
1833 /* Calculate occurs, length. */
1834 occurs_left = occurs = realize_value (&t->occurs, c);
1837 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1840 starts_beg = realize_value (&t->starts_beg, c);
1841 if (starts_beg <= 0)
1843 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1848 starts_end = realize_value (&t->starts_end, c);
1849 if (starts_end < starts_beg)
1851 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1852 "beginning column (%d)."),
1853 starts_end, starts_beg);
1854 skip_first_record = 1;
1856 length = realize_value (&t->length, c);
1859 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1861 occurs = occurs_left = 1;
1863 cont_beg = realize_value (&t->cont_beg, c);
1866 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1871 cont_end = realize_value (&t->cont_end, c);
1872 if (cont_end < cont_beg)
1874 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1875 "beginning column (%d)."),
1876 cont_end, cont_beg);
1880 /* Parse the first record. */
1881 if (!skip_first_record)
1883 code = rpd_parse_record (starts_beg, starts_end, length, c, t, line,
1884 len, 0, occurs_left);
1888 else if (cont_beg == 0)
1891 /* Make sure, if some occurrences are left, that we have
1892 continuation records. */
1893 occurs_left -= code;
1894 if (occurs_left != 0 && cont_beg == 0)
1897 _("Number of repetitions specified on OCCURS (%d) "
1898 "exceed number of repetitions available in "
1899 "space on STARTS (%d), and CONTINUED not specified."),
1904 /* Go on to additional records. */
1905 while (occurs_left != 0)
1907 assert (occurs_left >= 0);
1909 /* Read in another record. */
1910 line = dfm_get_record (dlsp->handle, &len);
1914 _("Unexpected end of file with %d repetitions "
1915 "remaining out of %d."),
1916 occurs_left, occurs);
1919 dfm_fwd_record (dlsp->handle);
1921 /* Parse this record. */
1922 code = rpd_parse_record (cont_beg, cont_end, length, c, t, line,
1923 len, 1, occurs_left);
1926 occurs_left -= code;
1930 dfm_pop (dlsp->handle);
1932 /* FIXME: This is a kluge until we've implemented multiplexing of